From 5427f207852d5b905e251a5a728c8604d3594d58 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Oct 2017 22:32:34 +0100 Subject: [PATCH 001/260] drm/i915: Bump wait-times for the final CS interrupt before parking In the idle worker we drop the prolonged GT wakeref used to cover such essentials as interrupt delivery. (When a CS interrupt arrives, we also assert that the GT is awake.) However, it turns out that 10ms is not long enough to be assured that the last CS interrupt has been delivered, so bump that to 200ms, and move the entirety of that wait to before we take the struct_mutex to avoid blocking. As this is now a potentially long wait, restore the earlier behaviour of bailing out early when a new request arrives. v2: Break out the repeated check for new requests into its own little helper to try and improve the self-commentary. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Imre Deak Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171023213237.26536-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 37 +++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 026cb52ece0b..bb0e85043e01 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3276,13 +3276,20 @@ i915_gem_retire_work_handler(struct work_struct *work) } } +static inline bool +new_requests_since_last_retire(const struct drm_i915_private *i915) +{ + return (READ_ONCE(i915->gt.active_requests) || + work_pending(&i915->gt.idle_work.work)); +} + static void i915_gem_idle_work_handler(struct work_struct *work) { struct drm_i915_private *dev_priv = container_of(work, typeof(*dev_priv), gt.idle_work.work); - struct drm_device *dev = &dev_priv->drm; bool rearm_hangcheck; + ktime_t end; if (!READ_ONCE(dev_priv->gt.awake)) return; @@ -3291,14 +3298,21 @@ i915_gem_idle_work_handler(struct work_struct *work) * Wait for last execlists context complete, but bail out in case a * new request is submitted. */ - wait_for(intel_engines_are_idle(dev_priv), 10); - if (READ_ONCE(dev_priv->gt.active_requests)) - return; + end = ktime_add_ms(ktime_get(), 200); + do { + if (new_requests_since_last_retire(dev_priv)) + return; + + if (intel_engines_are_idle(dev_priv)) + break; + + usleep_range(100, 500); + } while (ktime_before(ktime_get(), end)); rearm_hangcheck = cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); - if (!mutex_trylock(&dev->struct_mutex)) { + if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { /* Currently busy, come back later */ mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, @@ -3310,13 +3324,14 @@ i915_gem_idle_work_handler(struct work_struct *work) * New request retired after this work handler started, extend active * period until next instance of the work. */ - if (work_pending(work)) + if (new_requests_since_last_retire(dev_priv)) goto out_unlock; - if (dev_priv->gt.active_requests) - goto out_unlock; - - if (wait_for(intel_engines_are_idle(dev_priv), 10)) + /* + * We are committed now to parking the engines, make sure there + * will be no more interrupts arriving later. + */ + if (!intel_engines_are_idle(dev_priv)) DRM_ERROR("Timeout waiting for engines to idle\n"); intel_engines_mark_idle(dev_priv); @@ -3330,7 +3345,7 @@ i915_gem_idle_work_handler(struct work_struct *work) gen6_rps_idle(dev_priv); intel_runtime_pm_put(dev_priv); out_unlock: - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev_priv->drm.struct_mutex); out_rearm: if (rearm_hangcheck) { From ff320d6e72ff3600184a1c01ea75b689b4b8a16d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Oct 2017 22:32:35 +0100 Subject: [PATCH 002/260] drm/i915: Synchronize irq before parking each engine When we park the engine (upon idling), we kill the irq tasklet. However, to be sure that it is not restarted by a spurious interrupt after doing so, flush the interrupt handler before parking. As we only park the engines when we believe the system is idle, there should not be any interrupts to distrub us; so flushing the final in-flight interrupt should be sufficient. (However, we are still dependent on the HW behaving in an orderly and timely fashion, which we shall endeavour to improve upon later.) Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20171023213237.26536-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bb0e85043e01..bafe1cdd57d8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3327,6 +3327,19 @@ i915_gem_idle_work_handler(struct work_struct *work) if (new_requests_since_last_retire(dev_priv)) goto out_unlock; + /* + * Be paranoid and flush a concurrent interrupt to make sure + * we don't reactivate any irq tasklets after parking. + * + * FIXME: Note that even though we have waited for execlists to be idle, + * there may still be an in-flight interrupt even though the CSB + * is now empty. synchronize_irq() makes sure that a residual interrupt + * is completed before we continue, but it doesn't prevent the HW from + * raising a spurious interrupt later. To complete the shield we should + * coordinate disabling the CS irq with flushing the interrupts. + */ + synchronize_irq(dev_priv->drm.irq); + /* * We are committed now to parking the engines, make sure there * will be no more interrupts arriving later. From 4a118ecbe99c93cf9f9582e83a88d03f18d6cb84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Oct 2017 22:32:36 +0100 Subject: [PATCH 003/260] drm/i915: Filter out spurious execlists context-switch interrupts Back in commit a4b2b01523a8 ("drm/i915: Don't mark an execlists context-switch when idle") we noticed the presence of late context-switch interrupts. We were able to filter those out by looking at whether the ELSP remained active, but in commit beecec901790 ("drm/i915/execlists: Preemption!") that became problematic as we now anticipate receiving a context-switch event for preemption while ELSP may be empty. To restore the spurious interrupt suppression, add a counter for the expected number of pending context-switches and skip if we do not need to handle this interrupt to make forward progress. v2: Don't forget to switch on for preempt. v3: Reduce the counter to a on/off boolean tracker. Declare the HW as active when we first submit, and idle after the final completion event (with which we confirm the HW says it is idle), and track each source of activity separately. With a finite number of sources, it should aide us in debugging which gets stuck. Fixes: beecec901790 ("drm/i915/execlists: Preemption!") Signed-off-by: Chris Wilson Cc: Michal Winiarski Cc: Tvrtko Ursulin Cc: Arkadiusz Hiler Cc: Mika Kuoppala Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171023213237.26536-3-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_guc_submission.c | 3 ++ drivers/gpu/drm/i915/i915_irq.c | 6 ++-- drivers/gpu/drm/i915/intel_engine_cs.c | 5 ++-- drivers/gpu/drm/i915/intel_lrc.c | 27 ++++++++++++----- drivers/gpu/drm/i915/intel_ringbuffer.h | 34 ++++++++++++++++++++-- 5 files changed, 62 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index a2e8114b739d..f84c267728fd 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -610,6 +610,7 @@ done: execlists->first = rb; if (submit) { port_assign(port, last); + execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); i915_guc_submit(engine); } spin_unlock_irq(&engine->timeline->lock); @@ -633,6 +634,8 @@ static void i915_guc_irq_handler(unsigned long data) rq = port_request(&port[0]); } + if (!rq) + execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); if (!port_isset(last_port)) i915_guc_dequeue(engine); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b1296a55c1e4..f8205841868b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1388,8 +1388,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) bool tasklet = false; if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; + if (READ_ONCE(engine->execlists.active)) { + __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet = true; + } } if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a47a9c6bea52..ab5bf4e2e28e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1548,8 +1548,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) return false; - /* Both ports drained, no more ELSP submission? */ - if (port_request(&engine->execlists.port[0])) + /* Waiting to drain ELSP? */ + if (READ_ONCE(engine->execlists.active)) return false; /* ELSP is empty, but there are ready requests? */ @@ -1749,6 +1749,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) idx); } } + drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active); rcu_read_unlock(); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7f45dd7dc3e5..5b87d5284a84 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -575,7 +575,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the state of the GPU is known (idle). */ inject_preempt_context(engine); - execlists->preempt = true; + execlists_set_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); goto unlock; } else { /* @@ -683,8 +684,10 @@ done: unlock: spin_unlock_irq(&engine->timeline->lock); - if (submit) + if (submit) { + execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); execlists_submit_ports(engine); + } } static void @@ -696,6 +699,7 @@ execlist_cancel_port_requests(struct intel_engine_execlists *execlists) while (num_ports-- && port_isset(port)) { struct drm_i915_gem_request *rq = port_request(port); + GEM_BUG_ON(!execlists->active); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); i915_gem_request_put(rq); @@ -861,15 +865,21 @@ static void intel_lrc_irq_handler(unsigned long data) unwind_incomplete_requests(engine); spin_unlock_irq(&engine->timeline->lock); - GEM_BUG_ON(!execlists->preempt); - execlists->preempt = false; + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_PREEMPT)); + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); continue; } if (status & GEN8_CTX_STATUS_PREEMPTED && - execlists->preempt) + execlists_is_active(execlists, + EXECLISTS_ACTIVE_PREEMPT)) continue; + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_USER)); + /* Check the context/desc id for this event matches */ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); @@ -892,6 +902,9 @@ static void intel_lrc_irq_handler(unsigned long data) /* After the final element, the hw should be idle */ GEM_BUG_ON(port_count(port) == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + if (port_count(port) == 0) + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_USER); } if (head != execlists->csb_head) { @@ -901,7 +914,7 @@ static void intel_lrc_irq_handler(unsigned long data) } } - if (!execlists->preempt) + if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) execlists_dequeue(engine); intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); @@ -1460,7 +1473,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); execlists->csb_head = -1; - execlists->preempt = false; + execlists->active = 0; /* After a GPU reset, we may have requests to replay */ if (!i915_modparams.enable_guc_submission && execlists->first) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 17186f067408..6a42ed618a28 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -241,9 +241,17 @@ struct intel_engine_execlists { } port[EXECLIST_MAX_PORTS]; /** - * @preempt: are we currently handling a preempting context switch? + * @active: is the HW active? We consider the HW as active after + * submitting any context for execution and until we have seen the + * last context completion event. After that, we do not expect any + * more events until we submit, and so can park the HW. + * + * As we have a small number of different sources from which we feed + * the HW, we track the state of each inside a single bitfield. */ - bool preempt; + unsigned int active; +#define EXECLISTS_ACTIVE_USER 0 +#define EXECLISTS_ACTIVE_PREEMPT 1 /** * @port_mask: number of execlist ports - 1 @@ -525,6 +533,27 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; +static inline void +execlists_set_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __set_bit(bit, (unsigned long *)&execlists->active); +} + +static inline void +execlists_clear_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __clear_bit(bit, (unsigned long *)&execlists->active); +} + +static inline bool +execlists_is_active(const struct intel_engine_execlists *execlists, + unsigned int bit) +{ + return test_bit(bit, (unsigned long *)&execlists->active); +} + static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) { @@ -538,6 +567,7 @@ execlists_port_complete(struct intel_engine_execlists * const execlists, const unsigned int m = execlists->port_mask; GEM_BUG_ON(port_index(port, execlists) != 0); + GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); memmove(port, port + 1, m * sizeof(struct execlist_port)); memset(port + m, 0, sizeof(struct execlist_port)); From 64b80085dd3603d401fc05879f700b86a3a5c8e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Oct 2017 12:55:01 +0100 Subject: [PATCH 004/260] drm/i915/execlists: Remove the priority "optimisation" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Originally we set the priority to max upon inserting the request into the execlists queue (and removing it from the scheduler lists). We could then use the prio==INT_MAX as a shortcut within execlists_schedule() to detect the end of the dependency chain. Since commit 1f181225f8ec ("drm/i915/execlists: Keep request->priority for its lifetime") this is no longer true as we use the request completion as an indicator the schedule dependency chain is complete instead. (This allows us to then reschedule requests even when its context is in flight.) However, this makes the GEM_BUG_ON() inside execlists_schedule() racy as we may change the rq->prio at the same time. As the assertion is useful, let's keep the assertion and remove the micro-optimisation. Fixes: 1f181225f8ec ("drm/i915/execlists: Keep request->priority for its lifetime") Signed-off-by: Chris Wilson Cc: Michał Winiarski Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171024115501.21033-1-chris@chris-wilson.co.uk Reviewed-by: Michał Winiarski --- drivers/gpu/drm/i915/intel_lrc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5b87d5284a84..d36e25607435 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -734,7 +734,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { INIT_LIST_HEAD(&rq->priotree.link); - rq->priotree.priority = INT_MAX; dma_fence_set_error(&rq->fence, -EIO); __i915_gem_request_submit(rq); @@ -891,7 +890,6 @@ static void intel_lrc_irq_handler(unsigned long data) execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); trace_i915_gem_request_out(rq); - rq->priotree.priority = INT_MAX; i915_gem_request_put(rq); execlists_port_complete(execlists, port); From 191f896085cf3b5d85920d58a759da4eea141721 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 24 Oct 2017 16:27:28 +0100 Subject: [PATCH 005/260] drm/i915/perf: fix perf enable/disable ioctls with 32bits userspace The compat callback was missing and triggered failures in 32bits userspace when enabling/disable the perf stream. We don't require any particular processing here as these ioctls don't take any argument. Signed-off-by: Lionel Landwerlin Fixes: eec688e1420 ("drm/i915: Add i915 perf infrastructure") Cc: linux-stable Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171024152728.4873-1-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 1383a2995a69..59ee808f8fd9 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2537,6 +2537,10 @@ static const struct file_operations fops = { .poll = i915_perf_poll, .read = i915_perf_read, .unlocked_ioctl = i915_perf_ioctl, + /* Our ioctl have no arguments, so it's safe to use the same function + * to handle 32bits compatibility. + */ + .compat_ioctl = i915_perf_ioctl, }; From 436009b578ab872619350a4c4f2b36c98c7e55a2 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 23 Oct 2017 10:39:20 -0700 Subject: [PATCH 006/260] drm/i915/cnl: Force DDI_A_4_LANES when needed. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we faced in BXT, on CNL DDI_A_4_LANES is not set as expected when system is boot with multiple monitors connected. This result in wrong lane setup impacting the max data rate available and consequently blocking modeset on eDP, resulting in a blank screen. Most of CNL SKUs don't support DDI-E. The only SKU that supports DDI-E is the same that supports the full A/E split called DDI-F. Also when DDI-F is used DDI-E cannot be used because they share Interrupts. So DDI-E is almost useless. Anyways let's consider this is possible and rely on VBT for that. This patch was initialy start by Clint, but required many changes including full commit message. So Credits entirely to Clint for finding this. v2: Extract all messy conditions into a helper function as suggested by Ville. Along with simplification I removed the debug message on the working case since now all conditions are grouped. v3: Split the conditions even more as suggested by Ville. Get's cleaner and easier to add new cases in the future. Suggested-by: Clint Taylor Cc: Clint Taylor Cc: Mika Kahola Cc: Jani Nikula Cc: Ville Syrjälä Signed-off-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171023173920.22890-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 46 ++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 933c18fd4258..08f04df08fa5 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2711,6 +2711,34 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) return connector; } +static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dport) +{ + struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); + + if (dport->port != PORT_A) + return false; + + if (dport->saved_port_bits & DDI_A_4_LANES) + return false; + + /* Broxton/Geminilake: Bspec says that DDI_A_4_LANES is the only + * supported configuration + */ + if (IS_GEN9_LP(dev_priv)) + return true; + + /* Cannonlake: Most of SKUs don't support DDI_E, and the only + * one who does also have a full A/E split called + * DDI_F what makes DDI_E useless. However for this + * case let's trust VBT info. + */ + if (IS_CANNONLAKE(dev_priv) && + !intel_bios_is_port_present(dev_priv, PORT_E)) + return true; + + return false; +} + void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) { struct intel_digital_port *intel_dig_port; @@ -2820,18 +2848,14 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) } /* - * Bspec says that DDI_A_4_LANES is the only supported configuration - * for Broxton. Yet some BIOS fail to set this bit on port A if eDP - * wasn't lit up at boot. Force this bit on in our internal - * configuration so that we use the proper lane count for our - * calculations. + * Some BIOS might fail to set this bit on port A if eDP + * wasn't lit up at boot. Force this bit set when needed + * so we use the proper lane count for our calculations. */ - if (IS_GEN9_LP(dev_priv) && port == PORT_A) { - if (!(intel_dig_port->saved_port_bits & DDI_A_4_LANES)) { - DRM_DEBUG_KMS("BXT BIOS forgot to set DDI_A_4_LANES for port A; fixing\n"); - intel_dig_port->saved_port_bits |= DDI_A_4_LANES; - max_lanes = 4; - } + if (intel_ddi_a_force_4_lanes(intel_dig_port)) { + DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n"); + intel_dig_port->saved_port_bits |= DDI_A_4_LANES; + max_lanes = 4; } intel_dig_port->max_lanes = max_lanes; From 0aab201b4ad10fa530e4d12f8ea30b6f0a0540bd Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 23 Oct 2017 15:46:12 -0700 Subject: [PATCH 007/260] drm/i915/cnl: Get RC6 working. On CNL, individual wake rate limit was added to each engine. GT can only go to RC6 if both Render and Media engines are individually qualified. So we need to set their individual wake rate limit. +-----------------+---------------+--------------+--------------+ | | GT RC6 | Render C6 | Media C6 | +-----------------+---------------+--------------+--------------+ | Wake rate limit | 0xA09C[31:16] | 0xA09C[15:0] | 0xA0A0[15:0] | +-----------------+---------------+--------------+--------------+ v2: - Tune Render and Media wake rate values according to some extra info I got from HW engineers. Value can be tuned, but for now these are the recommended values. - Fix typos pointed by James. Cc: Nathan Ciobanu Cc: Wayne Boyer Cc: Joe Konno Cc: David Weinehall Signed-off-by: Rodrigo Vivi Reviewed-by: James Ausmus Reviewed-by: David Weinehall Link: https://patchwork.freedesktop.org/patch/msgid/20171023224612.27208-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 68a58cce6ab1..f138eae82bf0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7905,6 +7905,7 @@ enum { #define GEN6_RC1_WAKE_RATE_LIMIT _MMIO(0xA098) #define GEN6_RC6_WAKE_RATE_LIMIT _MMIO(0xA09C) #define GEN6_RC6pp_WAKE_RATE_LIMIT _MMIO(0xA0A0) +#define GEN10_MEDIA_WAKE_RATE_LIMIT _MMIO(0xA0A0) #define GEN6_RC_EVALUATION_INTERVAL _MMIO(0xA0A8) #define GEN6_RC_IDLE_HYSTERSIS _MMIO(0xA0AC) #define GEN6_RC_SLEEP _MMIO(0xA0B0) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c42a65a93b3a..1ead51754e8f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6604,12 +6604,19 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC_CONTROL, 0); /* 2b: Program RC6 thresholds.*/ - - /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */ - if (IS_SKYLAKE(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10) { + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + } else if (IS_SKYLAKE(dev_priv)) { + /* + * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only + * when CPG is enabled + */ I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); - else + } else { I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); + } + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ for_each_engine(engine, dev_priv, id) From b40c88fda1300b2dd65fcc3377d54b724edf47e6 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Tue, 3 Oct 2017 10:59:48 -0700 Subject: [PATCH 008/260] drm/i915/cnl: Update the DMC version on CNL The latest version of DMC on CNL is 1.06. Update the version so as to load the latest firmware. Release Notes: Version: 1.06 1. DDI and AUX IO related fix. v2: Improve the prefixes in commit message. Add Release Notes directly. (Rodrigo) Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1507053588-677-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index da9de47562b8..3e1f86d0c6cc 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -37,8 +37,8 @@ #define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin" #define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) -#define I915_CSR_CNL "i915/cnl_dmc_ver1_04.bin" -#define CNL_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) +#define I915_CSR_CNL "i915/cnl_dmc_ver1_06.bin" +#define CNL_CSR_VERSION_REQUIRED CSR_VERSION(1, 6) #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" MODULE_FIRMWARE(I915_CSR_KBL); From 2b58417ffbca9fafa1d54b9f1272965f98f456ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Oct 2017 12:52:07 +0300 Subject: [PATCH 009/260] drm/i915: Clean up some cdclk switch statements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Redo some switch statements in the cdclk code to use a common fall through for the default case. Makes everything look a bit more uniform Cc: Mika Kahola Cc: Manasi Navare Cc: Rodrigo Vivi Signed-off-by: Ville Syrjälä Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20171024095216.1638-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 68 +++++++++++++++--------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index b2a6d62b71c0..4bffd31a8924 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -681,6 +681,13 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, val &= ~LCPLL_CLK_FREQ_MASK; switch (cdclk) { + default: + MISSING_CASE(cdclk); + /* fall through */ + case 337500: + val |= LCPLL_CLK_FREQ_337_5_BDW; + data = 2; + break; case 450000: val |= LCPLL_CLK_FREQ_450; data = 0; @@ -689,17 +696,10 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, val |= LCPLL_CLK_FREQ_54O_BDW; data = 1; break; - case 337500: - val |= LCPLL_CLK_FREQ_337_5_BDW; - data = 2; - break; case 675000: val |= LCPLL_CLK_FREQ_675_BDW; data = 3; break; - default: - WARN(1, "invalid cdclk frequency\n"); - return; } I915_WRITE(LCPLL_CTL, val); @@ -926,8 +926,6 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, u32 freq_select, pcu_ack; int ret; - WARN_ON((cdclk == 24000) != (vco == 0)); - mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, @@ -942,6 +940,15 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, /* set CDCLK_CTL */ switch (cdclk) { + default: + WARN_ON(cdclk != dev_priv->cdclk.hw.ref); + WARN_ON(vco != 0); + /* fall through */ + case 308571: + case 337500: + freq_select = CDCLK_FREQ_337_308; + pcu_ack = 0; + break; case 450000: case 432000: freq_select = CDCLK_FREQ_450_432; @@ -951,12 +958,6 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, freq_select = CDCLK_FREQ_540; pcu_ack = 2; break; - case 308571: - case 337500: - default: - freq_select = CDCLK_FREQ_337_308; - pcu_ack = 0; - break; case 617143: case 675000: freq_select = CDCLK_FREQ_675_617; @@ -1110,6 +1111,7 @@ static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) switch (cdclk) { default: MISSING_CASE(cdclk); + /* fall through */ case 144000: case 288000: case 384000: @@ -1134,6 +1136,7 @@ static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) switch (cdclk) { default: MISSING_CASE(cdclk); + /* fall through */ case 79200: case 158400: case 316800: @@ -1246,24 +1249,22 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, /* cdclk = vco / 2 / div{1,1.5,2,4} */ switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - case 8: - divider = BXT_CDCLK_CD2X_DIV_SEL_4; - break; - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; + default: + WARN_ON(cdclk != dev_priv->cdclk.hw.ref); + WARN_ON(vco != 0); + /* fall through */ + case 2: + divider = BXT_CDCLK_CD2X_DIV_SEL_1; break; case 3: WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; break; - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; + case 4: + divider = BXT_CDCLK_CD2X_DIV_SEL_2; break; - default: - WARN_ON(cdclk != dev_priv->cdclk.hw.ref); - WARN_ON(vco != 0); - - divider = BXT_CDCLK_CD2X_DIV_SEL_1; + case 8: + divider = BXT_CDCLK_CD2X_DIV_SEL_4; break; } @@ -1532,18 +1533,16 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, /* cdclk = vco / 2 / div{1,2} */ switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; - break; - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; default: WARN_ON(cdclk != dev_priv->cdclk.hw.ref); WARN_ON(vco != 0); - + /* fall through */ + case 2: divider = BXT_CDCLK_CD2X_DIV_SEL_1; break; + case 4: + divider = BXT_CDCLK_CD2X_DIV_SEL_2; + break; } switch (cdclk) { @@ -1592,6 +1591,7 @@ static int cnl_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) switch (cdclk) { default: MISSING_CASE(cdclk); + /* fall through */ case 168000: case 336000: ratio = dev_priv->cdclk.hw.ref == 19200 ? 35 : 28; From 64600bd5b8280208dfbd33bcd7a5f32255058f07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Oct 2017 12:52:08 +0300 Subject: [PATCH 010/260] drm/i915: Start tracking voltage level in the cdclk state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For CNL we'll need to start considering the port clocks when we select the voltage level for the system agent. To that end start tracking the voltage in the cdclk state (since that already has to adjust it). v2: s/voltage/voltage_level/ (Rodrigo) Cc: Mika Kahola Cc: Manasi Navare Cc: Rodrigo Vivi Signed-off-by: Ville Syrjälä Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171024095216.1638-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_cdclk.c | 31 +++++++++++++++++++------ drivers/gpu/drm/i915/intel_display.c | 8 +++---- drivers/gpu/drm/i915/intel_drv.h | 4 +++- drivers/gpu/drm/i915/intel_runtime_pm.c | 3 ++- 5 files changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 54b5d4c582b6..2c64c6680ac7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2228,6 +2228,7 @@ struct i915_oa_ops { struct intel_cdclk_state { unsigned int cdclk, vco, ref; + u8 voltage_level; }; struct drm_i915_private { diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 4bffd31a8924..6dad2efd5e27 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1690,17 +1690,34 @@ void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) } /** - * intel_cdclk_state_compare - Determine if two CDCLK states differ + * intel_cdclk_needs_modeset - Determine if two CDCLK states require a modeset on all pipes * @a: first CDCLK state * @b: second CDCLK state * * Returns: - * True if the CDCLK states are identical, false if they differ. + * True if the CDCLK states require pipes to be off during reprogramming, false if not. */ -bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, +bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a, const struct intel_cdclk_state *b) { - return memcmp(a, b, sizeof(*a)) == 0; + return a->cdclk != b->cdclk || + a->vco != b->vco || + a->ref != b->ref; +} + +/** + * intel_cdclk_changed - Determine if two CDCLK states are different + * @a: first CDCLK state + * @b: second CDCLK state + * + * Returns: + * True if the CDCLK states don't match, false if they do. + */ +bool intel_cdclk_changed(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) +{ + return intel_cdclk_needs_modeset(a, b) || + a->voltage_level != b->voltage_level; } /** @@ -1714,15 +1731,15 @@ bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, void intel_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state) { - if (intel_cdclk_state_compare(&dev_priv->cdclk.hw, cdclk_state)) + if (!intel_cdclk_changed(&dev_priv->cdclk.hw, cdclk_state)) return; if (WARN_ON_ONCE(!dev_priv->display.set_cdclk)) return; - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz, VCO %d kHz, ref %d kHz\n", + DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz, VCO %d kHz, ref %d kHz, voltage_level %d\n", cdclk_state->cdclk, cdclk_state->vco, - cdclk_state->ref); + cdclk_state->ref, cdclk_state->voltage_level); dev_priv->display.set_cdclk(dev_priv, cdclk_state); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e2ac976844d8..1d2031a5f4bb 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11931,16 +11931,16 @@ static int intel_modeset_checks(struct drm_atomic_state *state) * holding all the crtc locks, even if we don't end up * touching the hardware */ - if (!intel_cdclk_state_compare(&dev_priv->cdclk.logical, - &intel_state->cdclk.logical)) { + if (intel_cdclk_changed(&dev_priv->cdclk.logical, + &intel_state->cdclk.logical)) { ret = intel_lock_all_pipes(state); if (ret < 0) return ret; } /* All pipes must be switched off while we change the cdclk. */ - if (!intel_cdclk_state_compare(&dev_priv->cdclk.actual, - &intel_state->cdclk.actual)) { + if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, + &intel_state->cdclk.actual)) { ret = intel_modeset_all_pipes(state); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 47d022d48718..a551aadb157b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1323,8 +1323,10 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); void intel_update_max_cdclk(struct drm_i915_private *dev_priv); void intel_update_cdclk(struct drm_i915_private *dev_priv); void intel_update_rawclk(struct drm_i915_private *dev_priv); -bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, +bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a, const struct intel_cdclk_state *b); +bool intel_cdclk_changed(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b); void intel_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8af286c63d3b..8315499452dc 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -705,7 +705,8 @@ static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); dev_priv->display.get_cdclk(dev_priv, &cdclk_state); - WARN_ON(!intel_cdclk_state_compare(&dev_priv->cdclk.hw, &cdclk_state)); + /* Can't read out voltage_level so can't use intel_cdclk_changed() */ + WARN_ON(intel_cdclk_needs_modeset(&dev_priv->cdclk.hw, &cdclk_state)); gen9_assert_dbuf_enabled(dev_priv); From 999c5766f39e3a21aeeb40b5f00ee8d3b48c4331 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Oct 2017 12:52:09 +0300 Subject: [PATCH 011/260] drm/i915: Use cdclk_state->voltage on VLV/CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Store the punit DSPFREQUAR value into cdclk_state->voltage on VLV/CHV. Since we can actually read that out from the hardware this can give us a bit more cross checking between the hardware and software state. v2: Don't break waiting for cdclk change on VLV/CHV v3: Split out the cdclk sanity check in vlv_set_cdclk() (Rodrigo) v4: s/voltage/voltage_level/ (Rodrigo) Cc: Mika Kahola Cc: Manasi Navare Cc: Rodrigo Vivi Signed-off-by: Ville Syrjälä Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171024095216.1638-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 54 +++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 6dad2efd5e27..8bc093082511 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -437,13 +437,45 @@ static int vlv_calc_cdclk(struct drm_i915_private *dev_priv, int min_cdclk) return 200000; } +static u8 vlv_calc_voltage_level(struct drm_i915_private *dev_priv, int cdclk) +{ + if (IS_VALLEYVIEW(dev_priv)) { + if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ + return 2; + else if (cdclk >= 266667) + return 1; + else + return 0; + } else { + /* + * Specs are full of misinformation, but testing on actual + * hardware has shown that we just need to write the desired + * CCK divider into the Punit register. + */ + return DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; + } +} + static void vlv_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { + u32 val; + cdclk_state->vco = vlv_get_hpll_vco(dev_priv); cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, cdclk_state->vco); + + mutex_lock(&dev_priv->pcu_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + mutex_unlock(&dev_priv->pcu_lock); + + if (IS_VALLEYVIEW(dev_priv)) + cdclk_state->voltage_level = (val & DSPFREQGUAR_MASK) >> + DSPFREQGUAR_SHIFT; + else + cdclk_state->voltage_level = (val & DSPFREQGUAR_MASK_CHV) >> + DSPFREQGUAR_SHIFT_CHV; } static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) @@ -486,7 +518,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state) { int cdclk = cdclk_state->cdclk; - u32 val, cmd; + u32 val, cmd = cdclk_state->voltage_level; /* There are cases where we can end up here with power domains * off and a CDCLK frequency other than the minimum, like when @@ -496,13 +528,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, */ intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ - cmd = 2; - else if (cdclk == 266667) - cmd = 1; - else - cmd = 0; - mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); val &= ~DSPFREQGUAR_MASK; @@ -562,7 +587,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state) { int cdclk = cdclk_state->cdclk; - u32 val, cmd; + u32 val, cmd = cdclk_state->voltage_level; switch (cdclk) { case 333333: @@ -583,13 +608,6 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, */ intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - /* - * Specs are full of misinformation, but testing on actual - * hardware has shown that we just need to write the desired - * CCK divider into the Punit register. - */ - cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); val &= ~DSPFREQGUAR_MASK_CHV; @@ -1859,11 +1877,15 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) cdclk = vlv_calc_cdclk(dev_priv, min_cdclk); intel_state->cdclk.logical.cdclk = cdclk; + intel_state->cdclk.logical.voltage_level = + vlv_calc_voltage_level(dev_priv, cdclk); if (!intel_state->active_crtcs) { cdclk = vlv_calc_cdclk(dev_priv, 0); intel_state->cdclk.actual.cdclk = cdclk; + intel_state->cdclk.actual.voltage_level = + vlv_calc_voltage_level(dev_priv, cdclk); } else { intel_state->cdclk.actual = intel_state->cdclk.logical; From d7ffaeef96f39870fec47f79b9a81a501737b01c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Oct 2017 12:52:10 +0300 Subject: [PATCH 012/260] drm/i915: Use cdclk_state->voltage on BDW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track the system agent voltage we request from pcode in the cdclk state on BDW. Annoyingly we can't actually read out the current value since there's no pcode command to do that, so we'll have to just assume that it worked. v2: Keep the WARN_ON (Rodrigo) v3: s/voltage/voltage_level/ (Rodrigo) Cc: Mika Kahola Cc: Manasi Navare Cc: Rodrigo Vivi Signed-off-by: Ville Syrjälä Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171024095216.1638-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 35 +++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 8bc093082511..c98fec333dd1 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -639,6 +639,21 @@ static int bdw_calc_cdclk(int min_cdclk) return 337500; } +static u8 bdw_calc_voltage_level(int cdclk) +{ + switch (cdclk) { + default: + case 337500: + return 2; + case 450000: + return 0; + case 540000: + return 1; + case 675000: + return 3; + } +} + static void bdw_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { @@ -657,13 +672,20 @@ static void bdw_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = 337500; else cdclk_state->cdclk = 675000; + + /* + * Can't read this out :( Let's assume it's + * at least what the CDCLK frequency requires. + */ + cdclk_state->voltage_level = + bdw_calc_voltage_level(cdclk_state->cdclk); } static void bdw_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state) { int cdclk = cdclk_state->cdclk; - uint32_t val, data; + uint32_t val; int ret; if (WARN((I915_READ(LCPLL_CTL) & @@ -704,19 +726,15 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, /* fall through */ case 337500: val |= LCPLL_CLK_FREQ_337_5_BDW; - data = 2; break; case 450000: val |= LCPLL_CLK_FREQ_450; - data = 0; break; case 540000: val |= LCPLL_CLK_FREQ_54O_BDW; - data = 1; break; case 675000: val |= LCPLL_CLK_FREQ_675_BDW; - data = 3; break; } @@ -731,7 +749,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, DRM_ERROR("Switching back to LCPLL failed\n"); mutex_lock(&dev_priv->pcu_lock); - sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); + sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + cdclk_state->voltage_level); mutex_unlock(&dev_priv->pcu_lock); I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); @@ -1910,11 +1929,15 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) cdclk = bdw_calc_cdclk(min_cdclk); intel_state->cdclk.logical.cdclk = cdclk; + intel_state->cdclk.logical.voltage_level = + bdw_calc_voltage_level(cdclk); if (!intel_state->active_crtcs) { cdclk = bdw_calc_cdclk(0); intel_state->cdclk.actual.cdclk = cdclk; + intel_state->cdclk.actual.voltage_level = + bdw_calc_voltage_level(cdclk); } else { intel_state->cdclk.actual = intel_state->cdclk.logical; From 2aa97491da8aa237e792079541ee2fa57ae9d960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Oct 2017 12:52:11 +0300 Subject: [PATCH 013/260] drm/i915: Use cdclk_state->voltage on SKL/KBL/CFL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track the system agent voltage we request from pcode in the cdclk state on SKL/KBL/CFL. Annoyingly we can't actually read out the current value since there's no pcode command to do that, so we'll have to just assume that it worked. v2: s/voltage/voltage_level/ (Rodrigo) Cc: Mika Kahola Cc: Manasi Navare Cc: Rodrigo Vivi Signed-off-by: Ville Syrjälä Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171024095216.1638-6-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 43 +++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index c98fec333dd1..38e31df265bd 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -785,6 +785,24 @@ static int skl_calc_cdclk(int min_cdclk, int vco) } } +static u8 skl_calc_voltage_level(int cdclk) +{ + switch (cdclk) { + default: + case 308571: + case 337500: + return 0; + case 450000: + case 432000: + return 1; + case 540000: + return 2; + case 617143: + case 675000: + return 3; + } +} + static void skl_dpll0_update(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { @@ -835,7 +853,7 @@ static void skl_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = cdclk_state->ref; if (cdclk_state->vco == 0) - return; + goto out; cdctl = I915_READ(CDCLK_CTL); @@ -876,6 +894,14 @@ static void skl_get_cdclk(struct drm_i915_private *dev_priv, break; } } + + out: + /* + * Can't read this out :( Let's assume it's + * at least what the CDCLK frequency requires. + */ + cdclk_state->voltage_level = + skl_calc_voltage_level(cdclk_state->cdclk); } /* convert from kHz to .1 fixpoint MHz with -1MHz offset */ @@ -960,7 +986,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_state->cdclk; int vco = cdclk_state->vco; - u32 freq_select, pcu_ack; + u32 freq_select; int ret; mutex_lock(&dev_priv->pcu_lock); @@ -984,21 +1010,17 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, case 308571: case 337500: freq_select = CDCLK_FREQ_337_308; - pcu_ack = 0; break; case 450000: case 432000: freq_select = CDCLK_FREQ_450_432; - pcu_ack = 1; break; case 540000: freq_select = CDCLK_FREQ_540; - pcu_ack = 2; break; case 617143: case 675000: freq_select = CDCLK_FREQ_675_617; - pcu_ack = 3; break; } @@ -1014,7 +1036,8 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, /* inform PCU of the change */ mutex_lock(&dev_priv->pcu_lock); - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); + sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, + cdclk_state->voltage_level); mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); @@ -1093,6 +1116,7 @@ void skl_init_cdclk(struct drm_i915_private *dev_priv) if (cdclk_state.vco == 0) cdclk_state.vco = 8100000; cdclk_state.cdclk = skl_calc_cdclk(0, cdclk_state.vco); + cdclk_state.voltage_level = skl_calc_voltage_level(cdclk_state.cdclk); skl_set_cdclk(dev_priv, &cdclk_state); } @@ -1110,6 +1134,7 @@ void skl_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = cdclk_state.ref; cdclk_state.vco = 0; + cdclk_state.voltage_level = skl_calc_voltage_level(cdclk_state.cdclk); skl_set_cdclk(dev_priv, &cdclk_state); } @@ -1968,12 +1993,16 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) intel_state->cdclk.logical.vco = vco; intel_state->cdclk.logical.cdclk = cdclk; + intel_state->cdclk.logical.voltage_level = + skl_calc_voltage_level(cdclk); if (!intel_state->active_crtcs) { cdclk = skl_calc_cdclk(0, vco); intel_state->cdclk.actual.vco = vco; intel_state->cdclk.actual.cdclk = cdclk; + intel_state->cdclk.actual.voltage_level = + skl_calc_voltage_level(cdclk); } else { intel_state->cdclk.actual = intel_state->cdclk.logical; From 2123f442ca42e9dce2fcb51e8d4fd8180339b4f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Oct 2017 12:52:12 +0300 Subject: [PATCH 014/260] drm/i915: Use cdclk_state->voltage on BXT/GLK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track the system agent voltage we request from pcode in the cdclk state on BXT/GLK. Annoyingly we can't actually read out the current value since there's no pcode command to do that, so we'll have to just assume that it worked. v2: s/voltage/voltage_level/ (Rodrigo) Cc: Mika Kahola Cc: Manasi Navare Cc: Rodrigo Vivi Signed-off-by: Ville Syrjälä Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171024095216.1638-7-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 38e31df265bd..a40dc66dc773 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1163,6 +1163,11 @@ static int glk_calc_cdclk(int min_cdclk) return 79200; } +static u8 bxt_calc_voltage_level(int cdclk) +{ + return DIV_ROUND_UP(cdclk, 25000); +} + static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { int ratio; @@ -1239,7 +1244,7 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = cdclk_state->ref; if (cdclk_state->vco == 0) - return; + goto out; divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; @@ -1263,6 +1268,14 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, } cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); + + out: + /* + * Can't read this out :( Let's assume it's + * at least what the CDCLK frequency requires. + */ + cdclk_state->voltage_level = + bxt_calc_voltage_level(cdclk_state->cdclk); } static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) @@ -1365,7 +1378,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - DIV_ROUND_UP(cdclk, 25000)); + cdclk_state->voltage_level); mutex_unlock(&dev_priv->pcu_lock); if (ret) { @@ -1457,6 +1470,7 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = bxt_calc_cdclk(0); cdclk_state.vco = bxt_de_pll_vco(dev_priv, cdclk_state.cdclk); } + cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); bxt_set_cdclk(dev_priv, &cdclk_state); } @@ -1474,6 +1488,7 @@ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = cdclk_state.ref; cdclk_state.vco = 0; + cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); bxt_set_cdclk(dev_priv, &cdclk_state); } @@ -2031,6 +2046,8 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) intel_state->cdclk.logical.vco = vco; intel_state->cdclk.logical.cdclk = cdclk; + intel_state->cdclk.logical.voltage_level = + bxt_calc_voltage_level(cdclk); if (!intel_state->active_crtcs) { if (IS_GEMINILAKE(dev_priv)) { @@ -2043,6 +2060,8 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) intel_state->cdclk.actual.vco = vco; intel_state->cdclk.actual.cdclk = cdclk; + intel_state->cdclk.actual.voltage_level = + bxt_calc_voltage_level(cdclk); } else { intel_state->cdclk.actual = intel_state->cdclk.logical; From 48469eced282155608a80a37626d28a0abd3c2e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Oct 2017 12:52:13 +0300 Subject: [PATCH 015/260] drm/i915: Use cdclk_state->voltage on CNL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track the system agent voltage we request from pcode in the cdclk state on CNL. Annoyingly we can't actually read out the current value since there's no pcode command to do that, so we'll have to just assume that it worked. v2: s/voltage/voltage_level/ (Rodrigo) Cc: Mika Kahola Cc: Manasi Navare Cc: Rodrigo Vivi Signed-off-by: Ville Syrjälä Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171024095216.1638-8-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 47 ++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index a40dc66dc773..210e79193fe6 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1503,6 +1503,19 @@ static int cnl_calc_cdclk(int min_cdclk) return 168000; } +static u8 cnl_calc_voltage_level(int cdclk) +{ + switch (cdclk) { + default: + case 168000: + return 0; + case 336000: + return 1; + case 528000: + return 2; + } +} + static void cnl_cdclk_pll_update(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { @@ -1536,7 +1549,7 @@ static void cnl_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = cdclk_state->ref; if (cdclk_state->vco == 0) - return; + goto out; divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; @@ -1553,6 +1566,14 @@ static void cnl_get_cdclk(struct drm_i915_private *dev_priv, } cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); + + out: + /* + * Can't read this out :( Let's assume it's + * at least what the CDCLK frequency requires. + */ + cdclk_state->voltage_level = + cnl_calc_voltage_level(cdclk_state->cdclk); } static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) @@ -1593,7 +1614,7 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_state->cdclk; int vco = cdclk_state->vco; - u32 val, divider, pcu_ack; + u32 val, divider; int ret; mutex_lock(&dev_priv->pcu_lock); @@ -1622,19 +1643,6 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, break; } - switch (cdclk) { - case 528000: - pcu_ack = 2; - break; - case 336000: - pcu_ack = 1; - break; - case 168000: - default: - pcu_ack = 0; - break; - } - if (dev_priv->cdclk.hw.vco != 0 && dev_priv->cdclk.hw.vco != vco) cnl_cdclk_pll_disable(dev_priv); @@ -1652,7 +1660,8 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, /* inform PCU of the change */ mutex_lock(&dev_priv->pcu_lock); - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); + sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, + cdclk_state->voltage_level); mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); @@ -1745,6 +1754,7 @@ void cnl_init_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = cnl_calc_cdclk(0); cdclk_state.vco = cnl_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); + cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); cnl_set_cdclk(dev_priv, &cdclk_state); } @@ -1762,6 +1772,7 @@ void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = cdclk_state.ref; cdclk_state.vco = 0; + cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); cnl_set_cdclk(dev_priv, &cdclk_state); } @@ -2085,6 +2096,8 @@ static int cnl_modeset_calc_cdclk(struct drm_atomic_state *state) intel_state->cdclk.logical.vco = vco; intel_state->cdclk.logical.cdclk = cdclk; + intel_state->cdclk.logical.voltage_level = + cnl_calc_voltage_level(cdclk); if (!intel_state->active_crtcs) { cdclk = cnl_calc_cdclk(0); @@ -2092,6 +2105,8 @@ static int cnl_modeset_calc_cdclk(struct drm_atomic_state *state) intel_state->cdclk.actual.vco = vco; intel_state->cdclk.actual.cdclk = cdclk; + intel_state->cdclk.actual.voltage_level = + cnl_calc_voltage_level(cdclk); } else { intel_state->cdclk.actual = intel_state->cdclk.logical; From 53e9bf5e8159765e0dc807567180afd0b389f149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Oct 2017 12:52:14 +0300 Subject: [PATCH 016/260] drm/i915: Adjust system agent voltage on CNL if required by DDI ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On CNL we may need to bump up the system agent voltage not only due to CDCLK but also when driving DDI port with a sufficiently high clock. To that end start tracking the minimum acceptable voltage for each crtc. We do the tracking via crtcs because we don't have any kind of encoder state. Also there's no downside to doing it this way, and it matches how we track cdclk requirements on account of pixel rate. v2: Allow disabled crtcs to use the min voltage Add IS_CNL check to intel_ddi_compute_min_voltage() since we're using CNL specific values there s/intel_compute_min_voltage/cnl_compute_min_voltage/ since the function makes hw specific assumptions about the voltage values v3: Drop the test hack leftovers from skl_modeset_calc_cdclk() v4: s/voltage/voltage_level/ (Rodrigo) Replace DPLL DVFS FIXMEs with an explanation why we don't do anything there (Rodrigo) Cc: Mika Kahola Cc: Manasi Navare Cc: Rodrigo Vivi Signed-off-by: Ville Syrjälä Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171024095216.1638-9-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_cdclk.c | 46 ++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_ddi.c | 11 +++++++ drivers/gpu/drm/i915/intel_display.c | 11 +++++++ drivers/gpu/drm/i915/intel_dp_mst.c | 5 +++ drivers/gpu/drm/i915/intel_dpll_mgr.c | 16 +++++----- drivers/gpu/drm/i915/intel_drv.h | 7 ++++ 7 files changed, 89 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2c64c6680ac7..366ba74b0ad2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2416,6 +2416,8 @@ struct drm_i915_private { unsigned int active_crtcs; /* minimum acceptable cdclk for each pipe */ int min_cdclk[I915_MAX_PIPES]; + /* minimum acceptable voltage level for each pipe */ + u8 min_voltage_level[I915_MAX_PIPES]; int dpio_phy_iosf_port[I915_NUM_PHYS_VLV]; diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 210e79193fe6..4ca4a34b7bfa 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1665,6 +1665,12 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); + + /* + * Can't read out the voltage level :( + * Let's just assume everything is as expected. + */ + dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level; } static int cnl_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) @@ -1934,6 +1940,43 @@ static int intel_compute_min_cdclk(struct drm_atomic_state *state) return min_cdclk; } +/* + * Note that this functions assumes that 0 is + * the lowest voltage value, and higher values + * correspond to increasingly higher voltages. + * + * Should that relationship no longer hold on + * future platforms this code will need to be + * adjusted. + */ +static u8 cnl_compute_min_voltage_level(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; + struct intel_crtc_state *crtc_state; + u8 min_voltage_level; + int i; + enum pipe pipe; + + memcpy(state->min_voltage_level, dev_priv->min_voltage_level, + sizeof(state->min_voltage_level)); + + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + if (crtc_state->base.enable) + state->min_voltage_level[i] = + crtc_state->min_voltage_level; + else + state->min_voltage_level[i] = 0; + } + + min_voltage_level = 0; + for_each_pipe(dev_priv, pipe) + min_voltage_level = max(state->min_voltage_level[pipe], + min_voltage_level); + + return min_voltage_level; +} + static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); @@ -2097,7 +2140,8 @@ static int cnl_modeset_calc_cdclk(struct drm_atomic_state *state) intel_state->cdclk.logical.vco = vco; intel_state->cdclk.logical.cdclk = cdclk; intel_state->cdclk.logical.voltage_level = - cnl_calc_voltage_level(cdclk); + max(cnl_calc_voltage_level(cdclk), + cnl_compute_min_voltage_level(intel_state)); if (!intel_state->active_crtcs) { cdclk = cnl_calc_cdclk(0); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 08f04df08fa5..28c25cb9eb2c 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2542,6 +2542,13 @@ bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, return false; } +void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, + struct intel_crtc_state *crtc_state) +{ + if (IS_CANNONLAKE(dev_priv) && crtc_state->port_clock > 594000) + crtc_state->min_voltage_level = 2; +} + void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { @@ -2641,6 +2648,8 @@ void intel_ddi_get_config(struct intel_encoder *encoder, if (IS_GEN9_LP(dev_priv)) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_get_lane_lat_optim_mask(encoder); + + intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); } static bool intel_ddi_compute_config(struct intel_encoder *encoder, @@ -2667,6 +2676,8 @@ static bool intel_ddi_compute_config(struct intel_encoder *encoder, bxt_ddi_phy_calc_lane_lat_optim_mask(encoder, pipe_config->lane_count); + intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); + return ret; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1d2031a5f4bb..b5fa643e1812 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5938,6 +5938,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe); dev_priv->min_cdclk[intel_crtc->pipe] = 0; + dev_priv->min_voltage_level[intel_crtc->pipe] = 0; } /* @@ -11289,6 +11290,8 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_CLOCK_FUZZY(base.adjusted_mode.crtc_clock); PIPE_CONF_CHECK_CLOCK_FUZZY(port_clock); + PIPE_CONF_CHECK_I(min_voltage_level); + #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I #undef PIPE_CONF_CHECK_P @@ -11949,6 +11952,9 @@ static int intel_modeset_checks(struct drm_atomic_state *state) DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", intel_state->cdclk.logical.cdclk, intel_state->cdclk.actual.cdclk); + DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n", + intel_state->cdclk.logical.voltage_level, + intel_state->cdclk.actual.voltage_level); } else { to_intel_atomic_state(state)->cdclk.logical = dev_priv->cdclk.logical; } @@ -12517,6 +12523,9 @@ static int intel_atomic_commit(struct drm_device *dev, if (intel_state->modeset) { memcpy(dev_priv->min_cdclk, intel_state->min_cdclk, sizeof(intel_state->min_cdclk)); + memcpy(dev_priv->min_voltage_level, + intel_state->min_voltage_level, + sizeof(intel_state->min_voltage_level)); dev_priv->active_crtcs = intel_state->active_crtcs; dev_priv->cdclk.logical = intel_state->cdclk.logical; dev_priv->cdclk.actual = intel_state->cdclk.actual; @@ -15027,6 +15036,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) } dev_priv->min_cdclk[crtc->pipe] = min_cdclk; + dev_priv->min_voltage_level[crtc->pipe] = + crtc_state->min_voltage_level; intel_pipe_config_sanity_check(dev_priv, crtc_state); } diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 772521440a9f..3d62c63c0763 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -34,6 +34,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; @@ -87,6 +88,8 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->dp_m_n.tu = slots; + intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); + return true; } @@ -307,6 +310,8 @@ static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder, intel_dp_get_m_n(crtc, pipe_config); intel_ddi_clock_get(&intel_dig_port->base, pipe_config); + + intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); } static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index df808a94c511..897fffe1ecd8 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2008,8 +2008,8 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, * requirement, follow the Display Voltage Frequency Switching * Sequence Before Frequency Change * - * FIXME: (DVFS) is used to adjust the display voltage to match the - * display clock frequencies + * Note: DVFS is actually handled via the cdclk code paths, + * hence we do nothing here. */ /* 6. Enable DPLL in DPLL_ENABLE. */ @@ -2030,8 +2030,8 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, * requirement, follow the Display Voltage Frequency Switching * Sequence After Frequency Change * - * FIXME: (DVFS) is used to adjust the display voltage to match the - * display clock frequencies + * Note: DVFS is actually handled via the cdclk code paths, + * hence we do nothing here. */ /* @@ -2055,8 +2055,8 @@ static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, * requirement, follow the Display Voltage Frequency Switching * Sequence Before Frequency Change * - * FIXME: (DVFS) is used to adjust the display voltage to match the - * display clock frequencies + * Note: DVFS is actually handled via the cdclk code paths, + * hence we do nothing here. */ /* 3. Disable DPLL through DPLL_ENABLE. */ @@ -2077,8 +2077,8 @@ static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, * requirement, follow the Display Voltage Frequency Switching * Sequence After Frequency Change * - * FIXME: (DVFS) is used to adjust the display voltage to match the - * display clock frequencies + * Note: DVFS is actually handled via the cdclk code paths, + * hence we do nothing here. */ /* 6. Disable DPLL power in DPLL_ENABLE. */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a551aadb157b..3de8d98baed7 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -386,6 +386,8 @@ struct intel_atomic_state { unsigned int active_crtcs; /* minimum acceptable cdclk for each pipe */ int min_cdclk[I915_MAX_PIPES]; + /* minimum acceptable voltage level for each pipe */ + u8 min_voltage_level[I915_MAX_PIPES]; struct intel_shared_dpll_state shared_dpll[I915_NUM_PLLS]; @@ -739,6 +741,9 @@ struct intel_crtc_state { */ uint8_t lane_lat_optim_mask; + /* minimum acceptable voltage level */ + u8 min_voltage_level; + /* Panel fitter controls for gen2-gen4 + VLV */ struct { u32 control; @@ -1293,6 +1298,8 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, bool state); +void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, + struct intel_crtc_state *crtc_state); u32 bxt_signal_levels(struct intel_dp *intel_dp); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); From 0c9f353f014e6d88a5af8b305503a5396fe63ff8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Oct 2017 12:52:15 +0300 Subject: [PATCH 017/260] drm/i915: Sanity check cdclk in vlv_set_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chv_set_cdclk() sanity checks that the cdclk frequency is one of the legal values. Do the same in the VLV function. Cc: Mika Kahola Cc: Manasi Navare Cc: Rodrigo Vivi Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171024095216.1638-10-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_cdclk.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 4ca4a34b7bfa..fedfe3c720b6 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -520,6 +520,18 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, int cdclk = cdclk_state->cdclk; u32 val, cmd = cdclk_state->voltage_level; + switch (cdclk) { + case 400000: + case 333333: + case 320000: + case 266667: + case 200000: + break; + default: + MISSING_CASE(cdclk); + return; + } + /* There are cases where we can end up here with power domains * off and a CDCLK frequency other than the minimum, like when * issuing a modeset without actually changing any display after From cfddadc98abc85f478c92187c944e0ee963f741b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Oct 2017 12:52:16 +0300 Subject: [PATCH 018/260] drm/i915: Perform a central cdclk state sanity check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WARN if the cdclk state doesn't match what we expect after programming. And let's remove the WARN from bdw_set_cdclk() that's trying to achieve the same thing in a more limite fashion. Also take the opportunity to refactor the code to use a common function for dumping out a cdclk state. Cc: Mika Kahola Cc: Manasi Navare Cc: Rodrigo Vivi Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171024095216.1638-11-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_cdclk.c | 30 ++++++++++++++++++---------- drivers/gpu/drm/i915/intel_display.c | 3 +++ drivers/gpu/drm/i915/intel_drv.h | 2 ++ 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index fedfe3c720b6..51cd23dd8676 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -768,10 +768,6 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); intel_update_cdclk(dev_priv); - - WARN(cdclk != dev_priv->cdclk.hw.cdclk, - "cdclk requested %d kHz but got %d kHz\n", - cdclk, dev_priv->cdclk.hw.cdclk); } static int skl_calc_cdclk(int min_cdclk, int vco) @@ -1068,6 +1064,8 @@ static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) goto sanitize; intel_update_cdclk(dev_priv); + intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); + /* Is PLL enabled and locked ? */ if (dev_priv->cdclk.hw.vco == 0 || dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) @@ -1407,6 +1405,7 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) u32 cdctl, expected; intel_update_cdclk(dev_priv); + intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); if (dev_priv->cdclk.hw.vco == 0 || dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) @@ -1713,6 +1712,7 @@ static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) u32 cdctl, expected; intel_update_cdclk(dev_priv); + intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); if (dev_priv->cdclk.hw.vco == 0 || dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) @@ -1826,6 +1826,14 @@ bool intel_cdclk_changed(const struct intel_cdclk_state *a, a->voltage_level != b->voltage_level; } +void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state, + const char *context) +{ + DRM_DEBUG_DRIVER("%s %d kHz, VCO %d kHz, ref %d kHz, voltage level %d\n", + context, cdclk_state->cdclk, cdclk_state->vco, + cdclk_state->ref, cdclk_state->voltage_level); +} + /** * intel_set_cdclk - Push the CDCLK state to the hardware * @dev_priv: i915 device @@ -1843,11 +1851,15 @@ void intel_set_cdclk(struct drm_i915_private *dev_priv, if (WARN_ON_ONCE(!dev_priv->display.set_cdclk)) return; - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz, VCO %d kHz, ref %d kHz, voltage_level %d\n", - cdclk_state->cdclk, cdclk_state->vco, - cdclk_state->ref, cdclk_state->voltage_level); + intel_dump_cdclk_state(cdclk_state, "Changing CDCLK to"); dev_priv->display.set_cdclk(dev_priv, cdclk_state); + + if (WARN(intel_cdclk_changed(&dev_priv->cdclk.hw, cdclk_state), + "cdclk state doesn't match!\n")) { + intel_dump_cdclk_state(&dev_priv->cdclk.hw, "[hw state]"); + intel_dump_cdclk_state(cdclk_state, "[sw state]"); + } } static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv, @@ -2280,10 +2292,6 @@ void intel_update_cdclk(struct drm_i915_private *dev_priv) { dev_priv->display.get_cdclk(dev_priv, &dev_priv->cdclk.hw); - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", - dev_priv->cdclk.hw.cdclk, dev_priv->cdclk.hw.vco, - dev_priv->cdclk.hw.ref); - /* * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): * Programmng [sic] note: bit[9:2] should be programmed to the number diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b5fa643e1812..7d7952b78a3b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8857,7 +8857,9 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) } intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + intel_update_cdclk(dev_priv); + intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); } /* @@ -14358,6 +14360,7 @@ void intel_modeset_init_hw(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); intel_update_cdclk(dev_priv); + intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; intel_init_clock_gating(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3de8d98baed7..3b4eafd39f55 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1336,6 +1336,8 @@ bool intel_cdclk_changed(const struct intel_cdclk_state *a, const struct intel_cdclk_state *b); void intel_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state); +void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state, + const char *context); /* intel_display.c */ void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe); From 753bdbd001dca74f9f4528d7be415801f043748a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Oct 2017 21:50:53 +0100 Subject: [PATCH 019/260] drm/i915: Call cond_resched() before repeating i915_gem_evict_something() Insert a breakpoint, a chance to escape back to the scheduler and run something else for a bit, if we find that the GGTT is full and needs to be idled in order to make some room. In practice, this should only be an issue in stress tests as the wait itself will normally give the chance for the scheduler to intervene and make progress. References: https://bugs.freedesktop.org/show_bug.cgi?id=103438 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171024205053.7845-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_evict.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 8daa8a78cdc0..a6b769994d8d 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -216,6 +216,7 @@ search_again: if (ret) return ret; + cond_resched(); goto search_again; } From b1f9107e1b251b2cbd413062fad3636570d2ea20 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Oct 2017 23:08:54 +0100 Subject: [PATCH 020/260] drm/i915/selftests: Don't try to queue a request with zero delay Instead of trying to create a timer with zero delay (i.e. with expires set to the current jiffies and not the future, an already expired timer), execute that request immediately. v2: Refactor list_del_init+signal into its own little function. v3: Reorder testing so as not to immediately signal a delayed request. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171024220855.30155-1-chris@chris-wilson.co.uk Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/selftests/mock_engine.c | 37 +++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 331c2b09869e..0aafa8a105b8 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -32,6 +32,13 @@ static struct mock_request *first_request(struct mock_engine *engine) link); } +static void advance(struct mock_engine *engine, + struct mock_request *request) +{ + list_del_init(&request->link); + mock_seqno_advance(&engine->base, request->base.global_seqno); +} + static void hw_delay_complete(struct timer_list *t) { struct mock_engine *engine = from_timer(engine, t, hw_delay); @@ -39,15 +46,23 @@ static void hw_delay_complete(struct timer_list *t) spin_lock(&engine->hw_lock); - request = first_request(engine); - if (request) { - list_del_init(&request->link); - mock_seqno_advance(&engine->base, request->base.global_seqno); - } - + /* Timer fired, first request is complete */ request = first_request(engine); if (request) - mod_timer(&engine->hw_delay, jiffies + request->delay); + advance(engine, request); + + /* + * Also immediately signal any subsequent 0-delay requests, but + * requeue the timer for the next delayed request. + */ + while ((request = first_request(engine))) { + if (request->delay) { + mod_timer(&engine->hw_delay, jiffies + request->delay); + break; + } + + advance(engine, request); + } spin_unlock(&engine->hw_lock); } @@ -98,8 +113,12 @@ static void mock_submit_request(struct drm_i915_gem_request *request) spin_lock_irq(&engine->hw_lock); list_add_tail(&mock->link, &engine->hw_queue); - if (mock->link.prev == &engine->hw_queue) - mod_timer(&engine->hw_delay, jiffies + mock->delay); + if (mock->link.prev == &engine->hw_queue) { + if (mock->delay) + mod_timer(&engine->hw_delay, jiffies + mock->delay); + else + advance(engine, mock); + } spin_unlock_irq(&engine->hw_lock); } From 20ccd4d3f689ac14dce8632d76769be0ac952060 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Oct 2017 23:08:55 +0100 Subject: [PATCH 021/260] drm/i915: Use same test for eviction and submitting kernel context During evict, we wish to idle the GPU if we see that the GGTT is full. However, our test for idle in i915_gem_evict_something() and in i915_gem_switch_to_kernel_context() do not match leading to disappointment - we never believe that we are idle and keep trying to flush the GGTT ad infinitum. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103438 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171024220855.30155-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_context.c | 7 +++---- drivers/gpu/drm/i915/i915_gem_evict.c | 3 ++- drivers/gpu/drm/i915/intel_engine_cs.c | 6 ++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 5bf96a258509..4f26f80b1b3e 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -897,7 +897,7 @@ int i915_switch_context(struct drm_i915_gem_request *req) return do_rcs_switch(req); } -static bool engine_has_kernel_context(struct intel_engine_cs *engine) +static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) { struct i915_gem_timeline *timeline; @@ -913,8 +913,7 @@ static bool engine_has_kernel_context(struct intel_engine_cs *engine) return false; } - return (!engine->last_retired_context || - i915_gem_context_is_kernel(engine->last_retired_context)); + return intel_engine_has_kernel_context(engine); } int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) @@ -931,7 +930,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) struct drm_i915_gem_request *req; int ret; - if (engine_has_kernel_context(engine)) + if (engine_has_idle_kernel_context(engine)) continue; req = i915_gem_request_alloc(engine, dev_priv->kernel_context); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index a6b769994d8d..60ca4f05ae94 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -46,7 +46,7 @@ static bool ggtt_is_idle(struct drm_i915_private *i915) return false; for_each_engine(engine, i915, id) { - if (engine->last_retired_context != i915->kernel_context) + if (!intel_engine_has_kernel_context(engine)) return false; } @@ -73,6 +73,7 @@ static int ggtt_flush(struct drm_i915_private *i915) if (err) return err; + GEM_BUG_ON(!ggtt_is_idle(i915)); return 0; } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ab5bf4e2e28e..f6cdc50d4237 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1585,6 +1585,12 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) return true; } +bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) +{ + return (!engine->last_retired_context || + i915_gem_context_is_kernel(engine->last_retired_context)); +} + void intel_engines_reset_default_submission(struct drm_i915_private *i915) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 6a42ed618a28..a2589aa89163 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -866,6 +866,8 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); +bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine); + void intel_engines_mark_idle(struct drm_i915_private *i915); void intel_engines_reset_default_submission(struct drm_i915_private *i915); From 6d0dbd309687113009a276886628c7c74c848d3c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 08:13:44 -0700 Subject: [PATCH 022/260] drm/i915/selftests: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: David Airlie Cc: Tvrtko Ursulin Cc: Chris Wilson Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Kees Cook Link: https://patchwork.freedesktop.org/patch/msgid/20171024151344.GA104417@beast Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/selftests/lib_sw_fence.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c index 3790fdf44a1a..b26f07b55d86 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -49,9 +49,9 @@ void onstack_fence_fini(struct i915_sw_fence *fence) i915_sw_fence_fini(fence); } -static void timed_fence_wake(unsigned long data) +static void timed_fence_wake(struct timer_list *t) { - struct timed_fence *tf = (struct timed_fence *)data; + struct timed_fence *tf = from_timer(tf, t, timer); i915_sw_fence_commit(&tf->fence); } @@ -60,7 +60,7 @@ void timed_fence_init(struct timed_fence *tf, unsigned long expires) { onstack_fence_init(&tf->fence); - setup_timer_on_stack(&tf->timer, timed_fence_wake, (unsigned long)tf); + timer_setup_on_stack(&tf->timer, timed_fence_wake, 0); if (time_after(expires, jiffies)) mod_timer(&tf->timer, expires); From 02a1ca45cf33d23c7540b572aed5bf624ba968fe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Oct 2017 14:13:36 +0100 Subject: [PATCH 023/260] Revert "drm/i915/selftests: Convert timers to use timer_setup()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 6d0dbd309687113009a276886628c7c74c848d3c. timer_setup_on_stack() does not yet exist: In file included from drivers/gpu/drm/i915/i915_sw_fence.c:517:0: drivers/gpu/drm/i915/selftests/lib_sw_fence.c: In function ‘timed_fence_init’: drivers/gpu/drm/i915/selftests/lib_sw_fence.c:63:2: error: implicit declaration of function ‘timer_setup_on_stack’; did you mean ‘hrtimer_init_on_stack’? [-Werror=implicit-function-declaration] timer_setup_on_stack(&tf->timer, timed_fence_wake, 0); Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171025131336.2584-1-chris@chris-wilson.co.uk Acked-by: Joonas Lahtinen --- drivers/gpu/drm/i915/selftests/lib_sw_fence.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c index b26f07b55d86..3790fdf44a1a 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -49,9 +49,9 @@ void onstack_fence_fini(struct i915_sw_fence *fence) i915_sw_fence_fini(fence); } -static void timed_fence_wake(struct timer_list *t) +static void timed_fence_wake(unsigned long data) { - struct timed_fence *tf = from_timer(tf, t, timer); + struct timed_fence *tf = (struct timed_fence *)data; i915_sw_fence_commit(&tf->fence); } @@ -60,7 +60,7 @@ void timed_fence_init(struct timed_fence *tf, unsigned long expires) { onstack_fence_init(&tf->fence); - timer_setup_on_stack(&tf->timer, timed_fence_wake, 0); + setup_timer_on_stack(&tf->timer, timed_fence_wake, (unsigned long)tf); if (time_after(expires, jiffies)) mod_timer(&tf->timer, expires); From 22a8a4fc93b14b5a8cfc785edbdc6f7bd98bffb6 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Mon, 23 Oct 2017 18:32:09 +0300 Subject: [PATCH 024/260] drm/i915: Disable lazy PPGTT page table optimization for vGPU When running under virtualization (vGPU active), we must disable the lazy PPGTT page table initialization optimization introduced by commit 14826673247e ("drm/i915: Only initialize partially filled pagetables"). We must do this because GVT-g makes unduly assumptions about guest behaviour, which this optimization breaks. This results in following looking errors in the host: ERROR gvt: guest page write error -22, gfn 0x7ada8, pa 0x7ada89a8, var 0x6, len 1 The real fix is to not to depend on i915 driver behaviour, but instead either rely on only the contracts that i915 has with the hardware, or add some paravirtualization. While the real fix is en route, it won't be finished in time for 4.15, so the best option is to disable the optimization for now when vGPU is active to avoid breaking 4.15 guests in existing VM environments. Fixes: 14826673247e ("drm/i915: Only initialize partially filled pagetables") Suggested-by: Xiaolin Zhang Signed-off-by: Xiaolin Zhang [Joonas: Rewrote the commit message and added tags.] Signed-off-by: Joonas Lahtinen Cc: Zhenyu Wang Cc: Zhi Wang Cc: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Rodrigo Vivi Acked-by: Chris Wilson Acked-by: Zhenyu Wang Link: https://patchwork.freedesktop.org/patch/msgid/20171023153209.10527-1-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 527a2d2d6281..5eaa6893daaa 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1341,7 +1341,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, if (IS_ERR(pt)) goto unwind; - if (count < GEN8_PTES) + if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) gen8_initialize_pt(vm, pt); gen8_ppgtt_set_pde(vm, pd, pt, pde); From 43037c86d10cea185f6518f797f6303a06e734f9 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 3 Oct 2017 15:31:42 -0700 Subject: [PATCH 025/260] drm/i915/cnl: Allow 2 pixel per clock on Cannonlake. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is heavily based on a initial patch provided by Ville plus all changes provided later by Ander. As Geminilake, Cannonlake also supports 2 pixels per clock. Different from Geminilake we are not implementing the 99% Wa. But we can revisit that decision later if we find out any limitation on later CNL SKUs. v2: Rebase on top of commit 'd305e0614601 ("drm/i915: Track minimum acceptable cdclk instead of "minimum dotclock")' v3: When fixing HDMI on CNL I noticed that I missed to convert back the doubled pixel rate to cdclk. Cc: Paulo Zanoni Cc: Ville Syrjälä Cc: Dhinakaran Pandiyan Cc: Jani Nikula Signed-off-by: Rodrigo Vivi Reviewed-by: Paulo Zanoni Link: https://patchwork.freedesktop.org/patch/msgid/20171003223142.26264-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 14 ++------------ drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 3 ++- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 51cd23dd8676..e8884c2ade98 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1866,12 +1866,7 @@ static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv, int pixel_rate) { if (INTEL_GEN(dev_priv) >= 10) - /* - * FIXME: Switch to DIV_ROUND_UP(pixel_rate, 2) - * once DDI clock voltage requirements are - * handled correctly. - */ - return pixel_rate; + return DIV_ROUND_UP(pixel_rate, 2); else if (IS_GEMINILAKE(dev_priv)) /* * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk @@ -2188,12 +2183,7 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) int max_cdclk_freq = dev_priv->max_cdclk_freq; if (INTEL_GEN(dev_priv) >= 10) - /* - * FIXME: Allow '2 * max_cdclk_freq' - * once DDI clock voltage requirements are - * handled correctly. - */ - return max_cdclk_freq; + return 2 * max_cdclk_freq; else if (IS_GEMINILAKE(dev_priv)) /* * FIXME: Limiting to 99% as a temporary workaround. See diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 7d7952b78a3b..0e493a1fedab 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12765,7 +12765,7 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; max_dotclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; - if (IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) max_dotclk *= 2; if (WARN_ON_ONCE(!crtc_clock || max_dotclk < crtc_clock)) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1ead51754e8f..742d5455b201 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3932,6 +3932,7 @@ skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, struct intel_crtc_state *cstate) { + struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); struct drm_crtc_state *crtc_state = &cstate->base; struct drm_atomic_state *state = crtc_state->state; struct drm_plane *plane; @@ -3974,7 +3975,7 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, crtc_clock = crtc_state->adjusted_mode.crtc_clock; dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk; - if (IS_GEMINILAKE(to_i915(intel_crtc->base.dev))) + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) dotclk *= 2; pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale); From aba5e278586b16a4fbd377e7e8403aa585d0532a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Oct 2017 15:39:41 +0100 Subject: [PATCH 026/260] drm/i915: Add a hook for making the engines idle (parking) and unparking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the next patch, we will want to install a callback when the engines (GT as a whole) become idle and similarly when they first become busy. To enable that callback, first rename intel_engines_mark_idle() to intel_engines_park() and provide the companion intel_engines_unpark(). Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Michał Winiarski Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171025143943.7661-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.c | 2 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 33 +++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_lrc.c | 3 +++ drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +++++- 6 files changed, 47 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bafe1cdd57d8..8364304e9d2b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3347,7 +3347,7 @@ i915_gem_idle_work_handler(struct work_struct *work) if (!intel_engines_are_idle(dev_priv)) DRM_ERROR("Timeout waiting for engines to idle\n"); - intel_engines_mark_idle(dev_priv); + intel_engines_park(dev_priv); i915_gem_timelines_mark_idle(dev_priv); GEM_BUG_ON(!dev_priv->gt.awake); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index d140fcf5c6a3..e0d6221022a8 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -259,6 +259,8 @@ static void mark_busy(struct drm_i915_private *i915) if (INTEL_GEN(i915) >= 6) gen6_rps_busy(i915); + intel_engines_unpark(i915); + queue_delayed_work(i915->wq, &i915->gt.retire_work, round_jiffies_up_relative(HZ)); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f6cdc50d4237..fedb839dff61 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1600,19 +1600,48 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915) engine->set_default_submission(engine); } -void intel_engines_mark_idle(struct drm_i915_private *i915) +/** + * intel_engines_park: called when the GT is transitioning from busy->idle + * @i915: the i915 device + * + * The GT is now idle and about to go to sleep (maybe never to wake again?). + * Time for us to tidy and put away our toys (release resources back to the + * system). + */ +void intel_engines_park(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; for_each_engine(engine, i915, id) { + if (engine->park) + engine->park(engine); + intel_engine_disarm_breadcrumbs(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); tasklet_kill(&engine->execlists.irq_tasklet); + + i915_gem_batch_pool_fini(&engine->batch_pool); engine->execlists.no_priolist = false; } } +/** + * intel_engines_unpark: called when the GT is transitioning from idle->busy + * @i915: the i915 device + * + * The GT was idle and now about to fire up with some new user requests. + */ +void intel_engines_unpark(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + if (engine->unpark) + engine->unpark(engine); + } +} + bool intel_engine_can_store_dword(struct intel_engine_cs *engine) { switch (INTEL_GEN(engine->i915)) { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d36e25607435..eeb3622803a8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1891,6 +1891,9 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->cancel_requests = execlists_cancel_requests; engine->schedule = execlists_schedule; engine->execlists.irq_tasklet.func = intel_lrc_irq_handler; + + engine->park = NULL; + engine->unpark = NULL; } static void diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8da1bde442dd..05e01446b00b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2028,12 +2028,15 @@ static void i9xx_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = i9xx_submit_request; engine->cancel_requests = cancel_requests; + + engine->park = NULL; + engine->unpark = NULL; } static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) { + i9xx_set_default_submission(engine); engine->submit_request = gen6_bsd_submit_request; - engine->cancel_requests = cancel_requests; } static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a2589aa89163..a7c95f0c3101 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -365,6 +365,9 @@ struct intel_engine_cs { void (*reset_hw)(struct intel_engine_cs *engine, struct drm_i915_gem_request *req); + void (*park)(struct intel_engine_cs *engine); + void (*unpark)(struct intel_engine_cs *engine); + void (*set_default_submission)(struct intel_engine_cs *engine); struct intel_ring *(*context_pin)(struct intel_engine_cs *engine, @@ -868,7 +871,9 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv); bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine); -void intel_engines_mark_idle(struct drm_i915_private *i915); +void intel_engines_park(struct drm_i915_private *i915); +void intel_engines_unpark(struct drm_i915_private *i915); + void intel_engines_reset_default_submission(struct drm_i915_private *i915); bool intel_engine_can_store_dword(struct intel_engine_cs *engine); From bcbd5c33a342bc6f4a25fed528dc3ed2a0b1c140 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Oct 2017 15:39:42 +0100 Subject: [PATCH 027/260] drm/i915/guc: Always enable the breadcrumbs irq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The execlists emulation on top of the GuC (used for scheduling and preemption) depends on the MI_USER_INTERRUPT for its notifications and tasklet action. As we always employ the irq, there is no advantage in ever disabling it while we are using the GuC, so allow us to arm the breadcrumb irq when enabling GuC submission and disarm upon disabling. The impact should be lessened by the delayed irq disabling we do (we only disable after receiving an interrupt for which no one was wanting), but allowing guc to explicitly manage the irq in relation to itself is simpler and prevents an issue with losing an interrupt for preemption as it is not coupled to an active request. Internally, we add a reference counter (breadcrumbs.irq_enabled) as a simple mechanism to allow GuC to keep the breadcrumb irq enabled. To improve upon always enabling the irq while guc is selected, we need to hook into the parking facility of intel_engines so that we only enable the breadcrumbs while the GT is active (one step better would be to individually park/unpark each engine). In effect, this means that we keep the breadcrumb irq always enabled for the entire duration the guc is busy, whereas before we would try to switch it off whenever we idled for more than interrupt with no associated waiters. The difference *should* be negligible in practice! v2: Stop abusing fence signaling (and its auxiliary data structures) to enable the breadcrumbs irqs. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Michał Winiarski , Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Reviewed-by: Michał Winiarski , Link: https://patchwork.freedesktop.org/patch/msgid/20171025143943.7661-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_guc_submission.c | 39 ++++++++----------- drivers/gpu/drm/i915/i915_irq.c | 6 ++- drivers/gpu/drm/i915/intel_breadcrumbs.c | 45 +++++++++++++++++----- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++- 4 files changed, 59 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index f84c267728fd..141ed9df3d5c 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -523,29 +523,6 @@ static void i915_guc_submit(struct intel_engine_cs *engine) } } -static void nested_enable_signaling(struct drm_i915_gem_request *rq) -{ - /* If we use dma_fence_enable_sw_signaling() directly, lockdep - * detects an ordering issue between the fence lockclass and the - * global_timeline. This circular dependency can only occur via 2 - * different fences (but same fence lockclass), so we use the nesting - * annotation here to prevent the warn, equivalent to the nesting - * inside i915_gem_request_submit() for when we also enable the - * signaler. - */ - - if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &rq->fence.flags)) - return; - - GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)); - trace_dma_fence_enable_signal(&rq->fence); - - spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING); - intel_engine_enable_signaling(rq, true); - spin_unlock(&rq->lock); -} - static void port_assign(struct execlist_port *port, struct drm_i915_gem_request *rq) { @@ -555,7 +532,6 @@ static void port_assign(struct execlist_port *port, i915_gem_request_put(port_request(port)); port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); - nested_enable_signaling(rq); } static void i915_guc_dequeue(struct intel_engine_cs *engine) @@ -1097,6 +1073,16 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv) rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; } +static void i915_guc_submission_park(struct intel_engine_cs *engine) +{ + intel_engine_unpin_breadcrumbs_irq(engine); +} + +static void i915_guc_submission_unpark(struct intel_engine_cs *engine) +{ + intel_engine_pin_breadcrumbs_irq(engine); +} + int i915_guc_submission_enable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; @@ -1154,6 +1140,9 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) execlists->irq_tasklet.func = i915_guc_irq_handler; clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); tasklet_schedule(&execlists->irq_tasklet); + + engine->park = i915_guc_submission_park; + engine->unpark = i915_guc_submission_unpark; } return 0; @@ -1168,6 +1157,8 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; + GEM_BUG_ON(dev_priv->gt.awake); /* GT should be parked first */ + guc_interrupts_release(dev_priv); /* Revert back to manual ELSP submission */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f8205841868b..ff00e462697a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1068,6 +1068,9 @@ static void notify_ring(struct intel_engine_cs *engine) struct drm_i915_gem_request *rq = NULL; struct intel_wait *wait; + if (!engine->breadcrumbs.irq_armed) + return; + atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); @@ -1101,7 +1104,8 @@ static void notify_ring(struct intel_engine_cs *engine) if (wakeup) wake_up_process(wait->tsk); } else { - __intel_engine_disarm_breadcrumbs(engine); + if (engine->breadcrumbs.irq_armed) + __intel_engine_disarm_breadcrumbs(engine); } spin_unlock(&engine->breadcrumbs.irq_lock); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 48e1ba01ccf8..1bd0a461d665 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -123,7 +123,7 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t) */ spin_lock_irq(&b->irq_lock); - if (!__intel_breadcrumbs_wakeup(b)) + if (b->irq_armed && !__intel_breadcrumbs_wakeup(b)) __intel_engine_disarm_breadcrumbs(engine); spin_unlock_irq(&b->irq_lock); if (!b->irq_armed) @@ -171,15 +171,37 @@ void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) lockdep_assert_held(&b->irq_lock); GEM_BUG_ON(b->irq_wait); + GEM_BUG_ON(!b->irq_armed); - if (b->irq_enabled) { + GEM_BUG_ON(!b->irq_enabled); + if (!--b->irq_enabled) irq_disable(engine); - b->irq_enabled = false; - } b->irq_armed = false; } +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + spin_lock_irq(&b->irq_lock); + if (!b->irq_enabled++) + irq_enable(engine); + GEM_BUG_ON(!b->irq_enabled); /* no overflow! */ + spin_unlock_irq(&b->irq_lock); +} + +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + spin_lock_irq(&b->irq_lock); + GEM_BUG_ON(!b->irq_enabled); /* no underflow! */ + if (!--b->irq_enabled) + irq_disable(engine); + spin_unlock_irq(&b->irq_lock); +} + void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -241,6 +263,9 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); struct drm_i915_private *i915 = engine->i915; + bool enabled; + + GEM_BUG_ON(!intel_irqs_enabled(i915)); lockdep_assert_held(&b->irq_lock); if (b->irq_armed) @@ -252,7 +277,6 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) * the irq. */ b->irq_armed = true; - GEM_BUG_ON(b->irq_enabled); if (I915_SELFTEST_ONLY(b->mock)) { /* For our mock objects we want to avoid interaction @@ -273,14 +297,15 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) */ /* No interrupts? Kick the waiter every jiffie! */ - if (intel_irqs_enabled(i915)) { - if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) - irq_enable(engine); - b->irq_enabled = true; + enabled = false; + if (!b->irq_enabled++ && + !test_bit(engine->id, &i915->gpu_error.test_irq_rings)) { + irq_enable(engine); + enabled = true; } enable_fake_irq(b); - return true; + return enabled; } static inline struct intel_wait *to_wait(struct rb_node *node) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a7c95f0c3101..e5a0d489a304 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -339,9 +339,9 @@ struct intel_engine_cs { struct timer_list hangcheck; /* detect missed interrupts */ unsigned int hangcheck_interrupts; + unsigned int irq_enabled; bool irq_armed : 1; - bool irq_enabled : 1; I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; @@ -848,6 +848,9 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); #define ENGINE_WAKEUP_WAITER BIT(0) #define ENGINE_WAKEUP_ASLEEP BIT(1) +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); + void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); From 9bdc3573a5a2824da7d7e55eac95ca5a014181d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 25 Oct 2017 18:25:19 +0100 Subject: [PATCH 028/260] drm/i915/guc: Initialize GuC before restarting engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we're handling request resubmission the same way as regular submission (from the tasklet), we can move GuC initialization earlier, before restarting the engines. This way, we're no longer being in the state of flux during engine restart - we're already in user requested submission mode. Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Michal Wajdeczko Cc: Michel Thierry Cc: Mika Kuoppala Cc: Oscar Mateo Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171025172519.10670-5-chris@chris-wilson.co.uk Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 10 +++++----- drivers/gpu/drm/i915/i915_guc_submission.c | 8 -------- drivers/gpu/drm/i915/intel_lrc.c | 2 +- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8364304e9d2b..d803ef5f4a7f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4926,6 +4926,11 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) goto out; } + /* We can't enable contexts until all firmware is loaded */ + ret = intel_uc_init_hw(dev_priv); + if (ret) + goto out; + /* Need to do basic initialisation of all rings first: */ ret = __i915_gem_restart_engines(dev_priv); if (ret) @@ -4933,11 +4938,6 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) intel_mocs_init_l3cc_table(dev_priv); - /* We can't enable contexts until all firmware is loaded */ - ret = intel_uc_init_hw(dev_priv); - if (ret) - goto out; - out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 141ed9df3d5c..07289a4b5bfa 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1132,15 +1132,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct intel_engine_execlists * const execlists = &engine->execlists; - /* The tasklet was initialised by execlists, and may be in - * a state of flux (across a reset) and so we just want to - * take over the callback without changing any other state - * in the tasklet. - */ execlists->irq_tasklet.func = i915_guc_irq_handler; - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet_schedule(&execlists->irq_tasklet); - engine->park = i915_guc_submission_park; engine->unpark = i915_guc_submission_unpark; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index eeb3622803a8..e821c1eba7a4 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1474,7 +1474,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) execlists->active = 0; /* After a GPU reset, we may have requests to replay */ - if (!i915_modparams.enable_guc_submission && execlists->first) + if (execlists->first) tasklet_schedule(&execlists->irq_tasklet); return 0; From f8c3dcf946bfb1e0519fb095d02f9b7def30a749 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 25 Oct 2017 17:15:46 -0700 Subject: [PATCH 029/260] drm/i915/cnl: Fix SSEU Device Status. CNL adds an extra register for slice/subslice information. Although no SKU is planed with an extra slice let's already handle this extra piece of information so we don't have the risk in future of getting a part that might have chosen this part of the die instead of other slices or anything like that. Also if subslice is disabled the information of eu ack for that is garbage, so let's skip checks for eu if subslice is disabled as we skip the subslice if slice is disabled. The rest is pretty much like gen9. v2: Remove IS_CANNONLAKE from gen9 status function. v3: Consider s_max = 6 and ss_max=4 to run over all possible slices and subslices possible by spec. Although no real hardware will have that many slices/subslices. To match with sseu info init. v4: Fix offset calculation for slices 4 and 5. Removed Oscar's rv-b since this change also needs review. v5: Let's consider only valid bits for SLICE*_PGCTL_ACK. This looks like wrong in Spec, but seems to be enough for now. Whenever Spec gets updated and fixed we come back and properly update the masks. Also add a FIXME, so we can revisit this later when we find some strange info on debugfs or when we noitce spec got updated. Cc: Oscar Mateo Signed-off-by: Rodrigo Vivi Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20171026001546.28203-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 61 ++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 7 ++++ 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c65e381b85f3..34862aa8f9c0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4448,6 +4448,61 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, } } +static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, + struct sseu_dev_info *sseu) +{ + const struct intel_device_info *info = INTEL_INFO(dev_priv); + int s_max = 6, ss_max = 4; + int s, ss; + u32 s_reg[s_max], eu_reg[2 * s_max], eu_mask[2]; + + for (s = 0; s < s_max; s++) { + /* + * FIXME: Valid SS Mask respects the spec and read + * only valid bits for those registers, excluding reserverd + * although this seems wrong because it would leave many + * subslices without ACK. + */ + s_reg[s] = I915_READ(GEN10_SLICE_PGCTL_ACK(s)) & + GEN10_PGCTL_VALID_SS_MASK(s); + eu_reg[2 * s] = I915_READ(GEN10_SS01_EU_PGCTL_ACK(s)); + eu_reg[2 * s + 1] = I915_READ(GEN10_SS23_EU_PGCTL_ACK(s)); + } + + eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK | + GEN9_PGCTL_SSA_EU19_ACK | + GEN9_PGCTL_SSA_EU210_ACK | + GEN9_PGCTL_SSA_EU311_ACK; + eu_mask[1] = GEN9_PGCTL_SSB_EU08_ACK | + GEN9_PGCTL_SSB_EU19_ACK | + GEN9_PGCTL_SSB_EU210_ACK | + GEN9_PGCTL_SSB_EU311_ACK; + + for (s = 0; s < s_max; s++) { + if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) + /* skip disabled slice */ + continue; + + sseu->slice_mask |= BIT(s); + sseu->subslice_mask = info->sseu.subslice_mask; + + for (ss = 0; ss < ss_max; ss++) { + unsigned int eu_cnt; + + if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) + /* skip disabled subslice */ + continue; + + eu_cnt = 2 * hweight32(eu_reg[2 * s + ss / 2] & + eu_mask[ss % 2]); + sseu->eu_total += eu_cnt; + sseu->eu_per_subslice = max_t(unsigned int, + sseu->eu_per_subslice, + eu_cnt); + } + } +} + static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { @@ -4483,7 +4538,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; @@ -4589,8 +4644,10 @@ static int i915_sseu_status(struct seq_file *m, void *unused) cherryview_sseu_device_status(dev_priv, &sseu); } else if (IS_BROADWELL(dev_priv)) { broadwell_sseu_device_status(dev_priv, &sseu); - } else if (INTEL_GEN(dev_priv) >= 9) { + } else if (IS_GEN9(dev_priv)) { gen9_sseu_device_status(dev_priv, &sseu); + } else if (INTEL_GEN(dev_priv) >= 10) { + gen10_sseu_device_status(dev_priv, &sseu); } intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f138eae82bf0..8c775e96b4e4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8037,11 +8037,18 @@ enum { #define CHV_EU311_PG_ENABLE (1<<1) #define GEN9_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + (slice)*0x4) +#define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 0x34 + \ + ((slice) % 3) * 0x4) #define GEN9_PGCTL_SLICE_ACK (1 << 0) #define GEN9_PGCTL_SS_ACK(subslice) (1 << (2 + (subslice)*2)) +#define GEN10_PGCTL_VALID_SS_MASK(slice) ((slice) == 0 ? 0x7F : 0x1F) #define GEN9_SS01_EU_PGCTL_ACK(slice) _MMIO(0x805c + (slice)*0x8) +#define GEN10_SS01_EU_PGCTL_ACK(slice) _MMIO(0x805c + ((slice) / 3) * 0x30 + \ + ((slice) % 3) * 0x8) #define GEN9_SS23_EU_PGCTL_ACK(slice) _MMIO(0x8060 + (slice)*0x8) +#define GEN10_SS23_EU_PGCTL_ACK(slice) _MMIO(0x8060 + ((slice) / 3) * 0x30 + \ + ((slice) % 3) * 0x8) #define GEN9_PGCTL_SSA_EU08_ACK (1 << 0) #define GEN9_PGCTL_SSA_EU19_ACK (1 << 2) #define GEN9_PGCTL_SSA_EU210_ACK (1 << 4) From 21e8860ef467001330368eceeb855fde46ab1dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 25 Oct 2017 22:00:09 +0200 Subject: [PATCH 030/260] drm/i915/guc: Do not use 0 for GuC doorbell cookie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently, this value is reserved and may be interpreted as changing doorbell ownership. Even though we're not observing any side effects now, let's skip over it to be consistent with the spec. v2: Apply checkpatch (Sagar) Suggested-by: Sagar Arun Kamble Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Sagar Arun Kamble Reviewed-by: Sagar Arun Kamble Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171025200020.16636-2-michal.winiarski@intel.com --- drivers/gpu/drm/i915/i915_guc_submission.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 07289a4b5bfa..0b61bf18c3f4 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -475,9 +475,12 @@ static void guc_ring_doorbell(struct i915_guc_client *client) /* pointer of current doorbell cacheline */ db = __get_doorbell(client); - /* we're not expecting the doorbell cookie to change behind our back */ + /* + * We're not expecting the doorbell cookie to change behind our back, + * we also need to treat 0 as a reserved value. + */ cookie = READ_ONCE(db->cookie); - WARN_ON_ONCE(xchg(&db->cookie, cookie + 1) != cookie); + WARN_ON_ONCE(xchg(&db->cookie, cookie + 1 ?: cookie + 2) != cookie); /* XXX: doorbell was lost and need to acquire it again */ GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED); From 89922d0145755b0d22906925e9bac9dc78524fc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 25 Oct 2017 22:00:10 +0200 Subject: [PATCH 031/260] drm/i915/guc: Extract GuC stage desc pool creation into a helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since it's a two-step process, we can have a cleaner error handling in the caller if we do the allocations in a helper. Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Cc: Jeff McGee Cc: Michal Wajdeczko Cc: Oscar Mateo Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171025200020.16636-3-michal.winiarski@intel.com --- drivers/gpu/drm/i915/i915_guc_submission.c | 65 +++++++++++++--------- 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 0b61bf18c3f4..e195bdee0473 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -311,6 +311,37 @@ static void guc_proc_desc_init(struct intel_guc *guc, desc->priority = client->priority; } +static int guc_stage_desc_pool_create(struct intel_guc *guc) +{ + struct i915_vma *vma; + void *vaddr; + + vma = intel_guc_allocate_vma(guc, + PAGE_ALIGN(sizeof(struct guc_stage_desc) * + GUC_MAX_STAGE_DESCRIPTORS)); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma); + return PTR_ERR(vaddr); + } + + guc->stage_desc_pool = vma; + guc->stage_desc_pool_vaddr = vaddr; + ida_init(&guc->stage_ids); + + return 0; +} + +static void guc_stage_desc_pool_destroy(struct intel_guc *guc) +{ + ida_destroy(&guc->stage_ids); + i915_gem_object_unpin_map(guc->stage_desc_pool->obj); + i915_vma_unpin_and_release(&guc->stage_desc_pool); +} + /* * Initialise/clear the stage descriptor shared with the GuC firmware. * @@ -953,47 +984,29 @@ static void guc_ads_destroy(struct intel_guc *guc) int i915_guc_submission_init(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - struct i915_vma *vma; - void *vaddr; int ret; if (guc->stage_desc_pool) return 0; - vma = intel_guc_allocate_vma(guc, - PAGE_ALIGN(sizeof(struct guc_stage_desc) * - GUC_MAX_STAGE_DESCRIPTORS)); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - guc->stage_desc_pool = vma; - - vaddr = i915_gem_object_pin_map(guc->stage_desc_pool->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto err_vma; - } - - guc->stage_desc_pool_vaddr = vaddr; + ret = guc_stage_desc_pool_create(guc); + if (ret) + return ret; ret = intel_guc_log_create(guc); if (ret < 0) - goto err_vaddr; + goto err_stage_desc_pool; ret = guc_ads_create(guc); if (ret < 0) goto err_log; - ida_init(&guc->stage_ids); - return 0; err_log: intel_guc_log_destroy(guc); -err_vaddr: - i915_gem_object_unpin_map(guc->stage_desc_pool->obj); -err_vma: - i915_vma_unpin_and_release(&guc->stage_desc_pool); +err_stage_desc_pool: + guc_stage_desc_pool_destroy(guc); return ret; } @@ -1001,11 +1014,9 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - ida_destroy(&guc->stage_ids); guc_ads_destroy(guc); intel_guc_log_destroy(guc); - i915_gem_object_unpin_map(guc->stage_desc_pool->obj); - i915_vma_unpin_and_release(&guc->stage_desc_pool); + guc_stage_desc_pool_destroy(guc); } static void guc_interrupts_capture(struct drm_i915_private *dev_priv) From b8e5eb960b28bc120b03f174dfa3ac1e32e1d10f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 25 Oct 2017 22:00:11 +0200 Subject: [PATCH 032/260] drm/i915/guc: Allocate separate shared data object for GuC communication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were using first page of kernel context render state for sharing data with GuC. While it's justified by the fact that those pages are not used (note, GuC still enforces this layout and refuses to work if we remove the extra page in front), it's also confusing (why are we using this particular page?). Let's allocate a separate object instead. v2: Drop kernel_context from GuC suspend/resume action handlers (Michel) Suggested-by: Daniele Ceraolo Spurio Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Cc: Jeff McGee Cc: Michal Wajdeczko Cc: Oscar Mateo Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171025200020.16636-4-michal.winiarski@intel.com --- drivers/gpu/drm/i915/i915_guc_submission.c | 36 +++++++++++++++++++++- drivers/gpu/drm/i915/intel_guc.c | 14 ++------- drivers/gpu/drm/i915/intel_guc.h | 2 ++ 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index e195bdee0473..d1a5613da24c 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -437,6 +437,33 @@ static void guc_stage_desc_fini(struct intel_guc *guc, memset(desc, 0, sizeof(*desc)); } +static int guc_shared_data_create(struct intel_guc *guc) +{ + struct i915_vma *vma; + void *vaddr; + + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma); + return PTR_ERR(vaddr); + } + + guc->shared_data = vma; + guc->shared_data_vaddr = vaddr; + + return 0; +} + +static void guc_shared_data_destroy(struct intel_guc *guc) +{ + i915_gem_object_unpin_map(guc->shared_data->obj); + i915_vma_unpin_and_release(&guc->shared_data); +} + /* Construct a Work Item and append it to the GuC's Work Queue */ static void guc_wq_item_append(struct i915_guc_client *client, struct drm_i915_gem_request *rq) @@ -993,9 +1020,13 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) if (ret) return ret; + ret = guc_shared_data_create(guc); + if (ret) + goto err_stage_desc_pool; + ret = intel_guc_log_create(guc); if (ret < 0) - goto err_stage_desc_pool; + goto err_shared_data; ret = guc_ads_create(guc); if (ret < 0) @@ -1005,6 +1036,8 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) err_log: intel_guc_log_destroy(guc); +err_shared_data: + guc_shared_data_destroy(guc); err_stage_desc_pool: guc_stage_desc_pool_destroy(guc); return ret; @@ -1016,6 +1049,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) guc_ads_destroy(guc); intel_guc_log_destroy(guc); + guc_shared_data_destroy(guc); guc_stage_desc_pool_destroy(guc); } diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 10037c0fdf95..f74d50fdaeb0 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -268,7 +268,6 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) int intel_guc_suspend(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - struct i915_gem_context *ctx; u32 data[3]; if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) @@ -276,14 +275,10 @@ int intel_guc_suspend(struct drm_i915_private *dev_priv) gen9_disable_guc_interrupts(dev_priv); - ctx = dev_priv->kernel_context; - data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; /* any value greater than GUC_POWER_D0 */ data[1] = GUC_POWER_D1; - /* first page is shared data with GuC */ - data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + - LRC_GUCSHR_PN * PAGE_SIZE; + data[2] = guc_ggtt_offset(guc->shared_data); return intel_guc_send(guc, data, ARRAY_SIZE(data)); } @@ -295,7 +290,6 @@ int intel_guc_suspend(struct drm_i915_private *dev_priv) int intel_guc_resume(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - struct i915_gem_context *ctx; u32 data[3]; if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) @@ -304,13 +298,9 @@ int intel_guc_resume(struct drm_i915_private *dev_priv) if (i915_modparams.guc_log_level >= 0) gen9_enable_guc_interrupts(dev_priv); - ctx = dev_priv->kernel_context; - data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; data[1] = GUC_POWER_D0; - /* first page is shared data with GuC */ - data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + - LRC_GUCSHR_PN * PAGE_SIZE; + data[2] = guc_ggtt_offset(guc->shared_data); return intel_guc_send(guc, data, ARRAY_SIZE(data)); } diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 418450b1ae27..aa1583167b0a 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -54,6 +54,8 @@ struct intel_guc { struct i915_vma *stage_desc_pool; void *stage_desc_pool_vaddr; struct ida stage_ids; + struct i915_vma *shared_data; + void *shared_data_vaddr; struct i915_guc_client *execbuf_client; From 4ddbe87a16d75e8f8cb59722fc385b3469a219c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 25 Oct 2017 22:00:12 +0200 Subject: [PATCH 033/260] drm/i915/guc: Add preemption action to GuC firmware interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're using GuC action to request preemption. However, after requesting preemption we need to wait for GuC to finish its own post-processing before we start submitting our requests. Firmware is using shared context to report its status. Let's update GuC firmware interface with those new definitions. v2: Drop unused INTEL_GUC_PREEMPT_OPTION_IMMEDIATE Signed-off-by: Michał Winiarski Cc: Jeff McGee Cc: Michal Wajdeczko Cc: Oscar Mateo Reviewed-by: Jeff McGee Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171025200020.16636-5-michal.winiarski@intel.com --- drivers/gpu/drm/i915/intel_guc_fwif.h | 39 +++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 80c507435458..e24dbec2ead8 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -544,9 +544,36 @@ union guc_log_control { u32 value; } __packed; +struct guc_ctx_report { + u32 report_return_status; + u32 reserved1[64]; + u32 affected_count; + u32 reserved2[2]; +} __packed; + +/* GuC Shared Context Data Struct */ +struct guc_shared_ctx_data { + u32 addr_of_last_preempted_data_low; + u32 addr_of_last_preempted_data_high; + u32 addr_of_last_preempted_data_high_tmp; + u32 padding; + u32 is_mapped_to_proxy; + u32 proxy_ctx_id; + u32 engine_reset_ctx_id; + u32 media_reset_count; + u32 reserved1[8]; + u32 uk_last_ctx_switch_reason; + u32 was_reset; + u32 lrca_gpu_addr; + u64 execlist_ctx; + u32 reserved2[66]; + struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM]; +} __packed; + /* This Action will be programmed in C180 - SOFT_SCRATCH_O_REG */ enum intel_guc_action { INTEL_GUC_ACTION_DEFAULT = 0x0, + INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2, INTEL_GUC_ACTION_SAMPLE_FORCEWAKE = 0x6, INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, @@ -562,6 +589,18 @@ enum intel_guc_action { INTEL_GUC_ACTION_LIMIT }; +enum intel_guc_preempt_options { + INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, + INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, +}; + +enum intel_guc_report_status { + INTEL_GUC_REPORT_STATUS_UNKNOWN = 0x0, + INTEL_GUC_REPORT_STATUS_ACKED = 0x1, + INTEL_GUC_REPORT_STATUS_ERROR = 0x2, + INTEL_GUC_REPORT_STATUS_COMPLETE = 0x4, +}; + /* * The GuC sends its response to a command by overwriting the * command in SS0. The response is distinguishable from a command From e12ab16907252ebc3ec956932eb40ed88105346b Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Thu, 26 Oct 2017 16:17:37 +0200 Subject: [PATCH 034/260] drm/i915/guc: Add a second client, to be used for preemption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This second client is created with priority KMD_HIGH, and marked as preemptive. This will allow us to request preemption using GuC actions. v2: Extract clients creation into a helper, debugfs fixups. (Michał) Recreate doorbell on init. (Daniele) Move clients into an array. v3: And move clients back from an array, to get rid of the enum (Michał) v4: Use is_high_priority, move DRM_ERROR into __create_doorbell, move GEM_BUG_ON inside guc_clients_create (Michał) v5: Split the BUG_ON (Michał) v6: Cleanup after error during doorbell reinit (Michał) Signed-off-by: Dave Gordon Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Cc: Jeff McGee Cc: Michal Wajdeczko Cc: Oscar Mateo Reviewed-by: Michal Wajdeczko Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171026141737.31656-1-michal.winiarski@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 2 + drivers/gpu/drm/i915/i915_guc_submission.c | 118 +++++++++++++++------ drivers/gpu/drm/i915/intel_guc.h | 1 + 3 files changed, 86 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 34862aa8f9c0..c1e5bba91bff 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2484,6 +2484,8 @@ static int i915_guc_info(struct seq_file *m, void *data) seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client); i915_guc_client_info(m, dev_priv, guc->execbuf_client); + seq_printf(m, "\nGuC preempt client @ %p:\n", guc->preempt_client); + i915_guc_client_info(m, dev_priv, guc->preempt_client); i915_guc_log_info(m, dev_priv); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index d1a5613da24c..b0f034f92431 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -33,10 +33,11 @@ * * GuC client: * A i915_guc_client refers to a submission path through GuC. Currently, there - * is only one of these (the execbuf_client) and this one is charged with all - * submissions to the GuC. This struct is the owner of a doorbell, a process - * descriptor and a workqueue (all of them inside a single gem object that - * contains all required pages for these elements). + * are two clients. One of them (the execbuf_client) is charged with all + * submissions to the GuC, the other one (preempt_client) is responsible for + * preempting the execbuf_client. This struct is the owner of a doorbell, a + * process descriptor and a workqueue (all of them inside a single gem object + * that contains all required pages for these elements). * * GuC stage descriptor: * During initialization, the driver allocates a static pool of 1024 such @@ -83,7 +84,8 @@ static inline bool is_high_priority(struct i915_guc_client* client) { - return client->priority <= GUC_CLIENT_PRIORITY_HIGH; + return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || + client->priority == GUC_CLIENT_PRIORITY_HIGH); } static int __reserve_doorbell(struct i915_guc_client *client) @@ -196,8 +198,11 @@ static int __create_doorbell(struct i915_guc_client *client) doorbell->cookie = 0; err = __guc_allocate_doorbell(client->guc, client->stage_id); - if (err) + if (err) { doorbell->db_status = GUC_DOORBELL_DISABLED; + DRM_ERROR("Couldn't create client %u doorbell: %d\n", + client->stage_id, err); + } return err; } @@ -363,6 +368,8 @@ static void guc_stage_desc_init(struct intel_guc *guc, memset(desc, 0, sizeof(*desc)); desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | GUC_STAGE_DESC_ATTR_KERNEL; + if (is_high_priority(client)) + desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT; desc->stage_id = client->stage_id; desc->priority = client->priority; desc->db_id = client->doorbell_id; @@ -763,14 +770,14 @@ static int guc_init_doorbell_hw(struct intel_guc *guc) /* Now for every client (and not only execbuf_client) make sure their * doorbells are known by the GuC */ - //for (client = client_list; client != NULL; client = client->next) - { - ret = __create_doorbell(client); - if (ret) { - DRM_ERROR("Couldn't recreate client %u doorbell: %d\n", - client->stage_id, ret); - return ret; - } + ret = __create_doorbell(guc->execbuf_client); + if (ret) + return ret; + + ret = __create_doorbell(guc->preempt_client); + if (ret) { + __destroy_doorbell(guc->execbuf_client); + return ret; } /* Read back & verify all (used & unused) doorbell registers */ @@ -895,6 +902,50 @@ static void guc_client_free(struct i915_guc_client *client) kfree(client); } +static int guc_clients_create(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct i915_guc_client *client; + + GEM_BUG_ON(guc->execbuf_client); + GEM_BUG_ON(guc->preempt_client); + + client = guc_client_alloc(dev_priv, + INTEL_INFO(dev_priv)->ring_mask, + GUC_CLIENT_PRIORITY_KMD_NORMAL, + dev_priv->kernel_context); + if (IS_ERR(client)) { + DRM_ERROR("Failed to create GuC client for submission!\n"); + return PTR_ERR(client); + } + guc->execbuf_client = client; + + client = guc_client_alloc(dev_priv, + INTEL_INFO(dev_priv)->ring_mask, + GUC_CLIENT_PRIORITY_KMD_HIGH, + dev_priv->preempt_context); + if (IS_ERR(client)) { + DRM_ERROR("Failed to create GuC client for preemption!\n"); + guc_client_free(guc->execbuf_client); + guc->execbuf_client = NULL; + return PTR_ERR(client); + } + guc->preempt_client = client; + + return 0; +} + +static void guc_clients_destroy(struct intel_guc *guc) +{ + struct i915_guc_client *client; + + client = fetch_and_zero(&guc->execbuf_client); + guc_client_free(client); + + client = fetch_and_zero(&guc->preempt_client); + guc_client_free(client); +} + static void guc_policy_init(struct guc_policy *policy) { policy->execution_quantum = POLICY_DEFAULT_EXECUTION_QUANTUM_US; @@ -1134,7 +1185,6 @@ static void i915_guc_submission_unpark(struct intel_engine_cs *engine) int i915_guc_submission_enable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - struct i915_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine; enum intel_engine_id id; int err; @@ -1152,28 +1202,28 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) sizeof(struct guc_wq_item) * I915_NUM_ENGINES > GUC_WQ_SIZE); - if (!client) { - client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, - GUC_CLIENT_PRIORITY_KMD_NORMAL, - dev_priv->kernel_context); - if (IS_ERR(client)) { - DRM_ERROR("Failed to create GuC client for execbuf!\n"); - return PTR_ERR(client); - } - - guc->execbuf_client = client; + /* + * We're being called on both module initialization and on reset, + * until this flow is changed, we're using regular client presence to + * determine which case are we in, and whether we should allocate new + * clients or just reset their workqueues. + */ + if (!guc->execbuf_client) { + err = guc_clients_create(guc); + if (err) + return err; + } else { + guc_reset_wq(guc->execbuf_client); + guc_reset_wq(guc->preempt_client); } err = intel_guc_sample_forcewake(guc); if (err) - goto err_execbuf_client; - - guc_reset_wq(client); + goto err_free_clients; err = guc_init_doorbell_hw(guc); if (err) - goto err_execbuf_client; + goto err_free_clients; /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(dev_priv); @@ -1187,9 +1237,8 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) return 0; -err_execbuf_client: - guc_client_free(guc->execbuf_client); - guc->execbuf_client = NULL; +err_free_clients: + guc_clients_destroy(guc); return err; } @@ -1204,6 +1253,5 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) /* Revert back to manual ELSP submission */ intel_engines_reset_default_submission(dev_priv); - guc_client_free(guc->execbuf_client); - guc->execbuf_client = NULL; + guc_clients_destroy(guc); } diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index aa1583167b0a..6ca55c5bed3c 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -58,6 +58,7 @@ struct intel_guc { void *shared_data_vaddr; struct i915_guc_client *execbuf_client; + struct i915_guc_client *preempt_client; DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); /* Cyclic counter mod pagesize */ From a0991e1d2801c5d3e7d98abdefcb8032e09bde6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 25 Oct 2017 22:00:14 +0200 Subject: [PATCH 035/260] drm/i915/guc: Split guc_wq_item_append MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're using a special preempt context for HW to preempt into. We don't want to emit any requests there, but we still need to wrap this context into a valid GuC work item. Let's cleanup the functions operating on GuC work items. We can extract guc_request_add - responsible for adding GuC work item and ringing the doorbell, and guc_wq_item_append - used by the function above, not tied to the concept of gem request. Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Jeff McGee Cc: Michal Wajdeczko Cc: Oscar Mateo Reviewed-by: Chris Wilson Reviewed-by: Jeff McGee Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171025200020.16636-7-michal.winiarski@intel.com --- drivers/gpu/drm/i915/i915_guc_submission.c | 55 ++++++++++++---------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index b0f034f92431..301c658a4cde 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -71,7 +71,7 @@ * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which * represents in-order queue. The kernel driver packs ring tail pointer and an * ELSP context descriptor dword into Work Item. - * See guc_wq_item_append() + * See guc_add_request() * * ADS: * The Additional Data Struct (ADS) has pointers for different buffers used by @@ -394,7 +394,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, * submission or, in other words, not using a direct submission * model) the KMD's LRCA is not used for any work submission. * Instead, the GuC uses the LRCA of the user mode context (see - * guc_wq_item_append below). + * guc_add_request below). */ lrc->context_desc = lower_32_bits(ce->lrc_desc); @@ -473,22 +473,18 @@ static void guc_shared_data_destroy(struct intel_guc *guc) /* Construct a Work Item and append it to the GuC's Work Queue */ static void guc_wq_item_append(struct i915_guc_client *client, - struct drm_i915_gem_request *rq) + u32 target_engine, u32 context_desc, + u32 ring_tail, u32 fence_id) { /* wqi_len is in DWords, and does not include the one-word header */ const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size / sizeof(u32) - 1; - struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *ctx = rq->ctx; struct guc_process_desc *desc = __get_process_desc(client); struct guc_wq_item *wqi; - u32 ring_tail, wq_off; + u32 wq_off; lockdep_assert_held(&client->wq_lock); - ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); - GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); - /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we * should not have the case where structure wqi is across page, neither * wrapped to the beginning. This simplifies the implementation below. @@ -510,15 +506,14 @@ static void guc_wq_item_append(struct i915_guc_client *client, /* Now fill in the 4-word work queue item */ wqi->header = WQ_TYPE_INORDER | (wqi_len << WQ_LEN_SHIFT) | - (engine->guc_id << WQ_TARGET_SHIFT) | + (target_engine << WQ_TARGET_SHIFT) | WQ_NO_WCFLUSH_WAIT; - - wqi->context_desc = lower_32_bits(intel_lr_context_descriptor(ctx, engine)); - + wqi->context_desc = context_desc; wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; - wqi->fence_id = rq->global_seqno; + GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); + wqi->fence_id = fence_id; - /* Postincrement WQ tail for next time. */ + /* Make the update visible to GuC */ WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1)); } @@ -551,6 +546,25 @@ static void guc_ring_doorbell(struct i915_guc_client *client) GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED); } +static void guc_add_request(struct intel_guc *guc, + struct drm_i915_gem_request *rq) +{ + struct i915_guc_client *client = guc->execbuf_client; + struct intel_engine_cs *engine = rq->engine; + u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(rq->ctx, engine)); + u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); + + spin_lock(&client->wq_lock); + + guc_wq_item_append(client, engine->guc_id, ctx_desc, + ring_tail, rq->global_seqno); + guc_ring_doorbell(client); + + client->submissions[engine->id] += 1; + + spin_unlock(&client->wq_lock); +} + /** * i915_guc_submit() - Submit commands through GuC * @engine: engine associated with the commands @@ -562,10 +576,8 @@ static void i915_guc_submit(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; struct intel_guc *guc = &dev_priv->guc; - struct i915_guc_client *client = guc->execbuf_client; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; - const unsigned int engine_id = engine->id; unsigned int n; for (n = 0; n < execlists_num_ports(execlists); n++) { @@ -579,14 +591,7 @@ static void i915_guc_submit(struct intel_engine_cs *engine) if (i915_vma_is_map_and_fenceable(rq->ring->vma)) POSTING_READ_FW(GUC_STATUS); - spin_lock(&client->wq_lock); - - guc_wq_item_append(client, rq); - guc_ring_doorbell(client); - - client->submissions[engine_id] += 1; - - spin_unlock(&client->wq_lock); + guc_add_request(guc, rq); } } } From df77cd83d5566ef6a04528bb08732ccbf829580e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 25 Oct 2017 22:00:15 +0200 Subject: [PATCH 036/260] drm/i915: Extract "emit write" part of emit breadcrumb functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's separate the "emit" part from touching any internal structures, this way we can have a generic "emit coherent GGTT write" function. We would like to reuse this functionality for emitting HWSP write, to confirm that preempt-to-idle has finished. v2: Reorder args to match emit_pipe_control, s/render/rcs (Chris) Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171025200020.16636-8-michal.winiarski@intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 28 ++++++------------ drivers/gpu/drm/i915/intel_ringbuffer.h | 38 +++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e821c1eba7a4..599c709fc5a7 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1794,10 +1794,8 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; - *cs++ = 0; - *cs++ = request->global_seqno; + cs = gen8_emit_ggtt_write(cs, request->global_seqno, + intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); @@ -1807,24 +1805,14 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, +static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request, u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); - /* w/a for post sync ops following a GPGPU operation we - * need a prior CS_STALL, which is emitted by the flush - * following the batch. - */ - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE; - *cs++ = intel_hws_seqno_address(request->engine); - *cs++ = 0; - *cs++ = request->global_seqno; - /* We're thrashing one dword of HWS. */ - *cs++ = 0; + cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno, + intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); @@ -1832,7 +1820,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, gen8_emit_wa_tail(request, cs); } -static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; +static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; static int gen8_init_rcs_context(struct drm_i915_gem_request *req) { @@ -1991,8 +1979,8 @@ int logical_render_ring_init(struct intel_engine_cs *engine) engine->init_hw = gen8_init_render_ring; engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; - engine->emit_breadcrumb = gen8_emit_breadcrumb_render; - engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_render_sz; + engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; + engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz; ret = intel_engine_create_scratch(engine, PAGE_SIZE); if (ret) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index e5a0d489a304..c15161e56964 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -869,6 +869,44 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) return batch + 6; } +static inline u32 * +gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset) +{ + /* We're using qword write, offset should be aligned to 8 bytes. */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + /* w/a for post sync ops following a GPGPU operation we + * need a prior CS_STALL, which is emitted by the flush + * following the batch. + */ + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = gtt_offset; + *cs++ = 0; + *cs++ = value; + /* We're thrashing one dword of HWS. */ + *cs++ = 0; + + return cs; +} + +static inline u32 * +gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) +{ + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ + GEM_BUG_ON(gtt_offset & (1 << 5)); + /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = value; + + return cs; +} + bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); From 3b8a8a30064d0b89d09e979adb4c975d892c21ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 25 Oct 2017 22:00:16 +0200 Subject: [PATCH 037/260] drm/i915: Add information needed to track engine preempt state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We shouldn't inspect ELSP context status (or any other bits depending on specific submission backend) when using GuC submission. Let's use another piece of HWSP for preempt context, to write its bit of information, meaning that preemption has finished, and hardware is now idle. Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Jeff McGee Cc: Michal Wajdeczko Cc: Oscar Mateo Reviewed-by: Chris Wilson Reviewed-by: Jeff McGee Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171025200020.16636-9-michal.winiarski@intel.com --- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c15161e56964..4a5a08985328 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -626,6 +626,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) */ #define I915_GEM_HWS_INDEX 0x30 #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_GEM_HWS_PREEMPT_INDEX 0x32 +#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) #define I915_GEM_HWS_SCRATCH_INDEX 0x40 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) @@ -778,6 +780,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; } +static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) +{ + return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR; +} + /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); From fa87271e5bc57ad30c19385e2d0d1d7fbd3c21c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 25 Oct 2017 22:00:17 +0200 Subject: [PATCH 038/260] drm/i915/guc: Keep request->priority for its lifetime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We also want to support preemption with GuC submission backend. In order to do that, we need to remember the priority, like we do on execlists path. v2: Remove completed prio == INT_MAX optimization Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Jeff McGee Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171025200020.16636-10-michal.winiarski@intel.com --- drivers/gpu/drm/i915/i915_guc_submission.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 301c658a4cde..c99ddd8457f3 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -641,7 +641,6 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine) } INIT_LIST_HEAD(&rq->priotree.link); - rq->priotree.priority = INT_MAX; __i915_gem_request_submit(rq); trace_i915_gem_request_in(rq, port_index(port, execlists)); From a4598d17551ab5ce5c9fd2ac7a7803e89b63260e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 25 Oct 2017 22:00:18 +0200 Subject: [PATCH 039/260] drm/i915: Rename helpers used for unwinding, use macro for can_preempt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We would also like to make use of execlist_cancel_port_requests and unwind_incomplete_requests in GuC preemption backend. Let's rename the functions to use the correct prefixes, so that we can simply add the declarations in the following patch. Similar thing for applies for can_preempt, except we're introducing HAS_LOGICAL_RING_PREEMPTION macro instad, converting other users that were previously touching device info directly. v2: s/intel_engine/execlists and pass execlists to unwind (Chris) v3: use locked version for exporting, drop const qual (Chris) Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Michal Wajdeczko Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171025200020.16636-11-michal.winiarski@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 6 ++--- drivers/gpu/drm/i915/intel_lrc.c | 35 ++++++++++++++------------ 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3db5851756f0..7b871802ae36 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -372,7 +372,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value |= I915_SCHEDULER_CAP_ENABLED; value |= I915_SCHEDULER_CAP_PRIORITY; - if (INTEL_INFO(dev_priv)->has_logical_ring_preemption && + if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) && i915_modparams.enable_execlists && !i915_modparams.enable_guc_submission) value |= I915_SCHEDULER_CAP_PREEMPTION; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 366ba74b0ad2..61c155cbf9d7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3140,6 +3140,8 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ ((dev_priv)->info.has_logical_ring_contexts) +#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \ + ((dev_priv)->info.has_logical_ring_preemption) #define USES_PPGTT(dev_priv) (i915_modparams.enable_ppgtt) #define USES_FULL_PPGTT(dev_priv) (i915_modparams.enable_ppgtt >= 2) #define USES_FULL_48BIT_PPGTT(dev_priv) (i915_modparams.enable_ppgtt == 3) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index fedb839dff61..3ac876ca6cae 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -620,7 +620,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * Similarly the preempt context must always be available so that * we can interrupt the engine at any time. */ - if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) { + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { ring = engine->context_pin(engine, engine->i915->preempt_context); if (IS_ERR(ring)) { @@ -651,7 +651,7 @@ err_rs_fini: err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: - if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) engine->context_unpin(engine, engine->i915->preempt_context); err_unpin_kernel: engine->context_unpin(engine, engine->i915->kernel_context); @@ -679,7 +679,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); - if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) engine->context_unpin(engine, engine->i915->preempt_context); engine->context_unpin(engine, engine->i915->kernel_context); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 599c709fc5a7..b5d382ef8d85 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -354,7 +354,7 @@ static void unwind_wa_tail(struct drm_i915_gem_request *rq) assert_ring_tail_valid(rq->ring, rq->tail); } -static void unwind_incomplete_requests(struct intel_engine_cs *engine) +static void __unwind_incomplete_requests(struct intel_engine_cs *engine) { struct drm_i915_gem_request *rq, *rn; struct i915_priolist *uninitialized_var(p); @@ -385,6 +385,17 @@ static void unwind_incomplete_requests(struct intel_engine_cs *engine) } } +static void +execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) +{ + struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + + spin_lock_irq(&engine->timeline->lock); + __unwind_incomplete_requests(engine); + spin_unlock_irq(&engine->timeline->lock); +} + static inline void execlists_context_status_change(struct drm_i915_gem_request *rq, unsigned long status) @@ -515,11 +526,6 @@ static void inject_preempt_context(struct intel_engine_cs *engine) elsp_write(ce->lrc_desc, elsp); } -static bool can_preempt(struct intel_engine_cs *engine) -{ - return INTEL_INFO(engine->i915)->has_logical_ring_preemption; -} - static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -567,7 +573,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (port_count(&port[0]) > 1) goto unlock; - if (can_preempt(engine) && + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) && rb_entry(rb, struct i915_priolist, node)->priority > max(last->priotree.priority, 0)) { /* @@ -691,7 +697,7 @@ unlock: } static void -execlist_cancel_port_requests(struct intel_engine_execlists *execlists) +execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) { struct execlist_port *port = execlists->port; unsigned int num_ports = execlists_num_ports(execlists); @@ -718,7 +724,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) spin_lock_irqsave(&engine->timeline->lock, flags); /* Cancel the requests on the HW and clear the ELSP tracker. */ - execlist_cancel_port_requests(execlists); + execlists_cancel_port_requests(execlists); /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->timeline->requests, link) { @@ -858,11 +864,8 @@ static void intel_lrc_irq_handler(unsigned long data) if (status & GEN8_CTX_STATUS_ACTIVE_IDLE && buf[2*head + 1] == PREEMPT_ID) { - execlist_cancel_port_requests(execlists); - - spin_lock_irq(&engine->timeline->lock); - unwind_incomplete_requests(engine); - spin_unlock_irq(&engine->timeline->lock); + execlists_cancel_port_requests(execlists); + execlists_unwind_incomplete_requests(execlists); GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); @@ -1531,10 +1534,10 @@ static void reset_common_ring(struct intel_engine_cs *engine, * guessing the missed context-switch events by looking at what * requests were completed. */ - execlist_cancel_port_requests(execlists); + execlists_cancel_port_requests(execlists); /* Push back any incomplete requests for replay after the reset. */ - unwind_incomplete_requests(engine); + __unwind_incomplete_requests(engine); spin_unlock_irqrestore(&engine->timeline->lock, flags); From c41937fd994a449b4e862826d7eb9f366f6d33e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Thu, 26 Oct 2017 15:35:58 +0200 Subject: [PATCH 040/260] drm/i915/guc: Preemption! With GuC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pretty similar to what we have on execlists. We're reusing most of the GEM code, however, due to GuC quirks we need a couple of extra bits. Preemption is implemented as GuC action, and actions can be pretty slow. Because of that, we're using a mutex to serialize them. Since we're requesting preemption from the tasklet, the task of creating a workitem and wrapping it in GuC action is delegated to a worker. To distinguish that preemption has finished, we're using additional piece of HWSP, and since we're not getting context switch interrupts, we're also adding a user interrupt. The fact that our special preempt context has completed unfortunately doesn't mean that we're ready to submit new work. We also need to wait for GuC to finish its own processing. v2: Don't compile out the wait for GuC, handle workqueue flush on reset, no need for ordered workqueue, put on a reviewer hat when looking at my own patches (Chris) Move struct work around in intel_guc, move user interruput outside of conditional (Michał) Keep ring around rather than chase though intel_context v3: Extract WA for flushing ggtt writes to a helper (Chris) Keep work_struct in intel_guc rather than engine (Michał) Use ordered workqueue for inject_preempt worker to avoid GuC quirks. v4: Drop now unused INTEL_GUC_PREEMPT_OPTION_IMMEDIATE (Daniele) Drop stray newlines, use container_of for intel_guc in worker, check for presence of workqueue when flushing it, rather than enable_guc_submission modparam, reorder preempt postprocessing (Chris) v5: Make wq NULL after destroying it v6: Swap struct guc_preempt_work members (Michał) Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Jeff McGee Cc: Joonas Lahtinen Cc: Oscar Mateo Cc: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171026133558.19580-1-michal.winiarski@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_gem.c | 10 + drivers/gpu/drm/i915/i915_guc_submission.c | 208 +++++++++++++++++++-- drivers/gpu/drm/i915/intel_guc.h | 8 + drivers/gpu/drm/i915/intel_lrc.c | 4 +- drivers/gpu/drm/i915/intel_lrc.h | 1 - drivers/gpu/drm/i915/intel_ringbuffer.h | 6 + 7 files changed, 222 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7b871802ae36..af745749509c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -373,8 +373,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value |= I915_SCHEDULER_CAP_PRIORITY; if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) && - i915_modparams.enable_execlists && - !i915_modparams.enable_guc_submission) + i915_modparams.enable_execlists) value |= I915_SCHEDULER_CAP_PREEMPTION; } break; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d803ef5f4a7f..c2506fb3a483 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2921,6 +2921,16 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) tasklet_kill(&engine->execlists.irq_tasklet); tasklet_disable(&engine->execlists.irq_tasklet); + /* + * We're using worker to queue preemption requests from the tasklet in + * GuC submission mode. + * Even though tasklet was disabled, we may still have a worker queued. + * Let's make sure that all workers scheduled before disabling the + * tasklet are completed before continuing with the reset. + */ + if (engine->i915->guc.preempt_wq) + flush_workqueue(engine->i915->guc.preempt_wq); + if (engine->irq_seqno_barrier) engine->irq_seqno_barrier(engine); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index c99ddd8457f3..3049a0781b88 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -565,6 +565,108 @@ static void guc_add_request(struct intel_guc *guc, spin_unlock(&client->wq_lock); } +/* + * When we're doing submissions using regular execlists backend, writing to + * ELSP from CPU side is enough to make sure that writes to ringbuffer pages + * pinned in mappable aperture portion of GGTT are visible to command streamer. + * Writes done by GuC on our behalf are not guaranteeing such ordering, + * therefore, to ensure the flush, we're issuing a POSTING READ. + */ +static void flush_ggtt_writes(struct i915_vma *vma) +{ + struct drm_i915_private *dev_priv = to_i915(vma->obj->base.dev); + + if (i915_vma_is_map_and_fenceable(vma)) + POSTING_READ_FW(GUC_STATUS); +} + +#define GUC_PREEMPT_FINISHED 0x1 +#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8 +static void inject_preempt_context(struct work_struct *work) +{ + struct guc_preempt_work *preempt_work = + container_of(work, typeof(*preempt_work), work); + struct intel_engine_cs *engine = preempt_work->engine; + struct intel_guc *guc = container_of(preempt_work, typeof(*guc), + preempt_work[engine->id]); + struct i915_guc_client *client = guc->preempt_client; + struct intel_ring *ring = client->owner->engine[engine->id].ring; + u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner, + engine)); + u32 *cs = ring->vaddr + ring->tail; + u32 data[7]; + + if (engine->id == RCS) { + cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED, + intel_hws_preempt_done_address(engine)); + } else { + cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED, + intel_hws_preempt_done_address(engine)); + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + } + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + GEM_BUG_ON(!IS_ALIGNED(ring->size, + GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32))); + GEM_BUG_ON((void *)cs - (ring->vaddr + ring->tail) != + GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32)); + + ring->tail += GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32); + ring->tail &= (ring->size - 1); + + flush_ggtt_writes(ring->vma); + + spin_lock_irq(&client->wq_lock); + guc_wq_item_append(client, engine->guc_id, ctx_desc, + ring->tail / sizeof(u64), 0); + spin_unlock_irq(&client->wq_lock); + + data[0] = INTEL_GUC_ACTION_REQUEST_PREEMPTION; + data[1] = client->stage_id; + data[2] = INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q | + INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q; + data[3] = engine->guc_id; + data[4] = guc->execbuf_client->priority; + data[5] = guc->execbuf_client->stage_id; + data[6] = guc_ggtt_offset(guc->shared_data); + + if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) { + execlists_clear_active(&engine->execlists, + EXECLISTS_ACTIVE_PREEMPT); + tasklet_schedule(&engine->execlists.irq_tasklet); + } +} + +/* + * We're using user interrupt and HWSP value to mark that preemption has + * finished and GPU is idle. Normally, we could unwind and continue similar to + * execlists submission path. Unfortunately, with GuC we also need to wait for + * it to finish its own postprocessing, before attempting to submit. Otherwise + * GuC may silently ignore our submissions, and thus we risk losing request at + * best, executing out-of-order and causing kernel panic at worst. + */ +#define GUC_PREEMPT_POSTPROCESS_DELAY_MS 10 +static void wait_for_guc_preempt_report(struct intel_engine_cs *engine) +{ + struct intel_guc *guc = &engine->i915->guc; + struct guc_shared_ctx_data *data = guc->shared_data_vaddr; + struct guc_ctx_report *report = + &data->preempt_ctx_report[engine->guc_id]; + + WARN_ON(wait_for_atomic(report->report_return_status == + INTEL_GUC_REPORT_STATUS_COMPLETE, + GUC_PREEMPT_POSTPROCESS_DELAY_MS)); + /* + * GuC is expecting that we're also going to clear the affected context + * counter, let's also reset the return status to not depend on GuC + * resetting it after recieving another preempt action + */ + report->affected_count = 0; + report->report_return_status = INTEL_GUC_REPORT_STATUS_UNKNOWN; +} + /** * i915_guc_submit() - Submit commands through GuC * @engine: engine associated with the commands @@ -574,8 +676,7 @@ static void guc_add_request(struct intel_guc *guc, */ static void i915_guc_submit(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - struct intel_guc *guc = &dev_priv->guc; + struct intel_guc *guc = &engine->i915->guc; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; unsigned int n; @@ -588,8 +689,7 @@ static void i915_guc_submit(struct intel_engine_cs *engine) if (rq && count == 0) { port_set(&port[n], port_pack(rq, ++count)); - if (i915_vma_is_map_and_fenceable(rq->ring->vma)) - POSTING_READ_FW(GUC_STATUS); + flush_ggtt_writes(rq->ring->vma); guc_add_request(guc, rq); } @@ -617,13 +717,32 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine) bool submit = false; struct rb_node *rb; - if (port_isset(port)) - port++; - spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); - while (rb) { + + if (!rb) + goto unlock; + + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) && port_isset(port)) { + struct guc_preempt_work *preempt_work = + &engine->i915->guc.preempt_work[engine->id]; + + if (rb_entry(rb, struct i915_priolist, node)->priority > + max(port_request(port)->priotree.priority, 0)) { + execlists_set_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); + queue_work(engine->i915->guc.preempt_wq, + &preempt_work->work); + goto unlock; + } else if (port_isset(last_port)) { + goto unlock; + } + + port++; + } + + do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct drm_i915_gem_request *rq, *rn; @@ -653,7 +772,7 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } + } while (rb); done: execlists->first = rb; if (submit) { @@ -661,6 +780,7 @@ done: execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); i915_guc_submit(engine); } +unlock: spin_unlock_irq(&engine->timeline->lock); } @@ -669,8 +789,6 @@ static void i915_guc_irq_handler(unsigned long data) struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; - const struct execlist_port * const last_port = - &execlists->port[execlists->port_mask]; struct drm_i915_gem_request *rq; rq = port_request(&port[0]); @@ -685,7 +803,19 @@ static void i915_guc_irq_handler(unsigned long data) if (!rq) execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); - if (!port_isset(last_port)) + if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && + intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) == + GUC_PREEMPT_FINISHED) { + execlists_cancel_port_requests(&engine->execlists); + execlists_unwind_incomplete_requests(execlists); + + wait_for_guc_preempt_report(engine); + + execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT); + intel_write_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX, 0); + } + + if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) i915_guc_dequeue(engine); } @@ -1059,6 +1189,51 @@ static void guc_ads_destroy(struct intel_guc *guc) i915_vma_unpin_and_release(&guc->ads_vma); } +static int guc_preempt_work_create(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Even though both sending GuC action, and adding a new workitem to + * GuC workqueue are serialized (each with its own locking), since + * we're using mutliple engines, it's possible that we're going to + * issue a preempt request with two (or more - each for different + * engine) workitems in GuC queue. In this situation, GuC may submit + * all of them, which will make us very confused. + * Our preemption contexts may even already be complete - before we + * even had the chance to sent the preempt action to GuC!. Rather + * than introducing yet another lock, we can just use ordered workqueue + * to make sure we're always sending a single preemption request with a + * single workitem. + */ + guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt", + WQ_HIGHPRI); + if (!guc->preempt_wq) + return -ENOMEM; + + for_each_engine(engine, dev_priv, id) { + guc->preempt_work[id].engine = engine; + INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context); + } + + return 0; +} + +static void guc_preempt_work_destroy(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) + cancel_work_sync(&guc->preempt_work[id].work); + + destroy_workqueue(guc->preempt_wq); + guc->preempt_wq = NULL; +} + /* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. @@ -1083,12 +1258,18 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) if (ret < 0) goto err_shared_data; + ret = guc_preempt_work_create(guc); + if (ret) + goto err_log; + ret = guc_ads_create(guc); if (ret < 0) - goto err_log; + goto err_wq; return 0; +err_wq: + guc_preempt_work_destroy(guc); err_log: intel_guc_log_destroy(guc); err_shared_data: @@ -1103,6 +1284,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) struct intel_guc *guc = &dev_priv->guc; guc_ads_destroy(guc); + guc_preempt_work_destroy(guc); intel_guc_log_destroy(guc); guc_shared_data_destroy(guc); guc_stage_desc_pool_destroy(guc); diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 6ca55c5bed3c..607e02500262 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -34,6 +34,11 @@ #include "i915_guc_reg.h" #include "i915_vma.h" +struct guc_preempt_work { + struct work_struct work; + struct intel_engine_cs *engine; +}; + /* * Top level structure of GuC. It handles firmware loading and manages client * pool and doorbells. intel_guc owns a i915_guc_client to replace the legacy @@ -60,6 +65,9 @@ struct intel_guc { struct i915_guc_client *execbuf_client; struct i915_guc_client *preempt_client; + struct guc_preempt_work preempt_work[I915_NUM_ENGINES]; + struct workqueue_struct *preempt_wq; + DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); /* Cyclic counter mod pagesize */ u32 db_cacheline; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b5d382ef8d85..6840ec8db037 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -385,7 +385,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) } } -static void +void execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) { struct intel_engine_cs *engine = @@ -696,7 +696,7 @@ unlock: } } -static void +void execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) { struct execlist_port *port = execlists->port; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 689fde1a63a9..17182ce29674 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -107,7 +107,6 @@ intel_lr_context_descriptor(struct i915_gem_context *ctx, return ctx->engine[engine->id].lrc_desc; } - /* Execlists */ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4a5a08985328..69ad875fd011 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -557,6 +557,12 @@ execlists_is_active(const struct intel_engine_execlists *execlists, return test_bit(bit, (unsigned long *)&execlists->active); } +void +execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); + +void +execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); + static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) { From bad7b7e674cf94775cf86a7c4331bab61d79d7ea Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 24 Oct 2017 10:50:56 +0000 Subject: [PATCH 041/260] drm/i915/huc: Use helper function while waiting for DMA completion Waiting for DMA status register can be done with dedicated function. Lets use it as additional bonus will be smaller driver footprint. Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171024105056.43276-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_huc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index c8a48cbc2b7d..98d17254593c 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -151,7 +151,7 @@ static int huc_ucode_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(HUC_UKERNEL | START_DMA)); /* Wait for DMA to finish */ - ret = wait_for((I915_READ(DMA_CTRL) & START_DMA) == 0, 100); + ret = intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0, 100); DRM_DEBUG_DRIVER("HuC DMA transfer wait over with ret %d\n", ret); From 3c75de5b983a0a147ec93d1960f241a74d76b347 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 26 Oct 2017 12:50:48 +0100 Subject: [PATCH 042/260] drm/i915: Include RING_MODE when dumping the engine state Knowing the RING_MODE flags is useful for checking the state of the engine, such as whether the CS is idle after trying to stop the engines before reset. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171026115048.20144-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_engine_cs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 3ac876ca6cae..f31f2d6384c3 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1723,9 +1723,14 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) drm_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, rq ? rq->ring->tail : 0); - drm_printf(m, "\tRING_CTL: 0x%08x [%s]\n", + drm_printf(m, "\tRING_CTL: 0x%08x%s\n", I915_READ(RING_CTL(engine->mmio_base)), - I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : ""); + I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : ""); + if (INTEL_GEN(engine->i915) > 2) { + drm_printf(m, "\tRING_MODE: 0x%08x%s\n", + I915_READ(RING_MI_MODE(engine->mmio_base)), + I915_READ(RING_MI_MODE(engine->mmio_base)) & (MODE_IDLE) ? " [idle]" : ""); + } rcu_read_unlock(); From 39bf4de89ff7c87a2205e7ec5483e7424b86a1f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Oct 2017 19:15:47 +0100 Subject: [PATCH 043/260] drm/i915: Add -Wall -Wextra to our build, set warnings to full MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently W=1 on gcc-7.2 (-Wunused-const-variable) caught a regression that had been lurking for 6 months, so lets try enabling the full set of warnings for CI builds. This means more patches will be rejected early that contain trivial and sometimes not so trivial bugs. However, our code does not yet compile cleanly with W=1, so we have to apply a filter to the set of warnings until we can eliminate the mistakes. It also means that developers will have to be running the full gamut of gcc to ensure that as warnings come and go with gcc updates, we have the CI build prepared. v2: Use fine-grained -Wno overrides. Inside the makefile, we can specify CFLAGS on a per-object level, which allows us to limit the scope of any particular warning override. v3: Place per-file overrides after the main enabling block. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Daniel Vetter Cc: Tomi Sarvela Cc: Michal Wajdeczko Cc: Ville Syrjälä Acked-by: Tomi Sarvela Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171024181547.27889-1-chris@chris-wilson.co.uk Acked-by: Jani Nikula --- drivers/gpu/drm/i915/Makefile | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 6c3b0481ef82..7750be8e27a6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -2,7 +2,26 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror +# Add a set of useful warning flags and enable -Werror for CI to prevent +# trivial mistakes from creeping in. We have to do this piecemeal as we reject +# any patch that isn't warning clean, so turning on -Wall -Wextra (or W=1) we +# need to filter out dubious warnings. Still it is our interest +# to keep running locally with W=1 C=1 until we are completely clean. +# +# Note the danger in using -Wall -Wextra is that when CI updates gcc we +# will most likely get a sudden build breakage... Hopefully we will fix +# new warnings before CI updates! +subdir-ccflags-y := -Wall -Wextra +subdir-ccflags-y += $(call cc-option,-Wno-unused-parameter,) +subdir-ccflags-y += $(call cc-option,-Wno-type-limits,) +subdir-ccflags-y += $(call cc-option,-Wno-missing-field-initializers,) +subdir-ccflags-y += $(call cc-option,-Wno-implicit-fallthrough,) +subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror + +# Fine grained warnings disable +CFLAGS_i915_pci.o = $(call cc-option,-Wno-override-init,) +CFLAGS_intel_fbdev.o = $(call cc-option,-Wno-override-init,) + subdir-ccflags-y += \ $(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA) From 886c6b8692ba5f71b578097524b3b082e2e02119 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Thu, 26 Oct 2017 14:52:00 -0700 Subject: [PATCH 044/260] drm/i915: Cancel the modeset retry work during modeset cleanup During modeset cleanup on driver unload we may have a pending hotplug work. This needs to be canceled early during the teardown so that it does not fire after we have freed the connector. We do this after drm_kms_helper_poll_fini(dev) since this might trigger modeset retry work due to link retrain and before intel_fbdev_fini() since this work requires the lock from fbdev. If this is not done we may see something like: DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock)) ------------[ cut here ]------------ WARNING: CPU: 4 PID: 5010 at kernel/locking/mutex-debug.c:103 mutex_destroy+0x4e/0x60 Modules linked in: i915(-) snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec snd_hwdep snd_hda_core snd_pcm vgem ax88179_178 +a usbnet mii x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel e1000e ptp pps_core prime_numbers i2c_hid +[last unloaded: snd_hda_intel] CPU: 4 PID: 5010 Comm: drv_module_relo Tainted: G U 4.14.0-rc3-CI-CI_DRM_3186+ #1 Hardware name: Intel Corporation CoffeeLake Client Platform/CoffeeLake S UDIMM RVP, BIOS CNLSFWX1.R00.X104.A03.1709140524 09/14/2017 task: ffff8803c827aa40 task.stack: ffffc90000520000 RIP: 0010:mutex_destroy+0x4e/0x60 RSP: 0018:ffffc90000523d58 EFLAGS: 00010292 RAX: 000000000000002a RBX: ffff88044fbef648 RCX: 0000000000000000 RDX: 0000000080000001 RSI: 0000000000000001 RDI: ffffffff810f0cf0 RBP: ffffc90000523d60 R08: 0000000000000001 R09: 0000000000000001 R10: 000000000f21cb81 R11: 0000000000000000 R12: ffff88044f71efc8 R13: ffffffffa02b3d20 R14: ffffffffa02b3d90 R15: ffff880459b29308 FS: 00007f5df4d6e8c0(0000) GS:ffff88045d300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055ec51f00a18 CR3: 0000000451782006 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: drm_fb_helper_fini+0xd9/0x130 intel_fbdev_destroy+0x12/0x60 [i915] intel_fbdev_fini+0x28/0x30 [i915] intel_modeset_cleanup+0x45/0xa0 [i915] i915_driver_unload+0x92/0x180 [i915] i915_pci_remove+0x19/0x30 [i915] i915_driver_unload+0x92/0x180 [i915] i915_pci_remove+0x19/0x30 [i915] pci_device_remove+0x39/0xb0 device_release_driver_internal+0x15d/0x220 driver_detach+0x40/0x80 bus_remove_driver+0x58/0xd0 driver_unregister+0x2c/0x40 pci_unregister_driver+0x36/0xb0 i915_exit+0x1a/0x8b [i915] SyS_delete_module+0x18c/0x1e0 entry_SYSCALL_64_fastpath+0x1c/0xb1 RIP: 0033:0x7f5df3286287 RSP: 002b:00007fff8e107cc8 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: ffffffff81493a03 RCX: 00007f5df3286287 RDX: 0000000000000001 RSI: 0000000000000800 RDI: 0000564c7be02e48 RBP: ffffc90000523f88 R08: 0000000000000000 R09: 0000000000000080 R10: 00007f5df4d6e8c0 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff8e107eb0 R14: 0000000000000000 R15: 0000000000000000 Or a GPF like: general protection fault: 0000 [#1] PREEMPT SMP Modules linked in: i915(-) snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec snd_hwdep snd_hda_core snd_pcm vgem ax88179_178 +a usbnet mii x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel e1000e ptp pps_core prime_numbers i2c_hid +[last unloaded: snd_hda_intel] CPU: 0 PID: 82 Comm: kworker/0:1 Tainted: G U W 4.14.0-rc3-CI-CI_DRM_3186+ #1 Hardware name: Intel Corporation CoffeeLake Client Platform/CoffeeLake S UDIMM RVP, BIOS CNLSFWX1.R00.X104.A03.1709140524 09/14/2017 Workqueue: events intel_dp_modeset_retry_work_fn [i915] task: ffff88045a5caa40 task.stack: ffffc90000378000 RIP: 0010:drm_setup_crtcs+0x143/0xbf0 RSP: 0018:ffffc9000037bd20 EFLAGS: 00010202 RAX: 6b6b6b6b6b6b6b6b RBX: 0000000000000002 RCX: 0000000000000001 RDX: 0000000000000001 RSI: 0000000000000780 RDI: 00000000ffffffff RBP: ffffc9000037bdb8 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000780 R11: 0000000000000000 R12: 0000000000000002 R13: ffff88044fbef4e8 R14: 0000000000000780 R15: 0000000000000438 FS: 0000000000000000(0000) GS:ffff88045d200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055ec51ee5168 CR3: 000000044c89d003 CR4: 00000000003606f0 Call Trace: drm_fb_helper_hotplug_event.part.18+0x7e/0xc0 drm_fb_helper_hotplug_event+0x1a/0x20 intel_fbdev_output_poll_changed+0x1a/0x20 [i915] drm_kms_helper_hotplug_event+0x27/0x30 intel_dp_modeset_retry_work_fn+0x77/0x80 [i915] process_one_work+0x233/0x660 worker_thread+0x206/0x3b0 kthread+0x152/0x190 ? process_one_work+0x660/0x660 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x27/0x40 Code: 06 00 00 45 8b 45 20 31 db 45 31 e4 45 85 c0 0f 8e 91 06 00 00 44 8b 75 94 44 8b 7d 90 49 8b 45 28 49 63 d4 44 89 f6 41 83 c4 01 <48> 8b 04 d0 44 +89 fa 48 8b 38 48 8b 87 a8 01 00 00 ff 50 20 01 RIP: drm_setup_crtcs+0x143/0xbf0 RSP: ffffc9000037bd20 ---[ end trace 08901ff1a77d30c7 ]--- v2: * Rename it to intel_hpd_poll_fini() and call drm_kms_helper_fini() inside it as the first step before cancel work (Chris Wilson) * Add GPF trace in commit message and make the function static (Maarten Lankhorst) Suggested-by: Maarten Lankhorst Suggested-by: Chris Wilson Fixes: 9301397a63b3 ("drm/i915: Implement Link Rate fallback on Link training failure") Cc: Chris Wilson Cc: Tony Cheng Cc: Harry Wentland Cc: Jani Nikula Cc: Daniel Vetter Cc: Ville Syrjala Cc: Manasi Navare Cc: Maarten Lankhorst Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/1509054720-25325-1-git-send-email-manasi.d.navare@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0e493a1fedab..a369b35d044d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15200,6 +15200,23 @@ void intel_connector_unregister(struct drm_connector *connector) intel_panel_destroy_backlight(connector); } +static void intel_hpd_poll_fini(struct drm_device *dev) +{ + struct intel_connector *connector; + struct drm_connector_list_iter conn_iter; + + /* First disable polling... */ + drm_kms_helper_poll_fini(dev); + + /* Then kill the work that may have been queued by hpd. */ + drm_connector_list_iter_begin(dev, &conn_iter); + for_each_intel_connector_iter(connector, &conn_iter) { + if (connector->modeset_retry_work.func) + cancel_work_sync(&connector->modeset_retry_work); + } + drm_connector_list_iter_end(&conn_iter); +} + void intel_modeset_cleanup(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -15220,7 +15237,7 @@ void intel_modeset_cleanup(struct drm_device *dev) * Due to the hpd irq storm handling the hotplug work can re-arm the * poll handlers. Hence disable polling after hpd handling is shut down. */ - drm_kms_helper_poll_fini(dev); + intel_hpd_poll_fini(dev); /* poll work can call into fbdev, hence clean that up afterwards */ intel_fbdev_fini(dev_priv); From 28283f4f359cd7cfa9e65457bb98c507a2cd0cd0 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Oct 2017 17:13:40 +0200 Subject: [PATCH 045/260] drm/i915: Do not rely on wm preservation for ILK watermarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original intent was to preserve watermarks as much as possible in intel_pipe_wm.raw_wm, and put the validated ones in intel_pipe_wm.wm. It seems this approach is insufficient and we don't always preserve the raw watermarks, so just use the atomic iterator we're already using to get a const pointer to all bound planes on the crtc. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102373 Signed-off-by: Maarten Lankhorst Cc: stable@vger.kernel.org #v4.8+ Acked-by: Ville Syrjälä Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20171019151341.4579-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 1 - drivers/gpu/drm/i915/intel_pm.c | 51 +++++++++++++------------------- 2 files changed, 21 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3b4eafd39f55..df966427232c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -501,7 +501,6 @@ struct intel_crtc_scaler_state { struct intel_pipe_wm { struct intel_wm_level wm[5]; - struct intel_wm_level raw_wm[5]; uint32_t linetime; bool fbc_wm_enabled; bool pipe_enabled; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 742d5455b201..e7926973112e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2721,9 +2721,9 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, const struct intel_crtc *intel_crtc, int level, struct intel_crtc_state *cstate, - struct intel_plane_state *pristate, - struct intel_plane_state *sprstate, - struct intel_plane_state *curstate, + const struct intel_plane_state *pristate, + const struct intel_plane_state *sprstate, + const struct intel_plane_state *curstate, struct intel_wm_level *result) { uint16_t pri_latency = dev_priv->wm.pri_latency[level]; @@ -3043,28 +3043,24 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) struct intel_pipe_wm *pipe_wm; struct drm_device *dev = state->dev; const struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane; - struct intel_plane_state *pristate = NULL; - struct intel_plane_state *sprstate = NULL; - struct intel_plane_state *curstate = NULL; + struct drm_plane *plane; + const struct drm_plane_state *plane_state; + const struct intel_plane_state *pristate = NULL; + const struct intel_plane_state *sprstate = NULL; + const struct intel_plane_state *curstate = NULL; int level, max_level = ilk_wm_max_level(dev_priv), usable_level; struct ilk_wm_maximums max; pipe_wm = &cstate->wm.ilk.optimal; - for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { - struct intel_plane_state *ps; + drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) { + const struct intel_plane_state *ps = to_intel_plane_state(plane_state); - ps = intel_atomic_get_existing_plane_state(state, - intel_plane); - if (!ps) - continue; - - if (intel_plane->base.type == DRM_PLANE_TYPE_PRIMARY) + if (plane->type == DRM_PLANE_TYPE_PRIMARY) pristate = ps; - else if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) + else if (plane->type == DRM_PLANE_TYPE_OVERLAY) sprstate = ps; - else if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR) + else if (plane->type == DRM_PLANE_TYPE_CURSOR) curstate = ps; } @@ -3086,11 +3082,9 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) if (pipe_wm->sprites_scaled) usable_level = 0; - ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, - pristate, sprstate, curstate, &pipe_wm->raw_wm[0]); - memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm)); - pipe_wm->wm[0] = pipe_wm->raw_wm[0]; + ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, + pristate, sprstate, curstate, &pipe_wm->wm[0]); if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) pipe_wm->linetime = hsw_compute_linetime_wm(cstate); @@ -3100,8 +3094,8 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) ilk_compute_wm_reg_maximums(dev_priv, 1, &max); - for (level = 1; level <= max_level; level++) { - struct intel_wm_level *wm = &pipe_wm->raw_wm[level]; + for (level = 1; level <= usable_level; level++) { + struct intel_wm_level *wm = &pipe_wm->wm[level]; ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, pristate, sprstate, curstate, wm); @@ -3111,13 +3105,10 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) * register maximums since such watermarks are * always invalid. */ - if (level > usable_level) - continue; - - if (ilk_validate_wm_level(level, &max, wm)) - pipe_wm->wm[level] = *wm; - else - usable_level = level; + if (!ilk_validate_wm_level(level, &max, wm)) { + memset(wm, 0, sizeof(*wm)); + break; + } } return 0; From b6b178a77210055b153dbc175e4468bd3c7122df Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Oct 2017 17:13:41 +0200 Subject: [PATCH 046/260] drm/i915: Calculate ironlake intermediate watermarks correctly, v2. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The watermarks it should calculate against are the old optimal watermarks. The currently active crtc watermarks are pure fiction, and are invalid in case of a nonblocking modeset, page flip enabling/disabling planes or any other reason. When the crtc is disabled or during a modeset the intermediate watermarks don't need to be programmed separately, and could be directly assigned to the optimal watermarks. Changes since v1: - Use intel_atomic_get_old_crtc_state. (ville) Signed-off-by: Maarten Lankhorst Reviewed-by: Ville Syrjälä Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20171019151341.4579-2-maarten.lankhorst@linux.intel.com [mlankhorst: Add cc stable and bugzilla link, since previous patch doesn't fix issue by itself] Cc: stable@vger.kernel.org #v4.8+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102373 --- drivers/gpu/drm/i915/intel_pm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e7926973112e..07118c0b69d3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3124,7 +3124,11 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, struct intel_crtc_state *newstate) { struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate; - struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk; + struct intel_atomic_state *intel_state = + to_intel_atomic_state(newstate->base.state); + const struct intel_crtc_state *oldstate = + intel_atomic_get_old_crtc_state(intel_state, intel_crtc); + const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal; int level, max_level = ilk_wm_max_level(to_i915(dev)); /* @@ -3133,6 +3137,9 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, * and after the vblank. */ *a = newstate->wm.ilk.optimal; + if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base)) + return 0; + a->pipe_enabled |= b->pipe_enabled; a->sprites_enabled |= b->sprites_enabled; a->sprites_scaled |= b->sprites_scaled; From 0501a3b0eb01ac2209ef6fce76153e5d6b07034e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 26 Oct 2017 17:29:31 +0300 Subject: [PATCH 047/260] drm/i915/edp: read edp display control registers unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per my reading of the eDP spec, DP_DPCD_DISPLAY_CONTROL_CAPABLE bit in DP_EDP_CONFIGURATION_CAP should be set if the eDP display control registers starting at offset DP_EDP_DPCD_REV are "enabled". Currently we check the bit before reading the registers, and DP_EDP_DPCD_REV is the only way to detect eDP revision. Turns out there are (likely buggy) displays that require eDP 1.4+ features, such as supported link rates and link rate select, but do not have the bit set. Read the display control registers unconditionally. They are supposed to read zero anyway if they are not supported, so there should be no harm in this. This fixes the referenced bug by enabling the eDP version check, and thus reading of the supported link rates. The panel in question has 0 in DP_MAX_LINK_RATE which is only supported in eDP 1.4+. Without the supported link rates method we default to RBR which is insufficient for the panel native mode. As a curiosity, the panel also has a bogus value of 0x12 in DP_EDP_DPCD_REV, but that passes our check for >= DP_EDP_14 (which is 0x03). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103400 Reported-and-tested-by: Nicolas P. Cc: Ville Syrjälä Cc: stable@vger.kernel.org Reviewed-by: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20171026142932.17737-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index aa75f55eeb61..158438bb0389 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3735,9 +3735,16 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) } - /* Read the eDP Display control capabilities registers */ - if ((intel_dp->dpcd[DP_EDP_CONFIGURATION_CAP] & DP_DPCD_DISPLAY_CONTROL_CAPABLE) && - drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, + /* + * Read the eDP display control registers. + * + * Do this independent of DP_DPCD_DISPLAY_CONTROL_CAPABLE bit in + * DP_EDP_CONFIGURATION_CAP, because some buggy displays do not have it + * set, but require eDP 1.4+ detection (e.g. for supported link rates + * method). The display control registers should read zero if they're + * not supported anyway. + */ + if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) == sizeof(intel_dp->edp_dpcd)) DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd), From e6ed2a1b99051bc04abcc2daed3972efada0b7a9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 26 Oct 2017 17:29:32 +0300 Subject: [PATCH 048/260] drm/i915/edp: clean up code and comments around eDP DPCD read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some minor drive-by cleanups. Reviewed-by: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20171026142932.17737-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 158438bb0389..73b1ed8cff6b 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3747,11 +3747,11 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) == sizeof(intel_dp->edp_dpcd)) - DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd), + DRM_DEBUG_KMS("eDP DPCD: %*ph\n", (int) sizeof(intel_dp->edp_dpcd), intel_dp->edp_dpcd); - /* Intermediate frequency support */ - if (intel_dp->edp_dpcd[0] >= 0x03) { /* eDp v1.4 or higher */ + /* Read the eDP 1.4+ supported link rates. */ + if (intel_dp->edp_dpcd[0] >= DP_EDP_14) { __le16 sink_rates[DP_MAX_SUPPORTED_RATES]; int i; @@ -3775,6 +3775,10 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) intel_dp->num_sink_rates = i; } + /* + * Use DP_LINK_RATE_SET if DP_SUPPORTED_LINK_RATES are available, + * default to DP_MAX_LINK_RATE and DP_LINK_BW_SET otherwise. + */ if (intel_dp->num_sink_rates) intel_dp->use_rate_select = true; else From 11caf5517c2c9eebd435c60e9343a3784d74e9a1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Oct 2017 10:43:11 +0100 Subject: [PATCH 049/260] drm/i915: Empty the ring before disabling An interesting snippet from Sandybridge's prm: "Although a Ring Buffer can be enabled in the non-empty state, it must not be disabled unless it is empty. Attempting to disable a Ring Buffer in the non-empty state is UNDEFINED." Let's avoid the undefined behaviour as we disable the rings prior to reset and resume. v2: Tell HEAD to catch up to TAIL (empty ring) first, then reset both to 0 (supposedly while stopped). Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171027094311.30380-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++++- drivers/gpu/drm/i915/intel_uncore.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 05e01446b00b..47fadf8da84e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -480,10 +480,14 @@ static bool stop_ring(struct intel_engine_cs *engine) } } - I915_WRITE_CTL(engine, 0); + I915_WRITE_HEAD(engine, I915_READ_TAIL(engine)); + I915_WRITE_HEAD(engine, 0); I915_WRITE_TAIL(engine, 0); + /* The ring must be empty before it is disabled */ + I915_WRITE_CTL(engine, 0); + return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0; } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 20e3c65c0999..96ee6b2754be 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1387,10 +1387,14 @@ static void gen3_stop_engine(struct intel_engine_cs *engine) DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name); - I915_WRITE_FW(RING_CTL(base), 0); + I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base))); + I915_WRITE_FW(RING_HEAD(base), 0); I915_WRITE_FW(RING_TAIL(base), 0); + /* The ring must be empty before it is disabled */ + I915_WRITE_FW(RING_CTL(base), 0); + /* Check acts as a post */ if (I915_READ_FW(RING_HEAD(base)) != 0) DRM_DEBUG_DRIVER("%s: ring head not parked\n", From bea6e987c1ff358224e7bef7084be7650f5d1c38 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 26 Oct 2017 14:00:31 +0100 Subject: [PATCH 050/260] drm/i915: Hold rcu_read_lock when iterating over the radixtree (objects) Kasan spotted [IGT] gem_tiled_pread_pwrite: exiting, ret=0 ================================================================== BUG: KASAN: use-after-free in __i915_gem_object_reset_page_iter+0x15c/0x170 [i915] Read of size 8 at addr ffff8801359da310 by task kworker/3:2/182 CPU: 3 PID: 182 Comm: kworker/3:2 Tainted: G U 4.14.0-rc6-CI-Custom_3340+ #1 Hardware name: Intel Corp. Geminilake/GLK RVP1 DDR4 (05), BIOS GELKRVPA.X64.0062.B30.1708222146 08/22/2017 Workqueue: events __i915_gem_free_work [i915] Call Trace: dump_stack+0x68/0xa0 print_address_description+0x78/0x290 ? __i915_gem_object_reset_page_iter+0x15c/0x170 [i915] kasan_report+0x23d/0x350 __asan_report_load8_noabort+0x19/0x20 __i915_gem_object_reset_page_iter+0x15c/0x170 [i915] ? i915_gem_object_truncate+0x100/0x100 [i915] ? lock_acquire+0x380/0x380 __i915_gem_object_put_pages+0x30d/0x530 [i915] __i915_gem_free_objects+0x551/0xbd0 [i915] ? lock_acquire+0x13e/0x380 __i915_gem_free_work+0x4e/0x70 [i915] process_one_work+0x6f6/0x1590 ? pwq_dec_nr_in_flight+0x2b0/0x2b0 worker_thread+0xe6/0xe90 ? pci_mmcfg_check_reserved+0x110/0x110 kthread+0x309/0x410 ? process_one_work+0x1590/0x1590 ? kthread_create_on_node+0xb0/0xb0 ret_from_fork+0x27/0x40 Allocated by task 1801: save_stack_trace+0x1b/0x20 kasan_kmalloc+0xee/0x190 kasan_slab_alloc+0x12/0x20 kmem_cache_alloc+0xdc/0x2e0 radix_tree_node_alloc.constprop.12+0x48/0x330 __radix_tree_create+0x274/0x480 __radix_tree_insert+0xa2/0x610 i915_gem_object_get_sg+0x224/0x670 [i915] i915_gem_object_get_page+0xb5/0x1c0 [i915] i915_gem_pread_ioctl+0x822/0xf60 [i915] drm_ioctl_kernel+0x13f/0x1c0 drm_ioctl+0x6cf/0x980 do_vfs_ioctl+0x184/0xf30 SyS_ioctl+0x41/0x70 entry_SYSCALL_64_fastpath+0x1c/0xb1 Freed by task 37: save_stack_trace+0x1b/0x20 kasan_slab_free+0xaf/0x190 kmem_cache_free+0xbf/0x340 radix_tree_node_rcu_free+0x79/0x90 rcu_process_callbacks+0x46d/0xf40 __do_softirq+0x21c/0x8d3 The buggy address belongs to the object at ffff8801359da0f0 which belongs to the cache radix_tree_node of size 576 The buggy address is located 544 bytes inside of 576-byte region [ffff8801359da0f0, ffff8801359da330) The buggy address belongs to the page: page:ffffea0004d67600 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x8000000000008100(slab|head) raw: 8000000000008100 0000000000000000 0000000000000000 0000000100110011 raw: ffffea0004b52920 ffffea0004b38020 ffff88015b416a80 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801359da200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801359da280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8801359da300: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc ^ ffff8801359da380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8801359da400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Disabling lock debugging due to kernel taint which looks like the slab containing the radixtree iter was freed as we traversed the tree, taking the rcu read lock across the loop should prevent that (deferring all the frees until the end). Reported-by: Tomi Sarvela Fixes: 96d776345277 ("drm/i915: Use a radixtree for random access to the object's backing storage") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171026130032.10677-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c2506fb3a483..e43688f77817 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2267,8 +2267,10 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) struct radix_tree_iter iter; void __rcu **slot; + rcu_read_lock(); radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) radix_tree_delete(&obj->mm.get_page.radix, iter.index); + rcu_read_unlock(); } void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, From 547da76b5777859f98bb78e6b57f19463f803c04 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 26 Oct 2017 14:00:32 +0100 Subject: [PATCH 051/260] drm/i915: Hold rcu_read_lock when iterating over the radixtree (vma idr) Kasan spotted [IGT] gem_tiled_pread_pwrite: exiting, ret=0 ================================================================== BUG: KASAN: use-after-free in __i915_gem_object_reset_page_iter+0x15c/0x170 [i915] Read of size 8 at addr ffff8801359da310 by task kworker/3:2/182 CPU: 3 PID: 182 Comm: kworker/3:2 Tainted: G U 4.14.0-rc6-CI-Custom_3340+ #1 Hardware name: Intel Corp. Geminilake/GLK RVP1 DDR4 (05), BIOS GELKRVPA.X64.0062.B30.1708222146 08/22/2017 Workqueue: events __i915_gem_free_work [i915] Call Trace: dump_stack+0x68/0xa0 print_address_description+0x78/0x290 ? __i915_gem_object_reset_page_iter+0x15c/0x170 [i915] kasan_report+0x23d/0x350 __asan_report_load8_noabort+0x19/0x20 __i915_gem_object_reset_page_iter+0x15c/0x170 [i915] ? i915_gem_object_truncate+0x100/0x100 [i915] ? lock_acquire+0x380/0x380 __i915_gem_object_put_pages+0x30d/0x530 [i915] __i915_gem_free_objects+0x551/0xbd0 [i915] ? lock_acquire+0x13e/0x380 __i915_gem_free_work+0x4e/0x70 [i915] process_one_work+0x6f6/0x1590 ? pwq_dec_nr_in_flight+0x2b0/0x2b0 worker_thread+0xe6/0xe90 ? pci_mmcfg_check_reserved+0x110/0x110 kthread+0x309/0x410 ? process_one_work+0x1590/0x1590 ? kthread_create_on_node+0xb0/0xb0 ret_from_fork+0x27/0x40 Allocated by task 1801: save_stack_trace+0x1b/0x20 kasan_kmalloc+0xee/0x190 kasan_slab_alloc+0x12/0x20 kmem_cache_alloc+0xdc/0x2e0 radix_tree_node_alloc.constprop.12+0x48/0x330 __radix_tree_create+0x274/0x480 __radix_tree_insert+0xa2/0x610 i915_gem_object_get_sg+0x224/0x670 [i915] i915_gem_object_get_page+0xb5/0x1c0 [i915] i915_gem_pread_ioctl+0x822/0xf60 [i915] drm_ioctl_kernel+0x13f/0x1c0 drm_ioctl+0x6cf/0x980 do_vfs_ioctl+0x184/0xf30 SyS_ioctl+0x41/0x70 entry_SYSCALL_64_fastpath+0x1c/0xb1 Freed by task 37: save_stack_trace+0x1b/0x20 kasan_slab_free+0xaf/0x190 kmem_cache_free+0xbf/0x340 radix_tree_node_rcu_free+0x79/0x90 rcu_process_callbacks+0x46d/0xf40 __do_softirq+0x21c/0x8d3 The buggy address belongs to the object at ffff8801359da0f0 which belongs to the cache radix_tree_node of size 576 The buggy address is located 544 bytes inside of 576-byte region [ffff8801359da0f0, ffff8801359da330) The buggy address belongs to the page: page:ffffea0004d67600 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x8000000000008100(slab|head) raw: 8000000000008100 0000000000000000 0000000000000000 0000000100110011 raw: ffffea0004b52920 ffffea0004b38020 ffff88015b416a80 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801359da200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801359da280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8801359da300: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc ^ ffff8801359da380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8801359da400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Disabling lock debugging due to kernel taint which looks like the slab containing the radixtree iter was freed as we traversed the tree, taking the rcu read lock across the loop should prevent that (deferring all the frees until the end). Reported-by: Tomi Sarvela Fixes: d1b48c1e7184 ("drm/i915: Replace execbuf vma ht with an idr") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171026130032.10677-2-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_context.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 4f26f80b1b3e..10affb35ac56 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -104,6 +104,7 @@ static void lut_close(struct i915_gem_context *ctx) kmem_cache_free(ctx->i915->luts, lut); } + rcu_read_lock(); radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) { struct i915_vma *vma = rcu_dereference_raw(*slot); struct drm_i915_gem_object *obj = vma->obj; @@ -115,6 +116,7 @@ static void lut_close(struct i915_gem_context *ctx) __i915_gem_object_release_unless_active(obj); } + rcu_read_unlock(); } static void i915_gem_context_free(struct i915_gem_context *ctx) From 5448f53ffe62c75e2cf84d407510efadaa1aff8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 19 Oct 2017 16:37:12 +0300 Subject: [PATCH 052/260] drm/i915: Don't use encoder->type in intel_ddi_set_pipe_settings() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit encoder->type isn't reliable for DP/HDMI so instead extract the correct type from the crtc state in intel_ddi_set_pipe_settings(). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171019133721.11794-2-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_ddi.c | 47 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 28c25cb9eb2c..c23d55c3344c 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1504,33 +1504,34 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - int type = encoder->type; - uint32_t temp; + u32 temp; - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP || type == INTEL_OUTPUT_DP_MST) { - WARN_ON(transcoder_is_dsi(cpu_transcoder)); + if (!intel_crtc_has_dp_encoder(crtc_state)) + return; - temp = TRANS_MSA_SYNC_CLK; - switch (crtc_state->pipe_bpp) { - case 18: - temp |= TRANS_MSA_6_BPC; - break; - case 24: - temp |= TRANS_MSA_8_BPC; - break; - case 30: - temp |= TRANS_MSA_10_BPC; - break; - case 36: - temp |= TRANS_MSA_12_BPC; - break; - default: - BUG(); - } - I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); + WARN_ON(transcoder_is_dsi(cpu_transcoder)); + + temp = TRANS_MSA_SYNC_CLK; + switch (crtc_state->pipe_bpp) { + case 18: + temp |= TRANS_MSA_6_BPC; + break; + case 24: + temp |= TRANS_MSA_8_BPC; + break; + case 30: + temp |= TRANS_MSA_10_BPC; + break; + case 36: + temp |= TRANS_MSA_12_BPC; + break; + default: + MISSING_CASE(crtc_state->pipe_bpp); + break; } + + I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); } void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, From 3a6d84e677d4e5a6366437b7007e6a2f97aba9d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 19 Oct 2017 16:37:13 +0300 Subject: [PATCH 053/260] drm/i915: Pass crtc state to intel_prepare_dp_ddi_buffers() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminate intel_prepare_dp_ddi_buffers()'s reliance on the encoder->type by passing in the crtc state. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171019133721.11794-3-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_ddi.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index c23d55c3344c..059582495496 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -811,7 +811,8 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por * values in advance. This function programs the correct values for * DP/eDP/FDI use cases. */ -static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) +static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; @@ -819,23 +820,15 @@ static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) enum port port = intel_ddi_get_encoder_port(encoder); const struct ddi_buf_trans *ddi_translations; - switch (encoder->type) { - case INTEL_OUTPUT_EDP: - ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, port, - &n_entries); - break; - case INTEL_OUTPUT_DP: - ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, port, - &n_entries); - break; - case INTEL_OUTPUT_ANALOG: + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) ddi_translations = intel_ddi_get_buf_trans_fdi(dev_priv, &n_entries); - break; - default: - MISSING_CASE(encoder->type); - return; - } + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) + ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, port, + &n_entries); + else + ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, port, + &n_entries); /* If we're boosting the current, set bit 31 of trans1 */ if (IS_GEN9_BC(dev_priv) && @@ -937,7 +930,7 @@ void hsw_fdi_link_train(struct intel_crtc *crtc, for_each_encoder_on_crtc(dev, &crtc->base, encoder) { WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG); - intel_prepare_dp_ddi_buffers(encoder); + intel_prepare_dp_ddi_buffers(encoder, crtc_state); } /* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the @@ -2201,7 +2194,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(encoder, level, encoder->type); else - intel_prepare_dp_ddi_buffers(encoder); + intel_prepare_dp_ddi_buffers(encoder, crtc_state); intel_ddi_init_dp_buf_reg(encoder); if (!is_mst) From f49b44ab84035b0f9c1808770684432bb6804328 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 19 Oct 2017 16:37:14 +0300 Subject: [PATCH 054/260] drm/i915: Start using output_types for DPLL selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit encoder->type is not realiable for DP/HDMI so let's switch the DPLL selection over to using output_types. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171019133721.11794-4-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 36 ++++++++++----------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 897fffe1ecd8..a83bf1c38e05 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -813,15 +813,11 @@ hsw_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, memset(&crtc_state->dpll_hw_state, 0, sizeof(crtc_state->dpll_hw_state)); - if (encoder->type == INTEL_OUTPUT_HDMI) { + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { pll = hsw_ddi_hdmi_get_dpll(clock, crtc, crtc_state); - - } else if (encoder->type == INTEL_OUTPUT_DP || - encoder->type == INTEL_OUTPUT_DP_MST || - encoder->type == INTEL_OUTPUT_EDP) { + } else if (intel_crtc_has_dp_encoder(crtc_state)) { pll = hsw_ddi_dp_get_dpll(encoder, clock); - - } else if (encoder->type == INTEL_OUTPUT_ANALOG) { + } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) { if (WARN_ON(crtc_state->port_clock / 2 != 135000)) return NULL; @@ -1369,15 +1365,13 @@ skl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, memset(&dpll_hw_state, 0, sizeof(dpll_hw_state)); - if (encoder->type == INTEL_OUTPUT_HDMI) { + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { bret = skl_ddi_hdmi_pll_dividers(crtc, crtc_state, clock); if (!bret) { DRM_DEBUG_KMS("Could not get HDMI pll dividers.\n"); return NULL; } - } else if (encoder->type == INTEL_OUTPUT_DP || - encoder->type == INTEL_OUTPUT_DP_MST || - encoder->type == INTEL_OUTPUT_EDP) { + } else if (intel_crtc_has_dp_encoder(crtc_state)) { bret = skl_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state); if (!bret) { DRM_DEBUG_KMS("Could not set DP dpll HW state.\n"); @@ -1388,7 +1382,7 @@ skl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, return NULL; } - if (encoder->type == INTEL_OUTPUT_EDP) + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) pll = intel_find_shared_dpll(crtc, crtc_state, DPLL_ID_SKL_DPLL0, DPLL_ID_SKL_DPLL0); @@ -1812,14 +1806,12 @@ bxt_get_dpll(struct intel_crtc *crtc, struct intel_shared_dpll *pll; int i, clock = crtc_state->port_clock; - if (encoder->type == INTEL_OUTPUT_HDMI && + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) && !bxt_ddi_hdmi_set_dpll_hw_state(crtc, crtc_state, clock, &dpll_hw_state)) return NULL; - if ((encoder->type == INTEL_OUTPUT_DP || - encoder->type == INTEL_OUTPUT_EDP || - encoder->type == INTEL_OUTPUT_DP_MST) && + if (intel_crtc_has_dp_encoder(crtc_state) && !bxt_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state)) return NULL; @@ -1828,7 +1820,7 @@ bxt_get_dpll(struct intel_crtc *crtc, crtc_state->dpll_hw_state = dpll_hw_state; - if (encoder->type == INTEL_OUTPUT_DP_MST) { + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); intel_dig_port = intel_mst->primary; @@ -2345,15 +2337,13 @@ cnl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, memset(&dpll_hw_state, 0, sizeof(dpll_hw_state)); - if (encoder->type == INTEL_OUTPUT_HDMI) { + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { bret = cnl_ddi_hdmi_pll_dividers(crtc, crtc_state, clock); if (!bret) { DRM_DEBUG_KMS("Could not get HDMI pll dividers.\n"); return NULL; } - } else if (encoder->type == INTEL_OUTPUT_DP || - encoder->type == INTEL_OUTPUT_DP_MST || - encoder->type == INTEL_OUTPUT_EDP) { + } else if (intel_crtc_has_dp_encoder(crtc_state)) { bret = cnl_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state); if (!bret) { DRM_DEBUG_KMS("Could not set DP dpll HW state.\n"); @@ -2361,8 +2351,8 @@ cnl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, } crtc_state->dpll_hw_state = dpll_hw_state; } else { - DRM_DEBUG_KMS("Skip DPLL setup for encoder %d\n", - encoder->type); + DRM_DEBUG_KMS("Skip DPLL setup for output_types 0x%x\n", + crtc_state->output_types); return NULL; } From 742745f1ee4f5a36e5cc5e8b360b519df45efa89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 19 Oct 2017 16:37:15 +0300 Subject: [PATCH 055/260] drm/i915: Stop using encoder->type in intel_ddi_enable_transcoder_func() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_ddi_enable_transcoder_func() already has the crtc state so we can use that instead of the untrustworthy encoder->type. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171019133721.11794-5-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_ddi.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 059582495496..c721775687e7 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1550,7 +1550,6 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) enum pipe pipe = crtc->pipe; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; enum port port = intel_ddi_get_encoder_port(encoder); - int type = encoder->type; uint32_t temp; /* Enable TRANS_DDI_FUNC_CTL for the pipe to work in HDMI mode */ @@ -1605,7 +1604,7 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) } } - if (type == INTEL_OUTPUT_HDMI) { + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { if (crtc_state->has_hdmi_sink) temp |= TRANS_DDI_MODE_SELECT_HDMI; else @@ -1615,19 +1614,15 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) temp |= TRANS_DDI_HDMI_SCRAMBLING_MASK; if (crtc_state->hdmi_high_tmds_clock_ratio) temp |= TRANS_DDI_HIGH_TMDS_CHAR_RATE; - } else if (type == INTEL_OUTPUT_ANALOG) { + } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) { temp |= TRANS_DDI_MODE_SELECT_FDI; temp |= (crtc_state->fdi_lanes - 1) << 1; - } else if (type == INTEL_OUTPUT_DP || - type == INTEL_OUTPUT_EDP) { - temp |= TRANS_DDI_MODE_SELECT_DP_SST; - temp |= DDI_PORT_WIDTH(crtc_state->lane_count); - } else if (type == INTEL_OUTPUT_DP_MST) { + } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { temp |= TRANS_DDI_MODE_SELECT_DP_MST; temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } else { - WARN(1, "Invalid encoder type %d for pipe %c\n", - encoder->type, pipe_name(pipe)); + temp |= TRANS_DDI_MODE_SELECT_DP_SST; + temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); From 5161d058dff4d53c78a046350d64beff54e9a9f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Oct 2017 16:43:48 +0300 Subject: [PATCH 056/260] drm/i915: Fix BXT lane latency optimal setting with MST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call the DDI .pre_pll_enable() hook from the MST code so that BXT gets the correct lane latency optimal setting applied. And we obviously need to compute the correct value, and read it out to keep the state checker happy. While at it drop the useless 'encoder' parameter to bxt_ddi_phy_calc_lane_lat_optim_mask() Cc: Maarten Lankhorst Cc: Imre Deak Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171027134348.31190-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/i915_drv.h | 3 +-- drivers/gpu/drm/i915/intel_ddi.c | 3 +-- drivers/gpu/drm/i915/intel_dp_mst.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/i915/intel_dpio_phy.c | 3 +-- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 61c155cbf9d7..c0a716e596ba 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4179,8 +4179,7 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, enum dpio_phy phy); bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, enum dpio_phy phy); -uint8_t bxt_ddi_phy_calc_lane_lat_optim_mask(struct intel_encoder *encoder, - uint8_t lane_count); +uint8_t bxt_ddi_phy_calc_lane_lat_optim_mask(uint8_t lane_count); void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, uint8_t lane_lat_optim_mask); uint8_t bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index c721775687e7..9c118e5305f7 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2662,8 +2662,7 @@ static bool intel_ddi_compute_config(struct intel_encoder *encoder, if (IS_GEN9_LP(dev_priv) && ret) pipe_config->lane_lat_optim_mask = - bxt_ddi_phy_calc_lane_lat_optim_mask(encoder, - pipe_config->lane_count); + bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 3d62c63c0763..c34ffa959e90 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -88,6 +88,10 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->dp_m_n.tu = slots; + if (IS_GEN9_LP(dev_priv)) + pipe_config->lane_lat_optim_mask = + bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); + intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); return true; @@ -182,6 +186,20 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); } +static void intel_mst_pre_pll_enable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_digital_port *intel_dig_port = intel_mst->primary; + struct intel_dp *intel_dp = &intel_dig_port->dp; + + if (intel_dp->active_mst_links == 0 && + intel_dig_port->base.pre_pll_enable) + intel_dig_port->base.pre_pll_enable(&intel_dig_port->base, + pipe_config, NULL); +} + static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) @@ -311,6 +329,10 @@ static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder, intel_ddi_clock_get(&intel_dig_port->base, pipe_config); + if (IS_GEN9_LP(dev_priv)) + pipe_config->lane_lat_optim_mask = + bxt_ddi_phy_get_lane_lat_optim_mask(encoder); + intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); } @@ -582,6 +604,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->compute_config = intel_dp_mst_compute_config; intel_encoder->disable = intel_mst_disable_dp; intel_encoder->post_disable = intel_mst_post_disable_dp; + intel_encoder->pre_pll_enable = intel_mst_pre_pll_enable_dp; intel_encoder->pre_enable = intel_mst_pre_enable_dp; intel_encoder->enable = intel_mst_enable_dp; intel_encoder->get_hw_state = intel_dp_mst_enc_get_hw_state; diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index de38d014ed39..63b76eac018f 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -567,8 +567,7 @@ bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, } uint8_t -bxt_ddi_phy_calc_lane_lat_optim_mask(struct intel_encoder *encoder, - uint8_t lane_count) +bxt_ddi_phy_calc_lane_lat_optim_mask(uint8_t lane_count) { switch (lane_count) { case 1: From 857c416e58dc306b15b7919117b6225544662bfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Oct 2017 12:45:23 +0300 Subject: [PATCH 057/260] drm/i915: Improve DP downstream HPD handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DP dongles may signal downstream HPD via short HPD pulses. Setting the sink to DPMS off apparently kills the downstream HPD (at least on my DP->VGA dongle), so skip the DPMS off for such dongles when we turn off the port. v2: Deal with DDI as well by moving the check into intel_dp_sink_dpms() (Dhinakaran) Cc: Dhinakaran Pandiyan Cc: David Woodhouse Cc: Imre Deak Cc: Pablo Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103472 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99114 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171027094523.9317-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 73b1ed8cff6b..8d25a019b772 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2505,6 +2505,21 @@ static void ironlake_edp_pll_off(struct intel_dp *intel_dp) udelay(200); } +static bool downstream_hpd_needs_d0(struct intel_dp *intel_dp) +{ + /* + * DPCD 1.2+ should support BRANCH_DEVICE_CTRL, and thus + * be capable of signalling downstream hpd with a long pulse. + * Whether or not that means D3 is safe to use is not clear, + * but let's assume so until proven otherwise. + * + * FIXME should really check all downstream ports... + */ + return intel_dp->dpcd[DP_DPCD_REV] == 0x11 && + intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_PRESENT && + intel_dp->downstream_ports[0] & DP_DS_PORT_HPD; +} + /* If the sink supports it, try to set the power state appropriately */ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode) { @@ -2515,6 +2530,9 @@ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode) return; if (mode != DRM_MODE_DPMS_ON) { + if (downstream_hpd_needs_d0(intel_dp)) + return; + ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, DP_SET_POWER_D3); } else { From be33be5df2a0d4b534ace0aaad65cba8144bfce8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 26 Oct 2017 18:14:04 +0300 Subject: [PATCH 058/260] drm/i915: Clean up the mess around hdmi_12bpc_possible() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the crtc state related 12bpc checks into hdmi_12bpc_possible() since that one already examines other parts of the crtc state. Note that we can drop the !force_dvi check since crtc_state->has_hdmi_sink already accounts for that. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171026151405.30710-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_hdmi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 5132dc814788..aa486b8925cf 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1336,6 +1336,12 @@ static bool hdmi_12bpc_possible(const struct intel_crtc_state *crtc_state) if (HAS_GMCH_DISPLAY(dev_priv)) return false; + if (crtc_state->pipe_bpp <= 8*3) + return false; + + if (!crtc_state->has_hdmi_sink) + return false; + /* * HDMI 12bpc affects the clocks, so it's only possible * when not cloning with other encoder types. @@ -1461,9 +1467,8 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, * outputs. We also need to check that the higher clock still fits * within limits. */ - if (pipe_config->pipe_bpp > 8*3 && pipe_config->has_hdmi_sink && !force_dvi && - hdmi_port_clock_valid(intel_hdmi, clock_12bpc, true, force_dvi) == MODE_OK && - hdmi_12bpc_possible(pipe_config)) { + if (hdmi_12bpc_possible(pipe_config) && + hdmi_port_clock_valid(intel_hdmi, clock_12bpc, true, force_dvi) == MODE_OK) { DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n"); desired_bpp = 12*3; From 6e8fbf8d19e48ca3b451233c8939b493a51d030d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Oct 2017 23:17:38 +0300 Subject: [PATCH 059/260] drm/i915/vbt: Fix HDMI level shifter and max data rate bitfield sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HDMI level shifter value should be 5 bits and the max data rate 3 bits. Cc: Jani Nikula Reported-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171027201738.3640-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/intel_vbt_defs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/intel_vbt_defs.h index f225c288a121..3c3c421e2e43 100644 --- a/drivers/gpu/drm/i915/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/intel_vbt_defs.h @@ -342,8 +342,8 @@ struct child_device_config { u8 i2c_speed; u8 dp_onboard_redriver; /* 158 */ u8 dp_ondock_redriver; /* 158 */ - u8 hdmi_level_shifter_value:4; /* 169 */ - u8 hdmi_max_data_rate:4; /* 204 */ + u8 hdmi_level_shifter_value:5; /* 169 */ + u8 hdmi_max_data_rate:3; /* 204 */ u16 dtd_buf_ptr; /* 161 */ u8 edidless_efp:1; /* 161 */ u8 compression_enable:1; /* 198 */ From d6038611aa3d7d8080a8ae7f3122779fb4779a13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 30 Oct 2017 16:57:02 +0200 Subject: [PATCH 060/260] drm/i915: Parse max HDMI TMDS clock from VBT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from version 204 VBT can specify the max TMDS clock we are allowed to use with HDMI ports. Parse that information and take it into account when filtering modes and computing a crtc state. Also take the opportunity to sort the platform check if ladder from new to old. v2: Add defines for the values into intel_vbt_defs.h (Jani) Don't fall back to 0 silently for unknown values (Jani) Skip the debug print for the 0 case (Jani) Cc: Jani Nikula Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171030145702.23662-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_bios.c | 24 +++++++++++++++++++++ drivers/gpu/drm/i915/intel_hdmi.c | 30 ++++++++++++++++++--------- drivers/gpu/drm/i915/intel_vbt_defs.h | 4 ++++ 4 files changed, 50 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c0a716e596ba..4a7325c4189c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1698,6 +1698,8 @@ enum modeset_restore { #define DDC_PIN_D 0x06 struct ddi_vbt_port_info { + int max_tmds_clock; + /* * This is an index in the HDMI/DVI DDI buffer translation table. * The special value HDMI_LEVEL_SHIFT_UNKNOWN means the VBT didn't diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 5e122673d32a..5c016b1f0f24 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1227,6 +1227,30 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, info->hdmi_level_shift = hdmi_level_shift; } + if (bdb_version >= 204) { + int max_tmds_clock; + + switch (child->hdmi_max_data_rate) { + default: + MISSING_CASE(child->hdmi_max_data_rate); + /* fall through */ + case HDMI_MAX_DATA_RATE_PLATFORM: + max_tmds_clock = 0; + break; + case HDMI_MAX_DATA_RATE_297: + max_tmds_clock = 297000; + break; + case HDMI_MAX_DATA_RATE_165: + max_tmds_clock = 165000; + break; + } + + if (max_tmds_clock) + DRM_DEBUG_KMS("VBT HDMI max TMDS clock for port %c: %d kHz\n", + port_name(port), max_tmds_clock); + info->max_tmds_clock = max_tmds_clock; + } + /* Parse the I_boost config for SKL and above */ if (bdb_version >= 196 && child->iboost) { info->dp_boost_level = translate_iboost(child->dp_iboost_level); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index aa486b8925cf..38fe24565b4d 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1224,24 +1224,34 @@ static void pch_post_disable_hdmi(struct intel_encoder *encoder, intel_disable_hdmi(encoder, old_crtc_state, old_conn_state); } -static int intel_hdmi_source_max_tmds_clock(struct drm_i915_private *dev_priv) +static int intel_hdmi_source_max_tmds_clock(struct intel_encoder *encoder) { - if (IS_G4X(dev_priv)) - return 165000; - else if (IS_GEMINILAKE(dev_priv)) - return 594000; - else if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8) - return 300000; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + const struct ddi_vbt_port_info *info = + &dev_priv->vbt.ddi_port_info[encoder->port]; + int max_tmds_clock; + + if (IS_GEMINILAKE(dev_priv)) + max_tmds_clock = 594000; + else if (INTEL_GEN(dev_priv) >= 8 || IS_HASWELL(dev_priv)) + max_tmds_clock = 300000; + else if (INTEL_GEN(dev_priv) >= 5) + max_tmds_clock = 225000; else - return 225000; + max_tmds_clock = 165000; + + if (info->max_tmds_clock) + max_tmds_clock = min(max_tmds_clock, info->max_tmds_clock); + + return max_tmds_clock; } static int hdmi_port_clock_limit(struct intel_hdmi *hdmi, bool respect_downstream_limits, bool force_dvi) { - struct drm_device *dev = intel_hdmi_to_dev(hdmi); - int max_tmds_clock = intel_hdmi_source_max_tmds_clock(to_i915(dev)); + struct intel_encoder *encoder = &hdmi_to_dig_port(hdmi)->base; + int max_tmds_clock = intel_hdmi_source_max_tmds_clock(encoder); if (respect_downstream_limits) { struct intel_connector *connector = hdmi->attached_connector; diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/intel_vbt_defs.h index 3c3c421e2e43..e3d7745a9151 100644 --- a/drivers/gpu/drm/i915/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/intel_vbt_defs.h @@ -304,6 +304,10 @@ struct bdb_general_features { #define DVO_PORT_MIPIC 23 /* 171 */ #define DVO_PORT_MIPID 24 /* 171 */ +#define HDMI_MAX_DATA_RATE_PLATFORM 0 /* 204 */ +#define HDMI_MAX_DATA_RATE_297 1 /* 204 */ +#define HDMI_MAX_DATA_RATE_165 2 /* 204 */ + #define LEGACY_CHILD_DEVICE_CONFIG_SIZE 33 /* DDC Bus DDI Type 155+ */ From e1214b95ed83c0bb903fe368c9a408dffd80491d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Oct 2017 22:31:23 +0300 Subject: [PATCH 061/260] drm/i915: Populate output_types from .get_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than having the caller of .get_config() set output_types based on encoder->type, let's just have .get_config() itself populate output_types. This way we are isolated from encoder->type, which won't be useable for this purpose anyway soon (at least for DDI encoders). Cc: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171027193128.14483-2-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_crt.c | 2 ++ drivers/gpu/drm/i915/intel_ddi.c | 11 +++++++++++ drivers/gpu/drm/i915/intel_display.c | 5 +---- drivers/gpu/drm/i915/intel_dp.c | 5 +++++ drivers/gpu/drm/i915/intel_dp_mst.c | 2 ++ drivers/gpu/drm/i915/intel_dsi.c | 2 ++ drivers/gpu/drm/i915/intel_dvo.c | 2 ++ drivers/gpu/drm/i915/intel_hdmi.c | 2 ++ drivers/gpu/drm/i915/intel_lvds.c | 2 ++ drivers/gpu/drm/i915/intel_sdvo.c | 2 ++ drivers/gpu/drm/i915/intel_tv.c | 2 ++ 11 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 437339f5d098..9c000ac612da 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -119,6 +119,8 @@ static unsigned int intel_crt_get_flags(struct intel_encoder *encoder) static void intel_crt_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + pipe_config->output_types |= BIT(INTEL_OUTPUT_ANALOG); + pipe_config->base.adjusted_mode.flags |= intel_crt_get_flags(encoder); pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 9c118e5305f7..7e0b1a02912a 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2595,12 +2595,23 @@ void intel_ddi_get_config(struct intel_encoder *encoder, pipe_config->hdmi_high_tmds_clock_ratio = true; /* fall through */ case TRANS_DDI_MODE_SELECT_DVI: + pipe_config->output_types |= BIT(INTEL_OUTPUT_HDMI); pipe_config->lane_count = 4; break; case TRANS_DDI_MODE_SELECT_FDI: + pipe_config->output_types |= BIT(INTEL_OUTPUT_ANALOG); break; case TRANS_DDI_MODE_SELECT_DP_SST: + if (encoder->type == INTEL_OUTPUT_EDP) + pipe_config->output_types |= BIT(INTEL_OUTPUT_EDP); + else + pipe_config->output_types |= BIT(INTEL_OUTPUT_DP); + pipe_config->lane_count = + ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; + intel_dp_get_m_n(intel_crtc, pipe_config); + break; case TRANS_DDI_MODE_SELECT_DP_MST: + pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST); pipe_config->lane_count = ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; intel_dp_get_m_n(intel_crtc, pipe_config); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a369b35d044d..8f769e9b9342 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11562,10 +11562,8 @@ verify_crtc_state(struct drm_crtc *crtc, "Encoder connected to wrong pipe %c\n", pipe_name(pipe)); - if (active) { - pipe_config->output_types |= 1 << encoder->type; + if (active) encoder->get_config(encoder, pipe_config); - } } intel_crtc_compute_pixel_rate(pipe_config); @@ -14960,7 +14958,6 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) crtc_state = to_intel_crtc_state(crtc->base.state); encoder->base.crtc = &crtc->base; - crtc_state->output_types |= 1 << encoder->type; encoder->get_config(encoder, crtc_state); } else { encoder->base.crtc = NULL; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 8d25a019b772..30688a5d680d 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2621,6 +2621,11 @@ static void intel_dp_get_config(struct intel_encoder *encoder, enum port port = dp_to_dig_port(intel_dp)->port; struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + if (encoder->type == INTEL_OUTPUT_EDP) + pipe_config->output_types |= BIT(INTEL_OUTPUT_EDP); + else + pipe_config->output_types |= BIT(INTEL_OUTPUT_DP); + tmp = I915_READ(intel_dp->output_reg); pipe_config->has_audio = tmp & DP_AUDIO_OUTPUT_ENABLE && port != PORT_A; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index c34ffa959e90..210ad0580a66 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -291,6 +291,8 @@ static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder, enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; u32 temp, flags = 0; + pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST); + pipe_config->has_audio = intel_ddi_is_audio_enabled(dev_priv, crtc); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 83f15848098a..2bff7ab25bf3 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1243,6 +1243,8 @@ static void intel_dsi_get_config(struct intel_encoder *encoder, u32 pclk; DRM_DEBUG_KMS("\n"); + pipe_config->output_types |= BIT(INTEL_OUTPUT_DSI); + if (IS_GEN9_LP(dev_priv)) bxt_dsi_get_pipe_config(encoder, pipe_config); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 53c9b763f4ce..754baa00bea9 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -159,6 +159,8 @@ static void intel_dvo_get_config(struct intel_encoder *encoder, struct intel_dvo *intel_dvo = enc_to_dvo(encoder); u32 tmp, flags = 0; + pipe_config->output_types |= BIT(INTEL_OUTPUT_DVO); + tmp = I915_READ(intel_dvo->dev.dvo_reg); if (tmp & DVO_HSYNC_ACTIVE_HIGH) flags |= DRM_MODE_FLAG_PHSYNC; diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 38fe24565b4d..0ec30b4a7dce 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -957,6 +957,8 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder, u32 tmp, flags = 0; int dotclock; + pipe_config->output_types |= BIT(INTEL_OUTPUT_HDMI); + tmp = I915_READ(intel_hdmi->hdmi_reg); if (tmp & SDVO_HSYNC_ACTIVE_HIGH) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 38572d65e46e..ef80499113ee 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -125,6 +125,8 @@ static void intel_lvds_get_config(struct intel_encoder *encoder, struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); u32 tmp, flags = 0; + pipe_config->output_types |= BIT(INTEL_OUTPUT_LVDS); + tmp = I915_READ(lvds_encoder->reg); if (tmp & LVDS_HSYNC_POLARITY) flags |= DRM_MODE_FLAG_NHSYNC; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 7437944b388f..42ec2d1f7a61 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1429,6 +1429,8 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder, u8 val; bool ret; + pipe_config->output_types |= BIT(INTEL_OUTPUT_SDVO); + sdvox = I915_READ(intel_sdvo->sdvo_reg); ret = intel_sdvo_get_input_timing(intel_sdvo, &dtd); diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index a79a7591b2cf..b18609cebe03 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -868,6 +868,8 @@ static void intel_tv_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + pipe_config->output_types |= BIT(INTEL_OUTPUT_TVOUT); + pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; } From 7e732cacb1ae27b2eb6902cabd93e9da086c54f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Oct 2017 22:31:24 +0300 Subject: [PATCH 062/260] drm/i915: Stop frobbing with DDI encoder->type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the DDI encoder->type will change at runtime depending on what kind of hotplugs we've processed. That's quite bad since we can't really trust that that current value of encoder->type actually matches the type of signal we're trying to drive through it. Let's eliminate that problem by declaring that non-eDP DDI port will always have the encoder type as INTEL_OUTPUT_DDI. This means the code can no longer try to distinguish DP vs. HDMI based on encoder->type. We'll leave eDP as INTEL_OUTPUT_EDP, since it'll never change and there's a bunch of code that relies on that value to identify eDP encoders. We'll introduce a new encoder .compute_output_type() hook. This allows us to compute the full output_types before any encoder .compute_config() hooks get called, thus those hooks can rely on output_types being correct, which is useful for cloning on oldr platforms. For now we'll just look at the connector type and pick the correct mode based on that. In the future the new hook could be used to implement dynamic switching between LS and PCON modes for LSPCON. v2: Fix BXT/GLK PPS explosion with DSI/MST encoders v3: Avoid the PPS warn on pure HDMI/DVI DDI encoders by checking dp.output_reg v4: Rebase v5: Populate output_types in .get_config() rather than in the caller v5: Split out populating output_types in .get_config() (Maarten) Cc: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171027193128.14483-3-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/intel_ddi.c | 32 ++++++++++++++++++++------- drivers/gpu/drm/i915/intel_display.c | 11 ++++++--- drivers/gpu/drm/i915/intel_dp.c | 19 +++++----------- drivers/gpu/drm/i915/intel_drv.h | 7 ++++-- drivers/gpu/drm/i915/intel_hdmi.c | 10 ++------- drivers/gpu/drm/i915/intel_opregion.c | 2 +- 7 files changed, 47 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c1e5bba91bff..39883cd915db 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3051,7 +3051,7 @@ static void intel_connector_info(struct seq_file *m, break; case DRM_MODE_CONNECTOR_HDMIA: if (intel_encoder->type == INTEL_OUTPUT_HDMI || - intel_encoder->type == INTEL_OUTPUT_UNKNOWN) + intel_encoder->type == INTEL_OUTPUT_DDI) intel_hdmi_info(m, intel_connector); break; default: diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 7e0b1a02912a..e5dd281781fe 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -497,10 +497,8 @@ enum port intel_ddi_get_encoder_port(struct intel_encoder *encoder) switch (encoder->type) { case INTEL_OUTPUT_DP_MST: return enc_to_mst(&encoder->base)->primary->port; - case INTEL_OUTPUT_DP: case INTEL_OUTPUT_EDP: - case INTEL_OUTPUT_HDMI: - case INTEL_OUTPUT_UNKNOWN: + case INTEL_OUTPUT_DDI: return enc_to_dig_port(&encoder->base)->port; case INTEL_OUTPUT_ANALOG: return PORT_E; @@ -1534,6 +1532,7 @@ void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; uint32_t temp; + temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (state == true) temp |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC; @@ -2652,21 +2651,36 @@ void intel_ddi_get_config(struct intel_encoder *encoder, intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); } +static enum intel_output_type +intel_ddi_compute_output_type(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + switch (conn_state->connector->connector_type) { + case DRM_MODE_CONNECTOR_HDMIA: + return INTEL_OUTPUT_HDMI; + case DRM_MODE_CONNECTOR_eDP: + return INTEL_OUTPUT_EDP; + case DRM_MODE_CONNECTOR_DisplayPort: + return INTEL_OUTPUT_DP; + default: + MISSING_CASE(conn_state->connector->connector_type); + return INTEL_OUTPUT_UNUSED; + } +} + static bool intel_ddi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - int type = encoder->type; int port = intel_ddi_get_encoder_port(encoder); int ret; - WARN(type == INTEL_OUTPUT_UNKNOWN, "compute_config() on unknown output!\n"); - if (port == PORT_A) pipe_config->cpu_transcoder = TRANSCODER_EDP; - if (type == INTEL_OUTPUT_HDMI) + if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state); else ret = intel_dp_compute_config(encoder, pipe_config, conn_state); @@ -2815,6 +2829,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs, DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port)); + intel_encoder->compute_output_type = intel_ddi_compute_output_type; intel_encoder->compute_config = intel_ddi_compute_config; intel_encoder->enable = intel_enable_ddi; if (IS_GEN9_LP(dev_priv)) @@ -2868,9 +2883,10 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) max_lanes = 4; } + intel_dig_port->dp.output_reg = INVALID_MMIO_REG; intel_dig_port->max_lanes = max_lanes; - intel_encoder->type = INTEL_OUTPUT_UNKNOWN; + intel_encoder->type = INTEL_OUTPUT_DDI; intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8f769e9b9342..737de251d0f8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10579,7 +10579,7 @@ static const char * const output_type_str[] = { OUTPUT_TYPE(DP), OUTPUT_TYPE(EDP), OUTPUT_TYPE(DSI), - OUTPUT_TYPE(UNKNOWN), + OUTPUT_TYPE(DDI), OUTPUT_TYPE(DP_MST), }; @@ -10750,7 +10750,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) switch (encoder->type) { unsigned int port_mask; - case INTEL_OUTPUT_UNKNOWN: + case INTEL_OUTPUT_DDI: if (WARN_ON(!HAS_DDI(to_i915(dev)))) break; case INTEL_OUTPUT_DP: @@ -10883,7 +10883,12 @@ intel_modeset_pipe_config(struct drm_crtc *crtc, * Determine output_types before calling the .compute_config() * hooks so that the hooks can use this information safely. */ - pipe_config->output_types |= 1 << encoder->type; + if (encoder->compute_output_type) + pipe_config->output_types |= + BIT(encoder->compute_output_type(encoder, pipe_config, + connector_state)); + else + pipe_config->output_types |= BIT(encoder->type); } encoder_retry: diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 30688a5d680d..f0c49962ffbe 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -758,11 +758,16 @@ void intel_power_sequencer_reset(struct drm_i915_private *dev_priv) struct intel_dp *intel_dp; if (encoder->type != INTEL_OUTPUT_DP && - encoder->type != INTEL_OUTPUT_EDP) + encoder->type != INTEL_OUTPUT_EDP && + encoder->type != INTEL_OUTPUT_DDI) continue; intel_dp = enc_to_intel_dp(&encoder->base); + /* Skip pure DVI/HDMI DDI encoders */ + if (!i915_mmio_reg_valid(intel_dp->output_reg)) + continue; + WARN_ON(intel_dp->active_pipe != INVALID_PIPE); if (encoder->type != INTEL_OUTPUT_EDP) @@ -4733,8 +4738,6 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) { struct drm_connector *connector = &intel_connector->base; struct intel_dp *intel_dp = intel_attached_dp(connector); - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = connector->dev; enum drm_connector_status status; u8 sink_irq_vector = 0; @@ -4767,9 +4770,6 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) goto out; } - if (intel_encoder->type != INTEL_OUTPUT_EDP) - intel_encoder->type = INTEL_OUTPUT_DP; - if (intel_dp->reset_link_params) { /* Initial max link lane count */ intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp); @@ -4886,9 +4886,6 @@ intel_dp_force(struct drm_connector *connector) intel_dp_set_edid(intel_dp); intel_display_power_put(dev_priv, intel_dp->aux_power_domain); - - if (intel_encoder->type != INTEL_OUTPUT_EDP) - intel_encoder->type = INTEL_OUTPUT_DP; } static int intel_dp_get_modes(struct drm_connector *connector) @@ -5107,10 +5104,6 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) struct drm_i915_private *dev_priv = to_i915(dev); enum irqreturn ret = IRQ_NONE; - if (intel_dig_port->base.type != INTEL_OUTPUT_EDP && - intel_dig_port->base.type != INTEL_OUTPUT_HDMI) - intel_dig_port->base.type = INTEL_OUTPUT_DP; - if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) { /* * vdd off can generate a long pulse on eDP which diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index df966427232c..629ebc73bb09 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -173,7 +173,7 @@ enum intel_output_type { INTEL_OUTPUT_DP = 7, INTEL_OUTPUT_EDP = 8, INTEL_OUTPUT_DSI = 9, - INTEL_OUTPUT_UNKNOWN = 10, + INTEL_OUTPUT_DDI = 10, INTEL_OUTPUT_DP_MST = 11, }; @@ -216,6 +216,9 @@ struct intel_encoder { enum port port; unsigned int cloneable; void (*hot_plug)(struct intel_encoder *); + enum intel_output_type (*compute_output_type)(struct intel_encoder *, + struct intel_crtc_state *, + struct drm_connector_state *); bool (*compute_config)(struct intel_encoder *, struct intel_crtc_state *, struct drm_connector_state *); @@ -1152,7 +1155,7 @@ enc_to_dig_port(struct drm_encoder *encoder) struct intel_encoder *intel_encoder = to_intel_encoder(encoder); switch (intel_encoder->type) { - case INTEL_OUTPUT_UNKNOWN: + case INTEL_OUTPUT_DDI: WARN_ON(!HAS_DDI(to_i915(encoder->dev))); case INTEL_OUTPUT_DP: case INTEL_OUTPUT_EDP: diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 0ec30b4a7dce..fe46abfeba26 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1627,12 +1627,9 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) intel_hdmi_unset_edid(connector); - if (intel_hdmi_set_edid(connector)) { - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); - - hdmi_to_dig_port(intel_hdmi)->base.type = INTEL_OUTPUT_HDMI; + if (intel_hdmi_set_edid(connector)) status = connector_status_connected; - } else + else status = connector_status_disconnected; intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); @@ -1643,8 +1640,6 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) static void intel_hdmi_force(struct drm_connector *connector) { - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); - DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); @@ -1654,7 +1649,6 @@ intel_hdmi_force(struct drm_connector *connector) return; intel_hdmi_set_edid(connector); - hdmi_to_dig_port(intel_hdmi)->base.type = INTEL_OUTPUT_HDMI; } static int intel_hdmi_get_modes(struct drm_connector *connector) diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 1d946240e55f..39714be3eb6b 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -383,7 +383,7 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, case INTEL_OUTPUT_ANALOG: type = DISPLAY_TYPE_CRT; break; - case INTEL_OUTPUT_UNKNOWN: + case INTEL_OUTPUT_DDI: case INTEL_OUTPUT_DP: case INTEL_OUTPUT_HDMI: case INTEL_OUTPUT_DP_MST: From 0fce04c8764bd0d6ef9b4488460a5a880afb1c73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Oct 2017 22:31:25 +0300 Subject: [PATCH 063/260] drm/i915: Nuke intel_ddi_get_encoder_port() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit encoder->port works for FDI, and it also works for MST (regardless of whether we're dealing with the "fake" MST encoder, or mst->primary). So let's eliminate intel_ddi_get_encoder_port(). Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171027193128.14483-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 50 +++++++++------------------ drivers/gpu/drm/i915/intel_drv.h | 1 - drivers/gpu/drm/i915/intel_opregion.c | 2 +- 3 files changed, 18 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index e5dd281781fe..aaaca906c97f 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -492,22 +492,6 @@ static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_1_05V[] = { { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ }; -enum port intel_ddi_get_encoder_port(struct intel_encoder *encoder) -{ - switch (encoder->type) { - case INTEL_OUTPUT_DP_MST: - return enc_to_mst(&encoder->base)->primary->port; - case INTEL_OUTPUT_EDP: - case INTEL_OUTPUT_DDI: - return enc_to_dig_port(&encoder->base)->port; - case INTEL_OUTPUT_ANALOG: - return PORT_E; - default: - MISSING_CASE(encoder->type); - return PORT_A; - } -} - static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { @@ -815,7 +799,7 @@ static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; int i, n_entries; - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; const struct ddi_buf_trans *ddi_translations; if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) @@ -852,7 +836,7 @@ static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; int n_entries; - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; const struct ddi_buf_trans *ddi_translations; ddi_translations = intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); @@ -1468,7 +1452,7 @@ static void bxt_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; enum intel_dpll_id pll_id = port; pipe_config->port_clock = bxt_calc_pll_link(dev_priv, pll_id); @@ -1548,7 +1532,7 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; uint32_t temp; /* Enable TRANS_DDI_FUNC_CTL for the pipe to work in HDMI mode */ @@ -1644,7 +1628,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_encoder *encoder = intel_connector->encoder; int type = intel_connector->base.connector_type; - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; enum pipe pipe = 0; enum transcoder cpu_transcoder; uint32_t tmp; @@ -1703,7 +1687,7 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; u32 tmp; int i; bool ret; @@ -1788,7 +1772,7 @@ void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (cpu_transcoder != TRANSCODER_EDP) @@ -1927,8 +1911,8 @@ static void cnl_ddi_vswing_program(struct intel_encoder *encoder, int level, enum intel_output_type type) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_ddi_get_encoder_port(encoder); const struct cnl_ddi_buf_trans *ddi_translations; + enum port port = encoder->port; int n_entries, ln; u32 val; @@ -1991,7 +1975,7 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, int level, enum intel_output_type type) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; int width, rate, ln; u32 val; @@ -2110,7 +2094,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, const struct intel_shared_dpll *pll) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; uint32_t val; if (WARN_ON(!pll)) @@ -2149,7 +2133,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, static void intel_ddi_clk_disable(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; if (IS_CANNONLAKE(dev_priv)) I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | @@ -2167,7 +2151,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); int level = intel_ddi_dp_level(intel_dp); @@ -2205,7 +2189,7 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; int level = intel_ddi_hdmi_level(dev_priv, port); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); @@ -2250,7 +2234,7 @@ static void intel_ddi_pre_enable(struct intel_encoder *encoder, static void intel_disable_ddi_buf(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; bool wait = false; u32 val; @@ -2379,7 +2363,7 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; if (port == PORT_A && INTEL_GEN(dev_priv) < 9) intel_dp_stop_link_train(intel_dp); @@ -2398,7 +2382,7 @@ static void intel_enable_ddi_hdmi(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; intel_hdmi_handle_sink_scrambling(encoder, conn_state->connector, @@ -2674,7 +2658,7 @@ static bool intel_ddi_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - int port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; int ret; if (port == PORT_A) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 629ebc73bb09..3828d9bee7f5 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1279,7 +1279,6 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, void hsw_fdi_link_train(struct intel_crtc *crtc, const struct intel_crtc_state *crtc_state); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); -enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state); void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 39714be3eb6b..fc65f5e451dd 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -367,7 +367,7 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, if (intel_encoder->type == INTEL_OUTPUT_DSI) port = 0; else - port = intel_ddi_get_encoder_port(intel_encoder); + port = intel_encoder->port; if (port == PORT_E) { port = 0; From bb911536f07e5ed9147e3acf55a2cd72dffff70d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Oct 2017 22:31:26 +0300 Subject: [PATCH 064/260] drm/i915: Eliminate pll->state usage from bxt_calc_pll_link() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should be using the DPLL hw state we got from the current crtc state to determine the corresponding port clock frequency rather than getting it via the current state programmed into the DPLL. v2: Rebase due to intel_dpll_id changes Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171027193128.14483-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index aaaca906c97f..8183304c7d34 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1423,19 +1423,16 @@ static void hsw_ddi_clock_get(struct intel_encoder *encoder, ddi_dotclock_get(pipe_config); } -static int bxt_calc_pll_link(struct drm_i915_private *dev_priv, - enum intel_dpll_id pll_id) +static int bxt_calc_pll_link(struct intel_crtc_state *crtc_state) { - struct intel_shared_dpll *pll; struct intel_dpll_hw_state *state; struct dpll clock; /* For DDI ports we always use a shared PLL. */ - if (WARN_ON(pll_id == DPLL_ID_PRIVATE)) + if (WARN_ON(!crtc_state->shared_dpll)) return 0; - pll = &dev_priv->shared_dplls[pll_id]; - state = &pll->state.hw_state; + state = &crtc_state->dpll_hw_state; clock.m1 = 2; clock.m2 = (state->pll0 & PORT_PLL_M2_MASK) << 22; @@ -1449,13 +1446,9 @@ static int bxt_calc_pll_link(struct drm_i915_private *dev_priv, } static void bxt_ddi_clock_get(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; - enum intel_dpll_id pll_id = port; - - pipe_config->port_clock = bxt_calc_pll_link(dev_priv, pll_id); + pipe_config->port_clock = bxt_calc_pll_link(pipe_config); ddi_dotclock_get(pipe_config); } From 1939ba51fd058b869c06aa084c0b9ae75bc721c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Oct 2017 22:31:27 +0300 Subject: [PATCH 065/260] drm/i915: Pass a crtc state to ddi post_disable from MST code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass an old crtc state to intel_ddi_post_disable() from the MST code. Note that this crtc state won't necessaitly match the one that was passed to intel_ddi_pre_enable() if the first stream to be enabled isn't the last stream to be disabled. But this is fine since the states should be identical in every important way. This does mean people frobbing the DDI pre_enable/post_disable hooks have to pay attention in what parts of the state they consult. The alternative would be to inline the relevant code into the MST code. That is actually what we used to do for pre_enable before commit e081c8463ac9 ("drm/i915: Remove duplicate DDI enabling logic from MST path"). For post_disable we've always called the DDI hook. v2: Pimp up the comments explaining the MST issues Cc: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171027193128.14483-6-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_ddi.c | 37 ++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_dp_mst.c | 6 ++--- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 8183304c7d34..7a6806d77678 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2214,6 +2214,19 @@ static void intel_ddi_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; + /* + * When called from DP MST code: + * - conn_state will be NULL + * - encoder will be the main encoder (ie. mst->primary) + * - the main connector associated with this port + * won't be active or linked to a crtc + * - crtc_state will be the state of the first stream to + * be activated on this port, and it may not be the same + * stream that will be deactivated last, but each stream + * should have a state that is identical when it comes to + * the DP link parameteres + */ + WARN_ON(crtc_state->has_pch_encoder); intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); @@ -2254,12 +2267,7 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); struct intel_dp *intel_dp = &dig_port->dp; - /* - * old_crtc_state and old_conn_state are NULL when called from - * DP_MST. The main connector associated with this port is never - * bound to a crtc for MST. - */ - bool is_mst = !old_crtc_state; + bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST); /* * Power down sink before disabling the port, otherwise we end @@ -2303,12 +2311,19 @@ static void intel_ddi_post_disable(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { /* - * old_crtc_state and old_conn_state are NULL when called from - * DP_MST. The main connector associated with this port is never - * bound to a crtc for MST. + * When called from DP MST code: + * - old_conn_state will be NULL + * - encoder will be the main encoder (ie. mst->primary) + * - the main connector associated with this port + * won't be active or linked to a crtc + * - old_crtc_state will be the state of the last stream to + * be deactivated on this port, and it may not be the same + * stream that was activated last, but each stream + * should have a state that is identical when it comes to + * the DP link parameteres */ - if (old_crtc_state && - intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) + + if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) intel_ddi_post_disable_hdmi(encoder, old_crtc_state, old_conn_state); else diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 210ad0580a66..d523302e5081 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -179,10 +179,10 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, intel_dp->active_mst_links--; intel_mst->connector = NULL; - if (intel_dp->active_mst_links == 0) { + if (intel_dp->active_mst_links == 0) intel_dig_port->base.post_disable(&intel_dig_port->base, - NULL, NULL); - } + old_crtc_state, NULL); + DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); } From 35686a44e4b87f45ebd426a7e2b074a54486844f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Oct 2017 22:31:28 +0300 Subject: [PATCH 066/260] drm/i915: Use intel_ddi_get_config() for MST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminate the partially duplicated DDI readout code from MST, and instead just call intel_ddi_get_config(). As a nice bonus we get more cross checking as intel_ddi_get_config() will populate output_types based on the actual mode of the DDI port. Additonally intel_ddi_get_config() must be changed to get the crtc from the passed in crtc state rather than from the encoder->crtc link. encoder->crtc really shouldn't be used anyway. v2: Rebased on BXT MST latency_optim fix Make intel_ddi_clock_get() static Cc: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171027193128.14483-7-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_ddi.c | 6 ++-- drivers/gpu/drm/i915/intel_dp_mst.c | 50 +---------------------------- drivers/gpu/drm/i915/intel_drv.h | 2 -- 3 files changed, 4 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 7a6806d77678..eb8f4e3bb85f 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1453,8 +1453,8 @@ static void bxt_ddi_clock_get(struct intel_encoder *encoder, ddi_dotclock_get(pipe_config); } -void intel_ddi_clock_get(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) +static void intel_ddi_clock_get(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -2533,7 +2533,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; struct intel_digital_port *intel_dig_port; u32 temp, flags = 0; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index d523302e5081..6f11bb35f66f 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -286,56 +286,8 @@ static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder, { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; - u32 temp, flags = 0; - pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST); - - pipe_config->has_audio = - intel_ddi_is_audio_enabled(dev_priv, crtc); - - temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); - if (temp & TRANS_DDI_PHSYNC) - flags |= DRM_MODE_FLAG_PHSYNC; - else - flags |= DRM_MODE_FLAG_NHSYNC; - if (temp & TRANS_DDI_PVSYNC) - flags |= DRM_MODE_FLAG_PVSYNC; - else - flags |= DRM_MODE_FLAG_NVSYNC; - - switch (temp & TRANS_DDI_BPC_MASK) { - case TRANS_DDI_BPC_6: - pipe_config->pipe_bpp = 18; - break; - case TRANS_DDI_BPC_8: - pipe_config->pipe_bpp = 24; - break; - case TRANS_DDI_BPC_10: - pipe_config->pipe_bpp = 30; - break; - case TRANS_DDI_BPC_12: - pipe_config->pipe_bpp = 36; - break; - default: - break; - } - pipe_config->base.adjusted_mode.flags |= flags; - - pipe_config->lane_count = - ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; - - intel_dp_get_m_n(crtc, pipe_config); - - intel_ddi_clock_get(&intel_dig_port->base, pipe_config); - - if (IS_GEN9_LP(dev_priv)) - pipe_config->lane_lat_optim_mask = - bxt_ddi_phy_get_lane_lat_optim_mask(encoder); - - intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); + intel_ddi_get_config(&intel_dig_port->base, pipe_config); } static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3828d9bee7f5..4498d743cebc 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1295,8 +1295,6 @@ bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); -void intel_ddi_clock_get(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, bool state); void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, From 4ab09d0ec3c81aceaebf8967efe111ba69a406d8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Oct 2017 17:29:27 +0000 Subject: [PATCH 067/260] drm/i915: Replace "cc-option -Wno-foo" with "cc-disable-warning foo" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To quote kbuild/makefiles.txt: cc-disable-warning checks if gcc supports a given warning and returns the commandline switch to disable it. This special function is needed, because gcc 4.4 and later accept any unknown -Wno-* option and only warn about it if there is another warning in the source file. This is exactly what we were trying to achieve with cc-option -Wno-foo and failed miserably. Reported-by: kbuild-all@01.org Fixes: 39bf4de89ff7 ("drm/i915: Add -Wall -Wextra to our build, set warnings to full") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20171030172927.18158-1-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 7750be8e27a6..1bbc5440db40 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -12,15 +12,15 @@ # will most likely get a sudden build breakage... Hopefully we will fix # new warnings before CI updates! subdir-ccflags-y := -Wall -Wextra -subdir-ccflags-y += $(call cc-option,-Wno-unused-parameter,) -subdir-ccflags-y += $(call cc-option,-Wno-type-limits,) -subdir-ccflags-y += $(call cc-option,-Wno-missing-field-initializers,) -subdir-ccflags-y += $(call cc-option,-Wno-implicit-fallthrough,) +subdir-ccflags-y += $(call cc-disable-warning, unused-parameter) +subdir-ccflags-y += $(call cc-disable-warning, type-limits) +subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) +subdir-ccflags-y += $(call cc-disable-warning, implicit-fallthrough) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable -CFLAGS_i915_pci.o = $(call cc-option,-Wno-override-init,) -CFLAGS_intel_fbdev.o = $(call cc-option,-Wno-override-init,) +CFLAGS_i915_pci.o = $(call cc-disable-warning, override-init) +CFLAGS_intel_fbdev.o = $(call cc-disable-warning, override-init) subdir-ccflags-y += \ $(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA) From 8ec47de21bfab96790c4202ae8cdb5092ad7ec33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 30 Oct 2017 20:46:53 +0200 Subject: [PATCH 068/260] drm/i915: Pass around crtc and connector states for audio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Explicitly pass the crtc and connector states into the audio code enable/disable hooks, and plumb them all the way down. This gets rid of almost all crtc->config and encoder->crtc uses. The one place where we still use them is i915_audio_component_sync_audio_rate() since that gets called from the audio driver and we don't have explicit states around then. Cc: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171030184654.17429-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 10 +- drivers/gpu/drm/i915/intel_audio.c | 202 +++++++++++++++------------- drivers/gpu/drm/i915/intel_ddi.c | 6 +- drivers/gpu/drm/i915/intel_dp.c | 3 +- drivers/gpu/drm/i915/intel_dp_mst.c | 3 +- drivers/gpu/drm/i915/intel_drv.h | 4 +- drivers/gpu/drm/i915/intel_hdmi.c | 6 +- 7 files changed, 132 insertions(+), 102 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4a7325c4189c..c10fece58e86 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -726,10 +726,12 @@ struct drm_i915_display_funcs { void (*crtc_disable)(struct intel_crtc_state *old_crtc_state, struct drm_atomic_state *old_state); void (*update_crtcs)(struct drm_atomic_state *state); - void (*audio_codec_enable)(struct drm_connector *connector, - struct intel_encoder *encoder, - const struct drm_display_mode *adjusted_mode); - void (*audio_codec_disable)(struct intel_encoder *encoder); + void (*audio_codec_enable)(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); + void (*audio_codec_disable)(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state); void (*fdi_link_train)(struct intel_crtc *crtc, const struct intel_crtc_state *crtc_state); void (*init_clock_gating)(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 0ddba16fde1b..e56520037270 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -102,13 +102,13 @@ static const struct dp_aud_n_m dp_aud_n_m[] = { }; static const struct dp_aud_n_m * -audio_config_dp_get_n_m(struct intel_crtc *intel_crtc, int rate) +audio_config_dp_get_n_m(const struct intel_crtc_state *crtc_state, int rate) { int i; for (i = 0; i < ARRAY_SIZE(dp_aud_n_m); i++) { if (rate == dp_aud_n_m[i].sample_rate && - intel_crtc->config->port_clock == dp_aud_n_m[i].clock) + crtc_state->port_clock == dp_aud_n_m[i].clock) return &dp_aud_n_m[i]; } @@ -157,8 +157,10 @@ static const struct { }; /* get AUD_CONFIG_PIXEL_CLOCK_HDMI_* value for mode */ -static u32 audio_config_hdmi_pixel_clock(const struct drm_display_mode *adjusted_mode) +static u32 audio_config_hdmi_pixel_clock(const struct intel_crtc_state *crtc_state) { + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; int i; for (i = 0; i < ARRAY_SIZE(hdmi_audio_clock); i++) { @@ -179,9 +181,11 @@ static u32 audio_config_hdmi_pixel_clock(const struct drm_display_mode *adjusted return hdmi_audio_clock[i].config; } -static int audio_config_hdmi_get_n(const struct drm_display_mode *adjusted_mode, +static int audio_config_hdmi_get_n(const struct intel_crtc_state *crtc_state, int rate) { + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; int i; for (i = 0; i < ARRAY_SIZE(hdmi_aud_ncts); i++) { @@ -220,7 +224,9 @@ static bool intel_eld_uptodate(struct drm_connector *connector, return true; } -static void g4x_audio_codec_disable(struct intel_encoder *encoder) +static void g4x_audio_codec_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); uint32_t eldv, tmp; @@ -239,11 +245,12 @@ static void g4x_audio_codec_disable(struct intel_encoder *encoder) I915_WRITE(G4X_AUD_CNTL_ST, tmp); } -static void g4x_audio_codec_enable(struct drm_connector *connector, - struct intel_encoder *encoder, - const struct drm_display_mode *adjusted_mode) +static void g4x_audio_codec_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(connector->dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct drm_connector *connector = conn_state->connector; uint8_t *eld = connector->eld; uint32_t eldv; uint32_t tmp; @@ -279,16 +286,20 @@ static void g4x_audio_codec_enable(struct drm_connector *connector, } static void -hsw_dp_audio_config_update(struct intel_crtc *intel_crtc, enum port port, - const struct drm_display_mode *adjusted_mode) +hsw_dp_audio_config_update(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_audio_component *acomp = dev_priv->audio_component; - int rate = acomp ? acomp->aud_sample_rate[port] : 0; - const struct dp_aud_n_m *nm = audio_config_dp_get_n_m(intel_crtc, rate); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum port port = encoder->port; + enum pipe pipe = crtc->pipe; + const struct dp_aud_n_m *nm; + int rate; u32 tmp; + rate = acomp ? acomp->aud_sample_rate[port] : 0; + nm = audio_config_dp_get_n_m(crtc_state, rate); if (nm) DRM_DEBUG_KMS("using Maud %u, Naud %u\n", nm->m, nm->n); else @@ -323,23 +334,26 @@ hsw_dp_audio_config_update(struct intel_crtc *intel_crtc, enum port port, } static void -hsw_hdmi_audio_config_update(struct intel_crtc *intel_crtc, enum port port, - const struct drm_display_mode *adjusted_mode) +hsw_hdmi_audio_config_update(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_audio_component *acomp = dev_priv->audio_component; - int rate = acomp ? acomp->aud_sample_rate[port] : 0; - enum pipe pipe = intel_crtc->pipe; - int n; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum port port = encoder->port; + enum pipe pipe = crtc->pipe; + int n, rate; u32 tmp; + rate = acomp ? acomp->aud_sample_rate[port] : 0; + tmp = I915_READ(HSW_AUD_CFG(pipe)); tmp &= ~AUD_CONFIG_N_VALUE_INDEX; tmp &= ~AUD_CONFIG_PIXEL_CLOCK_HDMI_MASK; tmp &= ~AUD_CONFIG_N_PROG_ENABLE; - tmp |= audio_config_hdmi_pixel_clock(adjusted_mode); + tmp |= audio_config_hdmi_pixel_clock(crtc_state); - n = audio_config_hdmi_get_n(adjusted_mode, rate); + n = audio_config_hdmi_get_n(crtc_state, rate); if (n != 0) { DRM_DEBUG_KMS("using N %d\n", n); @@ -363,20 +377,22 @@ hsw_hdmi_audio_config_update(struct intel_crtc *intel_crtc, enum port port, } static void -hsw_audio_config_update(struct intel_crtc *intel_crtc, enum port port, - const struct drm_display_mode *adjusted_mode) +hsw_audio_config_update(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { - if (intel_crtc_has_dp_encoder(intel_crtc->config)) - hsw_dp_audio_config_update(intel_crtc, port, adjusted_mode); + if (intel_crtc_has_dp_encoder(crtc_state)) + hsw_dp_audio_config_update(encoder, crtc_state); else - hsw_hdmi_audio_config_update(intel_crtc, port, adjusted_mode); + hsw_hdmi_audio_config_update(encoder, crtc_state); } -static void hsw_audio_codec_disable(struct intel_encoder *encoder) +static void hsw_audio_codec_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + enum pipe pipe = crtc->pipe; uint32_t tmp; DRM_DEBUG_KMS("Disable audio codec on pipe %c\n", pipe_name(pipe)); @@ -389,7 +405,7 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder) tmp |= AUD_CONFIG_N_PROG_ENABLE; tmp &= ~AUD_CONFIG_UPPER_N_MASK; tmp &= ~AUD_CONFIG_LOWER_N_MASK; - if (intel_crtc_has_dp_encoder(intel_crtc->config)) + if (intel_crtc_has_dp_encoder(old_crtc_state)) tmp |= AUD_CONFIG_N_VALUE_INDEX; I915_WRITE(HSW_AUD_CFG(pipe), tmp); @@ -402,14 +418,14 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder) mutex_unlock(&dev_priv->av_mutex); } -static void hsw_audio_codec_enable(struct drm_connector *connector, - struct intel_encoder *intel_encoder, - const struct drm_display_mode *adjusted_mode) +static void hsw_audio_codec_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(intel_encoder->base.crtc); - enum pipe pipe = intel_crtc->pipe; - enum port port = intel_encoder->port; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_connector *connector = conn_state->connector; + enum pipe pipe = crtc->pipe; const uint8_t *eld = connector->eld; uint32_t tmp; int len, i; @@ -448,17 +464,19 @@ static void hsw_audio_codec_enable(struct drm_connector *connector, I915_WRITE(HSW_AUD_PIN_ELD_CP_VLD, tmp); /* Enable timestamps */ - hsw_audio_config_update(intel_crtc, port, adjusted_mode); + hsw_audio_config_update(encoder, crtc_state); mutex_unlock(&dev_priv->av_mutex); } -static void ilk_audio_codec_disable(struct intel_encoder *intel_encoder) +static void ilk_audio_codec_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(intel_encoder->base.crtc); - enum pipe pipe = intel_crtc->pipe; - enum port port = intel_encoder->port; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + enum pipe pipe = crtc->pipe; + enum port port = encoder->port; uint32_t tmp, eldv; i915_reg_t aud_config, aud_cntrl_st2; @@ -485,7 +503,7 @@ static void ilk_audio_codec_disable(struct intel_encoder *intel_encoder) tmp |= AUD_CONFIG_N_PROG_ENABLE; tmp &= ~AUD_CONFIG_UPPER_N_MASK; tmp &= ~AUD_CONFIG_LOWER_N_MASK; - if (intel_crtc_has_dp_encoder(intel_crtc->config)) + if (intel_crtc_has_dp_encoder(old_crtc_state)) tmp |= AUD_CONFIG_N_VALUE_INDEX; I915_WRITE(aud_config, tmp); @@ -497,14 +515,15 @@ static void ilk_audio_codec_disable(struct intel_encoder *intel_encoder) I915_WRITE(aud_cntrl_st2, tmp); } -static void ilk_audio_codec_enable(struct drm_connector *connector, - struct intel_encoder *intel_encoder, - const struct drm_display_mode *adjusted_mode) +static void ilk_audio_codec_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(intel_encoder->base.crtc); - enum pipe pipe = intel_crtc->pipe; - enum port port = intel_encoder->port; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_connector *connector = conn_state->connector; + enum pipe pipe = crtc->pipe; + enum port port = encoder->port; uint8_t *eld = connector->eld; uint32_t tmp, eldv; int len, i; @@ -568,36 +587,36 @@ static void ilk_audio_codec_enable(struct drm_connector *connector, tmp &= ~AUD_CONFIG_N_VALUE_INDEX; tmp &= ~AUD_CONFIG_N_PROG_ENABLE; tmp &= ~AUD_CONFIG_PIXEL_CLOCK_HDMI_MASK; - if (intel_crtc_has_dp_encoder(intel_crtc->config)) + if (intel_crtc_has_dp_encoder(crtc_state)) tmp |= AUD_CONFIG_N_VALUE_INDEX; else - tmp |= audio_config_hdmi_pixel_clock(adjusted_mode); + tmp |= audio_config_hdmi_pixel_clock(crtc_state); I915_WRITE(aud_config, tmp); } /** * intel_audio_codec_enable - Enable the audio codec for HD audio - * @intel_encoder: encoder on which to enable audio + * @encoder: encoder on which to enable audio * @crtc_state: pointer to the current crtc state. * @conn_state: pointer to the current connector state. * * The enable sequences may only be performed after enabling the transcoder and * port, and after completed link training. */ -void intel_audio_codec_enable(struct intel_encoder *intel_encoder, +void intel_audio_codec_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; - struct drm_connector *connector; - struct drm_i915_private *dev_priv = to_i915(encoder->dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_audio_component *acomp = dev_priv->audio_component; - enum port port = intel_encoder->port; - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_connector *connector = conn_state->connector; + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + enum port port = encoder->port; + enum pipe pipe = crtc->pipe; - connector = conn_state->connector; - if (!connector || !connector->eld[0]) + if (!connector->eld[0]) return; DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", @@ -609,19 +628,20 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder, connector->eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2; if (dev_priv->display.audio_codec_enable) - dev_priv->display.audio_codec_enable(connector, intel_encoder, - adjusted_mode); + dev_priv->display.audio_codec_enable(encoder, + crtc_state, + conn_state); mutex_lock(&dev_priv->av_mutex); - intel_encoder->audio_connector = connector; + encoder->audio_connector = connector; /* referred in audio callbacks */ - dev_priv->av_enc_map[pipe] = intel_encoder; + dev_priv->av_enc_map[pipe] = encoder; mutex_unlock(&dev_priv->av_mutex); if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify) { /* audio drivers expect pipe = -1 to indicate Non-MST cases */ - if (intel_encoder->type != INTEL_OUTPUT_DP_MST) + if (encoder->type != INTEL_OUTPUT_DP_MST) pipe = -1; acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr, (int) port, (int) pipe); @@ -629,36 +649,41 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder, intel_lpe_audio_notify(dev_priv, pipe, port, connector->eld, crtc_state->port_clock, - intel_encoder->type == INTEL_OUTPUT_DP); + encoder->type == INTEL_OUTPUT_DP); } /** * intel_audio_codec_disable - Disable the audio codec for HD audio - * @intel_encoder: encoder on which to disable audio + * @encoder: encoder on which to disable audio + * @crtc_state: pointer to the old crtc state. + * @conn_state: pointer to the old connector state. * * The disable sequences must be performed before disabling the transcoder or * port. */ -void intel_audio_codec_disable(struct intel_encoder *intel_encoder) +void intel_audio_codec_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - struct drm_i915_private *dev_priv = to_i915(encoder->dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_audio_component *acomp = dev_priv->audio_component; - enum port port = intel_encoder->port; - struct intel_crtc *crtc = to_intel_crtc(encoder->crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + enum port port = encoder->port; enum pipe pipe = crtc->pipe; if (dev_priv->display.audio_codec_disable) - dev_priv->display.audio_codec_disable(intel_encoder); + dev_priv->display.audio_codec_disable(encoder, + old_crtc_state, + old_conn_state); mutex_lock(&dev_priv->av_mutex); - intel_encoder->audio_connector = NULL; + encoder->audio_connector = NULL; dev_priv->av_enc_map[pipe] = NULL; mutex_unlock(&dev_priv->av_mutex); if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify) { /* audio drivers expect pipe = -1 to indicate Non-MST cases */ - if (intel_encoder->type != INTEL_OUTPUT_DP_MST) + if (encoder->type != INTEL_OUTPUT_DP_MST) pipe = -1; acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr, (int) port, (int) pipe); @@ -793,10 +818,9 @@ static int i915_audio_component_sync_audio_rate(struct device *kdev, int port, int pipe, int rate) { struct drm_i915_private *dev_priv = kdev_to_i915(kdev); - struct intel_encoder *intel_encoder; - struct intel_crtc *crtc; - struct drm_display_mode *adjusted_mode; struct i915_audio_component *acomp = dev_priv->audio_component; + struct intel_encoder *encoder; + struct intel_crtc *crtc; int err = 0; if (!HAS_DDI(dev_priv)) @@ -806,23 +830,19 @@ static int i915_audio_component_sync_audio_rate(struct device *kdev, int port, mutex_lock(&dev_priv->av_mutex); /* 1. get the pipe */ - intel_encoder = get_saved_enc(dev_priv, port, pipe); - if (!intel_encoder || !intel_encoder->base.crtc) { + encoder = get_saved_enc(dev_priv, port, pipe); + if (!encoder || !encoder->base.crtc) { DRM_DEBUG_KMS("Not valid for port %c\n", port_name(port)); err = -ENODEV; goto unlock; } - /* pipe passed from the audio driver will be -1 for Non-MST case */ - crtc = to_intel_crtc(intel_encoder->base.crtc); - pipe = crtc->pipe; - - adjusted_mode = &crtc->config->base.adjusted_mode; + crtc = to_intel_crtc(encoder->base.crtc); /* port must be valid now, otherwise the pipe will be invalid */ acomp->aud_sample_rate[port] = rate; - hsw_audio_config_update(crtc, port, adjusted_mode); + hsw_audio_config_update(encoder, crtc->config); unlock: mutex_unlock(&dev_priv->av_mutex); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index eb8f4e3bb85f..ace674cd79b9 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2425,7 +2425,8 @@ static void intel_disable_ddi_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder); + intel_audio_codec_disable(encoder, + old_crtc_state, old_conn_state); intel_edp_drrs_disable(intel_dp, old_crtc_state); intel_psr_disable(intel_dp, old_crtc_state); @@ -2437,7 +2438,8 @@ static void intel_disable_ddi_hdmi(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder); + intel_audio_codec_disable(encoder, + old_crtc_state, old_conn_state); intel_hdmi_handle_sink_scrambling(encoder, old_conn_state->connector, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index f0c49962ffbe..d27c0145ac91 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2708,7 +2708,8 @@ static void intel_disable_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder); + intel_audio_codec_disable(encoder, + old_crtc_state, old_conn_state); /* Make sure the panel is off before trying to change the mode. But also * ensure that we have vdd while we switch off the panel. */ diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 6f11bb35f66f..653ca39789b8 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -149,7 +149,8 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, DRM_ERROR("failed to update payload %d\n", ret); } if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder); + intel_audio_codec_disable(encoder, + old_crtc_state, old_conn_state); } static void intel_mst_post_disable_dp(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 4498d743cebc..00b488688042 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1311,7 +1311,9 @@ void intel_init_audio_hooks(struct drm_i915_private *dev_priv); void intel_audio_codec_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); -void intel_audio_codec_disable(struct intel_encoder *encoder); +void intel_audio_codec_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state); void i915_audio_component_init(struct drm_i915_private *dev_priv); void i915_audio_component_cleanup(struct drm_i915_private *dev_priv); void intel_audio_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index fe46abfeba26..fa1c793a21ef 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1206,7 +1206,8 @@ static void g4x_disable_hdmi(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder); + intel_audio_codec_disable(encoder, + old_crtc_state, old_conn_state); intel_disable_hdmi(encoder, old_crtc_state, old_conn_state); } @@ -1216,7 +1217,8 @@ static void pch_disable_hdmi(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder); + intel_audio_codec_disable(encoder, + old_crtc_state, old_conn_state); } static void pch_post_disable_hdmi(struct intel_encoder *encoder, From 9f846643c788bbdf4d4b4fe587661705d07de8ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 30 Oct 2017 20:46:54 +0200 Subject: [PATCH 069/260] drm/i915: Remove most encoder->type uses from the audio code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit encoder->type isn't genreally safe around DDI ports, so let's replace some uses in the audio code with the crtc state's output_types instead. Actually in these cases encoder->type would work since the DP SST case is only relevant for VLV/CHV and encoder->type==DP is a thing on those platforms. The DP MST cases would work as well since MST encoder->type==DP_MST always. But I think it's best to try and minimize the encoder->type use in general to avoid showing a bad example to people. Cc: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171030184654.17429-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/intel_audio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index e56520037270..4705194b1992 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -641,7 +641,7 @@ void intel_audio_codec_enable(struct intel_encoder *encoder, if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify) { /* audio drivers expect pipe = -1 to indicate Non-MST cases */ - if (encoder->type != INTEL_OUTPUT_DP_MST) + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) pipe = -1; acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr, (int) port, (int) pipe); @@ -649,7 +649,7 @@ void intel_audio_codec_enable(struct intel_encoder *encoder, intel_lpe_audio_notify(dev_priv, pipe, port, connector->eld, crtc_state->port_clock, - encoder->type == INTEL_OUTPUT_DP); + intel_crtc_has_dp_encoder(crtc_state)); } /** @@ -683,7 +683,7 @@ void intel_audio_codec_disable(struct intel_encoder *encoder, if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify) { /* audio drivers expect pipe = -1 to indicate Non-MST cases */ - if (encoder->type != INTEL_OUTPUT_DP_MST) + if (!intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST)) pipe = -1; acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr, (int) port, (int) pipe); From d9b99ffcb5aa113d175a3df845c72eac0d4624e0 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Fri, 27 Oct 2017 15:32:07 -0700 Subject: [PATCH 070/260] drm/i915/cnl: Remove unnecessary check in cnl_setup_private_ppat There is no need check if PPGTT is disabled because that not possible in CNL. Execlists and GuC submission modes rely on at least aliasing PPGTT and even intel_sanitize_enable_ppgtt says: "We don't allow disabling PPGTT for gen9+ as it's a requirement for execlists, the sole mechanism available to submit work." Suggested-by: Daniele Ceraolo Spurio Cc: Rodrigo Vivi Signed-off-by: Michel Thierry Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171027223207.7869-1-michel.thierry@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 5eaa6893daaa..0684d5df97d9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3178,12 +3178,6 @@ static void cnl_setup_private_ppat(struct intel_ppat *ppat) ppat->match = bdw_private_pat_match; ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3); - /* XXX: spec is unclear if this is still needed for CNL+ */ - if (!USES_PPGTT(ppat->i915)) { - __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC); - return; - } - __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); From 1d033beb20d6d5885587a02a393b6598d766a382 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 31 Oct 2017 10:36:07 +0000 Subject: [PATCH 071/260] drm/i915: Check incoming alignment for unfenced buffers (on i915gm) In case the object has changed tiling between calls to execbuf, we need to check if the existing offset inside the GTT matches the new tiling constraint. We even need to do this for "unfenced" tiled objects, where the 3D commands use an implied fence and so the object still needs to match the physical fence restrictions on alignment (only required for gen2 and early gen3). In commit 2889caa92321 ("drm/i915: Eliminate lots of iterations over the execobjects array"), the idea was to remove the second guessing and only set the NEEDS_MAP flag when required. However, the entire check for an unusable offset for fencing was removed and not just the secondary check. I.e. /* avoid costly ping-pong once a batch bo ended up non-mappable */ if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !i915_vma_is_map_and_fenceable(vma)) return !only_mappable_for_reloc(entry->flags); was entirely removed as the ping-pong between execbuf passes was fixed, but its primary purpose in forcing unaligned unfenced access to be rebound was forgotten. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103502 Fixes: 2889caa92321 ("drm/i915: Eliminate lots of iterations over the execobjects array") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171031103607.17836-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3d7190764f10..5b8213b1a8c7 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -343,6 +343,10 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, (vma->node.start + vma->node.size - 1) >> 32) return true; + if (flags & __EXEC_OBJECT_NEEDS_MAP && + !i915_vma_is_map_and_fenceable(vma)) + return true; + return false; } From e5330ac1f50b897d245753828e8887f297f69dd0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 31 Oct 2017 12:22:35 +0000 Subject: [PATCH 072/260] drm/i915: Check that the breadcrumb wasn't disarmed automatically before parking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will disarm the breadcrumb interrupt if we see a user interrupt whilst no one is waiting. This may race with the call to intel_engine_disarm_breadcrumbs() triggering an assert that we aren't trying to do the same job twice. Prevent this by checking that the irq is still armed after flushing the interrupt (for the irq spinlock). Fixes: bcbd5c33a342 ("drm/i915/guc: Always enable the breadcrumbs irq") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20171031122235.1395-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 1bd0a461d665..4de054f8c1ba 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -219,7 +219,8 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) spin_lock(&b->irq_lock); first = fetch_and_zero(&b->irq_wait); - __intel_engine_disarm_breadcrumbs(engine); + if (b->irq_armed) + __intel_engine_disarm_breadcrumbs(engine); spin_unlock(&b->irq_lock); rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { From 680273879d125d644831b8de42c66576e6290378 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Oct 2017 12:06:16 +0100 Subject: [PATCH 073/260] drm/i915: Move parking-while-active warning to intel_engines_park() We will want to break this down to give detailed per-engine warnings as to why we still think we are active as we attempt to park the engines. For the first step, just move the warning verbatim from the idle-worker to intel_engines_park(). Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171027110617.31745-3-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 7 ------- drivers/gpu/drm/i915/intel_engine_cs.c | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e43688f77817..9470ba0c1930 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3352,13 +3352,6 @@ i915_gem_idle_work_handler(struct work_struct *work) */ synchronize_irq(dev_priv->drm.irq); - /* - * We are committed now to parking the engines, make sure there - * will be no more interrupts arriving later. - */ - if (!intel_engines_are_idle(dev_priv)) - DRM_ERROR("Timeout waiting for engines to idle\n"); - intel_engines_park(dev_priv); i915_gem_timelines_mark_idle(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f31f2d6384c3..9767586e2289 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1613,6 +1613,13 @@ void intel_engines_park(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + /* + * We are committed now to parking the engines, make sure there + * will be no more interrupts arriving later. + */ + if (!intel_engines_are_idle(dev_priv)) + DRM_ERROR("Timeout waiting for engines to idle\n"); + for_each_engine(engine, i915, id) { if (engine->park) engine->park(engine); From 3265124a2d3744d789ede58452ab6f8a9b454be8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Oct 2017 12:06:17 +0100 Subject: [PATCH 074/260] drm/i915: Give more details for the active-when-parking warning for the engines If the we think the engine is still active when we attempt to park it, we want more details -- so dump the engine state. References: https://bugs.freedesktop.org/show_bug.cgi?id=103479 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171027110617.31745-4-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_engine_cs.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 9767586e2289..6895a90af008 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1613,14 +1613,20 @@ void intel_engines_park(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; - /* - * We are committed now to parking the engines, make sure there - * will be no more interrupts arriving later. - */ - if (!intel_engines_are_idle(dev_priv)) - DRM_ERROR("Timeout waiting for engines to idle\n"); - for_each_engine(engine, i915, id) { + /* + * We are committed now to parking the engines, make sure there + * will be no more interrupts arriving later and the engines + * are truly idle. + */ + if (!intel_engine_is_idle(engine)) { + struct drm_printer p = drm_debug_printer(__func__); + + DRM_ERROR("%s is not idle before parking\n", + engine->name); + intel_engine_dump(engine, &p); + } + if (engine->park) engine->park(engine); From 6e7406db8c3a6998e84a605d3e1e04b166e795d1 Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Wed, 1 Nov 2017 12:08:50 +0200 Subject: [PATCH 075/260] drm/i915/cnl: Symmetric scalers for each pipe For Cannonlake the number of scalers for each pipe is 2. Let's increase the number of scalers for pipe C. v2: Use INTEL_GEN() instead of IS_CANNONLAKE() Signed-off-by: Mika Kahola Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1509530930-24960-1-git-send-email-mika.kahola@intel.com --- drivers/gpu/drm/i915/intel_device_info.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 875d428ea75f..db03d179fc85 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -347,7 +347,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) struct intel_device_info *info = mkwrite_device_info(dev_priv); enum pipe pipe; - if (INTEL_GEN(dev_priv) >= 9) { + if (INTEL_GEN(dev_priv) >= 10) { + for_each_pipe(dev_priv, pipe) + info->num_scalers[pipe] = 2; + } else if (INTEL_GEN(dev_priv) == 9) { info->num_scalers[PIPE_A] = 2; info->num_scalers[PIPE_B] = 2; info->num_scalers[PIPE_C] = 1; From 6d5646006f66880574c75c4e142ef370b4503af3 Mon Sep 17 00:00:00 2001 From: Jeff McGee Date: Wed, 1 Nov 2017 15:16:30 -0700 Subject: [PATCH 076/260] drm/i915/guc: Clear terminated attribute bit on GuC preemption context If GuC firmware performs an engine reset while that engine had a preemption pending, it will set the terminated attribute bit on our preemption stage descriptor. GuC firmware retains all pending work items for a high-priority GuC client, unlike the normal-priority GuC client where work items are dropped. It wants to make sure the preempt- to-idle work doesn't run when scheduling resumes, and uses this bit to inform its scheduler and presumably us as well. Our job is to clear it for the next preemption after reset, otherwise that and future preemptions will never complete. We'll just clear it every time. Signed-off-by: Jeff McGee Link: https://patchwork.freedesktop.org/patch/msgid/20171101221630.25086-1-jeff.mcgee@intel.com Reviewed-by: Michel Thierry Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_guc_submission.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 3049a0781b88..d14c1342f09d 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -590,6 +590,7 @@ static void inject_preempt_context(struct work_struct *work) struct intel_guc *guc = container_of(preempt_work, typeof(*guc), preempt_work[engine->id]); struct i915_guc_client *client = guc->preempt_client; + struct guc_stage_desc *stage_desc = __get_stage_desc(client); struct intel_ring *ring = client->owner->engine[engine->id].ring; u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner, engine)); @@ -623,6 +624,20 @@ static void inject_preempt_context(struct work_struct *work) ring->tail / sizeof(u64), 0); spin_unlock_irq(&client->wq_lock); + /* + * If GuC firmware performs an engine reset while that engine had + * a preemption pending, it will set the terminated attribute bit + * on our preemption stage descriptor. GuC firmware retains all + * pending work items for a high-priority GuC client, unlike the + * normal-priority GuC client where work items are dropped. It + * wants to make sure the preempt-to-idle work doesn't run when + * scheduling resumes, and uses this bit to inform its scheduler + * and presumably us as well. Our job is to clear it for the next + * preemption after reset, otherwise that and future preemptions + * will never complete. We'll just clear it every time. + */ + stage_desc->attribute &= ~GUC_STAGE_DESC_ATTR_TERMINATED; + data[0] = INTEL_GUC_ACTION_REQUEST_PREEMPTION; data[1] = client->stage_id; data[2] = INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q | From cb20a3c056b04044c5e92c6d5d4485ba46ab4f72 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Mon, 30 Oct 2017 11:56:14 -0700 Subject: [PATCH 077/260] drm/i915/guc: Rename the function that resets the GuC intel_guc_reset sounds more like the microcontroller is the one performing a reset, while in this case is the opposite. intel_reset_guc not only makes it clearer, it follows the other intel_reset functions available. v2: Print error message in English (Tvrtko). Cc: Tvrtko Ursulin Signed-off-by: Michel Thierry Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171030185616.32836-2-michel.thierry@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_uc.c | 4 ++-- drivers/gpu/drm/i915/intel_uncore.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c10fece58e86..05425bb7565b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3324,7 +3324,7 @@ extern int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags); extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv); -extern int intel_guc_reset(struct drm_i915_private *dev_priv); +extern int intel_reset_guc(struct drm_i915_private *dev_priv); extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine); extern void intel_hangcheck_init(struct drm_i915_private *dev_priv); extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 25bd162f38d2..aec295470e0d 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -33,9 +33,9 @@ static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv) int ret; u32 guc_status; - ret = intel_guc_reset(dev_priv); + ret = intel_reset_guc(dev_priv); if (ret) { - DRM_ERROR("GuC reset failed, ret = %d\n", ret); + DRM_ERROR("Failed to reset GuC, ret = %d\n", ret); return ret; } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 96ee6b2754be..d629b2e40013 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1803,7 +1803,7 @@ bool intel_has_reset_engine(struct drm_i915_private *dev_priv) i915_modparams.reset >= 2); } -int intel_guc_reset(struct drm_i915_private *dev_priv) +int intel_reset_guc(struct drm_i915_private *dev_priv) { int ret; From 6acbea89efbdc6aef56bc6f364d4d8b72a5fc145 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Tue, 31 Oct 2017 15:53:09 -0700 Subject: [PATCH 078/260] drm/i915/guc: Add support for reset engine using GuC commands This patch adds per engine reset and recovery (TDR) support when GuC is used to submit workloads to GPU. In the case of i915 directly submission to ELSP, driver manages hang detection, recovery and resubmission. With GuC submission these tasks are shared between driver and GuC. i915 is still responsible for detecting a hang, and when it does it only requests GuC to reset that Engine. GuC internally manages acquiring forcewake and idling the engine before resetting it. Once the reset is successful, i915 takes over again and handles the resubmission. The scheduler in i915 knows which requests are pending so after resetting a engine, pending workloads/requests are resubmitted again. v2: s/i915_guc_request_engine_reset/i915_guc_reset_engine/ to match the non-guc function names. v3: Removed debug message about engine restarting from which request, since the new baseline do it regardless of submission mode. (Chris) v4: Rebase. v5: Do not pass unnecessary reporting flags to the fw (Jeff); tasklet_schedule(&execlists->irq_tasklet) handles the resubmit; rebase. v6: Rename the existing reset engine function and share a similar interface between guc and non-guc paths (Chris). Signed-off-by: Michel Thierry Cc: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171031225309.10888-1-michel.thierry@intel.com Reviewed-by: Jeff McGee Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.c | 14 ++++++++++++-- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_guc.c | 24 ++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc_fwif.h | 1 + drivers/gpu/drm/i915/intel_uncore.c | 5 ----- 5 files changed, 39 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index af745749509c..e7e9e061073b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1950,6 +1950,12 @@ error: goto finish; } +static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + return intel_gpu_reset(dev_priv, intel_engine_flag(engine)); +} + /** * i915_reset_engine - reset GPU engine to recover from a hang * @engine: engine to reset @@ -1984,10 +1990,14 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) goto out; } - ret = intel_gpu_reset(engine->i915, intel_engine_flag(engine)); + if (!engine->i915->guc.execbuf_client) + ret = intel_gt_reset_engine(engine->i915, engine); + else + ret = intel_guc_reset_engine(&engine->i915->guc, engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ - DRM_DEBUG_DRIVER("Failed to reset %s, ret=%d\n", + DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", + engine->i915->guc.execbuf_client ? "GuC " : "", engine->name, ret); goto out; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 05425bb7565b..72bb5b51035a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3325,6 +3325,8 @@ extern int i915_reset_engine(struct intel_engine_cs *engine, extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv); extern int intel_reset_guc(struct drm_i915_private *dev_priv); +extern int intel_guc_reset_engine(struct intel_guc *guc, + struct intel_engine_cs *engine); extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine); extern void intel_hangcheck_init(struct drm_i915_private *dev_priv); extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index f74d50fdaeb0..9678630a1c70 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -24,6 +24,7 @@ #include "intel_guc.h" #include "i915_drv.h" +#include "i915_guc_submission.h" static void gen8_guc_raise_irq(struct intel_guc *guc) { @@ -283,6 +284,29 @@ int intel_guc_suspend(struct drm_i915_private *dev_priv) return intel_guc_send(guc, data, ARRAY_SIZE(data)); } +/** + * intel_guc_reset_engine() - ask GuC to reset an engine + * @guc: intel_guc structure + * @engine: engine to be reset + */ +int intel_guc_reset_engine(struct intel_guc *guc, + struct intel_engine_cs *engine) +{ + u32 data[7]; + + GEM_BUG_ON(!guc->execbuf_client); + + data[0] = INTEL_GUC_ACTION_REQUEST_ENGINE_RESET; + data[1] = engine->guc_id; + data[2] = 0; + data[3] = 0; + data[4] = 0; + data[5] = guc->execbuf_client->stage_id; + data[6] = guc_ggtt_offset(guc->shared_data); + + return intel_guc_send(guc, data, ARRAY_SIZE(data)); +} + /** * intel_guc_resume() - notify GuC resuming from suspend state * @dev_priv: i915 device private diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index e24dbec2ead8..6a10aa6f04d3 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -574,6 +574,7 @@ struct guc_shared_ctx_data { enum intel_guc_action { INTEL_GUC_ACTION_DEFAULT = 0x0, INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2, + INTEL_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3, INTEL_GUC_ACTION_SAMPLE_FORCEWAKE = 0x6, INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index d629b2e40013..0564d87ad416 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1792,14 +1792,9 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv) return intel_get_gpu_reset(dev_priv) != NULL; } -/* - * When GuC submission is enabled, GuC manages ELSP and can initiate the - * engine reset too. For now, fall back to full GPU reset if it is enabled. - */ bool intel_has_reset_engine(struct drm_i915_private *dev_priv) { return (dev_priv->info.has_reset_engine && - !dev_priv->guc.execbuf_client && i915_modparams.reset >= 2); } From 7130630323c562597191653560963e61c5bd0f57 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 2 Nov 2017 11:48:36 +0200 Subject: [PATCH 079/260] drm/i915: Use fallback forcewake if primary ack missing There is a possibility on gen9 hardware to miss the forcewake ack message. The recommended workaround is to use another free bit and toggle it until original bit is successfully acknowledged. Some future gen9 revs might or might not fix the underlying issue but using fallback forcewake bit dance can be considered as harmless: without the ack timeout we never reach the fallback bit forcewake. Thus as of now we adopt a blanket approach for all gen9 and leave the bypassing the fallback bit approach for future patches if corresponding hw revisions do appear. Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms for forcewake request/clear ack") did increase the forcewake timeout. If the issue was a delayed ack, future work could include finding a suitable timeout value both for primary ack and reserve toggle to reduce the worst case latency. v2: use bit 15, naming, comment (Chris), only wait fallback ack v3: fix return on fallback, backoff after fallback write (Chris) v4: udelay on first pass, grammar (Chris) v4: s/reserve/fallback References: HSDES #1604254524 References: https://bugs.freedesktop.org/show_bug.cgi?id=102051 Cc: Chris Wilson Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171102094836.2506-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/i915_reg.h | 5 +- drivers/gpu/drm/i915/intel_uncore.c | 137 ++++++++++++++++++++++++++-- 2 files changed, 130 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8c775e96b4e4..f0f8f6059652 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7774,8 +7774,9 @@ enum { #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88) #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84) #define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044) -#define FORCEWAKE_KERNEL 0x1 -#define FORCEWAKE_USER 0x2 +#define FORCEWAKE_KERNEL BIT(0) +#define FORCEWAKE_USER BIT(1) +#define FORCEWAKE_KERNEL_FALLBACK BIT(15) #define FORCEWAKE_MT_ACK _MMIO(0x130040) #define ECOBUS _MMIO(0xa180) #define FORCEWAKE_MT_ENABLE (1<<5) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 0564d87ad416..0a6d952a2df1 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -69,17 +69,104 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) HRTIMER_MODE_REL); } +static inline int +__wait_for_ack(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d, + const u32 ack, + const u32 value) +{ + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == value, + FORCEWAKE_ACK_TIMEOUT_MS); +} + +static inline int +wait_ack_clear(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d, + const u32 ack) +{ + return __wait_for_ack(i915, d, ack, 0); +} + +static inline int +wait_ack_set(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d, + const u32 ack) +{ + return __wait_for_ack(i915, d, ack, ack); +} + static inline void fw_domain_wait_ack_clear(const struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & - FORCEWAKE_KERNEL) == 0, - FORCEWAKE_ACK_TIMEOUT_MS)) + if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL)) DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n", intel_uncore_forcewake_domain_to_str(d->id)); } +enum ack_type { + ACK_CLEAR = 0, + ACK_SET +}; + +static int +fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d, + const enum ack_type type) +{ + const u32 ack_bit = FORCEWAKE_KERNEL; + const u32 value = type == ACK_SET ? ack_bit : 0; + unsigned int pass; + bool ack_detected; + + /* + * There is a possibility of driver's wake request colliding + * with hardware's own wake requests and that can cause + * hardware to not deliver the driver's ack message. + * + * Use a fallback bit toggle to kick the gpu state machine + * in the hope that the original ack will be delivered along with + * the fallback ack. + * + * This workaround is described in HSDES #1604254524 + */ + + pass = 1; + do { + wait_ack_clear(i915, d, FORCEWAKE_KERNEL_FALLBACK); + + __raw_i915_write32(i915, d->reg_set, + _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL_FALLBACK)); + /* Give gt some time to relax before the polling frenzy */ + udelay(10 * pass); + wait_ack_set(i915, d, FORCEWAKE_KERNEL_FALLBACK); + + ack_detected = (__raw_i915_read32(i915, d->reg_ack) & ack_bit) == value; + + __raw_i915_write32(i915, d->reg_set, + _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL_FALLBACK)); + } while (!ack_detected && pass++ < 10); + + DRM_DEBUG_DRIVER("%s had to use fallback to %s ack, 0x%x (passes %u)\n", + intel_uncore_forcewake_domain_to_str(d->id), + type == ACK_SET ? "set" : "clear", + __raw_i915_read32(i915, d->reg_ack), + pass); + + return ack_detected ? 0 : -ETIMEDOUT; +} + +static inline void +fw_domain_wait_ack_clear_fallback(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) +{ + if (likely(!wait_ack_clear(i915, d, FORCEWAKE_KERNEL))) + return; + + if (fw_domain_wait_ack_with_fallback(i915, d, ACK_CLEAR)) + fw_domain_wait_ack_clear(i915, d); +} + static inline void fw_domain_get(struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) @@ -88,16 +175,25 @@ fw_domain_get(struct drm_i915_private *i915, } static inline void -fw_domain_wait_ack(const struct drm_i915_private *i915, - const struct intel_uncore_forcewake_domain *d) +fw_domain_wait_ack_set(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & - FORCEWAKE_KERNEL), - FORCEWAKE_ACK_TIMEOUT_MS)) + if (wait_ack_set(i915, d, FORCEWAKE_KERNEL)) DRM_ERROR("%s: timed out waiting for forcewake ack request.\n", intel_uncore_forcewake_domain_to_str(d->id)); } +static inline void +fw_domain_wait_ack_set_fallback(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) +{ + if (likely(!wait_ack_set(i915, d, FORCEWAKE_KERNEL))) + return; + + if (fw_domain_wait_ack_with_fallback(i915, d, ACK_SET)) + fw_domain_wait_ack_set(i915, d); +} + static inline void fw_domain_put(const struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) @@ -119,7 +215,27 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains) } for_each_fw_domain_masked(d, fw_domains, i915, tmp) - fw_domain_wait_ack(i915, d); + fw_domain_wait_ack_set(i915, d); + + i915->uncore.fw_domains_active |= fw_domains; +} + +static void +fw_domains_get_with_fallback(struct drm_i915_private *i915, + enum forcewake_domains fw_domains) +{ + struct intel_uncore_forcewake_domain *d; + unsigned int tmp; + + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); + + for_each_fw_domain_masked(d, fw_domains, i915, tmp) { + fw_domain_wait_ack_clear_fallback(i915, d); + fw_domain_get(i915, d); + } + + for_each_fw_domain_masked(d, fw_domains, i915, tmp) + fw_domain_wait_ack_set_fallback(i915, d); i915->uncore.fw_domains_active |= fw_domains; } @@ -1142,7 +1258,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) } if (INTEL_GEN(dev_priv) >= 9) { - dev_priv->uncore.funcs.force_wake_get = fw_domains_get; + dev_priv->uncore.funcs.force_wake_get = + fw_domains_get_with_fallback; dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER_GEN9, From 820c5bbbf418fba41149fdeb870d623e21be2463 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Nov 2017 20:21:49 +0000 Subject: [PATCH 080/260] drm/i915: Flush the irq and tasklets before asserting engine is idle Before we assert that the engine is idle, make sure we flush any residual tasklet. After that point, if the engine is not idle, more work may be queued despite us trying to park the engine and go to sleep. References: https://bugs.freedesktop.org/show_bug.cgi?id=103479 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171101202149.32493-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_engine_cs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 6895a90af008..ddbe5c9bf45a 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1614,6 +1614,10 @@ void intel_engines_park(struct drm_i915_private *i915) enum intel_engine_id id; for_each_engine(engine, i915, id) { + /* Flush the residual irq tasklets first. */ + intel_engine_disarm_breadcrumbs(engine); + tasklet_kill(&engine->execlists.irq_tasklet); + /* * We are committed now to parking the engines, make sure there * will be no more interrupts arriving later and the engines @@ -1630,9 +1634,6 @@ void intel_engines_park(struct drm_i915_private *i915) if (engine->park) engine->park(engine); - intel_engine_disarm_breadcrumbs(engine); - tasklet_kill(&engine->execlists.irq_tasklet); - i915_gem_batch_pool_fini(&engine->batch_pool); engine->execlists.no_priolist = false; } From 49e43ef7c6e533c110a7ff8fa8096291ec1c0e0e Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 2 Nov 2017 12:18:27 +0000 Subject: [PATCH 081/260] drm/i915: ensure oa config uuid is null terminated Because dev_priv is 0-ed it's not currently an issue, but since we have dev_priv->perf.oa.test_config.uuid size at uuid + 1, we could just copy the null character. v2: Use strlcpy instead of strncpy (Chris) Signed-off-by: Lionel Landwerlin Cc: Rodrigo Vivi Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171102121827.436-1-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_oa_bdw.c | 4 ++-- drivers/gpu/drm/i915/i915_oa_bxt.c | 4 ++-- drivers/gpu/drm/i915/i915_oa_cflgt2.c | 4 ++-- drivers/gpu/drm/i915/i915_oa_chv.c | 4 ++-- drivers/gpu/drm/i915/i915_oa_glk.c | 4 ++-- drivers/gpu/drm/i915/i915_oa_hsw.c | 4 ++-- drivers/gpu/drm/i915/i915_oa_kblgt2.c | 4 ++-- drivers/gpu/drm/i915/i915_oa_kblgt3.c | 4 ++-- drivers/gpu/drm/i915/i915_oa_sklgt2.c | 4 ++-- drivers/gpu/drm/i915/i915_oa_sklgt3.c | 4 ++-- drivers/gpu/drm/i915/i915_oa_sklgt4.c | 4 ++-- 11 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_oa_bdw.c b/drivers/gpu/drm/i915/i915_oa_bdw.c index abdf4d0abcce..4abd2e8b5083 100644 --- a/drivers/gpu/drm/i915/i915_oa_bdw.c +++ b/drivers/gpu/drm/i915/i915_oa_bdw.c @@ -85,9 +85,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_bdw(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "d6de6f55-e526-4f79-a6a6-d7315c09044e", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_bxt.c b/drivers/gpu/drm/i915/i915_oa_bxt.c index b69b900de0fe..cb6f304ec16a 100644 --- a/drivers/gpu/drm/i915/i915_oa_bxt.c +++ b/drivers/gpu/drm/i915/i915_oa_bxt.c @@ -83,9 +83,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_bxt(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "5ee72f5c-092f-421e-8b70-225f7c3e9612", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt2.c b/drivers/gpu/drm/i915/i915_oa_cflgt2.c index 368c87d7ee9a..8641ae30e343 100644 --- a/drivers/gpu/drm/i915/i915_oa_cflgt2.c +++ b/drivers/gpu/drm/i915/i915_oa_cflgt2.c @@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_cflgt2(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "74fb4902-d3d3-4237-9e90-cbdc68d0a446", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_chv.c b/drivers/gpu/drm/i915/i915_oa_chv.c index 322a3f94cd16..556febb2c3c8 100644 --- a/drivers/gpu/drm/i915/i915_oa_chv.c +++ b/drivers/gpu/drm/i915/i915_oa_chv.c @@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_chv(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "4a534b07-cba3-414d-8d60-874830e883aa", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_glk.c b/drivers/gpu/drm/i915/i915_oa_glk.c index 4ee527e4c926..971db587957c 100644 --- a/drivers/gpu/drm/i915/i915_oa_glk.c +++ b/drivers/gpu/drm/i915/i915_oa_glk.c @@ -83,9 +83,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_glk(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "dd3fd789-e783-4204-8cd0-b671bbccb0cf", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_hsw.c b/drivers/gpu/drm/i915/i915_oa_hsw.c index 56b03773bb9d..434a9b96d7ab 100644 --- a/drivers/gpu/drm/i915/i915_oa_hsw.c +++ b/drivers/gpu/drm/i915/i915_oa_hsw.c @@ -113,9 +113,9 @@ show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *b void i915_perf_load_test_config_hsw(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "403d8832-1a27-4aa6-a64e-f5389ce7b212", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_render_basic; diff --git a/drivers/gpu/drm/i915/i915_oa_kblgt2.c b/drivers/gpu/drm/i915/i915_oa_kblgt2.c index b6e7cc774136..2fa98a40bbc8 100644 --- a/drivers/gpu/drm/i915/i915_oa_kblgt2.c +++ b/drivers/gpu/drm/i915/i915_oa_kblgt2.c @@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_kblgt2(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "baa3c7e4-52b6-4b85-801e-465a94b746dd", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_kblgt3.c b/drivers/gpu/drm/i915/i915_oa_kblgt3.c index 5576afdd9a7e..f3cb6679a1bc 100644 --- a/drivers/gpu/drm/i915/i915_oa_kblgt3.c +++ b/drivers/gpu/drm/i915/i915_oa_kblgt3.c @@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_kblgt3(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "f1792f32-6db2-4b50-b4b2-557128f1688d", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt2.c b/drivers/gpu/drm/i915/i915_oa_sklgt2.c index 890d55879946..bf8b8cd8a50d 100644 --- a/drivers/gpu/drm/i915/i915_oa_sklgt2.c +++ b/drivers/gpu/drm/i915/i915_oa_sklgt2.c @@ -83,9 +83,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_sklgt2(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "1651949f-0ac0-4cb1-a06f-dafd74a407d1", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt3.c b/drivers/gpu/drm/i915/i915_oa_sklgt3.c index 85e51addf86a..ae534c7c8135 100644 --- a/drivers/gpu/drm/i915/i915_oa_sklgt3.c +++ b/drivers/gpu/drm/i915/i915_oa_sklgt3.c @@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_sklgt3(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "2b985803-d3c9-4629-8a4f-634bfecba0e8", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt4.c b/drivers/gpu/drm/i915/i915_oa_sklgt4.c index bce031ee4445..817fba2d82df 100644 --- a/drivers/gpu/drm/i915/i915_oa_sklgt4.c +++ b/drivers/gpu/drm/i915/i915_oa_sklgt4.c @@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_sklgt4(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "882fa433-1f4a-4a67-a962-c741888fe5f5", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; From ebcaa1ff8b59097805d548fe7a676f194625c033 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 31 Oct 2017 10:23:25 +0000 Subject: [PATCH 082/260] drm/i915: Reject unknown syncobj flags We have to reject unknown flags for uAPI considerations, and also because the curent implementation limits their i915 storage space to two bits. v2: (Chris Wilson) * Fix fail in ABI check. * Added unknown flags and BUILD_BUG_ON. v3: * Use ARCH_KMALLOC_MINALIGN instead of alignof. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: cf6e7bac6357 ("drm/i915: Add support for drm syncobjs") Cc: Jason Ekstrand Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: David Airlie Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171031102326.9738-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 5b8213b1a8c7..435ed95df144 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -2104,6 +2104,11 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, goto err; } + if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + syncobj = drm_syncobj_find(file, fence.handle); if (!syncobj) { DRM_DEBUG("Invalid syncobj handle provided\n"); @@ -2111,6 +2116,9 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, goto err; } + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 125bde7d9504..ac3c6503ca27 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -839,6 +839,7 @@ struct drm_i915_gem_exec_fence { #define I915_EXEC_FENCE_WAIT (1<<0) #define I915_EXEC_FENCE_SIGNAL (1<<1) +#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) __u32 flags; }; From 2d7514152b1c82c81a363f8a171237567ebb7e66 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 3 Nov 2017 09:05:38 +0000 Subject: [PATCH 083/260] drm/i915: Warn in debug builds of incorrect usages of ptr_pack_bits GEM_BUG_ON if the packed bits do not fit into the specified width. v2: Avoid using the macro argument twice. v3: Drop unnecessary braces. (Joonas) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson (v1) Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171103090538.14474-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_utils.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index af3d7cc53fa1..8d07764887ec 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -83,8 +83,11 @@ (typeof(ptr))(__v & -BIT(n)); \ }) -#define ptr_pack_bits(ptr, bits, n) \ - ((typeof(ptr))((unsigned long)(ptr) | (bits))) +#define ptr_pack_bits(ptr, bits, n) ({ \ + unsigned long __bits = (bits); \ + GEM_BUG_ON(__bits & -BIT(n)); \ + ((typeof(ptr))((unsigned long)(ptr) | __bits)); \ +}) #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) From 136109c67f67ebe449032950ddc3ad5118934635 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Nov 2017 13:14:30 +0000 Subject: [PATCH 084/260] drm/i915: Set up mocs tables before restarting the engines After a reset, we may immediately begin executing requests on restarting the engines. Ergo this has to be last step with all re-initialisation completed beforehand. The mocs setup was after we started executing the requests; do it earlier! Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171102131430.22328-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9470ba0c1930..6a71805be389 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4936,13 +4936,10 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) if (ret) goto out; - /* Need to do basic initialisation of all rings first: */ - ret = __i915_gem_restart_engines(dev_priv); - if (ret) - goto out; - intel_mocs_init_l3cc_table(dev_priv); + /* Only when the HW is re-initialised, can we replay the requests */ + ret = __i915_gem_restart_engines(dev_priv); out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; From d36caeea4b0f76bf691c7d0cddc4a71ed93783c5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 5 Nov 2017 12:45:50 +0000 Subject: [PATCH 085/260] drm/i915: Assert vma->flags are updated correctly during binding As we bind, and unbind on error, we want to be sure that the vma->flags are updated to reflect the binding state so that on the next invocation all is well. v2: Take two. v3: Take three; vma-misplaced is checking map-and-fenceable so keep it last! Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171105124550.32715-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_vma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index fc77e8191eb5..7cdf34800549 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -633,15 +633,17 @@ int __i915_vma_do_pin(struct i915_vma *vma, if (ret) goto err_unpin; } + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); ret = i915_vma_bind(vma, vma->obj->cache_level, flags); if (ret) goto err_remove; + GEM_BUG_ON((vma->flags & I915_VMA_BIND_MASK) == 0); + if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; @@ -649,6 +651,7 @@ err_remove: if ((bound & I915_VMA_BIND_MASK) == 0) { i915_vma_remove(vma); GEM_BUG_ON(vma->pages); + GEM_BUG_ON(vma->flags & I915_VMA_BIND_MASK); } err_unpin: __i915_vma_unpin(vma); From 69ea47a5a98b198c9d36fe157a3986748a9e2554 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Oct 2017 16:32:07 +0100 Subject: [PATCH 086/260] drm/i915/selftests: Hide dangerous tests Some tests are designed to exercise the limits of the HW and may trigger unintended side-effects making the machine unusable. This should not be executed by default, but are still useful for early platform validation. References: https://bugs.freedesktop.org/show_bug.cgi?id=103453 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171025153207.9589-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/Kconfig.debug | 14 ++++++++++++++ drivers/gpu/drm/i915/selftests/intel_uncore.c | 8 ++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index aed7d207ea84..19c77c6feb24 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -90,6 +90,20 @@ config DRM_I915_SELFTEST If in doubt, say "N". +config DRM_I915_SELFTEST_BROKEN + bool "Enable broken and dangerous selftests" + depends on DRM_I915_SELFTEST + depends on BROKEN + default n + help + This option enables the execution of selftests that are "dangerous" + and may trigger unintended HW side-effects as they break strict + rules given in the HW specification. For science. + + Recommended for masochistic driver developers only. + + If in doubt, say "N". + config DRM_I915_LOW_LEVEL_TRACEPOINTS bool "Enable low level request tracing events" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 3cac22eb47ce..f52a4ab9aa98 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -120,10 +120,10 @@ static int intel_uncore_check_forcewake_domains(struct drm_i915_private *dev_pri !IS_CHERRYVIEW(dev_priv)) return 0; - if (IS_VALLEYVIEW(dev_priv)) /* XXX system lockup! */ - return 0; - - if (IS_BROADWELL(dev_priv)) /* XXX random GPU hang afterwards! */ + /* + * This test may lockup the machine or cause GPU hangs afterwards. + */ + if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN)) return 0; valid = kzalloc(BITS_TO_LONGS(FW_RANGE) * sizeof(*valid), From f991c492aa55fb1c6834882c5d786d5bb3b25f07 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Nov 2017 11:15:08 +0000 Subject: [PATCH 087/260] drm/i915: Lock llist_del_first() vs llist_del_all() An oversight in commit 87701b4b5593 ("drm/i915: Only free the oldest stale object before a fresh allocation") was that not only do we have to serialise concurrent users of llist_del_first(), but we also have to lock llist_del_first() vs llist_del_all(). From llist.h, * This can be summarized as follows: * * | add | del_first | del_all * add | - | - | - * del_first | | L | L * del_all | | | - * * Where, a particular row's operation can happen concurrently with a column's * operation, with "-" being no lock needed, while "L" being lock is needed. This should hopefully explain: <4>[ 89.287106] general protection fault: 0000 [#1] PREEMPT SMP <4>[ 89.287126] Modules linked in: snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic x86_pkg_temp_thermal intel_powerclamp coretemp i915 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core r8169 mii mei_me mei snd_pcm prime_numbers i2c_hid pinctrl_geminilake pinctrl_intel <4>[ 89.287226] CPU: 2 PID: 23 Comm: ksoftirqd/2 Tainted: G U 4.14.0-rc8-CI-CI_DRM_3315+ #1 <4>[ 89.287247] Hardware name: Intel Corp. Geminilake/GLK RVP2 LP4SD (07), BIOS GELKRVPA.X64.0062.B30.1708222146 08/22/2017 <4>[ 89.287270] task: ffff88017ab34ec0 task.stack: ffffc90000128000 <4>[ 89.287290] RIP: 0010:llist_add_batch+0x4/0x20 <4>[ 89.287301] RSP: 0018:ffffc9000012bdb8 EFLAGS: 00010296 <4>[ 89.287314] RAX: ffffffff811017ad RBX: 6e468801a1560000 RCX: ef3e53fceecdeb81 <4>[ 89.287330] RDX: 6e468801a1566130 RSI: ffff880103d73d98 RDI: ffff880103d73d98 <4>[ 89.287346] RBP: ffffc9000012bdb8 R08: ffff88017ab35780 R09: 0000000000000000 <4>[ 89.287361] R10: ffffc9000012bd68 R11: 00000000abb18c3d R12: ffffffffa01369e0 <4>[ 89.287377] R13: ffff88017fd1b8f8 R14: ffff88017ab34ec0 R15: 000000000000000a <4>[ 89.287393] FS: 0000000000000000(0000) GS:ffff88017fd00000(0000) knlGS:0000000000000000 <4>[ 89.287411] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 89.287424] CR2: 00007ff0c0755018 CR3: 000000016df9b000 CR4: 00000000003406e0 <4>[ 89.287440] Call Trace: <4>[ 89.287511] __i915_gem_free_object_rcu+0x20/0x40 [i915] <4>[ 89.287527] rcu_process_callbacks+0x27a/0x730 <4>[ 89.287544] __do_softirq+0xc0/0x4ae <4>[ 89.287559] ? smpboot_thread_fn+0x2d/0x280 <4>[ 89.287571] run_ksoftirqd+0x1f/0x70 <4>[ 89.287582] smpboot_thread_fn+0x18a/0x280 <4>[ 89.287595] kthread+0x114/0x150 <4>[ 89.287605] ? sort_range+0x30/0x30 <4>[ 89.287615] ? kthread_create_on_node+0x40/0x40 <4>[ 89.287628] ret_from_fork+0x27/0x40 <4>[ 89.287641] Code: 0d 48 83 ea 01 4c 89 c1 48 83 fa ff 74 12 48 23 0c d7 74 ed 48 c1 e2 06 48 0f bd c9 48 8d 04 0a 5d c3 90 90 90 90 90 55 48 89 e5 <48> 8b 0a 48 89 0e 48 89 c8 f0 48 0f b1 3a 48 39 c1 75 ed 48 85 <1>[ 89.287774] RIP: llist_add_batch+0x4/0x20 RSP: ffffc9000012bdb8 <4>[ 89.287826] ---[ end trace e775d15174d8ae02 ]--- (Lockless lists are only easy (and lockless) when only using llist_add/llist_del_all!) Fixes: 87701b4b5593 ("drm/i915: Only free the oldest stale object before a fresh allocation") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171106111508.11941-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6a71805be389..889ae8810d5f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4636,11 +4636,17 @@ static void __i915_gem_free_work(struct work_struct *work) * unbound now. */ + spin_lock(&i915->mm.free_lock); while ((freed = llist_del_all(&i915->mm.free_list))) { + spin_unlock(&i915->mm.free_lock); + __i915_gem_free_objects(i915, freed); if (need_resched()) - break; + return; + + spin_lock(&i915->mm.free_lock); } + spin_unlock(&i915->mm.free_lock); } static void __i915_gem_free_object_rcu(struct rcu_head *head) From 856efd21c6f7139a74d340cdb6e0de5d8e226134 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Nov 2017 11:48:33 +0000 Subject: [PATCH 088/260] drm/i915/guc: Assert guc->stage_desc_pool is allocated Silence smatch by demonstrating that guc->stage_desc_pool is allocated following a successful guc_stage_desc_pool_create(), drivers/gpu/drm/i915/i915_guc_submission.c:1293 i915_guc_submission_init() error: we previously assumed 'guc->stage_desc_pool' could be null (see line 1261) Signed-off-by: Chris Wilson Cc: Oscar Mateo Cc: Daniele Ceraolo Spurio Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171106114833.31199-1-chris@chris-wilson.co.uk Reviewed-by: Michal Wajdeczko --- drivers/gpu/drm/i915/i915_guc_submission.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index d14c1342f09d..0ba2fc04fe9c 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1264,10 +1264,16 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) ret = guc_stage_desc_pool_create(guc); if (ret) return ret; + /* + * Keep static analysers happy, let them know that we allocated the + * vma after testing that it didn't exist earlier. + */ + GEM_BUG_ON(!guc->stage_desc_pool); ret = guc_shared_data_create(guc); if (ret) goto err_stage_desc_pool; + GEM_BUG_ON(!guc->shared_data); ret = intel_guc_log_create(guc); if (ret < 0) @@ -1276,10 +1282,12 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) ret = guc_preempt_work_create(guc); if (ret) goto err_log; + GEM_BUG_ON(!guc->preempt_wq); ret = guc_ads_create(guc); if (ret < 0) goto err_wq; + GEM_BUG_ON(!guc->ads_vma); return 0; From 1c5a907180365d0842c17fb15977ad426d429c14 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 6 Nov 2017 13:51:54 +0000 Subject: [PATCH 089/260] drm/i915/guc: Assert ctch->vma is allocated Silence smatch by demonstrating that ctch->vma is allocated following a successful chch_init() drivers/gpu/drm/i915/intel_guc_ct.c:204 ctch_open() error: we previously assumed 'ctch->vma' could be null (see line 197) Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171106135154.52520-1-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_guc_ct.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c index c4cbec140101..24ad55752396 100644 --- a/drivers/gpu/drm/i915/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/intel_guc_ct.c @@ -198,6 +198,7 @@ static int ctch_open(struct intel_guc *guc, err = ctch_init(guc, ctch); if (unlikely(err)) goto err_out; + GEM_BUG_ON(!ctch->vma); } /* vma should be already allocated and map'ed */ From 7d41ef3479a6bc3ba2ddcf49e365916611e4fa39 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 26 Oct 2017 17:36:55 +0000 Subject: [PATCH 090/260] drm/i915: Add Guc/HuC firmware details to error state Include GuC and HuC firmware details in captured error state to provide additional debug information. To reuse existing uc firmware pretty printer, introduce new drm-printer variant that works with our i915_error_state_buf output. Also update uc firmware pretty printer to accept const input. v2: don't rely on current caps (Chris) dump correct fw info (Michal) v3: simplify capture of custom paths (Chris) v4: improve 'why' comment (Joonas) trim output if no fw path (Michal) group code around uc error state (Michal) v5: use error in cleanup_uc (Michal) Suggested-by: Chris Wilson Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171026173657.49648-1-michal.wajdeczko@intel.com [ickle: allow printing uc_fw after allocation failure] Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 5 +++ drivers/gpu/drm/i915/i915_gpu_error.c | 64 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_uc_fw.c | 2 +- drivers/gpu/drm/i915/intel_uc_fw.h | 2 +- 4 files changed, 71 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 72bb5b51035a..632249f90b91 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -913,6 +913,11 @@ struct i915_gpu_state { struct intel_device_info device_info; struct i915_params params; + struct i915_error_uc { + struct intel_uc_fw guc_fw; + struct intel_uc_fw huc_fw; + } uc; + /* Generic register state */ u32 eir; u32 pgtbl_er; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 653fb69e7ecb..bc264000361f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -30,6 +30,8 @@ #include #include #include +#include + #include "i915_drv.h" static const char *engine_str(int engine) @@ -175,6 +177,21 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e, #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__) #define err_puts(e, s) i915_error_puts(e, s) +static void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf) +{ + i915_error_vprintf(p->arg, vaf->fmt, *vaf->va); +} + +static inline struct drm_printer +i915_error_printer(struct drm_i915_error_state_buf *e) +{ + struct drm_printer p = { + .printfn = __i915_printfn_error, + .arg = e, + }; + return p; +} + #ifdef CONFIG_DRM_I915_COMPRESS_ERROR struct compress { @@ -589,6 +606,20 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m, pdev->subsystem_device); } +static void err_print_uc(struct drm_i915_error_state_buf *m, + const struct i915_error_uc *error_uc) +{ + struct drm_printer p = i915_error_printer(m); + const struct i915_gpu_state *error = + container_of(error_uc, typeof(*error), uc); + + if (!error->device_info.has_guc) + return; + + intel_uc_fw_dump(&error_uc->guc_fw, &p); + intel_uc_fw_dump(&error_uc->huc_fw, &p); +} + int i915_error_state_to_str(struct drm_i915_error_state_buf *m, const struct i915_gpu_state *error) { @@ -773,6 +804,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_print_capabilities(m, &error->device_info); err_print_params(m, &error->params); + err_print_uc(m, &error->uc); if (m->bytes == 0 && m->err) return m->err; @@ -831,6 +863,14 @@ static __always_inline void free_param(const char *type, void *x) kfree(*(void **)x); } +static void cleanup_uc_state(struct i915_gpu_state *error) +{ + struct i915_error_uc *error_uc = &error->uc; + + kfree(error_uc->guc_fw.path); + kfree(error_uc->huc_fw.path); +} + void __i915_gpu_state_free(struct kref *error_ref) { struct i915_gpu_state *error = @@ -870,6 +910,8 @@ void __i915_gpu_state_free(struct kref *error_ref) I915_PARAMS_FOR_EACH(FREE); #undef FREE + cleanup_uc_state(error); + kfree(error); } @@ -1559,6 +1601,26 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, error->pinned_bo = bo; } +static void capture_uc_state(struct i915_gpu_state *error) +{ + struct drm_i915_private *i915 = error->i915; + struct i915_error_uc *error_uc = &error->uc; + + /* Capturing uC state won't be useful if there is no GuC */ + if (!error->device_info.has_guc) + return; + + error_uc->guc_fw = i915->guc.fw; + error_uc->huc_fw = i915->huc.fw; + + /* Non-default firmware paths will be specified by the modparam. + * As modparams are generally accesible from the userspace make + * explicit copies of the firmware paths. + */ + error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC); + error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC); +} + static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { @@ -1710,6 +1772,8 @@ static int capture(void *data) I915_PARAMS_FOR_EACH(DUP); #undef DUP + capture_uc_state(error); + i915_capture_gen_state(error->i915, error); i915_capture_reg_state(error->i915, error); i915_gem_record_fences(error->i915, error); diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index 973888e94cba..4bc82d3005ff 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -299,7 +299,7 @@ void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) * * Pretty printer for uC firmware. */ -void intel_uc_fw_dump(struct intel_uc_fw *uc_fw, struct drm_printer *p) +void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) { drm_printf(p, "%s firmware: %s\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index 132903669391..5394d9d1e683 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -116,6 +116,6 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, int (*xfer)(struct intel_uc_fw *uc_fw, struct i915_vma *vma)); void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); -void intel_uc_fw_dump(struct intel_uc_fw *uc_fw, struct drm_printer *p); +void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); #endif From 0397ac13dd47bb0c99b10a11b756aafb72018fca Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 26 Oct 2017 17:36:56 +0000 Subject: [PATCH 091/260] drm/i915: Make GuC log part of the uC error state We keep details of GuC and HuC in separate error state struct. Make GuC log part of it to group all related data together. Since we are printing uC details at the end, with this change GuC log will be moved there too. v2: comment on new placement of the log (Chris) Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171026173657.49648-2-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 18 +++--------------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 632249f90b91..fe93115c4caa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -916,6 +916,7 @@ struct i915_gpu_state { struct i915_error_uc { struct intel_uc_fw guc_fw; struct intel_uc_fw huc_fw; + struct drm_i915_error_object *guc_log; } uc; /* Generic register state */ @@ -941,7 +942,6 @@ struct i915_gpu_state { struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; struct drm_i915_error_object *semaphore; - struct drm_i915_error_object *guc_log; struct drm_i915_error_engine { int engine_id; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index bc264000361f..6e840db28bec 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -618,6 +618,7 @@ static void err_print_uc(struct drm_i915_error_state_buf *m, intel_uc_fw_dump(&error_uc->guc_fw, &p); intel_uc_fw_dump(&error_uc->huc_fw, &p); + print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log); } int i915_error_state_to_str(struct drm_i915_error_state_buf *m, @@ -794,8 +795,6 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, print_error_obj(m, NULL, "Semaphores", error->semaphore); - print_error_obj(m, NULL, "GuC log buffer", error->guc_log); - if (error->overlay) intel_overlay_print_error_state(m, error->overlay); @@ -869,6 +868,7 @@ static void cleanup_uc_state(struct i915_gpu_state *error) kfree(error_uc->guc_fw.path); kfree(error_uc->huc_fw.path); + i915_error_object_free(error_uc->guc_log); } void __i915_gpu_state_free(struct kref *error_ref) @@ -897,7 +897,6 @@ void __i915_gpu_state_free(struct kref *error_ref) } i915_error_object_free(error->semaphore); - i915_error_object_free(error->guc_log); for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) kfree(error->active_bo[i]); @@ -1619,17 +1618,7 @@ static void capture_uc_state(struct i915_gpu_state *error) */ error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC); error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC); -} - -static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) -{ - /* Capturing log buf contents won't be useful if logging was disabled */ - if (!dev_priv->guc.log.vma || (i915_modparams.guc_log_level < 0)) - return; - - error->guc_log = i915_error_object_create(dev_priv, - dev_priv->guc.log.vma); + error_uc->guc_log = i915_error_object_create(i915, i915->guc.log.vma); } /* Capture all registers which don't fit into another category. */ @@ -1780,7 +1769,6 @@ static int capture(void *data) i915_gem_record_rings(error->i915, error); i915_capture_active_buffers(error->i915, error); i915_capture_pinned_buffers(error->i915, error); - i915_gem_capture_guc_log_buffer(error->i915, error); error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); From 84a20a8a360d5d3322d976672427fae800722038 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 26 Oct 2017 17:36:57 +0000 Subject: [PATCH 092/260] drm/i915: Handle error-state modparams in dedicated functions Capturing and cleanup and modparams in error state requires some macro tricks. Move that code into separated functions for easier maintenance. Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171026173657.49648-3-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gpu_error.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 6e840db28bec..5c2d83a838d8 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -862,6 +862,13 @@ static __always_inline void free_param(const char *type, void *x) kfree(*(void **)x); } +static void cleanup_params(struct i915_gpu_state *error) +{ +#define FREE(T, x, ...) free_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(FREE); +#undef FREE +} + static void cleanup_uc_state(struct i915_gpu_state *error) { struct i915_error_uc *error_uc = &error->uc; @@ -905,10 +912,7 @@ void __i915_gpu_state_free(struct kref *error_ref) kfree(error->overlay); kfree(error->display); -#define FREE(T, x, ...) free_param(#T, &error->params.x); - I915_PARAMS_FOR_EACH(FREE); -#undef FREE - + cleanup_params(error); cleanup_uc_state(error); kfree(error); @@ -1746,6 +1750,14 @@ static __always_inline void dup_param(const char *type, void *x) *(void **)x = kstrdup(*(void **)x, GFP_ATOMIC); } +static void capture_params(struct i915_gpu_state *error) +{ + error->params = i915_modparams; +#define DUP(T, x, ...) dup_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(DUP); +#undef DUP +} + static int capture(void *data) { struct i915_gpu_state *error = data; @@ -1756,11 +1768,7 @@ static int capture(void *data) ktime_to_timeval(ktime_sub(ktime_get(), error->i915->gt.last_init_time)); - error->params = i915_modparams; -#define DUP(T, x, ...) dup_param(#T, &error->params.x); - I915_PARAMS_FOR_EACH(DUP); -#undef DUP - + capture_params(error); capture_uc_state(error); i915_capture_gen_state(error->i915, error); From 5684514ba9dc6d7aa932cc53d97d866b2386221f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Nov 2017 21:11:28 +0000 Subject: [PATCH 093/260] drm/i915: Deconstruct struct sgt_dma initialiser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc-4.4 complains about: struct sgt_dma iter = { .sg = vma->pages->sgl, .dma = sg_dma_address(iter.sg), .max = iter.dma + iter.sg->length, }; drivers/gpu/drm/i915/i915_gem_gtt.c: In function ‘gen8_ppgtt_insert_4lvl’: drivers/gpu/drm/i915/i915_gem_gtt.c:938: error: ‘iter.sg’ is used uninitialized in this function drivers/gpu/drm/i915/i915_gem_gtt.c:939: error: ‘iter.dma’ is used uninitialized in this function and worse generates invalid code that triggers a GPF: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: gen8_ppgtt_insert_4lvl+0x1b/0x1e0 [i915] PGD 0 Oops: 0000 [#1] SMP Modules linked in: snd_aloop nf_conntrack_ipv6 nf_defrag_ipv6 nf_log_ipv6 ip6table_filter ip6_tables ctr ccm xt_state nf_log_ipv4 nf_log_common xt_LOG xt_limit xt_recent xt_owner xt_addrtype iptable_filter ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c ip_tables dm_mod vhost_net macvtap macvlan vhost tun kvm_intel kvm irqbypass uas usb_storage hid_multitouch btusb btrtl uvcvideo videobuf2_v4l2 videobuf2_core videodev media videobuf2_vmalloc videobuf2_memops sg ppdev dell_wmi sparse_keymap mei_wdt sd_mod iTCO_wdt iTCO_vendor_support rtsx_pci_ms memstick rtsx_pci_sdmmc mmc_core dell_smm_hwmon hwmon dell_laptop dell_smbios dcdbas joydev input_leds hci_uart btintel btqca btbcm bluetooth parport_pc parport i2c_hid intel_lpss_acpi intel_lpss pcspkr wmi int3400_thermal acpi_thermal_rel dell_rbtn mei_me mei snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ahci libahci acpi_pad xhci_pci xhci_hcd snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm snd_timer snd soundcore int3403_thermal arc4 e1000e ptp pps_core i2c_i801 iwlmvm mac80211 rtsx_pci iwlwifi cfg80211 rfkill intel_pch_thermal processor_thermal_device int340x_thermal_zone intel_soc_dts_iosf i915 video fjes CPU: 2 PID: 2408 Comm: X Not tainted 4.10.0-rc5+ #1 Hardware name: Dell Inc. Latitude E7470/0T6HHJ, BIOS 1.11.3 11/09/2016 task: ffff880219fe4740 task.stack: ffffc90005f98000 RIP: 0010:gen8_ppgtt_insert_4lvl+0x1b/0x1e0 [i915] RSP: 0018:ffffc90005f9b8c8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8802167d8000 RCX: 0000000000000001 RDX: 00000000ffff7000 RSI: ffff880219f94140 RDI: ffff880228444000 RBP: ffffc90005f9b948 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000080 R13: 0000000000000001 R14: ffffc90005f9bcd7 R15: ffff88020c9a83c0 FS: 00007fb53e1ee920(0000) GS:ffff88024dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 000000022ef95000 CR4: 00000000003406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ppgtt_bind_vma+0x40/0x50 [i915] i915_vma_bind+0xcb/0x1c0 [i915] __i915_vma_do_pin+0x6e/0xd0 [i915] i915_gem_execbuffer_reserve_vma+0x162/0x1d0 [i915] i915_gem_execbuffer_reserve+0x4fc/0x510 [i915] ? __kmalloc+0x134/0x250 ? i915_gem_wait_for_error+0x25/0x100 [i915] ? i915_gem_wait_for_error+0x25/0x100 [i915] i915_gem_do_execbuffer+0x2df/0xa00 [i915] ? drm_malloc_gfp.clone.0+0x42/0x80 [i915] ? path_put+0x22/0x30 ? __check_object_size+0x62/0x1f0 ? terminate_walk+0x44/0x90 i915_gem_execbuffer2+0x95/0x1e0 [i915] drm_ioctl+0x243/0x490 ? handle_pte_fault+0x1d7/0x220 ? i915_gem_do_execbuffer+0xa00/0xa00 [i915] ? handle_mm_fault+0x10d/0x2a0 vfs_ioctl+0x18/0x30 do_vfs_ioctl+0x14b/0x3f0 SyS_ioctl+0x92/0xa0 entry_SYSCALL_64_fastpath+0x1a/0xa9 RIP: 0033:0x7fb53b4fcb77 RSP: 002b:00007ffe0c572898 EFLAGS: 00003246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fb53e17c038 RCX: 00007fb53b4fcb77 RDX: 00007ffe0c572900 RSI: 0000000040406469 RDI: 000000000000000b RBP: 00007fb5376d67e0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000028 R11: 0000000000003246 R12: 0000000000000000 R13: 0000000000000000 R14: 000055eecb314d00 R15: 000055eecb315460 Code: 0f 84 5d ff ff ff eb a2 0f 1f 84 00 00 00 00 00 55 48 89 e5 41 57 41 56 41 55 41 54 53 48 83 ec 58 0f 1f 44 00 00 31 c0 89 4d b0 <4c> 8b 60 10 44 8b 70 0c 48 89 d0 4c 8b 2e 48 c1 e8 27 25 ff 01 RIP: gen8_ppgtt_insert_4lvl+0x1b/0x1e0 [i915] RSP: ffffc90005f9b8c8 CR2: 0000000000000010 Recent gccs, such as 4.9, 6.3 or 7.2, do not generate the warning nor do they explode on use. If we manually create the struct using locals from the stack, this should eliminate this issue, and does not alter code generation with gcc-7.2. Fixes: 894ccebee2b0 ("drm/i915: Micro-optimise gen8_ppgtt_insert_entries()") Reported-by: Kelly French Signed-off-by: Chris Wilson Cc: Kelly French Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171106211128.12538-1-chris@chris-wilson.co.uk Tested-by: Kelly French Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0684d5df97d9..2847a6b41c16 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -958,10 +958,14 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, } } -struct sgt_dma { +static inline struct sgt_dma { struct scatterlist *sg; dma_addr_t dma, max; -}; +} sgt_dma(struct i915_vma *vma) { + struct scatterlist *sg = vma->pages->sgl; + dma_addr_t addr = sg_dma_address(sg); + return (struct sgt_dma) { sg, addr, addr + sg->length }; +} struct gen8_insert_pte { u16 pml4e; @@ -1042,11 +1046,7 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = { - .sg = vma->pages->sgl, - .dma = sg_dma_address(iter.sg), - .max = iter.dma + iter.sg->length, - }; + struct sgt_dma iter = sgt_dma(vma); struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, @@ -1158,11 +1158,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = { - .sg = vma->pages->sgl, - .dma = sg_dma_address(iter.sg), - .max = iter.dma + iter.sg->length, - }; + struct sgt_dma iter = sgt_dma(vma); struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { @@ -1869,13 +1865,10 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, unsigned act_pt = first_entry / GEN6_PTES; unsigned act_pte = first_entry % GEN6_PTES; const u32 pte_encode = vm->pte_encode(0, cache_level, flags); - struct sgt_dma iter; + struct sgt_dma iter = sgt_dma(vma); gen6_pte_t *vaddr; vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); - iter.sg = vma->pages->sgl; - iter.dma = sg_dma_address(iter.sg); - iter.max = iter.dma + iter.sg->length; do { vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); From 0ffba1fc98e8ec35caae8d50b657296ebb9a9a51 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Nov 2017 15:40:55 +0000 Subject: [PATCH 094/260] drm/i915: Silence smatch for cmdparser drivers/gpu/drm/i915/i915_cmd_parser.c:808:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:811:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:814:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:808:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:811:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:814:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:808:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:811:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:814:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:808:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:811:23: error: not an lvalue drivers/gpu/drm/i915/i915_cmd_parser.c:814:23: error: not an lvalue If we move the shift into each case not only do we kill the warning from smatch, but we shrink the code slightly: text data bss dec hex filename 1267906 20587 3168 1291661 13b58d before 1267890 20587 3168 1291645 13b57d after Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171107154055.19460-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld Reviewed-by: Gabriel Krisman Bertazi --- drivers/gpu/drm/i915/i915_cmd_parser.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 8ba932b22f7c..b11629beeb63 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -798,22 +798,15 @@ struct cmd_node { */ static inline u32 cmd_header_key(u32 x) { - u32 shift; - switch (x >> INSTR_CLIENT_SHIFT) { default: case INSTR_MI_CLIENT: - shift = STD_MI_OPCODE_SHIFT; - break; + return x >> STD_MI_OPCODE_SHIFT; case INSTR_RC_CLIENT: - shift = STD_3D_OPCODE_SHIFT; - break; + return x >> STD_3D_OPCODE_SHIFT; case INSTR_BC_CLIENT: - shift = STD_2D_OPCODE_SHIFT; - break; + return x >> STD_2D_OPCODE_SHIFT; } - - return x >> shift; } static int init_hash_table(struct intel_engine_cs *engine, From 2f59f1b33a84447247b21be60ecf05080ce2982a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Nov 2017 14:53:34 +0000 Subject: [PATCH 095/260] drm/i915: Silence compiler for csr_load_work_fn() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc-4.7 is not very smart and can not tell that "si" is guarded by size being 0. So it complains, drivers/gpu/drm/i915/intel_csr.c: In function ‘csr_load_work_fn’: drivers/gpu/drm/i915/intel_csr.c:204:3: warning: ‘si’ may be used uninitialized in this function [-Wmaybe-uninitialized] drivers/gpu/drm/i915/intel_csr.c:190:30: note: ‘si’ was declared in Give in and mark si as NULL. Signed-off-by: Chris Wilson Cc: Anusha Srivatsa Cc: Rodrigo Vivi Cc: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20171107145334.27154-1-chris@chris-wilson.co.uk Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Michal Wajdeczko --- drivers/gpu/drm/i915/intel_csr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 3e1f86d0c6cc..77d8b3d483ca 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -198,6 +198,7 @@ intel_get_stepping_info(struct drm_i915_private *dev_priv) si = bxt_stepping_info; } else { size = 0; + si = NULL; } if (INTEL_REVID(dev_priv) < size) From 0e870bf872929a026c58a74608734b89a95dda74 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Nov 2017 13:53:24 +0000 Subject: [PATCH 096/260] drm/i915: Simplify onion for bxt_ddi_phy_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Older compilers (gcc-4.9) are not as able to track uninitialised variables as well as more recent compilers. In particular, drivers/gpu/drm/i915/intel_dpio_phy.c: In function ‘bxt_ddi_phy_init’: drivers/gpu/drm/i915/intel_dpio_phy.c:482:25: warning: ‘was_enabled’ may be used uninitialized in this function [-Wmaybe-uninitialized] In this case, we can rearrange code slightly to make the control flow clearer to the reader, as well as the compiler. That is we only call uninit using the same predicate as calling init Signed-off-by: Chris Wilson Cc: Ander Conselvan de Oliveira Cc: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20171107135324.28300-1-chris@chris-wilson.co.uk Reviewed-by: Gabriel Krisman Bertazi --- drivers/gpu/drm/i915/intel_dpio_phy.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 63b76eac018f..490a42d9a3e8 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -466,21 +466,21 @@ void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy) lockdep_assert_held(&dev_priv->power_domains.lock); - if (rcomp_phy != -1) { + was_enabled = true; + if (rcomp_phy != -1) was_enabled = bxt_ddi_phy_is_enabled(dev_priv, rcomp_phy); - /* - * We need to copy the GRC calibration value from rcomp_phy, - * so make sure it's powered up. - */ - if (!was_enabled) - _bxt_ddi_phy_init(dev_priv, rcomp_phy); - } + /* + * We need to copy the GRC calibration value from rcomp_phy, + * so make sure it's powered up. + */ + if (!was_enabled) + _bxt_ddi_phy_init(dev_priv, rcomp_phy); _bxt_ddi_phy_init(dev_priv, phy); - if (rcomp_phy != -1 && !was_enabled) - bxt_ddi_phy_uninit(dev_priv, phy_info->rcomp_phy); + if (!was_enabled) + bxt_ddi_phy_uninit(dev_priv, rcomp_phy); } static bool __printf(6, 7) From 1a1f12872edcd5e425b668a35fb23548cfa918ef Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Nov 2017 14:03:38 +0000 Subject: [PATCH 097/260] drm/i915: Prevent unbounded wm results in g4x_compute_wm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Smatch warns of drivers/gpu/drm/i915/intel_pm.c:1161 g4x_compute_wm() warn: signedness bug returning '(-33554430)' which is a result of it believing that wm may be INT_MAX following g4x_tlb_miss_wa(). Just declaring g4x_tlb_miss_wa() as returning an unsigned integer is not sufficient, we need to tell smatch that wm itself is unsigned for it to not worry. So mark up the locals we expect to be non-negative, and so silence smatch. v2: Mark up vlv_compute_wm_level() as unsigned similarly. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Dan Carpenter Reviewed-by: Ville Syrjälä #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20171107140338.13748-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 07118c0b69d3..46440e2ecb33 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -928,7 +928,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) * and the size of 8 whole lines. This adjustment is always performed * in the actual pixel depth regardless of whether FBC is enabled or not." */ -static int g4x_tlb_miss_wa(int fifo_size, int width, int cpp) +static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp) { int tlb_miss = fifo_size * 64 - width * cpp * 8; @@ -1105,8 +1105,8 @@ static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; - int clock, htotal, cpp, width, wm; - int latency = dev_priv->wm.pri_latency[level] * 10; + unsigned int latency = dev_priv->wm.pri_latency[level] * 10; + unsigned int clock, htotal, cpp, width, wm; if (latency == 0) return USHRT_MAX; @@ -1145,7 +1145,7 @@ static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state, level == G4X_WM_LEVEL_NORMAL) { wm = intel_wm_method1(clock, cpp, latency); } else { - int small, large; + unsigned int small, large; small = intel_wm_method1(clock, cpp, latency); large = intel_wm_method2(clock, htotal, width, cpp, latency); @@ -1158,7 +1158,7 @@ static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state, wm = DIV_ROUND_UP(wm, 64) + 2; - return min_t(int, wm, USHRT_MAX); + return min_t(unsigned int, wm, USHRT_MAX); } static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state, @@ -1602,7 +1602,7 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; - int clock, htotal, cpp, width, wm; + unsigned int clock, htotal, cpp, width, wm; if (dev_priv->wm.pri_latency[level] == 0) return USHRT_MAX; @@ -1628,7 +1628,7 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, dev_priv->wm.pri_latency[level] * 10); } - return min_t(int, wm, USHRT_MAX); + return min_t(unsigned int, wm, USHRT_MAX); } static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes) From f6d03042b9ad514077175b5cc53069e0a364d0ec Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Nov 2017 11:05:59 +0000 Subject: [PATCH 098/260] drm/i915/selftests: Skip mixed page exhaustion if only small pages available If we only have 4k pages, we can't mix together different combinations of hugepages to see if the world burns. However, as the loops did nothing, we never set err to 0 and reported ENODEV aborting the test. Teach the test to skip instead. Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171107110559.6098-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/huge_pages.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 5cc8101bb2b1..01af540b6ef9 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1159,6 +1159,9 @@ static int igt_ppgtt_exhaust_huge(void *arg) int n, i; int err = -ENODEV; + if (supported == I915_GTT_PAGE_SIZE_4K) + return 0; + /* * Sanity check creating objects with a varying mix of page sizes -- * ensuring that our writes lands in the right place. From c29ccb9f426e6e4e91c9901bb6ee8dbe6abc48eb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Nov 2017 11:40:51 +0000 Subject: [PATCH 099/260] drm/i915/selftests: Take rpm wakeref around GGTT lowlevel tests The vma routines are responsible for acquiring the device rpm wakeref before they poke the HW. However, some of the selftests bypass the higher level vma routines in order to poke directly at the lowlevel GGTT functions; these are then responsible for managing rpm themselves. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171107114051.10583-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 9da0c9f99916..581296860539 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -267,7 +267,9 @@ static int lowlevel_hole(struct drm_i915_private *i915, mock_vma.node.size = BIT_ULL(size); mock_vma.node.start = addr; + intel_runtime_pm_get(i915); vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0); + intel_runtime_pm_put(i915); } count = n; @@ -1047,6 +1049,7 @@ static int igt_ggtt_page(void *arg) goto out_remove; } + intel_runtime_pm_get(i915); for (n = 0; n < count; n++) { u64 offset = tmp.start + order[n] * PAGE_SIZE; u32 __iomem *vaddr; @@ -1086,6 +1089,7 @@ static int igt_ggtt_page(void *arg) break; } } + intel_runtime_pm_put(i915); kfree(order); out_remove: From 693b1ccabe43d96a1eb57716ac032632882de20f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Nov 2017 11:56:53 +0000 Subject: [PATCH 100/260] drm/i915/selftests: Take rpm wakeref around partial tiling tests Since the partial tiling tests are poking into the GGTT to watch the fence registers in operation, it itself needs the device rpm wakeref in order for the GGTT to remain accessible. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171107115653.10716-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 1b8774a42e48..f32aa6bb79e2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -317,6 +317,7 @@ static int igt_partial_tiling(void *arg) } mutex_lock(&i915->drm.struct_mutex); + intel_runtime_pm_get(i915); if (1) { IGT_TIMEOUT(end); @@ -418,6 +419,7 @@ next_tiling: ; } out_unlock: + intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_unpin_pages(obj); out: From 58ecd9d58939382a352b10d43f06f5a23a36db08 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 5 Nov 2017 13:49:05 +0000 Subject: [PATCH 101/260] drm/i915: Read ilk FDI PLL frequency once during initialisation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During intel_atomic_check(), we do not take the intel_runtime_pm_get() wakeref and so should do the atomic modeset precalculations without referring to the HW. However, on Ironlake we see <7>[ 23.487557] [drm:intel_atomic_check [i915]] [CONNECTOR:47:VGA-1] checking for sink bpp constrains <7>[ 23.487615] [drm:intel_atomic_check [i915]] clamping display bpp (was 36) to default limit of 24 <4>[ 23.487621] RPM wakelock ref not held during HW access <4>[ 23.487652] ------------[ cut here ]------------ <4>[ 23.487697] WARNING: CPU: 0 PID: 1343 at drivers/gpu/drm/i915/intel_drv.h:1813 gen5_read32+0x183/0x200 [i915] <4>[ 23.487701] Modules linked in: snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic i915 intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul snd_hda_intel ghash_clmulni_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm lpc_ich e1000e mei_me ptp mei pps_core prime_numbers <4>[ 23.487784] CPU: 0 PID: 1343 Comm: debugfs_test Tainted: G W 4.14.0-rc7-CI-Trybot_1378+ #1 <4>[ 23.487788] Hardware name: Hewlett-Packard HP Compaq 8100 Elite SFF PC/304Ah, BIOS 786H1 v01.13 07/14/2011 <4>[ 23.487793] task: ffff8801f90aa6c0 task.stack: ffffc900013ec000 <4>[ 23.487838] RIP: 0010:gen5_read32+0x183/0x200 [i915] <4>[ 23.487842] RSP: 0018:ffffc900013efb58 EFLAGS: 00010292 <4>[ 23.487849] RAX: 000000000000002a RBX: ffff880205c00000 RCX: 0000000000000006 <4>[ 23.487854] RDX: 000000000000140a RSI: ffffffff81d0eb14 RDI: ffffffff81cc26f6 <4>[ 23.487857] RBP: ffffc900013efb80 R08: ffff8801f90aaff8 R09: 0000000000000000 <4>[ 23.487861] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001 <4>[ 23.487865] R13: 0000000000046000 R14: ffff88020ffaba78 R15: ffff88020b109bf8 <4>[ 23.487870] FS: 00007f53b5e40a40(0000) GS:ffff88021bc00000(0000) knlGS:0000000000000000 <4>[ 23.487874] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 23.487878] CR2: 000055e41900c0e8 CR3: 00000001fa0d6005 CR4: 00000000000206f0 <4>[ 23.487882] Call Trace: <4>[ 23.487931] intel_atomic_check+0x745/0x1290 [i915] <4>[ 23.487948] drm_atomic_check_only+0x459/0x560 <4>[ 23.487956] ? drm_atomic_set_crtc_for_connector+0xc9/0x100 <4>[ 23.488025] drm_atomic_commit+0x18/0x50 <4>[ 23.488035] restore_fbdev_mode_atomic+0x190/0x1f0 <4>[ 23.488059] restore_fbdev_mode+0x32/0x120 <4>[ 23.488072] drm_fb_helper_restore_fbdev_mode_unlocked+0x50/0xa0 <4>[ 23.488139] intel_fbdev_restore_mode+0x34/0x90 [i915] <4>[ 23.488194] i915_driver_lastclose+0xe/0x10 [i915] <4>[ 23.488208] drm_lastclose+0x39/0xf0 <4>[ 23.488219] drm_release+0x30c/0x3c0 <4>[ 23.488236] __fput+0xb9/0x200 <4>[ 23.488252] ____fput+0xe/0x10 <4>[ 23.488264] task_work_run+0x89/0xc0 <4>[ 23.488278] exit_to_usermode_loop+0x83/0x90 <4>[ 23.488290] syscall_return_slowpath+0xd0/0x110 <4>[ 23.488304] entry_SYSCALL_64_fastpath+0xaf/0xb1 <4>[ 23.488312] RIP: 0033:0x7f53b4317560 <4>[ 23.488320] RSP: 002b:00007ffca7e70748 EFLAGS: 00000246 ORIG_RAX: 0000000000000003 <4>[ 23.488333] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 00007f53b4317560 <4>[ 23.488340] RDX: 0000000000000005 RSI: 00007ffca7e70640 RDI: 0000000000000004 <4>[ 23.488347] RBP: 000055e417783900 R08: 000055e418f9e290 R09: 0000000000000000 <4>[ 23.488356] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 <4>[ 23.488363] R13: 00007f53b4302c40 R14: 0000000000000000 R15: 0000000000000000 <4>[ 23.488384] Code: b5 f2 f2 e0 0f ff e9 c5 fe ff ff 80 3d 0e 4b 10 00 00 0f 85 c6 fe ff ff 48 c7 c7 30 73 29 a0 c6 05 fa 4a 10 00 01 e8 8e f2 f2 e0 <0f> ff e9 ac fe ff ff e8 51 9d f3 e0 85 c0 0f 85 01 ff ff ff 48 <4>[ 23.488780] ---[ end trace 6bc72ce7f1596190 ]--- <7>[ 23.488844] [drm:intel_atomic_check [i915]] checking fdi config on pipe A, lanes 1 <7>[ 23.488911] [drm:intel_atomic_check [i915]] hw max bpp: 36, pipe bpp: 24, dithering: 0 due to intel_fdi_link_freq() poking at FDI_PLL_BIOS_0. Avoid this by recording the fdi pll frequency during device initiailisation. v2: Also extract the static FDI PLL frequencies for Sandybridge and Ivybridge. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171107214713.18704-1-chris@chris-wilson.co.uk Reviewed-by: Maarten Lankhorst Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fe93115c4caa..5e6938d0a903 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2349,6 +2349,7 @@ struct drm_i915_private { unsigned int max_dotclk_freq; unsigned int rawclk_freq; unsigned int hpll_freq; + unsigned int fdi_pll_freq; unsigned int czclk_freq; struct { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 737de251d0f8..3dec70fa1ee8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -219,10 +219,8 @@ intel_fdi_link_freq(struct drm_i915_private *dev_priv, { if (HAS_DDI(dev_priv)) return pipe_config->port_clock; /* SPLL */ - else if (IS_GEN5(dev_priv)) - return ((I915_READ(FDI_PLL_BIOS_0) & FDI_PLL_FB_CLOCK_MASK) + 2) * 10000; else - return 270000; + return dev_priv->fdi_pll_freq; } static const struct intel_limit intel_limits_i8xx_dac = { @@ -14454,6 +14452,22 @@ fail: drm_modeset_acquire_fini(&ctx); } +static void intel_update_fdi_pll_freq(struct drm_i915_private *dev_priv) +{ + if (IS_GEN5(dev_priv)) { + u32 fdi_pll_clk = + I915_READ(FDI_PLL_BIOS_0) & FDI_PLL_FB_CLOCK_MASK; + + dev_priv->fdi_pll_freq = (fdi_pll_clk + 2) * 10000; + } else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) { + dev_priv->fdi_pll_freq = 270000; + } else { + return; + } + + DRM_DEBUG_DRIVER("FDI PLL freq=%d\n", dev_priv->fdi_pll_freq); +} + int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -14541,6 +14555,7 @@ int intel_modeset_init(struct drm_device *dev) } intel_shared_dpll_init(dev); + intel_update_fdi_pll_freq(dev_priv); intel_update_czclk(dev_priv); intel_modeset_init_hw(dev); From c400cc2a1aea6c6c0d60d22521fa4b7261106964 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Nov 2017 15:22:11 +0000 Subject: [PATCH 102/260] drm/i915: Include intel_engine_is_idle() status in engine pretty-printer Upon parking, if we discover an active engine we dump its state. Follow that state with an indication of whether the engine was idle. References: https://bugs.freedesktop.org/show_bug.cgi?id=103479 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171107152211.19930-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_engine_cs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ddbe5c9bf45a..6997306be0d2 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1835,6 +1835,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) } spin_unlock_irq(&b->rb_lock); + drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); drm_printf(m, "\n"); } From 2f6a3783833dde63f1c08982943a8b2229b97afb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Nov 2017 09:44:00 +0000 Subject: [PATCH 103/260] drm/i915: Idle the GPU before shinking everything The handling of contexts are peculiar. Instead of tieing their vma to activity, we pin the context. This means that we cannot simply unbind the context object itself at will (which would normally cause us to wait for the vma to be idle), but must manually idle the GPU and retire requests first. A consequence of this peculiarity is when doing a last desperate attempt to recover memory. If the memory is tied up inside active context objects, we will fail to recover any memory simply by trying to unbind the objects without first doing a wait-for-idle. A side-effect of removing the call to shrinker_lock_uninterruptible() from i915_gem_shrinker_oom() was that we removed an unlocked wait-for-idle, and so lost the "natural" shrinkage of context objects. By replacing that with a locked wait from inside i915_gem_shrink(), we not only replace it with the ability to recover all context objects, but do so for all i915_gem_shrink_all() callers. v2: Switching requires request allocation, which is not permitted from inside the shrinker as it only uses ordinary allocations. References: https://bugs.freedesktop.org/show_bug.cgi?id=102936 Fixes: f2123818ffad ("drm/i915: Move dev_priv->mm.[un]bound_list to its own lock") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171108094400.1386-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index eb31f8aa5c21..3770e3323fc8 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -162,6 +162,18 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!shrinker_lock(dev_priv, &unlock)) return 0; + /* + * When shrinking the active list, also consider active contexts. + * Active contexts are pinned until they are retired, and so can + * not be simply unbound to retire and unpin their pages. To shrink + * the contexts, we must wait until the gpu is idle. + * + * We don't care about errors here; if we cannot wait upon the GPU, + * we will free as much as we can and hope to get a second chance. + */ + if (flags & I915_SHRINK_ACTIVE) + i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); + trace_i915_gem_shrink(dev_priv, target, flags); i915_gem_retire_requests(dev_priv); From 1ab22356b37ab08a391d6f007fda4c822bef9fb5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Nov 2017 22:06:56 +0000 Subject: [PATCH 104/260] drm/i915: Prune the reservation shared fence array The shared fence array is not autopruning and may continue to grow as an object is shared between new timelines. Take the opportunity when we think the object is idle (we have to confirm that any external fence is also signaled) to decouple all the fences. We apply a similar trick after waiting on an object, see commit e54ca9774777 ("drm/i915: Remove completed fences after a wait") v2: No longer need to handle the batch pool as a special case. v3: Need to trylock from within i915_vma_retire as this may be called form the shrinker - and we may later try to allocate underneath the reservation lock, so a deadlock is possible. References: https://bugs.freedesktop.org/show_bug.cgi?id=102936 Fixes: d07f0e59b2c7 ("drm/i915: Move GEM activity tracking into a common struct reservation_object") Fixes: 80b204bce8f2 ("drm/i915: Enable multiple timelines") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171107220656.5020-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_vma.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 7cdf34800549..b7ac84db1d2e 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -54,6 +54,13 @@ i915_vma_retire(struct i915_gem_active *active, if (--obj->active_count) return; + /* Prune the shared fence arrays iff completely idle (inc. external) */ + if (reservation_object_trylock(obj->resv)) { + if (reservation_object_test_signaled_rcu(obj->resv, true)) + reservation_object_add_excl_fence(obj->resv, NULL); + reservation_object_unlock(obj->resv); + } + /* Bump our place on the bound list to keep it roughly in LRU order * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) From f72b84c677d61f201b869223a8d6e389c7bb7d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 8 Nov 2017 15:35:55 +0200 Subject: [PATCH 105/260] drm/i915: Move init_clock_gating() back to where it was MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently setting up a bunch of GT registers before we've properly initialized the rest of the GT hardware leads to these setting being lost. So looks like I broke HSW with commit b7048ea12fbb ("drm/i915: Do .init_clock_gating() earlier to avoid it clobbering watermarks") by doing init_clock_gating() too early. This should actually affect other platforms as well, but apparently not to such a great degree. What I was ultimately after in that commit was to move the ilk_init_lp_watermarks() call earlier. So let's undo the damage and move init_clock_gating() back to where it was, and call ilk_init_lp_watermarks() just before the watermark state readout. This highlights how fragile and messed up our init order really is. I wonder why we even initialize the display before gem. The opposite order would make much more sense to me... v2: Keep WaRsPkgCStateDisplayPMReq:hsw early as it really must be done before all planes might get disabled. Cc: stable@vger.kernel.org Cc: Chris Wilson Cc: Mark Janes Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Joonas Lahtinen Cc: Oscar Mateo Cc: Mika Kuoppala Reported-by: Mark Janes Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103549 Fixes: b7048ea12fbb ("drm/i915: Do .init_clock_gating() earlier to avoid it clobbering watermarks") References: https://lists.freedesktop.org/archives/intel-gfx/2017-November/145432.html Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171108133555.14091-1-ville.syrjala@linux.intel.com Tested-by: Chris Wilson Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 14 +++++++-- drivers/gpu/drm/i915/intel_pm.c | 44 ++++++++++++---------------- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3dec70fa1ee8..84817ccc5305 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3674,6 +3674,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) intel_pps_unlock_regs_wa(dev_priv); intel_modeset_init_hw(dev); + intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display.hpd_irq_setup) @@ -14363,8 +14364,6 @@ void intel_modeset_init_hw(struct drm_device *dev) intel_update_cdclk(dev_priv); intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; - - intel_init_clock_gating(dev_priv); } /* @@ -15094,6 +15093,15 @@ intel_modeset_setup_hw_state(struct drm_device *dev, struct intel_encoder *encoder; int i; + if (IS_HASWELL(dev_priv)) { + /* + * WaRsPkgCStateDisplayPMReq:hsw + * System hang if this isn't done before disabling all planes! + */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); + } + intel_modeset_readout_hw_state(dev); /* HW state is read out, now we need to sanitize this mess. */ @@ -15191,6 +15199,8 @@ void intel_modeset_gem_init(struct drm_device *dev) intel_init_gt_powersave(dev_priv); + intel_init_clock_gating(dev_priv); + intel_setup_overlay(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 46440e2ecb33..e09377df590d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5754,12 +5754,30 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->wm.wm_mutex); } +/* + * FIXME should probably kill this and improve + * the real watermark readout/sanitation instead + */ +static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) +{ + I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); + + /* + * Don't touch WM1S_LP_EN here. + * Doing so could cause underruns. + */ +} + void ilk_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct ilk_wm_values *hw = &dev_priv->wm.hw; struct drm_crtc *crtc; + ilk_init_lp_watermarks(dev_priv); + for_each_crtc(dev, crtc) ilk_pipe_wm_get_hw_state(crtc); @@ -8213,18 +8231,6 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) } } -static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) -{ - I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); - - /* - * Don't touch WM1S_LP_EN here. - * Doing so could cause underruns. - */ -} - static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) { uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; @@ -8258,8 +8264,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) (I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS)); - ilk_init_lp_watermarks(dev_priv); - /* * Based on the document from hardware guys the following bits * should be set unconditionally in order to enable FBC. @@ -8372,8 +8376,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_GT_MODE, _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - ilk_init_lp_watermarks(dev_priv); - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); @@ -8600,8 +8602,6 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) I915_GTT_PAGE_SIZE_2M); enum pipe pipe; - ilk_init_lp_watermarks(dev_priv); - /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -8652,8 +8652,6 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) { - ilk_init_lp_watermarks(dev_priv); - /* L3 caching of data atomics doesn't work -- disable it. */ I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); I915_WRITE(HSW_ROW_CHICKEN3, @@ -8697,10 +8695,6 @@ static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); - /* WaRsPkgCStateDisplayPMReq:hsw */ - I915_WRITE(CHICKEN_PAR1_1, - I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); - lpt_init_clock_gating(dev_priv); } @@ -8708,8 +8702,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) { uint32_t snpcr; - ilk_init_lp_watermarks(dev_priv); - I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); /* WaDisableEarlyCull:ivb */ From 0a60797a0efbc495f514304d83eb289bb55990a6 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Fri, 3 Nov 2017 11:30:27 -0700 Subject: [PATCH 106/260] drm/i915: Implement ReadHitWriteOnlyDisable. The workaround for this is described as: "if RenderSurfaceState.Num_Multisamples > 1, disable RCC clock gating if RenderSurfaceState.Num_Multisamples == 1, set 0x7010[14] = 1" Further documentation in the internal bug referenced by the bspec suggest that any of the above suggestions should suffice to fix the issue. We are going with disabling RCC clock gating. Unfortunately, what we are doing doesn't match the name of the workaround, but at least it matches its description. This change improves CNL stability by avoiding some of the hangs seen in the platform. v2: Only disable RCC clock gating. Signed-off-by: Rafael Antognolli Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171103183027.5051-1-rafael.antognolli@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f0f8f6059652..6ef33422f762 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3837,6 +3837,7 @@ enum { */ #define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) +#define RCCUNIT_CLKGATE_DIS (1 << 7) /* * Display engine regs diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 6997306be0d2..6cb8e3ed97e4 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1320,6 +1320,9 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine) WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + /* ReadHitWriteOnlyDisable: cnl */ + WA_SET_BIT_MASKED(SLICE_UNIT_LEVEL_CLKGATE, RCCUNIT_CLKGATE_DIS); + /* WaEnablePreemptionGranularityControlByUMD:cnl */ I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); From 2ceddb189306443a8d0d300de5b2bdf496749548 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 3 Nov 2017 15:18:12 +0000 Subject: [PATCH 107/260] drm/i915/guc: Split GuC firmware xfer function into clear steps Transfer of GuC firmware requires few steps that currently are implemented in two large functions. Split this code into smaller functions to make these steps small and clear. Also be prepared for potential DMA xfer step failure. v2: rename function steps (Sagar) Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20171103151816.62048-1-michal.wajdeczko@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_guc_fw.c | 234 ++++++++++++++++------------ 1 file changed, 134 insertions(+), 100 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index ef67a36354c5..c4f4526e3b77 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -97,106 +97,9 @@ int intel_guc_fw_select(struct intel_guc *guc) return 0; } -/* - * Read the GuC status register (GUC_STATUS) and store it in the - * specified location; then return a boolean indicating whether - * the value matches either of two values representing completion - * of the GuC boot process. - * - * This is used for polling the GuC status in a wait_for() - * loop below. - */ -static inline bool guc_ucode_response(struct drm_i915_private *dev_priv, - u32 *status) +static void guc_prepare_xfer(struct intel_guc *guc) { - u32 val = I915_READ(GUC_STATUS); - u32 uk_val = val & GS_UKERNEL_MASK; - *status = val; - return (uk_val == GS_UKERNEL_READY || - ((val & GS_MIA_CORE_STATE) && uk_val == GS_UKERNEL_LAPIC_DONE)); -} - -/* - * Transfer the firmware image to RAM for execution by the microcontroller. - * - * Architecturally, the DMA engine is bidirectional, and can potentially even - * transfer between GTT locations. This functionality is left out of the API - * for now as there is no need for it. - * - * Note that GuC needs the CSS header plus uKernel code to be copied by the - * DMA engine in one operation, whereas the RSA signature is loaded via MMIO. - */ -static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv, - struct i915_vma *vma) -{ - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; - unsigned long offset; - struct sg_table *sg = vma->pages; - u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT]; - int i, ret = 0; - - /* where RSA signature starts */ - offset = guc_fw->rsa_offset; - - /* Copy RSA signature from the fw image to HW for verification */ - sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, sizeof(rsa), offset); - for (i = 0; i < UOS_RSA_SCRATCH_MAX_COUNT; i++) - I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]); - - /* The header plus uCode will be copied to WOPCM via DMA, excluding any - * other components */ - I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); - - /* Set the source address for the new blob */ - offset = guc_ggtt_offset(vma) + guc_fw->header_offset; - I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); - I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); - - /* - * Set the DMA destination. Current uCode expects the code to be - * loaded at 8k; locations below this are used for the stack. - */ - I915_WRITE(DMA_ADDR_1_LOW, 0x2000); - I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); - - /* Finally start the DMA */ - I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA)); - - /* - * Wait for the DMA to complete & the GuC to start up. - * NB: Docs recommend not using the interrupt for completion. - * Measurements indicate this should take no more than 20ms, so a - * timeout here indicates that the GuC has failed and is unusable. - * (Higher levels of the driver will attempt to fall back to - * execlist mode if this happens.) - */ - ret = wait_for(guc_ucode_response(dev_priv, &status), 100); - - DRM_DEBUG_DRIVER("DMA status 0x%x, GuC status 0x%x\n", - I915_READ(DMA_CTRL), status); - - if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - DRM_ERROR("GuC firmware signature verification failed\n"); - ret = -ENOEXEC; - } - - DRM_DEBUG_DRIVER("returning %d\n", ret); - - return ret; -} - -/* - * Load the GuC firmware blob into the MinuteIA. - */ -static int guc_ucode_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) -{ - struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); struct drm_i915_private *dev_priv = guc_to_i915(guc); - int ret; - - GEM_BUG_ON(guc_fw->type != INTEL_UC_FW_TYPE_GUC); - - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); /* Enable MIA caching. GuC clock gating is disabled. */ I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE); @@ -224,8 +127,139 @@ static int guc_ucode_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) /* allows for 5us (in 10ns units) before GT can go to RC6 */ I915_WRITE(GUC_ARAT_C6DIS, 0x1FF); } +} - ret = guc_ucode_xfer_dma(dev_priv, vma); +/* Copy RSA signature from the fw image to HW for verification */ +static int guc_xfer_rsa(struct intel_guc *guc, struct i915_vma *vma) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_uc_fw *guc_fw = &guc->fw; + struct sg_table *sg = vma->pages; + u32 rsa[UOS_RSA_SCRATCH_MAX_COUNT]; + int i; + + if (sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, sizeof(rsa), + guc_fw->rsa_offset) != sizeof(rsa)) + return -EINVAL; + + for (i = 0; i < UOS_RSA_SCRATCH_MAX_COUNT; i++) + I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]); + + return 0; +} + +/* + * Transfer the firmware image to RAM for execution by the microcontroller. + * + * Architecturally, the DMA engine is bidirectional, and can potentially even + * transfer between GTT locations. This functionality is left out of the API + * for now as there is no need for it. + */ +static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_uc_fw *guc_fw = &guc->fw; + unsigned long offset; + + /* + * The header plus uCode will be copied to WOPCM via DMA, excluding any + * other components + */ + I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); + + /* Set the source address for the new blob */ + offset = guc_ggtt_offset(vma) + guc_fw->header_offset; + I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); + I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); + + /* + * Set the DMA destination. Current uCode expects the code to be + * loaded at 8k; locations below this are used for the stack. + */ + I915_WRITE(DMA_ADDR_1_LOW, 0x2000); + I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); + + /* Finally start the DMA */ + I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA)); + + return 0; +} + +/* + * Read the GuC status register (GUC_STATUS) and store it in the + * specified location; then return a boolean indicating whether + * the value matches either of two values representing completion + * of the GuC boot process. + * + * This is used for polling the GuC status in a wait_for() + * loop below. + */ +static inline bool guc_ready(struct intel_guc *guc, u32 *status) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 val = I915_READ(GUC_STATUS); + u32 uk_val = val & GS_UKERNEL_MASK; + + *status = val; + return (uk_val == GS_UKERNEL_READY) || + ((val & GS_MIA_CORE_STATE) && (uk_val == GS_UKERNEL_LAPIC_DONE)); +} + +static int guc_wait_ucode(struct intel_guc *guc) +{ + u32 status; + int ret; + + /* + * Wait for the GuC to start up. + * NB: Docs recommend not using the interrupt for completion. + * Measurements indicate this should take no more than 20ms, so a + * timeout here indicates that the GuC has failed and is unusable. + * (Higher levels of the driver will attempt to fall back to + * execlist mode if this happens.) + */ + ret = wait_for(guc_ready(guc, &status), 100); + DRM_DEBUG_DRIVER("GuC status %#x\n", status); + + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { + DRM_ERROR("GuC firmware signature verification failed\n"); + ret = -ENOEXEC; + } + + return ret; +} + +/* + * Load the GuC firmware blob into the MinuteIA. + */ +static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) +{ + struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); + struct drm_i915_private *dev_priv = guc_to_i915(guc); + int ret; + + GEM_BUG_ON(guc_fw->type != INTEL_UC_FW_TYPE_GUC); + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + guc_prepare_xfer(guc); + + /* + * Note that GuC needs the CSS header plus uKernel code to be copied + * by the DMA engine in one operation, whereas the RSA signature is + * loaded via MMIO. + */ + ret = guc_xfer_rsa(guc, vma); + if (ret) + DRM_WARN("GuC firmware signature xfer error %d\n", ret); + + ret = guc_xfer_ucode(guc, vma); + if (ret) + DRM_WARN("GuC firmware code xfer error %d\n", ret); + + ret = guc_wait_ucode(guc); + if (ret) + DRM_ERROR("GuC firmware xfer error %d\n", ret); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -247,5 +281,5 @@ static int guc_ucode_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) */ int intel_guc_fw_upload(struct intel_guc *guc) { - return intel_uc_fw_upload(&guc->fw, guc_ucode_xfer); + return intel_uc_fw_upload(&guc->fw, guc_fw_xfer); } From a86af2314a867804bb1531d514aa25c264fe5cb9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 3 Nov 2017 15:18:13 +0000 Subject: [PATCH 108/260] drm/i915/guc: Wait for ucode DMA transfer completion We silently assumed that DMA transfer will be completed within assumed timeout and thus we were waiting only at last step for GuC to become ready. Add intermediate wait to catch unexpected delays in DMA transfer. Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20171103151816.62048-2-michal.wajdeczko@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_guc_fw.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index c4f4526e3b77..74a61fe9448c 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -160,6 +160,8 @@ static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_uc_fw *guc_fw = &guc->fw; unsigned long offset; + u32 status; + int ret; /* * The header plus uCode will be copied to WOPCM via DMA, excluding any @@ -182,7 +184,12 @@ static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) /* Finally start the DMA */ I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA)); - return 0; + /* Wait for DMA to finish */ + ret = __intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0, + 2, 100, &status); + DRM_DEBUG_DRIVER("GuC DMA status %#x\n", status); + + return ret; } /* From e6843363768cc639a590917b509303f8e22dd6de Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 3 Nov 2017 15:18:14 +0000 Subject: [PATCH 109/260] drm/i915/guc: Drop legacy workarounds from guc_prepare_xfer We don't keep the workarounds for pre-production hardware (see intel_detect_preproduction_hw) thus we can drop some extra steps during firmware upload needed only for unsupported platforms. Suggested-by: Chris Wilson Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20171103151816.62048-3-michal.wajdeczko@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_guc_fw.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index 74a61fe9448c..a63b5cf7421a 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -104,16 +104,6 @@ static void guc_prepare_xfer(struct intel_guc *guc) /* Enable MIA caching. GuC clock gating is disabled. */ I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE); - /* WaDisableMinuteIaClockGating:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GUC_SHIM_CONTROL, (I915_READ(GUC_SHIM_CONTROL) & - ~GUC_ENABLE_MIA_CLOCK_GATING)); - } - - /* WaC6DisallowByGfxPause:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) - I915_WRITE(GEN6_GFXPAUSE, 0x30FFF); - if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); else From b814b0744a1434aa46c72bd02c3e9c8b1263f120 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 3 Nov 2017 15:18:15 +0000 Subject: [PATCH 110/260] drm/i915/guc: Simplify programming of GUC_SHIM_CONTROL We can program GUC_SHIM_CONTROL register with all expected bits without use of extra macro defined in fwif.h v2: rebased without pre-prod code v3: fixed typo Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20171103151816.62048-4-michal.wajdeczko@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_guc_reg.h | 7 ------- drivers/gpu/drm/i915/intel_guc_fw.c | 9 +++++++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h index 35cf9918d09a..bc1ae7d8f424 100644 --- a/drivers/gpu/drm/i915/i915_guc_reg.h +++ b/drivers/gpu/drm/i915/i915_guc_reg.h @@ -102,13 +102,6 @@ #define GUC_ENABLE_MIA_CLOCK_GATING (1<<15) #define GUC_GEN10_SHIM_WC_ENABLE (1<<21) -#define GUC_SHIM_CONTROL_VALUE (GUC_DISABLE_SRAM_INIT_TO_ZEROES | \ - GUC_ENABLE_READ_CACHE_LOGIC | \ - GUC_ENABLE_MIA_CACHING | \ - GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | \ - GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | \ - GUC_ENABLE_MIA_CLOCK_GATING) - #define GUC_SEND_INTERRUPT _MMIO(0xc4c8) #define GUC_SEND_TRIGGER (1<<0) diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index a63b5cf7421a..69ba01599575 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -101,8 +101,13 @@ static void guc_prepare_xfer(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - /* Enable MIA caching. GuC clock gating is disabled. */ - I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE); + /* Must program this register before loading the ucode with DMA */ + I915_WRITE(GUC_SHIM_CONTROL, GUC_DISABLE_SRAM_INIT_TO_ZEROES | + GUC_ENABLE_READ_CACHE_LOGIC | + GUC_ENABLE_MIA_CACHING | + GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | + GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | + GUC_ENABLE_MIA_CLOCK_GATING); if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); From 94dec87159af6f3dcc0b78d3f909aefa9e29c01a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Nov 2017 08:55:40 +0000 Subject: [PATCH 111/260] drm/i915: Reorder context-close to avoid calling i915_vma_close() under RCU When we close the VMA, we unbind it from the ppgtt and tear down the page directory pointing at it. That may trigger us to return WC pages back to the system, requiring conversion back to WB which itself may sleep. That makes i915_vma_close() unsuitable for use inside the RCU read lock, which we need to hold to iterate the radixtree. The fix is quite simple, we can close all the VMA as we close the ppgtt, we only need to do that instead of closing them during destruction of the LUT. v2: Order between closing the LUT and the ppgtt is important; we use the vma inside the LUT as a means of retrieving the object, and so we must clear the LUT before freeing the VMA when closing the ppgtt. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103638 Fixes: 547da76b5777 ("drm/i915: Hold rcu_read_lock when iterating over the radixtree (vma idr)") Fixes: d1b48c1e7184 ("drm/i915: Replace execbuf vma ht with an idr") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171109085540.32264-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_context.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 10affb35ac56..c05c3d7d21a5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -107,14 +107,9 @@ static void lut_close(struct i915_gem_context *ctx) rcu_read_lock(); radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) { struct i915_vma *vma = rcu_dereference_raw(*slot); - struct drm_i915_gem_object *obj = vma->obj; radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); - - if (!i915_vma_is_ggtt(vma)) - i915_vma_close(vma); - - __i915_gem_object_release_unless_active(obj); + __i915_gem_object_release_unless_active(vma->obj); } rcu_read_unlock(); } @@ -200,6 +195,11 @@ static void context_close(struct i915_gem_context *ctx) { i915_gem_context_set_closed(ctx); + /* + * The LUT uses the VMA as a backpointer to unref the object, + * so we need to clear the LUT before we close all the VMA (inside + * the ppgtt). + */ lut_close(ctx); if (ctx->ppgtt) i915_ppgtt_close(&ctx->ppgtt->base); From adc103047e583497f675f0db32b758c8c2069633 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 Oct 2017 22:51:14 +0200 Subject: [PATCH 112/260] drm/i915: Eliminate some encoder->crtc usage from DP code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the current crtc from the crtc state rather than via the legacy encoder->crtc pointer whenever possible. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171031205123.13123-2-ville.syrjala@linux.intel.com Acked-by: Jani Nikula Reviewed-by: Dhinakaran Pandiyan --- drivers/gpu/drm/i915/intel_dp.c | 63 +++++++++++++++------------------ 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d27c0145ac91..4f64d83537d9 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -129,10 +129,12 @@ static struct intel_dp *intel_attached_dp(struct drm_connector *connector) return enc_to_intel_dp(&intel_attached_encoder(connector)->base); } -static void intel_dp_link_down(struct intel_dp *intel_dp); +static void intel_dp_link_down(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state); static bool edp_panel_vdd_on(struct intel_dp *intel_dp); static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync); -static void vlv_init_panel_power_sequencer(struct intel_dp *intel_dp); +static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); static void vlv_steal_power_sequencer(struct drm_device *dev, enum pipe pipe); static void intel_dp_unset_edid(struct intel_dp *intel_dp); @@ -1858,7 +1860,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); enum port port = dp_to_dig_port(intel_dp)->port; - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; intel_dp_set_link_params(intel_dp, pipe_config->port_clock, @@ -2491,10 +2493,10 @@ static void ironlake_edp_pll_on(struct intel_dp *intel_dp, udelay(200); } -static void ironlake_edp_pll_off(struct intel_dp *intel_dp) +static void ironlake_edp_pll_off(struct intel_dp *intel_dp, + const struct intel_crtc_state *old_crtc_state) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); assert_pipe_disabled(dev_priv, crtc->pipe); @@ -2624,7 +2626,7 @@ static void intel_dp_get_config(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum port port = dp_to_dig_port(intel_dp)->port; - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); if (encoder->type == INTEL_OUTPUT_EDP) pipe_config->output_types |= BIT(INTEL_OUTPUT_EDP); @@ -2723,12 +2725,10 @@ static void g4x_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - intel_disable_dp(encoder, old_crtc_state, old_conn_state); /* disable the port before the pipe on g4x */ - intel_dp_link_down(intel_dp); + intel_dp_link_down(encoder, old_crtc_state); } static void ilk_disable_dp(struct intel_encoder *encoder, @@ -2754,33 +2754,29 @@ static void ilk_post_disable_dp(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = encoder->port; - intel_dp_link_down(intel_dp); + intel_dp_link_down(encoder, old_crtc_state); /* Only ilk+ has port A */ if (port == PORT_A) - ironlake_edp_pll_off(intel_dp); + ironlake_edp_pll_off(intel_dp, old_crtc_state); } static void vlv_post_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - - intel_dp_link_down(intel_dp); + intel_dp_link_down(encoder, old_crtc_state); } static void chv_post_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - intel_dp_link_down(intel_dp); + intel_dp_link_down(encoder, old_crtc_state); mutex_lock(&dev_priv->sb_lock); @@ -2909,7 +2905,7 @@ static void intel_enable_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); uint32_t dp_reg = I915_READ(intel_dp->output_reg); enum pipe pipe = crtc->pipe; @@ -2919,7 +2915,7 @@ static void intel_enable_dp(struct intel_encoder *encoder, pps_lock(intel_dp); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_init_panel_power_sequencer(intel_dp); + vlv_init_panel_power_sequencer(encoder, pipe_config); intel_dp_enable_port(intel_dp, pipe_config); @@ -3047,13 +3043,13 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, } } -static void vlv_init_panel_power_sequencer(struct intel_dp *intel_dp) +static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *encoder = &intel_dig_port->base; + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); lockdep_assert_held(&dev_priv->pps_mutex); @@ -3084,7 +3080,7 @@ static void vlv_init_panel_power_sequencer(struct intel_dp *intel_dp) intel_dp->pps_pipe = crtc->pipe; DRM_DEBUG_KMS("initializing pipe %c power sequencer for port %c\n", - pipe_name(intel_dp->pps_pipe), port_name(intel_dig_port->port)); + pipe_name(intel_dp->pps_pipe), port_name(encoder->port)); /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(dev, intel_dp); @@ -3624,13 +3620,13 @@ void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) } static void -intel_dp_link_down(struct intel_dp *intel_dp) +intel_dp_link_down(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc); - enum port port = intel_dig_port->port; - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + enum port port = encoder->port; uint32_t DP = intel_dp->DP; if (WARN_ON(HAS_DDI(dev_priv))) @@ -5493,7 +5489,6 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, dig_port = dp_to_dig_port(intel_dp); encoder = &dig_port->base; - intel_crtc = to_intel_crtc(encoder->base.crtc); if (!intel_crtc) { DRM_DEBUG_KMS("DRRS: intel_crtc not initialized\n"); From 3c0628f8ce220f35a545de40f0426b9b999ce02d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 Oct 2017 22:51:15 +0200 Subject: [PATCH 113/260] drm/i915: Eliminate some encoder->crtc usage from DSI code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the current crtc from the crtc state rather than via the legacy encoder->crtc pointer whenever possible. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171031205123.13123-3-ville.syrjala@linux.intel.com Acked-by: Jani Nikula Reviewed-by: Dhinakaran Pandiyan --- drivers/gpu/drm/i915/intel_dsi.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 2bff7ab25bf3..f09474b0c4d3 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -662,11 +662,11 @@ static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) } } -static void intel_dsi_port_enable(struct intel_encoder *encoder) +static void intel_dsi_port_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; @@ -705,7 +705,7 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder) if (IS_BROXTON(dev_priv)) temp |= LANE_CONFIGURATION_DUAL_LINK_A; else - temp |= intel_crtc->pipe ? + temp |= crtc->pipe ? LANE_CONFIGURATION_DUAL_LINK_B : LANE_CONFIGURATION_DUAL_LINK_A; } @@ -875,7 +875,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); - intel_dsi_port_enable(encoder); + intel_dsi_port_enable(encoder, pipe_config); } intel_panel_enable_backlight(pipe_config, conn_state); @@ -1082,7 +1082,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; struct drm_display_mode *adjusted_mode_sw; - struct intel_crtc *intel_crtc; + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); unsigned int lane_count = intel_dsi->lane_count; unsigned int bpp, fmt; @@ -1093,8 +1093,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, crtc_hblank_start_sw, crtc_hblank_end_sw; /* FIXME: hw readout should not depend on SW state */ - intel_crtc = to_intel_crtc(encoder->base.crtc); - adjusted_mode_sw = &intel_crtc->config->base.adjusted_mode; + adjusted_mode_sw = &crtc->config->base.adjusted_mode; /* * Atleast one port is active as encoder->get_config called only if From 463320ae538bb02fd16ca9ff9ed39a0b35d920e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 Oct 2017 22:51:16 +0200 Subject: [PATCH 114/260] drm/i915: Eliminate some encoder->crtc usage from SDVO code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the current crtc from the crtc state rather than via the legacy encoder->crtc pointer whenever possible. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171031205123.13123-4-ville.syrjala@linux.intel.com Acked-by: Jani Nikula Reviewed-by: Dhinakaran Pandiyan --- drivers/gpu/drm/i915/intel_sdvo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 42ec2d1f7a61..2b8764897d68 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1512,7 +1512,7 @@ static void intel_disable_sdvo(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); u32 temp; intel_sdvo_set_active_outputs(intel_sdvo, 0); @@ -1571,7 +1571,7 @@ static void intel_enable_sdvo(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); u32 temp; bool input1, input2; int i; From a7f519ba706b3be9280c063e7e231da138c202de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 Oct 2017 22:51:17 +0200 Subject: [PATCH 115/260] drm/i915: Eliminate some encoder->crtc usage from TV code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the current crtc from the crtc state rather than via the legacy encoder->crtc pointer whenever possible. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171031205123.13123-5-ville.syrjala@linux.intel.com Acked-by: Jani Nikula Reviewed-by: Dhinakaran Pandiyan --- drivers/gpu/drm/i915/intel_tv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index b18609cebe03..b3dabc219e6a 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -822,7 +822,7 @@ intel_enable_tv(struct intel_encoder *encoder, /* Prevents vblank waits from timing out in intel_tv_detect_type() */ intel_wait_for_vblank(dev_priv, - to_intel_crtc(encoder->base.crtc)->pipe); + to_intel_crtc(pipe_config->base.crtc)->pipe); I915_WRITE(TV_CTL, I915_READ(TV_CTL) | TV_ENC_ENABLE); } @@ -982,7 +982,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); u32 tv_ctl; From 2e1029c672c252ad5f34c0bcd0ebacf4a6412cd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 Oct 2017 22:51:18 +0200 Subject: [PATCH 116/260] drm/i915: Pass crtc state to DPIO PHY functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than digging through encoder->crtc and crtc->config in the DPIO PHY functions, pass down the correct crtc state from the caller. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171031205123.13123-6-ville.syrjala@linux.intel.com Acked-by: Jani Nikula Reviewed-by: Dhinakaran Pandiyan --- drivers/gpu/drm/i915/i915_drv.h | 19 ++++-- drivers/gpu/drm/i915/intel_dp.c | 16 ++--- drivers/gpu/drm/i915/intel_dpio_phy.c | 87 +++++++++++++-------------- drivers/gpu/drm/i915/intel_hdmi.c | 17 +++--- 4 files changed, 72 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5e6938d0a903..19b679ca8211 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4200,18 +4200,25 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, u32 deemph_reg_value, u32 margin_reg_value, bool uniq_trans_scale); void chv_data_lane_soft_reset(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, bool reset); -void chv_phy_pre_pll_enable(struct intel_encoder *encoder); -void chv_phy_pre_encoder_enable(struct intel_encoder *encoder); +void chv_phy_pre_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); void chv_phy_release_cl2_override(struct intel_encoder *encoder); -void chv_phy_post_pll_disable(struct intel_encoder *encoder); +void chv_phy_post_pll_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state); void vlv_set_phy_signal_level(struct intel_encoder *encoder, u32 demph_reg_value, u32 preemph_reg_value, u32 uniqtranscale_reg_value, u32 tx3_demph); -void vlv_phy_pre_pll_enable(struct intel_encoder *encoder); -void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder); -void vlv_phy_reset_lanes(struct intel_encoder *encoder); +void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void vlv_phy_reset_lanes(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state); int intel_gpu_freq(struct drm_i915_private *dev_priv, int val); int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 4f64d83537d9..0151c8d18b79 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2781,7 +2781,7 @@ static void chv_post_disable_dp(struct intel_encoder *encoder, mutex_lock(&dev_priv->sb_lock); /* Assert data lane reset */ - chv_data_lane_soft_reset(encoder, true); + chv_data_lane_soft_reset(encoder, old_crtc_state, true); mutex_unlock(&dev_priv->sb_lock); } @@ -3091,7 +3091,7 @@ static void vlv_pre_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - vlv_phy_pre_encoder_enable(encoder); + vlv_phy_pre_encoder_enable(encoder, pipe_config); intel_enable_dp(encoder, pipe_config, conn_state); } @@ -3102,14 +3102,14 @@ static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder, { intel_dp_prepare(encoder, pipe_config); - vlv_phy_pre_pll_enable(encoder); + vlv_phy_pre_pll_enable(encoder, pipe_config); } static void chv_pre_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - chv_phy_pre_encoder_enable(encoder); + chv_phy_pre_encoder_enable(encoder, pipe_config); intel_enable_dp(encoder, pipe_config, conn_state); @@ -3123,14 +3123,14 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder, { intel_dp_prepare(encoder, pipe_config); - chv_phy_pre_pll_enable(encoder); + chv_phy_pre_pll_enable(encoder, pipe_config); } static void chv_dp_post_pll_disable(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, - const struct drm_connector_state *conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { - chv_phy_post_pll_disable(encoder); + chv_phy_post_pll_disable(encoder, old_crtc_state); } /* diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 490a42d9a3e8..5958d3d8b90e 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -733,11 +733,12 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, } void chv_data_lane_soft_reset(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, bool reset) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base)); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); enum pipe pipe = crtc->pipe; uint32_t val; @@ -776,17 +777,16 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder, } } -void chv_phy_pre_pll_enable(struct intel_encoder *encoder) +void chv_phy_pre_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = - to_intel_crtc(encoder->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); enum dpio_channel ch = vlv_dport_to_channel(dport); - enum pipe pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; unsigned int lane_mask = - intel_dp_unused_lane_mask(intel_crtc->config->lane_count); + intel_dp_unused_lane_mask(crtc_state->lane_count); u32 val; /* @@ -802,7 +802,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder) mutex_lock(&dev_priv->sb_lock); /* Assert data lane reset */ - chv_data_lane_soft_reset(encoder, true); + chv_data_lane_soft_reset(encoder, crtc_state, true); /* program left/right clock distribution */ if (pipe != PIPE_B) { @@ -832,7 +832,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder) val |= CHV_PCS_USEDCLKCHANNEL; vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW8(ch), val); - if (intel_crtc->config->lane_count > 2) { + if (crtc_state->lane_count > 2) { val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW8(ch)); val |= CHV_PCS_USEDCLKCHANNEL_OVRRIDE; if (pipe != PIPE_B) @@ -857,16 +857,15 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder) mutex_unlock(&dev_priv->sb_lock); } -void chv_phy_pre_encoder_enable(struct intel_encoder *encoder) +void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_digital_port *dport = dp_to_dig_port(intel_dp); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = - to_intel_crtc(encoder->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); enum dpio_channel ch = vlv_dport_to_channel(dport); - int pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; int data, i, stagger; u32 val; @@ -877,16 +876,16 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder) val &= ~DPIO_LANEDESKEW_STRAP_OVRD; vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW11(ch), val); - if (intel_crtc->config->lane_count > 2) { + if (crtc_state->lane_count > 2) { val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW11(ch)); val &= ~DPIO_LANEDESKEW_STRAP_OVRD; vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val); } /* Program Tx lane latency optimal setting*/ - for (i = 0; i < intel_crtc->config->lane_count; i++) { + for (i = 0; i < crtc_state->lane_count; i++) { /* Set the upar bit */ - if (intel_crtc->config->lane_count == 1) + if (crtc_state->lane_count == 1) data = 0x0; else data = (i == 1) ? 0x0 : 0x1; @@ -895,13 +894,13 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder) } /* Data lane stagger programming */ - if (intel_crtc->config->port_clock > 270000) + if (crtc_state->port_clock > 270000) stagger = 0x18; - else if (intel_crtc->config->port_clock > 135000) + else if (crtc_state->port_clock > 135000) stagger = 0xd; - else if (intel_crtc->config->port_clock > 67500) + else if (crtc_state->port_clock > 67500) stagger = 0x7; - else if (intel_crtc->config->port_clock > 33750) + else if (crtc_state->port_clock > 33750) stagger = 0x4; else stagger = 0x2; @@ -910,7 +909,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder) val |= DPIO_TX2_STAGGER_MASK(0x1f); vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW11(ch), val); - if (intel_crtc->config->lane_count > 2) { + if (crtc_state->lane_count > 2) { val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW11(ch)); val |= DPIO_TX2_STAGGER_MASK(0x1f); vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val); @@ -923,7 +922,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder) DPIO_TX1_STAGGER_MULT(6) | DPIO_TX2_STAGGER_MULT(0)); - if (intel_crtc->config->lane_count > 2) { + if (crtc_state->lane_count > 2) { vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW12(ch), DPIO_LANESTAGGER_STRAP(stagger) | DPIO_LANESTAGGER_STRAP_OVRD | @@ -933,7 +932,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder) } /* Deassert data lane reset */ - chv_data_lane_soft_reset(encoder, false); + chv_data_lane_soft_reset(encoder, crtc_state, false); mutex_unlock(&dev_priv->sb_lock); } @@ -949,10 +948,11 @@ void chv_phy_release_cl2_override(struct intel_encoder *encoder) } } -void chv_phy_post_pll_disable(struct intel_encoder *encoder) +void chv_phy_post_pll_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum pipe pipe = to_intel_crtc(encoder->base.crtc)->pipe; + enum pipe pipe = to_intel_crtc(old_crtc_state->base.crtc)->pipe; u32 val; mutex_lock(&dev_priv->sb_lock); @@ -990,7 +990,7 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); enum dpio_channel port = vlv_dport_to_channel(dport); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; mutex_lock(&dev_priv->sb_lock); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000); @@ -1008,15 +1008,14 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, mutex_unlock(&dev_priv->sb_lock); } -void vlv_phy_pre_pll_enable(struct intel_encoder *encoder) +void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = - to_intel_crtc(encoder->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); enum dpio_channel port = vlv_dport_to_channel(dport); - int pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; /* Program Tx lane resets to default */ mutex_lock(&dev_priv->sb_lock); @@ -1036,15 +1035,15 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder) mutex_unlock(&dev_priv->sb_lock); } -void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder) +void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_digital_port *dport = dp_to_dig_port(intel_dp); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); enum dpio_channel port = vlv_dport_to_channel(dport); - int pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; u32 val; mutex_lock(&dev_priv->sb_lock); @@ -1066,14 +1065,14 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder) mutex_unlock(&dev_priv->sb_lock); } -void vlv_phy_reset_lanes(struct intel_encoder *encoder) +void vlv_phy_reset_lanes(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = - to_intel_crtc(encoder->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); enum dpio_channel port = vlv_dport_to_channel(dport); - int pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; mutex_lock(&dev_priv->sb_lock); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index fa1c793a21ef..1f2258dec09e 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1683,10 +1683,9 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - vlv_phy_pre_encoder_enable(encoder); + vlv_phy_pre_encoder_enable(encoder, pipe_config); /* HDMI 1.0V-2dB */ vlv_set_phy_signal_level(encoder, 0x2b245f5f, 0x00002000, 0x5578b83a, @@ -1707,7 +1706,7 @@ static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder, { intel_hdmi_prepare(encoder, pipe_config); - vlv_phy_pre_pll_enable(encoder); + vlv_phy_pre_pll_enable(encoder, pipe_config); } static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder, @@ -1716,14 +1715,14 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder, { intel_hdmi_prepare(encoder, pipe_config); - chv_phy_pre_pll_enable(encoder); + chv_phy_pre_pll_enable(encoder, pipe_config); } static void chv_hdmi_post_pll_disable(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - chv_phy_post_pll_disable(encoder); + chv_phy_post_pll_disable(encoder, old_crtc_state); } static void vlv_hdmi_post_disable(struct intel_encoder *encoder, @@ -1731,7 +1730,7 @@ static void vlv_hdmi_post_disable(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { /* Reset lanes to avoid HDMI flicker (VLV w/a) */ - vlv_phy_reset_lanes(encoder); + vlv_phy_reset_lanes(encoder, old_crtc_state); } static void chv_hdmi_post_disable(struct intel_encoder *encoder, @@ -1744,7 +1743,7 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder, mutex_lock(&dev_priv->sb_lock); /* Assert data lane reset */ - chv_data_lane_soft_reset(encoder, true); + chv_data_lane_soft_reset(encoder, old_crtc_state, true); mutex_unlock(&dev_priv->sb_lock); } @@ -1757,7 +1756,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - chv_phy_pre_encoder_enable(encoder); + chv_phy_pre_encoder_enable(encoder, pipe_config); /* FIXME: Program the support xxx V-dB */ /* Use 800mV-0dB */ From c249f1f423eac64b7aaeaa369935e9c1e6c9f4d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 Oct 2017 22:51:19 +0200 Subject: [PATCH 117/260] drm/i915: Eliminate crtc->config usage from CRT code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace crtc->config usage with the passed down crtc state. Also take the opportunity for some s/pipe_config/crtc_state/ while at it. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171031205123.13123-7-ville.syrjala@linux.intel.com Acked-by: Jani Nikula Reviewed-by: Dhinakaran Pandiyan --- drivers/gpu/drm/i915/intel_crt.c | 44 ++++++++++++++------------------ 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 9c000ac612da..9f31aea51dff 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -219,11 +219,9 @@ static void hsw_disable_crt(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct drm_crtc *crtc = old_crtc_state->base.crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - WARN_ON(!intel_crtc->config->has_pch_encoder); + WARN_ON(!old_crtc_state->has_pch_encoder); intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); } @@ -247,46 +245,42 @@ static void hsw_post_disable_crt(struct intel_encoder *encoder, } static void hsw_pre_pll_enable_crt(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, + const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct drm_crtc *crtc = pipe_config->base.crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - WARN_ON(!intel_crtc->config->has_pch_encoder); + WARN_ON(!crtc_state->has_pch_encoder); intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); } static void hsw_pre_enable_crt(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, + const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct drm_crtc *crtc = pipe_config->base.crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum pipe pipe = crtc->pipe; - WARN_ON(!intel_crtc->config->has_pch_encoder); + WARN_ON(!crtc_state->has_pch_encoder); intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); - dev_priv->display.fdi_link_train(intel_crtc, pipe_config); + dev_priv->display.fdi_link_train(crtc, crtc_state); } static void hsw_enable_crt(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, + const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct drm_crtc *crtc = pipe_config->base.crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum pipe pipe = crtc->pipe; - WARN_ON(!intel_crtc->config->has_pch_encoder); + WARN_ON(!crtc_state->has_pch_encoder); - intel_crt_set_dpms(encoder, pipe_config, DRM_MODE_DPMS_ON); + intel_crt_set_dpms(encoder, crtc_state, DRM_MODE_DPMS_ON); intel_wait_for_vblank(dev_priv, pipe); intel_wait_for_vblank(dev_priv, pipe); @@ -295,10 +289,10 @@ static void hsw_enable_crt(struct intel_encoder *encoder, } static void intel_enable_crt(struct intel_encoder *encoder, - const struct intel_crtc_state *pipe_config, + const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - intel_crt_set_dpms(encoder, pipe_config, DRM_MODE_DPMS_ON); + intel_crt_set_dpms(encoder, crtc_state, DRM_MODE_DPMS_ON); } static enum drm_mode_status From 005b5bc694cf7e0a899fd5d02a06459df191e1ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 Oct 2017 22:51:20 +0200 Subject: [PATCH 118/260] drm/i915: Replace dig_port->port with encoder port for BXT DPLL selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace dig_port->port with encoder->port in the BXT DPLL selection. We can do this because both the master encoder and the fake MST encoders have the same encoder->port value, whereas using dig_port->port only worked for the master encoder since the fake encoders were't derived from intel_digital_port. This eliminates the DP MST special case. Do this by hand because spatch is having problems with the control flow due to the dig_port assignment happening in two different branches. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171031205123.13123-8-ville.syrjala@linux.intel.com Acked-by: Jani Nikula Reviewed-by: Dhinakaran Pandiyan --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index a83bf1c38e05..be74d4767c8a 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1802,7 +1802,6 @@ bxt_get_dpll(struct intel_crtc *crtc, { struct intel_dpll_hw_state dpll_hw_state = { }; struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_digital_port *intel_dig_port; struct intel_shared_dpll *pll; int i, clock = crtc_state->port_clock; @@ -1820,15 +1819,8 @@ bxt_get_dpll(struct intel_crtc *crtc, crtc_state->dpll_hw_state = dpll_hw_state; - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); - - intel_dig_port = intel_mst->primary; - } else - intel_dig_port = enc_to_dig_port(&encoder->base); - /* 1:1 mapping between ports and PLLs */ - i = (enum intel_dpll_id) intel_dig_port->port; + i = (enum intel_dpll_id) encoder->port; pll = intel_get_shared_dpll_by_id(dev_priv, i); DRM_DEBUG_KMS("[CRTC:%d:%s] using pre-allocated %s\n", From 8f4f279707236e273cd50b095a06797fe0ed6ccc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 9 Nov 2017 17:24:34 +0200 Subject: [PATCH 119/260] drm/i915: Nuke intel_digital_port->port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove intel_digital_port->port and replace its users with intel_encoder->port. intel_encoder->port is a superset of intel_digital_port->port, and it works correctly even for MST encoders. v2: Eliminate a few dp_to_dig_port()->base.port cases too (DK) Performed with cocci: @@ @@ struct intel_digital_port { ... - enum port port; ... } @@ struct intel_digital_port *D; expression E; @@ - D->port = E; @@ struct intel_digital_port *D; @@ - D->port + D->base.port @ expression E; @@ ( - dp_to_dig_port(E)->port + dp_to_dig_port(E)->base.port | - enc_to_dig_port(E)->port + to_intel_encoder(E)->port ) @@ expression E; @@ - to_intel_encoder(&E->base) + E @@ struct intel_digital_port *D; identifier I, M; @@ I = &D->base <... ( - D->base.M + I->M | - &D->base + I ) ...> @@ identifier D; expression E; identifier M; @@ D = enc_to_dig_port(&E->base) <... ( - D->base.M + E->M | - &D->base + E ) ...> @@ identifier D, DP; expression E; identifier M; @@ DP = enc_to_intel_dp(&E->base) <... ( - dp_to_dig_port(DP)->base.M + E->M | - &dp_to_dig_port(DP)->base + E ) ...> @@ expression E; identifier M; @@ ( - enc_to_dig_port(&E->base)->base.M + E->M | - enc_to_dig_port(&E->base)->base + E | - enc_to_mst(&E->base)->primary->base.port + E->port ) @@ expression E; identifier D; @@ - struct intel_digital_port *D = E; ... when != D Cc: Dhinakaran Pandiyan Reviewed-by: Dhinakaran Pandiyan Acked-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171109152434.32074-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/intel_ddi.c | 13 ++-- drivers/gpu/drm/i915/intel_display.c | 13 ++-- drivers/gpu/drm/i915/intel_dp.c | 103 +++++++++++++------------- drivers/gpu/drm/i915/intel_dp_mst.c | 6 +- drivers/gpu/drm/i915/intel_dpio_phy.c | 12 ++- drivers/gpu/drm/i915/intel_drv.h | 5 +- drivers/gpu/drm/i915/intel_hdmi.c | 17 ++--- drivers/gpu/drm/i915/intel_pipe_crc.c | 4 +- drivers/gpu/drm/i915/intel_psr.c | 4 +- 10 files changed, 87 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 39883cd915db..d89321f0468c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3603,7 +3603,7 @@ static int i915_dp_mst_info(struct seq_file *m, void *unused) continue; seq_printf(m, "MST Source Port %c\n", - port_name(intel_dig_port->port)); + port_name(intel_dig_port->base.port)); drm_dp_mst_dump_topology(m, &intel_dig_port->dp.mst_mgr); } drm_connector_list_iter_end(&conn_iter); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ace674cd79b9..65ab55496ab7 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1801,8 +1801,8 @@ static void skl_ddi_set_iboost(struct intel_encoder *encoder, int level, enum intel_output_type type) { struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); - struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - enum port port = intel_dig_port->port; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; uint8_t iboost; if (type == INTEL_OUTPUT_HDMI) @@ -2470,7 +2470,7 @@ void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; uint32_t val; bool wait = false; @@ -2698,7 +2698,7 @@ static struct intel_connector * intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port) { struct intel_connector *connector; - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; connector = intel_connector_alloc(); if (!connector) @@ -2717,7 +2717,7 @@ static struct intel_connector * intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) { struct intel_connector *connector; - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; connector = intel_connector_alloc(); if (!connector) @@ -2733,7 +2733,7 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dport) { struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); - if (dport->port != PORT_A) + if (dport->base.port != PORT_A) return false; if (dport->saved_port_bits & DDI_A_4_LANES) @@ -2836,7 +2836,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->suspend = intel_dp_encoder_suspend; intel_encoder->get_power_domains = intel_ddi_get_power_domains; - intel_dig_port->port = port; intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 84817ccc5305..435dc18aa239 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1701,7 +1701,7 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, u32 port_mask; i915_reg_t dpll_reg; - switch (dport->port) { + switch (dport->base.port) { case PORT_B: port_mask = DPLL_PORTB_READY_MASK; dpll_reg = DPLL(0); @@ -1723,7 +1723,8 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, dpll_reg, port_mask, expected_mask, 1000)) WARN(1, "timed out waiting for port %c ready: got 0x%x, expected 0x%x\n", - port_name(dport->port), I915_READ(dpll_reg) & port_mask, expected_mask); + port_name(dport->base.port), + I915_READ(dpll_reg) & port_mask, expected_mask); } static void ironlake_enable_pch_transcoder(struct drm_i915_private *dev_priv, @@ -4481,7 +4482,7 @@ intel_trans_dp_port_sel(struct intel_crtc *crtc) for_each_encoder_on_crtc(dev, &crtc->base, encoder) { if (encoder->type == INTEL_OUTPUT_DP || encoder->type == INTEL_OUTPUT_EDP) - return enc_to_dig_port(&encoder->base)->port; + return encoder->port; } return -1; @@ -7632,7 +7633,7 @@ static void ironlake_init_pch_refclk(struct drm_i915_private *dev_priv) break; case INTEL_OUTPUT_EDP: has_panel = true; - if (enc_to_dig_port(&encoder->base)->port == PORT_A) + if (encoder->port == PORT_A) has_cpu_edp = true; break; default: @@ -10755,7 +10756,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) case INTEL_OUTPUT_DP: case INTEL_OUTPUT_HDMI: case INTEL_OUTPUT_EDP: - port_mask = 1 << enc_to_dig_port(&encoder->base)->port; + port_mask = 1 << encoder->port; /* the same port mustn't appear more than once */ if (used_ports & port_mask) @@ -10765,7 +10766,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) break; case INTEL_OUTPUT_DP_MST: used_mst_ports |= - 1 << enc_to_mst(&encoder->base)->primary->port; + 1 << encoder->port; break; default: break; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0151c8d18b79..9b0c6dea40bc 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -223,7 +223,7 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum port port = dig_port->port; + enum port port = dig_port->base.port; const int *source_rates; int size; u32 voltage; @@ -479,11 +479,11 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) if (WARN(I915_READ(intel_dp->output_reg) & DP_PORT_EN, "skipping pipe %c power seqeuncer kick due to port %c being active\n", - pipe_name(pipe), port_name(intel_dig_port->port))) + pipe_name(pipe), port_name(intel_dig_port->base.port))) return; DRM_DEBUG_KMS("kicking pipe %c power sequencer for port %c\n", - pipe_name(pipe), port_name(intel_dig_port->port)); + pipe_name(pipe), port_name(intel_dig_port->base.port)); /* Preserve the BIOS-computed detected bit. This is * supposed to be read-only. @@ -610,7 +610,7 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp) DRM_DEBUG_KMS("picked pipe %c power sequencer for port %c\n", pipe_name(intel_dp->pps_pipe), - port_name(intel_dig_port->port)); + port_name(intel_dig_port->base.port)); /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(dev, intel_dp); @@ -706,7 +706,7 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; lockdep_assert_held(&dev_priv->pps_mutex); @@ -966,7 +966,7 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * like to run at 2MHz. So, take the cdclk or PCH rawclk value and * divide by 2000 and use that */ - if (intel_dig_port->port == PORT_A) + if (intel_dig_port->base.port == PORT_A) return DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.cdclk, 2000); else return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); @@ -977,7 +977,7 @@ static uint32_t hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - if (intel_dig_port->port != PORT_A && HAS_PCH_LPT_H(dev_priv)) { + if (intel_dig_port->base.port != PORT_A && HAS_PCH_LPT_H(dev_priv)) { /* Workaround for non-ULT HSW */ switch (index) { case 0: return 63; @@ -1447,7 +1447,7 @@ static void intel_aux_reg_init(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); enum port port = intel_aux_port(dev_priv, - dp_to_dig_port(intel_dp)->port); + dp_to_dig_port(intel_dp)->base.port); int i; intel_dp->aux_ch_ctl_reg = intel_aux_ctl_reg(dev_priv, port); @@ -1465,7 +1465,7 @@ static void intel_dp_aux_init(struct intel_dp *intel_dp) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; intel_aux_reg_init(intel_dp); drm_dp_aux_init(&intel_dp->aux); @@ -1635,7 +1635,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = encoder->port; struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_connector *intel_connector = intel_dp->attached_connector; struct intel_digital_connector_state *intel_conn_state = @@ -1859,7 +1859,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = encoder->port; struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; @@ -2074,7 +2074,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) intel_display_power_get(dev_priv, intel_dp->aux_power_domain); DRM_DEBUG_KMS("Turning eDP port %c VDD on\n", - port_name(intel_dig_port->port)); + port_name(intel_dig_port->base.port)); if (!edp_have_panel_power(intel_dp)) wait_panel_power_cycle(intel_dp); @@ -2094,7 +2094,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) */ if (!edp_have_panel_power(intel_dp)) { DRM_DEBUG_KMS("eDP port %c panel power wasn't enabled\n", - port_name(intel_dig_port->port)); + port_name(intel_dig_port->base.port)); msleep(intel_dp->panel_power_up_delay); } @@ -2120,7 +2120,7 @@ void intel_edp_panel_vdd_on(struct intel_dp *intel_dp) pps_unlock(intel_dp); I915_STATE_WARN(!vdd, "eDP port %c VDD already requested on\n", - port_name(dp_to_dig_port(intel_dp)->port)); + port_name(dp_to_dig_port(intel_dp)->base.port)); } static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) @@ -2140,7 +2140,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) return; DRM_DEBUG_KMS("Turning eDP port %c VDD off\n", - port_name(intel_dig_port->port)); + port_name(intel_dig_port->base.port)); pp = ironlake_get_pp_control(intel_dp); pp &= ~EDP_FORCE_VDD; @@ -2200,7 +2200,7 @@ static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync) return; I915_STATE_WARN(!intel_dp->want_panel_vdd, "eDP port %c VDD not forced on", - port_name(dp_to_dig_port(intel_dp)->port)); + port_name(dp_to_dig_port(intel_dp)->base.port)); intel_dp->want_panel_vdd = false; @@ -2223,11 +2223,11 @@ static void edp_panel_on(struct intel_dp *intel_dp) return; DRM_DEBUG_KMS("Turn eDP port %c panel power on\n", - port_name(dp_to_dig_port(intel_dp)->port)); + port_name(dp_to_dig_port(intel_dp)->base.port)); if (WARN(edp_have_panel_power(intel_dp), "eDP port %c panel power already on\n", - port_name(dp_to_dig_port(intel_dp)->port))) + port_name(dp_to_dig_port(intel_dp)->base.port))) return; wait_panel_power_cycle(intel_dp); @@ -2282,10 +2282,10 @@ static void edp_panel_off(struct intel_dp *intel_dp) return; DRM_DEBUG_KMS("Turn eDP port %c panel power off\n", - port_name(dp_to_dig_port(intel_dp)->port)); + port_name(dp_to_dig_port(intel_dp)->base.port)); WARN(!intel_dp->want_panel_vdd, "Need eDP port %c VDD to turn off panel\n", - port_name(dp_to_dig_port(intel_dp)->port)); + port_name(dp_to_dig_port(intel_dp)->base.port)); pp = ironlake_get_pp_control(intel_dp); /* We need to switch off panel power _and_ force vdd, for otherwise some @@ -2437,7 +2437,7 @@ static void assert_dp_port(struct intel_dp *intel_dp, bool state) I915_STATE_WARN(cur_state != state, "DP port %c state assertion failure (expected %s, current %s)\n", - port_name(dig_port->port), + port_name(dig_port->base.port), onoff(state), onoff(cur_state)); } #define assert_dp_port_disabled(d) assert_dp_port((d), false) @@ -2570,7 +2570,7 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = encoder->port; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); u32 tmp; @@ -2625,7 +2625,7 @@ static void intel_dp_get_config(struct intel_encoder *encoder, u32 tmp, flags = 0; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = encoder->port; struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); if (encoder->type == INTEL_OUTPUT_EDP) @@ -2794,7 +2794,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; if (dp_train_pat & DP_TRAINING_PATTERN_MASK) DRM_DEBUG_KMS("Using DP training pattern TPS%d\n", @@ -2969,7 +2969,7 @@ static void g4x_pre_enable_dp(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = encoder->port; intel_dp_prepare(encoder, pipe_config); @@ -3002,7 +3002,7 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) * from a port. */ DRM_DEBUG_KMS("detaching pipe %c power sequencer from port %c\n", - pipe_name(pipe), port_name(intel_dig_port->port)); + pipe_name(pipe), port_name(intel_dig_port->base.port)); I915_WRITE(pp_on_reg, 0); POSTING_READ(pp_on_reg); @@ -3026,7 +3026,7 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, continue; intel_dp = enc_to_intel_dp(&encoder->base); - port = dp_to_dig_port(intel_dp)->port; + port = dp_to_dig_port(intel_dp)->base.port; WARN(intel_dp->active_pipe == pipe, "stealing pipe %c power sequencer from active (e)DP port %c\n", @@ -3178,7 +3178,7 @@ uint8_t intel_dp_voltage_max(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = dp_to_dig_port(intel_dp)->base.port; if (INTEL_GEN(dev_priv) >= 9) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; @@ -3197,7 +3197,7 @@ uint8_t intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = dp_to_dig_port(intel_dp)->base.port; if (INTEL_GEN(dev_priv) >= 9) { switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { @@ -3531,7 +3531,7 @@ void intel_dp_set_signal_levels(struct intel_dp *intel_dp) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); uint32_t signal_levels, mask = 0; @@ -3591,7 +3591,7 @@ void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; uint32_t val; if (!HAS_DDI(dev_priv)) @@ -4490,7 +4490,7 @@ static bool ibx_digital_port_connected(struct drm_i915_private *dev_priv, { u32 bit; - switch (port->port) { + switch (port->base.port) { case PORT_B: bit = SDE_PORTB_HOTPLUG; break; @@ -4501,7 +4501,7 @@ static bool ibx_digital_port_connected(struct drm_i915_private *dev_priv, bit = SDE_PORTD_HOTPLUG; break; default: - MISSING_CASE(port->port); + MISSING_CASE(port->base.port); return false; } @@ -4513,7 +4513,7 @@ static bool cpt_digital_port_connected(struct drm_i915_private *dev_priv, { u32 bit; - switch (port->port) { + switch (port->base.port) { case PORT_B: bit = SDE_PORTB_HOTPLUG_CPT; break; @@ -4524,7 +4524,7 @@ static bool cpt_digital_port_connected(struct drm_i915_private *dev_priv, bit = SDE_PORTD_HOTPLUG_CPT; break; default: - MISSING_CASE(port->port); + MISSING_CASE(port->base.port); return false; } @@ -4536,7 +4536,7 @@ static bool spt_digital_port_connected(struct drm_i915_private *dev_priv, { u32 bit; - switch (port->port) { + switch (port->base.port) { case PORT_A: bit = SDE_PORTA_HOTPLUG_SPT; break; @@ -4555,7 +4555,7 @@ static bool g4x_digital_port_connected(struct drm_i915_private *dev_priv, { u32 bit; - switch (port->port) { + switch (port->base.port) { case PORT_B: bit = PORTB_HOTPLUG_LIVE_STATUS_G4X; break; @@ -4566,7 +4566,7 @@ static bool g4x_digital_port_connected(struct drm_i915_private *dev_priv, bit = PORTD_HOTPLUG_LIVE_STATUS_G4X; break; default: - MISSING_CASE(port->port); + MISSING_CASE(port->base.port); return false; } @@ -4578,7 +4578,7 @@ static bool gm45_digital_port_connected(struct drm_i915_private *dev_priv, { u32 bit; - switch (port->port) { + switch (port->base.port) { case PORT_B: bit = PORTB_HOTPLUG_LIVE_STATUS_GM45; break; @@ -4589,7 +4589,7 @@ static bool gm45_digital_port_connected(struct drm_i915_private *dev_priv, bit = PORTD_HOTPLUG_LIVE_STATUS_GM45; break; default: - MISSING_CASE(port->port); + MISSING_CASE(port->base.port); return false; } @@ -4599,7 +4599,7 @@ static bool gm45_digital_port_connected(struct drm_i915_private *dev_priv, static bool ilk_digital_port_connected(struct drm_i915_private *dev_priv, struct intel_digital_port *port) { - if (port->port == PORT_A) + if (port->base.port == PORT_A) return I915_READ(DEISR) & DE_DP_A_HOTPLUG; else return ibx_digital_port_connected(dev_priv, port); @@ -4608,7 +4608,7 @@ static bool ilk_digital_port_connected(struct drm_i915_private *dev_priv, static bool snb_digital_port_connected(struct drm_i915_private *dev_priv, struct intel_digital_port *port) { - if (port->port == PORT_A) + if (port->base.port == PORT_A) return I915_READ(DEISR) & DE_DP_A_HOTPLUG; else return cpt_digital_port_connected(dev_priv, port); @@ -4617,7 +4617,7 @@ static bool snb_digital_port_connected(struct drm_i915_private *dev_priv, static bool ivb_digital_port_connected(struct drm_i915_private *dev_priv, struct intel_digital_port *port) { - if (port->port == PORT_A) + if (port->base.port == PORT_A) return I915_READ(DEISR) & DE_DP_A_HOTPLUG_IVB; else return cpt_digital_port_connected(dev_priv, port); @@ -4626,7 +4626,7 @@ static bool ivb_digital_port_connected(struct drm_i915_private *dev_priv, static bool bdw_digital_port_connected(struct drm_i915_private *dev_priv, struct intel_digital_port *port) { - if (port->port == PORT_A) + if (port->base.port == PORT_A) return I915_READ(GEN8_DE_PORT_ISR) & GEN8_PORT_DP_A_HOTPLUG; else return cpt_digital_port_connected(dev_priv, port); @@ -5109,12 +5109,12 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) * "vdd off -> long hpd -> vdd on -> detect -> vdd off -> ..." */ DRM_DEBUG_KMS("ignoring long hpd on eDP port %c\n", - port_name(intel_dig_port->port)); + port_name(intel_dig_port->base.port)); return IRQ_HANDLED; } DRM_DEBUG_KMS("got hpd irq on port %c - %s\n", - port_name(intel_dig_port->port), + port_name(intel_dig_port->base.port), long_hpd ? "long" : "short"); if (long_hpd) { @@ -5364,7 +5364,7 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, u32 pp_on, pp_off, pp_div, port_sel = 0; int div = dev_priv->rawclk_freq / 1000; struct pps_registers regs; - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = dp_to_dig_port(intel_dp)->base.port; const struct edp_power_seq *seq = &intel_dp->pps_delays; lockdep_assert_held(&dev_priv->pps_mutex); @@ -5965,9 +5965,9 @@ intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) struct intel_encoder *encoder = &intel_dig_port->base; struct intel_dp *intel_dp = &intel_dig_port->dp; - encoder->hpd_pin = intel_hpd_pin(intel_dig_port->port); + encoder->hpd_pin = intel_hpd_pin(encoder->port); - switch (intel_dig_port->port) { + switch (encoder->port) { case PORT_A: intel_dp->aux_power_domain = POWER_DOMAIN_AUX_A; break; @@ -5985,7 +5985,7 @@ intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; break; default: - MISSING_CASE(intel_dig_port->port); + MISSING_CASE(encoder->port); } } @@ -6021,7 +6021,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_dig_port->port; + enum port port = intel_encoder->port; int type; /* Initialize the work for modeset in case of link train failure */ @@ -6191,7 +6191,6 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder->disable = g4x_disable_dp; } - intel_dig_port->port = port; intel_dig_port->dp.output_reg = output_reg; intel_dig_port->max_lanes = 4; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 653ca39789b8..c3de0918ee13 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -209,7 +209,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; struct intel_connector *connector = to_intel_connector(conn_state->connector); int ret; @@ -253,7 +253,7 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder, struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; int ret; DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); @@ -552,7 +552,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->type = INTEL_OUTPUT_DP_MST; intel_encoder->power_domain = intel_dig_port->base.power_domain; - intel_encoder->port = intel_dig_port->port; + intel_encoder->port = intel_dig_port->base.port; intel_encoder->crtc_mask = 0x7; intel_encoder->cloneable = 0; diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 5958d3d8b90e..76473e9836c6 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -586,9 +586,8 @@ bxt_ddi_phy_calc_lane_lat_optim_mask(uint8_t lane_count) void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, uint8_t lane_lat_optim_mask) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); - enum port port = dport->port; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; enum dpio_phy phy; enum dpio_channel ch; int lane; @@ -613,9 +612,8 @@ void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, uint8_t bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); - enum port port = dport->port; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; enum dpio_phy phy; enum dpio_channel ch; int lane; @@ -641,7 +639,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct intel_crtc *intel_crtc = to_intel_crtc(dport->base.base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); enum dpio_channel ch = vlv_dport_to_channel(dport); enum pipe pipe = intel_crtc->pipe; u32 val; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 00b488688042..a0a5cead868c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1056,7 +1056,6 @@ struct intel_lspcon { struct intel_digital_port { struct intel_encoder base; - enum port port; u32 saved_port_bits; struct intel_dp dp; struct intel_hdmi hdmi; @@ -1088,7 +1087,7 @@ struct intel_dp_mst_encoder { static inline enum dpio_channel vlv_dport_to_channel(struct intel_digital_port *dport) { - switch (dport->port) { + switch (dport->base.port) { case PORT_B: case PORT_D: return DPIO_CH0; @@ -1102,7 +1101,7 @@ vlv_dport_to_channel(struct intel_digital_port *dport) static inline enum dpio_phy vlv_dport_to_phy(struct intel_digital_port *dport) { - switch (dport->port) { + switch (dport->base.port) { case PORT_B: case PORT_C: return DPIO_PHY0; diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 1f2258dec09e..2d95db64cdf2 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -186,7 +186,7 @@ static bool g4x_infoframe_enabled(struct drm_encoder *encoder, if ((val & VIDEO_DIP_ENABLE) == 0) return false; - if ((val & VIDEO_DIP_PORT_MASK) != VIDEO_DIP_PORT(intel_dig_port->port)) + if ((val & VIDEO_DIP_PORT_MASK) != VIDEO_DIP_PORT(intel_dig_port->base.port)) return false; return val & (VIDEO_DIP_ENABLE_AVI | @@ -245,7 +245,7 @@ static bool ibx_infoframe_enabled(struct drm_encoder *encoder, if ((val & VIDEO_DIP_ENABLE) == 0) return false; - if ((val & VIDEO_DIP_PORT_MASK) != VIDEO_DIP_PORT(intel_dig_port->port)) + if ((val & VIDEO_DIP_PORT_MASK) != VIDEO_DIP_PORT(intel_dig_port->base.port)) return false; return val & (VIDEO_DIP_ENABLE_AVI | @@ -362,7 +362,7 @@ static bool vlv_infoframe_enabled(struct drm_encoder *encoder, if ((val & VIDEO_DIP_ENABLE) == 0) return false; - if ((val & VIDEO_DIP_PORT_MASK) != VIDEO_DIP_PORT(intel_dig_port->port)) + if ((val & VIDEO_DIP_PORT_MASK) != VIDEO_DIP_PORT(intel_dig_port->base.port)) return false; return val & (VIDEO_DIP_ENABLE_AVI | @@ -535,7 +535,7 @@ static void g4x_set_infoframes(struct drm_encoder *encoder, struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; i915_reg_t reg = VIDEO_DIP_CTL; u32 val = I915_READ(reg); - u32 port = VIDEO_DIP_PORT(intel_dig_port->port); + u32 port = VIDEO_DIP_PORT(intel_dig_port->base.port); assert_hdmi_port_disabled(intel_hdmi); @@ -686,7 +686,7 @@ static void ibx_set_infoframes(struct drm_encoder *encoder, struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); - u32 port = VIDEO_DIP_PORT(intel_dig_port->port); + u32 port = VIDEO_DIP_PORT(intel_dig_port->base.port); assert_hdmi_port_disabled(intel_hdmi); @@ -782,7 +782,7 @@ static void vlv_set_infoframes(struct drm_encoder *encoder, struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); - u32 port = VIDEO_DIP_PORT(intel_dig_port->port); + u32 port = VIDEO_DIP_PORT(intel_dig_port->base.port); assert_hdmi_port_disabled(intel_hdmi); @@ -1545,7 +1545,7 @@ intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector, bool has_edid) { struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_hdmi *hdmi = intel_attached_hdmi(connector); - enum port port = hdmi_to_dig_port(hdmi)->port; + enum port port = hdmi_to_dig_port(hdmi)->base.port; struct i2c_adapter *adapter = intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus); enum drm_dp_dual_mode_type type = drm_dp_dual_mode_detect(adapter); @@ -2015,7 +2015,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_dig_port->port; + enum port port = intel_encoder->port; DRM_DEBUG_KMS("Adding HDMI connector on port %c\n", port_name(port)); @@ -2135,7 +2135,6 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, if (IS_G4X(dev_priv)) intel_encoder->cloneable |= 1 << INTEL_OUTPUT_HDMI; - intel_dig_port->port = port; intel_dig_port->hdmi.hdmi_reg = hdmi_reg; intel_dig_port->dp.output_reg = INVALID_MMIO_REG; intel_dig_port->max_lanes = 4; diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 899839f2f7c6..61641d479b93 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -269,7 +269,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_i915_private *dev_priv, case INTEL_OUTPUT_DP: case INTEL_OUTPUT_EDP: dig_port = enc_to_dig_port(&encoder->base); - switch (dig_port->port) { + switch (dig_port->base.port) { case PORT_B: *source = INTEL_PIPE_CRC_SOURCE_DP_B; break; @@ -281,7 +281,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_i915_private *dev_priv, break; default: WARN(1, "nonexisting DP port %c\n", - port_name(dig_port->port)); + port_name(dig_port->base.port)); break; } break; diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 6e3b430fccdc..a1ad85fa5c1a 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -163,7 +163,7 @@ static void hsw_psr_enable_sink(struct intel_dp *intel_dp) [3] = 1 - 1, [4] = DP_SET_POWER_D0, }; - enum port port = dig_port->port; + enum port port = dig_port->base.port; u32 aux_ctl; int i; @@ -376,7 +376,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, * ones. Since by Display design transcoder EDP is tied to port A * we can safely escape based on the port A. */ - if (HAS_DDI(dev_priv) && dig_port->port != PORT_A) { + if (HAS_DDI(dev_priv) && dig_port->base.port != PORT_A) { DRM_DEBUG_KMS("PSR condition failed: Port not supported\n"); return; } From 46bd8383d8c940516196f9c28422e127f3b01dc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 Oct 2017 22:51:22 +0200 Subject: [PATCH 120/260] drm/i915: Clean up PPS code calling conventions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to pass 'dev' or 'dev_priv' when the function already takes 'intel_dp'. Also let's prefer passing 'dev_priv' instead of 'dev' when we have to pass one or the other. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171031205123.13123-10-ville.syrjala@linux.intel.com Acked-by: Jani Nikula Reviewed-by: Dhinakaran Pandiyan --- drivers/gpu/drm/i915/intel_dp.c | 99 ++++++++++++++------------------- 1 file changed, 42 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 9b0c6dea40bc..37bfe8b27221 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -135,7 +135,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp); static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync); static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); -static void vlv_steal_power_sequencer(struct drm_device *dev, +static void vlv_steal_power_sequencer(struct drm_i915_private *dev_priv, enum pipe pipe); static void intel_dp_unset_edid(struct intel_dp *intel_dp); @@ -429,14 +429,12 @@ static void intel_dp_unpack_aux(uint32_t src, uint8_t *dst, int dst_bytes) } static void -intel_dp_init_panel_power_sequencer(struct drm_device *dev, - struct intel_dp *intel_dp); +intel_dp_init_panel_power_sequencer(struct intel_dp *intel_dp); static void -intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, - struct intel_dp *intel_dp, +intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp, bool force_disable_vdd); static void -intel_dp_pps_init(struct drm_device *dev, struct intel_dp *intel_dp); +intel_dp_pps_init(struct intel_dp *intel_dp); static void pps_lock(struct intel_dp *intel_dp) { @@ -580,9 +578,8 @@ static enum pipe vlv_find_free_pps(struct drm_i915_private *dev_priv) static enum pipe vlv_power_sequencer_pipe(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe; lockdep_assert_held(&dev_priv->pps_mutex); @@ -605,7 +602,7 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp) if (WARN_ON(pipe == INVALID_PIPE)) pipe = PIPE_A; - vlv_steal_power_sequencer(dev, pipe); + vlv_steal_power_sequencer(dev_priv, pipe); intel_dp->pps_pipe = pipe; DRM_DEBUG_KMS("picked pipe %c power sequencer for port %c\n", @@ -613,8 +610,8 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp) port_name(intel_dig_port->base.port)); /* init power sequencer on this pipe and port */ - intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, true); + intel_dp_init_panel_power_sequencer(intel_dp); + intel_dp_init_panel_power_sequencer_registers(intel_dp, true); /* * Even vdd force doesn't work until we've made @@ -628,9 +625,7 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp) static int bxt_power_sequencer_idx(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); lockdep_assert_held(&dev_priv->pps_mutex); @@ -651,7 +646,7 @@ bxt_power_sequencer_idx(struct intel_dp *intel_dp) * Only the HW needs to be reprogrammed, the SW state is fixed and * has been setup during connector init. */ - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); + intel_dp_init_panel_power_sequencer_registers(intel_dp, false); return 0; } @@ -703,9 +698,8 @@ vlv_initial_pps_pipe(struct drm_i915_private *dev_priv, static void vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); enum port port = intel_dig_port->base.port; lockdep_assert_held(&dev_priv->pps_mutex); @@ -733,8 +727,8 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) DRM_DEBUG_KMS("initial power sequencer for port %c: pipe %c\n", port_name(port), pipe_name(intel_dp->pps_pipe)); - intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); + intel_dp_init_panel_power_sequencer(intel_dp); + intel_dp_init_panel_power_sequencer_registers(intel_dp, false); } void intel_power_sequencer_reset(struct drm_i915_private *dev_priv) @@ -790,10 +784,10 @@ struct pps_registers { i915_reg_t pp_div; }; -static void intel_pps_get_registers(struct drm_i915_private *dev_priv, - struct intel_dp *intel_dp, +static void intel_pps_get_registers(struct intel_dp *intel_dp, struct pps_registers *regs) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); int pps_idx = 0; memset(regs, 0, sizeof(*regs)); @@ -816,8 +810,7 @@ _pp_ctrl_reg(struct intel_dp *intel_dp) { struct pps_registers regs; - intel_pps_get_registers(to_i915(intel_dp_to_dev(intel_dp)), intel_dp, - ®s); + intel_pps_get_registers(intel_dp, ®s); return regs.pp_ctrl; } @@ -827,8 +820,7 @@ _pp_stat_reg(struct intel_dp *intel_dp) { struct pps_registers regs; - intel_pps_get_registers(to_i915(intel_dp_to_dev(intel_dp)), intel_dp, - ®s); + intel_pps_get_registers(intel_dp, ®s); return regs.pp_stat; } @@ -1947,8 +1939,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder, #define IDLE_CYCLE_MASK (PP_ON | PP_SEQUENCE_MASK | PP_CYCLE_DELAY_ACTIVE | PP_SEQUENCE_STATE_MASK) #define IDLE_CYCLE_VALUE (0 | PP_SEQUENCE_NONE | 0 | PP_SEQUENCE_STATE_OFF_IDLE) -static void intel_pps_verify_state(struct drm_i915_private *dev_priv, - struct intel_dp *intel_dp); +static void intel_pps_verify_state(struct intel_dp *intel_dp); static void wait_panel_status(struct intel_dp *intel_dp, u32 mask, @@ -1960,7 +1951,7 @@ static void wait_panel_status(struct intel_dp *intel_dp, lockdep_assert_held(&dev_priv->pps_mutex); - intel_pps_verify_state(dev_priv, intel_dp); + intel_pps_verify_state(intel_dp); pp_stat_reg = _pp_stat_reg(intel_dp); pp_ctrl_reg = _pp_ctrl_reg(intel_dp); @@ -3009,15 +3000,14 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) intel_dp->pps_pipe = INVALID_PIPE; } -static void vlv_steal_power_sequencer(struct drm_device *dev, +static void vlv_steal_power_sequencer(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_encoder *encoder; lockdep_assert_held(&dev_priv->pps_mutex); - for_each_intel_encoder(dev, encoder) { + for_each_intel_encoder(&dev_priv->drm, encoder) { struct intel_dp *intel_dp; enum port port; @@ -3046,9 +3036,8 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); lockdep_assert_held(&dev_priv->pps_mutex); @@ -3069,7 +3058,7 @@ static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, * We may be stealing the power * sequencer from another port. */ - vlv_steal_power_sequencer(dev, crtc->pipe); + vlv_steal_power_sequencer(dev_priv, crtc->pipe); intel_dp->active_pipe = crtc->pipe; @@ -3083,8 +3072,8 @@ static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, pipe_name(intel_dp->pps_pipe), port_name(encoder->port)); /* init power sequencer on this pipe and port */ - intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, true); + intel_dp_init_panel_power_sequencer(intel_dp); + intel_dp_init_panel_power_sequencer_registers(intel_dp, true); } static void vlv_pre_enable_dp(struct intel_encoder *encoder, @@ -5062,7 +5051,7 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) if (intel_dp_is_edp(intel_dp)) { /* Reinit the power sequencer, in case BIOS did something with it. */ - intel_dp_pps_init(encoder->dev, intel_dp); + intel_dp_pps_init(intel_dp); intel_edp_panel_vdd_sanitize(intel_dp); } @@ -5202,13 +5191,13 @@ static void intel_dp_init_panel_power_timestamps(struct intel_dp *intel_dp) } static void -intel_pps_readout_hw_state(struct drm_i915_private *dev_priv, - struct intel_dp *intel_dp, struct edp_power_seq *seq) +intel_pps_readout_hw_state(struct intel_dp *intel_dp, struct edp_power_seq *seq) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 pp_on, pp_off, pp_div = 0, pp_ctl = 0; struct pps_registers regs; - intel_pps_get_registers(dev_priv, intel_dp, ®s); + intel_pps_get_registers(intel_dp, ®s); /* Workaround: Need to write PP_CONTROL with the unlock key as * the very first thing. */ @@ -5252,13 +5241,12 @@ intel_pps_dump_state(const char *state_name, const struct edp_power_seq *seq) } static void -intel_pps_verify_state(struct drm_i915_private *dev_priv, - struct intel_dp *intel_dp) +intel_pps_verify_state(struct intel_dp *intel_dp) { struct edp_power_seq hw; struct edp_power_seq *sw = &intel_dp->pps_delays; - intel_pps_readout_hw_state(dev_priv, intel_dp, &hw); + intel_pps_readout_hw_state(intel_dp, &hw); if (hw.t1_t3 != sw->t1_t3 || hw.t8 != sw->t8 || hw.t9 != sw->t9 || hw.t10 != sw->t10 || hw.t11_t12 != sw->t11_t12) { @@ -5269,10 +5257,9 @@ intel_pps_verify_state(struct drm_i915_private *dev_priv, } static void -intel_dp_init_panel_power_sequencer(struct drm_device *dev, - struct intel_dp *intel_dp) +intel_dp_init_panel_power_sequencer(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct edp_power_seq cur, vbt, spec, *final = &intel_dp->pps_delays; @@ -5282,7 +5269,7 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, if (final->t11_t12 != 0) return; - intel_pps_readout_hw_state(dev_priv, intel_dp, &cur); + intel_pps_readout_hw_state(intel_dp, &cur); intel_pps_dump_state("cur", &cur); @@ -5356,11 +5343,10 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, } static void -intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, - struct intel_dp *intel_dp, +intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp, bool force_disable_vdd) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 pp_on, pp_off, pp_div, port_sel = 0; int div = dev_priv->rawclk_freq / 1000; struct pps_registers regs; @@ -5369,7 +5355,7 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, lockdep_assert_held(&dev_priv->pps_mutex); - intel_pps_get_registers(dev_priv, intel_dp, ®s); + intel_pps_get_registers(intel_dp, ®s); /* * On some VLV machines the BIOS can leave the VDD @@ -5441,16 +5427,15 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, I915_READ(regs.pp_div)); } -static void intel_dp_pps_init(struct drm_device *dev, - struct intel_dp *intel_dp) +static void intel_dp_pps_init(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_initial_power_sequencer_setup(intel_dp); } else { - intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); + intel_dp_init_panel_power_sequencer(intel_dp); + intel_dp_init_panel_power_sequencer_registers(intel_dp, false); } } @@ -5864,7 +5849,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, pps_lock(intel_dp); intel_dp_init_panel_power_timestamps(intel_dp); - intel_dp_pps_init(dev, intel_dp); + intel_dp_pps_init(intel_dp); intel_edp_panel_vdd_sanitize(intel_dp); pps_unlock(intel_dp); From 2f7734770cfbc337fbd842c64a551cddc04a41c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 9 Nov 2017 17:27:58 +0200 Subject: [PATCH 121/260] drm/i915: Clean up DP code local variables and calling conventions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminate a ton of pointless 'dev' variables in the DP code, and pass around 'dev_priv' instead of 'dev'. v2: Rebase Acked-by: Jani Nikula Reviewed-by: Dhinakaran Pandiyan Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171109152758.32257-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_dp.c | 151 ++++++++++++-------------------- 1 file changed, 54 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 37bfe8b27221..efe18269115f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -438,10 +438,7 @@ intel_dp_pps_init(struct intel_dp *intel_dp); static void pps_lock(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *encoder = &intel_dig_port->base; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); /* * See vlv_power_sequencer_reset() why we need @@ -454,10 +451,7 @@ static void pps_lock(struct intel_dp *intel_dp) static void pps_unlock(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *encoder = &intel_dig_port->base; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); mutex_unlock(&dev_priv->pps_mutex); @@ -467,8 +461,8 @@ static void pps_unlock(struct intel_dp *intel_dp) static void vlv_power_sequencer_kick(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); enum pipe pipe = intel_dp->pps_pipe; bool pll_enabled, release_cl_override = false; enum dpio_phy phy = DPIO_PHY(pipe); @@ -733,7 +727,6 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) void intel_power_sequencer_reset(struct drm_i915_private *dev_priv) { - struct drm_device *dev = &dev_priv->drm; struct intel_encoder *encoder; if (WARN_ON(!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv) && @@ -750,7 +743,7 @@ void intel_power_sequencer_reset(struct drm_i915_private *dev_priv) * should use them always. */ - for_each_intel_encoder(dev, encoder) { + for_each_intel_encoder(&dev_priv->drm, encoder) { struct intel_dp *intel_dp; if (encoder->type != INTEL_OUTPUT_DP && @@ -832,8 +825,7 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code, { struct intel_dp *intel_dp = container_of(this, typeof(* intel_dp), edp_notifier); - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); if (!intel_dp_is_edp(intel_dp) || code != SYS_RESTART) return 0; @@ -863,8 +855,7 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code, static bool edp_have_panel_power(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); lockdep_assert_held(&dev_priv->pps_mutex); @@ -877,8 +868,7 @@ static bool edp_have_panel_power(struct intel_dp *intel_dp) static bool edp_have_panel_vdd(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); lockdep_assert_held(&dev_priv->pps_mutex); @@ -892,8 +882,7 @@ static bool edp_have_panel_vdd(struct intel_dp *intel_dp) static void intel_dp_check_edp(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); if (!intel_dp_is_edp(intel_dp)) return; @@ -909,9 +898,7 @@ intel_dp_check_edp(struct intel_dp *intel_dp) static uint32_t intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg; uint32_t status; bool done; @@ -1478,8 +1465,7 @@ static void intel_dp_set_clock(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); const struct dp_link_dpll *divisor = NULL; int i, count = 0; @@ -1848,8 +1834,7 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp, static void intel_dp_prepare(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); enum port port = encoder->port; struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); @@ -1945,8 +1930,7 @@ static void wait_panel_status(struct intel_dp *intel_dp, u32 mask, u32 value) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); i915_reg_t pp_stat_reg, pp_ctrl_reg; lockdep_assert_held(&dev_priv->pps_mutex); @@ -2022,8 +2006,7 @@ static void edp_wait_backlight_off(struct intel_dp *intel_dp) static u32 ironlake_get_pp_control(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 control; lockdep_assert_held(&dev_priv->pps_mutex); @@ -2044,9 +2027,8 @@ static u32 ironlake_get_pp_control(struct intel_dp *intel_dp) */ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); u32 pp; i915_reg_t pp_stat_reg, pp_ctrl_reg; bool need_to_disable = !intel_dp->want_panel_vdd; @@ -2116,8 +2098,7 @@ void intel_edp_panel_vdd_on(struct intel_dp *intel_dp) static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); u32 pp; @@ -2203,8 +2184,7 @@ static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync) static void edp_panel_on(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 pp; i915_reg_t pp_ctrl_reg; @@ -2262,8 +2242,7 @@ void intel_edp_panel_on(struct intel_dp *intel_dp) static void edp_panel_off(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 pp; i915_reg_t pp_ctrl_reg; @@ -2311,9 +2290,7 @@ void intel_edp_panel_off(struct intel_dp *intel_dp) /* Enable backlight in the panel power control. */ static void _intel_edp_backlight_on(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 pp; i915_reg_t pp_ctrl_reg; @@ -2356,8 +2333,7 @@ void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, /* Disable backlight in the panel power control. */ static void _intel_edp_backlight_off(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 pp; i915_reg_t pp_ctrl_reg; @@ -2560,10 +2536,9 @@ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode) static bool intel_dp_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); enum port port = encoder->port; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); u32 tmp; bool ret; @@ -2612,10 +2587,9 @@ out: static void intel_dp_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); u32 tmp, flags = 0; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); enum port port = encoder->port; struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); @@ -2782,9 +2756,8 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, uint32_t *DP, uint8_t dp_train_pat) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); enum port port = intel_dig_port->base.port; if (dp_train_pat & DP_TRAINING_PATTERN_MASK) @@ -2868,8 +2841,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, static void intel_dp_enable_port(struct intel_dp *intel_dp, const struct intel_crtc_state *old_crtc_state) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); /* enable with pattern 1 (as per spec) */ @@ -2893,9 +2865,8 @@ static void intel_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); uint32_t dp_reg = I915_READ(intel_dp->output_reg); enum pipe pipe = crtc->pipe; @@ -3519,10 +3490,9 @@ gen7_edp_signal_levels(uint8_t train_set) void intel_dp_set_signal_levels(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); enum port port = intel_dig_port->base.port; - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t signal_levels, mask = 0; uint8_t train_set = intel_dp->train_set[0]; @@ -3577,9 +3547,8 @@ intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); enum port port = intel_dig_port->base.port; uint32_t val; @@ -4303,11 +4272,11 @@ intel_dp_retrain_link(struct intel_dp *intel_dp) static void intel_dp_check_link_status(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; - struct drm_device *dev = intel_dp_to_dev(intel_dp); u8 link_status[DP_LINK_STATUS_SIZE]; - WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); + WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); if (!intel_dp_get_link_status(intel_dp, link_status)) { DRM_ERROR("Failed to get link status\n"); @@ -4353,8 +4322,7 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) static bool intel_dp_short_pulse(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u8 sink_irq_vector = 0; u8 old_sink_count = intel_dp->sink_count; bool ret; @@ -4393,13 +4361,13 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n"); } - drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, NULL); intel_dp_check_link_status(intel_dp); - drm_modeset_unlock(&dev->mode_config.connection_mutex); + drm_modeset_unlock(&dev_priv->drm.mode_config.connection_mutex); if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { DRM_DEBUG_KMS("Link Training Compliance Test requested\n"); /* Send a Hotplug Uevent to userspace to start modeset */ - drm_kms_helper_hotplug_event(intel_encoder->base.dev); + drm_kms_helper_hotplug_event(&dev_priv->drm); } return true; @@ -4463,8 +4431,7 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp) static enum drm_connector_status edp_detect(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); enum drm_connector_status status; status = intel_panel_detect(dev_priv); @@ -4720,22 +4687,21 @@ intel_dp_unset_edid(struct intel_dp *intel_dp) } static int -intel_dp_long_pulse(struct intel_connector *intel_connector) +intel_dp_long_pulse(struct intel_connector *connector) { - struct drm_connector *connector = &intel_connector->base; - struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_device *dev = connector->dev; + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_dp *intel_dp = intel_attached_dp(&connector->base); enum drm_connector_status status; u8 sink_irq_vector = 0; - WARN_ON(!drm_modeset_is_locked(&connector->dev->mode_config.connection_mutex)); + WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); - intel_display_power_get(to_i915(dev), intel_dp->aux_power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); /* Can't disconnect eDP, but you can close the lid... */ if (intel_dp_is_edp(intel_dp)) status = edp_detect(intel_dp); - else if (intel_digital_port_connected(to_i915(dev), + else if (intel_digital_port_connected(dev_priv, dp_to_dig_port(intel_dp))) status = intel_dp_detect_dpcd(intel_dp); else @@ -4806,7 +4772,7 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) intel_dp->aux.i2c_defer_count = 0; intel_dp_set_edid(intel_dp); - if (intel_dp_is_edp(intel_dp) || intel_connector->detect_edid) + if (intel_dp_is_edp(intel_dp) || connector->detect_edid) status = connector_status_connected; intel_dp->detect_done = true; @@ -4829,7 +4795,7 @@ out: if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); - intel_display_power_put(to_i915(dev), intel_dp->aux_power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); return status; } @@ -4996,9 +4962,7 @@ void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); lockdep_assert_held(&dev_priv->pps_mutex); @@ -5086,8 +5050,7 @@ enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) { struct intel_dp *intel_dp = &intel_dig_port->dp; - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); enum irqreturn ret = IRQ_NONE; if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) { @@ -5546,8 +5509,7 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, void intel_edp_drrs_enable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); if (!crtc_state->has_drrs) { DRM_DEBUG_KMS("Panel doesn't support DRRS\n"); @@ -5582,8 +5544,7 @@ unlock: void intel_edp_drrs_disable(struct intel_dp *intel_dp, const struct intel_crtc_state *old_crtc_state) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); if (!old_crtc_state->has_drrs) return; @@ -5766,7 +5727,7 @@ void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, /** * intel_dp_drrs_init - Init basic DRRS work and mutex. - * @intel_connector: eDP connector + * @connector: eDP connector * @fixed_mode: preferred mode of panel * * This function is called only once at driver load to initialize basic @@ -5778,12 +5739,10 @@ void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, * from VBT setting). */ static struct drm_display_mode * -intel_dp_drrs_init(struct intel_connector *intel_connector, - struct drm_display_mode *fixed_mode) +intel_dp_drrs_init(struct intel_connector *connector, + struct drm_display_mode *fixed_mode) { - struct drm_connector *connector = &intel_connector->base; - struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct drm_display_mode *downclock_mode = NULL; INIT_DELAYED_WORK(&dev_priv->drrs.work, intel_edp_drrs_downclock_work); @@ -5799,8 +5758,8 @@ intel_dp_drrs_init(struct intel_connector *intel_connector, return NULL; } - downclock_mode = intel_find_panel_downclock - (dev_priv, fixed_mode, connector); + downclock_mode = intel_find_panel_downclock(dev_priv, fixed_mode, + &connector->base); if (!downclock_mode) { DRM_DEBUG_KMS("Downclock mode is not found. DRRS not supported\n"); @@ -5817,11 +5776,9 @@ intel_dp_drrs_init(struct intel_connector *intel_connector, static bool intel_edp_init_connector(struct intel_dp *intel_dp, struct intel_connector *intel_connector) { - struct drm_connector *connector = &intel_connector->base; - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; - struct drm_device *dev = intel_encoder->base.dev; + struct drm_device *dev = intel_dp_to_dev(intel_dp); struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_connector *connector = &intel_connector->base; struct drm_display_mode *fixed_mode = NULL; struct drm_display_mode *alt_fixed_mode = NULL; struct drm_display_mode *downclock_mode = NULL; @@ -5839,7 +5796,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, * eDP and LVDS bail out early in this case to prevent interfering * with an already powered-on LVDS power sequencer. */ - if (intel_get_lvds_encoder(dev)) { + if (intel_get_lvds_encoder(&dev_priv->drm)) { WARN_ON(!(HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv))); DRM_INFO("LVDS was detected, not registering eDP\n"); From bccd3b831185e75c4138bc3fd5201f3214dfeb3d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Nov 2017 14:30:19 +0000 Subject: [PATCH 122/260] drm/i915: Use trace_printk to provide a death rattle for GEM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trying to enable printk debugging for GEM is fraught with the issue of spam; interactions with HW are very frequent and often boring. However, one instance where they are not so boring is just before a BUG; here ftrace provides a facility to dump its ringbuffer on an oops. So for CI let's enable trace_printk() to capture the last exchanges with HW as a death rattle. For example, [ 79.234110] ------------[ cut here ]------------ [ 79.234137] kernel BUG at drivers/gpu/drm/i915/intel_lrc.c:907! [ 79.234145] invalid opcode: 0000 [#1] SMP [ 79.234153] Dumping ftrace buffer: [ 79.234158] --------------------------------- ... [ 79.314044] gem_conc-1059 1..s1 79203443us : intel_lrc_irq_handler: bcs0 out[0]: ctx=5.2, seqno=145 [ 79.314089] gem_conc-1059 1..s. 79220800us : intel_lrc_irq_handler: bcs0 csb[1/1]: status=0x00000018:0x00000005 [ 79.314133] gem_conc-1059 1..s. 79220803us : intel_lrc_irq_handler: bcs0 out[0]: ctx=5.1, seqno=145 [ 79.314177] gem_conc-1062 2..s1 79230458us : intel_lrc_irq_handler: bcs0 in[0]: ctx=8.1, seqno=146 [ 79.314220] gem_conc-1062 2..s1 79230515us : intel_lrc_irq_handler: bcs0 in[0]: ctx=8.2, seqno=147 [ 79.314265] gem_conc-1059 1..s1 79230951us : intel_lrc_irq_handler: bcs0 csb[2/3]: status=0x00000012:0x00000008 [ 79.314309] gem_conc-1059 1..s1 79230954us : intel_lrc_irq_handler: bcs0 out[0]: ctx=8.2, seqno=147 [ 79.314353] gem_conc-1059 1..s1 79230954us : intel_lrc_irq_handler: bcs0 csb[3/3]: status=0x00008002:0x00000008 [ 79.314396] gem_conc-1059 1..s1 79230955us : intel_lrc_irq_handler: bcs0 out[0]: ctx=8.1, seqno=147 [ 79.314402] --------------------------------- v2: Tweak the formatting to be more consistent between in/out. v3: do {} while (0) stub macro protection Suggested-by: Michał Winiarski Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171109143019.16568-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.debug | 14 ++++++++++++++ drivers/gpu/drm/i915/i915_gem.h | 6 ++++++ drivers/gpu/drm/i915/intel_lrc.c | 17 +++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 19c77c6feb24..9e53edbc713b 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -28,6 +28,7 @@ config DRM_I915_DEBUG select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST + select DRM_I915_TRACE_GEM default n help Choose this option to turn on extra driver debugging that may affect @@ -49,6 +50,19 @@ config DRM_I915_DEBUG_GEM If in doubt, say "N". +config DRM_I915_TRACE_GEM + bool "Insert extra ftrace output from the GEM internals" + select TRACING + default n + help + Enable additional and verbose debugging output that will spam + ordinary tests, but may be vital for post-mortem debugging when + used with /proc/sys/kernel/ftrace_dump_on_oops + + Recommended for driver developers only. + + If in doubt, say "N". + config DRM_I915_SW_FENCE_DEBUG_OBJECTS bool "Enable additional driver debugging for fence objects" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index ee54597465b6..ff42b5f0e981 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -44,6 +44,12 @@ #define GEM_DEBUG_BUG_ON(expr) #endif +#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM) +#define GEM_TRACE(...) trace_printk(__VA_ARGS__) +#else +#define GEM_TRACE(...) do { } while (0) +#endif + #define I915_NUM_ENGINES 5 #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6840ec8db037..020ca7c9c75f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -466,6 +466,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) port_set(&port[n], port_pack(rq, count)); desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); + + GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n", + engine->name, n, + rq->ctx->hw_id, count, + rq->global_seqno); } else { GEM_BUG_ON(!n); desc = 0; @@ -520,6 +525,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) ce->ring->tail &= (ce->ring->size - 1); ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; + GEM_TRACE("\n"); for (n = execlists_num_ports(&engine->execlists); --n; ) elsp_write(0, elsp); @@ -832,6 +838,10 @@ static void intel_lrc_irq_handler(unsigned long data) head = execlists->csb_head; tail = READ_ONCE(buf[write_idx]); } + GEM_TRACE("%s cs-irq head=%d [%d], tail=%d [%d]\n", + engine->name, + head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), + tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))))); while (head != tail) { struct drm_i915_gem_request *rq; @@ -859,6 +869,9 @@ static void intel_lrc_irq_handler(unsigned long data) */ status = READ_ONCE(buf[2 * head]); /* maybe mmio! */ + GEM_TRACE("%s csb[%dd]: status=0x%08x:0x%08x\n", + engine->name, head, + status, buf[2*head + 1]); if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; @@ -886,6 +899,10 @@ static void intel_lrc_irq_handler(unsigned long data) GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); rq = port_unpack(port, &count); + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n", + engine->name, + rq->ctx->hw_id, count, + rq->global_seqno); GEM_BUG_ON(count == 0); if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); From 15e4cda950ea660837f6733426691c8e467010a6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Nov 2017 21:34:49 +0000 Subject: [PATCH 123/260] drm/i915: Mark vm_free_page() as a potential sleeper agent vm_free_page() may call down into set_pages_array_wb() (which itself sleeps, on x86 at least) but only if on !llc and the caches overflow. Since this is unlikely, we only rarely trigger the error in practice, and so to make CI detection of this sleeping bug possible we want to mark the common vm_free_page() as a potential sleep. Suggested-by: Tvrtko Ursulin References: https://bugs.freedesktop.org/show_bug.cgi?id=103638 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Auld Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171109213450.13875-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2847a6b41c16..1e40eeb31f9d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -454,6 +454,14 @@ static void vm_free_pages_release(struct i915_address_space *vm, static void vm_free_page(struct i915_address_space *vm, struct page *page) { + /* + * On !llc, we need to change the pages back to WB. We only do so + * in bulk, so we rarely need to change the page attributes here, + * but doing so requires a stop_machine() from deep inside arch/x86/mm. + * To make detection of the possible sleep more likely, use an + * unconditional might_sleep() for everybody. + */ + might_sleep(); if (!pagevec_add(&vm->free_pages, page)) vm_free_pages_release(vm, false); } From 7f017b19fbce6200a993305425bf930f38fcd9e6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Nov 2017 21:34:50 +0000 Subject: [PATCH 124/260] drm/i915: Mark up i915_vma_unbind() as a potential sleeper Whenever we want to unbind a vma, we must wait on all GPU activity to complete first. (This is what gives us the ability to do fine grained eviction and purging by only having to wait on the VMA that we need to unbind to proceed; though if pushed we can make it a rule that we are only allowed to unbind already idle VMA and move the burden of the work and organising the sleep onto the caller.) Currently, we might only sleep if the vma is still active on the GPU, but in principle i915_vma_unbind() always implies a sleep, so mark it up with a might_sleep(). Suggested-by: Tvrtko Ursulin References: https://bugs.freedesktop.org/show_bug.cgi?id=103638 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Auld Cc: Daniel Vetter Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171109213450.13875-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index b7ac84db1d2e..bf6d8d1eaabe 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -743,6 +743,7 @@ int i915_vma_unbind(struct i915_vma *vma) /* First wait upon any activity as retiring the request may * have side-effects such as unpinning or even unbinding this vma. */ + might_sleep(); active = i915_vma_get_active(vma); if (active) { int idx; From 25be4a154504d60293e345535eeab5d6a022b4fa Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 9 Nov 2017 15:18:04 -0800 Subject: [PATCH 125/260] drm/i915: Update DRIVER_DATE to 20171109 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 19b679ca8211..7a8f5d7638f0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -80,8 +80,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20171023" -#define DRIVER_TIMESTAMP 1508748913 +#define DRIVER_DATE "20171109" +#define DRIVER_TIMESTAMP 1510269484 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions From e25dcf6ed8f5406b12689543698df98bd4c48cc7 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 9 Nov 2017 15:30:06 -0800 Subject: [PATCH 126/260] drm/i915: Update DRIVER_DATE to 20171109 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7a8f5d7638f0..c94f34f5cee3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -81,7 +81,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" #define DRIVER_DATE "20171109" -#define DRIVER_TIMESTAMP 1510269484 +#define DRIVER_TIMESTAMP 1510270206 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions From b1a1e5d2cebf4630d971628d20b3034dfe9c2c40 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 10:11:10 +0000 Subject: [PATCH 127/260] drm/i915/selftests: Reduce the volume of the timeout message Originally it was anticipated that timeouts would be a rare event, and so merit a warning that the test was incomplete. However, for igt we keep the timeout low, and hitting the timeout is intentional. It no longer necessitates a warning, but to be expected. Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171110101110.12042-1-chris@chris-wilson.co.uk Reviwed-by: Matthew Auld Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_selftest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index 78e1a1b168ff..9766e806dce6 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -99,6 +99,6 @@ __printf(2, 3) bool __igt_timeout(unsigned long timeout, const char *fmt, ...); #define igt_timeout(t, fmt, ...) \ - __igt_timeout((t), KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) + __igt_timeout((t), KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #endif /* !__I915_SELFTEST_H__ */ From c16c4ba7250b681e103126c54481a8b17c073758 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Nov 2017 10:20:03 +0000 Subject: [PATCH 128/260] drm/i915: Move irqs enabled assertion deeper for mock breadcrumbs In order to allow the mock breadcrumbs tests to run without device irqs being enabled, move the intel_irqs_enabled() assert deeper to just before we commit to enabling the HW irq. v2: Add a FIXME explaining that placing the assertion so deep is not ideal, but a compromise for mock breadcrumbs. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171107102003.1802-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 4de054f8c1ba..4c4fbf5f20f9 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -145,6 +145,14 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t) static void irq_enable(struct intel_engine_cs *engine) { + /* + * FIXME: Ideally we want this on the API boundary, but for the + * sake of testing with mock breadcrumbs (no HW so unable to + * enable irqs) we place it deep within the bowels, at the point + * of no return. + */ + GEM_BUG_ON(!intel_irqs_enabled(engine->i915)); + /* Enabling the IRQ may miss the generation of the interrupt, but * we still need to force the barrier before reading the seqno, * just in case. @@ -266,8 +274,6 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) struct drm_i915_private *i915 = engine->i915; bool enabled; - GEM_BUG_ON(!intel_irqs_enabled(i915)); - lockdep_assert_held(&b->irq_lock); if (b->irq_armed) return false; From af0ab55ffe2467fab82bc0909e469cba7701fac7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 19 Oct 2017 13:16:19 +0200 Subject: [PATCH 129/260] x86/platform/intel/iosf_mbi: Add unlocked PMIC bus access notifier unregister For race free unregistration drivers may need to acquire PMIC bus access through iosf_mbi_punit_acquire() and then (un)register the notifier without dropping the lock. This commit adds an unlocked variant of iosf_mbi_unregister_pmic_bus_access_notifier for this use case. Acked-by: Ingo Molnar Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20171019111620.26761-2-hdegoede@redhat.com --- arch/x86/include/asm/iosf_mbi.h | 25 +++++++++++++++++++++++++ arch/x86/platform/intel/iosf_mbi.c | 19 +++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index c313cac36f56..eb959cd77994 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -145,6 +145,18 @@ int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb); */ int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb); +/** + * iosf_mbi_unregister_pmic_bus_access_notifier_unlocked - Unregister PMIC bus + * notifier, unlocked + * + * Like iosf_mbi_unregister_pmic_bus_access_notifier(), but for use when the + * caller has already called iosf_mbi_punit_acquire() itself. + * + * @nb: notifier_block to unregister + */ +int iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( + struct notifier_block *nb); + /** * iosf_mbi_call_pmic_bus_access_notifier_chain - Call PMIC bus notifier chain * @@ -153,6 +165,11 @@ int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb); */ int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v); +/** + * iosf_mbi_assert_punit_acquired - Assert that the P-Unit has been acquired. + */ +void iosf_mbi_assert_punit_acquired(void); + #else /* CONFIG_IOSF_MBI is not enabled */ static inline bool iosf_mbi_available(void) @@ -196,12 +213,20 @@ int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb) return 0; } +static inline int +iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(struct notifier_block *nb) +{ + return 0; +} + static inline int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v) { return 0; } +static inline void iosf_mbi_assert_punit_acquired(void) {} + #endif /* CONFIG_IOSF_MBI */ #endif /* IOSF_MBI_SYMS_H */ diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c index a952ac199741..6f37a2137a79 100644 --- a/arch/x86/platform/intel/iosf_mbi.c +++ b/arch/x86/platform/intel/iosf_mbi.c @@ -218,14 +218,23 @@ int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(iosf_mbi_register_pmic_bus_access_notifier); +int iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( + struct notifier_block *nb) +{ + iosf_mbi_assert_punit_acquired(); + + return blocking_notifier_chain_unregister( + &iosf_mbi_pmic_bus_access_notifier, nb); +} +EXPORT_SYMBOL(iosf_mbi_unregister_pmic_bus_access_notifier_unlocked); + int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb) { int ret; /* Wait for the bus to go inactive before unregistering */ mutex_lock(&iosf_mbi_punit_mutex); - ret = blocking_notifier_chain_unregister( - &iosf_mbi_pmic_bus_access_notifier, nb); + ret = iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(nb); mutex_unlock(&iosf_mbi_punit_mutex); return ret; @@ -239,6 +248,12 @@ int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v) } EXPORT_SYMBOL(iosf_mbi_call_pmic_bus_access_notifier_chain); +void iosf_mbi_assert_punit_acquired(void) +{ + WARN_ON(!mutex_is_locked(&iosf_mbi_punit_mutex)); +} +EXPORT_SYMBOL(iosf_mbi_assert_punit_acquired); + #ifdef CONFIG_IOSF_MBI_DEBUG static u32 dbg_mdr; static u32 dbg_mcr; From a5266db4d31410fbfb4f40118e58085994e83dbc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 19 Oct 2017 13:16:20 +0200 Subject: [PATCH 130/260] drm/i915: Acquire PUNIT->PMIC bus for intel_uncore_forcewake_reset() intel_uncore_forcewake_reset() does forcewake puts and gets as such we need to make sure that no-one tries to access the PUNIT->PMIC bus (on systems where this bus is shared) while it runs, otherwise bad things happen. Normally this is taken care of by the i915_pmic_bus_access_notifier() which does an intel_uncore_forcewake_get(FORCEWAKE_ALL) when some other driver tries to access the PMIC bus, so that later forcewake gets are no-ops (for the duration of the bus access). But intel_uncore_forcewake_reset gets called in 3 cases: 1) Before registering the pmic_bus_access_notifier 2) After unregistering the pmic_bus_access_notifier 3) To reset forcewake state on a GPU reset In all 3 cases the i915_pmic_bus_access_notifier() protection is insufficient. This commit fixes this race by calling iosf_mbi_punit_acquire() before calling intel_uncore_forcewake_reset(). In the case where it is called directly after unregistering the pmic_bus_access_notifier, we need to hold the punit-lock over both calls to avoid a race where intel_uncore_fw_release_timer() may execute between the 2 calls. Reviewed-by: Imre Deak Acked-by: Ingo Molnar Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20171019111620.26761-3-hdegoede@redhat.com --- drivers/gpu/drm/i915/intel_uncore.c | 17 +++++++++++++---- drivers/gpu/drm/i915/selftests/intel_uncore.c | 3 +++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 0a6d952a2df1..211acee7c31d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -345,6 +345,7 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) return HRTIMER_NORESTART; } +/* Note callers must have acquired the PUNIT->PMIC bus, before calling this. */ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, bool restore) { @@ -353,6 +354,8 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, int retry_count = 100; enum forcewake_domains fw, active_domains; + iosf_mbi_assert_punit_acquired(); + /* Hold uncore.lock across reset to prevent any register access * with forcewake not set correctly. Wait until all pending * timers are run before holding. @@ -532,14 +535,18 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, GT_FIFO_CTL_RC6_POLICY_STALL); } + iosf_mbi_punit_acquire(); intel_uncore_forcewake_reset(dev_priv, restore_forcewake); + iosf_mbi_punit_release(); } void intel_uncore_suspend(struct drm_i915_private *dev_priv) { - iosf_mbi_unregister_pmic_bus_access_notifier( + iosf_mbi_punit_acquire(); + iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( &dev_priv->uncore.pmic_bus_access_nb); intel_uncore_forcewake_reset(dev_priv, false); + iosf_mbi_punit_release(); } void intel_uncore_resume_early(struct drm_i915_private *dev_priv) @@ -1419,12 +1426,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) void intel_uncore_fini(struct drm_i915_private *dev_priv) { - iosf_mbi_unregister_pmic_bus_access_notifier( - &dev_priv->uncore.pmic_bus_access_nb); - /* Paranoia: make sure we have disabled everything before we exit. */ intel_uncore_sanitize(dev_priv); + + iosf_mbi_punit_acquire(); + iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( + &dev_priv->uncore.pmic_bus_access_nb); intel_uncore_forcewake_reset(dev_priv, false); + iosf_mbi_punit_release(); } static const struct reg_whitelist { diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index f52a4ab9aa98..2f6367643171 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -148,7 +148,10 @@ static int intel_uncore_check_forcewake_domains(struct drm_i915_private *dev_pri for_each_set_bit(offset, valid, FW_RANGE) { i915_reg_t reg = { offset }; + iosf_mbi_punit_acquire(); intel_uncore_forcewake_reset(dev_priv, false); + iosf_mbi_punit_release(); + check_for_unclaimed_mmio(dev_priv); (void)I915_READ(reg); From 30b29406d9374989f34bce0eadaa630813049808 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 11:25:50 +0000 Subject: [PATCH 131/260] drm/i915: Restore the wait for idle engine after flushing interrupts So it appears that commit 5427f207852d ("drm/i915: Bump wait-times for the final CS interrupt before parking") was a little over optimistic in its belief that it had successfully waited for all residual activity on the engines before parking. Numerous sightings in CI since then of <7>[ 52.542886] [IGT] core_auth: executing <3>[ 52.561013] [drm:intel_engines_park [i915]] *ERROR* vcs0 is not idle before parking <7>[ 52.561215] intel_engines_park vcs0 <7>[ 52.561229] intel_engines_park current seqno 98, last 98, hangcheck 0 [-247449 ms], inflight 0 <7>[ 52.561238] intel_engines_park Reset count: 0 <7>[ 52.561266] intel_engines_park Requests: <7>[ 52.561363] intel_engines_park RING_START: 0x00000000 [0x00000000] <7>[ 52.561377] intel_engines_park RING_HEAD: 0x00000000 [0x00000000] <7>[ 52.561390] intel_engines_park RING_TAIL: 0x00000000 [0x00000000] <7>[ 52.561406] intel_engines_park RING_CTL: 0x00000000 <7>[ 52.561422] intel_engines_park RING_MODE: 0x00000200 [idle] <7>[ 52.561442] intel_engines_park ACTHD: 0x00000000_00000000 <7>[ 52.561459] intel_engines_park BBADDR: 0x00000000_00000000 <7>[ 52.561474] intel_engines_park Execlist status: 0x00000301 00000000 <7>[ 52.561489] intel_engines_park Execlist CSB read 5 [5 cached], write 5 [5 from hws], interrupt posted? no <7>[ 52.561500] intel_engines_park ELSP[0] idle <7>[ 52.561510] intel_engines_park ELSP[1] idle <7>[ 52.561519] intel_engines_park HW active? 0x0 <7>[ 52.561608] intel_engines_park Idle? yes <7>[ 52.561617] intel_engines_park on Braswell, which indicates that the engine just needs that little bit longer after flushing the tasklet to settle. So give it a few more milliseconds before declaring an err and applying the emergency brake. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103479 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171110112550.28909-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_engine_cs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 6cb8e3ed97e4..87778f03393b 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1626,11 +1626,12 @@ void intel_engines_park(struct drm_i915_private *i915) * will be no more interrupts arriving later and the engines * are truly idle. */ - if (!intel_engine_is_idle(engine)) { + if (wait_for(intel_engine_is_idle(engine), 10)) { struct drm_printer p = drm_debug_printer(__func__); - DRM_ERROR("%s is not idle before parking\n", - engine->name); + dev_err(i915->drm.dev, + "%s is not idle before parking\n", + engine->name); intel_engine_dump(engine, &p); } From 9511ce1ce7299029acc985ce5f96a1e2e703f084 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 15:19:19 +0000 Subject: [PATCH 132/260] drm/i915/selftests: Initialise mock_i915->mm.obj_lock lockdep spotted that the mock tests were using the i915->mm.obj_lock without first initialiasing it: >[ 1303.217043] [IGT] drv_selftest: starting subtest mock_objects <4>[ 1303.240898] Setting dangerous option mock_selftests - tainting kernel <6>[ 1303.253665] i915: Performing mock selftests with st_random_seed=0xd87ea6c6 st_timeout=1000 <4>[ 1303.254812] INFO: trying to register non-static key. <4>[ 1303.254816] the code is fine but needs lockdep annotation. <4>[ 1303.254818] turning off the locking correctness validator. <4>[ 1303.254820] CPU: 4 PID: 13112 Comm: drv_selftest Tainted: G U W 4.14.0-rc8-CI-Patchwork_7058+ #1 <4>[ 1303.254823] Hardware name: MSI MS-7924/Z97M-G43(MS-7924), BIOS V1.12 02/15/2016 <4>[ 1303.254825] Call Trace: <4>[ 1303.254829] dump_stack+0x68/0x9f <4>[ 1303.254832] register_lock_class+0x3fd/0x580 <4>[ 1303.254835] ? ___slab_alloc.constprop.29+0x157/0x3d0 <4>[ 1303.254837] ? ___slab_alloc.constprop.29+0x157/0x3d0 <4>[ 1303.254840] ? sg_kmalloc+0x1e/0x50 <4>[ 1303.254842] ? debug_smp_processor_id+0x17/0x20 <4>[ 1303.254845] __lock_acquire+0xa4/0x1b00 <4>[ 1303.254884] ? __i915_gem_object_set_pages+0x116/0x1f0 [i915] <4>[ 1303.254887] ? __this_cpu_preempt_check+0x13/0x20 <4>[ 1303.254889] ? sg_kmalloc+0x1e/0x50 <4>[ 1303.254891] lock_acquire+0xb0/0x200 <4>[ 1303.254893] ? lock_acquire+0xb0/0x200 <4>[ 1303.254917] ? __i915_gem_object_set_pages+0x116/0x1f0 [i915] <4>[ 1303.254920] _raw_spin_lock+0x32/0x50 <4>[ 1303.254944] ? __i915_gem_object_set_pages+0x116/0x1f0 [i915] <4>[ 1303.254967] __i915_gem_object_set_pages+0x116/0x1f0 [i915] <4>[ 1303.254991] i915_gem_object_get_pages_phys+0x286/0x2b0 [i915] <4>[ 1303.255015] ____i915_gem_object_get_pages+0x20/0x60 [i915] <4>[ 1303.255039] i915_gem_object_attach_phys+0x137/0x1a0 [i915] <4>[ 1303.255063] igt_phys_object+0x45/0x120 [i915] <4>[ 1303.255094] __i915_subtests+0x40/0xd0 [i915] <4>[ 1303.255099] ? work_on_cpu_safe+0x60/0x60 <4>[ 1303.255131] i915_gem_object_mock_selftests+0x34/0x50 [i915] Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171110151919.18451-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 04eb9362f4f8..d2bf2729c331 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -180,6 +180,7 @@ struct drm_i915_private *mock_gem_device(void) I915_GTT_PAGE_SIZE_2M; spin_lock_init(&i915->mm.object_stat_lock); + spin_lock_init(&i915->mm.obj_lock); mock_uncore_init(i915); init_waitqueue_head(&i915->gpu_error.wait_queue); From 1803fcbca2e444f7972430c4dc1c3e98c6ee1bc9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 10 Nov 2017 14:26:27 +0000 Subject: [PATCH 133/260] drm/i915: Define an engine class enum for the uABI We want to be able to report back to userspace details about an engine's class, and in return for userspace to be able to request actions regarding certain classes of engines. To isolate the uABI from any variations between hw generations, we define an abstract class for the engines and internally map onto the hw. v2: Remove MAX from the uABI; keep it internal if we need it, but don't let userspace make the mistake of using it themselves. v3: s/OTHER/INVALID/ The use of OTHER is ill-defined, so remove it from the uABI as any future new type of engine can define a class to suit it. But keep a reserved value for an invalid class, so that we can always unambiguously express when something doesn't belong to the classification. Signed-off-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Reviewed-by: Joonas Lahtinen #v2 Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 10 +++++++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++++- include/uapi/drm/i915_drm.h | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 87778f03393b..bded9c40dbd5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -50,6 +50,8 @@ struct engine_class_info { const char *name; int (*init_legacy)(struct intel_engine_cs *engine); int (*init_execlists)(struct intel_engine_cs *engine); + + u8 uabi_class; }; static const struct engine_class_info intel_engine_classes[] = { @@ -57,21 +59,25 @@ static const struct engine_class_info intel_engine_classes[] = { .name = "rcs", .init_execlists = logical_render_ring_init, .init_legacy = intel_init_render_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_RENDER, }, [COPY_ENGINE_CLASS] = { .name = "bcs", .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_blt_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_COPY, }, [VIDEO_DECODE_CLASS] = { .name = "vcs", .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_bsd_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_VIDEO, }, [VIDEO_ENHANCEMENT_CLASS] = { .name = "vecs", .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_vebox_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE, }, }; @@ -213,13 +219,15 @@ intel_engine_setup(struct drm_i915_private *dev_priv, WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u", class_info->name, info->instance) >= sizeof(engine->name)); - engine->uabi_id = info->uabi_id; engine->hw_id = engine->guc_id = info->hw_id; engine->mmio_base = info->mmio_base; engine->irq_shift = info->irq_shift; engine->class = info->class; engine->instance = info->instance; + engine->uabi_id = info->uabi_id; + engine->uabi_class = class_info->uabi_class; + engine->context_size = __intel_engine_context_size(dev_priv, engine->class); if (WARN_ON(engine->context_size > BIT(20))) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 69ad875fd011..f3dbfe7ae6e4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -289,11 +289,14 @@ struct intel_engine_execlists { struct intel_engine_cs { struct drm_i915_private *i915; char name[INTEL_ENGINE_CS_MAX_NAME]; + enum intel_engine_id id; - unsigned int uabi_id; unsigned int hw_id; unsigned int guc_id; + u8 uabi_id; + u8 uabi_class; + u8 class; u8 instance; u32 context_size; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index ac3c6503ca27..1f7dfb22a7c2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -86,6 +86,22 @@ enum i915_mocs_table_index { I915_MOCS_CACHED, }; +/* + * Different engines serve different roles, and there may be more than one + * engine serving each role. enum drm_i915_gem_engine_class provides a + * classification of the role of the engine, which may be used when requesting + * operations to be performed on a certain subset of engines, or for providing + * information about that group. + */ +enum drm_i915_gem_engine_class { + I915_ENGINE_CLASS_RENDER = 0, + I915_ENGINE_CLASS_COPY = 1, + I915_ENGINE_CLASS_VIDEO = 2, + I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, + + I915_ENGINE_CLASS_INVALID = -1 +}; + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use From ae6c4574782dbfebcbf1f7e3620bcaf58ceb69e3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 14:26:28 +0000 Subject: [PATCH 134/260] drm/i915: Force the switch to the i915->kernel_context In the next few patches, we will have a hard requirement that we emit a context-switch to the perma-pinned i915->kernel_context (so that we can save the HW state using that context-switch). As the first context itself may be classed as a kernel context, we want to be explicit in our comparison. For an extra-layer of finesse, we can check the last unretired context on the engine; as well as the last retired context when idle. v2: verbose verbosity v3: Always force the switch, even when the engine is idle, and update the assert that this happens before suspend. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen #v1 Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 10 ++++++---- drivers/gpu/drm/i915/intel_engine_cs.c | 26 ++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 889ae8810d5f..824515556733 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4693,14 +4693,16 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) i915_gem_object_put(obj); } -static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) +static void assert_kernel_context_is_current(struct drm_i915_private *i915) { + struct i915_gem_context *kernel_context = i915->kernel_context; struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, dev_priv, id) - GEM_BUG_ON(engine->last_retired_context && - !i915_gem_context_is_kernel(engine->last_retired_context)); + for_each_engine(engine, i915, id) { + GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request)); + GEM_BUG_ON(engine->last_retired_context != kernel_context); + } } void i915_gem_sanitize(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index bded9c40dbd5..6d0b38189a7d 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1596,10 +1596,32 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) return true; } +/** + * intel_engine_has_kernel_context: + * @engine: the engine + * + * Returns true if the last context to be executed on this engine, or has been + * executed if the engine is already idle, is the kernel context + * (#i915.kernel_context). + */ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) { - return (!engine->last_retired_context || - i915_gem_context_is_kernel(engine->last_retired_context)); + const struct i915_gem_context * const kernel_context = + engine->i915->kernel_context; + struct drm_i915_gem_request *rq; + + lockdep_assert_held(&engine->i915->drm.struct_mutex); + + /* + * Check the last context seen by the engine. If active, it will be + * the last request that remains in the timeline. When idle, it is + * the last executed context as tracked by retirement. + */ + rq = __i915_gem_active_peek(&engine->timeline->last_request); + if (rq) + return rq->ctx == kernel_context; + else + return engine->last_retired_context == kernel_context; } void intel_engines_reset_default_submission(struct drm_i915_private *i915) From f58d13d5717938d4dfcc82a2eeba0a6d7644f6e5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 14:26:29 +0000 Subject: [PATCH 135/260] drm/i915: Move GT powersaving init to i915_gem_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GT powersaving is tightly coupled to the request infrastructure. To avoid complications with the order of initialisation in the next patch (where we want to send requests to hw during GEM init) move the powersaving initialisation into the purview of i915_gem_init(). Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/intel_display.c | 2 -- drivers/gpu/drm/i915/intel_pm.c | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 824515556733..37586f703c1e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5021,6 +5021,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) goto out_unlock; + intel_init_gt_powersave(dev_priv); + ret = i915_gem_init_hw(dev_priv); if (ret == -EIO) { /* Allow engine initialisation to fail by marking the GPU as diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 435dc18aa239..da11ba898621 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15198,8 +15198,6 @@ void intel_modeset_gem_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - intel_init_gt_powersave(dev_priv); - intel_init_clock_gating(dev_priv); intel_setup_overlay(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e09377df590d..48a127ac6de5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7918,7 +7918,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) intel_runtime_pm_get(dev_priv); } - mutex_lock(&dev_priv->drm.struct_mutex); mutex_lock(&dev_priv->pcu_lock); /* Initialize RPS limits (for userspace) */ @@ -7960,7 +7959,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) rps->boost_freq = rps->max_freq; mutex_unlock(&dev_priv->pcu_lock); - mutex_unlock(&dev_priv->drm.struct_mutex); intel_autoenable_gt_powersave(dev_priv); } From cc6a818ad6bdb0d3008314cbd0fc9c9a2cd02695 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 14:26:30 +0000 Subject: [PATCH 136/260] drm/i915: Move intel_init_clock_gating() to i915_gem_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Despite its name intel_init_clock_gating applies both display clock gating workarounds; GT mmio workarounds and the occasional GT power context workaround. Worse, sometimes it includes a context register workaround which we need to apply before we record the default HW state for all contexts. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Joonas Lahtinen Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 17 +++++++++++++++-- drivers/gpu/drm/i915/intel_display.c | 2 -- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 37586f703c1e..ed335612be25 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5024,6 +5024,21 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_init_gt_powersave(dev_priv); ret = i915_gem_init_hw(dev_priv); + if (ret) + goto out_unlock; + + /* + * Despite its name intel_init_clock_gating applies both display + * clock gating workarounds; GT mmio workarounds and the occasional + * GT power context workaround. Worse, sometimes it includes a context + * register workaround which we need to apply before we record the + * default HW state for all contexts. + * + * FIXME: break up the workarounds and apply them at the right time! + */ + intel_init_clock_gating(dev_priv); + +out_unlock: if (ret == -EIO) { /* Allow engine initialisation to fail by marking the GPU as * wedged. But we only want to do this where the GPU is angry, @@ -5035,8 +5050,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) } ret = 0; } - -out_unlock: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index da11ba898621..63cc09ade9d1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15198,8 +15198,6 @@ void intel_modeset_gem_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - intel_init_clock_gating(dev_priv); - intel_setup_overlay(dev_priv); } From d378a3efb819e6d1992127122d957337571b4594 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 14:26:31 +0000 Subject: [PATCH 137/260] drm/i915: Inline intel_modeset_gem_init() intel_modeset_gem_init() now only sets up the legacy overlay, so let's remove the function and call the setup directly during driver load. This should help us find a better point in the initialisation sequence for it later. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/intel_display.c | 7 ------- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e7e9e061073b..1b440f2b90a5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -676,7 +676,7 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_uc; - intel_modeset_gem_init(dev); + intel_setup_overlay(dev_priv); if (INTEL_INFO(dev_priv)->num_pipes == 0) return 0; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c94f34f5cee3..0be9a918697a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4124,7 +4124,6 @@ void intel_device_info_dump(struct drm_i915_private *dev_priv); /* modesetting */ extern void intel_modeset_init_hw(struct drm_device *dev); extern int intel_modeset_init(struct drm_device *dev); -extern void intel_modeset_gem_init(struct drm_device *dev); extern void intel_modeset_cleanup(struct drm_device *dev); extern int intel_connector_register(struct drm_connector *); extern void intel_connector_unregister(struct drm_connector *); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 63cc09ade9d1..5c7540f3f5dc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15194,13 +15194,6 @@ void intel_display_resume(struct drm_device *dev) drm_atomic_state_put(state); } -void intel_modeset_gem_init(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - intel_setup_overlay(dev_priv); -} - int intel_connector_register(struct drm_connector *connector) { struct intel_connector *intel_connector = to_intel_connector(connector); From f4e15af7e21861445821d5f09922ef7e695269a1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 14:26:32 +0000 Subject: [PATCH 138/260] drm/i915: Mark the context state as dirty/written In the next few patches, we will want to both copy out of the context image and write a valid image into a new context. To be completely safe, we should then couple in our domain tracking to ensure that we don't have any issues with stale data remaining in unwanted cachelines. Historically, we omitted the .write=true from the call to set-gtt-domain in i915_switch_context() in order to avoid a stall between every request as we would want to wait for the previous context write from the gpu. Since then, we limit the set-gtt-domain to only occur when we first bind the vma, so once in use we will never stall, and we are sure to flush the context following a load from swap. Equally we never applied the lessons learnt from ringbuffer submission to execlists; so time to apply the flush of the lrc after load as well. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Acked-by: Joonas Lahtinen Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 32 ++++++++++++++++++------- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++--- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 020ca7c9c75f..1baaab35905c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1077,12 +1077,34 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) spin_unlock_irq(&engine->timeline->lock); } +static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) +{ + unsigned int flags; + int err; + + /* + * Clear this page out of any CPU caches for coherent swap-in/out. + * We only want to do this on the first bind so that we do not stall + * on an active context (which by nature is already on the GPU). + */ + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + if (err) + return err; + } + + flags = PIN_GLOBAL | PIN_HIGH; + if (ctx->ggtt_offset_bias) + flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias; + + return i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, flags); +} + static struct intel_ring * execlists_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; - unsigned int flags; void *vaddr; int ret; @@ -1099,11 +1121,7 @@ execlists_context_pin(struct intel_engine_cs *engine, } GEM_BUG_ON(!ce->state); - flags = PIN_GLOBAL | PIN_HIGH; - if (ctx->ggtt_offset_bias) - flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias; - - ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, flags); + ret = __context_pin(ctx, ce->state); if (ret) goto err; @@ -1123,9 +1141,7 @@ execlists_context_pin(struct intel_engine_cs *engine, ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = i915_ggtt_offset(ce->ring->vma); - ce->state->obj->mm.dirty = true; ce->state->obj->pin_global++; - i915_gem_context_get(ctx); out: return ce->ring; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 47fadf8da84e..7e2a671882fb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1363,12 +1363,13 @@ static int context_pin(struct i915_gem_context *ctx) struct i915_vma *vma = ctx->engine[RCS].state; int ret; - /* Clear this page out of any CPU caches for coherent swap-in/out. + /* + * Clear this page out of any CPU caches for coherent swap-in/out. * We only want to do this on the first bind so that we do not stall * on an active context (which by nature is already on the GPU). */ if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { - ret = i915_gem_object_set_to_gtt_domain(vma->obj, false); + ret = i915_gem_object_set_to_gtt_domain(vma->obj, true); if (ret) return ret; } @@ -1445,7 +1446,6 @@ intel_ring_context_pin(struct intel_engine_cs *engine, if (ret) goto err; - ce->state->obj->mm.dirty = true; ce->state->obj->pin_global++; } From d2b4b97933f5adacfba42dc3b9200d0e21fbe2c4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 14:26:33 +0000 Subject: [PATCH 139/260] drm/i915: Record the default hw state after reset upon load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take a copy of the HW state after a reset upon module loading by executing a context switch from a blank context to the kernel context, thus saving the default hw state over the blank context image. We can then use the default hw state to initialise any future context, ensuring that each starts with the default view of hw state. v2: Unmap our default state from the GTT after stealing it from the context. This should stop us from accidentally overwriting it via the GTT (and frees up some precious GTT space). Testcase: igt/gem_ctx_isolation Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/scheduler.c | 2 - drivers/gpu/drm/i915/i915_debugfs.c | 1 - drivers/gpu/drm/i915/i915_drv.c | 3 + drivers/gpu/drm/i915/i915_gem.c | 115 ++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_context.c | 55 +++--------- drivers/gpu/drm/i915/i915_gem_context.h | 4 +- drivers/gpu/drm/i915/intel_engine_cs.c | 17 ++++ drivers/gpu/drm/i915/intel_lrc.c | 38 +++++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 45 +++++++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + include/uapi/drm/i915_drm.h | 15 ++++ 11 files changed, 224 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f6ded475bb2c..42cc61230ca7 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -723,8 +723,6 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu) if (IS_ERR(vgpu->shadow_ctx)) return PTR_ERR(vgpu->shadow_ctx); - vgpu->shadow_ctx->engine[RCS].initialised = true; - bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES); return 0; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d89321f0468c..533ba096b9a6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1974,7 +1974,6 @@ static int i915_context_status(struct seq_file *m, void *unused) struct intel_context *ce = &ctx->engine[engine->id]; seq_printf(m, "%s: ", engine->name); - seq_putc(m, ce->initialised ? 'I' : 'i'); if (ce->state) describe_obj(m, ce->state->obj); if (ce->ring) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1b440f2b90a5..d97fe9c9439a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -406,6 +406,9 @@ static int i915_getparam(struct drm_device *dev, void *data, */ value = 1; break; + case I915_PARAM_HAS_CONTEXT_ISOLATION: + value = intel_engines_has_context_isolation(dev_priv); + break; case I915_PARAM_SLICE_MASK: value = INTEL_INFO(dev_priv)->sseu.slice_mask; if (!value) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ed335612be25..4bf118304086 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4972,6 +4972,120 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) return true; } +static int __intel_engines_record_defaults(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + /* + * As we reset the gpu during very early sanitisation, the current + * register state on the GPU should reflect its defaults values. + * We load a context onto the hw (with restore-inhibit), then switch + * over to a second context to save that default register state. We + * can then prime every new context with that state so they all start + * from the same default HW values. + */ + + ctx = i915_gem_context_create_kernel(i915, 0); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + for_each_engine(engine, i915, id) { + struct drm_i915_gem_request *rq; + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ctx; + } + + err = i915_switch_context(rq); + if (engine->init_context) + err = engine->init_context(rq); + + __i915_add_request(rq, true); + if (err) + goto err_active; + } + + err = i915_gem_switch_to_kernel_context(i915); + if (err) + goto err_active; + + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + if (err) + goto err_active; + + assert_kernel_context_is_current(i915); + + for_each_engine(engine, i915, id) { + struct i915_vma *state; + + state = ctx->engine[id].state; + if (!state) + continue; + + /* + * As we will hold a reference to the logical state, it will + * not be torn down with the context, and importantly the + * object will hold onto its vma (making it possible for a + * stray GTT write to corrupt our defaults). Unmap the vma + * from the GTT to prevent such accidents and reclaim the + * space. + */ + err = i915_vma_unbind(state); + if (err) + goto err_active; + + err = i915_gem_object_set_to_cpu_domain(state->obj, false); + if (err) + goto err_active; + + engine->default_state = i915_gem_object_get(state->obj); + } + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { + unsigned int found = intel_engines_has_context_isolation(i915); + + /* + * Make sure that classes with multiple engine instances all + * share the same basic configuration. + */ + for_each_engine(engine, i915, id) { + unsigned int bit = BIT(engine->uabi_class); + unsigned int expected = engine->default_state ? bit : 0; + + if ((found & bit) != expected) { + DRM_ERROR("mismatching default context state for class %d on engine %s\n", + engine->uabi_class, engine->name); + } + } + } + +out_ctx: + i915_gem_context_set_closed(ctx); + i915_gem_context_put(ctx); + return err; + +err_active: + /* + * If we have to abandon now, we expect the engines to be idle + * and ready to be torn-down. First try to flush any remaining + * request, ensure we are pointing at the kernel context and + * then remove it. + */ + if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) + goto out_ctx; + + if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED))) + goto out_ctx; + + i915_gem_contexts_lost(i915); + goto out_ctx; +} + int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; @@ -5038,6 +5152,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_init_clock_gating(dev_priv); + ret = __intel_engines_record_defaults(dev_priv); out_unlock: if (ret == -EIO) { /* Allow engine initialisation to fail by marking the GPU as diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c05c3d7d21a5..2db040695035 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -418,8 +418,8 @@ out: return ctx; } -static struct i915_gem_context * -create_kernel_context(struct drm_i915_private *i915, int prio) +struct i915_gem_context * +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) { struct i915_gem_context *ctx; @@ -473,7 +473,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) ida_init(&dev_priv->contexts.hw_ida); /* lowest priority; idle task */ - ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN); + ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context\n"); err = PTR_ERR(ctx); @@ -487,7 +487,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) dev_priv->kernel_context = ctx; /* highest priority; preempting task */ - ctx = create_kernel_context(dev_priv, INT_MAX); + ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default preempt context\n"); err = PTR_ERR(ctx); @@ -522,28 +522,6 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) engine->context_unpin(engine, engine->last_retired_context); engine->last_retired_context = NULL; } - - /* Force the GPU state to be restored on enabling */ - if (!i915_modparams.enable_execlists) { - struct i915_gem_context *ctx; - - list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - if (!i915_gem_context_is_default(ctx)) - continue; - - for_each_engine(engine, dev_priv, id) - ctx->engine[engine->id].initialised = false; - - ctx->remap_slice = ALL_L3_SLICES(dev_priv); - } - - for_each_engine(engine, dev_priv, id) { - struct intel_context *kce = - &dev_priv->kernel_context->engine[engine->id]; - - kce->initialised = true; - } - } } void i915_gem_contexts_fini(struct drm_i915_private *i915) @@ -718,9 +696,6 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt, if (to->remap_slice) return false; - if (!to->engine[RCS].initialised) - return false; - if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) return false; @@ -795,11 +770,14 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) return ret; } - if (!to->engine[RCS].initialised || i915_gem_context_is_default(to)) - /* NB: If we inhibit the restore, the context is not allowed to - * die because future work may end up depending on valid address - * space. This means we must enforce that a page table load - * occur when this occurs. */ + if (i915_gem_context_is_kernel(to)) + /* + * The kernel context(s) is treated as pure scratch and is not + * expected to retain any state (as we sacrifice it during + * suspend and on resume it may be corrupted). This is ok, + * as nothing actually executes using the kernel context; it + * is purely used for flushing user contexts. + */ hw_flags = MI_RESTORE_INHIBIT; else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings) hw_flags = MI_FORCE_RESTORE; @@ -843,15 +821,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) to->remap_slice &= ~(1<engine[RCS].initialised) { - if (engine->init_context) { - ret = engine->init_context(req); - if (ret) - return ret; - } - to->engine[RCS].initialised = true; - } - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 44688e22a5c2..4bfb72f8e1cb 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -157,7 +157,6 @@ struct i915_gem_context { u32 *lrc_reg_state; u64 lrc_desc; int pin_count; - bool initialised; } engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ @@ -292,6 +291,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +struct i915_gem_context * +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); + static inline struct i915_gem_context * i915_gem_context_get(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 6d0b38189a7d..868c07a693b5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -687,6 +687,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + if (engine->default_state) + i915_gem_object_put(engine->default_state); + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) engine->context_unpin(engine, engine->i915->preempt_context); engine->context_unpin(engine, engine->i915->kernel_context); @@ -1705,6 +1708,20 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int which; + + which = 0; + for_each_engine(engine, i915, id) + if (engine->default_state) + which |= BIT(engine->uabi_class); + + return which; +} + static void print_request(struct drm_printer *m, struct drm_i915_gem_request *rq, const char *prefix) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1baaab35905c..0c93f27f36ee 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1180,7 +1180,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; u32 *cs; - int ret; GEM_BUG_ON(!ce->pin_count); @@ -1194,14 +1193,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) if (IS_ERR(cs)) return PTR_ERR(cs); - if (!ce->initialised) { - ret = engine->init_context(request); - if (ret) - return ret; - - ce->initialised = true; - } - /* Note that after this point, we have committed to using * this request as it is being used to both track the * state of engine initialisation and liveness of the @@ -2133,7 +2124,6 @@ static void execlists_init_reg_state(u32 *regs, CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine), _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | (HAS_RESOURCE_STREAMER(dev_priv) ? CTX_CTRL_RS_CTX_ENABLE : 0))); CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); @@ -2210,6 +2200,7 @@ populate_lr_context(struct i915_gem_context *ctx, struct intel_ring *ring) { void *vaddr; + u32 *regs; int ret; ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); @@ -2226,11 +2217,31 @@ populate_lr_context(struct i915_gem_context *ctx, } ctx_obj->mm.dirty = true; + if (engine->default_state) { + /* + * We only want to copy over the template context state; + * skipping over the headers reserved for GuC communication, + * leaving those as zero. + */ + const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE; + void *defaults; + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(defaults)) + return PTR_ERR(defaults); + + memcpy(vaddr + start, defaults + start, engine->context_size); + i915_gem_object_unpin_map(engine->default_state); + } + /* The second page of the context object contains some fields which must * be set up prior to the first execution. */ - - execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, - ctx, engine, ring); + regs = vaddr + LRC_STATE_PN * PAGE_SIZE; + execlists_init_reg_state(regs, ctx, engine, ring); + if (!engine->default_state) + regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); i915_gem_object_unpin_map(ctx_obj); @@ -2283,7 +2294,6 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ce->ring = ring; ce->state = vma; - ce->initialised |= engine->init_context == NULL; return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7e2a671882fb..464dc58af27b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1384,11 +1384,34 @@ alloc_context_vma(struct intel_engine_cs *engine) struct drm_i915_private *i915 = engine->i915; struct drm_i915_gem_object *obj; struct i915_vma *vma; + int err; obj = i915_gem_object_create(i915, engine->context_size); if (IS_ERR(obj)) return ERR_CAST(obj); + if (engine->default_state) { + void *defaults, *vaddr; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(defaults)) { + err = PTR_ERR(defaults); + goto err_map; + } + + memcpy(vaddr, defaults, engine->context_size); + + i915_gem_object_unpin_map(engine->default_state); + i915_gem_object_unpin_map(obj); + } + /* * Try to make the context utilize L3 as well as LLC. * @@ -1410,10 +1433,18 @@ alloc_context_vma(struct intel_engine_cs *engine) } vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); - if (IS_ERR(vma)) - i915_gem_object_put(obj); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } return vma; + +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); + return ERR_PTR(err); } static struct intel_ring * @@ -1449,16 +1480,6 @@ intel_ring_context_pin(struct intel_engine_cs *engine, ce->state->obj->pin_global++; } - /* The kernel context is only used as a placeholder for flushing the - * active context. It is never used for submitting user rendering and - * as such never requires the golden render context, and so we can skip - * emitting it when we switch to the kernel context. This is required - * as during eviction we cannot allocate and pin the renderstate in - * order to initialise the context. - */ - if (i915_gem_context_is_kernel(ctx)) - ce->initialised = true; - i915_gem_context_get(ctx); out: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f3dbfe7ae6e4..337222859166 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -306,6 +306,7 @@ struct intel_engine_cs { struct intel_ring *buffer; struct intel_timeline *timeline; + struct drm_i915_gem_object *default_state; struct intel_render_state *render_state; atomic_t irq_count; @@ -932,6 +933,7 @@ void intel_engines_park(struct drm_i915_private *i915); void intel_engines_unpark(struct drm_i915_private *i915); void intel_engines_reset_default_submission(struct drm_i915_private *i915); +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); bool intel_engine_can_store_dword(struct intel_engine_cs *engine); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 1f7dfb22a7c2..6c02ced663f8 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -466,6 +466,21 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 +/* + * Query whether every context (both per-file default and user created) is + * isolated (insofar as HW supports). If this parameter is not true, then + * freshly created contexts may inherit values from an existing context, + * rather than default HW values. If true, it also ensures (insofar as HW + * supports) that all state set by this context will not leak to any other + * context. + * + * As not every engine across every gen support contexts, the returned + * value reports the support of context isolation for individual engines by + * returning a bitmask of each engine class set to true if that class supports + * isolation. + */ +#define I915_PARAM_HAS_CONTEXT_ISOLATION 50 + typedef struct drm_i915_getparam { __s32 param; /* From 7c2fa7faf18f80f9fbbe7fcad8072604304db5dd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 14:26:34 +0000 Subject: [PATCH 140/260] drm/i915: Stop caching the "golden" renderstate As we now record the default HW state and so only emit the "golden" renderstate once to prepare the HW, there is no advantage in keeping the renderstate batch around as it will never be used again. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem_render_state.c | 145 +++++++------------ drivers/gpu/drm/i915/i915_gem_render_state.h | 4 +- drivers/gpu/drm/i915/intel_engine_cs.c | 9 +- drivers/gpu/drm/i915/intel_lrc.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 - 7 files changed, 56 insertions(+), 111 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0be9a918697a..40012b6daea2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -67,7 +67,6 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_render_state.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 3703dc91eeda..c2723a06fbb4 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -26,10 +26,12 @@ */ #include "i915_drv.h" +#include "i915_gem_render_state.h" #include "intel_renderstate.h" struct intel_render_state { const struct intel_renderstate_rodata *rodata; + struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 batch_offset; u32 batch_size; @@ -40,6 +42,9 @@ struct intel_render_state { static const struct intel_renderstate_rodata * render_state_get_rodata(const struct intel_engine_cs *engine) { + if (engine->id != RCS) + return NULL; + switch (INTEL_GEN(engine->i915)) { case 6: return &gen6_null_state; @@ -74,17 +79,16 @@ static int render_state_setup(struct intel_render_state *so, struct drm_i915_private *i915) { const struct intel_renderstate_rodata *rodata = so->rodata; - struct drm_i915_gem_object *obj = so->vma->obj; unsigned int i = 0, reloc_index = 0; unsigned int needs_clflush; u32 *d; int ret; - ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); + ret = i915_gem_obj_prepare_shmem_write(so->obj, &needs_clflush); if (ret) return ret; - d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0)); + d = kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0)); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; @@ -112,7 +116,7 @@ static int render_state_setup(struct intel_render_state *so, goto err; } - so->batch_offset = so->vma->node.start; + so->batch_offset = i915_ggtt_offset(so->vma); so->batch_size = rodata->batch_items * sizeof(u32); while (i % CACHELINE_DWORDS) @@ -160,9 +164,9 @@ static int render_state_setup(struct intel_render_state *so, drm_clflush_virt_range(d, i * sizeof(u32)); kunmap_atomic(d); - ret = i915_gem_object_set_to_gtt_domain(obj, false); + ret = i915_gem_object_set_to_gtt_domain(so->obj, false); out: - i915_gem_obj_finish_shmem_access(obj); + i915_gem_obj_finish_shmem_access(so->obj); return ret; err: @@ -173,112 +177,61 @@ err: #undef OUT_BATCH -int i915_gem_render_state_init(struct intel_engine_cs *engine) +int i915_gem_render_state_emit(struct drm_i915_gem_request *rq) { - struct intel_render_state *so; - const struct intel_renderstate_rodata *rodata; - struct drm_i915_gem_object *obj; - int ret; + struct intel_engine_cs *engine = rq->engine; + struct intel_render_state so = {}; /* keep the compiler happy */ + int err; - if (engine->id != RCS) + so.rodata = render_state_get_rodata(engine); + if (!so.rodata) return 0; - rodata = render_state_get_rodata(engine); - if (!rodata) - return 0; - - if (rodata->batch_items * 4 > PAGE_SIZE) + if (so.rodata->batch_items * 4 > PAGE_SIZE) return -EINVAL; - so = kmalloc(sizeof(*so), GFP_KERNEL); - if (!so) - return -ENOMEM; + so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(so.obj)) + return PTR_ERR(so.obj); - obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(obj)) { - ret = PTR_ERR(obj); - goto err_free; - } - - so->vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); - if (IS_ERR(so->vma)) { - ret = PTR_ERR(so->vma); + so.vma = i915_vma_instance(so.obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(so.vma)) { + err = PTR_ERR(so.vma); goto err_obj; } - so->rodata = rodata; - engine->render_state = so; - return 0; + err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err_vma; -err_obj: - i915_gem_object_put(obj); -err_free: - kfree(so); - return ret; -} + err = render_state_setup(&so, rq->i915); + if (err) + goto err_unpin; -int i915_gem_render_state_emit(struct drm_i915_gem_request *req) -{ - struct intel_render_state *so; - int ret; + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto err_unpin; - lockdep_assert_held(&req->i915->drm.struct_mutex); + err = engine->emit_bb_start(rq, + so.batch_offset, so.batch_size, + I915_DISPATCH_SECURE); + if (err) + goto err_unpin; - so = req->engine->render_state; - if (!so) - return 0; - - /* Recreate the page after shrinking */ - if (!i915_gem_object_has_pages(so->vma->obj)) - so->batch_offset = -1; - - ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (ret) - return ret; - - if (so->vma->node.start != so->batch_offset) { - ret = render_state_setup(so, req->i915); - if (ret) + if (so.aux_size > 8) { + err = engine->emit_bb_start(rq, + so.aux_offset, so.aux_size, + I915_DISPATCH_SECURE); + if (err) goto err_unpin; } - ret = req->engine->emit_flush(req, EMIT_INVALIDATE); - if (ret) - goto err_unpin; - - ret = req->engine->emit_bb_start(req, - so->batch_offset, so->batch_size, - I915_DISPATCH_SECURE); - if (ret) - goto err_unpin; - - if (so->aux_size > 8) { - ret = req->engine->emit_bb_start(req, - so->aux_offset, so->aux_size, - I915_DISPATCH_SECURE); - if (ret) - goto err_unpin; - } - - i915_vma_move_to_active(so->vma, req, 0); + i915_vma_move_to_active(so.vma, rq, 0); err_unpin: - i915_vma_unpin(so->vma); - return ret; -} - -void i915_gem_render_state_fini(struct intel_engine_cs *engine) -{ - struct intel_render_state *so; - struct drm_i915_gem_object *obj; - - so = fetch_and_zero(&engine->render_state); - if (!so) - return; - - obj = so->vma->obj; - - i915_vma_close(so->vma); - __i915_gem_object_release_unless_active(obj); - - kfree(so); + i915_vma_unpin(so.vma); +err_vma: + i915_vma_close(so.vma); +err_obj: + __i915_gem_object_release_unless_active(so.obj); + return err; } diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 87481845799d..86369520482e 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -26,8 +26,6 @@ struct drm_i915_gem_request; -int i915_gem_render_state_init(struct intel_engine_cs *engine); -int i915_gem_render_state_emit(struct drm_i915_gem_request *req); -void i915_gem_render_state_fini(struct intel_engine_cs *engine); +int i915_gem_render_state_emit(struct drm_i915_gem_request *rq); #endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 868c07a693b5..a0f9d0eb4bce 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -641,21 +641,15 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_unpin_preempt; - ret = i915_gem_render_state_init(engine); - if (ret) - goto err_breadcrumbs; - if (HWS_NEEDS_PHYSICAL(engine->i915)) ret = init_phys_status_page(engine); else ret = init_status_page(engine); if (ret) - goto err_rs_fini; + goto err_breadcrumbs; return 0; -err_rs_fini: - i915_gem_render_state_fini(engine); err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: @@ -682,7 +676,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) else cleanup_status_page(engine); - i915_gem_render_state_fini(engine); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0c93f27f36ee..58d050a9a866 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -136,6 +136,7 @@ #include #include #include "i915_drv.h" +#include "i915_gem_render_state.h" #include "intel_mocs.h" #define RING_EXECLIST_QFULL (1 << 0x2) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 464dc58af27b..3321b801e77d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -28,9 +28,12 @@ */ #include + #include -#include "i915_drv.h" #include + +#include "i915_drv.h" +#include "i915_gem_render_state.h" #include "i915_trace.h" #include "intel_drv.h" diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 337222859166..ef22c994038b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -165,7 +165,6 @@ struct i915_ctx_workarounds { }; struct drm_i915_gem_request; -struct intel_render_state; /* * Engine IDs definitions. @@ -307,7 +306,6 @@ struct intel_engine_cs { struct intel_timeline *timeline; struct drm_i915_gem_object *default_state; - struct intel_render_state *render_state; atomic_t irq_count; unsigned long irq_posted; From a0a8b1cf93383592fdfa86588beb8dcc4e32f036 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Nov 2017 14:06:44 +0000 Subject: [PATCH 141/260] drm/i915: Kerneldoc typo s/rps/rps_client/ Update the kerneldoc parameter name to match the real parameter name. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171109140644.10805-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4bf118304086..a2e5a54128c1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -538,7 +538,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, * @obj: i915 gem object * @flags: how to wait (under a lock, for all rendering or just for writes etc) * @timeout: how long to wait - * @rps: client (user process) to charge for any waitboosting + * @rps_client: client (user process) to charge for any waitboosting */ int i915_gem_object_wait(struct drm_i915_gem_object *obj, From 9c52d1c816baa5b8c97485b20d95af29c98d26ee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 23:24:47 +0000 Subject: [PATCH 142/260] drm/i915/selftests: Yet another forgotten mock_i915->mm initialiser Move all of the i915->mm initialisation to a private function that can be reused by the mock i915 device to save forgetting any more steps. For example, <7>[ 1542.046332] [IGT] drv_selftest: starting subtest mock_objects <4>[ 1542.123924] Setting dangerous option mock_selftests - tainting kernel <6>[ 1542.167941] i915: Performing mock selftests with st_random_seed=0x246f5ab5 st_timeout=1000 <4>[ 1542.178012] INFO: trying to register non-static key. <4>[ 1542.178027] the code is fine but needs lockdep annotation. <4>[ 1542.178032] turning off the locking correctness validator. <4>[ 1542.178041] CPU: 3 PID: 6008 Comm: kworker/3:7 Tainted: G U 4.14.0-rc8-CI-CI_DRM_3332+ #1 <4>[ 1542.178049] Hardware name: /NUC6CAYB, BIOS AYAPLCEL.86A.0040.2017.0619.1722 06/19/2017 <4>[ 1542.178144] Workqueue: events __i915_gem_free_work [i915] <4>[ 1542.178152] Call Trace: <4>[ 1542.178163] dump_stack+0x68/0x9f <4>[ 1542.178170] register_lock_class+0x3fd/0x580 <4>[ 1542.178177] ? unwind_next_frame+0x14/0x20 <4>[ 1542.178184] ? __save_stack_trace+0x73/0xd0 <4>[ 1542.178191] __lock_acquire+0xa4/0x1b00 <4>[ 1542.178254] ? __i915_gem_free_work+0x28/0xa0 [i915] <4>[ 1542.178261] ? __lock_acquire+0x4ab/0x1b00 <4>[ 1542.178268] lock_acquire+0xb0/0x200 <4>[ 1542.178273] ? lock_acquire+0xb0/0x200 <4>[ 1542.178336] ? __i915_gem_free_work+0x28/0xa0 [i915] <4>[ 1542.178344] _raw_spin_lock+0x32/0x50 <4>[ 1542.178405] ? __i915_gem_free_work+0x28/0xa0 [i915] <4>[ 1542.178468] __i915_gem_free_work+0x28/0xa0 [i915] <4>[ 1542.178476] process_one_work+0x221/0x650 <4>[ 1542.178483] worker_thread+0x4e/0x3c0 <4>[ 1542.178489] kthread+0x114/0x150 <4>[ 1542.178494] ? process_one_work+0x650/0x650 <4>[ 1542.178499] ? kthread_create_on_node+0x40/0x40 <4>[ 1542.178506] ret_from_fork+0x27/0x40 v2: Fish out i915->mm.object_stat_lock which was being inited over in i915_drv.c (Matthew) Signed-off-by: Chris Wilson Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171110232447.21618-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/i915/i915_gem.c | 26 ++++++++++++------- .../gpu/drm/i915/selftests/mock_gem_device.c | 8 +----- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d97fe9c9439a..f8600cc96f2a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -894,7 +894,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, mutex_init(&dev_priv->backlight_lock); spin_lock_init(&dev_priv->uncore.lock); - spin_lock_init(&dev_priv->mm.object_stat_lock); mutex_init(&dev_priv->sb_lock); mutex_init(&dev_priv->modeset_restore_lock); mutex_init(&dev_priv->av_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a2e5a54128c1..82e6e69c222c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5218,6 +5218,22 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv) i915_gem_detect_bit_6_swizzle(dev_priv); } +static void i915_gem_init__mm(struct drm_i915_private *i915) +{ + spin_lock_init(&i915->mm.object_stat_lock); + spin_lock_init(&i915->mm.obj_lock); + spin_lock_init(&i915->mm.free_lock); + + init_llist_head(&i915->mm.free_list); + + INIT_LIST_HEAD(&i915->mm.unbound_list); + INIT_LIST_HEAD(&i915->mm.bound_list); + INIT_LIST_HEAD(&i915->mm.fence_list); + INIT_LIST_HEAD(&i915->mm.userfault_list); + + INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); +} + int i915_gem_load_init(struct drm_i915_private *dev_priv) { @@ -5259,15 +5275,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (err) goto err_priorities; - INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); - - spin_lock_init(&dev_priv->mm.obj_lock); - spin_lock_init(&dev_priv->mm.free_lock); - init_llist_head(&dev_priv->mm.free_list); - INIT_LIST_HEAD(&dev_priv->mm.unbound_list); - INIT_LIST_HEAD(&dev_priv->mm.bound_list); - INIT_LIST_HEAD(&dev_priv->mm.fence_list); - INIT_LIST_HEAD(&dev_priv->mm.userfault_list); + i915_gem_init__mm(dev_priv); INIT_DELAYED_WORK(&dev_priv->gt.retire_work, i915_gem_retire_work_handler); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index d2bf2729c331..80f152aaedf9 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -179,9 +179,8 @@ struct drm_i915_private *mock_gem_device(void) I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_2M; - spin_lock_init(&i915->mm.object_stat_lock); - spin_lock_init(&i915->mm.obj_lock); mock_uncore_init(i915); + i915_gem_init__mm(i915); init_waitqueue_head(&i915->gpu_error.wait_queue); init_waitqueue_head(&i915->gpu_error.reset_queue); @@ -190,11 +189,6 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->wq) goto put_device; - INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); - init_llist_head(&i915->mm.free_list); - INIT_LIST_HEAD(&i915->mm.unbound_list); - INIT_LIST_HEAD(&i915->mm.bound_list); - mock_init_contexts(i915); INIT_DELAYED_WORK(&i915->gt.retire_work, mock_retire_work_handler); From 556fe36d09da5f82879e92bafa0371b4b79f7d6f Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 10 Nov 2017 12:34:53 +0100 Subject: [PATCH 143/260] drm/i915: Update watermark state correctly in sanitize_watermarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We no longer use intel_crtc->wm.active for watermarks any more, which was incorrect. But this uncovered a bug in sanitize_watermarks(), which meant that we wrote the correct watermarks, but the next update would still use the wrong hw watermarks for calculating. This caused all further updates to fail with -EINVAL and the log would reveal an error like the one below: [ 10.043902] [drm:ilk_validate_wm_level.part.8 [i915]] Sprite WM0 too large 56 (max 0) [ 10.043960] [drm:ilk_validate_pipe_wm [i915]] LP0 watermark invalid [ 10.044030] [drm:intel_crtc_atomic_check [i915]] No valid intermediate pipe watermarks are possible Signed-off-by: Maarten Lankhorst Fixes: b6b178a77210 ("drm/i915: Calculate ironlake intermediate watermarks correctly, v2.") Cc: stable@vger.kernel.org #v4.8+ Link: https://patchwork.freedesktop.org/patch/msgid/20171110113503.16253-1-maarten.lankhorst@linux.intel.com Signed-off-by: Maarten Lankhorst Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5c7540f3f5dc..63c8b9ccb31e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14443,6 +14443,8 @@ retry: cs->wm.need_postvbl_update = true; dev_priv->display.optimize_watermarks(intel_state, cs); + + to_intel_crtc_state(crtc->state)->wm = cs->wm; } put_state: From 37cd33006d02d84e08caedea2db48662c18e499e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Nov 2017 11:27:38 +0000 Subject: [PATCH 144/260] drm/i915: Remove redundant intel_autoenable_gt_powersave() Now that we always execute a context switch upon module load, there is no need to queue a delayed task for doing so. The purpose of the delayed task is to enable GT powersaving, for which we need the HW state to be valid (i.e. having loaded a context and initialised basic state). We used to defer this operation as historically it was slow (due to slow register polling, fixed with commit 1758b90e38f5 ("drm/i915: Use a hybrid scheme for fast register waits")) but now we have a requirement to save the default HW state. v2: Load the kernel context (to provide the power context) upon resume. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171112112738.1463-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 11 +----- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 31 +++++++++++---- drivers/gpu/drm/i915/intel_drv.h | 1 - drivers/gpu/drm/i915/intel_pm.c | 66 -------------------------------- 5 files changed, 24 insertions(+), 86 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f8600cc96f2a..9df7b5d59a94 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1683,8 +1683,6 @@ static int i915_drm_resume(struct drm_device *dev) intel_csr_ucode_resume(dev_priv); - i915_gem_resume(dev_priv); - i915_restore_state(dev_priv); intel_pps_unlock_regs_wa(dev_priv); intel_opregion_setup(dev_priv); @@ -1705,12 +1703,7 @@ static int i915_drm_resume(struct drm_device *dev) drm_mode_config_reset(dev); - mutex_lock(&dev->struct_mutex); - if (i915_gem_init_hw(dev_priv)) { - DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); - i915_gem_set_wedged(dev_priv); - } - mutex_unlock(&dev->struct_mutex); + i915_gem_resume(dev_priv); intel_guc_resume(dev_priv); @@ -1745,8 +1738,6 @@ static int i915_drm_resume(struct drm_device *dev) intel_opregion_notify_adapter(dev_priv, PCI_D0); - intel_autoenable_gt_powersave(dev_priv); - enable_rpm_wakeref_asserts(dev_priv); return 0; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 40012b6daea2..07b9e78bbe96 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1392,7 +1392,6 @@ struct intel_gen6_power_mgmt { struct intel_rps rps; struct intel_rc6 rc6; struct intel_llc_pstate llc_pstate; - struct delayed_work autoenable_work; }; /* defined intel_pm.c */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 82e6e69c222c..fa9e5130c986 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4806,23 +4806,38 @@ err_unlock: return ret; } -void i915_gem_resume(struct drm_i915_private *dev_priv) +void i915_gem_resume(struct drm_i915_private *i915) { - struct drm_device *dev = &dev_priv->drm; + WARN_ON(i915->gt.awake); - WARN_ON(dev_priv->gt.awake); + mutex_lock(&i915->drm.struct_mutex); + intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); - mutex_lock(&dev->struct_mutex); - i915_gem_restore_gtt_mappings(dev_priv); - i915_gem_restore_fences(dev_priv); + i915_gem_restore_gtt_mappings(i915); + i915_gem_restore_fences(i915); /* As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. */ - dev_priv->gt.resume(dev_priv); + i915->gt.resume(i915); - mutex_unlock(&dev->struct_mutex); + if (i915_gem_init_hw(i915)) + goto err_wedged; + + /* Always reload a context for powersaving. */ + if (i915_gem_switch_to_kernel_context(i915)) + goto err_wedged; + +out_unlock: + intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); + mutex_unlock(&i915->drm.struct_mutex); + return; + +err_wedged: + DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); + i915_gem_set_wedged(i915); + goto out_unlock; } void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a0a5cead868c..10bec8796239 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1880,7 +1880,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv); void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); -void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 48a127ac6de5..c1a56809f143 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7959,8 +7959,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) rps->boost_freq = rps->max_freq; mutex_unlock(&dev_priv->pcu_lock); - - intel_autoenable_gt_powersave(dev_priv); } void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) @@ -7985,9 +7983,6 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) < 6) return; - if (cancel_delayed_work_sync(&dev_priv->gt_pm.autoenable_work)) - intel_runtime_pm_put(dev_priv); - /* gen6_rps_idle() will be called later to disable interrupts */ } @@ -8146,65 +8141,6 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->pcu_lock); } -static void __intel_autoenable_gt_powersave(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, - typeof(*dev_priv), - gt_pm.autoenable_work.work); - struct intel_engine_cs *rcs; - struct drm_i915_gem_request *req; - - rcs = dev_priv->engine[RCS]; - if (rcs->last_retired_context) - goto out; - - if (!rcs->init_context) - goto out; - - mutex_lock(&dev_priv->drm.struct_mutex); - - req = i915_gem_request_alloc(rcs, dev_priv->kernel_context); - if (IS_ERR(req)) - goto unlock; - - if (!i915_modparams.enable_execlists && i915_switch_context(req) == 0) - rcs->init_context(req); - - /* Mark the device busy, calling intel_enable_gt_powersave() */ - i915_add_request(req); - -unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); -out: - intel_runtime_pm_put(dev_priv); -} - -void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) -{ - if (IS_IRONLAKE_M(dev_priv)) { - ironlake_enable_drps(dev_priv); - intel_init_emon(dev_priv); - } else if (INTEL_INFO(dev_priv)->gen >= 6) { - /* - * PCU communication is slow and this doesn't need to be - * done at any specific time, so do this out of our fast path - * to make resume and init faster. - * - * We depend on the HW RC6 power context save/restore - * mechanism when entering D3 through runtime PM suspend. So - * disable RPM until RPS/RC6 is properly setup. We can only - * get here via the driver load/system resume/runtime resume - * paths, so the _noresume version is enough (and in case of - * runtime resume it's necessary). - */ - if (queue_delayed_work(dev_priv->wq, - &dev_priv->gt_pm.autoenable_work, - round_jiffies_up_relative(HZ))) - intel_runtime_pm_get_noresume(dev_priv); - } -} - static void ibx_init_clock_gating(struct drm_i915_private *dev_priv) { /* @@ -9435,8 +9371,6 @@ void intel_pm_setup(struct drm_i915_private *dev_priv) { mutex_init(&dev_priv->pcu_lock); - INIT_DELAYED_WORK(&dev_priv->gt_pm.autoenable_work, - __intel_autoenable_gt_powersave); atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); dev_priv->runtime_pm.suspended = false; From 1b790cd9bfa8dc897710b5cdc24895cfda3d5f09 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 8 Nov 2017 15:59:44 -0800 Subject: [PATCH 145/260] drm/i915: Remove Gen9 WAs with no effect GEN8_CONFIG0 (0xD00) is a protected by a lock (bit 31) which is set by the BIOS, so there is no way we can enable the three chicken bits mandated by the WA (the BIOS should be doing it instead). v2: Rebased v3: Standalone patch References: b033bb6d5d3a ("drm/i915/gen9: Enable must set chicken bits in config0 reg") Signed-off-by: Oscar Mateo Cc: Chris Wilson Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/1510185589-9100-2-git-send-email-oscar.mateo@intel.com Reviewed-by: Mika Kuoppala Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_reg.h | 3 --- drivers/gpu/drm/i915/intel_pm.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6ef33422f762..4e92db28af9c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -355,9 +355,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ECOCHK_PPGTT_WT_HSW (0x2<<3) #define ECOCHK_PPGTT_WB_HSW (0x3<<3) -#define GEN8_CONFIG0 _MMIO(0xD00) -#define GEN9_DEFAULT_FIXES (1 << 3 | 1 << 2 | 1 << 1) - #define GAC_ECO_BITS _MMIO(0x14090) #define ECOBITS_SNB_BIT (1<<13) #define ECOBITS_PPGTT_CACHE64B (3<<8) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c1a56809f143..8e7f02e6008a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -75,9 +75,6 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(CHICKEN_PAR1_1, I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP); - I915_WRITE(GEN8_CONFIG0, - I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES); - /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */ I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); From f9bab55ee60d6c885eb30dab1f20827053e23355 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 10 Nov 2017 12:34:54 +0100 Subject: [PATCH 146/260] drm/i915: Remove bogus ips_enabled check. The flag just tells us IPS can be enabled, if the primary plane is not enabled it means IPS might not be. This never triggered in CI because we don't have a haswell ULT there, but can be reproduced easily with kms_atomic_transitions.plane-all-modeset-transition Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171110113503.16253-2-maarten.lankhorst@linux.intel.com [mlankhorst: Remove from haswell_get_pipe_config too. (danvet)] Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 63c8b9ccb31e..2ecdd20ea772 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9233,10 +9233,6 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, ironlake_get_pfit_config(crtc, pipe_config); } - if (IS_HASWELL(dev_priv)) - pipe_config->ips_enabled = hsw_crtc_supports_ips(crtc) && - (I915_READ(IPS_CTL) & IPS_ENABLE); - if (pipe_config->cpu_transcoder != TRANSCODER_EDP && !transcoder_is_dsi(pipe_config->cpu_transcoder)) { pipe_config->pixel_multiplier = @@ -11259,10 +11255,6 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_CLOCK_FUZZY(pixel_rate); } - /* BDW+ don't expose a synchronous way to read the state */ - if (IS_HASWELL(dev_priv)) - PIPE_CONF_CHECK_I(ips_enabled); - PIPE_CONF_CHECK_I(double_wide); PIPE_CONF_CHECK_P(shared_dpll); From d640bf7986cf49ef99aee72b6bb1433d6527f687 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 10 Nov 2017 12:34:55 +0100 Subject: [PATCH 147/260] drm/i915: Check boolean options in intel_pipe_config_compare with its own macro Add PIPE_CONF_CHECK_BOOL for boolean options, which are printed with yesno. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171110113503.16253-3-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 30 ++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2ecdd20ea772..f56e7d29ed69 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11092,6 +11092,15 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, ret = false; \ } +#define PIPE_CONF_CHECK_BOOL(name) \ + if (current_config->name != pipe_config->name) { \ + pipe_config_err(adjust, __stringify(name), \ + "(expected %s, found %s)\n", \ + yesno(current_config->name), \ + yesno(pipe_config->name)); \ + ret = false; \ + } + #define PIPE_CONF_CHECK_P(name) \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ @@ -11177,7 +11186,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_I(cpu_transcoder); - PIPE_CONF_CHECK_I(has_pch_encoder); + PIPE_CONF_CHECK_BOOL(has_pch_encoder); PIPE_CONF_CHECK_I(fdi_lanes); PIPE_CONF_CHECK_M_N(fdi_m_n); @@ -11209,17 +11218,17 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vsync_end); PIPE_CONF_CHECK_I(pixel_multiplier); - PIPE_CONF_CHECK_I(has_hdmi_sink); + PIPE_CONF_CHECK_BOOL(has_hdmi_sink); if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - PIPE_CONF_CHECK_I(limited_color_range); + PIPE_CONF_CHECK_BOOL(limited_color_range); - PIPE_CONF_CHECK_I(hdmi_scrambling); - PIPE_CONF_CHECK_I(hdmi_high_tmds_clock_ratio); - PIPE_CONF_CHECK_I(has_infoframe); - PIPE_CONF_CHECK_I(ycbcr420); + PIPE_CONF_CHECK_BOOL(hdmi_scrambling); + PIPE_CONF_CHECK_BOOL(hdmi_high_tmds_clock_ratio); + PIPE_CONF_CHECK_BOOL(has_infoframe); + PIPE_CONF_CHECK_BOOL(ycbcr420); - PIPE_CONF_CHECK_I(has_audio); + PIPE_CONF_CHECK_BOOL(has_audio); PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags, DRM_MODE_FLAG_INTERLACE); @@ -11245,7 +11254,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_I(pipe_src_w); PIPE_CONF_CHECK_I(pipe_src_h); - PIPE_CONF_CHECK_I(pch_pfit.enabled); + PIPE_CONF_CHECK_BOOL(pch_pfit.enabled); if (current_config->pch_pfit.enabled) { PIPE_CONF_CHECK_X(pch_pfit.pos); PIPE_CONF_CHECK_X(pch_pfit.size); @@ -11255,7 +11264,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_CLOCK_FUZZY(pixel_rate); } - PIPE_CONF_CHECK_I(double_wide); + PIPE_CONF_CHECK_BOOL(double_wide); PIPE_CONF_CHECK_P(shared_dpll); PIPE_CONF_CHECK_X(dpll_hw_state.dpll); @@ -11293,6 +11302,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I +#undef PIPE_CONF_CHECK_BOOL #undef PIPE_CONF_CHECK_P #undef PIPE_CONF_CHECK_FLAGS #undef PIPE_CONF_CHECK_CLOCK_FUZZY From 4493e098d71996bde2f2fbee8a3d92af21e9bcc8 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 10 Nov 2017 12:34:56 +0100 Subject: [PATCH 148/260] drm/i915: Handle adjust better in intel_pipe_config_compare Some parameters use CHECK_BOOL, but should really use CHECK_BOOL_INCOMPLETE. We cannot currently check whether the inherited infoframes and audio are set up correctly. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171110113503.16253-4-maarten.lankhorst@linux.intel.com [mlankhorst: Add danvet's comment about why this is needed.] Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f56e7d29ed69..0ebf3f283b87 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11073,6 +11073,9 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, bool adjust) { bool ret = true; + bool fixup_inherited = adjust && + (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && + !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); #define PIPE_CONF_CHECK_X(name) \ if (current_config->name != pipe_config->name) { \ @@ -11101,6 +11104,22 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, ret = false; \ } +/* + * Checks state where we only read out the enabling, but not the entire + * state itself (like full infoframes or ELD for audio). These states + * require a full modeset on bootup to fix up. + */ +#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) \ + if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \ + PIPE_CONF_CHECK_BOOL(name); \ + } else { \ + pipe_config_err(adjust, __stringify(name), \ + "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)\n", \ + yesno(current_config->name), \ + yesno(pipe_config->name)); \ + ret = false; \ + } + #define PIPE_CONF_CHECK_P(name) \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ @@ -11225,10 +11244,10 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_BOOL(hdmi_scrambling); PIPE_CONF_CHECK_BOOL(hdmi_high_tmds_clock_ratio); - PIPE_CONF_CHECK_BOOL(has_infoframe); + PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_infoframe); PIPE_CONF_CHECK_BOOL(ycbcr420); - PIPE_CONF_CHECK_BOOL(has_audio); + PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_audio); PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags, DRM_MODE_FLAG_INTERLACE); @@ -11303,6 +11322,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I #undef PIPE_CONF_CHECK_BOOL +#undef PIPE_CONF_CHECK_BOOL_INCOMPLETE #undef PIPE_CONF_CHECK_P #undef PIPE_CONF_CHECK_FLAGS #undef PIPE_CONF_CHECK_CLOCK_FUZZY From 10bf0a38c4084c14c57df5d635a5ba0867a3c260 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 10 Nov 2017 12:34:58 +0100 Subject: [PATCH 149/260] drm/i915: Handle locking better in i915_sink_crc. Lock the bare minimum, instead of the entire world, and use interruptible locking because we can. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171110113503.16253-6-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 46 ++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 533ba096b9a6..cb77aeab4ee2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2735,39 +2735,63 @@ static int i915_sink_crc(struct seq_file *m, void *data) struct intel_connector *connector; struct drm_connector_list_iter conn_iter; struct intel_dp *intel_dp = NULL; + struct drm_modeset_acquire_ctx ctx; int ret; u8 crc[6]; - drm_modeset_lock_all(dev); + drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); + drm_connector_list_iter_begin(dev, &conn_iter); + for_each_intel_connector_iter(connector, &conn_iter) { struct drm_crtc *crtc; - - if (!connector->base.state->best_encoder) - continue; - - crtc = connector->base.state->crtc; - if (!crtc->state->active) - continue; + struct drm_connector_state *state; if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) continue; - intel_dp = enc_to_intel_dp(connector->base.state->best_encoder); +retry: + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx); + if (ret) + goto err; + + state = connector->base.state; + if (!state->best_encoder) + continue; + + crtc = state->crtc; + ret = drm_modeset_lock(&crtc->mutex, &ctx); + if (ret) + goto err; + + if (!crtc->state->active) + continue; + + intel_dp = enc_to_intel_dp(state->best_encoder); ret = intel_dp_sink_crc(intel_dp, crc); if (ret) - goto out; + goto err; seq_printf(m, "%02x%02x%02x%02x%02x%02x\n", crc[0], crc[1], crc[2], crc[3], crc[4], crc[5]); goto out; + +err: + if (ret == -EDEADLK) { + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry; + } + goto out; } ret = -ENODEV; out: drm_connector_list_iter_end(&conn_iter); - drm_modeset_unlock_all(dev); + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + return ret; } From a54b19f17746b9574c30acb8cefca75ed5de0f9f Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 10 Nov 2017 19:08:39 +0000 Subject: [PATCH 150/260] drm/i915/perf: complete whitelisting for OA programming on HSW We were missing some registers and also can name one for which we only had the offset. Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171110190845.32574-2-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 3 ++- drivers/gpu/drm/i915/i915_reg.h | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 59ee808f8fd9..45aef15b9e7c 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3023,7 +3023,8 @@ static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) { return gen7_is_valid_mux_addr(dev_priv, addr) || (addr >= 0x25100 && addr <= 0x2FF90) || - addr == 0x9ec0; + (addr >= HSW_MBVID2_NOA0.reg && addr <= HSW_MBVID2_NOA9.reg) || + addr == HSW_MBVID2_MISR0.reg; } static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4e92db28af9c..0f09ba2e823b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1117,6 +1117,20 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) /* RPC unit config (Gen8+) */ #define RPM_CONFIG _MMIO(0x0D08) +/* NOA (HSW) */ +#define HSW_MBVID2_NOA0 _MMIO(0x9E80) +#define HSW_MBVID2_NOA1 _MMIO(0x9E84) +#define HSW_MBVID2_NOA2 _MMIO(0x9E88) +#define HSW_MBVID2_NOA3 _MMIO(0x9E8C) +#define HSW_MBVID2_NOA4 _MMIO(0x9E90) +#define HSW_MBVID2_NOA5 _MMIO(0x9E94) +#define HSW_MBVID2_NOA6 _MMIO(0x9E98) +#define HSW_MBVID2_NOA7 _MMIO(0x9E9C) +#define HSW_MBVID2_NOA8 _MMIO(0x9EA0) +#define HSW_MBVID2_NOA9 _MMIO(0x9EA4) + +#define HSW_MBVID2_MISR0 _MMIO(0x9EC0) + /* NOA (Gen8+) */ #define NOA_CONFIG(i) _MMIO(0x0D0C + (i) * 4) From 4407eaa9b0ccbc55408884475301f02280aec402 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 10 Nov 2017 19:08:40 +0000 Subject: [PATCH 151/260] drm/i915/perf: add support for Coffeelake GT3 We can enable GT3 as well as GT2. Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171110190845.32574-3-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_oa_cflgt3.c | 109 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_oa_cflgt3.h | 34 ++++++++ drivers/gpu/drm/i915/i915_perf.c | 3 + 5 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/i915_oa_cflgt3.c create mode 100644 drivers/gpu/drm/i915/i915_oa_cflgt3.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 1bbc5440db40..3c419455b0af 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -162,7 +162,8 @@ i915-y += i915_perf.o \ i915_oa_kblgt2.o \ i915_oa_kblgt3.o \ i915_oa_glk.o \ - i915_oa_cflgt2.o + i915_oa_cflgt2.o \ + i915_oa_cflgt3.o ifeq ($(CONFIG_DRM_I915_GVT),y) i915-y += intel_gvt.o diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 07b9e78bbe96..d511bf948cd6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3057,6 +3057,8 @@ intel_info(const struct drm_i915_private *dev_priv) (INTEL_DEVID(dev_priv) & 0x00F0) == 0x00A0) #define IS_CFL_GT2(dev_priv) (IS_COFFEELAKE(dev_priv) && \ (dev_priv)->info.gt == 2) +#define IS_CFL_GT3(dev_priv) (IS_COFFEELAKE(dev_priv) && \ + (dev_priv)->info.gt == 3) #define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support) diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt3.c b/drivers/gpu/drm/i915/i915_oa_cflgt3.c new file mode 100644 index 000000000000..42ff06fe54a3 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_cflgt3.c @@ -0,0 +1,109 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "i915_drv.h" +#include "i915_oa_cflgt3.h" + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x00000000 }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x00000000 }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x00000000 }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x9840), 0x00000080 }, + { _MMIO(0x9888), 0x11810000 }, + { _MMIO(0x9888), 0x07810013 }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930040 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} + +void +i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv) +{ + strncpy(dev_priv->perf.oa.test_config.uuid, + "577e8e2c-3fa0-4875-8743-3538d585e3b0", + UUID_STRING_LEN); + dev_priv->perf.oa.test_config.id = 1; + + dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + + dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + + dev_priv->perf.oa.test_config.sysfs_metric.name = "577e8e2c-3fa0-4875-8743-3538d585e3b0"; + dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + + dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; +} diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt3.h b/drivers/gpu/drm/i915/i915_oa_cflgt3.h new file mode 100644 index 000000000000..c13b5aac01b9 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_cflgt3.h @@ -0,0 +1,34 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_OA_CFLGT3_H__ +#define __I915_OA_CFLGT3_H__ + +extern void i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 45aef15b9e7c..7271debe0417 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -207,6 +207,7 @@ #include "i915_oa_kblgt3.h" #include "i915_oa_glk.h" #include "i915_oa_cflgt2.h" +#include "i915_oa_cflgt3.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -2934,6 +2935,8 @@ void i915_perf_register(struct drm_i915_private *dev_priv) } else if (IS_COFFEELAKE(dev_priv)) { if (IS_CFL_GT2(dev_priv)) i915_perf_load_test_config_cflgt2(dev_priv); + if (IS_CFL_GT3(dev_priv)) + i915_perf_load_test_config_cflgt3(dev_priv); } if (dev_priv->perf.oa.test_config.id == 0) From ba6b7c1ab26f223dd9b19c73df7813e944444d36 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 10 Nov 2017 19:08:41 +0000 Subject: [PATCH 152/260] drm/i915/perf: refactor perf setup Gen8/9 aren't very different and we can merge some of this code. Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171110190845.32574-4-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 48 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 7271debe0417..802928c54f06 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3423,41 +3423,46 @@ void i915_perf_init(struct drm_i915_private *dev_priv) * worth the complexity to maintain now that BDW+ enable * execlist mode by default. */ - dev_priv->perf.oa.ops.is_valid_b_counter_reg = - gen7_is_valid_b_counter_addr; - dev_priv->perf.oa.ops.is_valid_mux_reg = - gen8_is_valid_mux_addr; - dev_priv->perf.oa.ops.is_valid_flex_reg = - gen8_is_valid_flex_addr; + dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer; - dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set; dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable; dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable; dev_priv->perf.oa.ops.read = gen8_oa_read; dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read; - dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; + if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv)) { + dev_priv->perf.oa.ops.is_valid_b_counter_reg = + gen7_is_valid_b_counter_addr; + dev_priv->perf.oa.ops.is_valid_mux_reg = + gen8_is_valid_mux_addr; + dev_priv->perf.oa.ops.is_valid_flex_reg = + gen8_is_valid_flex_addr; - if (IS_GEN8(dev_priv)) { - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; - - dev_priv->perf.oa.timestamp_frequency = 12500000; - - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); if (IS_CHERRYVIEW(dev_priv)) { dev_priv->perf.oa.ops.is_valid_mux_reg = chv_is_valid_mux_addr; } - } else if (IS_GEN9(dev_priv)) { - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); + dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; + dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set; + + if (IS_GEN8(dev_priv)) { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; + + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); + } else { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); + } switch (dev_priv->info.platform) { + case INTEL_BROADWELL: + dev_priv->perf.oa.timestamp_frequency = 12500000; + break; case INTEL_BROXTON: case INTEL_GEMINILAKE: dev_priv->perf.oa.timestamp_frequency = 19200000; @@ -3468,9 +3473,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.timestamp_frequency = 12000000; break; default: - /* Leave timestamp_frequency to 0 so we can - * detect unsupported platforms. - */ break; } } From 5888576b0b5fedaaee278879b98b66c21984c4e6 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 10 Nov 2017 19:08:42 +0000 Subject: [PATCH 153/260] drm/i915: fix register naming This name was added with the whitelisting of registers for building up OA configs. It is contained in a range gen8 whitelist : addr >= RPM_CONFIG0.reg && addr <= NOA_CONFIG(8).reg Hence why the name isn't used anywhere. v2: Fix register name again RPC->RCP (Matthew) Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171110190845.32574-5-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0f09ba2e823b..92f0c497fac8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1114,8 +1114,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RPM_CONFIG0 _MMIO(0x0D00) #define RPM_CONFIG1 _MMIO(0x0D04) -/* RPC unit config (Gen8+) */ -#define RPM_CONFIG _MMIO(0x0D08) +/* RCP unit config (Gen8+) */ +#define RCP_CONFIG _MMIO(0x0D08) /* NOA (HSW) */ #define HSW_MBVID2_NOA0 _MMIO(0x9E80) From 95690a02fb5d963e62aa16c3796af3dde01f63c9 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 10 Nov 2017 19:08:43 +0000 Subject: [PATCH 154/260] drm/i915/perf: enable perf support on CNL This adds new registers to the whitelist to configs emitted from userspace. Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171110190845.32574-6-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/i915_oa_cnl.c | 121 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_oa_cnl.h | 34 ++++++++ drivers/gpu/drm/i915/i915_perf.c | 41 +++++++++- drivers/gpu/drm/i915/i915_reg.h | 5 ++ 5 files changed, 202 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_oa_cnl.c create mode 100644 drivers/gpu/drm/i915/i915_oa_cnl.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 3c419455b0af..f7afd44214b5 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -163,7 +163,8 @@ i915-y += i915_perf.o \ i915_oa_kblgt3.o \ i915_oa_glk.o \ i915_oa_cflgt2.o \ - i915_oa_cflgt3.o + i915_oa_cflgt3.o \ + i915_oa_cnl.o ifeq ($(CONFIG_DRM_I915_GVT),y) i915-y += intel_gvt.o diff --git a/drivers/gpu/drm/i915/i915_oa_cnl.c b/drivers/gpu/drm/i915/i915_oa_cnl.c new file mode 100644 index 000000000000..ff0ac3627cc4 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_cnl.c @@ -0,0 +1,121 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "i915_drv.h" +#include "i915_oa_cnl.h" + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x0000ffff }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x0000ffff }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x0000ffff }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0xd04), 0x00000200 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x17060000 }, + { _MMIO(0x9840), 0x00000000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x13034000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x07060066 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x05060000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x0f080040 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x07091000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x0f041000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x1d004000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x35000000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x49000000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x3d000000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x31000000 }, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} + +void +i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv) +{ + strncpy(dev_priv->perf.oa.test_config.uuid, + "db41edd4-d8e7-4730-ad11-b9a2d6833503", + UUID_STRING_LEN); + dev_priv->perf.oa.test_config.id = 1; + + dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + + dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + + dev_priv->perf.oa.test_config.sysfs_metric.name = "db41edd4-d8e7-4730-ad11-b9a2d6833503"; + dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + + dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; +} diff --git a/drivers/gpu/drm/i915/i915_oa_cnl.h b/drivers/gpu/drm/i915/i915_oa_cnl.h new file mode 100644 index 000000000000..fb918b131105 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_cnl.h @@ -0,0 +1,34 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_OA_CNL_H__ +#define __I915_OA_CNL_H__ + +extern void i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 802928c54f06..00be015e01df 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -208,6 +208,7 @@ #include "i915_oa_glk.h" #include "i915_oa_cflgt2.h" #include "i915_oa_cflgt3.h" +#include "i915_oa_cnl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -1852,7 +1853,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv, * be read back from automatically triggered reports, as part of the * RPT_ID field. */ - if (IS_GEN9(dev_priv)) { + if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) { I915_WRITE(GEN8_OA_DEBUG, _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); @@ -1885,6 +1886,16 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) } +static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) +{ + /* Reset all contexts' slices/subslices configurations. */ + gen8_configure_all_contexts(dev_priv, NULL, false); + + /* Make sure we disable noa to save power. */ + I915_WRITE(RPM_CONFIG1, + I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE); +} + static void gen7_oa_enable(struct drm_i915_private *dev_priv) { /* @@ -2937,6 +2948,8 @@ void i915_perf_register(struct drm_i915_private *dev_priv) i915_perf_load_test_config_cflgt2(dev_priv); if (IS_CFL_GT3(dev_priv)) i915_perf_load_test_config_cflgt3(dev_priv); + } else if (IS_CANNONLAKE(dev_priv)) { + i915_perf_load_test_config_cnl(dev_priv); } if (dev_priv->perf.oa.test_config.id == 0) @@ -3022,6 +3035,12 @@ static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) (addr >= RPM_CONFIG0.reg && addr <= NOA_CONFIG(8).reg); } +static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + return gen8_is_valid_mux_addr(dev_priv, addr) || + (addr >= OA_PERFCNT3_LO.reg && addr <= OA_PERFCNT4_HI.reg); +} + static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) { return gen7_is_valid_mux_addr(dev_priv, addr) || @@ -3475,6 +3494,26 @@ void i915_perf_init(struct drm_i915_private *dev_priv) default: break; } + } else if (IS_GEN10(dev_priv)) { + dev_priv->perf.oa.ops.is_valid_b_counter_reg = + gen7_is_valid_b_counter_addr; + dev_priv->perf.oa.ops.is_valid_mux_reg = + gen10_is_valid_mux_addr; + dev_priv->perf.oa.ops.is_valid_flex_reg = + gen8_is_valid_flex_addr; + + dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; + dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set; + + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); + + /* Default frequency, although we need to read it from + * the register as it might vary between parts. + */ + dev_priv->perf.oa.timestamp_frequency = 12000000; } } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 92f0c497fac8..31e4543ca942 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1106,6 +1106,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OA_PERFCNT1_HI _MMIO(0x91BC) #define OA_PERFCNT2_LO _MMIO(0x91C0) #define OA_PERFCNT2_HI _MMIO(0x91C4) +#define OA_PERFCNT3_LO _MMIO(0x91C8) +#define OA_PERFCNT3_HI _MMIO(0x91CC) +#define OA_PERFCNT4_LO _MMIO(0x91D8) +#define OA_PERFCNT4_HI _MMIO(0x91DC) #define OA_PERFMATRIX_LO _MMIO(0x91C8) #define OA_PERFMATRIX_HI _MMIO(0x91CC) @@ -1113,6 +1117,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0x0D00) #define RPM_CONFIG1 _MMIO(0x0D04) +#define GEN10_GT_NOA_ENABLE (1 << 9) /* RCP unit config (Gen8+) */ #define RCP_CONFIG _MMIO(0x0D08) From dab91783338bd3dd42638f89b5f7e34c57773207 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 10 Nov 2017 19:08:44 +0000 Subject: [PATCH 155/260] drm/i915: expose command stream timestamp frequency to userspace We use to have this fixed per generation, but starting with CNL userspace cannot tell just off the PCI ID. Let's make this information available. This is particularly useful for performance monitoring where much of the normalization work is done using those timestamps (this include pipeline statistics in both GL & Vulkan as well as OA reports). v2: Use variables for 24MHz/19.2MHz values (Ewelina) Renamed function & coding style (Sagar) v3: Fix frequency read on Broadwell (Sagar) Fix missing divide by 4 on <= gen4 (Sagar) Signed-off-by: Lionel Landwerlin Tested-by: Rafael Antognolli Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20171110190845.32574-7-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 2 + drivers/gpu/drm/i915/i915_drv.c | 3 + drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_reg.h | 21 +++++ drivers/gpu/drm/i915/intel_device_info.c | 107 +++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 6 ++ 6 files changed, 141 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index cb77aeab4ee2..82ff186108f1 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3269,6 +3269,8 @@ static int i915_engine_info(struct seq_file *m, void *unused) yesno(dev_priv->gt.awake)); seq_printf(m, "Global active requests: %d\n", dev_priv->gt.active_requests); + seq_printf(m, "CS timestamp frequency: %llu Hz\n", + dev_priv->info.cs_timestamp_frequency); p = drm_seq_file_printer(m); for_each_engine(engine, dev_priv, id) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9df7b5d59a94..42813f4247e2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -419,6 +419,9 @@ static int i915_getparam(struct drm_device *dev, void *data, if (!value) return -ENODEV; break; + case I915_PARAM_CS_TIMESTAMP_FREQUENCY: + value = INTEL_INFO(dev_priv)->cs_timestamp_frequency; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d511bf948cd6..b538df740ac3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -885,6 +885,8 @@ struct intel_device_info { /* Slice/subslice/EU info */ struct sseu_dev_info sseu; + u64 cs_timestamp_frequency; + struct color_luts { u16 degamma_lut_size; u16 gamma_lut_size; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 31e4543ca942..05e33a41fcc7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1116,9 +1116,24 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0x0D00) +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 0 +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 1 +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1 +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT) + #define RPM_CONFIG1 _MMIO(0x0D04) #define GEN10_GT_NOA_ENABLE (1 << 9) +/* GPM unit config (Gen9+) */ +#define CTC_MODE _MMIO(0xA26C) +#define CTC_SOURCE_PARAMETER_MASK 1 +#define CTC_SOURCE_CRYSTAL_CLOCK 0 +#define CTC_SOURCE_DIVIDE_LOGIC 1 +#define CTC_SHIFT_PARAMETER_SHIFT 1 +#define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT) + /* RCP unit config (Gen8+) */ #define RCP_CONFIG _MMIO(0x0D08) @@ -8863,6 +8878,12 @@ enum skl_power_gate { #define ILK_TIMESTAMP_HI _MMIO(0x70070) #define IVB_TIMESTAMP_CTR _MMIO(0x44070) +#define GEN9_TIMESTAMP_OVERRIDE _MMIO(0x44074) +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT 0 +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK 0x3ff +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT 12 +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK (0xf << 12) + #define _PIPE_FRMTMSTMP_A 0x70048 #define PIPE_FRMTMSTMP(pipe) \ _MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index db03d179fc85..78bf7374fbdd 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -329,6 +329,108 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_eu_pg = 0; } +static u64 read_reference_ts_freq(struct drm_i915_private *dev_priv) +{ + u32 ts_override = I915_READ(GEN9_TIMESTAMP_OVERRIDE); + u64 base_freq, frac_freq; + + base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; + base_freq *= 1000000; + + frac_freq = ((ts_override & + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); + if (frac_freq != 0) + frac_freq = 1000000 / (frac_freq + 1); + + return base_freq + frac_freq; +} + +static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv) +{ + u64 f12_5_mhz = 12500000; + u64 f19_2_mhz = 19200000; + u64 f24_mhz = 24000000; + + if (INTEL_GEN(dev_priv) <= 4) { + /* PRMs say: + * + * "The value in this register increments once every 16 + * hclks." (through the “Clocking Configuration” + * (“CLKCFG”) MCHBAR register) + */ + return (dev_priv->rawclk_freq * 1000) / 16; + } else if (INTEL_GEN(dev_priv) <= 8) { + /* PRMs say: + * + * "The PCU TSC counts 10ns increments; this timestamp + * reflects bits 38:3 of the TSC (i.e. 80ns granularity, + * rolling over every 1.5 hours). + */ + return f12_5_mhz; + } else if (INTEL_GEN(dev_priv) <= 9) { + u32 ctc_reg = I915_READ(CTC_MODE); + u64 freq = 0; + + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(dev_priv); + } else { + freq = IS_GEN9_LP(dev_priv) ? f19_2_mhz : f24_mhz; + + /* Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycle). + */ + freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >> + CTC_SHIFT_PARAMETER_SHIFT); + } + + return freq; + } else if (INTEL_GEN(dev_priv) <= 10) { + u32 ctc_reg = I915_READ(CTC_MODE); + u64 freq = 0; + u32 rpm_config_reg = 0; + + /* First figure out the reference frequency. There are 2 ways + * we can compute the frequency, either through the + * TIMESTAMP_OVERRIDE register or through RPM_CONFIG. CTC_MODE + * tells us which one we should use. + */ + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(dev_priv); + } else { + u32 crystal_clock; + + rpm_config_reg = I915_READ(RPM_CONFIG0); + crystal_clock = (rpm_config_reg & + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + switch (crystal_clock) { + case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + freq = f19_2_mhz; + break; + case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + freq = f24_mhz; + break; + } + } + + /* Now figure out how the command stream's timestamp register + * increments from this frequency (it might increment only + * every few clock cycle). + */ + freq >>= 3 - ((rpm_config_reg & + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + + return freq; + } + + DRM_ERROR("Unknown gen, unable to compute command stream timestamp frequency\n"); + return 0; +} + /* * Determine various intel_device_info fields at runtime. * @@ -450,6 +552,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) else if (INTEL_GEN(dev_priv) >= 10) gen10_sseu_info_init(dev_priv); + /* Initialize command stream timestamp frequency */ + info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv); + DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask); DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); DRM_DEBUG_DRIVER("subslice total: %u\n", @@ -465,4 +570,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->sseu.has_subslice_pg ? "y" : "n"); DRM_DEBUG_DRIVER("has EU power gating: %s\n", info->sseu.has_eu_pg ? "y" : "n"); + DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n", + info->cs_timestamp_frequency); } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6c02ced663f8..b57985929553 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -481,6 +481,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_CONTEXT_ISOLATION 50 +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP + * registers. This used to be fixed per platform but from CNL onwards, this + * might vary depending on the parts. + */ +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 + typedef struct drm_i915_getparam { __s32 param; /* From ce453b3e426f33eb56e5425f0b839f75eed621d9 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Fri, 10 Nov 2017 16:44:47 -0800 Subject: [PATCH 156/260] drm/i915: Clear per-engine fault register as early as possible From gen6, the hardware tracks address lookup failures and we should clear those registers upon startup to prevent false positives. However, this was happening before we have the engines defined (intel_uncore_init()) and the for_each_engine loop was just a nop. The earliest we can call this is inside intel_engines_init_mmio(). Suggested-by: Chris Wilson Signed-off-by: Michel Thierry Cc: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171111004448.12360-1-michel.thierry@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_engine_cs.c | 2 ++ drivers/gpu/drm/i915/intel_uncore.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a0f9d0eb4bce..70bbe8ef8f54 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -289,6 +289,8 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv) device_info->num_rings = hweight32(mask); + i915_check_and_clear_faults(dev_priv); + return 0; cleanup: diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 211acee7c31d..a78ceafcc825 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1420,8 +1420,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) iosf_mbi_register_pmic_bus_access_notifier( &dev_priv->uncore.pmic_bus_access_nb); - - i915_check_and_clear_faults(dev_priv); } void intel_uncore_fini(struct drm_i915_private *dev_priv) From b03ec3d67ab840f674eb61a3467fd971c9552aa4 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Mon, 13 Nov 2017 09:36:28 -0800 Subject: [PATCH 157/260] drm/i915: There is only one fault register from GEN8 onwards Until Haswell/Baytrail, the hardware used to have a per engine fault register (e.g. 0x4094 - render fault register, 0x4194 - media fault register and so on). But since Broadwell, all these registers were combined into a singe one and the engine id stored in bits 14:12. Not only we should not been reading (and writing to) registers that do not exist, in platforms with VCS2 (SKL), the address that would belong this engine (0x4494, VCS2_HW = 4) is already assigned to other register. v2: use less controversial function names (Chris). v3: make non-exported functions static, remove now obsolete check for engine presence before posting_read (Chris). References: IHD-OS-BDW-Vol 2c-11.15, page 75. References: IHD-OS-SKL-Vol 2c-05.16, page 350. Signed-off-by: Michel Thierry Cc: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171113173628.11689-1-michel.thierry@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 57 ++++++++++++++++++++------- drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++-- drivers/gpu/drm/i915/i915_reg.h | 2 + 3 files changed, 49 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1e40eeb31f9d..3c3a699436c9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2256,35 +2256,62 @@ static bool needs_idle_maps(struct drm_i915_private *dev_priv) return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active(); } -void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) +static void gen6_check_and_clear_faults(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - - if (INTEL_INFO(dev_priv)->gen < 6) - return; + u32 fault; for_each_engine(engine, dev_priv, id) { - u32 fault_reg; - fault_reg = I915_READ(RING_FAULT_REG(engine)); - if (fault_reg & RING_FAULT_VALID) { + fault = I915_READ(RING_FAULT_REG(engine)); + if (fault & RING_FAULT_VALID) { DRM_DEBUG_DRIVER("Unexpected fault\n" "\tAddr: 0x%08lx\n" "\tAddress space: %s\n" "\tSource ID: %d\n" "\tType: %d\n", - fault_reg & PAGE_MASK, - fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", - RING_FAULT_SRCID(fault_reg), - RING_FAULT_FAULT_TYPE(fault_reg)); + fault & PAGE_MASK, + fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); I915_WRITE(RING_FAULT_REG(engine), - fault_reg & ~RING_FAULT_VALID); + fault & ~RING_FAULT_VALID); } } - /* Engine specific init may not have been done till this point. */ - if (dev_priv->engine[RCS]) - POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); + POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); +} + +static void gen8_check_and_clear_faults(struct drm_i915_private *dev_priv) +{ + u32 fault = I915_READ(GEN8_RING_FAULT_REG); + + if (fault & RING_FAULT_VALID) { + DRM_DEBUG_DRIVER("Unexpected fault\n" + "\tAddr: 0x%08lx\n" + "\tEngine ID: %d\n" + "\tSource ID: %d\n" + "\tType: %d\n", + fault & PAGE_MASK, + GEN8_RING_FAULT_ENGINE_ID(fault), + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); + I915_WRITE(GEN8_RING_FAULT_REG, + fault & ~RING_FAULT_VALID); + } + + POSTING_READ(GEN8_RING_FAULT_REG); +} + +void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) +{ + /* From GEN8 onwards we only have one 'All Engine Fault Register' */ + if (INTEL_GEN(dev_priv) >= 8) + gen8_check_and_clear_faults(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_check_and_clear_faults(dev_priv); + else + return; } void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5c2d83a838d8..7481c8e1b5a8 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1217,11 +1217,13 @@ static void error_record_engine_registers(struct i915_gpu_state *error, if (INTEL_GEN(dev_priv) >= 6) { ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); - ee->fault_reg = I915_READ(RING_FAULT_REG(engine)); - if (INTEL_GEN(dev_priv) >= 8) + if (INTEL_GEN(dev_priv) >= 8) { gen8_record_semaphore_state(error, engine, ee); - else + ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG); + } else { gen6_record_semaphore_state(engine, ee); + ee->fault_reg = I915_READ(RING_FAULT_REG(engine)); + } } if (INTEL_GEN(dev_priv) >= 4) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 05e33a41fcc7..e52e74db4c7c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2360,6 +2360,8 @@ enum i915_power_well_id { #define ARB_MODE_SWIZZLE_BDW (1<<1) #define RENDER_HWS_PGA_GEN7 _MMIO(0x04080) #define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100*(engine)->hw_id) +#define GEN8_RING_FAULT_REG _MMIO(0x4094) +#define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7) #define RING_FAULT_GTTSEL_MASK (1<<11) #define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) #define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3) From 40c7ae457cf969b2558a8a8f3827e8399e68a986 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Nov 2017 16:46:38 -0800 Subject: [PATCH 158/260] drm/i915: Fix function name in comment Commit 78597996370c (drm/i915/bxt: Fix PPS lost state after suspend breaking eDP link training) renamed the function to intel_power_sequencer_reset() but forgot to update comment. Cc: Imre Deak Signed-off-by: Lucas De Marchi Signed-off-by: Imre Deak Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20171114004638.5186-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index efe18269115f..c9c416389d0e 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -441,7 +441,7 @@ static void pps_lock(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); /* - * See vlv_power_sequencer_reset() why we need + * See intel_power_sequencer_reset() why we need * a power domain reference here. */ intel_display_power_get(dev_priv, intel_dp->aux_power_domain); From 274b2462a0493ba73957346fe5fca168e3190b53 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Tue, 14 Nov 2017 10:25:12 +0000 Subject: [PATCH 159/260] drm/i915: Object w/o backing storage is banned by -ENXIO -ENXIO should be returned when operations are banned from changing backing storage of objects without backing storage. v4: - update "Reviewed-by". (Joonas) v3: - separate this patch from "Introduce GEM proxy" patch-set. (Joonas) v2: - update the patch description and subject to just mention objects w/o backing storage, instead of "GEM proxy". (Joonas) Signed-off-by: Tina Zhang Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1510555798-21079-1-git-send-email-tina.zhang@intel.com Reviewed-by: Joonas Lahtinen Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171114102513.22269-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fa9e5130c986..a70fbfe8555e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1725,7 +1725,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, */ if (!obj->base.filp) { i915_gem_object_put(obj); - return -EINVAL; + return -ENXIO; } addr = vm_mmap(obj->base.filp, 0, args->size, From a03f395ad78f883df490234366dd4e4fc922d174 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Tue, 14 Nov 2017 10:25:13 +0000 Subject: [PATCH 160/260] drm/i915: Introduce GEM proxy GEM proxy is a kind of GEM, whose backing physical memory is pinned and produced by guest VM and is used by host as read only. With GEM proxy, host is able to access guest physical memory through GEM object interface. As GEM proxy is such a special kind of GEM, a new flag I915_GEM_OBJECT_IS_PROXY is introduced to ban host from changing the backing storage of GEM proxy. v3: - update "Reviewed-by". (Joonas) v2: - return -ENXIO when pin and map pages of GEM proxy to kernel space. (Chris) Here are the histories of this patch in "Dma-buf support for Gvt-g" patch-set: v14: - return -ENXIO when gem proxy object is banned by ioctl. (Chris) (Daniel) v13: - add comments to GEM proxy. (Chris) - don't ban GEM proxy in i915_gem_sw_finish_ioctl. (Chris) - check GEM proxy bar after finishing i915_gem_object_wait. (Chris) - remove GEM proxy bar in i915_gem_madvise_ioctl. v6: - add gem proxy barrier in the following ioctls. (Chris) i915_gem_set_caching_ioctl i915_gem_set_domain_ioctl i915_gem_sw_finish_ioctl i915_gem_set_tiling_ioctl i915_gem_madvise_ioctl Signed-off-by: Tina Zhang Reviewed-by: Joonas Lahtinen #v1 Reviewed-by: Chris Wilson Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1510555798-21079-2-git-send-email-tina.zhang@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171114102513.22269-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 31 ++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_gem_object.h | 11 +++++++-- drivers/gpu/drm/i915/i915_gem_tiling.c | 9 ++++++++ 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a70fbfe8555e..a7979b74ce21 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1619,7 +1619,19 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (err) goto out; - /* Flush and acquire obj->pages so that we are coherent through + /* + * Proxy objects do not control access to the backing storage, ergo + * they cannot be used as a means to manipulate the cache domain + * tracking for that backing storage. The proxy object is always + * considered to be outside of any cache domain. + */ + if (i915_gem_object_is_proxy(obj)) { + err = -ENXIO; + goto out; + } + + /* + * Flush and acquire obj->pages so that we are coherent through * direct access in memory with previous cached writes through * shmemfs and that our cache domain tracking remains valid. * For example, if the obj->filp was moved to swap without us @@ -1675,6 +1687,11 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOENT; + /* + * Proxy objects are barred from CPU access, so there is no + * need to ban sw_finish as it is a nop. + */ + /* Pinned buffers may be scanout, so flush the cache */ i915_gem_object_flush_if_display(obj); i915_gem_object_put(obj); @@ -2669,7 +2686,8 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, void *ptr; int ret; - GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); + if (unlikely(!i915_gem_object_has_struct_page(obj))) + return ERR_PTR(-ENXIO); ret = mutex_lock_interruptible(&obj->mm.lock); if (ret) @@ -3888,6 +3906,15 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOENT; + /* + * The caching mode of proxy object is handled by its generator, and + * not allowed to be changed by userspace. + */ + if (i915_gem_object_is_proxy(obj)) { + ret = -ENXIO; + goto out; + } + if (obj->cache_level == level) goto out; diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 63ce38c1cce9..19fb28c177d8 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -53,8 +53,9 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +#define I915_GEM_OBJECT_IS_PROXY BIT(2) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set @@ -361,6 +362,12 @@ i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; } +static inline bool +i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) +{ + return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY; +} + static inline bool i915_gem_object_is_active(const struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 1294cf695df0..b85d7ebd9bee 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -345,6 +345,15 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOENT; + /* + * The tiling mode of proxy objects is handled by its generator, and + * not allowed to be changed by userspace. + */ + if (i915_gem_object_is_proxy(obj)) { + err = -ENXIO; + goto err; + } + if (!i915_tiling_ok(obj, args->tiling_mode, args->stride)) { err = -EINVAL; goto err; From 4036c78ccf6bf411d09dbf3ec9a9cc7c1838b7b2 Mon Sep 17 00:00:00 2001 From: James Ausmus Date: Mon, 13 Nov 2017 10:11:28 -0800 Subject: [PATCH 161/260] drm/i915/glk: Refactor handling of PLANE_COLOR_CTL for GLK+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since GLK, some plane configuration settings have moved to the PLANE_COLOR_CTL register. Refactor handling of the register to work like PLANE_CTL. This also allows us to fix the set/read of the plane Alpha Mode for GLK+. v2: Adjust ordering of platform checks to be newest->oldest, drop redundant comment about alpha blending. (Ville) v3: Move Alpha Mode bits out of skl_plane_ctl_format into skl_plane_ctl_alpha, and drop glk_plane_ctl_format, drop initialization of state->color_ctl on platforms that don't use it, and drop color_ctl local var. (Ville) v4: Consolidate skl_plane_ctl_format switch statement on formats that return the same settings. (Ville) Signed-off-by: James Ausmus Cc: Paulo Zanoni Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171113181128.2926-1-james.ausmus@intel.com Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_reg.h | 12 +++-- drivers/gpu/drm/i915/intel_display.c | 73 ++++++++++++++++++++++------ drivers/gpu/drm/i915/intel_drv.h | 5 ++ drivers/gpu/drm/i915/intel_sprite.c | 11 ++--- 4 files changed, 76 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e52e74db4c7c..d7e24426120d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6297,7 +6297,7 @@ enum { #define _PLANE_CTL_2_A 0x70280 #define _PLANE_CTL_3_A 0x70380 #define PLANE_CTL_ENABLE (1 << 31) -#define PLANE_CTL_PIPE_GAMMA_ENABLE (1 << 30) +#define PLANE_CTL_PIPE_GAMMA_ENABLE (1 << 30) /* Pre-GLK */ #define PLANE_CTL_FORMAT_MASK (0xf << 24) #define PLANE_CTL_FORMAT_YUV422 ( 0 << 24) #define PLANE_CTL_FORMAT_NV12 ( 1 << 24) @@ -6307,7 +6307,7 @@ enum { #define PLANE_CTL_FORMAT_AYUV ( 8 << 24) #define PLANE_CTL_FORMAT_INDEXED ( 12 << 24) #define PLANE_CTL_FORMAT_RGB_565 ( 14 << 24) -#define PLANE_CTL_PIPE_CSC_ENABLE (1 << 23) +#define PLANE_CTL_PIPE_CSC_ENABLE (1 << 23) /* Pre-GLK */ #define PLANE_CTL_KEY_ENABLE_MASK (0x3 << 21) #define PLANE_CTL_KEY_ENABLE_SOURCE ( 1 << 21) #define PLANE_CTL_KEY_ENABLE_DESTINATION ( 2 << 21) @@ -6320,13 +6320,13 @@ enum { #define PLANE_CTL_YUV422_VYUY ( 3 << 16) #define PLANE_CTL_DECOMPRESSION_ENABLE (1 << 15) #define PLANE_CTL_TRICKLE_FEED_DISABLE (1 << 14) -#define PLANE_CTL_PLANE_GAMMA_DISABLE (1 << 13) +#define PLANE_CTL_PLANE_GAMMA_DISABLE (1 << 13) /* Pre-GLK */ #define PLANE_CTL_TILED_MASK (0x7 << 10) #define PLANE_CTL_TILED_LINEAR ( 0 << 10) #define PLANE_CTL_TILED_X ( 1 << 10) #define PLANE_CTL_TILED_Y ( 4 << 10) #define PLANE_CTL_TILED_YF ( 5 << 10) -#define PLANE_CTL_ALPHA_MASK (0x3 << 4) +#define PLANE_CTL_ALPHA_MASK (0x3 << 4) /* Pre-GLK */ #define PLANE_CTL_ALPHA_DISABLE ( 0 << 4) #define PLANE_CTL_ALPHA_SW_PREMULTIPLY ( 2 << 4) #define PLANE_CTL_ALPHA_HW_PREMULTIPLY ( 3 << 4) @@ -6366,6 +6366,10 @@ enum { #define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) #define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) #define PLANE_COLOR_PLANE_GAMMA_DISABLE (1 << 13) +#define PLANE_COLOR_ALPHA_MASK (0x3 << 4) +#define PLANE_COLOR_ALPHA_DISABLE (0 << 4) +#define PLANE_COLOR_ALPHA_SW_PREMULTIPLY (2 << 4) +#define PLANE_COLOR_ALPHA_HW_PREMULTIPLY (3 << 4) #define _PLANE_BUF_CFG_1_A 0x7027c #define _PLANE_BUF_CFG_2_A 0x7037c #define _PLANE_NV12_BUF_CFG_1_A 0x70278 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0ebf3f283b87..ed6a4a8d9273 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3432,20 +3432,11 @@ static u32 skl_plane_ctl_format(uint32_t pixel_format) case DRM_FORMAT_RGB565: return PLANE_CTL_FORMAT_RGB_565; case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX; case DRM_FORMAT_XRGB8888: - return PLANE_CTL_FORMAT_XRGB_8888; - /* - * XXX: For ARBG/ABGR formats we default to expecting scanout buffers - * to be already pre-multiplied. We need to add a knob (or a different - * DRM_FORMAT) for user-space to configure that. - */ - case DRM_FORMAT_ABGR8888: - return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX | - PLANE_CTL_ALPHA_SW_PREMULTIPLY; case DRM_FORMAT_ARGB8888: - return PLANE_CTL_FORMAT_XRGB_8888 | - PLANE_CTL_ALPHA_SW_PREMULTIPLY; + return PLANE_CTL_FORMAT_XRGB_8888; case DRM_FORMAT_XRGB2101010: return PLANE_CTL_FORMAT_XRGB_2101010; case DRM_FORMAT_XBGR2101010: @@ -3465,6 +3456,33 @@ static u32 skl_plane_ctl_format(uint32_t pixel_format) return 0; } +/* + * XXX: For ARBG/ABGR formats we default to expecting scanout buffers + * to be already pre-multiplied. We need to add a knob (or a different + * DRM_FORMAT) for user-space to configure that. + */ +static u32 skl_plane_ctl_alpha(uint32_t pixel_format) +{ + switch (pixel_format) { + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_ARGB8888: + return PLANE_CTL_ALPHA_SW_PREMULTIPLY; + default: + return PLANE_CTL_ALPHA_DISABLE; + } +} + +static u32 glk_plane_color_ctl_alpha(uint32_t pixel_format) +{ + switch (pixel_format) { + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_ARGB8888: + return PLANE_COLOR_ALPHA_SW_PREMULTIPLY; + default: + return PLANE_COLOR_ALPHA_DISABLE; + } +} + static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) { switch (fb_modifier) { @@ -3521,7 +3539,8 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, plane_ctl = PLANE_CTL_ENABLE; - if (!IS_GEMINILAKE(dev_priv) && !IS_CANNONLAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) < 10 && !IS_GEMINILAKE(dev_priv)) { + plane_ctl |= skl_plane_ctl_alpha(fb->format->format); plane_ctl |= PLANE_CTL_PIPE_GAMMA_ENABLE | PLANE_CTL_PIPE_CSC_ENABLE | @@ -3540,6 +3559,20 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, return plane_ctl; } +u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + const struct drm_framebuffer *fb = plane_state->base.fb; + u32 plane_color_ctl = 0; + + plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; + plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; + plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE; + plane_color_ctl |= glk_plane_color_ctl_alpha(fb->format->format); + + return plane_color_ctl; +} + static int __intel_display_resume(struct drm_device *dev, struct drm_atomic_state *state, @@ -8426,7 +8459,7 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, base, offset, stride_mult, tiling; + u32 val, base, offset, stride_mult, tiling, alpha; int pipe = crtc->pipe; int fourcc, pixel_format; unsigned int aligned_height; @@ -8448,9 +8481,16 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, goto error; pixel_format = val & PLANE_CTL_FORMAT_MASK; + + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + alpha = I915_READ(PLANE_COLOR_CTL(pipe, 0)); + alpha &= PLANE_COLOR_ALPHA_MASK; + } else { + alpha = val & PLANE_CTL_ALPHA_MASK; + } + fourcc = skl_format_to_fourcc(pixel_format, - val & PLANE_CTL_ORDER_RGBX, - val & PLANE_CTL_ALPHA_MASK); + val & PLANE_CTL_ORDER_RGBX, alpha); fb->format = drm_format_info(fourcc); tiling = val & PLANE_CTL_TILED_MASK; @@ -12853,6 +12893,9 @@ intel_check_primary_plane(struct intel_plane *plane, state->ctl = i9xx_plane_ctl(crtc_state, state); } + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + state->color_ctl = glk_plane_color_ctl(crtc_state, state); + return 0; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 10bec8796239..e9b66e0cb647 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -425,6 +425,9 @@ struct intel_plane_state { /* plane control register */ u32 ctl; + /* plane color control register */ + u32 color_ctl; + /* * scaler_id * = -1 : not using a scaler @@ -1503,6 +1506,8 @@ static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state) return i915_ggtt_offset(state->vma); } +u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 4fcf80ca91dd..ce615704982a 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -263,13 +263,9 @@ skl_update_plane(struct intel_plane *plane, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), - PLANE_COLOR_PIPE_GAMMA_ENABLE | - PLANE_COLOR_PIPE_CSC_ENABLE | - PLANE_COLOR_PLANE_GAMMA_DISABLE); - } - + plane_state->color_ctl); if (key->flags) { I915_WRITE_FW(PLANE_KEYVAL(pipe, plane_id), key->min_value); I915_WRITE_FW(PLANE_KEYMAX(pipe, plane_id), key->max_value); @@ -978,6 +974,9 @@ intel_check_sprite_plane(struct intel_plane *plane, state->ctl = g4x_sprite_ctl(crtc_state, state); } + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + state->color_ctl = glk_plane_color_ctl(crtc_state, state); + return 0; } From 3657e92762c42b098cb7057ac86b1c7cd30336e5 Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Thu, 9 Nov 2017 10:37:50 +0200 Subject: [PATCH 162/260] drm/i915: Generalize transcoder looping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make looping through transcoders in intel_ddi.c more generic, let's switch to use 'for_each_pipe()' macro to do this. v2: Add a notion that we are dealing with transcoders instead of pipes (Jani) Reviewed-by: Ville Syrjälä Signed-off-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/1510216670-16848-1-git-send-email-mika.kahola@intel.com Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_ddi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 65ab55496ab7..eff3b51872eb 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1681,8 +1681,8 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum port port = encoder->port; + enum pipe p; u32 tmp; - int i; bool ret; if (!intel_display_power_get_if_enabled(dev_priv, @@ -1717,15 +1717,17 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, goto out; } - for (i = TRANSCODER_A; i <= TRANSCODER_C; i++) { - tmp = I915_READ(TRANS_DDI_FUNC_CTL(i)); + for_each_pipe(dev_priv, p) { + enum transcoder cpu_transcoder = (enum transcoder) p; + + tmp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if ((tmp & TRANS_DDI_PORT_MASK) == TRANS_DDI_SELECT_PORT(port)) { if ((tmp & TRANS_DDI_MODE_SELECT_MASK) == TRANS_DDI_MODE_SELECT_DP_MST) goto out; - *pipe = i; + *pipe = p; ret = true; goto out; From f577a03ba920df1a9163221d4823627999c59bfd Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 13 Nov 2017 23:34:53 +0000 Subject: [PATCH 163/260] drm/i915: fix 64bit divide MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ERROR: "__udivdi3" [drivers/gpu/drm/i915/i915.ko] undefined! ERROR: "__divdi3" [drivers/gpu/drm/i915/i915.ko] undefined! Store the frequency in kHz and drop 64bit divisions. v2: Use div64_u64 (Matthew) v3: store frequency in kHz to avoid 64bit divs (Chris/Ville) Fixes: dab9178333 ("drm/i915: expose command stream timestamp frequency to userspace") Reported-by: Matthew Auld Signed-off-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20171113233455.12085-3-lionel.g.landwerlin@intel.com Reviewed-by: Ewelina Musial Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_device_info.c | 29 ++++++++++++------------ 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 82ff186108f1..6ba08b0c1c22 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3269,8 +3269,8 @@ static int i915_engine_info(struct seq_file *m, void *unused) yesno(dev_priv->gt.awake)); seq_printf(m, "Global active requests: %d\n", dev_priv->gt.active_requests); - seq_printf(m, "CS timestamp frequency: %llu Hz\n", - dev_priv->info.cs_timestamp_frequency); + seq_printf(m, "CS timestamp frequency: %u kHz\n", + dev_priv->info.cs_timestamp_frequency_khz); p = drm_seq_file_printer(m); for_each_engine(engine, dev_priv, id) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 42813f4247e2..171b86f37878 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -420,7 +420,7 @@ static int i915_getparam(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_CS_TIMESTAMP_FREQUENCY: - value = INTEL_INFO(dev_priv)->cs_timestamp_frequency; + value = 1000 * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz; break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b538df740ac3..2158a758a17d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -885,7 +885,7 @@ struct intel_device_info { /* Slice/subslice/EU info */ struct sseu_dev_info sseu; - u64 cs_timestamp_frequency; + u32 cs_timestamp_frequency_khz; struct color_luts { u16 degamma_lut_size; diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 78bf7374fbdd..f3e4940fed49 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -329,29 +329,28 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_eu_pg = 0; } -static u64 read_reference_ts_freq(struct drm_i915_private *dev_priv) +static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv) { u32 ts_override = I915_READ(GEN9_TIMESTAMP_OVERRIDE); - u64 base_freq, frac_freq; + u32 base_freq, frac_freq; base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; - base_freq *= 1000000; + base_freq *= 1000; frac_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); - if (frac_freq != 0) - frac_freq = 1000000 / (frac_freq + 1); + frac_freq = 1000 / (frac_freq + 1); return base_freq + frac_freq; } -static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv) +static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) { - u64 f12_5_mhz = 12500000; - u64 f19_2_mhz = 19200000; - u64 f24_mhz = 24000000; + u32 f12_5_mhz = 12500; + u32 f19_2_mhz = 19200; + u32 f24_mhz = 24000; if (INTEL_GEN(dev_priv) <= 4) { /* PRMs say: @@ -360,7 +359,7 @@ static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv) * hclks." (through the “Clocking Configuration” * (“CLKCFG”) MCHBAR register) */ - return (dev_priv->rawclk_freq * 1000) / 16; + return dev_priv->rawclk_freq / 16; } else if (INTEL_GEN(dev_priv) <= 8) { /* PRMs say: * @@ -371,7 +370,7 @@ static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv) return f12_5_mhz; } else if (INTEL_GEN(dev_priv) <= 9) { u32 ctc_reg = I915_READ(CTC_MODE); - u64 freq = 0; + u32 freq = 0; if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { freq = read_reference_ts_freq(dev_priv); @@ -389,7 +388,7 @@ static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv) return freq; } else if (INTEL_GEN(dev_priv) <= 10) { u32 ctc_reg = I915_READ(CTC_MODE); - u64 freq = 0; + u32 freq = 0; u32 rpm_config_reg = 0; /* First figure out the reference frequency. There are 2 ways @@ -553,7 +552,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) gen10_sseu_info_init(dev_priv); /* Initialize command stream timestamp frequency */ - info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv); + info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv); DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask); DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); @@ -570,6 +569,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->sseu.has_subslice_pg ? "y" : "n"); DRM_DEBUG_DRIVER("has EU power gating: %s\n", info->sseu.has_eu_pg ? "y" : "n"); - DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n", - info->cs_timestamp_frequency); + DRM_DEBUG_DRIVER("CS timestamp frequency: %u kHz\n", + info->cs_timestamp_frequency_khz); } From 34991bd48c927712678d0cea77628328f9046923 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 11 Nov 2017 10:03:36 +0000 Subject: [PATCH 164/260] drm/i915: Unify SLICE_UNIT_LEVEL_CLKGATE w/a for cnl gem_workarounds reports that the SLICE_UNIT_LEVEL_CLKGATE write isn't sticking. Commit 0a60797a0efb ("drm/i915: Implement ReadHitWriteOnlyDisable.") presumes that SLICE_UNIT_LEVEL_CLKGATE is a masked register in the context image, but commit 90007bca6162 ("drm/i915/cnl: Introduce initial Cannonlake Workarounds.") lists it as an ordering unmasked register. The masked write will be losing the default settings if we trust the original commit. That gem_workarounds reports the value is lost entirely is more worrying though -- but it clearly suggests that it is not a masked register in the context image, so unify both w/a to use the original rmw. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103705 Fixes: 0a60797a0efb ("drm/i915: Implement ReadHitWriteOnlyDisable.") References: 90007bca6162 ("drm/i915/cnl: Introduce initial Cannonlake Workarounds.") Signed-off-by: Chris Wilson Cc: Rafael Antognolli Cc: Rodrigo Vivi Cc: Oscar Mateo Cc: Mika Kuoppala Cc: Jani Nikula Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171111100336.11020-1-chris@chris-wilson.co.uk Reviewed-by: Rafael Antognolli --- drivers/gpu/drm/i915/intel_engine_cs.c | 3 --- drivers/gpu/drm/i915/intel_pm.c | 8 +++++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 70bbe8ef8f54..1bd9462d206f 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1326,9 +1326,6 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine) WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); - /* ReadHitWriteOnlyDisable: cnl */ - WA_SET_BIT_MASKED(SLICE_UNIT_LEVEL_CLKGATE, RCCUNIT_CLKGATE_DIS); - /* WaEnablePreemptionGranularityControlByUMD:cnl */ I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8e7f02e6008a..8c69ec9eb6ee 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8471,11 +8471,13 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | DISP_FBC_MEMORY_WAKE); + val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE); + /* ReadHitWriteOnlyDisable:cnl */ + val |= RCCUNIT_CLKGATE_DIS; /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */ if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) - I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, - I915_READ(SLICE_UNIT_LEVEL_CLKGATE) | - SARBUNIT_CLKGATE_DIS); + val |= SARBUNIT_CLKGATE_DIS; + I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); /* Display WA #1133: WaFbcSkipSegments:cnl */ val = I915_READ(ILK_DPFC_CHICKEN); From f3e2b2c540503fb6087d779b0633a10c79cb6d5c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Nov 2017 13:43:39 +0000 Subject: [PATCH 165/260] drm/i915: Remove pre-production Broxton register workarounds We've begun excluding pre-production Broxton machines since commit 0102ba1fd8af ("drm/i915: Add early BXT sdv to the list of preproduction machines"), now remove the list of workaround register values for those early machines. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20170927093325.24206-1-chris@chris-wilson.co.uk Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20171114134340.5439-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 56 +------------------------- 1 file changed, 1 insertion(+), 55 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 1bd9462d206f..580e79058433 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1020,22 +1020,6 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_DG_MIRROR_FIX_ENABLE); - - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, - GEN9_RHWO_OPTIMIZATION_DISABLE); - /* - * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set - * but we do that in per ctx batchbuffer as there is an issue - * with this register not getting restored on ctx restore - */ - } - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, @@ -1051,11 +1035,6 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, GEN9_CCS_TLB_PREFETCH_ENABLE); - /* WaDisableMaskBasedCammingInRCC:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, - PIXEL_MASK_CAMMING_DISABLE); - /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | @@ -1085,8 +1064,7 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv) || - IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + IS_COFFEELAKE(dev_priv)) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); @@ -1216,17 +1194,6 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) if (ret) return ret; - /* WaStoreMultiplePTEenable:bxt */ - /* This is a requirement according to Hardware specification */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); - - /* WaSetClckGatingDisableMedia:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); - } - /* WaDisableThreadStallDopClockGating:bxt */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); @@ -1237,27 +1204,6 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE)); } - /* WaDisableSbeCacheDispatchPortSharing:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) { - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - } - - /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ - /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ - /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ - /* WaDisableLSQCROPERFforOCL:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1); - if (ret) - return ret; - - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - } - /* WaProgramL3SqcReg1DefaultForPerf:bxt */ if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { u32 val = I915_READ(GEN8_L3SQCREG1); From 70a84f3c6075031dbf004a1610ca2471f4c528aa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Nov 2017 13:43:40 +0000 Subject: [PATCH 166/260] drm/i915: Unconditionally apply the Broxton register workaround set Having removed the preproduction Broxton support (see commit 0102ba1fd8af ("drm/i915: Add early BXT sdv to the list of preproduction machines")), we know we then always need the production Broxton workaround set and do not need a predicate upon revision. Signed-off-by: Chris Wilson Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20171114134340.5439-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 29 +++++++++++--------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 580e79058433..a42b738e79e7 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1188,6 +1188,7 @@ static int skl_init_workarounds(struct intel_engine_cs *engine) static int bxt_init_workarounds(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + u32 val; int ret; ret = gen9_init_workarounds(engine); @@ -1199,29 +1200,23 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) STALL_DOP_GATING_DISABLE); /* WaDisablePooledEuLoadBalancingFix:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { - I915_WRITE(FF_SLICE_CS_CHICKEN2, - _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE)); - } + I915_WRITE(FF_SLICE_CS_CHICKEN2, + _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE)); /* WaProgramL3SqcReg1DefaultForPerf:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { - u32 val = I915_READ(GEN8_L3SQCREG1); - val &= ~L3_PRIO_CREDITS_MASK; - val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); - I915_WRITE(GEN8_L3SQCREG1, val); - } + val = I915_READ(GEN8_L3SQCREG1); + val &= ~L3_PRIO_CREDITS_MASK; + val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); + I915_WRITE(GEN8_L3SQCREG1, val); /* WaToEnableHwFixForPushConstHWBug:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaInPlaceDecompressionHang:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); return 0; } From df49ec8223050d510bd243d85c51139fc101029f Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 10 Nov 2017 16:03:19 -0800 Subject: [PATCH 167/260] drm/i915: Display WA #1185 WaDisableDARBFClkGating:cnl, glk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Display is not sending a PMRsp when a PMReq is received at the same time that all planes are turned off. State machine in the dcprunit is stuck in the WAIT4DONE state which means that there is no fill_done. WA: disable arbiter clock gating, set bit [27] of 0x46530 v2: As Ville pointed out, based on the description the issue can happen when disabling the planes, similar to WaRsPkgCStateDisplayPMReq:hsw Also description of the issue was updated on commit message to make it more clear that we need this earlier. v3: Restore comment about possibility to system hang to where we are sure about it, without speculation. (Ville). v4: Remove doubled sob. Actually do v3 changes :/ Cc: Radhakrishna Sripada Cc: Imre Deak Cc: Ville Syrjälä Signed-off-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171111000319.5040-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_display.c | 27 ++++++++++++++++++--------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d7e24426120d..cfdf4f821ac3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3852,6 +3852,7 @@ enum { * GEN9 clock gating regs */ #define GEN9_CLKGATE_DIS_0 _MMIO(0x46530) +#define DARBF_GATING_DIS (1 << 27) #define PWM2_GATING_DIS (1 << 14) #define PWM1_GATING_DIS (1 << 13) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ed6a4a8d9273..23aa7191024e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15148,6 +15148,23 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) } } +static void intel_early_display_was(struct drm_i915_private *dev_priv) +{ + /* Display WA #1185 WaDisableDARBFClkGating:cnl,glk */ + if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + DARBF_GATING_DIS); + + if (IS_HASWELL(dev_priv)) { + /* + * WaRsPkgCStateDisplayPMReq:hsw + * System hang if this isn't done before disabling all planes! + */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); + } +} + /* Scan out the current hw modeset state, * and sanitizes it to the current state */ @@ -15161,15 +15178,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev, struct intel_encoder *encoder; int i; - if (IS_HASWELL(dev_priv)) { - /* - * WaRsPkgCStateDisplayPMReq:hsw - * System hang if this isn't done before disabling all planes! - */ - I915_WRITE(CHICKEN_PAR1_1, - I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); - } - + intel_early_display_was(dev_priv); intel_modeset_readout_hw_state(dev); /* HW state is read out, now we need to sanitize this mess. */ From 7469c62cb6549cf72bfd4d63cb3fc5f34eb0b2d0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Nov 2017 13:03:00 +0000 Subject: [PATCH 168/260] drm/i915: Resume GuC before using GEM Resuming GEM presumes it can talk to hw, in particular to ensure the kernel context is loaded upon resume for powersaving. If the GuC is still asleep at this point, we upset the HW. Rearrange the resume such that we restore the original order of init-hw, resume-guc, use-gem. Fixes: 37cd33006d02 ("drm/i915: Remove redundant intel_autoenable_gt_powersave()") References: a1c419941453 ("drm/i915/guc: Add host2guc notification for suspend and resume") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Alex Dai Cc: Sagar Arun Kamble Cc: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20171114130300.25677-2-chris@chris-wilson.co.uk Reviewed-by: Sagar Arun Kamble --- drivers/gpu/drm/i915/i915_drv.c | 2 -- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 171b86f37878..1ae7031eedaa 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1708,8 +1708,6 @@ static int i915_drm_resume(struct drm_device *dev) i915_gem_resume(dev_priv); - intel_guc_resume(dev_priv); - intel_modeset_init_hw(dev); spin_lock_irq(&dev_priv->irq_lock); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a7979b74ce21..bf8fea792048 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4852,6 +4852,8 @@ void i915_gem_resume(struct drm_i915_private *i915) if (i915_gem_init_hw(i915)) goto err_wedged; + intel_guc_resume(i915); + /* Always reload a context for powersaving. */ if (i915_gem_switch_to_kernel_context(i915)) goto err_wedged; From 6e1281412ab9e6772d8f6c26e592181fcdd2ae0c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Nov 2017 22:33:46 +0000 Subject: [PATCH 169/260] drm/i915/selftests: Always initialise err smatch does not track initialised values as well as gcc, and this triggers many warnings by smatch not presented by gcc. Silence smatch by initialising the error values to -ENODEV, which we use to denote internal errors. (If we see a selftest fail with a silent -ENODEV, we know smatch was right!) v2: smatch was right about igt_create_vma(), it may unlikely fail on the first object allocation which we want to be loud about. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171114223346.25958-1-chris@chris-wilson.co.uk Reviewed-by: Dhinakaran Pandiyan --- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 8 ++++---- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_timeline.c | 2 +- drivers/gpu/drm/i915/selftests/i915_syncmap.c | 6 +++--- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index def5052862ae..c82780a9d455 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -325,7 +325,7 @@ static int igt_ctx_exec(void *arg) LIST_HEAD(objects); unsigned long ncontexts, ndwords, dw; bool first_shared_gtt = true; - int err; + int err = -ENODEV; /* Create a few different contexts (with different mm) and write * through each ctx/mm using the GPU making sure those writes end diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 581296860539..d9560d8a6cc8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -699,7 +699,7 @@ static int drunk_hole(struct drm_i915_private *i915, unsigned int *order, count, n; struct i915_vma *vma; u64 hole_size; - int err; + int err = -ENODEV; hole_size = (hole_end - hole_start) >> size; if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) @@ -958,7 +958,7 @@ static int exercise_ggtt(struct drm_i915_private *i915, u64 hole_start, hole_end, last = 0; struct drm_mm_node *node; IGT_TIMEOUT(end_time); - int err; + int err = -ENODEV; mutex_lock(&i915->drm.struct_mutex); restart: @@ -1164,7 +1164,7 @@ static int igt_gtt_reserve(void *arg) struct drm_i915_gem_object *obj, *on; LIST_HEAD(objects); u64 total; - int err; + int err = -ENODEV; /* i915_gem_gtt_reserve() tries to reserve the precise range * for the node, and evicts if it has to. So our test checks that @@ -1355,7 +1355,7 @@ static int igt_gtt_insert(void *arg) }, *ii; LIST_HEAD(objects); u64 total; - int err; + int err = -ENODEV; /* i915_gem_gtt_insert() tries to allocate some free space in the GTT * to the node, evicting if required. diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index a999161e8db1..6bce99050e94 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -332,7 +332,7 @@ static int live_nop_request(void *arg) struct intel_engine_cs *engine; struct live_test t; unsigned int id; - int err; + int err = -ENODEV; /* Submit various sized batches of empty requests, to each engine * (individually), and wait for the batch to complete. We can check diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c index 4795877abe56..3000e6a7d82d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c @@ -79,7 +79,7 @@ static int igt_sync(void *arg) }, *p; struct intel_timeline *tl; int order, offset; - int ret; + int ret = -ENODEV; tl = mock_timeline(0); if (!tl) diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c index bcab3d00a785..47f4ae18a1ef 100644 --- a/drivers/gpu/drm/i915/selftests/i915_syncmap.c +++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c @@ -333,7 +333,7 @@ static int igt_syncmap_join_below(void *arg) { struct i915_syncmap *sync; unsigned int step, order, idx; - int err; + int err = -ENODEV; i915_syncmap_init(&sync); @@ -402,7 +402,7 @@ static int igt_syncmap_neighbours(void *arg) I915_RND_STATE(prng); IGT_TIMEOUT(end_time); struct i915_syncmap *sync; - int err; + int err = -ENODEV; /* * Each leaf holds KSYNCMAP seqno. Check that when we create KSYNCMAP @@ -447,7 +447,7 @@ static int igt_syncmap_compact(void *arg) { struct i915_syncmap *sync; unsigned int idx, order; - int err; + int err = -ENODEV; i915_syncmap_init(&sync); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 2e86ec136b35..eb89e301b602 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -150,7 +150,7 @@ static int igt_vma_create(void *arg) IGT_TIMEOUT(end_time); LIST_HEAD(contexts); LIST_HEAD(objects); - int err; + int err = -ENOMEM; /* Exercise creating many vma amonst many objections, checking the * vma creation and lookup routines. From ce30560c80dead91e98a03d90fb8791e57a9b69d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Nov 2017 16:03:01 +0100 Subject: [PATCH 170/260] drm/i915: Fix false-positive assert_rpm_wakelock_held in i915_pmic_bus_access_notifier v2 assert_rpm_wakelock_held is triggered from i915_pmic_bus_access_notifier even though it gets unregistered on (runtime) suspend, this is caused by a race happening under the following circumstances: intel_runtime_pm_put does: atomic_dec(&dev_priv->pm.wakeref_count); pm_runtime_mark_last_busy(kdev); pm_runtime_put_autosuspend(kdev); And pm_runtime_put_autosuspend calls intel_runtime_suspend from a workqueue, so there is ample of time between the atomic_dec() and intel_runtime_suspend() unregistering the notifier. If the notifier gets called in this windowd assert_rpm_wakelock_held falsely triggers (at this point we're not runtime-suspended yet). This commit adds disable_rpm_wakeref_asserts and enable_rpm_wakeref_asserts calls around the intel_uncore_forcewake_get(FORCEWAKE_ALL) call in i915_pmic_bus_access_notifier fixing the false-positive WARN_ON. Changes in v2: -Reword comment explaining why disabling the wakeref asserts is ok and necessary Cc: stable@vger.kernel.org Reported-by: FKr Reviewed-by: Imre Deak Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20171110150301.9601-2-hdegoede@redhat.com --- drivers/gpu/drm/i915/intel_uncore.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index a78ceafcc825..6e01c1898aea 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1364,8 +1364,15 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb, * bus, which will be busy after this notification, leading to: * "render: timed out waiting for forcewake ack request." * errors. + * + * The notifier is unregistered during intel_runtime_suspend(), + * so it's ok to access the HW here without holding a RPM + * wake reference -> disable wakeref asserts for the time of + * the access. */ + disable_rpm_wakeref_asserts(dev_priv); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + enable_rpm_wakeref_asserts(dev_priv); break; case MBI_PMIC_BUS_ACCESS_END: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); From bedf4d79c3654921839b62246b0965ddb308b201 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Nov 2017 14:55:17 +0100 Subject: [PATCH 171/260] drm/i915: Re-register PMIC bus access notifier on runtime resume intel_uncore_suspend() unregisters the uncore code's PMIC bus access notifier and gets called on both normal and runtime suspend. intel_uncore_resume_early() re-registers the notifier, but only on normal resume. Add a new intel_uncore_runtime_resume() function which only re-registers the notifier and call that on runtime resume. Cc: stable@vger.kernel.org Reported-by: Imre Deak Reviewed-by: Imre Deak Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20171114135518.15981-2-hdegoede@redhat.com --- drivers/gpu/drm/i915/i915_drv.c | 2 ++ drivers/gpu/drm/i915/intel_uncore.c | 6 ++++++ drivers/gpu/drm/i915/intel_uncore.h | 1 + 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1ae7031eedaa..f4588add15db 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2621,6 +2621,8 @@ static int intel_runtime_resume(struct device *kdev) ret = vlv_resume_prepare(dev_priv, true); } + intel_uncore_runtime_resume(dev_priv); + /* * No point of rolling back things in case of an error, as the best * we can do is to hope that things will still work (and disable RPM). diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6e01c1898aea..b4621271e7a2 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -557,6 +557,12 @@ void intel_uncore_resume_early(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); } +void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv) +{ + iosf_mbi_register_pmic_bus_access_notifier( + &dev_priv->uncore.pmic_bus_access_nb); +} + void intel_uncore_sanitize(struct drm_i915_private *dev_priv) { i915_modparams.enable_rc6 = diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 582771251b57..9ce079b5dd0d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -134,6 +134,7 @@ bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv void intel_uncore_fini(struct drm_i915_private *dev_priv); void intel_uncore_suspend(struct drm_i915_private *dev_priv); void intel_uncore_resume_early(struct drm_i915_private *dev_priv); +void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv); u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); From 01c799c995bb8a87878b0d64edfd33014968da8e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Nov 2017 14:55:18 +0100 Subject: [PATCH 172/260] drm/i915: Call uncore_suspend before platform suspend handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quoting Ville: "the forcewake timer might still be active until the uncore suspend, and having active forcewakes while we've already told the GT wake stuff to stop acting normally doesn't seem quite right to me." Reported-by: Ville Syrjälä Suggested-by: Imre Deak Reviewed-by: Imre Deak Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20171114135518.15981-3-hdegoede@redhat.com --- drivers/gpu/drm/i915/i915_drv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f4588add15db..3423d873123a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2526,6 +2526,8 @@ static int intel_runtime_suspend(struct device *kdev) intel_runtime_pm_disable_interrupts(dev_priv); + intel_uncore_suspend(dev_priv); + ret = 0; if (IS_GEN9_LP(dev_priv)) { bxt_display_core_uninit(dev_priv); @@ -2538,6 +2540,8 @@ static int intel_runtime_suspend(struct device *kdev) if (ret) { DRM_ERROR("Runtime suspend failed, disabling it (%d)\n", ret); + intel_uncore_runtime_resume(dev_priv); + intel_runtime_pm_enable_interrupts(dev_priv); enable_rpm_wakeref_asserts(dev_priv); @@ -2545,8 +2549,6 @@ static int intel_runtime_suspend(struct device *kdev) return ret; } - intel_uncore_suspend(dev_priv); - enable_rpm_wakeref_asserts(dev_priv); WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); From 764b9f2c4d252fd893a07b39b4ab14ae8044c9cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 14 Nov 2017 21:11:27 +0200 Subject: [PATCH 173/260] drm/i915: Fix kerneldocs for intel_audio.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix copy/paste fail in kerneldocs for intel_audio_codec_disable(). Cc: Daniel Vetter Suggested-by: Daniel Vetter Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171114191127.16188-1-ville.syrjala@linux.intel.com Reviewed-by: Dhinakaran Pandiyan --- drivers/gpu/drm/i915/intel_audio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 4705194b1992..f1502a0188eb 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -655,8 +655,8 @@ void intel_audio_codec_enable(struct intel_encoder *encoder, /** * intel_audio_codec_disable - Disable the audio codec for HD audio * @encoder: encoder on which to disable audio - * @crtc_state: pointer to the old crtc state. - * @conn_state: pointer to the old connector state. + * @old_crtc_state: pointer to the old crtc state. + * @old_conn_state: pointer to the old connector state. * * The disable sequences must be performed before disabling the transcoder or * port. From db7fb60593e42824ea217ff8ed14b74376ac98c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Nov 2017 17:17:35 +0200 Subject: [PATCH 174/260] drm/i915: Check if the stolen memory "reserved" area is enabled or not MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently there are some machines that put semi-sensible looking values into the stolen "reserved" base and size, except those values are actually outside the stolen memory. There is a bit in the register which supposedly could tell us whether the reserved area is even enabled or not. Let's check for that before we go trusting the base and size. Cc: Tomi Sarvela Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171102151737.23336-1-ville.syrjala@linux.intel.com Reviewed-by: Paulo Zanoni --- drivers/gpu/drm/i915/i915_gem_stolen.c | 30 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 32 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 03e7abc7e043..44a5dbc8c23b 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -294,6 +294,12 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, ELK_STOLEN_RESERVED); dma_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) { + *base = 0; + *size = 0; + return; + } + *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; WARN_ON((reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base); @@ -313,6 +319,12 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); + if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { + *base = 0; + *size = 0; + return; + } + *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; switch (reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK) { @@ -339,6 +351,12 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); + if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { + *base = 0; + *size = 0; + return; + } + *base = reg_val & GEN7_STOLEN_RESERVED_ADDR_MASK; switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { @@ -359,6 +377,12 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); + if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { + *base = 0; + *size = 0; + return; + } + *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { @@ -387,6 +411,12 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); dma_addr_t stolen_top; + if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { + *base = 0; + *size = 0; + return; + } + stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index cfdf4f821ac3..107e2d7c9fba 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -379,6 +379,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_STOLEN_RESERVED_2M (1 << 7) #define GEN8_STOLEN_RESERVED_4M (2 << 7) #define GEN8_STOLEN_RESERVED_8M (3 << 7) +#define GEN6_STOLEN_RESERVED_ENABLE (1 << 0) /* VGA stuff */ @@ -3431,6 +3432,7 @@ enum i915_power_well_id { #define ELK_STOLEN_RESERVED _MMIO(MCHBAR_MIRROR_BASE + 0x48) #define G4X_STOLEN_RESERVED_ADDR1_MASK (0xFFFF << 16) #define G4X_STOLEN_RESERVED_ADDR2_MASK (0xFFF << 4) +#define G4X_STOLEN_RESERVED_ENABLE (1 << 0) /* Memory controller frequency in MCHBAR for Haswell (possible SNB+) */ #define DCLK _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5e04) From da1dd0dbe0244db849395df5e00cd62a6d8b0ed4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Nov 2017 17:17:36 +0200 Subject: [PATCH 175/260] drm/i915: Make the report about a bogus stolen reserved area an error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we should be properly filtering out the cases when the stolen reserved area is disabled, let's convert the debug message about a misplaced reserved area into an error. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171102151737.23336-2-ville.syrjala@linux.intel.com Reviewed-by: Paulo Zanoni --- drivers/gpu/drm/i915/i915_gem_stolen.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 44a5dbc8c23b..4f9377b5f4ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -503,9 +503,9 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (reserved_base < dev_priv->mm.stolen_base || reserved_base + reserved_size > stolen_top) { dma_addr_t reserved_top = reserved_base + reserved_size; - DRM_DEBUG_KMS("Stolen reserved area [%pad - %pad] outside stolen memory [%pad - %pad]\n", - &reserved_base, &reserved_top, - &dev_priv->mm.stolen_base, &stolen_top); + DRM_ERROR("Stolen reserved area [%pad - %pad] outside stolen memory [%pad - %pad]\n", + &reserved_base, &reserved_top, + &dev_priv->mm.stolen_base, &stolen_top); return 0; } From b099a4459d99fbc032b046e55ca77c8577f5a214 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Nov 2017 17:17:37 +0200 Subject: [PATCH 176/260] drm/i915: Use ELK stolen memory reserved detection for ILK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While I have no solid proof that ILK follows the ELK path when it comes to the stolen memory reserved area, there are some hints that it might be the case. Unfortunately my ILK doesn't have this enabled, and no way to enable it via the BIOS it seems. So let's have ILK use the ELK code path, and let's toss in a WARN into the code to see if we catch anyone with an ILK that has this enabled to further analyze the situation. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171102151737.23336-3-ville.syrjala@linux.intel.com Acked-by: Paulo Zanoni --- drivers/gpu/drm/i915/i915_gem_stolen.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 4f9377b5f4ae..1877ae9a1d9b 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -300,6 +300,12 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, return; } + /* + * Whether ILK really reuses the ELK register for this is unclear. + * Let's see if we catch anyone with this supposedly enabled on ILK. + */ + WARN(IS_GEN5(dev_priv), "ILK stolen reserved found? 0x%08x\n", reg_val); + *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; WARN_ON((reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base); @@ -466,14 +472,12 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) case 3: break; case 4: - if (IS_G4X(dev_priv)) - g4x_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - break; + if (!IS_G4X(dev_priv)) + break; + /* fall through */ case 5: - /* Assume the gen6 maximum for the older platforms. */ - reserved_size = 1024 * 1024; - reserved_base = stolen_top - reserved_size; + g4x_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); break; case 6: gen6_get_stolen_reserved(dev_priv, From fb4e14860b5e24645926ff46d3fe7237e97acc0f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Nov 2017 19:18:42 +0000 Subject: [PATCH 177/260] drm/i915/selftests: Markup __iomem for igt_gem_coherency Silence sparse warnings by using __iomem markup and io accessors. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171114191842.19063-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_coherency.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 35d778d70626..7a0d1e17c1ad 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -33,7 +33,7 @@ static int cpu_set(struct drm_i915_gem_object *obj, { unsigned int needs_clflush; struct page *page; - typeof(v) *map; + u32 *map; int err; err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); @@ -59,7 +59,7 @@ static int cpu_get(struct drm_i915_gem_object *obj, { unsigned int needs_clflush; struct page *page; - typeof(v) map; + u32 *map; int err; err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); @@ -82,7 +82,7 @@ static int gtt_set(struct drm_i915_gem_object *obj, u32 v) { struct i915_vma *vma; - typeof(v) *map; + u32 __iomem *map; int err; err = i915_gem_object_set_to_gtt_domain(obj, true); @@ -98,7 +98,7 @@ static int gtt_set(struct drm_i915_gem_object *obj, if (IS_ERR(map)) return PTR_ERR(map); - map[offset / sizeof(*map)] = v; + iowrite32(v, &map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); return 0; @@ -109,7 +109,7 @@ static int gtt_get(struct drm_i915_gem_object *obj, u32 *v) { struct i915_vma *vma; - typeof(v) map; + u32 __iomem *map; int err; err = i915_gem_object_set_to_gtt_domain(obj, false); @@ -125,7 +125,7 @@ static int gtt_get(struct drm_i915_gem_object *obj, if (IS_ERR(map)) return PTR_ERR(map); - *v = map[offset / sizeof(*map)]; + *v = ioread32(&map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); return 0; @@ -135,7 +135,7 @@ static int wc_set(struct drm_i915_gem_object *obj, unsigned long offset, u32 v) { - typeof(v) *map; + u32 *map; int err; err = i915_gem_object_set_to_wc_domain(obj, true); @@ -156,7 +156,7 @@ static int wc_get(struct drm_i915_gem_object *obj, unsigned long offset, u32 *v) { - typeof(v) map; + u32 *map; int err; err = i915_gem_object_set_to_wc_domain(obj, false); From 4667c2d54cff89d6413a49f60c784a7d17d3e7a7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Nov 2017 13:17:05 +0000 Subject: [PATCH 178/260] drm/i915: Initialise entry in intel_ppat_get() for older compilers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc-4.7.3 is confused by the guards inside intel_ppat_get() and reports: drivers/gpu/drm/i915/i915_gem_gtt.c: In function ‘intel_ppat_get’: drivers/gpu/drm/i915/i915_gem_gtt.c:3044:27: warning: ‘entry’ may be used uninitialized in this function [-Wmaybe-uninitialized] Forgive the compiler this once, and rearrange the code so that entry is always initialised. v2: Flavour with a bit of NULL (instead of ERR_PTR(-ENOSPC)) Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Zhi Wang Cc: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171115131705.16341-1-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3c3a699436c9..f92a39fc511c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3076,7 +3076,7 @@ const struct intel_ppat_entry * intel_ppat_get(struct drm_i915_private *i915, u8 value) { struct intel_ppat *ppat = &i915->ppat; - struct intel_ppat_entry *entry; + struct intel_ppat_entry *entry = NULL; unsigned int scanned, best_score; int i; @@ -3099,7 +3099,7 @@ intel_ppat_get(struct drm_i915_private *i915, u8 value) } if (scanned == ppat->max_entries) { - if (!best_score) + if (!entry) return ERR_PTR(-ENOSPC); kref_get(&entry->ref); From 24fd018aaeaecdba79f85e2232ca37a412e2754b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Nov 2017 15:12:03 +0000 Subject: [PATCH 179/260] drm/i915/selftests: Increase size for mock ringbuffer We don't actually emit any commands into the ringbuffer, so we set it very small. However, an upcoming change centralises the wait-for-space into i915_gem_request_alloc() and that imposes a minimum size upon all ringbuffers (mock or real) of MIN_SPACE_FOR_ADD_REQUEST. Grow the mock ringbuffer such that we allocate a single page for the struct+buffer, satisfying the new condition without wasting too much space. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171115151204.8105-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/mock_engine.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 0aafa8a105b8..55c0e2c15782 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -124,9 +124,11 @@ static void mock_submit_request(struct drm_i915_gem_request *request) static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { - const unsigned long sz = roundup_pow_of_two(sizeof(struct intel_ring)); + const unsigned long sz = PAGE_SIZE / 2; struct intel_ring *ring; + BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz); + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); if (!ring) return NULL; From fd13821219dda093e402c5849e5d4525bb64b4f3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Nov 2017 15:12:04 +0000 Subject: [PATCH 180/260] drm/i915: Make request's wait-for-space explicit At the start of building a request, we would wait for roughly enough space to fit the average request (to reduce the likelihood of having to wait and abort partway through request construction). To achieve we would try to begin a 0-length command packet, this just adds extra confusion so make the wait-for-space explicit, as in the next patch we want to move it from the backend to the i915_gem_request_alloc() so it can ensure that the wait-for-space is the first operation in building a new request. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171115151204.8105-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 8 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 56 ++++++++++++++++--------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 58d050a9a866..ebd9596fe83b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1180,7 +1180,7 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; - u32 *cs; + int ret; GEM_BUG_ON(!ce->pin_count); @@ -1190,9 +1190,9 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) */ request->reserved_space += EXECLISTS_REQUEST_SIZE; - cs = intel_ring_begin(request, 0); - if (IS_ERR(cs)) - return PTR_ERR(cs); + ret = intel_ring_wait_for_space(request->ring, request->reserved_space); + if (ret) + return ret; /* Note that after this point, we have committed to using * this request as it is being used to both track the diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3321b801e77d..12e734b29463 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1578,7 +1578,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) static int ring_request_alloc(struct drm_i915_gem_request *request) { - u32 *cs; + int ret; GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); @@ -1588,37 +1588,24 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) */ request->reserved_space += LEGACY_REQUEST_SIZE; - cs = intel_ring_begin(request, 0); - if (IS_ERR(cs)) - return PTR_ERR(cs); + ret = intel_ring_wait_for_space(request->ring, request->reserved_space); + if (ret) + return ret; request->reserved_space -= LEGACY_REQUEST_SIZE; return 0; } -static noinline int wait_for_space(struct drm_i915_gem_request *req, - unsigned int bytes) +static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) { - struct intel_ring *ring = req->ring; struct drm_i915_gem_request *target; long timeout; - lockdep_assert_held(&req->i915->drm.struct_mutex); + lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); if (intel_ring_update_space(ring) >= bytes) return 0; - /* - * Space is reserved in the ringbuffer for finalising the request, - * as that cannot be allowed to fail. During request finalisation, - * reserved_space is set to 0 to stop the overallocation and the - * assumption is that then we never need to wait (which has the - * risk of failing with EINTR). - * - * See also i915_gem_request_alloc() and i915_add_request(). - */ - GEM_BUG_ON(!req->reserved_space); - list_for_each_entry(target, &ring->request_list, ring_link) { /* Would completion of this request free enough space? */ if (bytes <= __intel_ring_space(target->postfix, @@ -1642,6 +1629,22 @@ static noinline int wait_for_space(struct drm_i915_gem_request *req, return 0; } +int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes) +{ + GEM_BUG_ON(bytes > ring->effective_size); + if (unlikely(bytes > ring->effective_size - ring->emit)) + bytes += ring->size - ring->emit; + + if (unlikely(bytes > ring->space)) { + int ret = wait_for_space(ring, bytes); + if (unlikely(ret)) + return ret; + } + + GEM_BUG_ON(ring->space < bytes); + return 0; +} + u32 *intel_ring_begin(struct drm_i915_gem_request *req, unsigned int num_dwords) { @@ -1681,7 +1684,20 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, } if (unlikely(total_bytes > ring->space)) { - int ret = wait_for_space(req, total_bytes); + int ret; + + /* + * Space is reserved in the ringbuffer for finalising the + * request, as that cannot be allowed to fail. During request + * finalisation, reserved_space is set to 0 to stop the + * overallocation and the assumption is that then we never need + * to wait (which has the risk of failing with EINTR). + * + * See also i915_gem_request_alloc() and i915_add_request(). + */ + GEM_BUG_ON(!req->reserved_space); + + ret = wait_for_space(ring, total_bytes); if (unlikely(ret)) return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index ef22c994038b..15a15cb876a6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -660,6 +660,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); +int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes); u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, unsigned int n); From 34cc9efc27e2623c76a69d2ad1fa2b972e27a2c1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Nov 2017 13:51:16 +0000 Subject: [PATCH 181/260] drm/i915: Remove pre-production pooled-EU w/a for Broxton WaEnablePooledEuFor2x6 only applies to preproduction models, unsupported since commit 0102ba1fd8af ("drm/i915: Add early BXT sdv to the list of preproduction machines"). Signed-off-by: Chris Wilson Cc: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20171114135116.30036-1-chris@chris-wilson.co.uk Reviewed-by: David Weinehall --- drivers/gpu/drm/i915/intel_device_info.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index f3e4940fed49..02f8bf101ccd 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -235,16 +235,6 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) #define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3; - /* - * There is a HW issue in 2x6 fused down parts that requires - * Pooled EU to be enabled as a WA. The pool configuration - * changes depending upon which subslice is fused down. This - * doesn't affect if the device has all 3 subslices enabled. - */ - /* WaEnablePooledEuFor2x6:bxt */ - info->has_pooled_eu |= (hweight8(sseu->subslice_mask) == 2 && - IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST)); - sseu->min_eu_in_pool = 0; if (info->has_pooled_eu) { if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0)) From a58c38aa6cdf0d00727cafd85cd2354dec46401a Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Fri, 8 Sep 2017 21:37:48 +0800 Subject: [PATCH 182/260] drm/i915/gvt: Add support for opregion virtualization opregion emulated with a copy from host which leads to some display bugs such as guest resolution adjustment failure due to host opregion fail to claim port D support. with a fake opregion table provided to fully emulate opregion to meet guest port requirement. v1 - initial patch v2 - reforamt opregion arrary with 0x02x output v3 - opregion array removed with opregion generation on host initizaiton v4 - rebased v3 patch from stable branch to staging branch which also has different struct child_device_config and addressed v3 review comments. Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/opregion.c | 180 ++++++++++++++++++++++++---- drivers/gpu/drm/i915/gvt/reg.h | 3 + 2 files changed, 159 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 311799136d7f..db8abac5faad 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -25,9 +25,133 @@ #include "i915_drv.h" #include "gvt.h" +/* + * Note: Only for GVT-g virtual VBT generation, other usage must + * not do like this. + */ +#define _INTEL_BIOS_PRIVATE +#include "intel_vbt_defs.h" + +#define OPREGION_SIGNATURE "IntelGraphicsMem" +#define MBOX_VBT (1<<3) + +/* device handle */ +#define DEVICE_TYPE_CRT 0x01 +#define DEVICE_TYPE_EFP1 0x04 +#define DEVICE_TYPE_EFP2 0x40 +#define DEVICE_TYPE_EFP3 0x20 +#define DEVICE_TYPE_EFP4 0x10 + +#define DEV_SIZE 38 + +struct opregion_header { + u8 signature[16]; + u32 size; + u32 opregion_ver; + u8 bios_ver[32]; + u8 vbios_ver[16]; + u8 driver_ver[16]; + u32 mboxes; + u32 driver_model; + u32 pcon; + u8 dver[32]; + u8 rsvd[124]; +} __packed; + +struct bdb_data_header { + u8 id; + u16 size; /* data size */ +} __packed; + +struct vbt { + /* header->bdb_offset point to bdb_header offset */ + struct vbt_header header; + struct bdb_header bdb_header; + + struct bdb_data_header general_features_header; + struct bdb_general_features general_features; + + struct bdb_data_header general_definitions_header; + struct bdb_general_definitions general_definitions; + struct child_device_config child0; + struct child_device_config child1; + struct child_device_config child2; + struct child_device_config child3; + + struct bdb_data_header driver_features_header; + struct bdb_driver_features driver_features; +}; + +static void virt_vbt_generation(struct vbt *v) +{ + int num_child; + + memset(v, 0, sizeof(struct vbt)); + + v->header.signature[0] = '$'; + v->header.signature[1] = 'V'; + v->header.signature[2] = 'B'; + v->header.signature[3] = 'T'; + + /* there's features depending on version! */ + v->header.version = 155; + v->header.header_size = sizeof(v->header); + v->header.vbt_size = sizeof(struct vbt) - sizeof(v->header); + v->header.bdb_offset = offsetof(struct vbt, bdb_header); + + strcpy(&v->bdb_header.signature[0], "BIOS_DATA_BLOCK"); + v->bdb_header.version = 198; /* child_dev_size = 38 */ + v->bdb_header.header_size = sizeof(v->bdb_header); + + v->bdb_header.bdb_size = sizeof(struct vbt) - sizeof(struct vbt_header) + - sizeof(struct bdb_header); + + /* general features */ + v->general_features_header.id = BDB_GENERAL_FEATURES; + v->general_features_header.size = sizeof(struct bdb_general_features); + v->general_features.int_crt_support = 0; + v->general_features.int_tv_support = 0; + + /* child device */ + num_child = 4; /* each port has one child */ + v->general_definitions_header.id = BDB_GENERAL_DEFINITIONS; + /* size will include child devices */ + v->general_definitions_header.size = + sizeof(struct bdb_general_definitions) + num_child * DEV_SIZE; + v->general_definitions.child_dev_size = DEV_SIZE; + + /* portA */ + v->child0.handle = DEVICE_TYPE_EFP1; + v->child0.device_type = DEVICE_TYPE_DP; + v->child0.dvo_port = DVO_PORT_DPA; + v->child0.aux_channel = DP_AUX_A; + + /* portB */ + v->child1.handle = DEVICE_TYPE_EFP2; + v->child1.device_type = DEVICE_TYPE_DP; + v->child1.dvo_port = DVO_PORT_DPB; + v->child1.aux_channel = DP_AUX_B; + + /* portC */ + v->child2.handle = DEVICE_TYPE_EFP3; + v->child2.device_type = DEVICE_TYPE_DP; + v->child2.dvo_port = DVO_PORT_DPC; + v->child2.aux_channel = DP_AUX_C; + + /* portD */ + v->child3.handle = DEVICE_TYPE_EFP4; + v->child3.device_type = DEVICE_TYPE_DP; + v->child3.dvo_port = DVO_PORT_DPD; + v->child3.aux_channel = DP_AUX_D; + + /* driver features */ + v->driver_features_header.id = BDB_DRIVER_FEATURES; + v->driver_features_header.size = sizeof(struct bdb_driver_features); + v->driver_features.lvds_config = BDB_DRIVER_FEATURE_NO_LVDS; +} + static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) { - u8 *buf; int i; if (WARN((vgpu_opregion(vgpu)->va), @@ -35,26 +159,11 @@ static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) vgpu->id)) return -EINVAL; - vgpu_opregion(vgpu)->va = (void *)__get_free_pages(GFP_KERNEL | - __GFP_ZERO, - get_order(INTEL_GVT_OPREGION_SIZE)); - - if (!vgpu_opregion(vgpu)->va) - return -ENOMEM; - - memcpy(vgpu_opregion(vgpu)->va, vgpu->gvt->opregion.opregion_va, - INTEL_GVT_OPREGION_SIZE); + vgpu_opregion(vgpu)->va = vgpu->gvt->opregion.opregion_va; for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; - /* for unknown reason, the value in LID field is incorrect - * which block the windows guest, so workaround it by force - * setting it to "OPEN" - */ - buf = (u8 *)vgpu_opregion(vgpu)->va; - buf[INTEL_GVT_OPREGION_CLID] = 0x3; - return 0; } @@ -139,7 +248,8 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa) */ void intel_gvt_clean_opregion(struct intel_gvt *gvt) { - memunmap(gvt->opregion.opregion_va); + free_pages((unsigned long)gvt->opregion.opregion_va, + get_order(INTEL_GVT_OPREGION_SIZE)); gvt->opregion.opregion_va = NULL; } @@ -152,17 +262,39 @@ void intel_gvt_clean_opregion(struct intel_gvt *gvt) */ int intel_gvt_init_opregion(struct intel_gvt *gvt) { + u8 *buf; + struct opregion_header *header; + struct vbt v; + gvt_dbg_core("init host opregion\n"); - pci_read_config_dword(gvt->dev_priv->drm.pdev, INTEL_GVT_PCI_OPREGION, - &gvt->opregion.opregion_pa); + gvt->opregion.opregion_va = (void *)__get_free_pages(GFP_KERNEL | + __GFP_ZERO, + get_order(INTEL_GVT_OPREGION_SIZE)); - gvt->opregion.opregion_va = memremap(gvt->opregion.opregion_pa, - INTEL_GVT_OPREGION_SIZE, MEMREMAP_WB); if (!gvt->opregion.opregion_va) { - gvt_err("fail to map host opregion\n"); - return -EFAULT; + gvt_err("fail to get memory for virt opregion\n"); + return -ENOMEM; } + + /* emulated opregion with VBT mailbox only */ + header = (struct opregion_header *)gvt->opregion.opregion_va; + memcpy(header->signature, OPREGION_SIGNATURE, + sizeof(OPREGION_SIGNATURE)); + header->mboxes = MBOX_VBT; + + /* for unknown reason, the value in LID field is incorrect + * which block the windows guest, so workaround it by force + * setting it to "OPEN" + */ + buf = (u8 *)gvt->opregion.opregion_va; + buf[INTEL_GVT_OPREGION_CLID] = 0x3; + + /* emulated vbt from virt vbt generation */ + virt_vbt_generation(&v); + memcpy(gvt->opregion.opregion_va + INTEL_GVT_OPREGION_VBT_OFFSET, + &v, sizeof(struct vbt)); + return 0; } diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h index 7d01c77a0f7a..83f2f63d7eeb 100644 --- a/drivers/gpu/drm/i915/gvt/reg.h +++ b/drivers/gpu/drm/i915/gvt/reg.h @@ -51,6 +51,9 @@ #define INTEL_GVT_OPREGION_PAGES 2 #define INTEL_GVT_OPREGION_SIZE (INTEL_GVT_OPREGION_PAGES * PAGE_SIZE) +#define INTEL_GVT_OPREGION_VBT_OFFSET 0x400 +#define INTEL_GVT_OPREGION_VBT_SIZE \ + (INTEL_GVT_OPREGION_SIZE - INTEL_GVT_OPREGION_VBT_OFFSET) #define VGT_SPRSTRIDE(pipe) _PIPE(pipe, _SPRA_STRIDE, _PLANE_STRIDE_2_B) From 54cff6479fd87e4a941e3686d93faa734586922b Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 16:40:04 +0800 Subject: [PATCH 183/260] drm/i915/gvt: Make elsp_dwords in the right order The context descriptors in elsp_dwords are stored in a reversed order and the definition of context descriptor is also reversed. The revesred stuff is hard to be used and might cause misunderstanding. Make them in the right oder for following code re-factoring. Tested on my SKL NUC. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 22 +++++++++++----------- drivers/gpu/drm/i915/gvt/execlist.h | 8 ++++---- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 4427be18e4a9..9f0207205117 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -511,8 +511,8 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload) if (!workload->emulate_schedule_in) return 0; - ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1); - ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0); + ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0); + ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1); ret = emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx); if (!ret) @@ -770,21 +770,21 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) { struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id]; - struct execlist_ctx_descriptor_format desc[2]; + struct execlist_ctx_descriptor_format *desc[2]; int i, ret; - desc[0] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1); - desc[1] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0); + desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0); + desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1); - if (!desc[0].valid) { + if (!desc[0]->valid) { gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n"); goto inv_desc; } for (i = 0; i < ARRAY_SIZE(desc); i++) { - if (!desc[i].valid) + if (!desc[i]->valid) continue; - if (!desc[i].privilege_access) { + if (!desc[i]->privilege_access) { gvt_vgpu_err("unexpected GGTT elsp submission\n"); goto inv_desc; } @@ -792,9 +792,9 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) /* submit workload */ for (i = 0; i < ARRAY_SIZE(desc); i++) { - if (!desc[i].valid) + if (!desc[i]->valid) continue; - ret = submit_context(vgpu, ring_id, &desc[i], i == 0); + ret = submit_context(vgpu, ring_id, desc[i], i == 0); if (ret) { gvt_vgpu_err("failed to submit desc %d\n", i); return ret; @@ -805,7 +805,7 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) inv_desc: gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n", - desc[0].udw, desc[0].ldw, desc[1].udw, desc[1].ldw); + desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/gvt/execlist.h b/drivers/gpu/drm/i915/gvt/execlist.h index 7eced40a1e30..427e40e64d41 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.h +++ b/drivers/gpu/drm/i915/gvt/execlist.h @@ -36,10 +36,6 @@ #define _GVT_EXECLIST_H_ struct execlist_ctx_descriptor_format { - union { - u32 udw; - u32 context_id; - }; union { u32 ldw; struct { @@ -54,6 +50,10 @@ struct execlist_ctx_descriptor_format { u32 lrca : 20; }; }; + union { + u32 udw; + u32 context_id; + }; }; struct execlist_status_format { diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a5bed2e71b92..820d26670541 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1453,7 +1453,7 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, execlist = &vgpu->execlist[ring_id]; - execlist->elsp_dwords.data[execlist->elsp_dwords.index] = data; + execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data; if (execlist->elsp_dwords.index == 3) { ret = intel_vgpu_submit_execlist(vgpu, ring_id); if(ret) From 874b6a910e6cc094629bd2634d14061cf5eb7690 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 20:08:18 +0800 Subject: [PATCH 184/260] drm/i915/gvt: Rename intel_vgpu_{init, clean}_gvt_context() To move workload related functions into scheduler.c, an expected way is to collect all the init/clean functions related to vGPU workload submission into fewer functions. Rename intel_vgpu_{init, clean}_gvt_context() for above usage in future. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 21 +++++++++++++++++++-- drivers/gpu/drm/i915/gvt/scheduler.h | 4 ++-- drivers/gpu/drm/i915/gvt/vgpu.c | 6 +++--- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 42cc61230ca7..5913bcb7b73e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -709,12 +709,29 @@ err: return ret; } -void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu) +/** + * intel_vgpu_clean_submission - free submission-related resource for vGPU + * @vgpu: a vGPU + * + * This function is called when a vGPU is being destroyed. + * + */ +void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) { i915_gem_context_put(vgpu->shadow_ctx); } -int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu) +/** + * intel_vgpu_setup_submission - setup submission-related resource for vGPU + * @vgpu: a vGPU + * + * This function is called when a vGPU is being created. + * + * Returns: + * Zero on success, negative error code if failed. + * + */ +int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) { atomic_set(&vgpu->running_workload_num, 0); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 2d694f6c0907..c216aefaa73e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -137,9 +137,9 @@ void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt); void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu); -int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu); +int intel_vgpu_setup_submission(struct intel_vgpu *vgpu); -void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu); +void intel_vgpu_clean_submission(struct intel_vgpu *vgpu); void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx); #endif diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 02c61a1ad56a..3d69871d28e9 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -254,7 +254,7 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) idr_remove(&gvt->vgpu_idr, vgpu->id); intel_vgpu_clean_sched_policy(vgpu); - intel_vgpu_clean_gvt_context(vgpu); + intel_vgpu_clean_submission(vgpu); intel_vgpu_clean_execlist(vgpu); intel_vgpu_clean_display(vgpu); intel_vgpu_clean_opregion(vgpu); @@ -376,7 +376,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_display; - ret = intel_vgpu_init_gvt_context(vgpu); + ret = intel_vgpu_setup_submission(vgpu); if (ret) goto out_clean_execlist; @@ -389,7 +389,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, return vgpu; out_clean_shadow_ctx: - intel_vgpu_clean_gvt_context(vgpu); + intel_vgpu_clean_submission(vgpu); out_clean_execlist: intel_vgpu_clean_execlist(vgpu); out_clean_display: From 9a9829e9eb8bc4b4e870ce15a8904a32991608d5 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 20:28:09 +0800 Subject: [PATCH 185/260] drm/i915/gvt: Move workload cache init/clean into intel_vgpu_{setup, clean}_submission() Move vGPU workload cache initialization/de-initialization into intel_vgpu_{setup, clean}_submission() since they are not specific to execlist stuffs. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 15 +-------------- drivers/gpu/drm/i915/gvt/scheduler.c | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 9f0207205117..0ee40a5ee192 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -856,14 +856,12 @@ void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu) struct intel_engine_cs *engine; clean_workloads(vgpu, ALL_ENGINES); - kmem_cache_destroy(vgpu->workloads); for_each_engine(engine, vgpu->gvt->dev_priv, i) { kfree(vgpu->reserve_ring_buffer_va[i]); vgpu->reserve_ring_buffer_va[i] = NULL; vgpu->reserve_ring_buffer_size[i] = 0; } - } #define RESERVE_RING_BUFFER_SIZE ((1 * PAGE_SIZE)/8) @@ -872,19 +870,8 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu) enum intel_engine_id i; struct intel_engine_cs *engine; - /* each ring has a virtual execlist engine */ - for_each_engine(engine, vgpu->gvt->dev_priv, i) { + for_each_engine(engine, vgpu->gvt->dev_priv, i) init_vgpu_execlist(vgpu, i); - INIT_LIST_HEAD(&vgpu->workload_q_head[i]); - } - - vgpu->workloads = kmem_cache_create("gvt-g_vgpu_workload", - sizeof(struct intel_vgpu_workload), 0, - SLAB_HWCACHE_ALIGN, - NULL); - - if (!vgpu->workloads) - return -ENOMEM; /* each ring has a shadow ring buffer until vgpu destroyed */ for_each_engine(engine, vgpu->gvt->dev_priv, i) { diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 5913bcb7b73e..81952139b00c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -719,6 +719,7 @@ err: void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) { i915_gem_context_put(vgpu->shadow_ctx); + kmem_cache_destroy(vgpu->workloads); } /** @@ -733,7 +734,9 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) */ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) { - atomic_set(&vgpu->running_workload_num, 0); + enum intel_engine_id i; + struct intel_engine_cs *engine; + int ret; vgpu->shadow_ctx = i915_gem_context_create_gvt( &vgpu->gvt->dev_priv->drm); @@ -742,5 +745,24 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES); + vgpu->workloads = kmem_cache_create("gvt-g_vgpu_workload", + sizeof(struct intel_vgpu_workload), 0, + SLAB_HWCACHE_ALIGN, + NULL); + + if (!vgpu->workloads) { + ret = -ENOMEM; + goto out_shadow_ctx; + } + + for_each_engine(engine, vgpu->gvt->dev_priv, i) + INIT_LIST_HEAD(&vgpu->workload_q_head[i]); + + atomic_set(&vgpu->running_workload_num, 0); + return 0; + +out_shadow_ctx: + i915_gem_context_put(vgpu->shadow_ctx); + return ret; } From 1406a14b0ed977fc18f43398b391e4bb5d744174 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 21:15:18 +0800 Subject: [PATCH 186/260] drm/i915/gvt: Introduce intel_vgpu_submission Introduce intel_vgpu_submission to hold all members related to submission in struct intel_vgpu before. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 30 ++++++++----- drivers/gpu/drm/i915/gvt/gvt.h | 17 +++++--- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- drivers/gpu/drm/i915/gvt/kvmgt.c | 2 +- drivers/gpu/drm/i915/gvt/render.c | 9 ++-- drivers/gpu/drm/i915/gvt/scheduler.c | 65 +++++++++++++++------------- drivers/gpu/drm/i915/gvt/scheduler.h | 2 +- drivers/gpu/drm/i915/gvt/vgpu.c | 4 +- 8 files changed, 75 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 0ee40a5ee192..f118454d2eab 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -362,7 +362,7 @@ static void free_workload(struct intel_vgpu_workload *workload) { intel_vgpu_unpin_mm(workload->shadow_mm); intel_gvt_mm_unreference(workload->shadow_mm); - kmem_cache_free(workload->vgpu->workloads, workload); + kmem_cache_free(workload->vgpu->submission.workloads, workload); } #define get_desc_from_elsp_dwords(ed, i) \ @@ -401,7 +401,8 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) struct intel_vgpu_workload, wa_ctx); int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; + struct intel_vgpu_submission *s = &workload->vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_gem_object *ctx_obj = shadow_ctx->engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; @@ -474,6 +475,7 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) static int prepare_execlist_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; struct execlist_ctx_descriptor_format ctx[2]; int ring_id = workload->ring_id; int ret; @@ -514,7 +516,7 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload) ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0); ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1); - ret = emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx); + ret = emulate_execlist_schedule_in(&s->execlist[ring_id], ctx); if (!ret) goto out; else @@ -533,7 +535,8 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; int ring_id = workload->ring_id; - struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id]; + struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_vgpu_execlist *execlist = &s->execlist[ring_id]; struct intel_vgpu_workload *next_workload; struct list_head *next = workload_q_head(vgpu, ring_id)->next; bool lite_restore = false; @@ -652,6 +655,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, struct execlist_ctx_descriptor_format *desc, bool emulate_schedule_in) { + struct intel_vgpu_submission *s = &vgpu->submission; struct list_head *q = workload_q_head(vgpu, ring_id); struct intel_vgpu_workload *last_workload = get_last_workload(q); struct intel_vgpu_workload *workload = NULL; @@ -689,7 +693,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, gvt_dbg_el("ring id %d begin a new workload\n", ring_id); - workload = kmem_cache_zalloc(vgpu->workloads, GFP_KERNEL); + workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL); if (!workload) return -ENOMEM; @@ -738,7 +742,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, } if (emulate_schedule_in) - workload->elsp_dwords = vgpu->execlist[ring_id].elsp_dwords; + workload->elsp_dwords = s->execlist[ring_id].elsp_dwords; gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n", workload, ring_id, head, tail, start, ctl); @@ -748,7 +752,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, ret = prepare_mm(workload); if (ret) { - kmem_cache_free(vgpu->workloads, workload); + kmem_cache_free(s->workloads, workload); return ret; } @@ -769,7 +773,8 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) { - struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id]; + struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_vgpu_execlist *execlist = &s->execlist[ring_id]; struct execlist_ctx_descriptor_format *desc[2]; int i, ret; @@ -811,7 +816,8 @@ inv_desc: static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) { - struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id]; + struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_vgpu_execlist *execlist = &s->execlist[ring_id]; struct execlist_context_status_pointer_format ctx_status_ptr; u32 ctx_status_ptr_reg; @@ -833,6 +839,7 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) { + struct intel_vgpu_submission *s = &vgpu->submission; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_engine_cs *engine; struct intel_vgpu_workload *pos, *n; @@ -841,12 +848,11 @@ static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) /* free the unsubmited workloads in the queues. */ for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { list_for_each_entry_safe(pos, n, - &vgpu->workload_q_head[engine->id], list) { + &s->workload_q_head[engine->id], list) { list_del_init(&pos->list); free_workload(pos); } - - clear_bit(engine->id, vgpu->shadow_ctx_desc_updated); + clear_bit(engine->id, s->shadow_ctx_desc_updated); } } diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 9c2e7c0aa38f..c3f84f26090a 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -142,6 +142,15 @@ struct vgpu_sched_ctl { int weight; }; +struct intel_vgpu_submission { + struct intel_vgpu_execlist execlist[I915_NUM_ENGINES]; + struct list_head workload_q_head[I915_NUM_ENGINES]; + struct kmem_cache *workloads; + atomic_t running_workload_num; + struct i915_gem_context *shadow_ctx; + DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES); +}; + struct intel_vgpu { struct intel_gvt *gvt; int id; @@ -161,16 +170,12 @@ struct intel_vgpu { struct intel_vgpu_gtt gtt; struct intel_vgpu_opregion opregion; struct intel_vgpu_display display; - struct intel_vgpu_execlist execlist[I915_NUM_ENGINES]; - struct list_head workload_q_head[I915_NUM_ENGINES]; - struct kmem_cache *workloads; - atomic_t running_workload_num; + struct intel_vgpu_submission submission; /* 1/2K for each reserve ring buffer */ void *reserve_ring_buffer_va[I915_NUM_ENGINES]; int reserve_ring_buffer_size[I915_NUM_ENGINES]; DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); - struct i915_gem_context *shadow_ctx; - DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES); + #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) struct { diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 820d26670541..00893532394a 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1451,7 +1451,7 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, if (WARN_ON(ring_id < 0 || ring_id > I915_NUM_ENGINES - 1)) return -EINVAL; - execlist = &vgpu->execlist[ring_id]; + execlist = &vgpu->submission.execlist[ring_id]; execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data; if (execlist->elsp_dwords.index == 3) { diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 96060920a6fe..fc6878bb2496 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1188,7 +1188,7 @@ hw_id_show(struct device *dev, struct device_attribute *attr, struct intel_vgpu *vgpu = (struct intel_vgpu *) mdev_get_drvdata(mdev); return sprintf(buf, "%u\n", - vgpu->shadow_ctx->hw_id); + vgpu->submission.shadow_ctx->hw_id); } return sprintf(buf, "\n"); } diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 6d066cf35478..db4d091be60b 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -261,14 +261,15 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id) static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct render_mmio *mmio; - u32 v; - int i, array_size; - u32 *reg_state = vgpu->shadow_ctx->engine[ring_id].lrc_reg_state; + struct intel_vgpu_submission *s = &vgpu->submission; + u32 *reg_state = s->shadow_ctx->engine[ring_id].lrc_reg_state; u32 ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL]; u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); i915_reg_t last_reg = _MMIO(0); + struct render_mmio *mmio; + u32 v; + int i, array_size; if (IS_SKYLAKE(vgpu->gvt->dev_priv) || IS_KABYLAKE(vgpu->gvt->dev_priv)) { diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 81952139b00c..864a2bc06e45 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -57,7 +57,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; + struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = shadow_ctx->engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; @@ -249,12 +249,13 @@ void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) */ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) { + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; - struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; struct drm_i915_gem_request *rq; - struct intel_vgpu *vgpu = workload->vgpu; struct intel_ring *ring; int ret; @@ -267,7 +268,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - if (!test_and_set_bit(ring_id, vgpu->shadow_ctx_desc_updated)) + if (!test_and_set_bit(ring_id, s->shadow_ctx_desc_updated)) shadow_context_descriptor_update(shadow_ctx, dev_priv->engine[ring_id]); @@ -326,9 +327,11 @@ err_scan: static int dispatch_workload(struct intel_vgpu_workload *workload) { + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; - struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; int ret = 0; @@ -414,7 +417,7 @@ static struct intel_vgpu_workload *pick_next_workload( gvt_dbg_sched("ring id %d pick new workload %p\n", ring_id, workload); - atomic_inc(&workload->vgpu->running_workload_num); + atomic_inc(&workload->vgpu->submission.running_workload_num); out: mutex_unlock(&gvt->lock); return workload; @@ -424,8 +427,9 @@ static void update_guest_context(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_gem_object *ctx_obj = shadow_ctx->engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; @@ -491,15 +495,14 @@ static void update_guest_context(struct intel_vgpu_workload *workload) static void complete_current_workload(struct intel_gvt *gvt, int ring_id) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - struct intel_vgpu_workload *workload; - struct intel_vgpu *vgpu; + struct intel_vgpu_workload *workload = + scheduler->current_workload[ring_id]; + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; int event; mutex_lock(&gvt->lock); - workload = scheduler->current_workload[ring_id]; - vgpu = workload->vgpu; - /* For the workload w/ request, needs to wait for the context * switch to make sure request is completed. * For the workload w/o request, directly complete the workload. @@ -536,7 +539,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) } mutex_lock(&dev_priv->drm.struct_mutex); /* unpin shadow ctx as the shadow_ctx update is done */ - engine->context_unpin(engine, workload->vgpu->shadow_ctx); + engine->context_unpin(engine, s->shadow_ctx); mutex_unlock(&dev_priv->drm.struct_mutex); } @@ -548,7 +551,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) list_del_init(&workload->list); workload->complete(workload); - atomic_dec(&vgpu->running_workload_num); + atomic_dec(&s->running_workload_num); wake_up(&scheduler->workload_complete_wq); if (gvt->scheduler.need_reschedule) @@ -637,14 +640,15 @@ complete: void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu) { + struct intel_vgpu_submission *s = &vgpu->submission; struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - if (atomic_read(&vgpu->running_workload_num)) { + if (atomic_read(&s->running_workload_num)) { gvt_dbg_sched("wait vgpu idle\n"); wait_event(scheduler->workload_complete_wq, - !atomic_read(&vgpu->running_workload_num)); + !atomic_read(&s->running_workload_num)); } } @@ -718,8 +722,10 @@ err: */ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) { - i915_gem_context_put(vgpu->shadow_ctx); - kmem_cache_destroy(vgpu->workloads); + struct intel_vgpu_submission *s = &vgpu->submission; + + i915_gem_context_put(s->shadow_ctx); + kmem_cache_destroy(s->workloads); } /** @@ -734,35 +740,36 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) */ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) { + struct intel_vgpu_submission *s = &vgpu->submission; enum intel_engine_id i; struct intel_engine_cs *engine; int ret; - vgpu->shadow_ctx = i915_gem_context_create_gvt( + s->shadow_ctx = i915_gem_context_create_gvt( &vgpu->gvt->dev_priv->drm); - if (IS_ERR(vgpu->shadow_ctx)) - return PTR_ERR(vgpu->shadow_ctx); + if (IS_ERR(s->shadow_ctx)) + return PTR_ERR(s->shadow_ctx); - bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES); + bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); - vgpu->workloads = kmem_cache_create("gvt-g_vgpu_workload", + s->workloads = kmem_cache_create("gvt-g_vgpu_workload", sizeof(struct intel_vgpu_workload), 0, SLAB_HWCACHE_ALIGN, NULL); - if (!vgpu->workloads) { + if (!s->workloads) { ret = -ENOMEM; goto out_shadow_ctx; } for_each_engine(engine, vgpu->gvt->dev_priv, i) - INIT_LIST_HEAD(&vgpu->workload_q_head[i]); + INIT_LIST_HEAD(&s->workload_q_head[i]); - atomic_set(&vgpu->running_workload_num, 0); + atomic_set(&s->running_workload_num, 0); return 0; out_shadow_ctx: - i915_gem_context_put(vgpu->shadow_ctx); + i915_gem_context_put(s->shadow_ctx); return ret; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index c216aefaa73e..3ca0087f10b2 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -122,7 +122,7 @@ struct intel_shadow_bb_entry { }; #define workload_q_head(vgpu, ring_id) \ - (&(vgpu->workload_q_head[ring_id])) + (&(vgpu->submission.workload_q_head[ring_id])) #define queue_workload(workload) do { \ list_add_tail(&workload->list, \ diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 3d69871d28e9..35a5ec201206 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -226,7 +226,7 @@ void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu) vgpu->active = false; - if (atomic_read(&vgpu->running_workload_num)) { + if (atomic_read(&vgpu->submission.running_workload_num)) { mutex_unlock(&gvt->lock); intel_gvt_wait_vgpu_idle(vgpu); mutex_lock(&gvt->lock); @@ -293,7 +293,7 @@ struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt) vgpu->gvt = gvt; for (i = 0; i < I915_NUM_ENGINES; i++) - INIT_LIST_HEAD(&vgpu->workload_q_head[i]); + INIT_LIST_HEAD(&vgpu->submission.workload_q_head[i]); ret = intel_vgpu_init_sched_policy(vgpu); if (ret) From 91d5d85442b2a65e5f4e1726565c1c1a8ba9976f Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 21:33:20 +0800 Subject: [PATCH 187/260] drm/i915/gvt: Move tlb_handle_pending into intel_vgpu_submission Move tlb_handle_pending into intel_vgpu_submssion since it belongs to a part of vGPU submission stuffs Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- drivers/gpu/drm/i915/gvt/render.c | 3 ++- drivers/gpu/drm/i915/gvt/scheduler.c | 1 + drivers/gpu/drm/i915/gvt/vgpu.c | 1 - 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index c3f84f26090a..93ff530eee30 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -149,6 +149,7 @@ struct intel_vgpu_submission { atomic_t running_workload_num; struct i915_gem_context *shadow_ctx; DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES); + DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); }; struct intel_vgpu { @@ -174,7 +175,6 @@ struct intel_vgpu { /* 1/2K for each reserve ring buffer */ void *reserve_ring_buffer_va[I915_NUM_ENGINES]; int reserve_ring_buffer_size[I915_NUM_ENGINES]; - DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 00893532394a..c78e45058219 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1526,7 +1526,7 @@ static int gvt_reg_tlb_control_handler(struct intel_vgpu *vgpu, default: return -EINVAL; } - set_bit(id, (void *)vgpu->tlb_handle_pending); + set_bit(id, (void *)vgpu->submission.tlb_handle_pending); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index db4d091be60b..e16c3551b4a3 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -147,6 +147,7 @@ static u32 gen9_render_mocs_L3[32]; static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_vgpu_submission *s = &vgpu->submission; enum forcewake_domains fw; i915_reg_t reg; u32 regs[] = { @@ -160,7 +161,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; - if (!test_and_clear_bit(ring_id, (void *)vgpu->tlb_handle_pending)) + if (!test_and_clear_bit(ring_id, (void *)s->tlb_handle_pending)) return; reg = _MMIO(regs[ring_id]); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 864a2bc06e45..7cb1cf4223ed 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -766,6 +766,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) INIT_LIST_HEAD(&s->workload_q_head[i]); atomic_set(&s->running_workload_num, 0); + bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES); return 0; diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 35a5ec201206..1c9818d9f1d8 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -346,7 +346,6 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, vgpu->handle = param->handle; vgpu->gvt = gvt; vgpu->sched_ctl.weight = param->weight; - bitmap_zero(vgpu->tlb_handle_pending, I915_NUM_ENGINES); intel_vgpu_init_cfg_space(vgpu, param->primary); From bf4097ea5762bd90d836df9904594eb4822fefa7 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 21:36:21 +0800 Subject: [PATCH 188/260] drm/i915/gvt: Fix a memory leak in cmd_parser.c The pointer points to the original memory can never take the return value of krealloc(). Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 701a3c6f1669..9405a214760c 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2620,14 +2620,16 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_top = workload->rb_start + guest_rb_size; if (workload->rb_len > vgpu->reserve_ring_buffer_size[ring_id]) { - void *va = vgpu->reserve_ring_buffer_va[ring_id]; + void *va, *p; + /* realloc the new ring buffer if needed */ - vgpu->reserve_ring_buffer_va[ring_id] = - krealloc(va, workload->rb_len, GFP_KERNEL); - if (!vgpu->reserve_ring_buffer_va[ring_id]) { + va = vgpu->reserve_ring_buffer_va[ring_id]; + p = krealloc(va, workload->rb_len, GFP_KERNEL); + if (!p) { gvt_vgpu_err("fail to alloc reserve ring buffer\n"); return -ENOMEM; } + vgpu->reserve_ring_buffer_va[ring_id] = p; vgpu->reserve_ring_buffer_size[ring_id] = workload->rb_len; } From 8cf80a2e4b313d1aba7d10ce6ebfbb44a119c66c Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 21:46:06 +0800 Subject: [PATCH 189/260] drm/i915/gvt: Rename reserved ring buffer "reserved" means reserve something from somewhere. Actually they are buffers used by command scanner. Rename it to ring_scan_buffer. v2: - Remove the usage of an extra variable. (Zhenyu) Fixes: 0a53bc07f044 ("drm/i915/gvt: Separate cmd scan from request allocation") Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 16 ++++++++-------- drivers/gpu/drm/i915/gvt/execlist.c | 22 +++++++++++----------- drivers/gpu/drm/i915/gvt/gvt.h | 6 +++--- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 9405a214760c..a2b970fb3c6f 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2619,21 +2619,21 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_tail = workload->rb_start + workload->rb_tail; gma_top = workload->rb_start + guest_rb_size; - if (workload->rb_len > vgpu->reserve_ring_buffer_size[ring_id]) { - void *va, *p; + if (workload->rb_len > vgpu->ring_scan_buffer_size[ring_id]) { + void *p; /* realloc the new ring buffer if needed */ - va = vgpu->reserve_ring_buffer_va[ring_id]; - p = krealloc(va, workload->rb_len, GFP_KERNEL); + p = krealloc(vgpu->ring_scan_buffer[ring_id], workload->rb_len, + GFP_KERNEL); if (!p) { - gvt_vgpu_err("fail to alloc reserve ring buffer\n"); + gvt_vgpu_err("fail to re-alloc ring scan buffer\n"); return -ENOMEM; } - vgpu->reserve_ring_buffer_va[ring_id] = p; - vgpu->reserve_ring_buffer_size[ring_id] = workload->rb_len; + vgpu->ring_scan_buffer[ring_id] = p; + vgpu->ring_scan_buffer_size[ring_id] = workload->rb_len; } - shadow_ring_buffer_va = vgpu->reserve_ring_buffer_va[ring_id]; + shadow_ring_buffer_va = vgpu->ring_scan_buffer[ring_id]; /* get shadow ring buffer va */ workload->shadow_ring_buffer_va = shadow_ring_buffer_va; diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index f118454d2eab..5e4f35f9e290 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -864,9 +864,9 @@ void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu) clean_workloads(vgpu, ALL_ENGINES); for_each_engine(engine, vgpu->gvt->dev_priv, i) { - kfree(vgpu->reserve_ring_buffer_va[i]); - vgpu->reserve_ring_buffer_va[i] = NULL; - vgpu->reserve_ring_buffer_size[i] = 0; + kfree(vgpu->ring_scan_buffer[i]); + vgpu->ring_scan_buffer[i] = NULL; + vgpu->ring_scan_buffer_size[i] = 0; } } @@ -881,21 +881,21 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu) /* each ring has a shadow ring buffer until vgpu destroyed */ for_each_engine(engine, vgpu->gvt->dev_priv, i) { - vgpu->reserve_ring_buffer_va[i] = + vgpu->ring_scan_buffer[i] = kmalloc(RESERVE_RING_BUFFER_SIZE, GFP_KERNEL); - if (!vgpu->reserve_ring_buffer_va[i]) { - gvt_vgpu_err("fail to alloc reserve ring buffer\n"); + if (!vgpu->ring_scan_buffer[i]) { + gvt_vgpu_err("fail to alloc ring scan buffer\n"); goto out; } - vgpu->reserve_ring_buffer_size[i] = RESERVE_RING_BUFFER_SIZE; + vgpu->ring_scan_buffer_size[i] = RESERVE_RING_BUFFER_SIZE; } return 0; out: for_each_engine(engine, vgpu->gvt->dev_priv, i) { - if (vgpu->reserve_ring_buffer_size[i]) { - kfree(vgpu->reserve_ring_buffer_va[i]); - vgpu->reserve_ring_buffer_va[i] = NULL; - vgpu->reserve_ring_buffer_size[i] = 0; + if (vgpu->ring_scan_buffer_size[i]) { + kfree(vgpu->ring_scan_buffer[i]); + vgpu->ring_scan_buffer[i] = NULL; + vgpu->ring_scan_buffer_size[i] = 0; } } return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 93ff530eee30..5b723fa00fcb 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -172,9 +172,9 @@ struct intel_vgpu { struct intel_vgpu_opregion opregion; struct intel_vgpu_display display; struct intel_vgpu_submission submission; - /* 1/2K for each reserve ring buffer */ - void *reserve_ring_buffer_va[I915_NUM_ENGINES]; - int reserve_ring_buffer_size[I915_NUM_ENGINES]; + /* 1/2K for each engine */ + void *ring_scan_buffer[I915_NUM_ENGINES]; + int ring_scan_buffer_size[I915_NUM_ENGINES]; #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) From 325eb94a33451db138e8da7393ad8e9a0e22dd18 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 21:58:11 +0800 Subject: [PATCH 190/260] drm/i915/gvt: Move ring scan buffers into intel_vgpu_submission Move ring scan buffers into intel_vgpu_submission since they belongs to a part of vGPU submission stuffs. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 11 ++++++----- drivers/gpu/drm/i915/gvt/execlist.c | 23 +++++++++++++---------- drivers/gpu/drm/i915/gvt/gvt.h | 7 +++---- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index a2b970fb3c6f..5b033c950a31 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2604,6 +2604,7 @@ out: static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; unsigned long gma_head, gma_tail, gma_top, guest_rb_size; void *shadow_ring_buffer_va; int ring_id = workload->ring_id; @@ -2619,21 +2620,21 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_tail = workload->rb_start + workload->rb_tail; gma_top = workload->rb_start + guest_rb_size; - if (workload->rb_len > vgpu->ring_scan_buffer_size[ring_id]) { + if (workload->rb_len > s->ring_scan_buffer_size[ring_id]) { void *p; /* realloc the new ring buffer if needed */ - p = krealloc(vgpu->ring_scan_buffer[ring_id], workload->rb_len, + p = krealloc(s->ring_scan_buffer[ring_id], workload->rb_len, GFP_KERNEL); if (!p) { gvt_vgpu_err("fail to re-alloc ring scan buffer\n"); return -ENOMEM; } - vgpu->ring_scan_buffer[ring_id] = p; - vgpu->ring_scan_buffer_size[ring_id] = workload->rb_len; + s->ring_scan_buffer[ring_id] = p; + s->ring_scan_buffer_size[ring_id] = workload->rb_len; } - shadow_ring_buffer_va = vgpu->ring_scan_buffer[ring_id]; + shadow_ring_buffer_va = s->ring_scan_buffer[ring_id]; /* get shadow ring buffer va */ workload->shadow_ring_buffer_va = shadow_ring_buffer_va; diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 5e4f35f9e290..7c7ab63a7bb8 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -864,15 +864,18 @@ void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu) clean_workloads(vgpu, ALL_ENGINES); for_each_engine(engine, vgpu->gvt->dev_priv, i) { - kfree(vgpu->ring_scan_buffer[i]); - vgpu->ring_scan_buffer[i] = NULL; - vgpu->ring_scan_buffer_size[i] = 0; + struct intel_vgpu_submission *s = &vgpu->submission; + + kfree(s->ring_scan_buffer[i]); + s->ring_scan_buffer[i] = NULL; + s->ring_scan_buffer_size[i] = 0; } } #define RESERVE_RING_BUFFER_SIZE ((1 * PAGE_SIZE)/8) int intel_vgpu_init_execlist(struct intel_vgpu *vgpu) { + struct intel_vgpu_submission *s = &vgpu->submission; enum intel_engine_id i; struct intel_engine_cs *engine; @@ -881,21 +884,21 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu) /* each ring has a shadow ring buffer until vgpu destroyed */ for_each_engine(engine, vgpu->gvt->dev_priv, i) { - vgpu->ring_scan_buffer[i] = + s->ring_scan_buffer[i] = kmalloc(RESERVE_RING_BUFFER_SIZE, GFP_KERNEL); - if (!vgpu->ring_scan_buffer[i]) { + if (!s->ring_scan_buffer[i]) { gvt_vgpu_err("fail to alloc ring scan buffer\n"); goto out; } - vgpu->ring_scan_buffer_size[i] = RESERVE_RING_BUFFER_SIZE; + s->ring_scan_buffer_size[i] = RESERVE_RING_BUFFER_SIZE; } return 0; out: for_each_engine(engine, vgpu->gvt->dev_priv, i) { - if (vgpu->ring_scan_buffer_size[i]) { - kfree(vgpu->ring_scan_buffer[i]); - vgpu->ring_scan_buffer[i] = NULL; - vgpu->ring_scan_buffer_size[i] = 0; + if (s->ring_scan_buffer_size[i]) { + kfree(s->ring_scan_buffer[i]); + s->ring_scan_buffer[i] = NULL; + s->ring_scan_buffer_size[i] = 0; } } return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 5b723fa00fcb..49fe54870000 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -150,6 +150,9 @@ struct intel_vgpu_submission { struct i915_gem_context *shadow_ctx; DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES); DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); + /* 1/2K for each engine */ + void *ring_scan_buffer[I915_NUM_ENGINES]; + int ring_scan_buffer_size[I915_NUM_ENGINES]; }; struct intel_vgpu { @@ -172,10 +175,6 @@ struct intel_vgpu { struct intel_vgpu_opregion opregion; struct intel_vgpu_display display; struct intel_vgpu_submission submission; - /* 1/2K for each engine */ - void *ring_scan_buffer[I915_NUM_ENGINES]; - int ring_scan_buffer_size[I915_NUM_ENGINES]; - #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) struct { From 8652a8aca614cc2576cf5d4c73d3b87e871ce004 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 10 Sep 2017 22:01:10 +0800 Subject: [PATCH 191/260] drm/i915/gvt: Do not allocate initial ring scan buffer Theoretically, the largest bulk of commands in the ring buffer of an engine might be the first submission, which usually contains a lot of commands to initialize the HW. After removing the initial allocation of the ring scan buffer and let krealloc() do everything we need, we still have a big chance to get the buffer of suitable size in the first submission. Tested on my SKL NUC. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 21 --------------------- drivers/gpu/drm/i915/gvt/gvt.h | 1 - 2 files changed, 22 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 7c7ab63a7bb8..adf7668f8cc1 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -872,36 +872,15 @@ void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu) } } -#define RESERVE_RING_BUFFER_SIZE ((1 * PAGE_SIZE)/8) int intel_vgpu_init_execlist(struct intel_vgpu *vgpu) { - struct intel_vgpu_submission *s = &vgpu->submission; enum intel_engine_id i; struct intel_engine_cs *engine; for_each_engine(engine, vgpu->gvt->dev_priv, i) init_vgpu_execlist(vgpu, i); - /* each ring has a shadow ring buffer until vgpu destroyed */ - for_each_engine(engine, vgpu->gvt->dev_priv, i) { - s->ring_scan_buffer[i] = - kmalloc(RESERVE_RING_BUFFER_SIZE, GFP_KERNEL); - if (!s->ring_scan_buffer[i]) { - gvt_vgpu_err("fail to alloc ring scan buffer\n"); - goto out; - } - s->ring_scan_buffer_size[i] = RESERVE_RING_BUFFER_SIZE; - } return 0; -out: - for_each_engine(engine, vgpu->gvt->dev_priv, i) { - if (s->ring_scan_buffer_size[i]) { - kfree(s->ring_scan_buffer[i]); - s->ring_scan_buffer[i] = NULL; - s->ring_scan_buffer_size[i] = 0; - } - } - return -ENOMEM; } void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu, diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 49fe54870000..7a770b1c99d6 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -150,7 +150,6 @@ struct intel_vgpu_submission { struct i915_gem_context *shadow_ctx; DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES); DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); - /* 1/2K for each engine */ void *ring_scan_buffer[I915_NUM_ENGINES]; int ring_scan_buffer_size[I915_NUM_ENGINES]; }; From 5c56883a9531cd89561fb9a11a33697f2847c82a Mon Sep 17 00:00:00 2001 From: fred gao Date: Wed, 20 Sep 2017 05:36:47 +0800 Subject: [PATCH 192/260] drm/i915/gvt: Change the return type during command scan Generally, there are 3 types of errors during command scan: a) some commands might be unknown with EBADRQC; b) some cmd access invalid address with EFAULT; c) some unexpected force nonpriv cmd with EPERM. later the healthy state can be judged through the return error. v2: - remove some internal i915 errors rating. (Zhenyu) v3: - the healthy state is judged through the internal defined return error. (Zhenyu) - force non priv cmd error can be ignored. (Kevin) v4: - reuse standard defined errno instead of recreate, e.g EBADRQC for unknown cmd, EFAULT for invalid address, EPERM for nonpriv. (Zhenyu) v5: - remove some irrelevant code for the patch. - fix typo of vgpu_is_vm_unhealthy. (Zhenyu) v6: - move the healthy check and failsafe code into another patch. (Zhenyu) v7: - polish title and commit message. (Zhenyu) Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 78 ++++++++++++++++++--------- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 5b033c950a31..885cccf708ac 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -825,7 +825,7 @@ static int force_nonpriv_reg_handler(struct parser_exec_state *s, if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) { gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n", offset, data); - return -EINVAL; + return -EPERM; } return 0; } @@ -839,7 +839,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, if (offset + 4 > gvt->device_info.mmio_size) { gvt_vgpu_err("%s access to (%x) outside of MMIO range\n", cmd, offset); - return -EINVAL; + return -EFAULT; } if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) { @@ -854,8 +854,8 @@ static int cmd_reg_handler(struct parser_exec_state *s, } if (is_force_nonpriv_mmio(offset) && - force_nonpriv_reg_handler(s, offset, index)) - return -EINVAL; + force_nonpriv_reg_handler(s, offset, index)) + return -EPERM; if (offset == i915_mmio_reg_offset(DERRMR) || offset == i915_mmio_reg_offset(FORCEWAKE_MT)) { @@ -894,11 +894,14 @@ static int cmd_handler_lri(struct parser_exec_state *s) i915_mmio_reg_offset(DERRMR)) ret |= 0; else - ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0; + ret |= (cmd_reg_inhibit(s, i)) ? + -EBADRQC : 0; } if (ret) break; ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lri"); + if (ret) + break; } return ret; } @@ -912,11 +915,15 @@ static int cmd_handler_lrr(struct parser_exec_state *s) if (IS_BROADWELL(s->vgpu->gvt->dev_priv)) ret |= ((cmd_reg_inhibit(s, i) || (cmd_reg_inhibit(s, i + 1)))) ? - -EINVAL : 0; + -EBADRQC : 0; if (ret) break; ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrr-src"); + if (ret) + break; ret |= cmd_reg_handler(s, cmd_reg(s, i + 1), i, "lrr-dst"); + if (ret) + break; } return ret; } @@ -934,15 +941,19 @@ static int cmd_handler_lrm(struct parser_exec_state *s) for (i = 1; i < cmd_len;) { if (IS_BROADWELL(gvt->dev_priv)) - ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0; + ret |= (cmd_reg_inhibit(s, i)) ? -EBADRQC : 0; if (ret) break; ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrm"); + if (ret) + break; if (cmd_val(s, 0) & (1 << 22)) { gma = cmd_gma(s, i + 1); if (gmadr_bytes == 8) gma |= (cmd_gma_hi(s, i + 2)) << 32; ret |= cmd_address_audit(s, gma, sizeof(u32), false); + if (ret) + break; } i += gmadr_dw_number(s) + 1; } @@ -958,11 +969,15 @@ static int cmd_handler_srm(struct parser_exec_state *s) for (i = 1; i < cmd_len;) { ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "srm"); + if (ret) + break; if (cmd_val(s, 0) & (1 << 22)) { gma = cmd_gma(s, i + 1); if (gmadr_bytes == 8) gma |= (cmd_gma_hi(s, i + 2)) << 32; ret |= cmd_address_audit(s, gma, sizeof(u32), false); + if (ret) + break; } i += gmadr_dw_number(s) + 1; } @@ -1116,7 +1131,7 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s, v = (dword0 & GENMASK(21, 19)) >> 19; if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code))) - return -EINVAL; + return -EBADRQC; info->pipe = gen8_plane_code[v].pipe; info->plane = gen8_plane_code[v].plane; @@ -1136,7 +1151,7 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s, info->surf_reg = SPRSURF(info->pipe); } else { WARN_ON(1); - return -EINVAL; + return -EBADRQC; } return 0; } @@ -1185,7 +1200,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s, default: gvt_vgpu_err("unknown plane code %d\n", plane); - return -EINVAL; + return -EBADRQC; } info->stride_val = (dword1 & GENMASK(15, 6)) >> 6; @@ -1348,10 +1363,13 @@ static unsigned long get_gma_bb_from_cmd(struct parser_exec_state *s, int index) { unsigned long addr; unsigned long gma_high, gma_low; - int gmadr_bytes = s->vgpu->gvt->device_info.gmadr_bytes_in_cmd; + struct intel_vgpu *vgpu = s->vgpu; + int gmadr_bytes = vgpu->gvt->device_info.gmadr_bytes_in_cmd; - if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8)) + if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8)) { + gvt_vgpu_err("invalid gma bytes %d\n", gmadr_bytes); return INTEL_GVT_INVALID_ADDR; + } gma_low = cmd_val(s, index) & BATCH_BUFFER_ADDR_MASK; if (gmadr_bytes == 4) { @@ -1374,16 +1392,16 @@ static inline int cmd_address_audit(struct parser_exec_state *s, if (op_size > max_surface_size) { gvt_vgpu_err("command address audit fail name %s\n", s->info->name); - return -EINVAL; + return -EFAULT; } if (index_mode) { if (guest_gma >= GTT_PAGE_SIZE / sizeof(u64)) { - ret = -EINVAL; + ret = -EFAULT; goto err; } } else if (!intel_gvt_ggtt_validate_range(vgpu, guest_gma, op_size)) { - ret = -EINVAL; + ret = -EFAULT; goto err; } @@ -1439,7 +1457,7 @@ static inline int unexpected_cmd(struct parser_exec_state *s) gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name); - return -EINVAL; + return -EBADRQC; } static int cmd_handler_mi_semaphore_wait(struct parser_exec_state *s) @@ -1588,22 +1606,26 @@ static int find_bb_size(struct parser_exec_state *s) /* get the start gm address of the batch buffer */ gma = get_gma_bb_from_cmd(s, 1); + if (gma == INTEL_GVT_INVALID_ADDR) + return -EFAULT; + cmd = cmd_val(s, 0); info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", cmd, get_opcode(cmd, s->ring_id)); - return -EINVAL; + return -EBADRQC; } do { - copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, - gma, gma + 4, &cmd); + if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, + gma, gma + 4, &cmd) < 0) + return -EFAULT; info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", cmd, get_opcode(cmd, s->ring_id)); - return -EINVAL; + return -EBADRQC; } if (info->opcode == OP_MI_BATCH_BUFFER_END) { @@ -1634,11 +1656,13 @@ static int perform_bb_shadow(struct parser_exec_state *s) /* get the start gm address of the batch buffer */ gma = get_gma_bb_from_cmd(s, 1); + if (gma == INTEL_GVT_INVALID_ADDR) + return -EFAULT; /* get the size of the batch buffer */ bb_size = find_bb_size(s); if (bb_size < 0) - return -EINVAL; + return bb_size; /* allocate shadow batch buffer */ entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL); @@ -1710,13 +1734,13 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s) if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) { gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n"); - return -EINVAL; + return -EFAULT; } second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1; if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) { gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n"); - return -EINVAL; + return -EFAULT; } s->saved_buf_addr_type = s->buf_addr_type; @@ -2430,7 +2454,7 @@ static int cmd_parser_exec(struct parser_exec_state *s) if (info == NULL) { gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", cmd, get_opcode(cmd, s->ring_id)); - return -EINVAL; + return -EBADRQC; } s->info = info; @@ -2465,6 +2489,10 @@ static inline bool gma_out_of_range(unsigned long gma, return (gma > gma_tail) && (gma < gma_head); } +/* Keep the consistent return type, e.g EBADRQC for unknown + * cmd, EFAULT for invalid address, EPERM for nonpriv. later + * works as the input of VM healthy status. + */ static int command_scan(struct parser_exec_state *s, unsigned long rb_head, unsigned long rb_tail, unsigned long rb_start, unsigned long rb_len) @@ -2487,7 +2515,7 @@ static int command_scan(struct parser_exec_state *s, s->ip_gma, rb_start, gma_bottom); parser_exec_state_dump(s); - return -EINVAL; + return -EFAULT; } if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) { gvt_vgpu_err("ip_gma %lx out of range." diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 7cb1cf4223ed..0771b715f825 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -84,7 +84,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) { gvt_vgpu_err("Invalid guest context descriptor\n"); - return -EINVAL; + return -EFAULT; } page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); From e011c6ce2b4fc7c577ade41485d74431a4e6ea1a Mon Sep 17 00:00:00 2001 From: fred gao Date: Tue, 19 Sep 2017 15:11:28 +0800 Subject: [PATCH 193/260] drm/i915/gvt: Add VM healthy check for workload_thread When a scan error occurs in dispatch_workload, this patch is to check the healthy state and free all the queued workloads before the failsafe mode is entered. Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 6 ++++++ drivers/gpu/drm/i915/gvt/handlers.c | 4 +++- drivers/gpu/drm/i915/gvt/scheduler.c | 7 +++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 7a770b1c99d6..be1bb6dbcbd2 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -193,6 +193,10 @@ struct intel_vgpu { #endif }; +/* validating GM healthy status*/ +#define vgpu_is_vm_unhealthy(ret_val) \ + (((ret_val) == -EBADRQC) || ((ret_val) == -EFAULT)) + struct intel_gvt_gm { unsigned long vgpu_allocated_low_gm_size; unsigned long vgpu_allocated_high_gm_size; @@ -497,6 +501,7 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci); void populate_pvinfo_page(struct intel_vgpu *vgpu); int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload); +void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason); struct intel_gvt_ops { int (*emulate_cfg_read)(struct intel_vgpu *, unsigned int, void *, @@ -519,6 +524,7 @@ struct intel_gvt_ops { enum { GVT_FAILSAFE_UNSUPPORTED_GUEST, GVT_FAILSAFE_INSUFFICIENT_RESOURCE, + GVT_FAILSAFE_GUEST_ERR, }; static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index c78e45058219..acc1cf4fa6f5 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -157,7 +157,7 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg) (num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0))) -static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) +void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) { switch (reason) { case GVT_FAILSAFE_UNSUPPORTED_GUEST: @@ -165,6 +165,8 @@ static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) break; case GVT_FAILSAFE_INSUFFICIENT_RESOURCE: pr_err("Graphics resource is not enough for the guest\n"); + case GVT_FAILSAFE_GUEST_ERR: + pr_err("GVT Internal error for the guest\n"); default: break; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 0771b715f825..02af14023383 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -634,6 +634,13 @@ complete: FORCEWAKE_ALL); intel_runtime_pm_put(gvt->dev_priv); + if (ret && (vgpu_is_vm_unhealthy(ret))) { + mutex_lock(&gvt->lock); + intel_vgpu_clean_execlist(vgpu); + mutex_unlock(&gvt->lock); + enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); + } + } return 0; } From c9214008b52b20ba2a88dad6ba92f1a19c533db6 Mon Sep 17 00:00:00 2001 From: fred gao Date: Tue, 19 Sep 2017 15:11:29 +0800 Subject: [PATCH 194/260] drm/i915/gvt: Add VM healthy check for submit_context When a scan error occurs in submit_context, this patch is to decrease the mm ref count and free the workload struct before the workload is abandoned. v2: - submit_context related code should be combined together. (Zhenyu) v3: - free all the unsubmitted workloads. (Zhenyu) v4: - refine the clean path. (Zhenyu) v5: - polish the title. (Zhenyu) Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index adf7668f8cc1..1347f61fd709 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -360,7 +360,6 @@ static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist, static void free_workload(struct intel_vgpu_workload *workload) { - intel_vgpu_unpin_mm(workload->shadow_mm); intel_gvt_mm_unreference(workload->shadow_mm); kmem_cache_free(workload->vgpu->submission.workloads, workload); } @@ -540,7 +539,7 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) struct intel_vgpu_workload *next_workload; struct list_head *next = workload_q_head(vgpu, ring_id)->next; bool lite_restore = false; - int ret; + int ret = 0; gvt_dbg_el("complete workload %p status %d\n", workload, workload->status); @@ -581,17 +580,12 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) if (lite_restore) { gvt_dbg_el("next context == current - no schedule-out\n"); - free_workload(workload); - return 0; + goto out; } ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc); - if (ret) - goto err; out: - free_workload(workload); - return 0; -err: + intel_vgpu_unpin_mm(workload->shadow_mm); free_workload(workload); return ret; } @@ -762,13 +756,22 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, if (list_empty(workload_q_head(vgpu, ring_id))) { intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->drm.struct_mutex); - intel_gvt_scan_and_shadow_workload(workload); + ret = intel_gvt_scan_and_shadow_workload(workload); mutex_unlock(&dev_priv->drm.struct_mutex); intel_runtime_pm_put(dev_priv); } - queue_workload(workload); - return 0; + if (ret == 0) + queue_workload(workload); + else { + free_workload(workload); + if (vgpu_is_vm_unhealthy(ret)) { + intel_vgpu_clean_execlist(vgpu); + enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); + } + } + return ret; + } int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) From 24f8a29af4afe7c53e08f4afa0c3fa9eb3791b89 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Sep 2017 16:55:34 +0100 Subject: [PATCH 195/260] drm/i915/gvt: ensure -ve return value is handled correctly An earlier fix changed the return type from find_bb_size however the integer return is being assigned to a unsigned int so the -ve error check will never be detected. Make bb_size an int to fix this. Detected by CoverityScan CID#1456886 ("Unsigned compared against 0") Fixes: 1e3197d6ad73 ("drm/i915/gvt: Refine error handling for perform_bb_shadow") Signed-off-by: Colin Ian King Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 885cccf708ac..cd2681a23fcd 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1650,7 +1650,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) struct intel_shadow_bb_entry *entry_obj; struct intel_vgpu *vgpu = s->vgpu; unsigned long gma = 0; - uint32_t bb_size; + int bb_size; void *dst = NULL; int ret = 0; From a34e8def4d9152123582a37a3660c2092e37ceed Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Thu, 21 Sep 2017 08:02:31 +0800 Subject: [PATCH 196/260] drm/i915/gvt: Use dyndbg for gvt debug info It's better enable/disable and classify gvt debug info dynamically. This patch change it to dyndbg so can be dynamically enable/disable each item. All gvt log can be enabled by, $ echo 'file *gvt* +p' > /sys/kernel/debug/dynamic_debug/control Signed-off-by: Shuo Liu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/debug.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h index b0cff4dc2684..d98d5a0b928c 100644 --- a/drivers/gpu/drm/i915/gvt/debug.h +++ b/drivers/gpu/drm/i915/gvt/debug.h @@ -25,41 +25,41 @@ #define __GVT_DEBUG_H__ #define gvt_err(fmt, args...) \ - DRM_ERROR("gvt: "fmt, ##args) + pr_err("gvt: "fmt, ##args) #define gvt_vgpu_err(fmt, args...) \ do { \ if (IS_ERR_OR_NULL(vgpu)) \ - DRM_DEBUG_DRIVER("gvt: "fmt, ##args); \ + pr_debug("gvt: "fmt, ##args); \ else \ - DRM_DEBUG_DRIVER("gvt: vgpu %d: "fmt, vgpu->id, ##args);\ + pr_debug("gvt: vgpu %d: "fmt, vgpu->id, ##args);\ } while (0) #define gvt_dbg_core(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: core: "fmt, ##args) + pr_debug("gvt: core: "fmt, ##args) #define gvt_dbg_irq(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: irq: "fmt, ##args) + pr_debug("gvt: irq: "fmt, ##args) #define gvt_dbg_mm(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: mm: "fmt, ##args) + pr_debug("gvt: mm: "fmt, ##args) #define gvt_dbg_mmio(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: mmio: "fmt, ##args) + pr_debug("gvt: mmio: "fmt, ##args) #define gvt_dbg_dpy(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: dpy: "fmt, ##args) + pr_debug("gvt: dpy: "fmt, ##args) #define gvt_dbg_el(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: el: "fmt, ##args) + pr_debug("gvt: el: "fmt, ##args) #define gvt_dbg_sched(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: sched: "fmt, ##args) + pr_debug("gvt: sched: "fmt, ##args) #define gvt_dbg_render(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: render: "fmt, ##args) + pr_debug("gvt: render: "fmt, ##args) #define gvt_dbg_cmd(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: cmd: "fmt, ##args) + pr_debug("gvt: cmd: "fmt, ##args) #endif From 21527a8dafc40fc499ae57492c1c5d0098cbcf08 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 12 Sep 2017 21:42:09 +0800 Subject: [PATCH 197/260] drm/i915/gvt: Factor out vGPU workload creation/destroy Factor out vGPU workload creation/destroy functions since they are not specific to execlist emulation. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 27 ++++----------- drivers/gpu/drm/i915/gvt/scheduler.c | 51 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/scheduler.h | 5 +++ 3 files changed, 62 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 1347f61fd709..a3bdb037a9fe 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -358,12 +358,6 @@ static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist, return 0; } -static void free_workload(struct intel_vgpu_workload *workload) -{ - intel_gvt_mm_unreference(workload->shadow_mm); - kmem_cache_free(workload->vgpu->submission.workloads, workload); -} - #define get_desc_from_elsp_dwords(ed, i) \ ((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2])) @@ -586,7 +580,7 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc); out: intel_vgpu_unpin_mm(workload->shadow_mm); - free_workload(workload); + intel_vgpu_destroy_workload(workload); return ret; } @@ -687,10 +681,6 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, gvt_dbg_el("ring id %d begin a new workload\n", ring_id); - workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL); - if (!workload) - return -ENOMEM; - /* record some ring buffer register values for scan and shadow */ intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + RING_CTX_OFF(rb_start.val), &start, 4); @@ -699,13 +689,10 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4); - INIT_LIST_HEAD(&workload->list); - INIT_LIST_HEAD(&workload->shadow_bb); + workload = intel_vgpu_create_workload(vgpu); + if (IS_ERR(workload)) + return PTR_ERR(workload); - init_waitqueue_head(&workload->shadow_ctx_status_wq); - atomic_set(&workload->shadow_ctx_active, 0); - - workload->vgpu = vgpu; workload->ring_id = ring_id; workload->ctx_desc = *desc; workload->ring_context_gpa = ring_context_gpa; @@ -715,9 +702,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, workload->rb_ctl = ctl; workload->prepare = prepare_execlist_workload; workload->complete = complete_execlist_workload; - workload->status = -EINPROGRESS; workload->emulate_schedule_in = emulate_schedule_in; - workload->shadowed = false; if (ring_id == RCS) { intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + @@ -764,7 +749,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, if (ret == 0) queue_workload(workload); else { - free_workload(workload); + intel_vgpu_destroy_workload(workload); if (vgpu_is_vm_unhealthy(ret)) { intel_vgpu_clean_execlist(vgpu); enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); @@ -853,7 +838,7 @@ static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) list_for_each_entry_safe(pos, n, &s->workload_q_head[engine->id], list) { list_del_init(&pos->list); - free_workload(pos); + intel_vgpu_destroy_workload(pos); } clear_bit(engine->id, s->shadow_ctx_desc_updated); } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 02af14023383..10ccb05d0e8d 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -781,3 +781,54 @@ out_shadow_ctx: i915_gem_context_put(s->shadow_ctx); return ret; } + +/** + * intel_vgpu_destroy_workload - destroy a vGPU workload + * @vgpu: a vGPU + * + * This function is called when destroy a vGPU workload. + * + */ +void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu_submission *s = &workload->vgpu->submission; + + if (workload->shadow_mm) + intel_gvt_mm_unreference(workload->shadow_mm); + + kmem_cache_free(s->workloads, workload); +} + +/** + * intel_vgpu_create_workload - create a vGPU workload + * @vgpu: a vGPU + * + * This function is called when creating a vGPU workload. + * + * Returns: + * struct intel_vgpu_workload * on success, negative error code in + * pointer if failed. + * + */ +struct intel_vgpu_workload * +intel_vgpu_create_workload(struct intel_vgpu *vgpu) +{ + struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_vgpu_workload *workload; + + workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL); + if (!workload) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&workload->list); + INIT_LIST_HEAD(&workload->shadow_bb); + + init_waitqueue_head(&workload->shadow_ctx_status_wq); + atomic_set(&workload->shadow_ctx_active, 0); + + workload->status = -EINPROGRESS; + workload->shadowed = false; + workload->vgpu = vgpu; + + return workload; +} diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 3ca0087f10b2..3fe4702cda06 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -141,5 +141,10 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu); void intel_vgpu_clean_submission(struct intel_vgpu *vgpu); +struct intel_vgpu_workload * +intel_vgpu_create_workload(struct intel_vgpu *vgpu); + +void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload); + void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx); #endif From 497aa3f5e3bdb6bea5994f7075e2f2df2377d70e Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 12 Sep 2017 21:51:10 +0800 Subject: [PATCH 198/260] drm/i915/gvt: Factor out prepare_workload() Factor out prepare_workload() for the following re-factor. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 10ccb05d0e8d..3d1435f55c7b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -325,6 +325,16 @@ err_scan: return ret; } +static int prepare_workload(struct intel_vgpu_workload *workload) +{ + int ret = 0; + + if (workload->prepare) + ret = workload->prepare(workload); + + return ret; +} + static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; @@ -344,12 +354,10 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) if (ret) goto out; - if (workload->prepare) { - ret = workload->prepare(workload); - if (ret) { - engine->context_unpin(engine, shadow_ctx); - goto out; - } + ret = prepare_workload(workload); + if (ret) { + engine->context_unpin(engine, shadow_ctx); + goto out; } out: From d8235b5e55845de19983cec38af245cc200b81e2 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 12 Sep 2017 22:06:39 +0800 Subject: [PATCH 199/260] drm/i915/gvt: Move common workload preparation into prepare_workload() Move common workload preparation into prepare_workload() in scheduler.c, as they are not specific to execlist emulation. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 154 +-------------------------- drivers/gpu/drm/i915/gvt/scheduler.c | 154 ++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 152 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index a3bdb037a9fe..45cb342500b7 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -361,110 +361,6 @@ static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist, #define get_desc_from_elsp_dwords(ed, i) \ ((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2])) -static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) -{ - const int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd; - struct intel_shadow_bb_entry *entry_obj; - - /* pin the gem object to ggtt */ - list_for_each_entry(entry_obj, &workload->shadow_bb, list) { - struct i915_vma *vma; - - vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0); - if (IS_ERR(vma)) { - return PTR_ERR(vma); - } - - /* FIXME: we are not tracking our pinned VMA leaving it - * up to the core to fix up the stray pin_count upon - * free. - */ - - /* update the relocate gma with shadow batch buffer*/ - entry_obj->bb_start_cmd_va[1] = i915_ggtt_offset(vma); - if (gmadr_bytes == 8) - entry_obj->bb_start_cmd_va[2] = 0; - } - return 0; -} - -static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) -{ - struct intel_vgpu_workload *workload = container_of(wa_ctx, - struct intel_vgpu_workload, - wa_ctx); - int ring_id = workload->ring_id; - struct intel_vgpu_submission *s = &workload->vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; - struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; - struct execlist_ring_context *shadow_ring_context; - struct page *page; - - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap_atomic(page); - - shadow_ring_context->bb_per_ctx_ptr.val = - (shadow_ring_context->bb_per_ctx_ptr.val & - (~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma; - shadow_ring_context->rcs_indirect_ctx.val = - (shadow_ring_context->rcs_indirect_ctx.val & - (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma; - - kunmap_atomic(shadow_ring_context); - return 0; -} - -static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) -{ - struct i915_vma *vma; - unsigned char *per_ctx_va = - (unsigned char *)wa_ctx->indirect_ctx.shadow_va + - wa_ctx->indirect_ctx.size; - - if (wa_ctx->indirect_ctx.size == 0) - return 0; - - vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL, - 0, CACHELINE_BYTES, 0); - if (IS_ERR(vma)) { - return PTR_ERR(vma); - } - - /* FIXME: we are not tracking our pinned VMA leaving it - * up to the core to fix up the stray pin_count upon - * free. - */ - - wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma); - - wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1); - memset(per_ctx_va, 0, CACHELINE_BYTES); - - update_wa_ctx_2_shadow_ctx(wa_ctx); - return 0; -} - -static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) -{ - /* release all the shadow batch buffer */ - if (!list_empty(&workload->shadow_bb)) { - struct intel_shadow_bb_entry *entry_obj = - list_first_entry(&workload->shadow_bb, - struct intel_shadow_bb_entry, - list); - struct intel_shadow_bb_entry *temp; - - list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb, - list) { - i915_gem_object_unpin_map(entry_obj->obj); - i915_gem_object_put(entry_obj->obj); - list_del(&entry_obj->list); - kfree(entry_obj); - } - } -} - static int prepare_execlist_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; @@ -473,36 +369,6 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; int ret; - ret = intel_vgpu_pin_mm(workload->shadow_mm); - if (ret) { - gvt_vgpu_err("fail to vgpu pin mm\n"); - goto out; - } - - ret = intel_vgpu_sync_oos_pages(workload->vgpu); - if (ret) { - gvt_vgpu_err("fail to vgpu sync oos pages\n"); - goto err_unpin_mm; - } - - ret = intel_vgpu_flush_post_shadow(workload->vgpu); - if (ret) { - gvt_vgpu_err("fail to flush post shadow\n"); - goto err_unpin_mm; - } - - ret = prepare_shadow_batch_buffer(workload); - if (ret) { - gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n"); - goto err_unpin_mm; - } - - ret = prepare_shadow_wa_ctx(&workload->wa_ctx); - if (ret) { - gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n"); - goto err_shadow_batch; - } - if (!workload->emulate_schedule_in) return 0; @@ -510,18 +376,11 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload) ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1); ret = emulate_execlist_schedule_in(&s->execlist[ring_id], ctx); - if (!ret) - goto out; - else + if (ret) { gvt_vgpu_err("fail to emulate execlist schedule in\n"); - - release_shadow_wa_ctx(&workload->wa_ctx); -err_shadow_batch: - release_shadow_batch_buffer(workload); -err_unpin_mm: - intel_vgpu_unpin_mm(workload->shadow_mm); -out: - return ret; + return ret; + } + return 0; } static int complete_execlist_workload(struct intel_vgpu_workload *workload) @@ -538,11 +397,6 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) gvt_dbg_el("complete workload %p status %d\n", workload, workload->status); - if (!workload->status) { - release_shadow_batch_buffer(workload); - release_shadow_wa_ctx(&workload->wa_ctx); - } - if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) { /* if workload->status is not successful means HW GPU * has occurred GPU hang or something wrong with i915/GVT, diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 3d1435f55c7b..a7a67cc65a07 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -325,13 +325,157 @@ err_scan: return ret; } +static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) +{ + struct intel_gvt *gvt = workload->vgpu->gvt; + const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd; + struct intel_shadow_bb_entry *entry_obj; + + /* pin the gem object to ggtt */ + list_for_each_entry(entry_obj, &workload->shadow_bb, list) { + struct i915_vma *vma; + + vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + /* FIXME: we are not tracking our pinned VMA leaving it + * up to the core to fix up the stray pin_count upon + * free. + */ + + /* update the relocate gma with shadow batch buffer*/ + entry_obj->bb_start_cmd_va[1] = i915_ggtt_offset(vma); + if (gmadr_bytes == 8) + entry_obj->bb_start_cmd_va[2] = 0; + } + return 0; +} + +static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) +{ + struct intel_vgpu_workload *workload = container_of(wa_ctx, + struct intel_vgpu_workload, + wa_ctx); + int ring_id = workload->ring_id; + struct intel_vgpu_submission *s = &workload->vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + struct drm_i915_gem_object *ctx_obj = + shadow_ctx->engine[ring_id].state->obj; + struct execlist_ring_context *shadow_ring_context; + struct page *page; + + page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); + shadow_ring_context = kmap_atomic(page); + + shadow_ring_context->bb_per_ctx_ptr.val = + (shadow_ring_context->bb_per_ctx_ptr.val & + (~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma; + shadow_ring_context->rcs_indirect_ctx.val = + (shadow_ring_context->rcs_indirect_ctx.val & + (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma; + + kunmap_atomic(shadow_ring_context); + return 0; +} + +static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) +{ + struct i915_vma *vma; + unsigned char *per_ctx_va = + (unsigned char *)wa_ctx->indirect_ctx.shadow_va + + wa_ctx->indirect_ctx.size; + + if (wa_ctx->indirect_ctx.size == 0) + return 0; + + vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL, + 0, CACHELINE_BYTES, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + /* FIXME: we are not tracking our pinned VMA leaving it + * up to the core to fix up the stray pin_count upon + * free. + */ + + wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma); + + wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1); + memset(per_ctx_va, 0, CACHELINE_BYTES); + + update_wa_ctx_2_shadow_ctx(wa_ctx); + return 0; +} + +static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) +{ + /* release all the shadow batch buffer */ + if (!list_empty(&workload->shadow_bb)) { + struct intel_shadow_bb_entry *entry_obj = + list_first_entry(&workload->shadow_bb, + struct intel_shadow_bb_entry, + list); + struct intel_shadow_bb_entry *temp; + + list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb, + list) { + i915_gem_object_unpin_map(entry_obj->obj); + i915_gem_object_put(entry_obj->obj); + list_del(&entry_obj->list); + kfree(entry_obj); + } + } +} + static int prepare_workload(struct intel_vgpu_workload *workload) { + struct intel_vgpu *vgpu = workload->vgpu; int ret = 0; - if (workload->prepare) - ret = workload->prepare(workload); + ret = intel_vgpu_pin_mm(workload->shadow_mm); + if (ret) { + gvt_vgpu_err("fail to vgpu pin mm\n"); + return ret; + } + ret = intel_vgpu_sync_oos_pages(workload->vgpu); + if (ret) { + gvt_vgpu_err("fail to vgpu sync oos pages\n"); + goto err_unpin_mm; + } + + ret = intel_vgpu_flush_post_shadow(workload->vgpu); + if (ret) { + gvt_vgpu_err("fail to flush post shadow\n"); + goto err_unpin_mm; + } + + ret = prepare_shadow_batch_buffer(workload); + if (ret) { + gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n"); + goto err_unpin_mm; + } + + ret = prepare_shadow_wa_ctx(&workload->wa_ctx); + if (ret) { + gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n"); + goto err_shadow_batch; + } + + if (workload->prepare) { + ret = workload->prepare(workload); + if (ret) + goto err_shadow_wa_ctx; + } + + return 0; +err_shadow_wa_ctx: + release_shadow_wa_ctx(&workload->wa_ctx); +err_shadow_batch: + release_shadow_batch_buffer(workload); +err_unpin_mm: + intel_vgpu_unpin_mm(workload->shadow_mm); return ret; } @@ -557,6 +701,12 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) scheduler->current_workload[ring_id] = NULL; list_del_init(&workload->list); + + if (!workload->status) { + release_shadow_batch_buffer(workload); + release_shadow_wa_ctx(&workload->wa_ctx); + } + workload->complete(workload); atomic_dec(&s->running_workload_num); From 6d76303553bab75ffc53993c56aad06251d8de60 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 12 Sep 2017 22:33:12 +0800 Subject: [PATCH 200/260] drm/i915/gvt: Move common vGPU workload creation into scheduler.c Move common vGPU workload creation functions into scheduler.c since they are not specific to execlist emulation. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 156 +-------------------- drivers/gpu/drm/i915/gvt/scheduler.c | 198 ++++++++++++++++++++++++--- drivers/gpu/drm/i915/gvt/scheduler.h | 3 +- 3 files changed, 184 insertions(+), 173 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 45cb342500b7..c753cb5bfb58 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -438,179 +438,29 @@ out: return ret; } -#define RING_CTX_OFF(x) \ - offsetof(struct execlist_ring_context, x) - -static void read_guest_pdps(struct intel_vgpu *vgpu, - u64 ring_context_gpa, u32 pdp[8]) -{ - u64 gpa; - int i; - - gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val); - - for (i = 0; i < 8; i++) - intel_gvt_hypervisor_read_gpa(vgpu, - gpa + i * 8, &pdp[7 - i], 4); -} - -static int prepare_mm(struct intel_vgpu_workload *workload) -{ - struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; - struct intel_vgpu_mm *mm; - struct intel_vgpu *vgpu = workload->vgpu; - int page_table_level; - u32 pdp[8]; - - if (desc->addressing_mode == 1) { /* legacy 32-bit */ - page_table_level = 3; - } else if (desc->addressing_mode == 3) { /* legacy 64 bit */ - page_table_level = 4; - } else { - gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n"); - return -EINVAL; - } - - read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp); - - mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp); - if (mm) { - intel_gvt_mm_reference(mm); - } else { - - mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT, - pdp, page_table_level, 0); - if (IS_ERR(mm)) { - gvt_vgpu_err("fail to create mm object.\n"); - return PTR_ERR(mm); - } - } - workload->shadow_mm = mm; - return 0; -} - -#define get_last_workload(q) \ - (list_empty(q) ? NULL : container_of(q->prev, \ - struct intel_vgpu_workload, list)) - static int submit_context(struct intel_vgpu *vgpu, int ring_id, struct execlist_ctx_descriptor_format *desc, bool emulate_schedule_in) { struct intel_vgpu_submission *s = &vgpu->submission; - struct list_head *q = workload_q_head(vgpu, ring_id); - struct intel_vgpu_workload *last_workload = get_last_workload(q); struct intel_vgpu_workload *workload = NULL; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - u64 ring_context_gpa; - u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx; - int ret; - ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, - (u32)((desc->lrca + 1) << GTT_PAGE_SHIFT)); - if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca); - return -EINVAL; - } - - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(ring_header.val), &head, 4); - - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(ring_tail.val), &tail, 4); - - head &= RB_HEAD_OFF_MASK; - tail &= RB_TAIL_OFF_MASK; - - if (last_workload && same_context(&last_workload->ctx_desc, desc)) { - gvt_dbg_el("ring id %d cur workload == last\n", ring_id); - gvt_dbg_el("ctx head %x real head %lx\n", head, - last_workload->rb_tail); - /* - * cannot use guest context head pointer here, - * as it might not be updated at this time - */ - head = last_workload->rb_tail; - } - - gvt_dbg_el("ring id %d begin a new workload\n", ring_id); - - /* record some ring buffer register values for scan and shadow */ - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(rb_start.val), &start, 4); - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(rb_ctrl.val), &ctl, 4); - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4); - - workload = intel_vgpu_create_workload(vgpu); + workload = intel_vgpu_create_workload(vgpu, ring_id, desc); if (IS_ERR(workload)) return PTR_ERR(workload); - workload->ring_id = ring_id; - workload->ctx_desc = *desc; - workload->ring_context_gpa = ring_context_gpa; - workload->rb_head = head; - workload->rb_tail = tail; - workload->rb_start = start; - workload->rb_ctl = ctl; workload->prepare = prepare_execlist_workload; workload->complete = complete_execlist_workload; workload->emulate_schedule_in = emulate_schedule_in; - if (ring_id == RCS) { - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4); - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4); - - workload->wa_ctx.indirect_ctx.guest_gma = - indirect_ctx & INDIRECT_CTX_ADDR_MASK; - workload->wa_ctx.indirect_ctx.size = - (indirect_ctx & INDIRECT_CTX_SIZE_MASK) * - CACHELINE_BYTES; - workload->wa_ctx.per_ctx.guest_gma = - per_ctx & PER_CTX_ADDR_MASK; - workload->wa_ctx.per_ctx.valid = per_ctx & 1; - } - if (emulate_schedule_in) workload->elsp_dwords = s->execlist[ring_id].elsp_dwords; - gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n", - workload, ring_id, head, tail, start, ctl); - gvt_dbg_el("workload %p emulate schedule_in %d\n", workload, emulate_schedule_in); - ret = prepare_mm(workload); - if (ret) { - kmem_cache_free(s->workloads, workload); - return ret; - } - - /* Only scan and shadow the first workload in the queue - * as there is only one pre-allocated buf-obj for shadow. - */ - if (list_empty(workload_q_head(vgpu, ring_id))) { - intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); - ret = intel_gvt_scan_and_shadow_workload(workload); - mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); - } - - if (ret == 0) - queue_workload(workload); - else { - intel_vgpu_destroy_workload(workload); - if (vgpu_is_vm_unhealthy(ret)) { - intel_vgpu_clean_execlist(vgpu); - enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); - } - } - return ret; - + queue_workload(workload); + return 0; } int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index a7a67cc65a07..69893f29ff6d 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -792,13 +792,8 @@ complete: FORCEWAKE_ALL); intel_runtime_pm_put(gvt->dev_priv); - if (ret && (vgpu_is_vm_unhealthy(ret))) { - mutex_lock(&gvt->lock); - intel_vgpu_clean_execlist(vgpu); - mutex_unlock(&gvt->lock); + if (ret && (vgpu_is_vm_unhealthy(ret))) enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); - } - } return 0; } @@ -957,19 +952,8 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) kmem_cache_free(s->workloads, workload); } -/** - * intel_vgpu_create_workload - create a vGPU workload - * @vgpu: a vGPU - * - * This function is called when creating a vGPU workload. - * - * Returns: - * struct intel_vgpu_workload * on success, negative error code in - * pointer if failed. - * - */ -struct intel_vgpu_workload * -intel_vgpu_create_workload(struct intel_vgpu *vgpu) +static struct intel_vgpu_workload * +alloc_workload(struct intel_vgpu *vgpu) { struct intel_vgpu_submission *s = &vgpu->submission; struct intel_vgpu_workload *workload; @@ -990,3 +974,179 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu) return workload; } + +#define RING_CTX_OFF(x) \ + offsetof(struct execlist_ring_context, x) + +static void read_guest_pdps(struct intel_vgpu *vgpu, + u64 ring_context_gpa, u32 pdp[8]) +{ + u64 gpa; + int i; + + gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val); + + for (i = 0; i < 8; i++) + intel_gvt_hypervisor_read_gpa(vgpu, + gpa + i * 8, &pdp[7 - i], 4); +} + +static int prepare_mm(struct intel_vgpu_workload *workload) +{ + struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; + struct intel_vgpu_mm *mm; + struct intel_vgpu *vgpu = workload->vgpu; + int page_table_level; + u32 pdp[8]; + + if (desc->addressing_mode == 1) { /* legacy 32-bit */ + page_table_level = 3; + } else if (desc->addressing_mode == 3) { /* legacy 64 bit */ + page_table_level = 4; + } else { + gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n"); + return -EINVAL; + } + + read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp); + + mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp); + if (mm) { + intel_gvt_mm_reference(mm); + } else { + + mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT, + pdp, page_table_level, 0); + if (IS_ERR(mm)) { + gvt_vgpu_err("fail to create mm object.\n"); + return PTR_ERR(mm); + } + } + workload->shadow_mm = mm; + return 0; +} + +#define same_context(a, b) (((a)->context_id == (b)->context_id) && \ + ((a)->lrca == (b)->lrca)) + +#define get_last_workload(q) \ + (list_empty(q) ? NULL : container_of(q->prev, \ + struct intel_vgpu_workload, list)) +/** + * intel_vgpu_create_workload - create a vGPU workload + * @vgpu: a vGPU + * @desc: a guest context descriptor + * + * This function is called when creating a vGPU workload. + * + * Returns: + * struct intel_vgpu_workload * on success, negative error code in + * pointer if failed. + * + */ +struct intel_vgpu_workload * +intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, + struct execlist_ctx_descriptor_format *desc) +{ + struct intel_vgpu_submission *s = &vgpu->submission; + struct list_head *q = workload_q_head(vgpu, ring_id); + struct intel_vgpu_workload *last_workload = get_last_workload(q); + struct intel_vgpu_workload *workload = NULL; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + u64 ring_context_gpa; + u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx; + int ret; + + ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, + (u32)((desc->lrca + 1) << GTT_PAGE_SHIFT)); + if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca); + return ERR_PTR(-EINVAL); + } + + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(ring_header.val), &head, 4); + + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(ring_tail.val), &tail, 4); + + head &= RB_HEAD_OFF_MASK; + tail &= RB_TAIL_OFF_MASK; + + if (last_workload && same_context(&last_workload->ctx_desc, desc)) { + gvt_dbg_el("ring id %d cur workload == last\n", ring_id); + gvt_dbg_el("ctx head %x real head %lx\n", head, + last_workload->rb_tail); + /* + * cannot use guest context head pointer here, + * as it might not be updated at this time + */ + head = last_workload->rb_tail; + } + + gvt_dbg_el("ring id %d begin a new workload\n", ring_id); + + /* record some ring buffer register values for scan and shadow */ + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(rb_start.val), &start, 4); + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(rb_ctrl.val), &ctl, 4); + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4); + + workload = alloc_workload(vgpu); + if (IS_ERR(workload)) + return workload; + + workload->ring_id = ring_id; + workload->ctx_desc = *desc; + workload->ring_context_gpa = ring_context_gpa; + workload->rb_head = head; + workload->rb_tail = tail; + workload->rb_start = start; + workload->rb_ctl = ctl; + + if (ring_id == RCS) { + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4); + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4); + + workload->wa_ctx.indirect_ctx.guest_gma = + indirect_ctx & INDIRECT_CTX_ADDR_MASK; + workload->wa_ctx.indirect_ctx.size = + (indirect_ctx & INDIRECT_CTX_SIZE_MASK) * + CACHELINE_BYTES; + workload->wa_ctx.per_ctx.guest_gma = + per_ctx & PER_CTX_ADDR_MASK; + workload->wa_ctx.per_ctx.valid = per_ctx & 1; + } + + gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n", + workload, ring_id, head, tail, start, ctl); + + ret = prepare_mm(workload); + if (ret) { + kmem_cache_free(s->workloads, workload); + return ERR_PTR(ret); + } + + /* Only scan and shadow the first workload in the queue + * as there is only one pre-allocated buf-obj for shadow. + */ + if (list_empty(workload_q_head(vgpu, ring_id))) { + intel_runtime_pm_get(dev_priv); + mutex_lock(&dev_priv->drm.struct_mutex); + ret = intel_gvt_scan_and_shadow_workload(workload); + mutex_unlock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_put(dev_priv); + } + + if (ret && (vgpu_is_vm_unhealthy(ret))) { + enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); + intel_vgpu_destroy_workload(workload); + return ERR_PTR(ret); + } + + return workload; +} diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 3fe4702cda06..79199693697b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -142,7 +142,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu); void intel_vgpu_clean_submission(struct intel_vgpu *vgpu); struct intel_vgpu_workload * -intel_vgpu_create_workload(struct intel_vgpu *vgpu); +intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, + struct execlist_ctx_descriptor_format *desc); void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload); From d0d51282b88e11a1aa71040b21a9a8cae584a1d4 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 12 Sep 2017 22:39:08 +0800 Subject: [PATCH 201/260] drm/i915/gvt: Remove one extra declaration in scheduler.h Now the function has been moved into scheduler.c. The extra declaration is not necessary. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/scheduler.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 79199693697b..2dc729320a61 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -147,5 +147,4 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload); -void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx); #endif From ad1d36369b07f6b9db81897802ee5d8764eaa922 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Wed, 13 Sep 2017 00:31:29 +0800 Subject: [PATCH 202/260] drm/i915/gvt: Introduce vGPU submission ops Introduce vGPU submission ops to support easy switching submission mode of one vGPU between different OSes. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 10 ++++-- drivers/gpu/drm/i915/gvt/gvt.h | 14 ++++++++ drivers/gpu/drm/i915/gvt/handlers.c | 16 +++++++-- drivers/gpu/drm/i915/gvt/scheduler.c | 53 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/scheduler.h | 6 ++++ drivers/gpu/drm/i915/gvt/vgpu.c | 13 ++----- 6 files changed, 98 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index c753cb5bfb58..e633ac4991e8 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -548,7 +548,7 @@ static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) } } -void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu) +void clean_execlist(struct intel_vgpu *vgpu) { enum intel_engine_id i; struct intel_engine_cs *engine; @@ -564,7 +564,7 @@ void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu) } } -int intel_vgpu_init_execlist(struct intel_vgpu *vgpu) +int init_execlist(struct intel_vgpu *vgpu) { enum intel_engine_id i; struct intel_engine_cs *engine; @@ -586,3 +586,9 @@ void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu, for_each_engine_masked(engine, dev_priv, engine_mask, tmp) init_vgpu_execlist(vgpu, engine->id); } + +const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = { + .name = "execlist", + .init = init_execlist, + .clean = clean_execlist, +}; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index be1bb6dbcbd2..1be41505c4ba 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -142,6 +142,17 @@ struct vgpu_sched_ctl { int weight; }; +enum { + INTEL_VGPU_EXECLIST_SUBMISSION = 1, + INTEL_VGPU_GUC_SUBMISSION, +}; + +struct intel_vgpu_submission_ops { + const char *name; + int (*init)(struct intel_vgpu *vgpu); + void (*clean)(struct intel_vgpu *vgpu); +}; + struct intel_vgpu_submission { struct intel_vgpu_execlist execlist[I915_NUM_ENGINES]; struct list_head workload_q_head[I915_NUM_ENGINES]; @@ -152,6 +163,9 @@ struct intel_vgpu_submission { DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); void *ring_scan_buffer[I915_NUM_ENGINES]; int ring_scan_buffer_size[I915_NUM_ENGINES]; + const struct intel_vgpu_submission_ops *ops; + int virtual_submission_interface; + bool active; }; struct intel_vgpu { diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index acc1cf4fa6f5..5438dafea7a4 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1471,9 +1471,11 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { + struct intel_vgpu_submission *s = &vgpu->submission; u32 data = *(u32 *)p_data; int ring_id = render_mmio_to_ring_id(vgpu->gvt, offset); bool enable_execlist; + int ret; write_vreg(vgpu, offset, p_data, bytes); @@ -1495,8 +1497,18 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, (enable_execlist ? "enabling" : "disabling"), ring_id); - if (enable_execlist) - intel_vgpu_start_schedule(vgpu); + if (!enable_execlist) + return 0; + + if (s->active) + return 0; + + ret = intel_vgpu_select_submission_ops(vgpu, + INTEL_VGPU_EXECLIST_SUBMISSION); + if (ret) + return ret; + + intel_vgpu_start_schedule(vgpu); } return 0; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 69893f29ff6d..f3be88fa88dd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -884,6 +884,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) { struct intel_vgpu_submission *s = &vgpu->submission; + intel_vgpu_select_submission_ops(vgpu, 0); i915_gem_context_put(s->shadow_ctx); kmem_cache_destroy(s->workloads); } @@ -935,6 +936,58 @@ out_shadow_ctx: return ret; } +/** + * intel_vgpu_select_submission_ops - select virtual submission interface + * @vgpu: a vGPU + * @interface: expected vGPU virtual submission interface + * + * This function is called when guest configures submission interface. + * + * Returns: + * Zero on success, negative error code if failed. + * + */ +int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu, + unsigned int interface) +{ + struct intel_vgpu_submission *s = &vgpu->submission; + const struct intel_vgpu_submission_ops *ops[] = { + [INTEL_VGPU_EXECLIST_SUBMISSION] = + &intel_vgpu_execlist_submission_ops, + }; + int ret; + + if (WARN_ON(interface >= ARRAY_SIZE(ops))) + return -EINVAL; + + if (s->active) { + s->ops->clean(vgpu); + s->active = false; + gvt_dbg_core("vgpu%d: de-select ops [ %s ] \n", + vgpu->id, s->ops->name); + } + + if (interface == 0) { + s->ops = NULL; + s->virtual_submission_interface = 0; + gvt_dbg_core("vgpu%d: no submission ops\n", vgpu->id); + return 0; + } + + ret = ops[interface]->init(vgpu); + if (ret) + return ret; + + s->ops = ops[interface]; + s->virtual_submission_interface = interface; + s->active = true; + + gvt_dbg_core("vgpu%d: activate ops [ %s ]\n", + vgpu->id, s->ops->name); + + return 0; +} + /** * intel_vgpu_destroy_workload - destroy a vGPU workload * @vgpu: a vGPU diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 2dc729320a61..8652acda6436 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -141,6 +141,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu); void intel_vgpu_clean_submission(struct intel_vgpu *vgpu); +int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu, + unsigned int interface); + +extern const struct intel_vgpu_submission_ops +intel_vgpu_execlist_submission_ops; + struct intel_vgpu_workload * intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, struct execlist_ctx_descriptor_format *desc); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 1c9818d9f1d8..e63abd48bcee 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -255,7 +255,6 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) idr_remove(&gvt->vgpu_idr, vgpu->id); intel_vgpu_clean_sched_policy(vgpu); intel_vgpu_clean_submission(vgpu); - intel_vgpu_clean_execlist(vgpu); intel_vgpu_clean_display(vgpu); intel_vgpu_clean_opregion(vgpu); intel_vgpu_clean_gtt(vgpu); @@ -371,26 +370,20 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_gtt; - ret = intel_vgpu_init_execlist(vgpu); + ret = intel_vgpu_setup_submission(vgpu); if (ret) goto out_clean_display; - ret = intel_vgpu_setup_submission(vgpu); - if (ret) - goto out_clean_execlist; - ret = intel_vgpu_init_sched_policy(vgpu); if (ret) - goto out_clean_shadow_ctx; + goto out_clean_submission; mutex_unlock(&gvt->lock); return vgpu; -out_clean_shadow_ctx: +out_clean_submission: intel_vgpu_clean_submission(vgpu); -out_clean_execlist: - intel_vgpu_clean_execlist(vgpu); out_clean_display: intel_vgpu_clean_display(vgpu); out_clean_gtt: From 06bb372f9ace47296aeaaca8e130d948ea2855cf Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Wed, 13 Sep 2017 01:41:35 +0800 Subject: [PATCH 203/260] drm/i915/gvt: Introduce intel_vgpu_reset_submission Introduce an generic API to reset vGPU virtual submission interface. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 20 ++++++++------------ drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/scheduler.c | 20 ++++++++++++++++++++ drivers/gpu/drm/i915/gvt/scheduler.h | 3 +++ drivers/gpu/drm/i915/gvt/vgpu.c | 4 ++-- 5 files changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index e633ac4991e8..107eeda82e9d 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -564,18 +564,7 @@ void clean_execlist(struct intel_vgpu *vgpu) } } -int init_execlist(struct intel_vgpu *vgpu) -{ - enum intel_engine_id i; - struct intel_engine_cs *engine; - - for_each_engine(engine, vgpu->gvt->dev_priv, i) - init_vgpu_execlist(vgpu, i); - - return 0; -} - -void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu, +void reset_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -587,8 +576,15 @@ void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu, init_vgpu_execlist(vgpu, engine->id); } +int init_execlist(struct intel_vgpu *vgpu) +{ + reset_execlist(vgpu, ALL_ENGINES); + return 0; +} + const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = { .name = "execlist", .init = init_execlist, + .reset = reset_execlist, .clean = clean_execlist, }; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 1be41505c4ba..b7253d7daf72 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -151,6 +151,7 @@ struct intel_vgpu_submission_ops { const char *name; int (*init)(struct intel_vgpu *vgpu); void (*clean)(struct intel_vgpu *vgpu); + void (*reset)(struct intel_vgpu *vgpu, unsigned long engine_mask); }; struct intel_vgpu_submission { diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f3be88fa88dd..88ce57116a4c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -889,6 +889,26 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) kmem_cache_destroy(s->workloads); } + +/** + * intel_vgpu_reset_submission - reset submission-related resource for vGPU + * @vgpu: a vGPU + * @engine_mask: engines expected to be reset + * + * This function is called when a vGPU is being destroyed. + * + */ +void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, + unsigned long engine_mask) +{ + struct intel_vgpu_submission *s = &vgpu->submission; + + if (!s->active) + return; + + s->ops->reset(vgpu, engine_mask); +} + /** * intel_vgpu_setup_submission - setup submission-related resource for vGPU * @vgpu: a vGPU diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 8652acda6436..e0b5730a3018 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -139,6 +139,9 @@ void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu); int intel_vgpu_setup_submission(struct intel_vgpu *vgpu); +void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, + unsigned long engine_mask); + void intel_vgpu_clean_submission(struct intel_vgpu *vgpu); int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu, diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index e63abd48bcee..841b95c6c231 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -492,10 +492,10 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, mutex_lock(&gvt->lock); } - intel_vgpu_reset_execlist(vgpu, resetting_eng); - + intel_vgpu_reset_submission(vgpu, resetting_eng); /* full GPU reset or device model level reset */ if (engine_mask == ALL_ENGINES || dmlr) { + intel_vgpu_select_submission_ops(vgpu, 0); /*fence will not be reset during virtual reset */ if (dmlr) { From e2c43c0111d54d4857d052fcfca9f3f16bf1b1b2 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Wed, 13 Sep 2017 01:58:35 +0800 Subject: [PATCH 204/260] drm/i915/gvt: Move clean_workloads() into scheduler.c Move clean_workloads() into scheduler.c since it's not specific to execlist. v2: - Remove clean_workloads in intel_vgpu_select_submission_ops. (Zhenyu) Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 41 +--------------------------- drivers/gpu/drm/i915/gvt/scheduler.c | 37 +++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 107eeda82e9d..5c966edfeefb 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -46,8 +46,6 @@ #define same_context(a, b) (((a)->context_id == (b)->context_id) && \ ((a)->lrca == (b)->lrca)) -static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask); - static int context_switch_events[] = { [RCS] = RCS_AS_CONTEXT_SWITCH, [BCS] = BCS_AS_CONTEXT_SWITCH, @@ -397,23 +395,8 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) gvt_dbg_el("complete workload %p status %d\n", workload, workload->status); - if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) { - /* if workload->status is not successful means HW GPU - * has occurred GPU hang or something wrong with i915/GVT, - * and GVT won't inject context switch interrupt to guest. - * So this error is a vGPU hang actually to the guest. - * According to this we should emunlate a vGPU hang. If - * there are pending workloads which are already submitted - * from guest, we should clean them up like HW GPU does. - * - * if it is in middle of engine resetting, the pending - * workloads won't be submitted to HW GPU and will be - * cleaned up during the resetting process later, so doing - * the workload clean up here doesn't have any impact. - **/ - clean_workloads(vgpu, ENGINE_MASK(ring_id)); + if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) goto out; - } if (!list_empty(workload_q_head(vgpu, ring_id))) { struct execlist_ctx_descriptor_format *this_desc, *next_desc; @@ -529,32 +512,11 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; } -static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) -{ - struct intel_vgpu_submission *s = &vgpu->submission; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_engine_cs *engine; - struct intel_vgpu_workload *pos, *n; - unsigned int tmp; - - /* free the unsubmited workloads in the queues. */ - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { - list_for_each_entry_safe(pos, n, - &s->workload_q_head[engine->id], list) { - list_del_init(&pos->list); - intel_vgpu_destroy_workload(pos); - } - clear_bit(engine->id, s->shadow_ctx_desc_updated); - } -} - void clean_execlist(struct intel_vgpu *vgpu) { enum intel_engine_id i; struct intel_engine_cs *engine; - clean_workloads(vgpu, ALL_ENGINES); - for_each_engine(engine, vgpu->gvt->dev_priv, i) { struct intel_vgpu_submission *s = &vgpu->submission; @@ -571,7 +533,6 @@ void reset_execlist(struct intel_vgpu *vgpu, struct intel_engine_cs *engine; unsigned int tmp; - clean_workloads(vgpu, engine_mask); for_each_engine_masked(engine, dev_priv, engine_mask, tmp) init_vgpu_execlist(vgpu, engine->id); } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 88ce57116a4c..391690c0c28c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -644,6 +644,25 @@ static void update_guest_context(struct intel_vgpu_workload *workload) kunmap(page); } +static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) +{ + struct intel_vgpu_submission *s = &vgpu->submission; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_engine_cs *engine; + struct intel_vgpu_workload *pos, *n; + unsigned int tmp; + + /* free the unsubmited workloads in the queues. */ + for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { + list_for_each_entry_safe(pos, n, + &s->workload_q_head[engine->id], list) { + list_del_init(&pos->list); + intel_vgpu_destroy_workload(pos); + } + clear_bit(engine->id, s->shadow_ctx_desc_updated); + } +} + static void complete_current_workload(struct intel_gvt *gvt, int ring_id) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; @@ -707,6 +726,23 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) release_shadow_wa_ctx(&workload->wa_ctx); } + if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) { + /* if workload->status is not successful means HW GPU + * has occurred GPU hang or something wrong with i915/GVT, + * and GVT won't inject context switch interrupt to guest. + * So this error is a vGPU hang actually to the guest. + * According to this we should emunlate a vGPU hang. If + * there are pending workloads which are already submitted + * from guest, we should clean them up like HW GPU does. + * + * if it is in middle of engine resetting, the pending + * workloads won't be submitted to HW GPU and will be + * cleaned up during the resetting process later, so doing + * the workload clean up here doesn't have any impact. + **/ + clean_workloads(vgpu, ENGINE_MASK(ring_id)); + } + workload->complete(workload); atomic_dec(&s->running_workload_num); @@ -906,6 +942,7 @@ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, if (!s->active) return; + clean_workloads(vgpu, engine_mask); s->ops->reset(vgpu, engine_mask); } From c5d71cb31723135dbaa54b32bf046342fe538210 Mon Sep 17 00:00:00 2001 From: fred gao Date: Thu, 28 Sep 2017 11:03:02 +0800 Subject: [PATCH 205/260] drm/i915/gvt: Move vGPU type related code into gvt file In this patch, all the vGPU type related code will be merged into same gvt file and the common interface will be exposed to both XenGT and KvmGT. v2: - remove the useless mdev_* gvt_ops. add get_gvt_attr ops for MPT module. intel_gvt_{init,cleanup}_vgpu_type_groups are initialized in gvt part. (Wang, Zhi) - set gvt_vgpu_type_groups[i] to NULL. (Zhang,Xiong) Signed-off-by: fred gao Reviewed-by: Zhi Wang Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/gvt.c | 134 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/gvt.h | 4 + 2 files changed, 138 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index aaa347f8620c..ed313811fa76 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -36,6 +36,8 @@ #include "i915_drv.h" #include "gvt.h" +#include +#include struct intel_gvt_host intel_gvt_host; @@ -44,6 +46,129 @@ static const char * const supported_hypervisors[] = { [INTEL_GVT_HYPERVISOR_KVM] = "KVM", }; +static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt, + const char *name) +{ + int i; + struct intel_vgpu_type *t; + const char *driver_name = dev_driver_string( + &gvt->dev_priv->drm.pdev->dev); + + for (i = 0; i < gvt->num_types; i++) { + t = &gvt->types[i]; + if (!strncmp(t->name, name + strlen(driver_name) + 1, + sizeof(t->name))) + return t; + } + + return NULL; +} + +static ssize_t available_instances_show(struct kobject *kobj, + struct device *dev, char *buf) +{ + struct intel_vgpu_type *type; + unsigned int num = 0; + void *gvt = kdev_to_i915(dev)->gvt; + + type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); + if (!type) + num = 0; + else + num = type->avail_instance; + + return sprintf(buf, "%u\n", num); +} + +static ssize_t device_api_show(struct kobject *kobj, struct device *dev, + char *buf) +{ + return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); +} + +static ssize_t description_show(struct kobject *kobj, struct device *dev, + char *buf) +{ + struct intel_vgpu_type *type; + void *gvt = kdev_to_i915(dev)->gvt; + + type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); + if (!type) + return 0; + + return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" + "fence: %d\nresolution: %s\n" + "weight: %d\n", + BYTES_TO_MB(type->low_gm_size), + BYTES_TO_MB(type->high_gm_size), + type->fence, vgpu_edid_str(type->resolution), + type->weight); +} + +static MDEV_TYPE_ATTR_RO(available_instances); +static MDEV_TYPE_ATTR_RO(device_api); +static MDEV_TYPE_ATTR_RO(description); + +static struct attribute *gvt_type_attrs[] = { + &mdev_type_attr_available_instances.attr, + &mdev_type_attr_device_api.attr, + &mdev_type_attr_description.attr, + NULL, +}; + +static struct attribute_group *gvt_vgpu_type_groups[] = { + [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL, +}; + +static bool intel_get_gvt_attrs(struct attribute ***type_attrs, + struct attribute_group ***intel_vgpu_type_groups) +{ + *type_attrs = gvt_type_attrs; + *intel_vgpu_type_groups = gvt_vgpu_type_groups; + return true; +} + +static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) +{ + int i, j; + struct intel_vgpu_type *type; + struct attribute_group *group; + + for (i = 0; i < gvt->num_types; i++) { + type = &gvt->types[i]; + + group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); + if (WARN_ON(!group)) + goto unwind; + + group->name = type->name; + group->attrs = gvt_type_attrs; + gvt_vgpu_type_groups[i] = group; + } + + return true; + +unwind: + for (j = 0; j < i; j++) { + group = gvt_vgpu_type_groups[j]; + kfree(group); + } + + return false; +} + +static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) +{ + int i; + struct attribute_group *group; + + for (i = 0; i < gvt->num_types; i++) { + group = gvt_vgpu_type_groups[i]; + gvt_vgpu_type_groups[i] = NULL; + kfree(group); + } +} + static const struct intel_gvt_ops intel_gvt_ops = { .emulate_cfg_read = intel_vgpu_emulate_cfg_read, .emulate_cfg_write = intel_vgpu_emulate_cfg_write, @@ -54,6 +179,8 @@ static const struct intel_gvt_ops intel_gvt_ops = { .vgpu_reset = intel_gvt_reset_vgpu, .vgpu_activate = intel_gvt_activate_vgpu, .vgpu_deactivate = intel_gvt_deactivate_vgpu, + .gvt_find_vgpu_type = intel_gvt_find_vgpu_type, + .get_gvt_attrs = intel_get_gvt_attrs, }; /** @@ -202,6 +329,7 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) intel_gvt_free_firmware(gvt); intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev, gvt); + intel_gvt_cleanup_vgpu_type_groups(gvt); intel_gvt_clean_vgpu_types(gvt); idr_destroy(&gvt->vgpu_idr); @@ -292,6 +420,12 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) if (ret) goto out_clean_thread; + ret = intel_gvt_init_vgpu_type_groups(gvt); + if (ret == false) { + gvt_err("failed to init vgpu type groups: %d\n", ret); + goto out_clean_types; + } + ret = intel_gvt_hypervisor_host_init(&dev_priv->drm.pdev->dev, gvt, &intel_gvt_ops); if (ret) { diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index b7253d7daf72..35c8d87151dd 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -533,6 +533,10 @@ struct intel_gvt_ops { void (*vgpu_reset)(struct intel_vgpu *); void (*vgpu_activate)(struct intel_vgpu *); void (*vgpu_deactivate)(struct intel_vgpu *); + struct intel_vgpu_type *(*gvt_find_vgpu_type)(struct intel_gvt *gvt, + const char *name); + bool (*get_gvt_attrs)(struct attribute ***type_attrs, + struct attribute_group ***intel_vgpu_type_groups); }; From 6aa23ced915f94e23cd7bc3c1d005e39ef39bb2b Mon Sep 17 00:00:00 2001 From: fred gao Date: Thu, 28 Sep 2017 11:03:03 +0800 Subject: [PATCH 206/260] drm/i915/gvt: Refactor vGPU type code in kvmgt part all the vGPU type related code in kvmgt will be moved into gvt.c/gvt.h files while the common vGPU type related interfaces will be called. v2: - intel_gvt_{init,cleanup}_vgpu_type_groups are initialized in gvt part. (Wang, Zhi) Signed-off-by: fred gao Reviewed-by: Zhi Wang Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 128 ++----------------------------- 1 file changed, 8 insertions(+), 120 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index fc6878bb2496..110f07e8bcfb 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -248,120 +248,6 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu) } } -static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt, - const char *name) -{ - int i; - struct intel_vgpu_type *t; - const char *driver_name = dev_driver_string( - &gvt->dev_priv->drm.pdev->dev); - - for (i = 0; i < gvt->num_types; i++) { - t = &gvt->types[i]; - if (!strncmp(t->name, name + strlen(driver_name) + 1, - sizeof(t->name))) - return t; - } - - return NULL; -} - -static ssize_t available_instances_show(struct kobject *kobj, - struct device *dev, char *buf) -{ - struct intel_vgpu_type *type; - unsigned int num = 0; - void *gvt = kdev_to_i915(dev)->gvt; - - type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); - if (!type) - num = 0; - else - num = type->avail_instance; - - return sprintf(buf, "%u\n", num); -} - -static ssize_t device_api_show(struct kobject *kobj, struct device *dev, - char *buf) -{ - return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); -} - -static ssize_t description_show(struct kobject *kobj, struct device *dev, - char *buf) -{ - struct intel_vgpu_type *type; - void *gvt = kdev_to_i915(dev)->gvt; - - type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); - if (!type) - return 0; - - return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" - "fence: %d\nresolution: %s\n" - "weight: %d\n", - BYTES_TO_MB(type->low_gm_size), - BYTES_TO_MB(type->high_gm_size), - type->fence, vgpu_edid_str(type->resolution), - type->weight); -} - -static MDEV_TYPE_ATTR_RO(available_instances); -static MDEV_TYPE_ATTR_RO(device_api); -static MDEV_TYPE_ATTR_RO(description); - -static struct attribute *type_attrs[] = { - &mdev_type_attr_available_instances.attr, - &mdev_type_attr_device_api.attr, - &mdev_type_attr_description.attr, - NULL, -}; - -static struct attribute_group *intel_vgpu_type_groups[] = { - [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL, -}; - -static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) -{ - int i, j; - struct intel_vgpu_type *type; - struct attribute_group *group; - - for (i = 0; i < gvt->num_types; i++) { - type = &gvt->types[i]; - - group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); - if (WARN_ON(!group)) - goto unwind; - - group->name = type->name; - group->attrs = type_attrs; - intel_vgpu_type_groups[i] = group; - } - - return true; - -unwind: - for (j = 0; j < i; j++) { - group = intel_vgpu_type_groups[j]; - kfree(group); - } - - return false; -} - -static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) -{ - int i; - struct attribute_group *group; - - for (i = 0; i < gvt->num_types; i++) { - group = intel_vgpu_type_groups[i]; - kfree(group); - } -} - static void kvmgt_protect_table_init(struct kvmgt_guest_info *info) { hash_init(info->ptable); @@ -441,7 +327,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) pdev = mdev_parent_dev(mdev); gvt = kdev_to_i915(pdev)->gvt; - type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); + type = intel_gvt_ops->gvt_find_vgpu_type(gvt, kobject_name(kobj)); if (!type) { gvt_vgpu_err("failed to find type %s to create\n", kobject_name(kobj)); @@ -1212,8 +1098,7 @@ static const struct attribute_group *intel_vgpu_groups[] = { NULL, }; -static const struct mdev_parent_ops intel_vgpu_ops = { - .supported_type_groups = intel_vgpu_type_groups, +static struct mdev_parent_ops intel_vgpu_ops = { .mdev_attr_groups = intel_vgpu_groups, .create = intel_vgpu_create, .remove = intel_vgpu_remove, @@ -1229,17 +1114,20 @@ static const struct mdev_parent_ops intel_vgpu_ops = { static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops) { - if (!intel_gvt_init_vgpu_type_groups(gvt)) - return -EFAULT; + struct attribute **kvm_type_attrs; + struct attribute_group **kvm_vgpu_type_groups; intel_gvt_ops = ops; + if (!intel_gvt_ops->get_gvt_attrs(&kvm_type_attrs, + &kvm_vgpu_type_groups)) + return -EFAULT; + intel_vgpu_ops.supported_type_groups = kvm_vgpu_type_groups; return mdev_register_device(dev, &intel_vgpu_ops); } static void kvmgt_host_exit(struct device *dev, void *gvt) { - intel_gvt_cleanup_vgpu_type_groups(gvt); mdev_unregister_device(dev); } From bc7b0be316aebac42eb9e8e54c984609555944da Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 26 Sep 2017 16:19:13 +0800 Subject: [PATCH 207/260] drm/i915/gvt: Add basic debugfs infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need debugfs entry to expose some debug information of gvt and vGPUs. The first tool will be added is mmio-diff, which help to find the difference values of host and vGPU mmio. It's useful for platform enabling. This patch just add a basic debugfs infrastructure, each vGPU has its own sub-folder. Two simple attributes are created as a template. . ├── num_tracked_mmio ├── vgpu1 | └── active └── vgpu2 └── active Signed-off-by: Changbin Du Reviewed-by: Zhi Wang Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/Makefile | 2 +- drivers/gpu/drm/i915/gvt/debugfs.c | 97 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/gvt.c | 5 ++ drivers/gpu/drm/i915/gvt/gvt.h | 12 +++- drivers/gpu/drm/i915/gvt/handlers.c | 2 - drivers/gpu/drm/i915/gvt/vgpu.c | 7 +++ 6 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/i915/gvt/debugfs.c diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index f5486cb94818..7100240a4a81 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -1,7 +1,7 @@ GVT_DIR := gvt GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ - execlist.o scheduler.o sched_policy.o render.o cmd_parser.o + execlist.o scheduler.o sched_policy.o render.o cmd_parser.o debugfs.o ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c new file mode 100644 index 000000000000..02acd5842605 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -0,0 +1,97 @@ +/* + * Copyright(c) 2011-2017 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include "i915_drv.h" +#include "gvt.h" + + +/** + * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU + * @vgpu: a vGPU + * + * Returns: + * Zero on success, negative error code if failed. + */ +int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) +{ + struct dentry *ent; + char name[10] = ""; + + sprintf(name, "vgpu%d", vgpu->id); + vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root); + if (!vgpu->debugfs) + return -ENOMEM; + + ent = debugfs_create_bool("active", 0444, vgpu->debugfs, + &vgpu->active); + if (!ent) + return -ENOMEM; + + return 0; +} + +/** + * intel_gvt_debugfs_remove_vgpu - remove debugfs entries of a vGPU + * @vgpu: a vGPU + */ +void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu) +{ + debugfs_remove_recursive(vgpu->debugfs); + vgpu->debugfs = NULL; +} + +/** + * intel_gvt_debugfs_init - register gvt debugfs root entry + * @gvt: GVT device + * + * Returns: + * zero on success, negative if failed. + */ +int intel_gvt_debugfs_init(struct intel_gvt *gvt) +{ + struct drm_minor *minor = gvt->dev_priv->drm.primary; + struct dentry *ent; + + gvt->debugfs_root = debugfs_create_dir("gvt", minor->debugfs_root); + if (!gvt->debugfs_root) { + gvt_err("Cannot create debugfs dir\n"); + return -ENOMEM; + } + + ent = debugfs_create_ulong("num_tracked_mmio", 0444, gvt->debugfs_root, + &gvt->mmio.num_tracked_mmio); + if (!ent) + return -ENOMEM; + + return 0; +} + +/** + * intel_gvt_debugfs_clean - remove debugfs entries + * @gvt: GVT device + */ +void intel_gvt_debugfs_clean(struct intel_gvt *gvt) +{ + debugfs_remove_recursive(gvt->debugfs_root); + gvt->debugfs_root = NULL; +} diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index ed313811fa76..cef06d59884e 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -318,6 +318,7 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) if (WARN_ON(!gvt)) return; + intel_gvt_debugfs_clean(gvt); clean_service_thread(gvt); intel_gvt_clean_cmd_parser(gvt); intel_gvt_clean_sched_policy(gvt); @@ -441,6 +442,10 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) } gvt->idle_vgpu = vgpu; + ret = intel_gvt_debugfs_init(gvt); + if (ret) + gvt_err("debugfs registeration failed, go on.\n"); + gvt_dbg_core("gvt device initialization is done\n"); dev_priv->gvt = gvt; return 0; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 35c8d87151dd..f08d194f639f 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -190,6 +190,8 @@ struct intel_vgpu { struct intel_vgpu_display display; struct intel_vgpu_submission submission; + struct dentry *debugfs; + #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) struct { struct mdev_device *mdev; @@ -253,7 +255,7 @@ struct intel_gvt_mmio { unsigned int num_mmio_block; DECLARE_HASHTABLE(mmio_info_table, INTEL_GVT_MMIO_HASH_BITS); - unsigned int num_tracked_mmio; + unsigned long num_tracked_mmio; }; struct intel_gvt_firmware { @@ -301,6 +303,8 @@ struct intel_gvt { struct task_struct *service_thread; wait_queue_head_t service_thread_wq; unsigned long service_request; + + struct dentry *debugfs_root; }; static inline struct intel_gvt *to_gvt(struct drm_i915_private *i915) @@ -619,6 +623,12 @@ static inline bool intel_gvt_mmio_has_mode_mask( return gvt->mmio.mmio_attribute[offset >> 2] & F_MODE_MASK; } +int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu); +void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu); +int intel_gvt_debugfs_init(struct intel_gvt *gvt); +void intel_gvt_debugfs_clean(struct intel_gvt *gvt); + + #include "trace.h" #include "mpt.h" diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 5438dafea7a4..408600befc9f 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2928,8 +2928,6 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) gvt->mmio.mmio_block = mmio_blocks; gvt->mmio.num_mmio_block = ARRAY_SIZE(mmio_blocks); - gvt_dbg_mmio("traced %u virtual mmio registers\n", - gvt->mmio.num_tracked_mmio); return 0; err: intel_gvt_clean_mmio_info(gvt); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 841b95c6c231..99dbefadfe91 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -252,6 +252,7 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) WARN(vgpu->active, "vGPU is still active!\n"); + intel_gvt_debugfs_remove_vgpu(vgpu); idr_remove(&gvt->vgpu_idr, vgpu->id); intel_vgpu_clean_sched_policy(vgpu); intel_vgpu_clean_submission(vgpu); @@ -378,10 +379,16 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_submission; + ret = intel_gvt_debugfs_add_vgpu(vgpu); + if (ret) + goto out_clean_sched_policy; + mutex_unlock(&gvt->lock); return vgpu; +out_clean_sched_policy: + intel_vgpu_clean_sched_policy(vgpu); out_clean_submission: intel_vgpu_clean_submission(vgpu); out_clean_display: From 5e86ccefa3c4e2d12acd7f5fc417ea120edbca21 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 26 Sep 2017 15:02:21 +0800 Subject: [PATCH 208/260] drm/i915/gvt: Use BIT() to make klockwork happy Replace the plain bit usage with BIT() to make klockwork happy. Cc: Deng Hongyi Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 2801d70579d8..77da3ecfc4cc 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -351,7 +351,7 @@ static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) return false; e->type = get_entry_type(e->type); - if (!(e->val64 & (1 << 7))) + if (!(e->val64 & BIT(7))) return false; e->type = get_pse_type(e->type); @@ -369,12 +369,12 @@ static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) return (e->val64 != 0); else - return (e->val64 & (1 << 0)); + return (e->val64 & BIT(0)); } static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) { - e->val64 &= ~(1 << 0); + e->val64 &= ~BIT(0); } /* From 58facf8c46160706ccb3b7b3768c0f0e29407548 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Fri, 22 Sep 2017 21:12:03 +0800 Subject: [PATCH 209/260] drm/i915/gvt: Refine find_bb_size() Returns the error code if something is wrong and the size of batch buffer is passed through the pointer. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 30 +++++++++++++-------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index cd2681a23fcd..3355eb4fab75 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1594,23 +1594,23 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s) return 1; } -static int find_bb_size(struct parser_exec_state *s) +static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) { unsigned long gma = 0; struct cmd_info *info; - int bb_size = 0; uint32_t cmd_len = 0; - bool met_bb_end = false; + bool bb_end = false; struct intel_vgpu *vgpu = s->vgpu; u32 cmd; + *bb_size = 0; + /* get the start gm address of the batch buffer */ gma = get_gma_bb_from_cmd(s, 1); if (gma == INTEL_GVT_INVALID_ADDR) return -EFAULT; cmd = cmd_val(s, 0); - info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", @@ -1629,20 +1629,18 @@ static int find_bb_size(struct parser_exec_state *s) } if (info->opcode == OP_MI_BATCH_BUFFER_END) { - met_bb_end = true; + bb_end = true; } else if (info->opcode == OP_MI_BATCH_BUFFER_START) { - if (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0) { + if (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0) /* chained batch buffer */ - met_bb_end = true; - } + bb_end = true; } cmd_len = get_cmd_length(info, cmd) << 2; - bb_size += cmd_len; + *bb_size += cmd_len; gma += cmd_len; + } while (!bb_end); - } while (!met_bb_end); - - return bb_size; + return 0; } static int perform_bb_shadow(struct parser_exec_state *s) @@ -1650,7 +1648,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) struct intel_shadow_bb_entry *entry_obj; struct intel_vgpu *vgpu = s->vgpu; unsigned long gma = 0; - int bb_size; + unsigned long bb_size; void *dst = NULL; int ret = 0; @@ -1660,9 +1658,9 @@ static int perform_bb_shadow(struct parser_exec_state *s) return -EFAULT; /* get the size of the batch buffer */ - bb_size = find_bb_size(s); - if (bb_size < 0) - return bb_size; + ret = find_bb_size(s, &bb_size); + if (ret) + return ret; /* allocate shadow batch buffer */ entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL); From f52c380a48f527930c86ea6fd7242873c93ba682 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sun, 24 Sep 2017 21:53:03 +0800 Subject: [PATCH 210/260] drm/i915/gvt: Refine shadow batch buffer 1) Use standard i915 GEM object sequence to access the shadow batch buffer. 2) Manage i915 vma life cycle to solve one FIXME. v2: - Refine code structure. - Refine the usage of GEM APIs. - Add the missing lock/unlock in release_shadow_batch_buffer. Test on my SKL NuC. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 78 ++++++++++++------------ drivers/gpu/drm/i915/gvt/scheduler.c | 88 ++++++++++++++++++--------- drivers/gpu/drm/i915/gvt/scheduler.h | 7 ++- 3 files changed, 101 insertions(+), 72 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 3355eb4fab75..020443083681 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1645,11 +1645,10 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) static int perform_bb_shadow(struct parser_exec_state *s) { - struct intel_shadow_bb_entry *entry_obj; struct intel_vgpu *vgpu = s->vgpu; + struct intel_vgpu_shadow_bb *bb; unsigned long gma = 0; unsigned long bb_size; - void *dst = NULL; int ret = 0; /* get the start gm address of the batch buffer */ @@ -1657,51 +1656,51 @@ static int perform_bb_shadow(struct parser_exec_state *s) if (gma == INTEL_GVT_INVALID_ADDR) return -EFAULT; - /* get the size of the batch buffer */ ret = find_bb_size(s, &bb_size); if (ret) return ret; - /* allocate shadow batch buffer */ - entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL); - if (entry_obj == NULL) + bb = kzalloc(sizeof(*bb), GFP_KERNEL); + if (!bb) return -ENOMEM; - entry_obj->obj = - i915_gem_object_create(s->vgpu->gvt->dev_priv, - roundup(bb_size, PAGE_SIZE)); - if (IS_ERR(entry_obj->obj)) { - ret = PTR_ERR(entry_obj->obj); - goto free_entry; - } - entry_obj->len = bb_size; - INIT_LIST_HEAD(&entry_obj->list); - - dst = i915_gem_object_pin_map(entry_obj->obj, I915_MAP_WB); - if (IS_ERR(dst)) { - ret = PTR_ERR(dst); - goto put_obj; + bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv, + roundup(bb_size, PAGE_SIZE)); + if (IS_ERR(bb->obj)) { + ret = PTR_ERR(bb->obj); + goto err_free_bb; } - ret = i915_gem_object_set_to_cpu_domain(entry_obj->obj, false); - if (ret) { - gvt_vgpu_err("failed to set shadow batch to CPU\n"); - goto unmap_src; + ret = i915_gem_obj_prepare_shmem_write(bb->obj, &bb->clflush); + if (ret) + goto err_free_obj; + + bb->va = i915_gem_object_pin_map(bb->obj, I915_MAP_WB); + if (IS_ERR(bb->va)) { + ret = PTR_ERR(bb->va); + goto err_finish_shmem_access; } - entry_obj->va = dst; - entry_obj->bb_start_cmd_va = s->ip_va; + if (bb->clflush & CLFLUSH_BEFORE) { + drm_clflush_virt_range(bb->va, bb->obj->base.size); + bb->clflush &= ~CLFLUSH_BEFORE; + } - /* copy batch buffer to shadow batch buffer*/ ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, gma, gma + bb_size, - dst); + bb->va); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); - goto unmap_src; + ret = -EFAULT; + goto err_unmap; } - list_add(&entry_obj->list, &s->workload->shadow_bb); + INIT_LIST_HEAD(&bb->list); + list_add(&bb->list, &s->workload->shadow_bb); + + bb->accessing = true; + bb->bb_start_cmd_va = s->ip_va; + /* * ip_va saves the virtual address of the shadow batch buffer, while * ip_gma saves the graphics address of the original batch buffer. @@ -1710,17 +1709,17 @@ static int perform_bb_shadow(struct parser_exec_state *s) * buffer's gma in pair. After all, we don't want to pin the shadow * buffer here (too early). */ - s->ip_va = dst; + s->ip_va = bb->va; s->ip_gma = gma; - return 0; - -unmap_src: - i915_gem_object_unpin_map(entry_obj->obj); -put_obj: - i915_gem_object_put(entry_obj->obj); -free_entry: - kfree(entry_obj); +err_unmap: + i915_gem_object_unpin_map(bb->obj); +err_finish_shmem_access: + i915_gem_obj_finish_shmem_access(bb->obj); +err_free_obj: + i915_gem_object_put(bb->obj); +err_free_bb: + kfree(bb); return ret; } @@ -1762,7 +1761,6 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s) if (ret < 0) return ret; } - return ret; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 391690c0c28c..f2d4c90ea1d4 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -325,31 +325,46 @@ err_scan: return ret; } +static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload); + static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) { struct intel_gvt *gvt = workload->vgpu->gvt; const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd; - struct intel_shadow_bb_entry *entry_obj; + struct intel_vgpu_shadow_bb *bb; + int ret; - /* pin the gem object to ggtt */ - list_for_each_entry(entry_obj, &workload->shadow_bb, list) { - struct i915_vma *vma; + list_for_each_entry(bb, &workload->shadow_bb, list) { + bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0); + if (IS_ERR(bb->vma)) { + ret = PTR_ERR(bb->vma); + goto err; + } - vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - /* FIXME: we are not tracking our pinned VMA leaving it - * up to the core to fix up the stray pin_count upon - * free. - */ - - /* update the relocate gma with shadow batch buffer*/ - entry_obj->bb_start_cmd_va[1] = i915_ggtt_offset(vma); + /* relocate shadow batch buffer */ + bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); if (gmadr_bytes == 8) - entry_obj->bb_start_cmd_va[2] = 0; + bb->bb_start_cmd_va[2] = 0; + + /* No one is going to touch shadow bb from now on. */ + if (bb->clflush & CLFLUSH_AFTER) { + drm_clflush_virt_range(bb->va, bb->obj->base.size); + bb->clflush &= ~CLFLUSH_AFTER; + } + + ret = i915_gem_object_set_to_gtt_domain(bb->obj, false); + if (ret) + goto err; + + i915_gem_obj_finish_shmem_access(bb->obj); + bb->accessing = false; + + i915_vma_move_to_active(bb->vma, workload->req, 0); } return 0; +err: + release_shadow_batch_buffer(workload); + return ret; } static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) @@ -410,22 +425,37 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) { - /* release all the shadow batch buffer */ - if (!list_empty(&workload->shadow_bb)) { - struct intel_shadow_bb_entry *entry_obj = - list_first_entry(&workload->shadow_bb, - struct intel_shadow_bb_entry, - list); - struct intel_shadow_bb_entry *temp; + struct intel_vgpu *vgpu = workload->vgpu; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_vgpu_shadow_bb *bb, *pos; - list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb, - list) { - i915_gem_object_unpin_map(entry_obj->obj); - i915_gem_object_put(entry_obj->obj); - list_del(&entry_obj->list); - kfree(entry_obj); + if (list_empty(&workload->shadow_bb)) + return; + + bb = list_first_entry(&workload->shadow_bb, + struct intel_vgpu_shadow_bb, list); + + mutex_lock(&dev_priv->drm.struct_mutex); + + list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) { + if (bb->obj) { + if (bb->accessing) + i915_gem_obj_finish_shmem_access(bb->obj); + + if (bb->va && !IS_ERR(bb->va)) + i915_gem_object_unpin_map(bb->obj); + + if (bb->vma && !IS_ERR(bb->vma)) { + i915_vma_unpin(bb->vma); + i915_vma_close(bb->vma); + } + __i915_gem_object_release_unless_active(bb->obj); } + list_del(&bb->list); + kfree(bb); } + + mutex_unlock(&dev_priv->drm.struct_mutex); } static int prepare_workload(struct intel_vgpu_workload *workload) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index e0b5730a3018..e4a9f9acd4a9 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -112,13 +112,14 @@ struct intel_vgpu_workload { struct intel_shadow_wa_ctx wa_ctx; }; -/* Intel shadow batch buffer is a i915 gem object */ -struct intel_shadow_bb_entry { +struct intel_vgpu_shadow_bb { struct list_head list; struct drm_i915_gem_object *obj; + struct i915_vma *vma; void *va; - unsigned long len; u32 *bb_start_cmd_va; + unsigned int clflush; + bool accessing; }; #define workload_q_head(vgpu, ring_id) \ From 7d1e5cdf01789729aff2da4005f51f58b491040c Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Fri, 29 Sep 2017 02:47:55 +0800 Subject: [PATCH 211/260] drm/i915/gvt: Factor intel_vgpu_page_track As the data structure of "intel_vgpu_guest_page" will become much heavier in future, it's better to factor out the guest memory page track mechnisim as early as possible. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 176 ++++++++++++++++++-------------- drivers/gpu/drm/i915/gvt/gtt.h | 30 +++--- drivers/gpu/drm/i915/gvt/mmio.c | 32 +++--- drivers/gpu/drm/i915/gvt/mpt.h | 34 +++--- 4 files changed, 147 insertions(+), 125 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 77da3ecfc4cc..bd3dc209cd89 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -494,7 +494,7 @@ static inline int ppgtt_spt_get_entry( return -EINVAL; ret = ops->get_entry(page_table, e, index, guest, - spt->guest_page.gfn << GTT_PAGE_SHIFT, + spt->guest_page.track.gfn << GTT_PAGE_SHIFT, spt->vgpu); if (ret) return ret; @@ -516,7 +516,7 @@ static inline int ppgtt_spt_set_entry( return -EINVAL; return ops->set_entry(page_table, e, index, guest, - spt->guest_page.gfn << GTT_PAGE_SHIFT, + spt->guest_page.track.gfn << GTT_PAGE_SHIFT, spt->vgpu); } @@ -537,84 +537,99 @@ static inline int ppgtt_spt_set_entry( spt->shadow_page.type, e, index, false) /** - * intel_vgpu_init_guest_page - init a guest page data structure + * intel_vgpu_init_page_track - init a page track data structure * @vgpu: a vGPU - * @p: a guest page data structure + * @t: a page track data structure * @gfn: guest memory page frame number - * @handler: function will be called when target guest memory page has + * @handler: the function will be called when target guest memory page has * been modified. * - * This function is called when user wants to track a guest memory page. + * This function is called when a user wants to prepare a page track data + * structure to track a guest memory page. * * Returns: * Zero on success, negative error code if failed. */ -int intel_vgpu_init_guest_page(struct intel_vgpu *vgpu, +int intel_vgpu_init_page_track(struct intel_vgpu *vgpu, + struct intel_vgpu_page_track *t, + unsigned long gfn, + int (*handler)(void *, u64, void *, int), + void *data) +{ + INIT_HLIST_NODE(&t->node); + + t->tracked = false; + t->gfn = gfn; + t->handler = handler; + t->data = data; + + hash_add(vgpu->gtt.tracked_guest_page_hash_table, &t->node, t->gfn); + return 0; +} + +/** + * intel_vgpu_clean_page_track - release a page track data structure + * @vgpu: a vGPU + * @t: a page track data structure + * + * This function is called before a user frees a page track data structure. + */ +void intel_vgpu_clean_page_track(struct intel_vgpu *vgpu, + struct intel_vgpu_page_track *t) +{ + if (!hlist_unhashed(&t->node)) + hash_del(&t->node); + + if (t->tracked) + intel_gvt_hypervisor_disable_page_track(vgpu, t); +} + +/** + * intel_vgpu_find_tracked_page - find a tracked guest page + * @vgpu: a vGPU + * @gfn: guest memory page frame number + * + * This function is called when the emulation layer wants to figure out if a + * trapped GFN is a tracked guest page. + * + * Returns: + * Pointer to page track data structure, NULL if not found. + */ +struct intel_vgpu_page_track *intel_vgpu_find_tracked_page( + struct intel_vgpu *vgpu, unsigned long gfn) +{ + struct intel_vgpu_page_track *t; + + hash_for_each_possible(vgpu->gtt.tracked_guest_page_hash_table, + t, node, gfn) { + if (t->gfn == gfn) + return t; + } + return NULL; +} + +static int init_guest_page(struct intel_vgpu *vgpu, struct intel_vgpu_guest_page *p, unsigned long gfn, int (*handler)(void *, u64, void *, int), void *data) { - INIT_HLIST_NODE(&p->node); - - p->writeprotection = false; - p->gfn = gfn; - p->handler = handler; - p->data = data; p->oos_page = NULL; p->write_cnt = 0; - hash_add(vgpu->gtt.guest_page_hash_table, &p->node, p->gfn); - return 0; + return intel_vgpu_init_page_track(vgpu, &p->track, gfn, handler, data); } static int detach_oos_page(struct intel_vgpu *vgpu, struct intel_vgpu_oos_page *oos_page); -/** - * intel_vgpu_clean_guest_page - release the resource owned by guest page data - * structure - * @vgpu: a vGPU - * @p: a tracked guest page - * - * This function is called when user tries to stop tracking a guest memory - * page. - */ -void intel_vgpu_clean_guest_page(struct intel_vgpu *vgpu, +static void clean_guest_page(struct intel_vgpu *vgpu, struct intel_vgpu_guest_page *p) { - if (!hlist_unhashed(&p->node)) - hash_del(&p->node); - if (p->oos_page) detach_oos_page(vgpu, p->oos_page); - if (p->writeprotection) - intel_gvt_hypervisor_unset_wp_page(vgpu, p); -} - -/** - * intel_vgpu_find_guest_page - find a guest page data structure by GFN. - * @vgpu: a vGPU - * @gfn: guest memory page frame number - * - * This function is called when emulation logic wants to know if a trapped GFN - * is a tracked guest page. - * - * Returns: - * Pointer to guest page data structure, NULL if failed. - */ -struct intel_vgpu_guest_page *intel_vgpu_find_guest_page( - struct intel_vgpu *vgpu, unsigned long gfn) -{ - struct intel_vgpu_guest_page *p; - - hash_for_each_possible(vgpu->gtt.guest_page_hash_table, - p, node, gfn) { - if (p->gfn == gfn) - return p; - } - return NULL; + intel_vgpu_clean_page_track(vgpu, &p->track); } static inline int init_shadow_page(struct intel_vgpu *vgpu, @@ -664,6 +679,9 @@ static inline struct intel_vgpu_shadow_page *find_shadow_page( return NULL; } +#define page_track_to_guest_page(ptr) \ + container_of(ptr, struct intel_vgpu_guest_page, track) + #define guest_page_to_ppgtt_spt(ptr) \ container_of(ptr, struct intel_vgpu_ppgtt_spt, guest_page) @@ -697,7 +715,7 @@ static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt) trace_spt_free(spt->vgpu->id, spt, spt->shadow_page.type); clean_shadow_page(spt->vgpu, &spt->shadow_page); - intel_vgpu_clean_guest_page(spt->vgpu, &spt->guest_page); + clean_guest_page(spt->vgpu, &spt->guest_page); list_del_init(&spt->post_shadow_list); free_spt(spt); @@ -713,22 +731,24 @@ static void ppgtt_free_all_shadow_page(struct intel_vgpu *vgpu) ppgtt_free_shadow_page(shadow_page_to_ppgtt_spt(sp)); } -static int ppgtt_handle_guest_write_page_table_bytes(void *gp, +static int ppgtt_handle_guest_write_page_table_bytes( + struct intel_vgpu_guest_page *gpt, u64 pa, void *p_data, int bytes); -static int ppgtt_write_protection_handler(void *gp, u64 pa, +static int ppgtt_write_protection_handler(void *data, u64 pa, void *p_data, int bytes) { - struct intel_vgpu_guest_page *gpt = (struct intel_vgpu_guest_page *)gp; + struct intel_vgpu_page_track *t = data; + struct intel_vgpu_guest_page *p = page_track_to_guest_page(t); int ret; if (bytes != 4 && bytes != 8) return -EINVAL; - if (!gpt->writeprotection) + if (!t->tracked) return -EINVAL; - ret = ppgtt_handle_guest_write_page_table_bytes(gp, + ret = ppgtt_handle_guest_write_page_table_bytes(p, pa, p_data, bytes); if (ret) return ret; @@ -768,7 +788,7 @@ retry: goto err; } - ret = intel_vgpu_init_guest_page(vgpu, &spt->guest_page, + ret = init_guest_page(vgpu, &spt->guest_page, gfn, ppgtt_write_protection_handler, NULL); if (ret) { gvt_vgpu_err("fail to initialize guest page for spt\n"); @@ -856,7 +876,7 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) int v = atomic_read(&spt->refcount); trace_spt_change(spt->vgpu->id, "die", spt, - spt->guest_page.gfn, spt->shadow_page.type); + spt->guest_page.track.gfn, spt->shadow_page.type); trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); @@ -878,7 +898,7 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) } release: trace_spt_change(spt->vgpu->id, "release", spt, - spt->guest_page.gfn, spt->shadow_page.type); + spt->guest_page.track.gfn, spt->shadow_page.type); ppgtt_free_shadow_page(spt); return 0; fail: @@ -895,6 +915,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s = NULL; struct intel_vgpu_guest_page *g; + struct intel_vgpu_page_track *t; int ret; if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(we->type)))) { @@ -902,8 +923,9 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( goto fail; } - g = intel_vgpu_find_guest_page(vgpu, ops->get_pfn(we)); - if (g) { + t = intel_vgpu_find_tracked_page(vgpu, ops->get_pfn(we)); + if (t) { + g = page_track_to_guest_page(t); s = guest_page_to_ppgtt_spt(g); ppgtt_get_shadow_page(s); } else { @@ -915,7 +937,8 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( goto fail; } - ret = intel_gvt_hypervisor_set_wp_page(vgpu, &s->guest_page); + ret = intel_gvt_hypervisor_enable_page_track(vgpu, + &s->guest_page.track); if (ret) goto fail; @@ -923,7 +946,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( if (ret) goto fail; - trace_spt_change(vgpu->id, "new", s, s->guest_page.gfn, + trace_spt_change(vgpu->id, "new", s, s->guest_page.track.gfn, s->shadow_page.type); } return s; @@ -953,7 +976,7 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) int ret; trace_spt_change(spt->vgpu->id, "born", spt, - spt->guest_page.gfn, spt->shadow_page.type); + spt->guest_page.track.gfn, spt->shadow_page.type); if (gtt_type_is_pte_pt(spt->shadow_page.type)) { for_each_present_guest_entry(spt, &ge, i) { @@ -1082,7 +1105,7 @@ static int sync_oos_page(struct intel_vgpu *vgpu, index++) { ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu); ops->get_entry(NULL, &new, index, true, - oos_page->guest_page->gfn << PAGE_SHIFT, vgpu); + oos_page->guest_page->track.gfn << PAGE_SHIFT, vgpu); if (old.val64 == new.val64 && !test_and_clear_bit(index, spt->post_shadow_bitmap)) @@ -1132,8 +1155,9 @@ static int attach_oos_page(struct intel_vgpu *vgpu, struct intel_gvt *gvt = vgpu->gvt; int ret; - ret = intel_gvt_hypervisor_read_gpa(vgpu, gpt->gfn << GTT_PAGE_SHIFT, - oos_page->mem, GTT_PAGE_SIZE); + ret = intel_gvt_hypervisor_read_gpa(vgpu, + gpt->track.gfn << GTT_PAGE_SHIFT, + oos_page->mem, GTT_PAGE_SIZE); if (ret) return ret; @@ -1152,7 +1176,7 @@ static int ppgtt_set_guest_page_sync(struct intel_vgpu *vgpu, { int ret; - ret = intel_gvt_hypervisor_set_wp_page(vgpu, gpt); + ret = intel_gvt_hypervisor_enable_page_track(vgpu, &gpt->track); if (ret) return ret; @@ -1200,7 +1224,7 @@ static int ppgtt_set_guest_page_oos(struct intel_vgpu *vgpu, gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type); list_add_tail(&oos_page->vm_list, &vgpu->gtt.oos_page_list_head); - return intel_gvt_hypervisor_unset_wp_page(vgpu, gpt); + return intel_gvt_hypervisor_disable_page_track(vgpu, &gpt->track); } /** @@ -1335,10 +1359,10 @@ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) return 0; } -static int ppgtt_handle_guest_write_page_table_bytes(void *gp, +static int ppgtt_handle_guest_write_page_table_bytes( + struct intel_vgpu_guest_page *gpt, u64 pa, void *p_data, int bytes) { - struct intel_vgpu_guest_page *gpt = (struct intel_vgpu_guest_page *)gp; struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; @@ -2032,7 +2056,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) struct intel_vgpu_gtt *gtt = &vgpu->gtt; struct intel_vgpu_mm *ggtt_mm; - hash_init(gtt->guest_page_hash_table); + hash_init(gtt->tracked_guest_page_hash_table); hash_init(gtt->shadow_page_hash_table); INIT_LIST_HEAD(>t->mm_list_head); diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 30a4c8d16026..bd9b298b8777 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -193,18 +193,16 @@ struct intel_vgpu_scratch_pt { unsigned long page_mfn; }; - struct intel_vgpu_gtt { struct intel_vgpu_mm *ggtt_mm; unsigned long active_ppgtt_mm_bitmap; struct list_head mm_list_head; DECLARE_HASHTABLE(shadow_page_hash_table, INTEL_GVT_GTT_HASH_BITS); - DECLARE_HASHTABLE(guest_page_hash_table, INTEL_GVT_GTT_HASH_BITS); - atomic_t n_write_protected_guest_page; + DECLARE_HASHTABLE(tracked_guest_page_hash_table, INTEL_GVT_GTT_HASH_BITS); + atomic_t n_tracked_guest_page; struct list_head oos_page_list_head; struct list_head post_shadow_list_head; struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX]; - }; extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); @@ -228,12 +226,16 @@ struct intel_vgpu_shadow_page { unsigned long mfn; }; -struct intel_vgpu_guest_page { +struct intel_vgpu_page_track { struct hlist_node node; - bool writeprotection; + bool tracked; unsigned long gfn; int (*handler)(void *, u64, void *, int); void *data; +}; + +struct intel_vgpu_guest_page { + struct intel_vgpu_page_track track; unsigned long write_cnt; struct intel_vgpu_oos_page *oos_page; }; @@ -258,22 +260,16 @@ struct intel_vgpu_ppgtt_spt { struct list_head post_shadow_list; }; -int intel_vgpu_init_guest_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *guest_page, +int intel_vgpu_init_page_track(struct intel_vgpu *vgpu, + struct intel_vgpu_page_track *t, unsigned long gfn, int (*handler)(void *gp, u64, void *, int), void *data); -void intel_vgpu_clean_guest_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *guest_page); +void intel_vgpu_clean_page_track(struct intel_vgpu *vgpu, + struct intel_vgpu_page_track *t); -int intel_vgpu_set_guest_page_writeprotection(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *guest_page); - -void intel_vgpu_clear_guest_page_writeprotection(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *guest_page); - -struct intel_vgpu_guest_page *intel_vgpu_find_guest_page( +struct intel_vgpu_page_track *intel_vgpu_find_tracked_page( struct intel_vgpu *vgpu, unsigned long gfn); int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 1e1310f50289..4ea0feb5f04d 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -117,18 +117,18 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, else memcpy(pt, p_data, bytes); - } else if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { - struct intel_vgpu_guest_page *gp; + } else if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { + struct intel_vgpu_page_track *t; /* Since we enter the failsafe mode early during guest boot, * guest may not have chance to set up its ppgtt table, so * there should not be any wp pages for guest. Keep the wp * related code here in case we need to handle it in furture. */ - gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT); - if (gp) { + t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); + if (t) { /* remove write protection to prevent furture traps */ - intel_vgpu_clean_guest_page(vgpu, gp); + intel_vgpu_clean_page_track(vgpu, t); if (read) intel_gvt_hypervisor_read_gpa(vgpu, pa, p_data, bytes); @@ -170,17 +170,17 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, return ret; } - if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { - struct intel_vgpu_guest_page *gp; + if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { + struct intel_vgpu_page_track *t; - gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT); - if (gp) { + t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); + if (t) { ret = intel_gvt_hypervisor_read_gpa(vgpu, pa, p_data, bytes); if (ret) { gvt_vgpu_err("guest page read error %d, " "gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n", - ret, gp->gfn, pa, *(u32 *)p_data, + ret, t->gfn, pa, *(u32 *)p_data, bytes); } mutex_unlock(&gvt->lock); @@ -267,17 +267,17 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, return ret; } - if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { - struct intel_vgpu_guest_page *gp; + if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { + struct intel_vgpu_page_track *t; - gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT); - if (gp) { - ret = gp->handler(gp, pa, p_data, bytes); + t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); + if (t) { + ret = t->handler(t, pa, p_data, bytes); if (ret) { gvt_err("guest page write error %d, " "gfn 0x%lx, pa 0x%llx, " "var 0x%x, len %d\n", - ret, gp->gfn, pa, + ret, t->gfn, pa, *(u32 *)p_data, bytes); } mutex_unlock(&gvt->lock); diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index f0e5487e6688..c436e20ea59e 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -154,51 +154,53 @@ static inline unsigned long intel_gvt_hypervisor_virt_to_mfn(void *p) } /** - * intel_gvt_hypervisor_set_wp_page - set a guest page to write-protected + * intel_gvt_hypervisor_enable - set a guest page to write-protected * @vgpu: a vGPU - * @p: intel_vgpu_guest_page + * @t: page track data structure * * Returns: * Zero on success, negative error code if failed. */ -static inline int intel_gvt_hypervisor_set_wp_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *p) +static inline int intel_gvt_hypervisor_enable_page_track( + struct intel_vgpu *vgpu, + struct intel_vgpu_page_track *t) { int ret; - if (p->writeprotection) + if (t->tracked) return 0; - ret = intel_gvt_host.mpt->set_wp_page(vgpu->handle, p->gfn); + ret = intel_gvt_host.mpt->set_wp_page(vgpu->handle, t->gfn); if (ret) return ret; - p->writeprotection = true; - atomic_inc(&vgpu->gtt.n_write_protected_guest_page); + t->tracked = true; + atomic_inc(&vgpu->gtt.n_tracked_guest_page); return 0; } /** - * intel_gvt_hypervisor_unset_wp_page - remove the write-protection of a + * intel_gvt_hypervisor_disable_page_track - remove the write-protection of a * guest page * @vgpu: a vGPU - * @p: intel_vgpu_guest_page + * @t: page track data structure * * Returns: * Zero on success, negative error code if failed. */ -static inline int intel_gvt_hypervisor_unset_wp_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *p) +static inline int intel_gvt_hypervisor_disable_page_track( + struct intel_vgpu *vgpu, + struct intel_vgpu_page_track *t) { int ret; - if (!p->writeprotection) + if (!t->tracked) return 0; - ret = intel_gvt_host.mpt->unset_wp_page(vgpu->handle, p->gfn); + ret = intel_gvt_host.mpt->unset_wp_page(vgpu->handle, t->gfn); if (ret) return ret; - p->writeprotection = false; - atomic_dec(&vgpu->gtt.n_write_protected_guest_page); + t->tracked = false; + atomic_dec(&vgpu->gtt.n_tracked_guest_page); return 0; } From 62a6a53786fc4b4e7543cc63b704dbb3f7df4c0f Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sat, 30 Sep 2017 17:42:20 +0800 Subject: [PATCH 212/260] drm/i915/gvt: Export intel_gvt_render_mmio_to_ring_id() Since many emulation logic needs to convert the offset of ring registers into ring id, we export it for other caller which might need it. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 21 +++++++++++++++------ drivers/gpu/drm/i915/gvt/mmio.h | 2 ++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 408600befc9f..bb16ef5c8510 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -137,17 +137,26 @@ static int new_mmio_info(struct intel_gvt *gvt, return 0; } -static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg) +/** + * intel_gvt_render_mmio_to_ring_id - convert a mmio offset into ring id + * @gvt: a GVT device + * @offset: register offset + * + * Returns: + * Ring ID on success, negative error code if failed. + */ +int intel_gvt_render_mmio_to_ring_id(struct intel_gvt *gvt, + unsigned int offset) { enum intel_engine_id id; struct intel_engine_cs *engine; - reg &= ~GENMASK(11, 0); + offset &= ~GENMASK(11, 0); for_each_engine(engine, gvt->dev_priv, id) { - if (engine->mmio_base == reg) + if (engine->mmio_base == offset) return id; } - return -1; + return -ENODEV; } #define offset_to_fence_num(offset) \ @@ -1445,7 +1454,7 @@ static int mmio_read_from_hw(struct intel_vgpu *vgpu, static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - int ring_id = render_mmio_to_ring_id(vgpu->gvt, offset); + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); struct intel_vgpu_execlist *execlist; u32 data = *(u32 *)p_data; int ret = 0; @@ -1473,7 +1482,7 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, { struct intel_vgpu_submission *s = &vgpu->submission; u32 data = *(u32 *)p_data; - int ring_id = render_mmio_to_ring_id(vgpu->gvt, offset); + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); bool enable_execlist; int ret; diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 32cd64ddad26..dbc04ad2c7a1 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -65,6 +65,8 @@ struct intel_gvt_mmio_info { struct hlist_node node; }; +int intel_gvt_render_mmio_to_ring_id(struct intel_gvt *gvt, + unsigned int reg); unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt); bool intel_gvt_match_device(struct intel_gvt *gvt, unsigned long device); From 9556e118889293f6d5d226b64688ee2adfd8964c Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 10 Oct 2017 13:51:32 +0800 Subject: [PATCH 213/260] drm/i915/gvt: Use I915_GTT_PAGE_SIZE As there is already an I915_GTT_PAGE_SIZE marco in i915, let GVT-g use it as well. Also this patch re-names some GTT marcos with additional prefix. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 13 +++---- drivers/gpu/drm/i915/gvt/gtt.c | 51 ++++++++++++++------------- drivers/gpu/drm/i915/gvt/gtt.h | 7 ++-- drivers/gpu/drm/i915/gvt/reg.h | 3 +- drivers/gpu/drm/i915/gvt/scheduler.c | 14 ++++---- 5 files changed, 45 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 020443083681..8c8514ee73ac 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1396,7 +1396,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s, } if (index_mode) { - if (guest_gma >= GTT_PAGE_SIZE / sizeof(u64)) { + if (guest_gma >= I915_GTT_PAGE_SIZE / sizeof(u64)) { ret = -EFAULT; goto err; } @@ -1563,10 +1563,10 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm, return -EFAULT; } - offset = gma & (GTT_PAGE_SIZE - 1); + offset = gma & (I915_GTT_PAGE_SIZE - 1); - copy_len = (end_gma - gma) >= (GTT_PAGE_SIZE - offset) ? - GTT_PAGE_SIZE - offset : end_gma - gma; + copy_len = (end_gma - gma) >= (I915_GTT_PAGE_SIZE - offset) ? + I915_GTT_PAGE_SIZE - offset : end_gma - gma; intel_gvt_hypervisor_read_gpa(vgpu, gpa, va + len, copy_len); @@ -2540,7 +2540,7 @@ static int scan_workload(struct intel_vgpu_workload *workload) int ret = 0; /* ring base is page aligned */ - if (WARN_ON(!IS_ALIGNED(workload->rb_start, GTT_PAGE_SIZE))) + if (WARN_ON(!IS_ALIGNED(workload->rb_start, I915_GTT_PAGE_SIZE))) return -EINVAL; gma_head = workload->rb_start + workload->rb_head; @@ -2589,7 +2589,8 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx); /* ring base is page aligned */ - if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, GTT_PAGE_SIZE))) + if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, + I915_GTT_PAGE_SIZE))) return -EINVAL; ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(uint32_t); diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index bd3dc209cd89..c9cbd71be4c5 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -94,12 +94,12 @@ int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index, u64 h_addr; int ret; - ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << GTT_PAGE_SHIFT, + ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT, &h_addr); if (ret) return ret; - *h_index = h_addr >> GTT_PAGE_SHIFT; + *h_index = h_addr >> I915_GTT_PAGE_SHIFT; return 0; } @@ -109,12 +109,12 @@ int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, u64 g_addr; int ret; - ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << GTT_PAGE_SHIFT, + ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT, &g_addr); if (ret) return ret; - *g_index = g_addr >> GTT_PAGE_SHIFT; + *g_index = g_addr >> I915_GTT_PAGE_SHIFT; return 0; } @@ -382,7 +382,7 @@ static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) */ static unsigned long gma_to_ggtt_pte_index(unsigned long gma) { - unsigned long x = (gma >> GTT_PAGE_SHIFT); + unsigned long x = (gma >> I915_GTT_PAGE_SHIFT); trace_gma_index(__func__, gma, x); return x; @@ -494,7 +494,7 @@ static inline int ppgtt_spt_get_entry( return -EINVAL; ret = ops->get_entry(page_table, e, index, guest, - spt->guest_page.track.gfn << GTT_PAGE_SHIFT, + spt->guest_page.track.gfn << I915_GTT_PAGE_SHIFT, spt->vgpu); if (ret) return ret; @@ -516,7 +516,7 @@ static inline int ppgtt_spt_set_entry( return -EINVAL; return ops->set_entry(page_table, e, index, guest, - spt->guest_page.track.gfn << GTT_PAGE_SHIFT, + spt->guest_page.track.gfn << I915_GTT_PAGE_SHIFT, spt->vgpu); } @@ -649,7 +649,7 @@ static inline int init_shadow_page(struct intel_vgpu *vgpu, INIT_HLIST_NODE(&p->node); - p->mfn = daddr >> GTT_PAGE_SHIFT; + p->mfn = daddr >> I915_GTT_PAGE_SHIFT; hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn); return 0; } @@ -659,7 +659,7 @@ static inline void clean_shadow_page(struct intel_vgpu *vgpu, { struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; - dma_unmap_page(kdev, p->mfn << GTT_PAGE_SHIFT, 4096, + dma_unmap_page(kdev, p->mfn << I915_GTT_PAGE_SHIFT, 4096, PCI_DMA_BIDIRECTIONAL); if (!hlist_unhashed(&p->node)) @@ -818,7 +818,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page( ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift) #define pt_entries(spt) \ - (GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) + (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) #define for_each_present_guest_entry(spt, e, i) \ for (i = 0; i < pt_entries(spt); i++) \ @@ -1101,8 +1101,8 @@ static int sync_oos_page(struct intel_vgpu *vgpu, old.type = new.type = get_entry_type(spt->guest_page_type); old.val64 = new.val64 = 0; - for (index = 0; index < (GTT_PAGE_SIZE >> info->gtt_entry_size_shift); - index++) { + for (index = 0; index < (I915_GTT_PAGE_SIZE >> + info->gtt_entry_size_shift); index++) { ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu); ops->get_entry(NULL, &new, index, true, oos_page->guest_page->track.gfn << PAGE_SHIFT, vgpu); @@ -1156,8 +1156,8 @@ static int attach_oos_page(struct intel_vgpu *vgpu, int ret; ret = intel_gvt_hypervisor_read_gpa(vgpu, - gpt->track.gfn << GTT_PAGE_SHIFT, - oos_page->mem, GTT_PAGE_SIZE); + gpt->track.gfn << I915_GTT_PAGE_SHIFT, + oos_page->mem, I915_GTT_PAGE_SIZE); if (ret) return ret; @@ -1439,7 +1439,7 @@ static int gen8_mm_alloc_page_table(struct intel_vgpu_mm *mm) mm->shadow_page_table = mem + mm->page_table_entry_size; } else if (mm->type == INTEL_GVT_MM_GGTT) { mm->page_table_entry_cnt = - (gvt_ggtt_gm_sz(gvt) >> GTT_PAGE_SHIFT); + (gvt_ggtt_gm_sz(gvt) >> I915_GTT_PAGE_SHIFT); mm->page_table_entry_size = mm->page_table_entry_cnt * info->gtt_entry_size; mem = vzalloc(mm->page_table_entry_size); @@ -1761,8 +1761,8 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) gma_ops->gma_to_ggtt_pte_index(gma)); if (ret) goto err; - gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT) - + (gma & ~GTT_PAGE_MASK); + gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + + (gma & ~I915_GTT_PAGE_MASK); trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa); return gpa; @@ -1814,8 +1814,8 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) } } - gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT) - + (gma & ~GTT_PAGE_MASK); + gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + + (gma & ~I915_GTT_PAGE_MASK); trace_gma_translate(vgpu->id, "ppgtt", 0, mm->page_table_level, gma, gpa); @@ -1883,7 +1883,7 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, if (bytes != 4 && bytes != 8) return -EINVAL; - gma = g_gtt_index << GTT_PAGE_SHIFT; + gma = g_gtt_index << I915_GTT_PAGE_SHIFT; /* the VM may configure the whole GM space when ballooning is used */ if (!vgpu_gmadr_is_valid(vgpu, gma)) @@ -1946,7 +1946,7 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, { struct intel_vgpu_gtt *gtt = &vgpu->gtt; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - int page_entry_num = GTT_PAGE_SIZE >> + int page_entry_num = I915_GTT_PAGE_SIZE >> vgpu->gvt->device_info.gtt_entry_size_shift; void *scratch_pt; int i; @@ -1970,7 +1970,7 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, return -ENOMEM; } gtt->scratch_pt[type].page_mfn = - (unsigned long)(daddr >> GTT_PAGE_SHIFT); + (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); gtt->scratch_pt[type].page = virt_to_page(scratch_pt); gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n", vgpu->id, type, gtt->scratch_pt[type].page_mfn); @@ -2013,7 +2013,7 @@ static int release_scratch_page_tree(struct intel_vgpu *vgpu) for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { if (vgpu->gtt.scratch_pt[i].page != NULL) { daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn << - GTT_PAGE_SHIFT); + I915_GTT_PAGE_SHIFT); dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); __free_page(vgpu->gtt.scratch_pt[i].page); vgpu->gtt.scratch_pt[i].page = NULL; @@ -2310,7 +2310,8 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) return -ENOMEM; } gvt->gtt.scratch_ggtt_page = virt_to_page(page); - gvt->gtt.scratch_ggtt_mfn = (unsigned long)(daddr >> GTT_PAGE_SHIFT); + gvt->gtt.scratch_ggtt_mfn = (unsigned long)(daddr >> + I915_GTT_PAGE_SHIFT); if (enable_out_of_sync) { ret = setup_spt_oos(gvt); @@ -2337,7 +2338,7 @@ void intel_gvt_clean_gtt(struct intel_gvt *gvt) { struct device *dev = &gvt->dev_priv->drm.pdev->dev; dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_ggtt_mfn << - GTT_PAGE_SHIFT); + I915_GTT_PAGE_SHIFT); dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index bd9b298b8777..80b2deb097ad 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -34,9 +34,8 @@ #ifndef _GVT_GTT_H_ #define _GVT_GTT_H_ -#define GTT_PAGE_SHIFT 12 -#define GTT_PAGE_SIZE (1UL << GTT_PAGE_SHIFT) -#define GTT_PAGE_MASK (~(GTT_PAGE_SIZE-1)) +#define I915_GTT_PAGE_SHIFT 12 +#define I915_GTT_PAGE_MASK (~(I915_GTT_PAGE_SIZE - 1)) struct intel_vgpu_mm; @@ -245,7 +244,7 @@ struct intel_vgpu_oos_page { struct list_head list; struct list_head vm_list; int id; - unsigned char mem[GTT_PAGE_SIZE]; + unsigned char mem[I915_GTT_PAGE_SIZE]; }; #define GTT_ENTRY_NUM_IN_ONE_PAGE 512 diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h index 83f2f63d7eeb..d4f7ce6dc1d7 100644 --- a/drivers/gpu/drm/i915/gvt/reg.h +++ b/drivers/gpu/drm/i915/gvt/reg.h @@ -74,6 +74,7 @@ #define RB_HEAD_OFF_MASK ((1U << 21) - (1U << 2)) #define RB_TAIL_OFF_MASK ((1U << 21) - (1U << 3)) #define RB_TAIL_SIZE_MASK ((1U << 21) - (1U << 12)) -#define _RING_CTL_BUF_SIZE(ctl) (((ctl) & RB_TAIL_SIZE_MASK) + GTT_PAGE_SIZE) +#define _RING_CTL_BUF_SIZE(ctl) (((ctl) & RB_TAIL_SIZE_MASK) + \ + I915_GTT_PAGE_SIZE) #endif diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f2d4c90ea1d4..7a1ffaa9ae06 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -81,7 +81,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) while (i < context_page_num) { context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, (u32)((workload->ctx_desc.lrca + i) << - GTT_PAGE_SHIFT)); + I915_GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) { gvt_vgpu_err("Invalid guest context descriptor\n"); return -EFAULT; @@ -90,7 +90,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); dst = kmap(page); intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, - GTT_PAGE_SIZE); + I915_GTT_PAGE_SIZE); kunmap(page); i++; } @@ -120,7 +120,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) sizeof(*shadow_ring_context), (void *)shadow_ring_context + sizeof(*shadow_ring_context), - GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); + I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); kunmap(page); return 0; @@ -635,7 +635,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) while (i < context_page_num) { context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, (u32)((workload->ctx_desc.lrca + i) << - GTT_PAGE_SHIFT)); + I915_GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) { gvt_vgpu_err("invalid guest context descriptor\n"); return; @@ -644,7 +644,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); src = kmap(page); intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src, - GTT_PAGE_SIZE); + I915_GTT_PAGE_SIZE); kunmap(page); i++; } @@ -669,7 +669,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) sizeof(*shadow_ring_context), (void *)shadow_ring_context + sizeof(*shadow_ring_context), - GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); + I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); kunmap(page); } @@ -1198,7 +1198,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, int ret; ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, - (u32)((desc->lrca + 1) << GTT_PAGE_SHIFT)); + (u32)((desc->lrca + 1) << I915_GTT_PAGE_SHIFT)); if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) { gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca); return ERR_PTR(-EINVAL); From 22115cef0869b7d08cb2aa60a65d4d978598644c Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 10 Oct 2017 14:34:11 +0800 Subject: [PATCH 214/260] drm/i915/gvt: Let the caller choose if a shadow page should be put into hash table As we want to re-use intel_vgpu_shadow_page in buidling scrach page table and we don't want to put scrach page table page into hash table, a new param is introduced to give the caller a choice to decide if a shadow page should be put into hash table. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 25 +++++++++++++------------ drivers/gpu/drm/i915/gvt/gtt.h | 4 ++-- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index c9cbd71be4c5..912d63b2ff44 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -633,7 +633,7 @@ static void clean_guest_page(struct intel_vgpu *vgpu, } static inline int init_shadow_page(struct intel_vgpu *vgpu, - struct intel_vgpu_shadow_page *p, int type) + struct intel_vgpu_shadow_page *p, int type, bool hash) { struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; dma_addr_t daddr; @@ -650,7 +650,8 @@ static inline int init_shadow_page(struct intel_vgpu *vgpu, INIT_HLIST_NODE(&p->node); p->mfn = daddr >> I915_GTT_PAGE_SHIFT; - hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn); + if (hash) + hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn); return 0; } @@ -782,7 +783,7 @@ retry: * TODO: guest page type may be different with shadow page type, * when we support PSE page in future. */ - ret = init_shadow_page(vgpu, &spt->shadow_page, type); + ret = init_shadow_page(vgpu, &spt->shadow_page, type, true); if (ret) { gvt_vgpu_err("fail to initialize shadow page for spt\n"); goto err; @@ -1902,11 +1903,11 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, * update the entry in this situation p2m will fail * settting the shadow entry to point to a scratch page */ - ops->set_pfn(&m, gvt->gtt.scratch_ggtt_mfn); + ops->set_pfn(&m, gvt->gtt.scratch_mfn); } } else { m = e; - ops->set_pfn(&m, gvt->gtt.scratch_ggtt_mfn); + ops->set_pfn(&m, gvt->gtt.scratch_mfn); } ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index); @@ -2309,16 +2310,16 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) __free_page(virt_to_page(page)); return -ENOMEM; } - gvt->gtt.scratch_ggtt_page = virt_to_page(page); - gvt->gtt.scratch_ggtt_mfn = (unsigned long)(daddr >> - I915_GTT_PAGE_SHIFT); + + gvt->gtt.scratch_page = virt_to_page(page); + gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); if (enable_out_of_sync) { ret = setup_spt_oos(gvt); if (ret) { gvt_err("fail to initialize SPT oos\n"); dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); - __free_page(gvt->gtt.scratch_ggtt_page); + __free_page(gvt->gtt.scratch_page); return ret; } } @@ -2337,12 +2338,12 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) void intel_gvt_clean_gtt(struct intel_gvt *gvt) { struct device *dev = &gvt->dev_priv->drm.pdev->dev; - dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_ggtt_mfn << + dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn << I915_GTT_PAGE_SHIFT); dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); - __free_page(gvt->gtt.scratch_ggtt_page); + __free_page(gvt->gtt.scratch_page); if (enable_out_of_sync) clean_spt_oos(gvt); @@ -2368,7 +2369,7 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) memset(&e, 0, sizeof(struct intel_gvt_gtt_entry)); e.type = GTT_TYPE_GGTT_PTE; - ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn); + ops->set_pfn(&e, gvt->gtt.scratch_mfn); e.val64 |= _PAGE_PRESENT; index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 80b2deb097ad..ab590278dcf1 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -85,8 +85,8 @@ struct intel_gvt_gtt { struct list_head oos_page_free_list_head; struct list_head mm_lru_list_head; - struct page *scratch_ggtt_page; - unsigned long scratch_ggtt_mfn; + struct page *scratch_page; + unsigned long scratch_mfn; }; enum { From 7422064883398612b5921bf26b15bd9b0dd6c325 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 10 Oct 2017 17:14:13 +0800 Subject: [PATCH 215/260] drm/i915/gvt: Fix a bug of unexpectedly clear scratch page table During a vGPU reset, the scratch page table shouldn't be cleared, what needs to be cleared should be the scratch page. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 912d63b2ff44..d47b8b100ac2 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2395,8 +2395,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) */ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu) { - int i; - ppgtt_free_all_shadow_page(vgpu); /* Shadow pages are only created when there is no page @@ -2406,11 +2404,4 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu) intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); intel_vgpu_reset_ggtt(vgpu); - - /* clear scratch page for security */ - for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { - if (vgpu->gtt.scratch_pt[i].page != NULL) - memset(page_address(vgpu->gtt.scratch_pt[i].page), - 0, PAGE_SIZE); - } } From 054f4eba2a2985b1db43353b7b5ce90e96cf9bb9 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 10 Oct 2017 17:19:30 +0800 Subject: [PATCH 216/260] drm/i915/gvt: Introduce page table type of current level in GTT type enumerations Need to figure out page table type of current level by GTT entry type during getting a scratch page table entry. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index d47b8b100ac2..74be5e628310 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -156,13 +156,15 @@ int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, struct gtt_type_table_entry { int entry_type; + int pt_type; int next_pt_type; int pse_entry_type; }; -#define GTT_TYPE_TABLE_ENTRY(type, e_type, npt_type, pse_type) \ +#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \ [type] = { \ .entry_type = e_type, \ + .pt_type = cpt_type, \ .next_pt_type = npt_type, \ .pse_entry_type = pse_type, \ } @@ -170,55 +172,68 @@ struct gtt_type_table_entry { static struct gtt_type_table_entry gtt_type_table[] = { GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY, GTT_TYPE_PPGTT_ROOT_L4_ENTRY, + GTT_TYPE_INVALID, GTT_TYPE_PPGTT_PML4_PT, GTT_TYPE_INVALID), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT, GTT_TYPE_PPGTT_PML4_ENTRY, + GTT_TYPE_PPGTT_PML4_PT, GTT_TYPE_PPGTT_PDP_PT, GTT_TYPE_INVALID), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY, GTT_TYPE_PPGTT_PML4_ENTRY, + GTT_TYPE_PPGTT_PML4_PT, GTT_TYPE_PPGTT_PDP_PT, GTT_TYPE_INVALID), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT, GTT_TYPE_PPGTT_PDP_ENTRY, + GTT_TYPE_PPGTT_PDP_PT, GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PTE_1G_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY, GTT_TYPE_PPGTT_ROOT_L3_ENTRY, + GTT_TYPE_INVALID, GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PTE_1G_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY, GTT_TYPE_PPGTT_PDP_ENTRY, + GTT_TYPE_PPGTT_PDP_PT, GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PTE_1G_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PDE_ENTRY, + GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_PPGTT_PTE_2M_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY, GTT_TYPE_PPGTT_PDE_ENTRY, + GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_PPGTT_PTE_2M_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_PPGTT_PTE_4K_ENTRY, + GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_INVALID, GTT_TYPE_INVALID), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY, GTT_TYPE_PPGTT_PTE_4K_ENTRY, + GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_INVALID, GTT_TYPE_INVALID), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY, GTT_TYPE_PPGTT_PDE_ENTRY, + GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_INVALID, GTT_TYPE_PPGTT_PTE_2M_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY, GTT_TYPE_PPGTT_PDP_ENTRY, + GTT_TYPE_PPGTT_PDP_PT, GTT_TYPE_INVALID, GTT_TYPE_PPGTT_PTE_1G_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE, GTT_TYPE_GGTT_PTE, GTT_TYPE_INVALID, + GTT_TYPE_INVALID, GTT_TYPE_INVALID), }; @@ -227,6 +242,11 @@ static inline int get_next_pt_type(int type) return gtt_type_table[type].next_pt_type; } +static inline int get_pt_type(int type) +{ + return gtt_type_table[type].pt_type; +} + static inline int get_entry_type(int type) { return gtt_type_table[type].entry_type; From 655c64efe36f199bea16f9ba7388c479d5feed5f Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 10 Oct 2017 17:24:26 +0800 Subject: [PATCH 217/260] drm/i915/gvt: Introduce ops->set_present() We need ops->set_present() during generating a new scratch page table entry. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 6 ++++++ drivers/gpu/drm/i915/gvt/gtt.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 74be5e628310..3d6008b116e5 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -397,6 +397,11 @@ static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) e->val64 &= ~BIT(0); } +static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) +{ + e->val64 |= BIT(0); +} + /* * Per-platform GMA routines. */ @@ -426,6 +431,7 @@ static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = { .get_entry = gtt_get_entry64, .set_entry = gtt_set_entry64, .clear_present = gtt_entry_clear_present, + .set_present = gtt_entry_set_present, .test_present = gen8_gtt_test_present, .test_pse = gen8_gtt_test_pse, .get_pfn = gen8_gtt_get_pfn, diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index ab590278dcf1..f98c1c19b4cb 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -62,6 +62,7 @@ struct intel_gvt_gtt_pte_ops { struct intel_vgpu *vgpu); bool (*test_present)(struct intel_gvt_gtt_entry *e); void (*clear_present)(struct intel_gvt_gtt_entry *e); + void (*set_present)(struct intel_gvt_gtt_entry *e); bool (*test_pse)(struct intel_gvt_gtt_entry *e); void (*set_pfn)(struct intel_gvt_gtt_entry *e, unsigned long pfn); unsigned long (*get_pfn)(struct intel_gvt_gtt_entry *e); From c1802534e5a6ec089e2b951116adfc14bb6dae64 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 17 Oct 2017 02:00:27 +0800 Subject: [PATCH 218/260] drm/i915/gvt: Refine broken PPGTT scratch Refine previously broken PPGTT scratch. Scratch PTE was no correctly handled and also the handling of scratch entries in page table walk was not well organized, which brings gaps of introducing lazy shadow. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 196 ++++++++++++++++++--------------- drivers/gpu/drm/i915/gvt/gtt.h | 17 +-- 2 files changed, 112 insertions(+), 101 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 3d6008b116e5..6fa9271e23a5 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -841,20 +841,51 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page( return NULL; } +static bool ppgtt_is_scratch_entry(struct intel_vgpu *vgpu, + struct intel_gvt_gtt_entry *e) +{ + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + int pt_type = get_pt_type(e->type); + + if (pt_type == GTT_TYPE_INVALID) + return false; + + if (ops->get_pfn(e) == vgpu->gtt.ppgtt_scratch_page[pt_type].mfn) + return true; + + return false; +} + +static void ppgtt_get_scratch_entry(struct intel_vgpu *vgpu, int type, + struct intel_gvt_gtt_entry *e) +{ + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + struct intel_vgpu_shadow_page *scratch_page; + int pt_type = get_pt_type(type); + + if (WARN_ON(pt_type == GTT_TYPE_INVALID)) + return; + + scratch_page = &vgpu->gtt.ppgtt_scratch_page[pt_type]; + + e->type = get_entry_type(type); + ops->get_entry(scratch_page->vaddr, e, 0, false, 0, vgpu); +} + #define pt_entry_size_shift(spt) \ ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift) #define pt_entries(spt) \ (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) -#define for_each_present_guest_entry(spt, e, i) \ +#define for_each_guest_entry(spt, e, i) \ for (i = 0; i < pt_entries(spt); i++) \ - if (!ppgtt_get_guest_entry(spt, e, i) && \ - spt->vgpu->gvt->gtt.pte_ops->test_present(e)) + if (!ppgtt_get_guest_entry(spt, e, i)) #define for_each_present_shadow_entry(spt, e, i) \ for (i = 0; i < pt_entries(spt); i++) \ if (!ppgtt_get_shadow_entry(spt, e, i) && \ + !ppgtt_is_scratch_entry(spt->vgpu, e) && \ spt->vgpu->gvt->gtt.pte_ops->test_present(e)) static void ppgtt_get_shadow_page(struct intel_vgpu_ppgtt_spt *spt) @@ -873,18 +904,13 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, { struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s; - intel_gvt_gtt_type_t cur_pt_type; if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type)))) return -EINVAL; - if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY - && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { - cur_pt_type = get_next_pt_type(e->type) + 1; - if (ops->get_pfn(e) == - vgpu->gtt.scratch_pt[cur_pt_type].page_mfn) - return 0; - } + if (WARN_ON(ppgtt_is_scratch_entry(vgpu, e))) + return -EINVAL; + s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e)); if (!s) { gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n", @@ -997,6 +1023,7 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) { struct intel_vgpu *vgpu = spt->vgpu; + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s; struct intel_gvt_gtt_entry se, ge; unsigned long i; @@ -1006,22 +1033,34 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) spt->guest_page.track.gfn, spt->shadow_page.type); if (gtt_type_is_pte_pt(spt->shadow_page.type)) { - for_each_present_guest_entry(spt, &ge, i) { - ret = gtt_entry_p2m(vgpu, &ge, &se); - if (ret) - goto fail; + for_each_guest_entry(spt, &ge, i) { + if (ops->test_present(&ge)) { + ret = gtt_entry_p2m(vgpu, &ge, &se); + if (ret) + goto fail; + } else { + ppgtt_get_scratch_entry(vgpu, + spt->shadow_page.type, &se); + } ppgtt_set_shadow_entry(spt, &se, i); } return 0; } - for_each_present_guest_entry(spt, &ge, i) { + for_each_guest_entry(spt, &ge, i) { if (!gtt_type_is_pt(get_next_pt_type(ge.type))) { gvt_vgpu_err("GVT doesn't support pse bit now\n"); ret = -EINVAL; goto fail; } + if (!ops->test_present(&ge)) { + ppgtt_get_scratch_entry(vgpu, spt->shadow_page.type, + &se); + ppgtt_set_shadow_entry(spt, &se, i); + continue; + } + s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge); if (IS_ERR(s)) { ret = PTR_ERR(s); @@ -1053,7 +1092,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, if (!ops->test_present(se)) return 0; - if (ops->get_pfn(se) == vgpu->gtt.scratch_pt[sp->type].page_mfn) + if (ppgtt_is_scratch_entry(vgpu, se)) return 0; if (gtt_type_is_pt(get_next_pt_type(se->type))) { @@ -1292,7 +1331,6 @@ static int ppgtt_handle_guest_write_page_table( { struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); struct intel_vgpu *vgpu = spt->vgpu; - int type = spt->shadow_page.type; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_entry se; @@ -1319,7 +1357,7 @@ static int ppgtt_handle_guest_write_page_table( goto fail; if (!new_present) { - ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); + ppgtt_get_scratch_entry(vgpu, spt->shadow_page.type, &se); ppgtt_set_shadow_entry(spt, &se, index); } @@ -1968,106 +2006,85 @@ int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, return ret; } -static int alloc_scratch_pages(struct intel_vgpu *vgpu, +static void ppgtt_destroy_scratch(struct intel_vgpu *vgpu) +{ + struct intel_vgpu_shadow_page *scratch_page; + int i; + + for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { + scratch_page = &vgpu->gtt.ppgtt_scratch_page[i]; + if (scratch_page->page != NULL) { + clean_shadow_page(vgpu, scratch_page); + __free_page(scratch_page->page); + } + } +} + +static int setup_ppgtt_scratch_page(struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type) { + struct intel_gvt *gvt = vgpu->gvt; + struct intel_gvt_device_info *info = &gvt->device_info; + int num_entries = I915_GTT_PAGE_SIZE >> info->gtt_entry_size_shift; struct intel_vgpu_gtt *gtt = &vgpu->gtt; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - int page_entry_num = I915_GTT_PAGE_SIZE >> - vgpu->gvt->device_info.gtt_entry_size_shift; - void *scratch_pt; - int i; - struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - dma_addr_t daddr; + struct intel_vgpu_shadow_page *scratch_page; + struct intel_gvt_gtt_entry e; + intel_gvt_gtt_type_t next_pt_type; + int ret, i; if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX)) return -EINVAL; - scratch_pt = (void *)get_zeroed_page(GFP_KERNEL); - if (!scratch_pt) { + scratch_page = >t->ppgtt_scratch_page[type]; + + scratch_page->page = alloc_page(GFP_KERNEL); + if (!scratch_page) { gvt_vgpu_err("fail to allocate scratch page\n"); return -ENOMEM; } - daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, - 4096, PCI_DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, daddr)) { - gvt_vgpu_err("fail to dmamap scratch_pt\n"); - __free_page(virt_to_page(scratch_pt)); + ret = init_shadow_page(vgpu, scratch_page, type, false); + if (ret) { + gvt_vgpu_err("fail to allocate scratch page\n"); + __free_page(scratch_page->page); return -ENOMEM; } - gtt->scratch_pt[type].page_mfn = - (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); - gtt->scratch_pt[type].page = virt_to_page(scratch_pt); - gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n", - vgpu->id, type, gtt->scratch_pt[type].page_mfn); - /* Build the tree by full filled the scratch pt with the entries which - * point to the next level scratch pt or scratch page. The - * scratch_pt[type] indicate the scratch pt/scratch page used by the - * 'type' pt. - * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by - * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scratch_pt it self - * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn. - */ - if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) { - struct intel_gvt_gtt_entry se; + memset(&e, 0, sizeof(e)); - memset(&se, 0, sizeof(struct intel_gvt_gtt_entry)); - se.type = get_entry_type(type - 1); - ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn); - - /* The entry parameters like present/writeable/cache type - * set to the same as i915's scratch page tree. - */ - se.val64 |= _PAGE_PRESENT | _PAGE_RW; - if (type == GTT_TYPE_PPGTT_PDE_PT) - se.val64 |= PPAT_CACHED; - - for (i = 0; i < page_entry_num; i++) - ops->set_entry(scratch_pt, &se, i, false, 0, vgpu); + if (type == GTT_TYPE_PPGTT_PTE_PT) { + e.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY; + ops->set_pfn(&e, gvt->gtt.scratch_mfn); + } else { + next_pt_type = get_next_pt_type(type); + e.type = get_entry_type(type); + ops->set_pfn(&e, gtt->ppgtt_scratch_page[next_pt_type].mfn); } + ops->set_present(&e); + + for (i = 0; i < num_entries; i++) + ops->set_entry(scratch_page->vaddr, &e, i, false, 0, vgpu); + return 0; } -static int release_scratch_page_tree(struct intel_vgpu *vgpu) -{ - int i; - struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - dma_addr_t daddr; - - for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { - if (vgpu->gtt.scratch_pt[i].page != NULL) { - daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn << - I915_GTT_PAGE_SHIFT); - dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); - __free_page(vgpu->gtt.scratch_pt[i].page); - vgpu->gtt.scratch_pt[i].page = NULL; - vgpu->gtt.scratch_pt[i].page_mfn = 0; - } - } - - return 0; -} - -static int create_scratch_page_tree(struct intel_vgpu *vgpu) +static int ppgtt_create_scratch(struct intel_vgpu *vgpu) { int i, ret; for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { - ret = alloc_scratch_pages(vgpu, i); + ret = setup_ppgtt_scratch_page(vgpu, i); if (ret) goto err; } - return 0; - err: - release_scratch_page_tree(vgpu); + ppgtt_destroy_scratch(vgpu); return ret; } - + /** * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virulization * @vgpu: a vGPU @@ -2100,8 +2117,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) } gtt->ggtt_mm = ggtt_mm; - - return create_scratch_page_tree(vgpu); + return ppgtt_create_scratch(vgpu); } static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type) @@ -2133,7 +2149,7 @@ static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type) void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) { ppgtt_free_all_shadow_page(vgpu); - release_scratch_page_tree(vgpu); + ppgtt_destroy_scratch(vgpu); intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_GGTT); diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index f98c1c19b4cb..416b2f868cf0 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -188,9 +188,12 @@ extern void intel_vgpu_destroy_mm(struct kref *mm_ref); struct intel_vgpu_guest_page; -struct intel_vgpu_scratch_pt { +struct intel_vgpu_shadow_page { + void *vaddr; struct page *page; - unsigned long page_mfn; + int type; + struct hlist_node node; + unsigned long mfn; }; struct intel_vgpu_gtt { @@ -202,7 +205,7 @@ struct intel_vgpu_gtt { atomic_t n_tracked_guest_page; struct list_head oos_page_list_head; struct list_head post_shadow_list_head; - struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX]; + struct intel_vgpu_shadow_page ppgtt_scratch_page[GTT_TYPE_MAX]; }; extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); @@ -218,14 +221,6 @@ extern struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu, struct intel_vgpu_oos_page; -struct intel_vgpu_shadow_page { - void *vaddr; - struct page *page; - int type; - struct hlist_node node; - unsigned long mfn; -}; - struct intel_vgpu_page_track { struct hlist_node node; bool tracked; From a2ae95af9646316aaf86e2d18f46de1a5f746f1a Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Fri, 20 Oct 2017 15:16:46 +0800 Subject: [PATCH 219/260] drm/i915/gvt: update CSB and CSB write pointer in virtual HWSP The engine provides a mirror of the CSB and CSB write pointer in the HWSP. Read these status from virtual HWSP in VM can reduce CPU utilization while applications have much more short GPU workloads. Here we update the corresponding data in virtual HWSP as it in virtual MMIO. Before read these status from HWSP in GVT-g VM, please ensure the host support it by checking the BIT(3) of caps in PVINFO. Virtual HWSP only support GEN8+ platform, since the HWSP MMIO may change follow the platform update, please add the corresponding MMIO emulation when enable new platforms in GVT-g. v3 : Add address audit in HWSP address update. v4 : Separate this patch with enalbe virtual HWSP in VM. Use intel_gvt_render_mmio_to_ring_id() to determine ring_id by offset. v5 : Remove unnessary check about Gen8, GVT-g only support Gen8+. Signed-off-by: Weinan Li Cc: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 16 +++++++++++++++ drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/handlers.c | 30 ++++++++++++++++++++++++++++- drivers/gpu/drm/i915/gvt/vgpu.c | 3 +++ 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 5c966edfeefb..c9fa0fb488d3 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -133,6 +133,8 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist, struct execlist_context_status_pointer_format ctx_status_ptr; u32 write_pointer; u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset; + unsigned long hwsp_gpa; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id, _EL_OFFSET_STATUS_PTR); @@ -158,6 +160,20 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist, ctx_status_ptr.write_ptr = write_pointer; vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; + /* Update the CSB and CSB write pointer in HWSP */ + hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, + vgpu->hws_pga[ring_id]); + if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) { + intel_gvt_hypervisor_write_gpa(vgpu, + hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 + + write_pointer * 8, + status, 8); + intel_gvt_hypervisor_write_gpa(vgpu, + hwsp_gpa + + intel_hws_csb_write_index(dev_priv) * 4, + &write_pointer, 4); + } + gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n", vgpu->id, write_pointer, offset, status->ldw, status->udw); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index f08d194f639f..27e8186cbc81 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -189,6 +189,7 @@ struct intel_vgpu { struct intel_vgpu_opregion opregion; struct intel_vgpu_display display; struct intel_vgpu_submission submission; + u32 hws_pga[I915_NUM_ENGINES]; struct dentry *debugfs; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index bb16ef5c8510..56ee588377f4 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1380,6 +1380,34 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, return intel_vgpu_default_mmio_write(vgpu, offset, &value, bytes); } +static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset, + void *p_data, unsigned int bytes) +{ + u32 value = *(u32 *)p_data; + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); + + if (!intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) { + gvt_vgpu_err("VM(%d) write invalid HWSP address, reg:0x%x, value:0x%x\n", + vgpu->id, offset, value); + return -EINVAL; + } + /* + * Need to emulate all the HWSP register write to ensure host can + * update the VM CSB status correctly. Here listed registers can + * support BDW, SKL or other platforms with same HWSP registers. + */ + if (unlikely(ring_id < 0 || ring_id > I915_NUM_ENGINES)) { + gvt_vgpu_err("VM(%d) access unknown hardware status page register:0x%x\n", + vgpu->id, offset); + return -EINVAL; + } + vgpu->hws_pga[ring_id] = value; + gvt_dbg_mmio("VM(%d) write: 0x%x to HWSP: 0x%x\n", + vgpu->id, value, offset); + + return intel_vgpu_default_mmio_write(vgpu, offset, &value, bytes); +} + static int skl_power_well_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -2535,7 +2563,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG - MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); + MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, hws_pga_write); MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 99dbefadfe91..c6b82d1ba7de 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -43,7 +43,10 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) vgpu_vreg(vgpu, vgtif_reg(version_minor)) = 0; vgpu_vreg(vgpu, vgtif_reg(display_ready)) = 0; vgpu_vreg(vgpu, vgtif_reg(vgt_id)) = vgpu->id; + vgpu_vreg(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT; + vgpu_vreg(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION; + vgpu_vreg(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) = vgpu_aperture_gmadr_base(vgpu); vgpu_vreg(vgpu, vgtif_reg(avail_rs.mappable_gmadr.size)) = From 4023f301d28fb18aac225fef59592a44b9fd42b3 Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Fri, 20 Oct 2017 21:52:29 +0800 Subject: [PATCH 220/260] drm/i915/gvt: opregion virtualization for win guest this is an enhanced opregion emulation for win guest support by initializing more data members including opregion header size, version and child device propertity for display port. for simplicity, redefined child_device_config structure. Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/opregion.c | 82 +++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index db8abac5faad..2533d1ef1c56 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -63,6 +63,60 @@ struct bdb_data_header { u16 size; /* data size */ } __packed; +struct efp_child_device_config { + u16 handle; + u16 device_type; + u16 device_class; + u8 i2c_speed; + u8 dp_onboard_redriver; /* 158 */ + u8 dp_ondock_redriver; /* 158 */ + u8 hdmi_level_shifter_value:4; /* 169 */ + u8 hdmi_max_data_rate:4; /* 204 */ + u16 dtd_buf_ptr; /* 161 */ + u8 edidless_efp:1; /* 161 */ + u8 compression_enable:1; /* 198 */ + u8 compression_method:1; /* 198 */ + u8 ganged_edp:1; /* 202 */ + u8 skip0:4; + u8 compression_structure_index:4; /* 198 */ + u8 skip1:4; + u8 slave_port; /* 202 */ + u8 skip2; + u8 dvo_port; + u8 i2c_pin; /* for add-in card */ + u8 slave_addr; /* for add-in card */ + u8 ddc_pin; + u16 edid_ptr; + u8 dvo_config; + u8 efp_docked_port:1; /* 158 */ + u8 lane_reversal:1; /* 184 */ + u8 onboard_lspcon:1; /* 192 */ + u8 iboost_enable:1; /* 196 */ + u8 hpd_invert:1; /* BXT 196 */ + u8 slip3:3; + u8 hdmi_compat:1; + u8 dp_compat:1; + u8 tmds_compat:1; + u8 skip4:5; + u8 aux_channel; + u8 dongle_detect; + u8 pipe_cap:2; + u8 sdvo_stall:1; /* 158 */ + u8 hpd_status:2; + u8 integrated_encoder:1; + u8 skip5:2; + u8 dvo_wiring; + u8 mipi_bridge_type; /* 171 */ + u16 device_class_ext; + u8 dvo_function; + u8 dp_usb_type_c:1; /* 195 */ + u8 skip6:7; + u8 dp_usb_type_c_2x_gpio_index; /* 195 */ + u16 dp_usb_type_c_2x_gpio_pin; /* 195 */ + u8 iboost_dp:4; /* 196 */ + u8 iboost_hdmi:4; /* 196 */ +} __packed; + struct vbt { /* header->bdb_offset point to bdb_header offset */ struct vbt_header header; @@ -73,10 +127,11 @@ struct vbt { struct bdb_data_header general_definitions_header; struct bdb_general_definitions general_definitions; - struct child_device_config child0; - struct child_device_config child1; - struct child_device_config child2; - struct child_device_config child3; + + struct efp_child_device_config child0; + struct efp_child_device_config child1; + struct efp_child_device_config child2; + struct efp_child_device_config child3; struct bdb_data_header driver_features_header; struct bdb_driver_features driver_features; @@ -100,7 +155,7 @@ static void virt_vbt_generation(struct vbt *v) v->header.bdb_offset = offsetof(struct vbt, bdb_header); strcpy(&v->bdb_header.signature[0], "BIOS_DATA_BLOCK"); - v->bdb_header.version = 198; /* child_dev_size = 38 */ + v->bdb_header.version = 186; /* child_dev_size = 38 */ v->bdb_header.header_size = sizeof(v->bdb_header); v->bdb_header.bdb_size = sizeof(struct vbt) - sizeof(struct vbt_header) @@ -125,24 +180,32 @@ static void virt_vbt_generation(struct vbt *v) v->child0.device_type = DEVICE_TYPE_DP; v->child0.dvo_port = DVO_PORT_DPA; v->child0.aux_channel = DP_AUX_A; + v->child0.dp_compat = true; + v->child0.integrated_encoder = true; /* portB */ v->child1.handle = DEVICE_TYPE_EFP2; v->child1.device_type = DEVICE_TYPE_DP; v->child1.dvo_port = DVO_PORT_DPB; v->child1.aux_channel = DP_AUX_B; + v->child1.dp_compat = true; + v->child1.integrated_encoder = true; /* portC */ v->child2.handle = DEVICE_TYPE_EFP3; v->child2.device_type = DEVICE_TYPE_DP; v->child2.dvo_port = DVO_PORT_DPC; v->child2.aux_channel = DP_AUX_C; + v->child2.dp_compat = true; + v->child2.integrated_encoder = true; /* portD */ v->child3.handle = DEVICE_TYPE_EFP4; v->child3.device_type = DEVICE_TYPE_DP; v->child3.dvo_port = DVO_PORT_DPD; v->child3.aux_channel = DP_AUX_D; + v->child3.dp_compat = true; + v->child3.integrated_encoder = true; /* driver features */ v->driver_features_header.id = BDB_DRIVER_FEATURES; @@ -278,22 +341,23 @@ int intel_gvt_init_opregion(struct intel_gvt *gvt) } /* emulated opregion with VBT mailbox only */ - header = (struct opregion_header *)gvt->opregion.opregion_va; + buf = (u8 *)gvt->opregion.opregion_va; + header = (struct opregion_header *)buf; memcpy(header->signature, OPREGION_SIGNATURE, sizeof(OPREGION_SIGNATURE)); + header->size = 0x8; + header->opregion_ver = 0x02000000; header->mboxes = MBOX_VBT; /* for unknown reason, the value in LID field is incorrect * which block the windows guest, so workaround it by force * setting it to "OPEN" */ - buf = (u8 *)gvt->opregion.opregion_va; buf[INTEL_GVT_OPREGION_CLID] = 0x3; /* emulated vbt from virt vbt generation */ virt_vbt_generation(&v); - memcpy(gvt->opregion.opregion_va + INTEL_GVT_OPREGION_VBT_OFFSET, - &v, sizeof(struct vbt)); + memcpy(buf + INTEL_GVT_OPREGION_VBT_OFFSET, &v, sizeof(struct vbt)); return 0; } From 7cb16018f5b4f04bc58a8752bfd11067bafeb552 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 23 Oct 2017 11:46:43 +0800 Subject: [PATCH 221/260] drm/i915/gvt: Add mmio iterator intel_gvt_for_each_tracked_mmio() This patch add a function intel_gvt_for_each_tracked_mmio() to iterate each tracked mmio. The caller don't be aware of how the tracked mmios are presented internally. v2: remove snapshot_hw_mmio_registers(). Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/firmware.c | 26 ++++++++++------------ drivers/gpu/drm/i915/gvt/handlers.c | 34 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/mmio.h | 4 ++++ 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index a26c1705430e..a73e1d418c22 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -66,20 +66,23 @@ static struct bin_attribute firmware_attr = { .mmap = NULL, }; -static int expose_firmware_sysfs(struct intel_gvt *gvt) +static int mmio_snapshot_handler(struct intel_gvt *gvt, u32 offset, void *data) { struct drm_i915_private *dev_priv = gvt->dev_priv; + + *(u32 *)(data + offset) = I915_READ_NOTRACE(_MMIO(offset)); + return 0; +} + +static int expose_firmware_sysfs(struct intel_gvt *gvt) +{ struct intel_gvt_device_info *info = &gvt->device_info; struct pci_dev *pdev = gvt->dev_priv->drm.pdev; - struct intel_gvt_mmio_info *e; - struct gvt_mmio_block *block = gvt->mmio.mmio_block; - int num = gvt->mmio.num_mmio_block; struct gvt_firmware_header *h; void *firmware; void *p; unsigned long size, crc32_start; - int i, j; - int ret; + int i, ret; size = sizeof(*h) + info->mmio_size + info->cfg_space_size; firmware = vzalloc(size); @@ -104,15 +107,8 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) p = firmware + h->mmio_offset; - hash_for_each(gvt->mmio.mmio_info_table, i, e, node) - *(u32 *)(p + e->offset) = I915_READ_NOTRACE(_MMIO(e->offset)); - - for (i = 0; i < num; i++, block++) { - for (j = 0; j < block->size; j += 4) - *(u32 *)(p + INTEL_GVT_MMIO_OFFSET(block->offset) + j) = - I915_READ_NOTRACE(_MMIO(INTEL_GVT_MMIO_OFFSET( - block->offset) + j)); - } + /* Take a snapshot of hw mmio registers. */ + intel_gvt_for_each_tracked_mmio(gvt, mmio_snapshot_handler, p); memcpy(gvt->firmware.mmio, p, info->mmio_size); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 56ee588377f4..4f8d470dbb3f 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2971,6 +2971,40 @@ err: return ret; } +/** + * intel_gvt_for_each_tracked_mmio - iterate each tracked mmio + * @gvt: a GVT device + * @handler: the handler + * @data: private data given to handler + * + * Returns: + * Zero on success, negative error code if failed. + */ +int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt, + int (*handler)(struct intel_gvt *gvt, u32 offset, void *data), + void *data) +{ + struct gvt_mmio_block *block = gvt->mmio.mmio_block; + struct intel_gvt_mmio_info *e; + int i, j, ret; + + hash_for_each(gvt->mmio.mmio_info_table, i, e, node) { + ret = handler(gvt, e->offset, data); + if (ret) + return ret; + } + + for (i = 0; i < gvt->mmio.num_mmio_block; i++, block++) { + for (j = 0; j < block->size; j += 4) { + ret = handler(gvt, + INTEL_GVT_MMIO_OFFSET(block->offset) + j, + data); + if (ret) + return ret; + } + } + return 0; +} /** * intel_vgpu_default_mmio_read - default MMIO read handler diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index dbc04ad2c7a1..62709ac351cd 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -72,6 +72,10 @@ bool intel_gvt_match_device(struct intel_gvt *gvt, unsigned long device); int intel_gvt_setup_mmio_info(struct intel_gvt *gvt); void intel_gvt_clean_mmio_info(struct intel_gvt *gvt); +int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt, + int (*handler)(struct intel_gvt *gvt, u32 offset, void *data), + void *data); + #define INTEL_GVT_MMIO_OFFSET(reg) ({ \ typeof(reg) __reg = reg; \ From cea9083e35b2f771624a18126b10ef1fd7806040 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 23 Oct 2017 11:46:44 +0800 Subject: [PATCH 222/260] drm/i915/gvt: Add new debugfs tool mmio_diff This new debugfs entry is used to figure out which registers of vGPU is different to host. It is a useful tool for new platform enabling and debugging. When read this entry, all the diff mmio are recognized and sorted by mmio offset. Besides, the bit positions of different value are listed in 'Diff' column. Here is a show: $ sudo cat ./mmio_diff Offset HW vGPU Diff 00002030 000025f8 00000000 3-8,10,13 00002034 012025f8 00000000 3-8,10,13,21,24 00002038 027fb000 00000000 12-13,15-22,25 0000203c 00003000 00000000 12-13 00002054 0000000a 00000040 1,3,6 00002074 012025f8 00000000 3-8,10,13,21,24 00002080 fffe6000 00000000 13-14,17-31 000020a8 fffffeff ffffffff 8 000020d4 00000004 00000000 2 .... 00145974 eb42718c 010c11b0 2-5,13-14,17-19,22,25,27,29-31 00145978 0000002f 0000002a 0,2 0014597c 0000002f 0000002a 0,2 00145980 0000002b 00000028 0-1 00145984 a5a87c9e b27d20c0 1-4,6,10-12,14,16,18,20,22-26,28 001459c0 88390000 883c0000 16,18 00146200 88350000 883a0000 16-19 Total: 72432, Diff: 901 v3: fix a typo. v2: add mmio_hw_access_pre/post(). Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/debugfs.c | 115 +++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index 02acd5842605..32a66dfdf112 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -21,9 +21,119 @@ * SOFTWARE. */ #include +#include #include "i915_drv.h" #include "gvt.h" +struct mmio_diff_param { + struct intel_vgpu *vgpu; + int total; + int diff; + struct list_head diff_mmio_list; +}; + +struct diff_mmio { + struct list_head node; + u32 offset; + u32 preg; + u32 vreg; +}; + +/* Compare two diff_mmio items. */ +static int mmio_offset_compare(void *priv, + struct list_head *a, struct list_head *b) +{ + struct diff_mmio *ma; + struct diff_mmio *mb; + + ma = container_of(a, struct diff_mmio, node); + mb = container_of(b, struct diff_mmio, node); + if (ma->offset < mb->offset) + return -1; + else if (ma->offset > mb->offset) + return 1; + return 0; +} + +static inline int mmio_diff_handler(struct intel_gvt *gvt, + u32 offset, void *data) +{ + struct drm_i915_private *dev_priv = gvt->dev_priv; + struct mmio_diff_param *param = data; + struct diff_mmio *node; + u32 preg, vreg; + + preg = I915_READ_NOTRACE(_MMIO(offset)); + vreg = vgpu_vreg(param->vgpu, offset); + + if (preg != vreg) { + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + node->offset = offset; + node->preg = preg; + node->vreg = vreg; + list_add(&node->node, ¶m->diff_mmio_list); + param->diff++; + } + param->total++; + return 0; +} + +/* Show the all the different values of tracked mmio. */ +static int vgpu_mmio_diff_show(struct seq_file *s, void *unused) +{ + struct intel_vgpu *vgpu = s->private; + struct intel_gvt *gvt = vgpu->gvt; + struct mmio_diff_param param = { + .vgpu = vgpu, + .total = 0, + .diff = 0, + }; + struct diff_mmio *node, *next; + + INIT_LIST_HEAD(¶m.diff_mmio_list); + + mutex_lock(&gvt->lock); + spin_lock_bh(&gvt->scheduler.mmio_context_lock); + + mmio_hw_access_pre(gvt->dev_priv); + /* Recognize all the diff mmios to list. */ + intel_gvt_for_each_tracked_mmio(gvt, mmio_diff_handler, ¶m); + mmio_hw_access_post(gvt->dev_priv); + + spin_unlock_bh(&gvt->scheduler.mmio_context_lock); + mutex_unlock(&gvt->lock); + + /* In an ascending order by mmio offset. */ + list_sort(NULL, ¶m.diff_mmio_list, mmio_offset_compare); + + seq_printf(s, "%-8s %-8s %-8s %-8s\n", "Offset", "HW", "vGPU", "Diff"); + list_for_each_entry_safe(node, next, ¶m.diff_mmio_list, node) { + u32 diff = node->preg ^ node->vreg; + + seq_printf(s, "%08x %08x %08x %*pbl\n", + node->offset, node->preg, node->vreg, + 32, &diff); + list_del(&node->node); + kfree(node); + } + seq_printf(s, "Total: %d, Diff: %d\n", param.total, param.diff); + return 0; +} + +static int vgpu_mmio_diff_open(struct inode *inode, struct file *file) +{ + return single_open(file, vgpu_mmio_diff_show, inode->i_private); +} + +static const struct file_operations vgpu_mmio_diff_fops = { + .open = vgpu_mmio_diff_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; /** * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU @@ -47,6 +157,11 @@ int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) if (!ent) return -ENOMEM; + ent = debugfs_create_file("mmio_diff", 0444, vgpu->debugfs, + vgpu, &vgpu_mmio_diff_fops); + if (!ent) + return -ENOMEM; + return 0; } From 0bde438b7e2200fc961625ac7b828cbc8c6d78e1 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 26 Oct 2017 15:29:13 +0800 Subject: [PATCH 223/260] MAINTAINERS: Update gvt-linux.git new repo place gvt-linux.git repo is moved for its new place under https://github.com/intel/gvt-linux.git Old https://github.com/01org/gvt-linux.git is set only for redirect now. Signed-off-by: Zhenyu Wang --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6e1f94b4ed26..9e6cf2209e81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6928,7 +6928,7 @@ M: Zhi Wang L: intel-gvt-dev@lists.freedesktop.org L: intel-gfx@lists.freedesktop.org W: https://01.org/igvt-g -T: git https://github.com/01org/gvt-linux.git +T: git https://github.com/intel/gvt-linux.git S: Supported F: drivers/gpu/drm/i915/gvt/ From ffe2a503b06cc300e25c1dfc00c85a2240cf97d7 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 30 Oct 2017 14:19:15 +0800 Subject: [PATCH 224/260] drm/i915/gvt: Reduce rcs mocs switch latency Use I915_WRITE_FW instead of I915_WRITE to reduce overhead. The overall mmio switch latency lowers from ~600us to ~180us. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index e16c3551b4a3..0672178548ef 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -209,7 +209,7 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id) offset.reg = regs[ring_id]; for (i = 0; i < 64; i++) { gen9_render_mocs[ring_id][i] = I915_READ_FW(offset); - I915_WRITE(offset, vgpu_vreg(vgpu, offset)); + I915_WRITE_FW(offset, vgpu_vreg(vgpu, offset)); offset.reg += 4; } From e4aeba697943692652246a2d0a040f0859d70e86 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 2 Nov 2017 13:33:23 +0800 Subject: [PATCH 225/260] drm/i915/gvt: Don't dump partial state in cmd parser I have seen the cmd parser dump partial odd info. Stop that and only dump the full verbose info when debug enabled. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 8c8514ee73ac..18c45734c7a2 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -709,18 +709,13 @@ static void parser_exec_state_dump(struct parser_exec_state *s) print_opcode(cmd_val(s, 0), s->ring_id); - /* print the whole page to trace */ - pr_err(" ip_va=%p: %08x %08x %08x %08x\n", - s->ip_va, cmd_val(s, 0), cmd_val(s, 1), - cmd_val(s, 2), cmd_val(s, 3)); - s->ip_va = (u32 *)((((u64)s->ip_va) >> 12) << 12); while (cnt < 1024) { - pr_err("ip_va=%p: ", s->ip_va); + gvt_dbg_cmd("ip_va=%p: ", s->ip_va); for (i = 0; i < 8; i++) - pr_err("%08x ", cmd_val(s, i)); - pr_err("\n"); + gvt_dbg_cmd("%08x ", cmd_val(s, i)); + gvt_dbg_cmd("\n"); s->ip_va += 8 * sizeof(u32); cnt += 8; From c982c45db6a78e1c23a416785fe920542ae1f33e Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 2 Nov 2017 13:33:32 +0800 Subject: [PATCH 226/260] drm/i915/gvt: Make gvt_vgpu_err use pr_err gvt_vgpu_err means something goes wrong. We need the error propagates to kernel message by default. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/debug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h index d98d5a0b928c..c6027125c1ec 100644 --- a/drivers/gpu/drm/i915/gvt/debug.h +++ b/drivers/gpu/drm/i915/gvt/debug.h @@ -30,9 +30,9 @@ #define gvt_vgpu_err(fmt, args...) \ do { \ if (IS_ERR_OR_NULL(vgpu)) \ - pr_debug("gvt: "fmt, ##args); \ + pr_err("gvt: "fmt, ##args); \ else \ - pr_debug("gvt: vgpu %d: "fmt, vgpu->id, ##args);\ + pr_err("gvt: vgpu %d: "fmt, vgpu->id, ##args);\ } while (0) #define gvt_dbg_core(fmt, args...) \ From c4270d122ccff963a021d1beb893d6192336af96 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 2 Nov 2017 13:33:42 +0800 Subject: [PATCH 227/260] drm/i915/gvt: Emulate PCI expansion ROM base address register Our vGPU doesn't have a device ROM, we need follow the PCI spec to report this info to drivers. Otherwise, we would see below errors. Inspecting possible rom at 0xfe049000 (vd=8086:1912 bdf=00:10.0) qemu-system-x86_64: vfio-pci: Cannot read device rom at 00000000-0000-0000-0000-000000000001 Device option ROM contents are probably invalid (check dmesg). Skip option ROM probe with rombar=0, or load from file with romfile=No option rom signature (got 4860) I will also send a improvement patch to PCI subsystem related to PCI ROM. But no idea to omit below error, since no pattern to detect vbios shadow without touch its content. 0000:00:10.0: Invalid PCI ROM header signature: expecting 0xaa55, got 0x0000 Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index ab19545d59a1..4ce2e6bd0680 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -208,6 +208,20 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu, return 0; } +static int emulate_pci_rom_bar_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 *pval = (u32 *)(vgpu_cfg_space(vgpu) + offset); + u32 new = *(u32 *)(p_data); + + if ((new & PCI_ROM_ADDRESS_MASK) == PCI_ROM_ADDRESS_MASK) + /* We don't have rom, return size of 0. */ + *pval = 0; + else + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); + return 0; +} + static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -300,6 +314,11 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, } switch (rounddown(offset, 4)) { + case PCI_ROM_ADDRESS: + if (WARN_ON(!IS_ALIGNED(offset, 4))) + return -EINVAL; + return emulate_pci_rom_bar_write(vgpu, offset, p_data, bytes); + case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5: if (WARN_ON(!IS_ALIGNED(offset, 4))) return -EINVAL; @@ -375,6 +394,8 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, pci_resource_len(gvt->dev_priv->drm.pdev, 0); vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size = pci_resource_len(gvt->dev_priv->drm.pdev, 2); + + memset(vgpu_cfg_space(vgpu) + PCI_ROM_ADDRESS, 0, 4); } /** From 5c35258de6cfc0829803fe9f9869253a4e600925 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 2 Nov 2017 17:44:52 +0800 Subject: [PATCH 228/260] Revert "drm/i915/gvt: Refine broken PPGTT scratch" This reverts commit b20d09886fd1b74cd2255d846029a049e524db14. This caused windows driver boot errors for invalid page address. Revert for now. Signed-off-by: Zhenyu Wang Cc: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 196 +++++++++++++++------------------ drivers/gpu/drm/i915/gvt/gtt.h | 17 ++- 2 files changed, 101 insertions(+), 112 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 6fa9271e23a5..3d6008b116e5 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -841,51 +841,20 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page( return NULL; } -static bool ppgtt_is_scratch_entry(struct intel_vgpu *vgpu, - struct intel_gvt_gtt_entry *e) -{ - struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - int pt_type = get_pt_type(e->type); - - if (pt_type == GTT_TYPE_INVALID) - return false; - - if (ops->get_pfn(e) == vgpu->gtt.ppgtt_scratch_page[pt_type].mfn) - return true; - - return false; -} - -static void ppgtt_get_scratch_entry(struct intel_vgpu *vgpu, int type, - struct intel_gvt_gtt_entry *e) -{ - struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - struct intel_vgpu_shadow_page *scratch_page; - int pt_type = get_pt_type(type); - - if (WARN_ON(pt_type == GTT_TYPE_INVALID)) - return; - - scratch_page = &vgpu->gtt.ppgtt_scratch_page[pt_type]; - - e->type = get_entry_type(type); - ops->get_entry(scratch_page->vaddr, e, 0, false, 0, vgpu); -} - #define pt_entry_size_shift(spt) \ ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift) #define pt_entries(spt) \ (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) -#define for_each_guest_entry(spt, e, i) \ +#define for_each_present_guest_entry(spt, e, i) \ for (i = 0; i < pt_entries(spt); i++) \ - if (!ppgtt_get_guest_entry(spt, e, i)) + if (!ppgtt_get_guest_entry(spt, e, i) && \ + spt->vgpu->gvt->gtt.pte_ops->test_present(e)) #define for_each_present_shadow_entry(spt, e, i) \ for (i = 0; i < pt_entries(spt); i++) \ if (!ppgtt_get_shadow_entry(spt, e, i) && \ - !ppgtt_is_scratch_entry(spt->vgpu, e) && \ spt->vgpu->gvt->gtt.pte_ops->test_present(e)) static void ppgtt_get_shadow_page(struct intel_vgpu_ppgtt_spt *spt) @@ -904,13 +873,18 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, { struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s; + intel_gvt_gtt_type_t cur_pt_type; if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type)))) return -EINVAL; - if (WARN_ON(ppgtt_is_scratch_entry(vgpu, e))) - return -EINVAL; - + if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY + && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { + cur_pt_type = get_next_pt_type(e->type) + 1; + if (ops->get_pfn(e) == + vgpu->gtt.scratch_pt[cur_pt_type].page_mfn) + return 0; + } s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e)); if (!s) { gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n", @@ -1023,7 +997,6 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) { struct intel_vgpu *vgpu = spt->vgpu; - struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s; struct intel_gvt_gtt_entry se, ge; unsigned long i; @@ -1033,34 +1006,22 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) spt->guest_page.track.gfn, spt->shadow_page.type); if (gtt_type_is_pte_pt(spt->shadow_page.type)) { - for_each_guest_entry(spt, &ge, i) { - if (ops->test_present(&ge)) { - ret = gtt_entry_p2m(vgpu, &ge, &se); - if (ret) - goto fail; - } else { - ppgtt_get_scratch_entry(vgpu, - spt->shadow_page.type, &se); - } + for_each_present_guest_entry(spt, &ge, i) { + ret = gtt_entry_p2m(vgpu, &ge, &se); + if (ret) + goto fail; ppgtt_set_shadow_entry(spt, &se, i); } return 0; } - for_each_guest_entry(spt, &ge, i) { + for_each_present_guest_entry(spt, &ge, i) { if (!gtt_type_is_pt(get_next_pt_type(ge.type))) { gvt_vgpu_err("GVT doesn't support pse bit now\n"); ret = -EINVAL; goto fail; } - if (!ops->test_present(&ge)) { - ppgtt_get_scratch_entry(vgpu, spt->shadow_page.type, - &se); - ppgtt_set_shadow_entry(spt, &se, i); - continue; - } - s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge); if (IS_ERR(s)) { ret = PTR_ERR(s); @@ -1092,7 +1053,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, if (!ops->test_present(se)) return 0; - if (ppgtt_is_scratch_entry(vgpu, se)) + if (ops->get_pfn(se) == vgpu->gtt.scratch_pt[sp->type].page_mfn) return 0; if (gtt_type_is_pt(get_next_pt_type(se->type))) { @@ -1331,6 +1292,7 @@ static int ppgtt_handle_guest_write_page_table( { struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); struct intel_vgpu *vgpu = spt->vgpu; + int type = spt->shadow_page.type; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_entry se; @@ -1357,7 +1319,7 @@ static int ppgtt_handle_guest_write_page_table( goto fail; if (!new_present) { - ppgtt_get_scratch_entry(vgpu, spt->shadow_page.type, &se); + ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); ppgtt_set_shadow_entry(spt, &se, index); } @@ -2006,85 +1968,106 @@ int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, return ret; } -static void ppgtt_destroy_scratch(struct intel_vgpu *vgpu) -{ - struct intel_vgpu_shadow_page *scratch_page; - int i; - - for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { - scratch_page = &vgpu->gtt.ppgtt_scratch_page[i]; - if (scratch_page->page != NULL) { - clean_shadow_page(vgpu, scratch_page); - __free_page(scratch_page->page); - } - } -} - -static int setup_ppgtt_scratch_page(struct intel_vgpu *vgpu, +static int alloc_scratch_pages(struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type) { - struct intel_gvt *gvt = vgpu->gvt; - struct intel_gvt_device_info *info = &gvt->device_info; - int num_entries = I915_GTT_PAGE_SIZE >> info->gtt_entry_size_shift; struct intel_vgpu_gtt *gtt = &vgpu->gtt; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - struct intel_vgpu_shadow_page *scratch_page; - struct intel_gvt_gtt_entry e; - intel_gvt_gtt_type_t next_pt_type; - int ret, i; + int page_entry_num = I915_GTT_PAGE_SIZE >> + vgpu->gvt->device_info.gtt_entry_size_shift; + void *scratch_pt; + int i; + struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; + dma_addr_t daddr; if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX)) return -EINVAL; - scratch_page = >t->ppgtt_scratch_page[type]; - - scratch_page->page = alloc_page(GFP_KERNEL); - if (!scratch_page) { + scratch_pt = (void *)get_zeroed_page(GFP_KERNEL); + if (!scratch_pt) { gvt_vgpu_err("fail to allocate scratch page\n"); return -ENOMEM; } - ret = init_shadow_page(vgpu, scratch_page, type, false); - if (ret) { - gvt_vgpu_err("fail to allocate scratch page\n"); - __free_page(scratch_page->page); + daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, + 4096, PCI_DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, daddr)) { + gvt_vgpu_err("fail to dmamap scratch_pt\n"); + __free_page(virt_to_page(scratch_pt)); return -ENOMEM; } + gtt->scratch_pt[type].page_mfn = + (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); + gtt->scratch_pt[type].page = virt_to_page(scratch_pt); + gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n", + vgpu->id, type, gtt->scratch_pt[type].page_mfn); - memset(&e, 0, sizeof(e)); + /* Build the tree by full filled the scratch pt with the entries which + * point to the next level scratch pt or scratch page. The + * scratch_pt[type] indicate the scratch pt/scratch page used by the + * 'type' pt. + * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by + * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scratch_pt it self + * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn. + */ + if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) { + struct intel_gvt_gtt_entry se; - if (type == GTT_TYPE_PPGTT_PTE_PT) { - e.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY; - ops->set_pfn(&e, gvt->gtt.scratch_mfn); - } else { - next_pt_type = get_next_pt_type(type); - e.type = get_entry_type(type); - ops->set_pfn(&e, gtt->ppgtt_scratch_page[next_pt_type].mfn); + memset(&se, 0, sizeof(struct intel_gvt_gtt_entry)); + se.type = get_entry_type(type - 1); + ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn); + + /* The entry parameters like present/writeable/cache type + * set to the same as i915's scratch page tree. + */ + se.val64 |= _PAGE_PRESENT | _PAGE_RW; + if (type == GTT_TYPE_PPGTT_PDE_PT) + se.val64 |= PPAT_CACHED; + + for (i = 0; i < page_entry_num; i++) + ops->set_entry(scratch_pt, &se, i, false, 0, vgpu); } - ops->set_present(&e); - - for (i = 0; i < num_entries; i++) - ops->set_entry(scratch_page->vaddr, &e, i, false, 0, vgpu); - return 0; } -static int ppgtt_create_scratch(struct intel_vgpu *vgpu) +static int release_scratch_page_tree(struct intel_vgpu *vgpu) +{ + int i; + struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; + dma_addr_t daddr; + + for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { + if (vgpu->gtt.scratch_pt[i].page != NULL) { + daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn << + I915_GTT_PAGE_SHIFT); + dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); + __free_page(vgpu->gtt.scratch_pt[i].page); + vgpu->gtt.scratch_pt[i].page = NULL; + vgpu->gtt.scratch_pt[i].page_mfn = 0; + } + } + + return 0; +} + +static int create_scratch_page_tree(struct intel_vgpu *vgpu) { int i, ret; for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { - ret = setup_ppgtt_scratch_page(vgpu, i); + ret = alloc_scratch_pages(vgpu, i); if (ret) goto err; } + return 0; + err: - ppgtt_destroy_scratch(vgpu); + release_scratch_page_tree(vgpu); return ret; } - + /** * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virulization * @vgpu: a vGPU @@ -2117,7 +2100,8 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) } gtt->ggtt_mm = ggtt_mm; - return ppgtt_create_scratch(vgpu); + + return create_scratch_page_tree(vgpu); } static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type) @@ -2149,7 +2133,7 @@ static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type) void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) { ppgtt_free_all_shadow_page(vgpu); - ppgtt_destroy_scratch(vgpu); + release_scratch_page_tree(vgpu); intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_GGTT); diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 416b2f868cf0..f98c1c19b4cb 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -188,12 +188,9 @@ extern void intel_vgpu_destroy_mm(struct kref *mm_ref); struct intel_vgpu_guest_page; -struct intel_vgpu_shadow_page { - void *vaddr; +struct intel_vgpu_scratch_pt { struct page *page; - int type; - struct hlist_node node; - unsigned long mfn; + unsigned long page_mfn; }; struct intel_vgpu_gtt { @@ -205,7 +202,7 @@ struct intel_vgpu_gtt { atomic_t n_tracked_guest_page; struct list_head oos_page_list_head; struct list_head post_shadow_list_head; - struct intel_vgpu_shadow_page ppgtt_scratch_page[GTT_TYPE_MAX]; + struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX]; }; extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); @@ -221,6 +218,14 @@ extern struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu, struct intel_vgpu_oos_page; +struct intel_vgpu_shadow_page { + void *vaddr; + struct page *page; + int type; + struct hlist_node node; + unsigned long mfn; +}; + struct intel_vgpu_page_track { struct hlist_node node; bool tracked; From 295764cd2ff41e2c1bc8af4050de77cec5e7a1c0 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Tue, 7 Nov 2017 05:23:02 +0800 Subject: [PATCH 229/260] drm/i915/gvt: Limit read hw reg to active vgpu mmio_read_from_hw() let vgpu could read hw reg, if vgpu's workload is running on hw, things is good. Otherwise vgpu will get other vgpu's reg val, it is unsafe. This patch limit such hw access to active vgpu. If vgpu isn't running on hw, the reg read of this vgpu will get the last active val which saved at schedule_out. v2: ring timestamp is walking continuously even if the ring is idle. so read hw directly. (Zhenyu) Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 26 ++++++++++++++++++++++---- drivers/gpu/drm/i915/gvt/scheduler.c | 15 +++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 4f8d470dbb3f..880448d4adc7 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1471,11 +1471,29 @@ static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, static int mmio_read_from_hw(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_gvt *gvt = vgpu->gvt; + struct drm_i915_private *dev_priv = gvt->dev_priv; + int ring_id; + u32 ring_base; + + ring_id = intel_gvt_render_mmio_to_ring_id(gvt, offset); + /** + * Read HW reg in following case + * a. the offset isn't a ring mmio + * b. the offset's ring is running on hw. + * c. the offset is ring time stamp mmio + */ + if (ring_id >= 0) + ring_base = dev_priv->engine[ring_id]->mmio_base; + + if (ring_id < 0 || vgpu == gvt->scheduler.engine_owner[ring_id] || + offset == i915_mmio_reg_offset(RING_TIMESTAMP(ring_base)) || + offset == i915_mmio_reg_offset(RING_TIMESTAMP_UDW(ring_base))) { + mmio_hw_access_pre(dev_priv); + vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); + mmio_hw_access_post(dev_priv); + } - mmio_hw_access_pre(dev_priv); - vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); - mmio_hw_access_post(dev_priv); return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 7a1ffaa9ae06..9749113fccdd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -131,6 +131,20 @@ static inline bool is_gvt_request(struct drm_i915_gem_request *req) return i915_gem_context_force_single_submission(req->ctx); } +static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + u32 ring_base = dev_priv->engine[ring_id]->mmio_base; + i915_reg_t reg; + + reg = RING_INSTDONE(ring_base); + vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg); + reg = RING_ACTHD(ring_base); + vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg); + reg = RING_ACTHD_UDW(ring_base); + vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg); +} + static int shadow_context_status_change(struct notifier_block *nb, unsigned long action, void *data) { @@ -175,6 +189,7 @@ static int shadow_context_status_change(struct notifier_block *nb, break; case INTEL_CONTEXT_SCHEDULE_OUT: case INTEL_CONTEXT_SCHEDULE_PREEMPTED: + save_ring_hw_state(workload->vgpu, ring_id); atomic_set(&workload->shadow_ctx_active, 0); break; default: From b2d6ef70614e9e9dacfa9fc7dac49e7dc22dc8b3 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Wed, 8 Nov 2017 00:45:21 +0800 Subject: [PATCH 230/260] drm/i915/gvt: Let each vgpu has separate opregion memory Currently every vgpu share a common gvt opregion memory, but it is freed at vgpu destroy, then the later vgpu doesn't have opregion memory once the first vgpu is destroyed. This cause guest function failure like reboot, second or later boot. This patch allocate and init virt opregion memory for each vgpu, so this memory could be freed at vgpu destroy. Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.c | 9 +-- drivers/gpu/drm/i915/gvt/gvt.h | 10 --- drivers/gpu/drm/i915/gvt/opregion.c | 101 +++++++++++----------------- 3 files changed, 42 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index cef06d59884e..3a74a408a966 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -323,7 +323,6 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) intel_gvt_clean_cmd_parser(gvt); intel_gvt_clean_sched_policy(gvt); intel_gvt_clean_workload_scheduler(gvt); - intel_gvt_clean_opregion(gvt); intel_gvt_clean_gtt(gvt); intel_gvt_clean_irq(gvt); intel_gvt_clean_mmio_info(gvt); @@ -397,13 +396,9 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) if (ret) goto out_clean_irq; - ret = intel_gvt_init_opregion(gvt); - if (ret) - goto out_clean_gtt; - ret = intel_gvt_init_workload_scheduler(gvt); if (ret) - goto out_clean_opregion; + goto out_clean_gtt; ret = intel_gvt_init_sched_policy(gvt); if (ret) @@ -460,8 +455,6 @@ out_clean_sched_policy: intel_gvt_clean_sched_policy(gvt); out_clean_workload_scheduler: intel_gvt_clean_workload_scheduler(gvt); -out_clean_opregion: - intel_gvt_clean_opregion(gvt); out_clean_gtt: intel_gvt_clean_gtt(gvt); out_clean_irq: diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 27e8186cbc81..393066726993 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -125,7 +125,6 @@ struct intel_vgpu_irq { struct intel_vgpu_opregion { void *va; u32 gfn[INTEL_GVT_OPREGION_PAGES]; - struct page *pages[INTEL_GVT_OPREGION_PAGES]; }; #define vgpu_opregion(vgpu) (&(vgpu->opregion)) @@ -265,11 +264,6 @@ struct intel_gvt_firmware { bool firmware_loaded; }; -struct intel_gvt_opregion { - void *opregion_va; - u32 opregion_pa; -}; - #define NR_MAX_INTEL_VGPU_TYPES 20 struct intel_vgpu_type { char name[16]; @@ -293,7 +287,6 @@ struct intel_gvt { struct intel_gvt_firmware firmware; struct intel_gvt_irq irq; struct intel_gvt_gtt gtt; - struct intel_gvt_opregion opregion; struct intel_gvt_workload_scheduler scheduler; struct notifier_block shadow_ctx_notifier_block[I915_NUM_ENGINES]; DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS); @@ -511,9 +504,6 @@ static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar) PCI_BASE_ADDRESS_MEM_MASK; } -void intel_gvt_clean_opregion(struct intel_gvt *gvt); -int intel_gvt_init_opregion(struct intel_gvt *gvt); - void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu); int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa); diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 2533d1ef1c56..80720e59723a 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -213,16 +213,55 @@ static void virt_vbt_generation(struct vbt *v) v->driver_features.lvds_config = BDB_DRIVER_FEATURE_NO_LVDS; } +static int alloc_and_init_virt_opregion(struct intel_vgpu *vgpu) +{ + u8 *buf; + struct opregion_header *header; + struct vbt v; + + gvt_dbg_core("init vgpu%d opregion\n", vgpu->id); + vgpu_opregion(vgpu)->va = (void *)__get_free_pages(GFP_KERNEL | + __GFP_ZERO, + get_order(INTEL_GVT_OPREGION_SIZE)); + if (!vgpu_opregion(vgpu)->va) { + gvt_err("fail to get memory for vgpu virt opregion\n"); + return -ENOMEM; + } + + /* emulated opregion with VBT mailbox only */ + buf = (u8 *)vgpu_opregion(vgpu)->va; + header = (struct opregion_header *)buf; + memcpy(header->signature, OPREGION_SIGNATURE, + sizeof(OPREGION_SIGNATURE)); + header->size = 0x8; + header->opregion_ver = 0x02000000; + header->mboxes = MBOX_VBT; + + /* for unknown reason, the value in LID field is incorrect + * which block the windows guest, so workaround it by force + * setting it to "OPEN" + */ + buf[INTEL_GVT_OPREGION_CLID] = 0x3; + + /* emulated vbt from virt vbt generation */ + virt_vbt_generation(&v); + memcpy(buf + INTEL_GVT_OPREGION_VBT_OFFSET, &v, sizeof(struct vbt)); + + return 0; +} + static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) { - int i; + int i, ret; if (WARN((vgpu_opregion(vgpu)->va), "vgpu%d: opregion has been initialized already.\n", vgpu->id)) return -EINVAL; - vgpu_opregion(vgpu)->va = vgpu->gvt->opregion.opregion_va; + ret = alloc_and_init_virt_opregion(vgpu); + if (ret < 0) + return ret; for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; @@ -304,64 +343,6 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa) return 0; } -/** - * intel_gvt_clean_opregion - clean host opergion related stuffs - * @gvt: a GVT device - * - */ -void intel_gvt_clean_opregion(struct intel_gvt *gvt) -{ - free_pages((unsigned long)gvt->opregion.opregion_va, - get_order(INTEL_GVT_OPREGION_SIZE)); - gvt->opregion.opregion_va = NULL; -} - -/** - * intel_gvt_init_opregion - initialize host opergion related stuffs - * @gvt: a GVT device - * - * Returns: - * Zero on success, negative error code if failed. - */ -int intel_gvt_init_opregion(struct intel_gvt *gvt) -{ - u8 *buf; - struct opregion_header *header; - struct vbt v; - - gvt_dbg_core("init host opregion\n"); - - gvt->opregion.opregion_va = (void *)__get_free_pages(GFP_KERNEL | - __GFP_ZERO, - get_order(INTEL_GVT_OPREGION_SIZE)); - - if (!gvt->opregion.opregion_va) { - gvt_err("fail to get memory for virt opregion\n"); - return -ENOMEM; - } - - /* emulated opregion with VBT mailbox only */ - buf = (u8 *)gvt->opregion.opregion_va; - header = (struct opregion_header *)buf; - memcpy(header->signature, OPREGION_SIGNATURE, - sizeof(OPREGION_SIGNATURE)); - header->size = 0x8; - header->opregion_ver = 0x02000000; - header->mboxes = MBOX_VBT; - - /* for unknown reason, the value in LID field is incorrect - * which block the windows guest, so workaround it by force - * setting it to "OPEN" - */ - buf[INTEL_GVT_OPREGION_CLID] = 0x3; - - /* emulated vbt from virt vbt generation */ - virt_vbt_generation(&v); - memcpy(buf + INTEL_GVT_OPREGION_VBT_OFFSET, &v, sizeof(struct vbt)); - - return 0; -} - #define GVT_OPREGION_FUNC(scic) \ ({ \ u32 __ret; \ From f2880e04f3a5419366926182fc97a3c2e4fd8f2a Mon Sep 17 00:00:00 2001 From: fred gao Date: Tue, 14 Nov 2017 17:09:35 +0800 Subject: [PATCH 231/260] drm/i915/gvt: Move request alloc to dispatch_workload path only Previously the performance is improved through the workload auditing and shadowing ahead of vGPU scheduling, however, there is the case that more requests are allocated in submit_context before the previous request is added, the timeline will hold its seqno which is later. This patch is to move the request alloc to dispatch_workload function, where is the same place as request is added. It will fix the issue of kernel BUG for (timeline->seqno != request->fence.seqno) check when add_request. Fixes: 89ea20b930cb ("drm/i915/gvt: Factor out scan and shadow from workload dispatch") Signed-off-by: Chuanxiao Dong Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 31 ++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 9749113fccdd..a742b364c2c3 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -270,7 +270,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ring_id = workload->ring_id; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct drm_i915_gem_request *rq; struct intel_ring *ring; int ret; @@ -315,6 +314,27 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ret = populate_shadow_context(workload); if (ret) goto err_unpin; + workload->shadowed = true; + return 0; + +err_unpin: + engine->context_unpin(engine, shadow_ctx); +err_shadow: + release_shadow_wa_ctx(&workload->wa_ctx); +err_scan: + return ret; +} + +static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) +{ + int ring_id = workload->ring_id; + struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; + struct intel_engine_cs *engine = dev_priv->engine[ring_id]; + struct drm_i915_gem_request *rq; + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + int ret; rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { @@ -329,14 +349,11 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ret = copy_workload_to_ring_buffer(workload); if (ret) goto err_unpin; - workload->shadowed = true; return 0; err_unpin: engine->context_unpin(engine, shadow_ctx); -err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); -err_scan: return ret; } @@ -496,6 +513,12 @@ static int prepare_workload(struct intel_vgpu_workload *workload) goto err_unpin_mm; } + ret = intel_gvt_generate_request(workload); + if (ret) { + gvt_vgpu_err("fail to generate request\n"); + goto err_unpin_mm; + } + ret = prepare_shadow_batch_buffer(workload); if (ret) { gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n"); From d5653ec322ff132c2d6976ca0b909e0a9d1da6af Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 16 Nov 2017 10:39:54 +0200 Subject: [PATCH 232/260] drm/i915: Print the condition causing GEM_BUG_ON It is easier to categorize and debug bugs if the failed condition is in plain sight in the actual dmesg output. Make it so. Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Marta Lofstedt Signed-off-by: Mika Kuoppala Reviewed-by: Joonas Lahtinen Reviewed-by: Marta Lofstedt Link: https://patchwork.freedesktop.org/patch/msgid/20171116083954.3357-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/i915_gem.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index ff42b5f0e981..e920dab7f1b8 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -28,7 +28,11 @@ #include #ifdef CONFIG_DRM_I915_DEBUG_GEM -#define GEM_BUG_ON(expr) BUG_ON(expr) +#define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ + printk(KERN_ERR "GEM_BUG_ON(%s)\n", __stringify(condition)); \ + BUG(); \ + } \ + } while(0) #define GEM_WARN_ON(expr) WARN_ON(expr) #define GEM_DEBUG_DECL(var) var From c534612e780c4a2c8ef5bfc11583c7d58436baca Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Nov 2017 12:14:58 +0000 Subject: [PATCH 233/260] drm/i915: Clear breadcrumb node when cancelling signaling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we call intel_engine_cancel_signaling() to stop reporting when a request is completed via an asynchronous signal, we remove that request from the breadcrumb wait queue. However, we may be concurrently processing that request in the signaler itself, the actual operations on the request's node itself are serialised but we do not actually clear the waiter after removing it from the tree allowing both parties to attempt to do so and corrupting the rbtree. (Previously removing from the breadcrumb wait queue could only be done on behalf of i915_wait_request, so this race could not happen). Reported-by: "He, Bo" Fixes: 9eb143bbec7d ("drm/i915: Allow a request to be cancelled") Signed-off-by: Chris Wilson Cc: "He, Bo" Cc: Tvrtko Ursulin Cc: Michał Winiarski Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171115121458.24655-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 4c4fbf5f20f9..5ae2d276f7f3 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -549,6 +549,7 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); rb_erase(&wait->node, &b->waiters); + RB_CLEAR_NODE(&wait->node); out: GEM_BUG_ON(b->irq_wait == wait); From d710fc16ff0749f7a8bd33c69785513fe8b95fb8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Nov 2017 10:50:59 +0000 Subject: [PATCH 234/260] drm/i915: Prevent overflow of execbuf.buffer_count and num_cliprects We check whether the multiplies will overflow prior to calling kmalloc_array so that we can respond with -EINVAL for the invalid user arguments rather than treating it as an -ENOMEM that would otherwise occur. However, as Dan Carpenter pointed out, we did an addition on the unsigned int prior to passing to kmalloc_array where it would be promoted to size_t for the calculation, thereby allowing it to overflow and underallocate. v2: buffer_count is currently limited to INT_MAX because we treat it as signaled variable for LUT_HANDLE in eb_lookup_vma v3: Move common checks for eb1/eb2 into the same function v4: Put the check back for nfence*sizeof(user_fence) overflow v5: access_ok uses ULONG_MAX but kvmalloc_array uses SIZE_MAX v6: size_t and unsigned long are not type-equivalent on 32b Reported-by: Dan Carpenter Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171116105059.25142-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 66 ++++++++++++++-------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 435ed95df144..53ccb27bfe91 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -2074,23 +2074,27 @@ static struct drm_syncobj ** get_fence_array(struct drm_i915_gem_execbuffer2 *args, struct drm_file *file) { - const unsigned int nfences = args->num_cliprects; + const unsigned long nfences = args->num_cliprects; struct drm_i915_gem_exec_fence __user *user; struct drm_syncobj **fences; - unsigned int n; + unsigned long n; int err; if (!(args->flags & I915_EXEC_FENCE_ARRAY)) return NULL; - if (nfences > SIZE_MAX / sizeof(*fences)) + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (nfences > min_t(unsigned long, + ULONG_MAX / sizeof(*user), + SIZE_MAX / sizeof(*fences))) return ERR_PTR(-EINVAL); user = u64_to_user_ptr(args->cliprects_ptr); - if (!access_ok(VERIFY_READ, user, nfences * 2 * sizeof(u32))) + if (!access_ok(VERIFY_READ, user, nfences * sizeof(*user))) return ERR_PTR(-EFAULT); - fences = kvmalloc_array(args->num_cliprects, sizeof(*fences), + fences = kvmalloc_array(nfences, sizeof(*fences), __GFP_NOWARN | GFP_KERNEL); if (!fences) return ERR_PTR(-ENOMEM); @@ -2447,6 +2451,26 @@ err_in_fence: return err; } +static size_t eb_element_size(void) +{ + return (sizeof(struct drm_i915_gem_exec_object2) + + sizeof(struct i915_vma *) + + sizeof(unsigned int)); +} + +static bool check_buffer_count(size_t count) +{ + const size_t sz = eb_element_size(); + + /* + * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup + * array size (see eb_create()). Otherwise, we can accept an array as + * large as can be addressed (though use large arrays at your peril)! + */ + + return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1); +} + /* * Legacy execbuffer just creates an exec2 list from the original exec object * list array and passes it to the real function. @@ -2455,18 +2479,16 @@ int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file) { - const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) + - sizeof(struct i915_vma *) + - sizeof(unsigned int)); struct drm_i915_gem_execbuffer *args = data; struct drm_i915_gem_execbuffer2 exec2; struct drm_i915_gem_exec_object *exec_list = NULL; struct drm_i915_gem_exec_object2 *exec2_list = NULL; + const size_t count = args->buffer_count; unsigned int i; int err; - if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) { - DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); + if (!check_buffer_count(count)) { + DRM_DEBUG("execbuf2 with %zd buffers\n", count); return -EINVAL; } @@ -2485,9 +2507,9 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, return -EINVAL; /* Copy in the exec list from userland */ - exec_list = kvmalloc_array(args->buffer_count, sizeof(*exec_list), + exec_list = kvmalloc_array(count, sizeof(*exec_list), __GFP_NOWARN | GFP_KERNEL); - exec2_list = kvmalloc_array(args->buffer_count + 1, sz, + exec2_list = kvmalloc_array(count + 1, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec_list == NULL || exec2_list == NULL) { DRM_DEBUG("Failed to allocate exec list for %d buffers\n", @@ -2498,7 +2520,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, } err = copy_from_user(exec_list, u64_to_user_ptr(args->buffers_ptr), - sizeof(*exec_list) * args->buffer_count); + sizeof(*exec_list) * count); if (err) { DRM_DEBUG("copy %d exec entries failed %d\n", args->buffer_count, err); @@ -2548,16 +2570,14 @@ int i915_gem_execbuffer2(struct drm_device *dev, void *data, struct drm_file *file) { - const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) + - sizeof(struct i915_vma *) + - sizeof(unsigned int)); struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list; struct drm_syncobj **fences = NULL; + const size_t count = args->buffer_count; int err; - if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) { - DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); + if (!check_buffer_count(count)) { + DRM_DEBUG("execbuf2 with %zd buffers\n", count); return -EINVAL; } @@ -2565,17 +2585,17 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; /* Allocate an extra slot for use by the command parser */ - exec2_list = kvmalloc_array(args->buffer_count + 1, sz, + exec2_list = kvmalloc_array(count + 1, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec2_list == NULL) { - DRM_DEBUG("Failed to allocate exec list for %d buffers\n", - args->buffer_count); + DRM_DEBUG("Failed to allocate exec list for %zd buffers\n", + count); return -ENOMEM; } if (copy_from_user(exec2_list, u64_to_user_ptr(args->buffers_ptr), - sizeof(*exec2_list) * args->buffer_count)) { - DRM_DEBUG("copy %d exec entries failed\n", args->buffer_count); + sizeof(*exec2_list) * count)) { + DRM_DEBUG("copy %zd exec entries failed\n", count); kvfree(exec2_list); return -EFAULT; } From c6dce8f140bc19efb04afc8c0d11897ead000946 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Thu, 16 Nov 2017 19:02:37 +0530 Subject: [PATCH 235/260] drm/i915: Update execlists tasklet naming intel_lrc_irq_handler and i915_guc_irq_handler are HW submission related tasklet functions. Name them with "submission_tasklet" suffix and remove intel/i915 prefix as they are static. Also rename irq_tasklet as just tasklet for clarity. v2: s/_bh/_tasklet (Chris) Suggested-by: Michal Wajdeczko Signed-off-by: Sagar Arun Kamble Cc: Michal Wajdeczko Cc: Michal Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Michal Wajdeczko Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1510839162-25197-2-git-send-email-sagar.a.kamble@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 10 +++++----- drivers/gpu/drm/i915/i915_guc_submission.c | 6 +++--- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 17 +++++++++-------- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++-- 6 files changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bf8fea792048..61ba321e9970 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2933,13 +2933,13 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) * Prevent request submission to the hardware until we have * completed the reset in i915_gem_reset_finish(). If a request * is completed by one engine, it may then queue a request - * to a second via its engine->irq_tasklet *just* as we are + * to a second via its execlists->tasklet *just* as we are * calling engine->init_hw() and also writing the ELSP. - * Turning off the engine->irq_tasklet until the reset is over + * Turning off the execlists->tasklet until the reset is over * prevents the race. */ - tasklet_kill(&engine->execlists.irq_tasklet); - tasklet_disable(&engine->execlists.irq_tasklet); + tasklet_kill(&engine->execlists.tasklet); + tasklet_disable(&engine->execlists.tasklet); /* * We're using worker to queue preemption requests from the tasklet in @@ -3128,7 +3128,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) { - tasklet_enable(&engine->execlists.irq_tasklet); + tasklet_enable(&engine->execlists.tasklet); kthread_unpark(engine->breadcrumbs.signaler); intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 0ba2fc04fe9c..7f3e63da519b 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -650,7 +650,7 @@ static void inject_preempt_context(struct work_struct *work) if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) { execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); - tasklet_schedule(&engine->execlists.irq_tasklet); + tasklet_schedule(&engine->execlists.tasklet); } } @@ -799,7 +799,7 @@ unlock: spin_unlock_irq(&engine->timeline->lock); } -static void i915_guc_irq_handler(unsigned long data) +static void guc_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1439,7 +1439,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct intel_engine_execlists * const execlists = &engine->execlists; - execlists->irq_tasklet.func = i915_guc_irq_handler; + execlists->tasklet.func = guc_submission_tasklet; engine->park = i915_guc_submission_park; engine->unpark = i915_guc_submission_unpark; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ff00e462697a..4fb183ae7a07 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1404,7 +1404,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) } if (tasklet) - tasklet_hi_schedule(&execlists->irq_tasklet); + tasklet_hi_schedule(&execlists->tasklet); } static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a42b738e79e7..9897c7f78c51 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1585,7 +1585,7 @@ void intel_engines_park(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { /* Flush the residual irq tasklets first. */ intel_engine_disarm_breadcrumbs(engine); - tasklet_kill(&engine->execlists.irq_tasklet); + tasklet_kill(&engine->execlists.tasklet); /* * We are committed now to parking the engines, make sure there diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ebd9596fe83b..be6c39adebdf 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -781,7 +781,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. */ -static void intel_lrc_irq_handler(unsigned long data) +static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; @@ -947,7 +947,7 @@ static void insert_request(struct intel_engine_cs *engine, list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); if (ptr_unmask_bits(p, 1)) - tasklet_hi_schedule(&engine->execlists.irq_tasklet); + tasklet_hi_schedule(&engine->execlists.tasklet); } static void execlists_submit_request(struct drm_i915_gem_request *request) @@ -1503,7 +1503,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) /* After a GPU reset, we may have requests to replay */ if (execlists->first) - tasklet_schedule(&execlists->irq_tasklet); + tasklet_schedule(&execlists->tasklet); return 0; } @@ -1881,8 +1881,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) * Tasklet cannot be active at this point due intel_mark_active/idle * so this is just for documentation. */ - if (WARN_ON(test_bit(TASKLET_STATE_SCHED, &engine->execlists.irq_tasklet.state))) - tasklet_kill(&engine->execlists.irq_tasklet); + if (WARN_ON(test_bit(TASKLET_STATE_SCHED, + &engine->execlists.tasklet.state))) + tasklet_kill(&engine->execlists.tasklet); dev_priv = engine->i915; @@ -1906,7 +1907,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->submit_request = execlists_submit_request; engine->cancel_requests = execlists_cancel_requests; engine->schedule = execlists_schedule; - engine->execlists.irq_tasklet.func = intel_lrc_irq_handler; + engine->execlists.tasklet.func = execlists_submission_tasklet; engine->park = NULL; engine->unpark = NULL; @@ -1968,8 +1969,8 @@ logical_ring_setup(struct intel_engine_cs *engine) engine->execlists.fw_domains = fw_domains; - tasklet_init(&engine->execlists.irq_tasklet, - intel_lrc_irq_handler, (unsigned long)engine); + tasklet_init(&engine->execlists.tasklet, + execlists_submission_tasklet, (unsigned long)engine); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 15a15cb876a6..c00804ed64c6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -193,9 +193,9 @@ struct i915_priolist { */ struct intel_engine_execlists { /** - * @irq_tasklet: softirq tasklet for bottom handler + * @tasklet: softirq tasklet for bottom handler */ - struct tasklet_struct irq_tasklet; + struct tasklet_struct tasklet; /** * @default_priolist: priority list for I915_PRIORITY_NORMAL From 6fa1f6fbb18e23dc0f582e296fa83194390a49a6 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Thu, 16 Nov 2017 19:02:38 +0530 Subject: [PATCH 236/260] drm/i915/guc: Update names of submission related static functions i915_guc_submit, i915_guc_dequeue, i915_guc_submission_park and i915_guc_submission_upark are functions internal to GuC submission hence remove "i915_" prefix. Suggested-by: Michal Wajdeczko Signed-off-by: Sagar Arun Kamble Cc: Michal Wajdeczko Cc: Michal Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/1510839162-25197-3-git-send-email-sagar.a.kamble@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_guc_submission.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 7f3e63da519b..b7978a243a5c 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -683,13 +683,13 @@ static void wait_for_guc_preempt_report(struct intel_engine_cs *engine) } /** - * i915_guc_submit() - Submit commands through GuC + * guc_submit() - Submit commands through GuC * @engine: engine associated with the commands * * The only error here arises if the doorbell hardware isn't functioning * as expected, which really shouln't happen. */ -static void i915_guc_submit(struct intel_engine_cs *engine) +static void guc_submit(struct intel_engine_cs *engine) { struct intel_guc *guc = &engine->i915->guc; struct intel_engine_execlists * const execlists = &engine->execlists; @@ -722,7 +722,7 @@ static void port_assign(struct execlist_port *port, port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); } -static void i915_guc_dequeue(struct intel_engine_cs *engine) +static void guc_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; @@ -793,7 +793,7 @@ done: if (submit) { port_assign(port, last); execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); - i915_guc_submit(engine); + guc_submit(engine); } unlock: spin_unlock_irq(&engine->timeline->lock); @@ -831,13 +831,13 @@ static void guc_submission_tasklet(unsigned long data) } if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) - i915_guc_dequeue(engine); + guc_dequeue(engine); } /* * Everything below here is concerned with setup & teardown, and is * therefore not part of the somewhat time-critical batch-submission - * path of i915_guc_submit() above. + * path of guc_submit() above. */ /* Check that a doorbell register is in the expected state */ @@ -1381,12 +1381,12 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv) rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; } -static void i915_guc_submission_park(struct intel_engine_cs *engine) +static void guc_submission_park(struct intel_engine_cs *engine) { intel_engine_unpin_breadcrumbs_irq(engine); } -static void i915_guc_submission_unpark(struct intel_engine_cs *engine) +static void guc_submission_unpark(struct intel_engine_cs *engine) { intel_engine_pin_breadcrumbs_irq(engine); } @@ -1440,8 +1440,8 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct intel_engine_execlists * const execlists = &engine->execlists; execlists->tasklet.func = guc_submission_tasklet; - engine->park = i915_guc_submission_park; - engine->unpark = i915_guc_submission_unpark; + engine->park = guc_submission_park; + engine->unpark = guc_submission_unpark; } return 0; From db14d0c5ae7c2d8a39b4dcaa584e7503ace6b18f Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Thu, 16 Nov 2017 19:02:39 +0530 Subject: [PATCH 237/260] drm/i915/guc: Update name and prototype of GuC submission interface functions i915 GuC submission is hardware interface and GuC APIs that are not user facing should be named intel_guc* hence we change GuC submission related functions name prefix to intel_guc. Also changed the parameter to these functions to intel_guc struct. v2: Using local guc variable in intel_uc_fini_hw. (Michal Wajdeczko) Rebase. Suggested-by: Chris Wilson Signed-off-by: Sagar Arun Kamble Cc: Michal Wajdeczko Cc: Michal Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Acked-by: Chris Wilson Acked-by: Joonas Lahtinen Acked-by: Oscar Mateo Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/1510839162-25197-4-git-send-email-sagar.a.kamble@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_guc_submission.c | 15 ++++++--------- drivers/gpu/drm/i915/i915_guc_submission.h | 8 ++++---- drivers/gpu/drm/i915/intel_uc.c | 16 +++++++++------- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index b7978a243a5c..515505c63d07 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1253,9 +1253,8 @@ static void guc_preempt_work_destroy(struct intel_guc *guc) * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. */ -int i915_guc_submission_init(struct drm_i915_private *dev_priv) +int intel_guc_submission_init(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; int ret; if (guc->stage_desc_pool) @@ -1302,10 +1301,8 @@ err_stage_desc_pool: return ret; } -void i915_guc_submission_fini(struct drm_i915_private *dev_priv) +void intel_guc_submission_fini(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; - guc_ads_destroy(guc); guc_preempt_work_destroy(guc); intel_guc_log_destroy(guc); @@ -1391,9 +1388,9 @@ static void guc_submission_unpark(struct intel_engine_cs *engine) intel_engine_pin_breadcrumbs_irq(engine); } -int i915_guc_submission_enable(struct drm_i915_private *dev_priv) +int intel_guc_submission_enable(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; + struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_engine_cs *engine; enum intel_engine_id id; int err; @@ -1451,9 +1448,9 @@ err_free_clients: return err; } -void i915_guc_submission_disable(struct drm_i915_private *dev_priv) +void intel_guc_submission_disable(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; + struct drm_i915_private *dev_priv = guc_to_i915(guc); GEM_BUG_ON(dev_priv->gt.awake); /* GT should be parked first */ diff --git a/drivers/gpu/drm/i915/i915_guc_submission.h b/drivers/gpu/drm/i915/i915_guc_submission.h index cb4353b59059..6bdb8fc00a40 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.h +++ b/drivers/gpu/drm/i915/i915_guc_submission.h @@ -72,9 +72,9 @@ struct i915_guc_client { u64 submissions[I915_NUM_ENGINES]; }; -int i915_guc_submission_init(struct drm_i915_private *dev_priv); -int i915_guc_submission_enable(struct drm_i915_private *dev_priv); -void i915_guc_submission_disable(struct drm_i915_private *dev_priv); -void i915_guc_submission_fini(struct drm_i915_private *dev_priv); +int intel_guc_submission_init(struct intel_guc *guc); +int intel_guc_submission_enable(struct intel_guc *guc); +void intel_guc_submission_disable(struct intel_guc *guc); +void intel_guc_submission_fini(struct intel_guc *guc); #endif diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index aec295470e0d..2fb65eb7cf74 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -168,7 +168,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) * This is stuff we need to have available at fw load time * if we are planning to enable submission later */ - ret = i915_guc_submission_init(dev_priv); + ret = intel_guc_submission_init(guc); if (ret) goto err_guc; } @@ -217,7 +217,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) if (i915_modparams.guc_log_level >= 0) gen9_enable_guc_interrupts(dev_priv); - ret = i915_guc_submission_enable(dev_priv); + ret = intel_guc_submission_enable(guc); if (ret) goto err_interrupts; } @@ -246,7 +246,7 @@ err_log_capture: guc_capture_load_err_log(guc); err_submission: if (i915_modparams.enable_guc_submission) - i915_guc_submission_fini(dev_priv); + intel_guc_submission_fini(guc); err_guc: i915_ggtt_disable_guc(dev_priv); @@ -271,19 +271,21 @@ err_guc: void intel_uc_fini_hw(struct drm_i915_private *dev_priv) { - guc_free_load_err_log(&dev_priv->guc); + struct intel_guc *guc = &dev_priv->guc; + + guc_free_load_err_log(guc); if (!i915_modparams.enable_guc_loading) return; if (i915_modparams.enable_guc_submission) - i915_guc_submission_disable(dev_priv); + intel_guc_submission_disable(guc); - guc_disable_communication(&dev_priv->guc); + guc_disable_communication(guc); if (i915_modparams.enable_guc_submission) { gen9_disable_guc_interrupts(dev_priv); - i915_guc_submission_fini(dev_priv); + intel_guc_submission_fini(guc); } i915_ggtt_disable_guc(dev_priv); From 5afc8b49e4f06dbd903987df101db5341575a5ba Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Thu, 16 Nov 2017 19:02:40 +0530 Subject: [PATCH 238/260] drm/i915/guc: Rename i915_guc_client struct to intel_guc_client GuC submission clients are currently being used in kernel only hence update the structure name to intel_guc_client. Signed-off-by: Sagar Arun Kamble Cc: Michal Wajdeczko Cc: Michal Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Acked-by: Chris Wilson Acked-by: Joonas Lahtinen Acked-by: Oscar Mateo Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/1510839162-25197-5-git-send-email-sagar.a.kamble@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_guc_submission.c | 62 +++++++++++----------- drivers/gpu/drm/i915/i915_guc_submission.h | 2 +- drivers/gpu/drm/i915/intel_guc.h | 6 +-- 4 files changed, 37 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6ba08b0c1c22..ce023587dd30 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2433,7 +2433,7 @@ static void i915_guc_log_info(struct seq_file *m, static void i915_guc_client_info(struct seq_file *m, struct drm_i915_private *dev_priv, - struct i915_guc_client *client) + struct intel_guc_client *client) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -2498,7 +2498,7 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_guc *guc = &dev_priv->guc; struct guc_stage_desc *desc = guc->stage_desc_pool_vaddr; - struct i915_guc_client *client = guc->execbuf_client; + struct intel_guc_client *client = guc->execbuf_client; unsigned int tmp; int index; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 515505c63d07..22dbc254ee85 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -32,7 +32,7 @@ * DOC: GuC-based command submission * * GuC client: - * A i915_guc_client refers to a submission path through GuC. Currently, there + * A intel_guc_client refers to a submission path through GuC. Currently, there * are two clients. One of them (the execbuf_client) is charged with all * submissions to the GuC, the other one (preempt_client) is responsible for * preempting the execbuf_client. This struct is the owner of a doorbell, a @@ -42,7 +42,7 @@ * GuC stage descriptor: * During initialization, the driver allocates a static pool of 1024 such * descriptors, and shares them with the GuC. - * Currently, there exists a 1:1 mapping between a i915_guc_client and a + * Currently, there exists a 1:1 mapping between a intel_guc_client and a * guc_stage_desc (via the client's stage_id), so effectively only one * gets used. This stage descriptor lets the GuC know about the doorbell, * workqueue and process descriptor. Theoretically, it also lets the GuC @@ -82,13 +82,13 @@ * */ -static inline bool is_high_priority(struct i915_guc_client* client) +static inline bool is_high_priority(struct intel_guc_client *client) { return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || client->priority == GUC_CLIENT_PRIORITY_HIGH); } -static int __reserve_doorbell(struct i915_guc_client *client) +static int __reserve_doorbell(struct intel_guc_client *client) { unsigned long offset; unsigned long end; @@ -120,7 +120,7 @@ static int __reserve_doorbell(struct i915_guc_client *client) return 0; } -static void __unreserve_doorbell(struct i915_guc_client *client) +static void __unreserve_doorbell(struct intel_guc_client *client) { GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID); @@ -152,7 +152,7 @@ static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static struct guc_stage_desc *__get_stage_desc(struct i915_guc_client *client) +static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client) { struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr; @@ -166,7 +166,7 @@ static struct guc_stage_desc *__get_stage_desc(struct i915_guc_client *client) * client object which contains the page being used for the doorbell */ -static void __update_doorbell_desc(struct i915_guc_client *client, u16 new_id) +static void __update_doorbell_desc(struct intel_guc_client *client, u16 new_id) { struct guc_stage_desc *desc; @@ -175,12 +175,12 @@ static void __update_doorbell_desc(struct i915_guc_client *client, u16 new_id) desc->db_id = new_id; } -static struct guc_doorbell_info *__get_doorbell(struct i915_guc_client *client) +static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client) { return client->vaddr + client->doorbell_offset; } -static bool has_doorbell(struct i915_guc_client *client) +static bool has_doorbell(struct intel_guc_client *client) { if (client->doorbell_id == GUC_DOORBELL_INVALID) return false; @@ -188,7 +188,7 @@ static bool has_doorbell(struct i915_guc_client *client) return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); } -static int __create_doorbell(struct i915_guc_client *client) +static int __create_doorbell(struct intel_guc_client *client) { struct guc_doorbell_info *doorbell; int err; @@ -207,7 +207,7 @@ static int __create_doorbell(struct i915_guc_client *client) return err; } -static int __destroy_doorbell(struct i915_guc_client *client) +static int __destroy_doorbell(struct intel_guc_client *client) { struct drm_i915_private *dev_priv = guc_to_i915(client->guc); struct guc_doorbell_info *doorbell; @@ -228,7 +228,7 @@ static int __destroy_doorbell(struct i915_guc_client *client) return __guc_deallocate_doorbell(client->guc, client->stage_id); } -static int create_doorbell(struct i915_guc_client *client) +static int create_doorbell(struct intel_guc_client *client) { int ret; @@ -250,7 +250,7 @@ err: return ret; } -static int destroy_doorbell(struct i915_guc_client *client) +static int destroy_doorbell(struct intel_guc_client *client) { int err; @@ -286,7 +286,7 @@ static unsigned long __select_cacheline(struct intel_guc* guc) } static inline struct guc_process_desc * -__get_process_desc(struct i915_guc_client *client) +__get_process_desc(struct intel_guc_client *client) { return client->vaddr + client->proc_desc_offset; } @@ -295,7 +295,7 @@ __get_process_desc(struct i915_guc_client *client) * Initialise the process descriptor shared with the GuC firmware. */ static void guc_proc_desc_init(struct intel_guc *guc, - struct i915_guc_client *client) + struct intel_guc_client *client) { struct guc_process_desc *desc; @@ -355,7 +355,7 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc) * write queue, etc). */ static void guc_stage_desc_init(struct intel_guc *guc, - struct i915_guc_client *client) + struct intel_guc_client *client) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_engine_cs *engine; @@ -436,7 +436,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, } static void guc_stage_desc_fini(struct intel_guc *guc, - struct i915_guc_client *client) + struct intel_guc_client *client) { struct guc_stage_desc *desc; @@ -472,7 +472,7 @@ static void guc_shared_data_destroy(struct intel_guc *guc) } /* Construct a Work Item and append it to the GuC's Work Queue */ -static void guc_wq_item_append(struct i915_guc_client *client, +static void guc_wq_item_append(struct intel_guc_client *client, u32 target_engine, u32 context_desc, u32 ring_tail, u32 fence_id) { @@ -517,7 +517,7 @@ static void guc_wq_item_append(struct i915_guc_client *client, WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1)); } -static void guc_reset_wq(struct i915_guc_client *client) +static void guc_reset_wq(struct intel_guc_client *client) { struct guc_process_desc *desc = __get_process_desc(client); @@ -525,7 +525,7 @@ static void guc_reset_wq(struct i915_guc_client *client) desc->tail = 0; } -static void guc_ring_doorbell(struct i915_guc_client *client) +static void guc_ring_doorbell(struct intel_guc_client *client) { struct guc_doorbell_info *db; u32 cookie; @@ -549,7 +549,7 @@ static void guc_ring_doorbell(struct i915_guc_client *client) static void guc_add_request(struct intel_guc *guc, struct drm_i915_gem_request *rq) { - struct i915_guc_client *client = guc->execbuf_client; + struct intel_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine = rq->engine; u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(rq->ctx, engine)); u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); @@ -589,7 +589,7 @@ static void inject_preempt_context(struct work_struct *work) struct intel_engine_cs *engine = preempt_work->engine; struct intel_guc *guc = container_of(preempt_work, typeof(*guc), preempt_work[engine->id]); - struct i915_guc_client *client = guc->preempt_client; + struct intel_guc_client *client = guc->preempt_client; struct guc_stage_desc *stage_desc = __get_stage_desc(client); struct intel_ring *ring = client->owner->engine[engine->id].ring; u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner, @@ -866,7 +866,7 @@ static bool doorbell_ok(struct intel_guc *guc, u16 db_id) * reloaded the GuC FW) we can use this function to tell the GuC to reassign the * doorbell to the rightful owner. */ -static int __reset_doorbell(struct i915_guc_client* client, u16 db_id) +static int __reset_doorbell(struct intel_guc_client *client, u16 db_id) { int err; @@ -887,7 +887,7 @@ static int __reset_doorbell(struct i915_guc_client* client, u16 db_id) */ static int guc_init_doorbell_hw(struct intel_guc *guc) { - struct i915_guc_client *client = guc->execbuf_client; + struct intel_guc_client *client = guc->execbuf_client; bool recreate_first_client = false; u16 db_id; int ret; @@ -937,7 +937,7 @@ static int guc_init_doorbell_hw(struct intel_guc *guc) } /** - * guc_client_alloc() - Allocate an i915_guc_client + * guc_client_alloc() - Allocate an intel_guc_client * @dev_priv: driver private data structure * @engines: The set of engines to enable for this client * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW @@ -947,15 +947,15 @@ static int guc_init_doorbell_hw(struct intel_guc *guc) * @ctx: the context that owns the client (we use the default render * context) * - * Return: An i915_guc_client object if success, else NULL. + * Return: An intel_guc_client object if success, else NULL. */ -static struct i915_guc_client * +static struct intel_guc_client * guc_client_alloc(struct drm_i915_private *dev_priv, u32 engines, u32 priority, struct i915_gem_context *ctx) { - struct i915_guc_client *client; + struct intel_guc_client *client; struct intel_guc *guc = &dev_priv->guc; struct i915_vma *vma; void *vaddr; @@ -1033,7 +1033,7 @@ err_client: return ERR_PTR(ret); } -static void guc_client_free(struct i915_guc_client *client) +static void guc_client_free(struct intel_guc_client *client) { /* * XXX: wait for any outstanding submissions before freeing memory. @@ -1054,7 +1054,7 @@ static void guc_client_free(struct i915_guc_client *client) static int guc_clients_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct i915_guc_client *client; + struct intel_guc_client *client; GEM_BUG_ON(guc->execbuf_client); GEM_BUG_ON(guc->preempt_client); @@ -1086,7 +1086,7 @@ static int guc_clients_create(struct intel_guc *guc) static void guc_clients_destroy(struct intel_guc *guc) { - struct i915_guc_client *client; + struct intel_guc_client *client; client = fetch_and_zero(&guc->execbuf_client); guc_client_free(client); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.h b/drivers/gpu/drm/i915/i915_guc_submission.h index 6bdb8fc00a40..f8ae258c89d1 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.h +++ b/drivers/gpu/drm/i915/i915_guc_submission.h @@ -52,7 +52,7 @@ struct drm_i915_private; * queue (a circular array of work items), again described in the process * descriptor. Work queue pages are mapped momentarily as required. */ -struct i915_guc_client { +struct intel_guc_client { struct i915_vma *vma; void *vaddr; struct i915_gem_context *owner; diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 607e02500262..75c4cfefdaff 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -41,7 +41,7 @@ struct guc_preempt_work { /* * Top level structure of GuC. It handles firmware loading and manages client - * pool and doorbells. intel_guc owns a i915_guc_client to replace the legacy + * pool and doorbells. intel_guc owns a intel_guc_client to replace the legacy * ExecList submission. */ struct intel_guc { @@ -62,8 +62,8 @@ struct intel_guc { struct i915_vma *shared_data; void *shared_data_vaddr; - struct i915_guc_client *execbuf_client; - struct i915_guc_client *preempt_client; + struct intel_guc_client *execbuf_client; + struct intel_guc_client *preempt_client; struct guc_preempt_work preempt_work[I915_NUM_ENGINES]; struct workqueue_struct *preempt_wq; From a269574489d4fd4615562090b130cfc79309176e Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Thu, 16 Nov 2017 19:02:41 +0530 Subject: [PATCH 239/260] drm/i915/guc: Rename i915_guc_submission.c|h to intel_guc_submission.c|h With all component structures and functions named appropriately, change the names of GuC submission source files. There were bunch of style issues in guc_submission.c that are highlighted now by checkpatch. Fix those. Update name in Documentation/gpu. (Joonas) v2: Rebase. v3: Rebase. Signed-off-by: Sagar Arun Kamble Cc: Michal Wajdeczko Cc: Michal Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Acked-by: Chris Wilson Acked-by: Joonas Lahtinen Acked-by: Oscar Mateo Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/1510839162-25197-6-git-send-email-sagar.a.kamble@intel.com Signed-off-by: Chris Wilson --- Documentation/gpu/i915.rst | 4 +- drivers/gpu/drm/i915/Makefile | 6 +- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/intel_guc.c | 2 +- ...uc_submission.c => intel_guc_submission.c} | 58 ++++++++++++------- ...uc_submission.h => intel_guc_submission.h} | 1 + drivers/gpu/drm/i915/intel_uc.c | 2 +- 7 files changed, 45 insertions(+), 30 deletions(-) rename drivers/gpu/drm/i915/{i915_guc_submission.c => intel_guc_submission.c} (97%) rename drivers/gpu/drm/i915/{i915_guc_submission.h => intel_guc_submission.h} (98%) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 2e7ee0313c1c..21577eabaf78 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -350,10 +350,10 @@ GuC-specific firmware loader GuC-based command submission ---------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/i915_guc_submission.c +.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_submission.c :doc: GuC-based command submission -.. kernel-doc:: drivers/gpu/drm/i915/i915_guc_submission.c +.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_submission.c :internal: GuC Firmware Layout diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index f7afd44214b5..c3649ec5b041 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -82,10 +82,10 @@ i915-y += intel_uc.o \ intel_uc_fw.o \ intel_guc.o \ intel_guc_ct.o \ - intel_guc_log.o \ intel_guc_fw.o \ - intel_huc.o \ - i915_guc_submission.o + intel_guc_log.o \ + intel_guc_submission.o \ + intel_huc.o # autogenerated null render state i915-y += intel_renderstate_gen6.o \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ce023587dd30..ff8f508a7661 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -30,7 +30,7 @@ #include #include #include "intel_drv.h" -#include "i915_guc_submission.h" +#include "intel_guc_submission.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) { diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 9678630a1c70..823d0c2e9ad2 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -23,8 +23,8 @@ */ #include "intel_guc.h" +#include "intel_guc_submission.h" #include "i915_drv.h" -#include "i915_guc_submission.h" static void gen8_guc_raise_irq(struct intel_guc *guc) { diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c similarity index 97% rename from drivers/gpu/drm/i915/i915_guc_submission.c rename to drivers/gpu/drm/i915/intel_guc_submission.c index 22dbc254ee85..9c9e6edfd5be 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -25,7 +25,7 @@ #include #include -#include "i915_guc_submission.h" +#include "intel_guc_submission.h" #include "i915_drv.h" /** @@ -102,7 +102,7 @@ static int __reserve_doorbell(struct intel_guc_client *client) * priority contexts, the second half for high-priority ones. */ offset = 0; - end = GUC_NUM_DOORBELLS/2; + end = GUC_NUM_DOORBELLS / 2; if (is_high_priority(client)) { offset = end; end += offset; @@ -221,7 +221,8 @@ static int __destroy_doorbell(struct intel_guc_client *client) /* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit * to go to zero after updating db_status before we call the GuC to - * release the doorbell */ + * release the doorbell + */ if (wait_for_us(!(I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID), 10)) WARN_ONCE(true, "Doorbell never became invalid after disable\n"); @@ -270,7 +271,7 @@ static int destroy_doorbell(struct intel_guc_client *client) return 0; } -static unsigned long __select_cacheline(struct intel_guc* guc) +static unsigned long __select_cacheline(struct intel_guc *guc) { unsigned long offset; @@ -281,7 +282,7 @@ static unsigned long __select_cacheline(struct intel_guc* guc) guc->db_cacheline += cache_line_size(); DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n", - offset, guc->db_cacheline, cache_line_size()); + offset, guc->db_cacheline, cache_line_size()); return offset; } @@ -367,7 +368,8 @@ static void guc_stage_desc_init(struct intel_guc *guc, desc = __get_stage_desc(client); memset(desc, 0, sizeof(*desc)); - desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | GUC_STAGE_DESC_ATTR_KERNEL; + desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | + GUC_STAGE_DESC_ATTR_KERNEL; if (is_high_priority(client)) desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT; desc->stage_id = client->stage_id; @@ -403,7 +405,8 @@ static void guc_stage_desc_init(struct intel_guc *guc, guc_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; /* XXX: In direct submission, the GuC wants the HW context id - * here. In proxy submission, it wants the stage id */ + * here. In proxy submission, it wants the stage id + */ lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) | (guc_engine_id << GUC_ELC_ENGINE_OFFSET); @@ -416,7 +419,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, } DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", - client->engines, desc->engines_used); + client->engines, desc->engines_used); WARN_ON(desc->engines_used == 0); /* @@ -551,7 +554,8 @@ static void guc_add_request(struct intel_guc *guc, { struct intel_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(rq->ctx, engine)); + u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(rq->ctx, + engine)); u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); spin_lock(&client->wq_lock); @@ -777,7 +781,8 @@ static void guc_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&rq->priotree.link); __i915_gem_request_submit(rq); - trace_i915_gem_request_in(rq, port_index(port, execlists)); + trace_i915_gem_request_in(rq, + port_index(port, execlists)); last = rq; submit = true; } @@ -910,7 +915,8 @@ static int guc_init_doorbell_hw(struct intel_guc *guc) if (recreate_first_client) { ret = __reserve_doorbell(client); if (unlikely(ret)) { - DRM_ERROR("Couldn't re-reserve first client db: %d\n", ret); + DRM_ERROR("Couldn't re-reserve first client db: %d\n", + ret); return ret; } @@ -918,7 +924,8 @@ static int guc_init_doorbell_hw(struct intel_guc *guc) } /* Now for every client (and not only execbuf_client) make sure their - * doorbells are known by the GuC */ + * doorbells are known by the GuC + */ ret = __create_doorbell(guc->execbuf_client); if (ret) return ret; @@ -941,11 +948,11 @@ static int guc_init_doorbell_hw(struct intel_guc *guc) * @dev_priv: driver private data structure * @engines: The set of engines to enable for this client * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW - * The kernel client to replace ExecList submission is created with - * NORMAL priority. Priority of a client for scheduler can be HIGH, - * while a preemption context can use CRITICAL. + * The kernel client to replace ExecList submission is created with + * NORMAL priority. Priority of a client for scheduler can be HIGH, + * while a preemption context can use CRITICAL. * @ctx: the context that owns the client (we use the default render - * context) + * context) * * Return: An intel_guc_client object if success, else NULL. */ @@ -973,7 +980,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv, spin_lock_init(&client->wq_lock); ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, - GFP_KERNEL); + GFP_KERNEL); if (ret < 0) goto err_client; @@ -1042,7 +1049,8 @@ static void guc_client_free(struct intel_guc_client *client) /* FIXME: in many cases, by the time we get here the GuC has been * reset, so we cannot destroy the doorbell properly. Ignore the - * error message for now */ + * error message for now + */ destroy_doorbell(client); guc_stage_desc_fini(client->guc, client); i915_gem_object_unpin_map(client->vma->obj); @@ -1178,7 +1186,8 @@ static int guc_ads_create(struct intel_guc *guc) * because our GuC shared data is there. */ blob->ads.golden_context_lrca = - guc_ggtt_offset(dev_priv->kernel_context->engine[RCS].state) + skipped_offset; + guc_ggtt_offset(dev_priv->kernel_context->engine[RCS].state) + + skipped_offset; /* * The GuC expects us to exclude the portion of the context image that @@ -1187,7 +1196,8 @@ static int guc_ads_create(struct intel_guc *guc) * dwords). Weird guc is weird. */ for_each_engine(engine, dev_priv, id) - blob->ads.eng_state_size[engine->guc_id] = engine->context_size - skipped_size; + blob->ads.eng_state_size[engine->guc_id] = + engine->context_size - skipped_size; base = guc_ggtt_offset(vma); blob->ads.scheduler_policies = base + ptr_offset(blob, policies); @@ -1317,7 +1327,9 @@ static void guc_interrupts_capture(struct drm_i915_private *dev_priv) enum intel_engine_id id; int irqs; - /* tell all command streamers to forward interrupts (but not vblank) to GuC */ + /* tell all command streamers to forward interrupts (but not vblank) + * to GuC + */ irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); for_each_engine(engine, dev_priv, id) I915_WRITE(RING_MODE_GEN7(engine), irqs); @@ -1435,7 +1447,9 @@ int intel_guc_submission_enable(struct intel_guc *guc) guc_interrupts_capture(dev_priv); for_each_engine(engine, dev_priv, id) { - struct intel_engine_execlists * const execlists = &engine->execlists; + struct intel_engine_execlists * const execlists = + &engine->execlists; + execlists->tasklet.func = guc_submission_tasklet; engine->park = guc_submission_park; engine->unpark = guc_submission_unpark; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h similarity index 98% rename from drivers/gpu/drm/i915/i915_guc_submission.h rename to drivers/gpu/drm/i915/intel_guc_submission.h index f8ae258c89d1..e901192ee469 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.h +++ b/drivers/gpu/drm/i915/intel_guc_submission.h @@ -67,6 +67,7 @@ struct intel_guc_client { u16 doorbell_id; unsigned long doorbell_offset; + /* Protects GuC client's WQ access */ spinlock_t wq_lock; /* Per-engine counts of GuC submissions */ u64 submissions[I915_NUM_ENGINES]; diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 2fb65eb7cf74..1e2a30a40ede 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -23,8 +23,8 @@ */ #include "intel_uc.h" +#include "intel_guc_submission.h" #include "i915_drv.h" -#include "i915_guc_submission.h" /* Reset GuC providing us with fresh state for both GuC and HuC. */ From 738a8143866c4cdbe4c008f1583a0c55338266b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 15 Nov 2017 22:04:42 +0200 Subject: [PATCH 240/260] drm/i915: Don't sanitize frame start delay if the pipe is off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid touching PIPECONF in intel_sanitize_crtc() unless the pipe is actually on. Should cure some unclaimed register accesses during reset, as we are rather cavalier in our approach to powerdomain management. We don't have to sanitize this if the pipe is off since we will overwrite the frame start delay anyway when turning the pipe on. v2: Amended commit message to implicate the reset path (Chris) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102249 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171115200442.15051-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 23aa7191024e..e6fcbc5abc75 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14812,7 +14812,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; /* Clear any frame start delays used for debugging left by the BIOS */ - if (!transcoder_is_dsi(cpu_transcoder)) { + if (crtc->active && !transcoder_is_dsi(cpu_transcoder)) { i915_reg_t reg = PIPECONF(cpu_transcoder); I915_WRITE(reg, From 4fe95b042d9d36744f6848069a92759c10dd2a0e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Nov 2017 15:25:58 +0000 Subject: [PATCH 241/260] drm/i915/selftests: exercise_ggtt may have nothing to do When operating on the live_ggtt we have to find a usuable hole for our test. It is possible for there to be no hole we can use, so initialise the err to 0 for the early exit. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171115152558.31252-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index d9560d8a6cc8..3dcf886a2802 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -958,7 +958,7 @@ static int exercise_ggtt(struct drm_i915_private *i915, u64 hole_start, hole_end, last = 0; struct drm_mm_node *node; IGT_TIMEOUT(end_time); - int err = -ENODEV; + int err = 0; mutex_lock(&i915->drm.struct_mutex); restart: From ec2f343e720eca5ec7015b4d2b5fc2703604835e Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 14 Nov 2017 11:47:53 -0800 Subject: [PATCH 242/260] drm/i915/cnl: Remove spurious central_freq. "Display software must leave this field at the default value. It no longer needs to be configured as part of PLL programming." We respect this already and we are setting up the default one line below: "DPLL_CFGCR1_CENTRAL_FREQ". Also we don't touch anywhere else this central_freq for cnl. So let's remove from the final write. No functional change. Only a clean-up patch. Cc: Manasi Navare Cc: Mika Kahola Cc: James Ausmus Signed-off-by: Rodrigo Vivi Reviewed-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20171114194759.24541-2-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index be74d4767c8a..61c684ac47af 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2265,7 +2265,6 @@ static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, DPLL_CFGCR1_QDIV_MODE(wrpll_params.qdiv_mode) | DPLL_CFGCR1_KDIV(wrpll_params.kdiv) | DPLL_CFGCR1_PDIV(wrpll_params.pdiv) | - wrpll_params.central_freq | DPLL_CFGCR1_CENTRAL_FREQ; memset(&crtc_state->dpll_hw_state, 0, From ecc2069a025e34b90033602a36d4cc171387a950 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 14 Nov 2017 11:47:54 -0800 Subject: [PATCH 243/260] drm/i915/cnl: Remove useless conversion. No functional change. Just starting the wrpll fixes with a clean-up to make units a bit more clear. Cc: Mika Kahola Cc: Manasi Navare Cc: James Ausmus Signed-off-by: Rodrigo Vivi Reviewed-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20171114194759.24541-3-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 61c684ac47af..db7afd314462 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2198,11 +2198,11 @@ static void cnl_wrpll_params_populate(struct skl_wrpll_params *params, uint32_t } static bool -cnl_ddi_calculate_wrpll(int clock /* in Hz */, +cnl_ddi_calculate_wrpll(int clock, struct drm_i915_private *dev_priv, struct skl_wrpll_params *wrpll_params) { - uint64_t afe_clock = clock * 5 / KHz(1); /* clocks in kHz */ + uint64_t afe_clock = clock * 5; unsigned int dco_min = 7998 * KHz(1); unsigned int dco_max = 10000 * KHz(1); unsigned int dco_mid = (dco_min + dco_max) / 2; @@ -2255,7 +2255,7 @@ static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, cfgcr0 = DPLL_CFGCR0_HDMI_MODE; - if (!cnl_ddi_calculate_wrpll(clock * 1000, dev_priv, &wrpll_params)) + if (!cnl_ddi_calculate_wrpll(clock, dev_priv, &wrpll_params)) return false; cfgcr0 |= DPLL_CFGCR0_DCO_FRACTION(wrpll_params.dco_fraction) | From 5eca81de88927f3e43fdd6e90e8fa5204be996b7 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 14 Nov 2017 11:47:55 -0800 Subject: [PATCH 244/260] drm/i915/cnl: Fix, simplify and unify wrpll variable sizes. - 64 bits is not needed for afe_clock now we don't convert that to Hz. - 16 bits is not enough for all dco stuff. - unsigned is not relevant/needed for all divisors values. Cc: Mika Kahola Cc: Manasi Navare Cc: James Ausmus Signed-off-by: Rodrigo Vivi Reviewed-by: Manasi Navare Date: Tue, 14 Nov 2017 15:42:23 -0800 Subject: [PATCH 245/260] drm/i915/cnl: Fix wrpll math for higher freqs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spec describe all values in MHz. We handle our clocks in KHz. This includes the best_dco_centrality that was forgot in the same unity as spec. Consequently we couldn't get a good divider for high frequenies. Hence HDMI 2.0 wasn't working. Spec tells 999999 for initial best_dco_centrality meaning the max value in MHz. Since we convert dco from MHz to KHz we also need to convert this initial best_doc_centrality to 999999000 or 999999999 or even better, to the max that its variable allow. This patch also replaces the use of "* KHz(1)" with the values directly on KHz to avoid future confusion. v2: Use U32_MAX instead of random 99999 as spec tells. (Ville). Cc: Ville Syrjälä Cc: Shashank Sharma Cc: Mika Kahola Cc: Manasi Navare Cc: James Ausmus Signed-off-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171114234223.10600-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index fba969cbda37..6cc12abdd39e 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2201,8 +2201,8 @@ cnl_ddi_calculate_wrpll(int clock, struct skl_wrpll_params *wrpll_params) { u32 afe_clock = clock * 5; - u32 dco_min = 7998 * KHz(1); - u32 dco_max = 10000 * KHz(1); + u32 dco_min = 7998000; + u32 dco_max = 10000000; u32 dco_mid = (dco_min + dco_max) / 2; static const int dividers[] = { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30, 32, 36, 40, @@ -2211,7 +2211,7 @@ cnl_ddi_calculate_wrpll(int clock, 84, 88, 90, 92, 96, 98, 100, 102, 3, 5, 7, 9, 15, 21 }; u32 dco, best_dco = 0, dco_centrality = 0; - u32 best_dco_centrality = 999999; + u32 best_dco_centrality = U32_MAX; /* Spec meaning of 999999 MHz */ int d, best_div = 0, pdiv = 0, qdiv = 0, kdiv = 0; for (d = 0; d < ARRAY_SIZE(dividers); d++) { From cacf6fe7c6c4ac9e748f41d0de4293b2aea5ae07 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 14 Nov 2017 11:47:57 -0800 Subject: [PATCH 246/260] drm/i915/cnl: Don't blindly replace qdiv. Accordingly to spec "If Kdiv != 2, then Qdiv must be 1." but we already handle qdiv values properly and this case here should be spurious. But instead of blindly replacing let's warn loudly instead. Because it means something was really wrong on initial setup. Cc: Mika Kahola Cc: Manasi Navare Cc: James Ausmus Signed-off-by: Rodrigo Vivi Reviewed-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20171114194759.24541-6-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 6cc12abdd39e..361b7102b602 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2184,8 +2184,7 @@ static void cnl_wrpll_params_populate(struct skl_wrpll_params *params, WARN(1, "Incorrect PDiv\n"); } - if (kdiv != 2) - qdiv = 1; + WARN_ON(kdiv != 2 && qdiv != 1); params->qdiv_ratio = qdiv; params->qdiv_mode = (qdiv == 1) ? 0 : 1; From 8a00678a096445716861d2c0bd2a7007ad373f4d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 15 Nov 2017 10:42:57 -0800 Subject: [PATCH 247/260] drm/i915/cnl: Simplify dco_fraction calculation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I confess I never fully understood that previous calculation, so this is not a "fix". But let's simplify this math so poor brains like mine can read and make some sense of it in the future. v2: Don't follow the spec since that gives invalid values and it is also confusing. This Ville's version is much simpler. v3: Use u64 cast instead of declaring a u64 dco. (Ville). Cc: Ville Syrjälä Cc: Mika Kahola Cc: Manasi Navare Cc: James Ausmus Signed-off-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171115184257.8633-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 361b7102b602..51c5ae4e9116 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2153,6 +2153,8 @@ static void cnl_wrpll_params_populate(struct skl_wrpll_params *params, u32 dco_freq, u32 ref_freq, int pdiv, int qdiv, int kdiv) { + u32 dco; + switch (kdiv) { case 1: params->kdiv = 1; @@ -2189,9 +2191,10 @@ static void cnl_wrpll_params_populate(struct skl_wrpll_params *params, params->qdiv_ratio = qdiv; params->qdiv_mode = (qdiv == 1) ? 0 : 1; - params->dco_integer = div_u64(dco_freq, ref_freq); - params->dco_fraction = div_u64((div_u64((uint64_t)dco_freq<<15, (uint64_t)ref_freq) - - ((uint64_t)params->dco_integer<<15)) * 0x8000, 0x8000); + dco = div_u64((u64)dco_freq << 15, ref_freq); + + params->dco_integer = dco >> 15; + params->dco_fraction = dco & 0x7fff; } static bool From 9672a69c761468ec920a8bc2442b8397b20578f8 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 15 Nov 2017 10:42:05 -0800 Subject: [PATCH 248/260] drm/i915/cnl: Extend HDMI 2.0 support to CNL. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting on GLK we support HDMI 2.0. So this patch only extend the work Shashank has made to GLK to CNL. v2: The version that compiles :/ v3: Invert order to newer || older platforms check. (Ville). Cc: Ville Syrjälä Cc: Paulo Zanoni Cc: Shashank Sharma Cc: Manasi Navare Signed-off-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171115184205.8104-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_hdmi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 2d95db64cdf2..9d5e72728475 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1235,7 +1235,7 @@ static int intel_hdmi_source_max_tmds_clock(struct intel_encoder *encoder) &dev_priv->vbt.ddi_port_info[encoder->port]; int max_tmds_clock; - if (IS_GEMINILAKE(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) max_tmds_clock = 594000; else if (INTEL_GEN(dev_priv) >= 8 || IS_HASWELL(dev_priv)) max_tmds_clock = 300000; @@ -1511,7 +1511,8 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, pipe_config->lane_count = 4; - if (scdc->scrambling.supported && IS_GEMINILAKE(dev_priv)) { + if (scdc->scrambling.supported && (INTEL_GEN(dev_priv) >= 10 || + IS_GEMINILAKE(dev_priv))) { if (scdc->scrambling.low_rates) pipe_config->hdmi_scrambling = true; @@ -2033,7 +2034,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, connector->doublescan_allowed = 0; connector->stereo_allowed = 1; - if (IS_GEMINILAKE(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) connector->ycbcr_420_allowed = true; intel_hdmi->ddc_bus = intel_hdmi_ddc_pin(dev_priv, port); From 55bd6bd75717ca67b790cc1a457cab92adcce148 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Thu, 16 Nov 2017 14:06:31 -0800 Subject: [PATCH 249/260] drm/i915/selftests: Add a GuC doorbells selftest The first test aims to check guc_init_doorbell_hw, changing the existing guc clients and doorbells state before calling it. The second test tries to create as many clients as it is currently possible (currently limited to max number of doorbells) and exercise the doorbell alloc/dealloc code. Since our usage mode require very few clients/doorbells, this code has been exercised very lightly and it's good to have a simple test for it. As reference, this test already helped identify the bug fixed by commit 7f1ea2ac3017 ("drm/i915/guc: Fix doorbell id selection"). v2: Extend number of clients; check for client allocation failure when number of doorbells is exceeded; validate client properties; reuse guc_init_doorbell_hw (Chris). v3: guc_init_doorbell_hw test added per Chris suggestion. v4: Try to explain why guc_init_doorbell_hw exist and comment some details in the subtest. v5: Remove redundant pr_info at the beginning of each subtest (Chris); rebase (s/i915_guc_client/intel_guc_client/). Signed-off-by: Michel Thierry Cc: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171116220632.1909-1-michel.thierry@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_guc_submission.c | 4 + .../drm/i915/selftests/i915_live_selftests.h | 1 + drivers/gpu/drm/i915/selftests/intel_guc.c | 367 ++++++++++++++++++ 3 files changed, 372 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/intel_guc.c diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 9c9e6edfd5be..cbf5a96f5806 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -1475,3 +1475,7 @@ void intel_guc_submission_disable(struct intel_guc *guc) guc_clients_destroy(guc); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_guc.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 54a73534b37e..1b750e92dd08 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -19,3 +19,4 @@ selftest(evict, i915_gem_evict_live_selftests) selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(contexts, i915_gem_context_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) +selftest(guc, intel_guc_live_selftest) diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c new file mode 100644 index 000000000000..f10029e18820 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -0,0 +1,367 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +/* max doorbell number + negative test for each client type */ +#define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM) + +struct intel_guc_client *clients[ATTEMPTS]; + +static bool available_dbs(struct intel_guc *guc, u32 priority) +{ + unsigned long offset; + unsigned long end; + u16 id; + + /* first half is used for normal priority, second half for high */ + offset = 0; + end = GUC_NUM_DOORBELLS / 2; + if (priority <= GUC_CLIENT_PRIORITY_HIGH) { + offset = end; + end += offset; + } + + id = find_next_zero_bit(guc->doorbell_bitmap, end, offset); + if (id < end) + return true; + + return false; +} + +static int check_all_doorbells(struct intel_guc *guc) +{ + u16 db_id; + + pr_info_once("Max number of doorbells: %d", GUC_NUM_DOORBELLS); + for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) { + if (!doorbell_ok(guc, db_id)) { + pr_err("doorbell %d, not ok\n", db_id); + return -EIO; + } + } + + return 0; +} + +/* + * Basic client sanity check, handy to validate create_clients. + */ +static int validate_client(struct intel_guc_client *client, + int client_priority, + bool is_preempt_client) +{ + struct drm_i915_private *dev_priv = guc_to_i915(client->guc); + struct i915_gem_context *ctx_owner = is_preempt_client ? + dev_priv->preempt_context : dev_priv->kernel_context; + + if (client->owner != ctx_owner || + client->engines != INTEL_INFO(dev_priv)->ring_mask || + client->priority != client_priority || + client->doorbell_id == GUC_DOORBELL_INVALID) + return -EINVAL; + else + return 0; +} + +/* + * Check that guc_init_doorbell_hw is doing what it should. + * + * During GuC submission enable, we create GuC clients and their doorbells, + * but after resetting the microcontroller (resume & gpu reset), these + * GuC clients are still around, but the status of their doorbells may be + * incorrect. This is the reason behind validating that the doorbells status + * expected by the driver matches what the GuC/HW have. + */ +static int igt_guc_init_doorbell_hw(void *args) +{ + struct drm_i915_private *dev_priv = args; + struct intel_guc *guc; + DECLARE_BITMAP(db_bitmap_bk, GUC_NUM_DOORBELLS); + int i, err = 0; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + mutex_lock(&dev_priv->drm.struct_mutex); + + guc = &dev_priv->guc; + if (!guc) { + pr_err("No guc object!\n"); + err = -EINVAL; + goto unlock; + } + + err = check_all_doorbells(guc); + if (err) + goto unlock; + + /* + * Get rid of clients created during driver load because the test will + * recreate them. + */ + guc_clients_destroy(guc); + if (guc->execbuf_client || guc->preempt_client) { + pr_err("guc_clients_destroy lied!\n"); + err = -EINVAL; + goto unlock; + } + + err = guc_clients_create(guc); + if (err) { + pr_err("Failed to create clients\n"); + goto unlock; + } + + err = validate_client(guc->execbuf_client, + GUC_CLIENT_PRIORITY_KMD_NORMAL, false); + if (err) { + pr_err("execbug client validation failed\n"); + goto out; + } + + err = validate_client(guc->preempt_client, + GUC_CLIENT_PRIORITY_KMD_HIGH, true); + if (err) { + pr_err("preempt client validation failed\n"); + goto out; + } + + /* each client should have received a doorbell during alloc */ + if (!has_doorbell(guc->execbuf_client) || + !has_doorbell(guc->preempt_client)) { + pr_err("guc_clients_create didn't create doorbells\n"); + err = -EINVAL; + goto out; + } + + /* + * Basic test - an attempt to reallocate a valid doorbell to the + * client it is currently assigned should not cause a failure. + */ + err = guc_init_doorbell_hw(guc); + if (err) + goto out; + + /* + * Negative test - a client with no doorbell (invalid db id). + * Each client gets a doorbell when it is created, after destroying + * the doorbell, the db id is changed to GUC_DOORBELL_INVALID and the + * firmware will reject any attempt to allocate a doorbell with an + * invalid id (db has to be reserved before allocation). + */ + destroy_doorbell(guc->execbuf_client); + if (has_doorbell(guc->execbuf_client)) { + pr_err("destroy db did not work\n"); + err = -EINVAL; + goto out; + } + + err = guc_init_doorbell_hw(guc); + if (err != -EIO) { + pr_err("unexpected (err = %d)", err); + goto out; + } + + if (!available_dbs(guc, guc->execbuf_client->priority)) { + pr_err("doorbell not available when it should\n"); + err = -EIO; + goto out; + } + + /* clean after test */ + err = create_doorbell(guc->execbuf_client); + if (err) { + pr_err("recreate doorbell failed\n"); + goto out; + } + + /* + * Negative test - doorbell_bitmap out of sync, will trigger a few of + * WARN_ON(!doorbell_ok(guc, db_id)) but that's ok as long as the + * doorbells from our clients don't fail. + */ + bitmap_copy(db_bitmap_bk, guc->doorbell_bitmap, GUC_NUM_DOORBELLS); + for (i = 0; i < GUC_NUM_DOORBELLS; i++) + if (i % 2) + test_and_change_bit(i, guc->doorbell_bitmap); + + err = guc_init_doorbell_hw(guc); + if (err) { + pr_err("out of sync doorbell caused an error\n"); + goto out; + } + + /* restore 'correct' db bitmap */ + bitmap_copy(guc->doorbell_bitmap, db_bitmap_bk, GUC_NUM_DOORBELLS); + err = guc_init_doorbell_hw(guc); + if (err) { + pr_err("restored doorbell caused an error\n"); + goto out; + } + +out: + /* + * Leave clean state for other test, plus the driver always destroy the + * clients during unload. + */ + guc_clients_destroy(guc); + guc_clients_create(guc); +unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + return err; +} + +/* + * Create as many clients as number of doorbells. Note that there's already + * client(s)/doorbell(s) created during driver load, but this test creates + * its own and do not interact with the existing ones. + */ +static int igt_guc_doorbells(void *arg) +{ + struct drm_i915_private *dev_priv = arg; + struct intel_guc *guc; + int i, err = 0; + u16 db_id; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + mutex_lock(&dev_priv->drm.struct_mutex); + + guc = &dev_priv->guc; + if (!guc) { + pr_err("No guc object!\n"); + err = -EINVAL; + goto unlock; + } + + err = check_all_doorbells(guc); + if (err) + goto unlock; + + for (i = 0; i < ATTEMPTS; i++) { + clients[i] = guc_client_alloc(dev_priv, + INTEL_INFO(dev_priv)->ring_mask, + i % GUC_CLIENT_PRIORITY_NUM, + dev_priv->kernel_context); + + if (!clients[i]) { + pr_err("[%d] No guc client\n", i); + err = -EINVAL; + goto out; + } + + if (IS_ERR(clients[i])) { + if (PTR_ERR(clients[i]) != -ENOSPC) { + pr_err("[%d] unexpected error\n", i); + err = PTR_ERR(clients[i]); + goto out; + } + + if (available_dbs(guc, i % GUC_CLIENT_PRIORITY_NUM)) { + pr_err("[%d] non-db related alloc fail\n", i); + err = -EINVAL; + goto out; + } + + /* expected, ran out of dbs for this client type */ + continue; + } + + /* + * The check below is only valid because we keep a doorbell + * assigned during the whole life of the client. + */ + if (clients[i]->stage_id >= GUC_NUM_DOORBELLS) { + pr_err("[%d] more clients than doorbells (%d >= %d)\n", + i, clients[i]->stage_id, GUC_NUM_DOORBELLS); + err = -EINVAL; + goto out; + } + + err = validate_client(clients[i], + i % GUC_CLIENT_PRIORITY_NUM, false); + if (err) { + pr_err("[%d] client_alloc sanity check failed!\n", i); + err = -EINVAL; + goto out; + } + + db_id = clients[i]->doorbell_id; + + /* + * Client alloc gives us a doorbell, but we want to exercise + * this ourselves (this resembles guc_init_doorbell_hw) + */ + destroy_doorbell(clients[i]); + if (clients[i]->doorbell_id != GUC_DOORBELL_INVALID) { + pr_err("[%d] destroy db did not work!\n", i); + err = -EINVAL; + goto out; + } + + err = __reserve_doorbell(clients[i]); + if (err) { + pr_err("[%d] Failed to reserve a doorbell\n", i); + goto out; + } + + __update_doorbell_desc(clients[i], clients[i]->doorbell_id); + err = __create_doorbell(clients[i]); + if (err) { + pr_err("[%d] Failed to create a doorbell\n", i); + goto out; + } + + /* doorbell id shouldn't change, we are holding the mutex */ + if (db_id != clients[i]->doorbell_id) { + pr_err("[%d] doorbell id changed (%d != %d)\n", + i, db_id, clients[i]->doorbell_id); + err = -EINVAL; + goto out; + } + + err = check_all_doorbells(guc); + if (err) + goto out; + } + +out: + for (i = 0; i < ATTEMPTS; i++) + if (!IS_ERR_OR_NULL(clients[i])) + guc_client_free(clients[i]); +unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + return err; +} + +int intel_guc_live_selftest(struct drm_i915_private *dev_priv) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_guc_init_doorbell_hw), + SUBTEST(igt_guc_doorbells), + }; + + if (!i915_modparams.enable_guc_submission) + return 0; + + return i915_subtests(tests, dev_priv); +} From 290b20a6d34acc188aec01669c210d731cc0ec15 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Nov 2017 21:56:55 +0000 Subject: [PATCH 250/260] drm/i915: Add might_sleep() check to wait_for() We should long past the time of trying to use wait_for() from inside atomic contexts, so add a might_sleep() check to prevent misuse. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171114215655.4849-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_drv.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index e9b66e0cb647..645c5e8ee9f0 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -47,14 +47,11 @@ * contexts. Note that it's important that we check the condition again after * having timed out, since the timeout could be due to preemption or similar and * we've never had a chance to check the condition before the timeout. - * - * TODO: When modesetting has fully transitioned to atomic, the below - * drm_can_sleep() can be removed and in_atomic()/!in_atomic() asserts - * added. */ #define _wait_for(COND, US, W) ({ \ unsigned long timeout__ = jiffies + usecs_to_jiffies(US) + 1; \ int ret__; \ + might_sleep(); \ for (;;) { \ bool expired__ = time_after(jiffies, timeout__); \ if (COND) { \ @@ -65,11 +62,7 @@ ret__ = -ETIMEDOUT; \ break; \ } \ - if ((W) && drm_can_sleep()) { \ - usleep_range((W), (W)*2); \ - } else { \ - cpu_relax(); \ - } \ + usleep_range((W), (W) * 2); \ } \ ret__; \ }) From 41729bf2248bc8593e5103d43974079cc269524c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Nov 2017 17:35:20 +0000 Subject: [PATCH 251/260] drm/i915: Mark the userptr invalidate workqueue as WQ_MEM_RECLAIM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 21cc6431e0c2 ("drm/i915: Mark the userptr invalidate workqueue as WQ_MEM_RECLAIM") tried to fixup the check_flush_dependency warning for hitting i915_gem_userptr_mn_invalidate_range_start from within the shrinker, but I failed to notice userptr has 2 similarly named workqueues. I marked up i915-userptr-acquire as WQ_MEM_RECLAIM whereas we only wait upon i915-userptr-release from inside the reclaim paths. [62530.869510] workqueue: PF_MEMALLOC task 7983(gem_shrink) is flushing !WQ_MEM_RECLAIM i915-userptr-release: (null) [62530.869515] ------------[ cut here ]------------ [62530.869519] WARNING: CPU: 1 PID: 7983 at kernel/workqueue.c:2434 check_flush_dependency+0x7f/0x110 [62530.869519] Modules linked in: pegasus mii ip6table_filter ip6_tables bnep iptable_filter snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic binfmt_misc nls_iso8859_1 intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_intel snd_hda_codec kvm_intel snd_hda_core snd_hwdep kvm snd_pcm irqbypass snd_seq_midi snd_seq_midi_event snd_rawmidi crct10dif_pclmul crc32_pclmul 8250_dw ghash_clmulni_intel snd_seq pcbc snd_seq_device snd_timer btusb aesni_intel btrtl btbcm aes_x86_64 iwlwifi btintel crypto_simd glue_helper cryptd bluetooth snd intel_cstate input_leds idma64 intel_rapl_perf ecdh_generic serio_raw soundcore cfg80211 wmi_bmof virt_dma intel_lpss_pci intel_lpss acpi_als kfifo_buf industrialio winbond_cir soc_button_array rc_core spidev tpm_crb intel_hid acpi_pad mac_hid sparse_keymap [62530.869546] parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid i915 i2c_algo_bit prime_numbers drm_kms_helper syscopyarea e1000e sysfillrect sysimgblt fb_sys_fops ahci ptp pps_core libahci drm wmi video i2c_hid hid [62530.869557] CPU: 1 PID: 7983 Comm: gem_shrink Tainted: G U W L 4.14.0-rc8-drm-tip-ww45-commit-1342299+ #1 [62530.869558] Hardware name: Intel Corporation CoffeeLake Client Platform/CoffeeLake H DDR4 RVP, BIOS CNLSFWR1.R00.X098.A00.1707301945 07/30/2017 [62530.869559] task: ffffa1049dbeec80 task.stack: ffffae7d05c44000 [62530.869560] RIP: 0010:check_flush_dependency+0x7f/0x110 [62530.869561] RSP: 0018:ffffae7d05c473a0 EFLAGS: 00010286 [62530.869562] RAX: 000000000000006e RBX: ffffa1049540f400 RCX: ffffffffa3e55788 [62530.869562] RDX: 0000000000000000 RSI: 0000000000000092 RDI: 0000000000000202 [62530.869563] RBP: ffffae7d05c473c0 R08: 000000000000006e R09: 000000000038bb0e [62530.869563] R10: 0000000000000000 R11: 000000000000006e R12: ffffa1049dbeec80 [62530.869564] R13: 0000000000000000 R14: 0000000000000000 R15: ffffae7d05c473e0 [62530.869565] FS: 00007f621b129880(0000) GS:ffffa1050b240000(0000) knlGS:0000000000000000 [62530.869566] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [62530.869566] CR2: 00007f6214400000 CR3: 0000000353a17003 CR4: 00000000003606e0 [62530.869567] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [62530.869567] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [62530.869568] Call Trace: [62530.869570] flush_workqueue+0x115/0x3d0 [62530.869573] ? wake_up_process+0x15/0x20 [62530.869596] i915_gem_userptr_mn_invalidate_range_start+0x12f/0x160 [i915] [62530.869614] ? i915_gem_userptr_mn_invalidate_range_start+0x12f/0x160 [i915] [62530.869616] __mmu_notifier_invalidate_range_start+0x55/0x80 [62530.869618] try_to_unmap_one+0x791/0x8b0 [62530.869620] ? call_rwsem_down_read_failed+0x18/0x30 [62530.869622] rmap_walk_anon+0x10b/0x260 [62530.869624] rmap_walk+0x48/0x60 [62530.869625] try_to_unmap+0x93/0xf0 [62530.869626] ? page_remove_rmap+0x2a0/0x2a0 [62530.869627] ? page_not_mapped+0x20/0x20 [62530.869629] ? page_get_anon_vma+0x90/0x90 [62530.869630] ? invalid_mkclean_vma+0x20/0x20 [62530.869631] migrate_pages+0x946/0xaa0 [62530.869633] ? __ClearPageMovable+0x10/0x10 [62530.869635] ? isolate_freepages_block+0x3c0/0x3c0 [62530.869636] compact_zone+0x22f/0x970 [62530.869638] compact_zone_order+0xa3/0xd0 [62530.869640] try_to_compact_pages+0x1a5/0x2a0 [62530.869641] ? try_to_compact_pages+0x1a5/0x2a0 [62530.869643] __alloc_pages_direct_compact+0x50/0x110 [62530.869644] __alloc_pages_slowpath+0x4da/0xf30 [62530.869646] __alloc_pages_nodemask+0x262/0x280 [62530.869648] alloc_pages_vma+0x165/0x1e0 [62530.869649] shmem_alloc_hugepage+0xd0/0x130 [62530.869651] ? __radix_tree_insert+0x45/0x230 [62530.869652] ? __vm_enough_memory+0x29/0x130 [62530.869654] shmem_alloc_and_acct_page+0x10d/0x1e0 [62530.869655] shmem_getpage_gfp+0x426/0xc00 [62530.869657] shmem_fault+0xa0/0x1e0 [62530.869659] ? file_update_time+0x60/0x110 [62530.869660] __do_fault+0x1e/0xc0 [62530.869661] __handle_mm_fault+0xa35/0x1170 [62530.869662] handle_mm_fault+0xcc/0x1c0 [62530.869664] __do_page_fault+0x262/0x4f0 [62530.869666] do_page_fault+0x2e/0xe0 [62530.869667] page_fault+0x22/0x30 [62530.869668] RIP: 0033:0x404335 [62530.869669] RSP: 002b:00007fff7829e420 EFLAGS: 00010216 [62530.869670] RAX: 00007f6210400000 RBX: 0000000000000004 RCX: 0000000000b80000 [62530.869670] RDX: 0000000000002e01 RSI: 0000000000008000 RDI: 0000000000000004 [62530.869671] RBP: 0000000000000019 R08: 0000000000000002 R09: 0000000000000000 [62530.869671] R10: 0000000000000559 R11: 0000000000000246 R12: 0000000008000000 [62530.869672] R13: 00000000004042f0 R14: 0000000000000004 R15: 000000000000007e [62530.869673] Code: 00 8b b0 18 05 00 00 48 8d 8b b0 00 00 00 48 8d 90 c0 06 00 00 4d 89 f0 48 c7 c7 40 c0 c8 a3 c6 05 68 c5 e8 00 01 e8 c2 68 04 00 <0f> ff 4d 85 ed 74 18 49 8b 45 20 48 8b 70 08 8b 86 00 01 00 00 [62530.869691] ---[ end trace 01e01ad0ff5781f8 ]--- Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103739 Fixes: 21cc6431e0c2 ("drm/i915: Mark the userptr invalidate workqueue as WQ_MEM_RECLAIM") Signed-off-by: Chris Wilson Cc: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20171114173520.8829-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_userptr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index e26b23171b56..ccde12d9e5f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -172,7 +172,9 @@ i915_mmu_notifier_create(struct mm_struct *mm) spin_lock_init(&mn->lock); mn->mn.ops = &i915_gem_userptr_notifier; mn->objects = RB_ROOT_CACHED; - mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0); + mn->wq = alloc_workqueue("i915-userptr-release", + WQ_UNBOUND | WQ_MEM_RECLAIM, + 0); if (mn->wq == NULL) { kfree(mn); return ERR_PTR(-ENOMEM); @@ -827,7 +829,7 @@ int i915_gem_init_userptr(struct drm_i915_private *dev_priv) dev_priv->mm.userptr_wq = alloc_workqueue("i915-userptr-acquire", - WQ_HIGHPRI | WQ_MEM_RECLAIM, + WQ_HIGHPRI | WQ_UNBOUND, 0); if (!dev_priv->mm.userptr_wq) return -ENOMEM; From 33a49868e5d8d98f01b41f902dd487530cd56b5e Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 13 Nov 2017 15:40:43 +0100 Subject: [PATCH 252/260] drm/i915: Enable FIFO underrun reporting after initial fastset, v4. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The firmware may have set up the pipe correctly, but the FIFO underrun and CRC interrupts are likely not enabled. This resulted in debugfs_test.read_all_entries failing on haswell, because of a timeout when reading the crc debugfs entry. Solve this by enabling FIFO underrun reporting after the initial fastset, which lets interrupts be generated as expected. Changes since v1: - Always enable CPU FIFO underrun reporting for >GEN2, and handle GEN2 correctly. Changes since v2: - Remove unneeded HAS_DDI, simplify GEN2 case. Changes since v3: - Use intel_crtc_pch_transcoder to determine pch transcoder for underruns. (Ville) - Remove crtc->config dereference in intel_crtc_pch_transcoder. (Ville) Testcase: debugfs_test.read_all_entries Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171113144043.58658-1-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_display.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e6fcbc5abc75..5196a1683cd3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1872,8 +1872,6 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - WARN_ON(!crtc->config->has_pch_encoder); - if (HAS_PCH_LPT(dev_priv)) return PIPE_A; else @@ -12940,6 +12938,7 @@ out: static void intel_finish_crtc_commit(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { + struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_crtc_state->state); @@ -12947,6 +12946,20 @@ static void intel_finish_crtc_commit(struct drm_crtc *crtc, intel_atomic_get_new_crtc_state(old_intel_state, intel_crtc); intel_pipe_update_end(new_crtc_state); + + if (new_crtc_state->update_pipe && + !needs_modeset(&new_crtc_state->base) && + old_crtc_state->mode.private_flags & I915_MODE_FLAG_INHERITED) { + if (!IS_GEN2(dev_priv)) + intel_set_cpu_fifo_underrun_reporting(dev_priv, intel_crtc->pipe, true); + + if (new_crtc_state->has_pch_encoder) { + enum pipe pch_transcoder = + intel_crtc_pch_transcoder(intel_crtc); + + intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); + } + } } /** From 93313538c15341f44e8e879aa071663795f07bdf Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 10 Nov 2017 12:34:59 +0100 Subject: [PATCH 253/260] drm/i915: Pass idle crtc_state to intel_dp_sink_crc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IPS can only be enabled if the primary plane is visible, so first make sure sw state matches hw state by waiting for hw_done. After this pass crtc_state to intel_dp_sink_crc() so that can be used, instead of using legacy pointers. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171110113503.16253-7-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_debugfs.c | 17 +++++++++++++++-- drivers/gpu/drm/i915/intel_dp.c | 23 +++++++++++++---------- drivers/gpu/drm/i915/intel_drv.h | 3 ++- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ff8f508a7661..df3852c02a35 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2746,6 +2746,7 @@ static int i915_sink_crc(struct seq_file *m, void *data) for_each_intel_connector_iter(connector, &conn_iter) { struct drm_crtc *crtc; struct drm_connector_state *state; + struct intel_crtc_state *crtc_state; if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) continue; @@ -2764,12 +2765,24 @@ retry: if (ret) goto err; - if (!crtc->state->active) + crtc_state = to_intel_crtc_state(crtc->state); + if (!crtc_state->base.active) continue; + /* + * We need to wait for all crtc updates to complete, to make + * sure any pending modesets and plane updates are completed. + */ + if (crtc_state->base.commit) { + ret = wait_for_completion_interruptible(&crtc_state->base.commit->hw_done); + + if (ret) + goto err; + } + intel_dp = enc_to_intel_dp(state->best_encoder); - ret = intel_dp_sink_crc(intel_dp, crc); + ret = intel_dp_sink_crc(intel_dp, crtc_state, crc); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index c9c416389d0e..74c7cd655db4 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3861,11 +3861,12 @@ intel_dp_configure_mst(struct intel_dp *intel_dp) intel_dp->is_mst); } -static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp) +static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state, bool disable_wa) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); u8 buf; int ret = 0; int count = 0; @@ -3901,15 +3902,17 @@ static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp) } out: - hsw_enable_ips(intel_crtc); + if (disable_wa) + hsw_enable_ips(intel_crtc); return ret; } -static int intel_dp_sink_crc_start(struct intel_dp *intel_dp) +static int intel_dp_sink_crc_start(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); u8 buf; int ret; @@ -3923,7 +3926,7 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp) return -EIO; if (buf & DP_TEST_SINK_START) { - ret = intel_dp_sink_crc_stop(intel_dp); + ret = intel_dp_sink_crc_stop(intel_dp, crtc_state, false); if (ret) return ret; } @@ -3940,16 +3943,16 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp) return 0; } -int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc) +int intel_dp_sink_crc(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, u8 *crc) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); u8 buf; int count, ret; int attempts = 6; - ret = intel_dp_sink_crc_start(intel_dp); + ret = intel_dp_sink_crc_start(intel_dp, crtc_state); if (ret) return ret; @@ -3977,7 +3980,7 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc) } stop: - intel_dp_sink_crc_stop(intel_dp); + intel_dp_sink_crc_stop(intel_dp, crtc_state, true); return ret; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 645c5e8ee9f0..225392fe6762 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1531,7 +1531,8 @@ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode); void intel_dp_encoder_reset(struct drm_encoder *encoder); void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder); void intel_dp_encoder_destroy(struct drm_encoder *encoder); -int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc); +int intel_dp_sink_crc(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state, u8 *crc); bool intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state); From 199ea381d9887bc8b1720176c443e203d9664cd3 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 10 Nov 2017 12:35:00 +0100 Subject: [PATCH 254/260] drm/i915: Pass crtc_state to ips toggle functions, v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes since v1: - Only pass crtc_state, not crtc. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171110113503.16253-8-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_color.c | 4 ++-- drivers/gpu/drm/i915/intel_display.c | 24 ++++++++++++++---------- drivers/gpu/drm/i915/intel_dp.c | 6 +++--- drivers/gpu/drm/i915/intel_drv.h | 4 ++-- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index b8315bca852b..aa66e952a95d 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -370,7 +370,7 @@ static void haswell_load_luts(struct drm_crtc_state *crtc_state) */ if (IS_HASWELL(dev_priv) && intel_crtc_state->ips_enabled && (intel_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT)) { - hsw_disable_ips(intel_crtc); + hsw_disable_ips(intel_crtc_state); reenable_ips = true; } @@ -380,7 +380,7 @@ static void haswell_load_luts(struct drm_crtc_state *crtc_state) i9xx_load_luts(crtc_state); if (reenable_ips) - hsw_enable_ips(intel_crtc); + hsw_enable_ips(intel_crtc_state); } static void bdw_load_degamma_lut(struct drm_crtc_state *state) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5196a1683cd3..3b3dec1e6640 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4864,8 +4864,9 @@ static void ironlake_pfit_enable(struct intel_crtc *crtc) } } -void hsw_enable_ips(struct intel_crtc *crtc) +void hsw_enable_ips(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -4903,12 +4904,13 @@ void hsw_enable_ips(struct intel_crtc *crtc) } } -void hsw_disable_ips(struct intel_crtc *crtc) +void hsw_disable_ips(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (!crtc->config->ips_enabled) + if (!crtc_state->ips_enabled) return; assert_plane_enabled(dev_priv, crtc->plane); @@ -4956,7 +4958,8 @@ static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc) * completely hide the primary plane. */ static void -intel_post_enable_primary(struct drm_crtc *crtc) +intel_post_enable_primary(struct drm_crtc *crtc, + const struct intel_crtc_state *new_crtc_state) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -4969,7 +4972,7 @@ intel_post_enable_primary(struct drm_crtc *crtc) * when going from primary only to sprite only and vice * versa. */ - hsw_enable_ips(intel_crtc); + hsw_enable_ips(new_crtc_state); /* * Gen2 reports pipe underruns whenever all planes are disabled. @@ -4988,7 +4991,8 @@ intel_post_enable_primary(struct drm_crtc *crtc) /* FIXME move all this to pre_plane_update() with proper state tracking */ static void -intel_pre_disable_primary(struct drm_crtc *crtc) +intel_pre_disable_primary(struct drm_crtc *crtc, + const struct intel_crtc_state *old_crtc_state) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -5010,7 +5014,7 @@ intel_pre_disable_primary(struct drm_crtc *crtc) * when going from primary only to sprite only and vice * versa. */ - hsw_disable_ips(intel_crtc); + hsw_disable_ips(old_crtc_state); } /* FIXME get rid of this and use pre_plane_update */ @@ -5022,7 +5026,7 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; - intel_pre_disable_primary(crtc); + intel_pre_disable_primary(crtc, to_intel_crtc_state(crtc->state)); /* * Vblank time updates from the shadow to live plane control register @@ -5066,7 +5070,7 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) if (primary_state->base.visible && (needs_modeset(&pipe_config->base) || !old_primary_state->base.visible)) - intel_post_enable_primary(&crtc->base); + intel_post_enable_primary(&crtc->base, pipe_config); } } @@ -5095,7 +5099,7 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, if (old_primary_state->base.visible && (modeset || !primary_state->base.visible)) - intel_pre_disable_primary(&crtc->base); + intel_pre_disable_primary(&crtc->base, old_crtc_state); } /* diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 74c7cd655db4..bc61f38b131d 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3903,7 +3903,7 @@ static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp, out: if (disable_wa) - hsw_enable_ips(intel_crtc); + hsw_enable_ips(crtc_state); return ret; } @@ -3931,11 +3931,11 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp, return ret; } - hsw_disable_ips(intel_crtc); + hsw_disable_ips(crtc_state); if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK, buf | DP_TEST_SINK_START) < 0) { - hsw_enable_ips(intel_crtc); + hsw_enable_ips(crtc_state); return -EIO; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 225392fe6762..69aab324aaa1 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1485,8 +1485,8 @@ bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, int target_clock, int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); bool intel_crtc_active(struct intel_crtc *crtc); -void hsw_enable_ips(struct intel_crtc *crtc); -void hsw_disable_ips(struct intel_crtc *crtc); +void hsw_enable_ips(const struct intel_crtc_state *crtc_state); +void hsw_disable_ips(const struct intel_crtc_state *crtc_state); enum intel_display_power_domain intel_port_to_power_domain(enum port port); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config); From 5b9489cb8ee23b41ab5a4cca6652511fa252bad8 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 15 Nov 2017 17:31:56 +0100 Subject: [PATCH 255/260] drm/i915: Calculate vlv/chv intermediate watermarks correctly, v3. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The watermarks it should calculate against are the old optimal watermarks. The currently active crtc watermarks are pure fiction, and are invalid in case of a nonblocking modeset, page flip enabling/disabling planes or any other reason. When the crtc is disabled or during a modeset the intermediate watermarks don't need to be programmed separately, and could be directly assigned to the optimal watermarks. CXSR must always be disabled in the intermediate case for modesets, else we get a WARN for vblank wait timeout. Also rename crtc_state to new_crtc_state, to distinguish it from the old state. Changes since v1: - Use intel_atomic_get_old_crtc_state. (ville) Changes since v2: - Always unset cxsr during modeset. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171115163157.14372-1-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_pm.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8c69ec9eb6ee..f904bf73dbd6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2026,16 +2026,27 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, static int vlv_compute_intermediate_wm(struct drm_device *dev, struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) + struct intel_crtc_state *new_crtc_state) { - struct vlv_wm_state *intermediate = &crtc_state->wm.vlv.intermediate; - const struct vlv_wm_state *optimal = &crtc_state->wm.vlv.optimal; - const struct vlv_wm_state *active = &crtc->wm.active.vlv; + struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate; + const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal; + struct intel_atomic_state *intel_state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(intel_state, crtc); + const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal; int level; + if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) { + *intermediate = *optimal; + + intermediate->cxsr = false; + goto out; + } + intermediate->num_levels = min(optimal->num_levels, active->num_levels); intermediate->cxsr = optimal->cxsr && active->cxsr && - !crtc_state->disable_cxsr; + !new_crtc_state->disable_cxsr; for (level = 0; level < intermediate->num_levels; level++) { enum plane_id plane_id; @@ -2054,12 +2065,13 @@ static int vlv_compute_intermediate_wm(struct drm_device *dev, vlv_invalidate_wms(crtc, intermediate, level); +out: /* * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0) - crtc_state->wm.need_postvbl_update = true; + new_crtc_state->wm.need_postvbl_update = true; return 0; } From 248c2435cbd75ad098130dda819b2973591d6f6d Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 15 Nov 2017 17:31:57 +0100 Subject: [PATCH 256/260] drm/i915: Calculate g4x intermediate watermarks correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The watermarks it should calculate against are the old optimal watermarks. The currently active crtc watermarks are pure fiction, and are invalid in case of a nonblocking modeset, page flip enabling/disabling planes or any other reason. When the crtc is disabled or during a modeset the intermediate watermarks don't need to be programmed separately, and could be directly assigned to the optimal watermarks. CXSR must always be disabled in the intermediate case for modesets, else we get a WARN for vblank wait timeout. Also rename crtc_state to new_crtc_state, to distinguish it from the old state. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171115163157.14372-2-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_pm.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f904bf73dbd6..3c55e4026331 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1406,17 +1406,29 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) static int g4x_compute_intermediate_wm(struct drm_device *dev, struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) + struct intel_crtc_state *new_crtc_state) { - struct g4x_wm_state *intermediate = &crtc_state->wm.g4x.intermediate; - const struct g4x_wm_state *optimal = &crtc_state->wm.g4x.optimal; - const struct g4x_wm_state *active = &crtc->wm.active.g4x; + struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate; + const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal; + struct intel_atomic_state *intel_state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(intel_state, crtc); + const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal; enum plane_id plane_id; + if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) { + *intermediate = *optimal; + + intermediate->cxsr = false; + intermediate->hpll_en = false; + goto out; + } + intermediate->cxsr = optimal->cxsr && active->cxsr && - !crtc_state->disable_cxsr; + !new_crtc_state->disable_cxsr; intermediate->hpll_en = optimal->hpll_en && active->hpll_en && - !crtc_state->disable_cxsr; + !new_crtc_state->disable_cxsr; intermediate->fbc_en = optimal->fbc_en && active->fbc_en; for_each_plane_id_on_crtc(crtc, plane_id) { @@ -1458,12 +1470,13 @@ static int g4x_compute_intermediate_wm(struct drm_device *dev, WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) && intermediate->fbc_en && intermediate->hpll_en); +out: /* * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0) - crtc_state->wm.need_postvbl_update = true; + new_crtc_state->wm.need_postvbl_update = true; return 0; } From 0cfecb7c4b9b45ed1776162e132b43f92564f3f4 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Thu, 16 Nov 2017 17:08:25 -0800 Subject: [PATCH 257/260] Revert "drm/i915: Display WA #1133 WaFbcSkipSegments:cnl, glk" This reverts commit 8f067837c4b713ce2e69be95af7b2a5eb3bd7de8. HSD says "WA withdrawn. It was causing corruption with some images. WA is not strictly necessary since this bug just causes loss of FBC compression with some sizes and images, but doesn't break anything." Fixes: 8f067837c4b7 ("drm/i915: Display WA #1133 WaFbcSkipSegments:cnl, glk") Cc: Rodrigo Vivi Signed-off-by: Radhakrishna Sripada Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171117010825.23118-1-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 3 --- drivers/gpu/drm/i915/intel_pm.c | 12 ------------ 2 files changed, 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 107e2d7c9fba..96c80fa0fcac 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2985,9 +2985,6 @@ enum i915_power_well_id { #define ILK_DPFC_CHICKEN _MMIO(0x43224) #define ILK_DPFC_DISABLE_DUMMY0 (1<<8) #define ILK_DPFC_NUKE_ON_ANY_MODIFICATION (1<<23) -#define GLK_SKIP_SEG_EN (1<<12) -#define GLK_SKIP_SEG_COUNT_MASK (3<<10) -#define GLK_SKIP_SEG_COUNT(x) ((x)<<10) #define ILK_FBC_RT_BASE _MMIO(0x2128) #define ILK_FBC_RT_VALID (1<<0) #define SNB_FBC_FRONT_BUFFER (1<<1) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3c55e4026331..4d2cd432f739 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -121,7 +121,6 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) static void glk_init_clock_gating(struct drm_i915_private *dev_priv) { - u32 val; gen9_init_clock_gating(dev_priv); /* @@ -141,11 +140,6 @@ static void glk_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(CHICKEN_MISC_2, val); } - /* Display WA #1133: WaFbcSkipSegments:glk */ - val = I915_READ(ILK_DPFC_CHICKEN); - val &= ~GLK_SKIP_SEG_COUNT_MASK; - val |= GLK_SKIP_SEG_EN | GLK_SKIP_SEG_COUNT(1); - I915_WRITE(ILK_DPFC_CHICKEN, val); } static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) @@ -8503,12 +8497,6 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) val |= SARBUNIT_CLKGATE_DIS; I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); - - /* Display WA #1133: WaFbcSkipSegments:cnl */ - val = I915_READ(ILK_DPFC_CHICKEN); - val &= ~GLK_SKIP_SEG_COUNT_MASK; - val |= GLK_SKIP_SEG_EN | GLK_SKIP_SEG_COUNT(1); - I915_WRITE(ILK_DPFC_CHICKEN, val); } static void cfl_init_clock_gating(struct drm_i915_private *dev_priv) From 223c73a3667a5afe33245d423f867f4c4b1b39ea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Nov 2017 16:29:45 +0000 Subject: [PATCH 258/260] drm/i915/selftests: Report ENOMEM clearly for an allocation failure If we can not run the drunk_hole test because we couldn't allocate the memory for the permutation array (even after we tried trimming the size), report a clear ENOMEM. Similary, if we are asked to operate on a hole too small for ourselves, make it skip quietly. v2: Avoid malloc(0) since that returns ZERO_SIZE_PTR not NULL. v3: Fixup similar construction for lowlevel_hole v4: Use u64 >> 1 to avoid 64b div. Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171117101732.4335-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171117162945.16390-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 3dcf886a2802..6491cf0a4f46 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -216,13 +216,21 @@ static int lowlevel_hole(struct drm_i915_private *i915, hole_size = (hole_end - hole_start) >> size; if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) hole_size = KMALLOC_MAX_SIZE / sizeof(u32); - count = hole_size; - do { - count >>= 1; - order = i915_random_order(count, &prng); - } while (!order && count); - if (!order) + count = hole_size >> 1; + if (!count) { + pr_debug("%s: hole is too small [%llx - %llx] >> %d: %lld\n", + __func__, hole_start, hole_end, size, hole_size); break; + } + + do { + order = i915_random_order(count, &prng); + if (order) + break; + } while (count >>= 1); + if (!count) + return -ENOMEM; + GEM_BUG_ON(!order); GEM_BUG_ON(count * BIT_ULL(size) > vm->total); GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end); @@ -704,13 +712,21 @@ static int drunk_hole(struct drm_i915_private *i915, hole_size = (hole_end - hole_start) >> size; if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) hole_size = KMALLOC_MAX_SIZE / sizeof(u32); - count = hole_size; - do { - count >>= 1; - order = i915_random_order(count, &prng); - } while (!order && count); - if (!order) + count = hole_size >> 1; + if (!count) { + pr_debug("%s: hole is too small [%llx - %llx] >> %d: %lld\n", + __func__, hole_start, hole_end, size, hole_size); break; + } + + do { + order = i915_random_order(count, &prng); + if (order) + break; + } while (count >>= 1); + if (!count) + return -ENOMEM; + GEM_BUG_ON(!order); /* Ignore allocation failures (i.e. don't report them as * a test failure) as we are purposefully allocating very From 6a7a6a982a22091b9d67cb3ff33d7782494aa2f8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Nov 2017 10:26:35 +0000 Subject: [PATCH 259/260] drm/i915: Add a policy note for removing workarounds Rodrigo gave a persuasive argument for keeping workarounds: that they serve as a good guide for the bring up of the next generation. Not only do workarounds persist into the early revisions, they show where the workarounds were previously added to the code flow and sometimes the old workarounds have an explanation that give insight into their wider implications. Based on his suggestion, document the policy that we want to keep the workarounds from the current generation to guide the next. Older preproduction workarounds we still want to remove to keep the code clean. Signed-off-by: Chris Wilson Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171117102635.8689-1-chris@chris-wilson.co.uk Acked-by: Jani Nikula Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3423d873123a..8ea6ce7027d4 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -843,6 +843,11 @@ static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) * We don't keep the workarounds for pre-production hardware, so we expect our * driver to fail on these machines in one way or another. A little warning on * dmesg may help both the user and the bug triagers. + * + * Our policy for removing pre-production workarounds is to keep the + * current gen workarounds as a guide to the bring-up of the next gen + * (workarounds have a habit of persisting!). Anything older than that + * should be removed along with the complications they introduce. */ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) { From 010d118c20617021025a930bc8e90f371ab99da5 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 17 Nov 2017 14:47:02 -0800 Subject: [PATCH 260/260] drm/i915: Update DRIVER_DATE to 20171117 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2158a758a17d..36bb4927484a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,8 +79,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20171109" -#define DRIVER_TIMESTAMP 1510270206 +#define DRIVER_DATE "20171117" +#define DRIVER_TIMESTAMP 1510958822 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions