From f83a30f466ebbd56355b1f65ec9bcd5087840ffc Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 26 Jul 2023 17:30:42 -0400 Subject: [PATCH] drm/xe: Fix an invalid locking wait context bug We cannot have spin locks around xe_irq_reset, since it will call the intel_display_power_is_enabled() function, and that needs a mutex lock. Hence causing the undesired "[ BUG: Invalid wait context ]" We cannot convert i915's power domain lock to spin lock due to the nested dependency of non-atomic context waits. So, let's move the xe_irq_reset functions from the critical area, while still ensuring that we are protecting the irq.enabled and ensuring the right serialization in the irq handlers. v2: On the first version, I had missed the fact that irq.enabled is checked on the xe/display glue layer, and that i915 display code is actually using the irq spin lock properly. So, this got changed to a version suggested by Matthew Auld. v3: do not use lockdep_assert for display glue. do not save restore irq from inside IRQ or we can get bogus irq restore warnings Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/463 Suggested-by: Matthew Auld Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index ca6353243326..69629be07de2 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -308,6 +308,13 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg) unsigned long intr_dw[2]; u32 identity[32]; + spin_lock(&xe->irq.lock); + if (!xe->irq.enabled) { + spin_unlock(&xe->irq.lock); + return IRQ_NONE; + } + spin_unlock(&xe->irq.lock); + master_ctl = xelp_intr_disable(xe); if (!master_ctl) { xelp_intr_enable(xe, false); @@ -366,6 +373,13 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) /* TODO: This really shouldn't be copied+pasted */ + spin_lock(&xe->irq.lock); + if (!xe->irq.enabled) { + spin_unlock(&xe->irq.lock); + return IRQ_NONE; + } + spin_unlock(&xe->irq.lock); + master_tile_ctl = dg1_intr_disable(xe); if (!master_tile_ctl) { dg1_intr_enable(xe, false); @@ -563,10 +577,14 @@ void xe_irq_shutdown(struct xe_device *xe) void xe_irq_suspend(struct xe_device *xe) { + int irq = to_pci_dev(xe->drm.dev)->irq; + spin_lock_irq(&xe->irq.lock); - xe->irq.enabled = false; - xe_irq_reset(xe); + xe->irq.enabled = false; /* no new irqs */ spin_unlock_irq(&xe->irq.lock); + + synchronize_irq(irq); /* flush irqs */ + xe_irq_reset(xe); /* turn irqs off */ } void xe_irq_resume(struct xe_device *xe) @@ -574,13 +592,15 @@ void xe_irq_resume(struct xe_device *xe) struct xe_gt *gt; int id; - spin_lock_irq(&xe->irq.lock); + /* + * lock not needed: + * 1. no irq will arrive before the postinstall + * 2. display is not yet resumed + */ xe->irq.enabled = true; xe_irq_reset(xe); - xe_irq_postinstall(xe); + xe_irq_postinstall(xe); /* turn irqs on */ for_each_gt(gt, xe, id) xe_irq_enable_hwe(gt); - - spin_unlock_irq(&xe->irq.lock); }