diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index b1f36c986f0d..f948a358f53e 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -35,7 +35,7 @@
 #include "xe_wait_user_fence.h"
 
 #ifdef CONFIG_LOCKDEP
-static struct lockdep_map xe_device_mem_access_lockdep_map = {
+struct lockdep_map xe_device_mem_access_lockdep_map = {
 	.name = "xe_device_mem_access_lockdep_map"
 };
 #endif
@@ -431,13 +431,13 @@ void xe_device_mem_access_get(struct xe_device *xe)
 	 * runtime_resume callback, lockdep should give us a nice splat.
 	 */
 	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+	lock_map_release(&xe_device_mem_access_lockdep_map);
 
 	xe_pm_runtime_get(xe);
 	ref = atomic_inc_return(&xe->mem_access.ref);
 
 	XE_WARN_ON(ref == S32_MAX);
 
-	lock_map_release(&xe_device_mem_access_lockdep_map);
 }
 
 void xe_device_mem_access_put(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 8b085ffdc5f8..593accb68281 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -16,6 +16,10 @@ struct xe_file;
 #include "xe_force_wake.h"
 #include "xe_macros.h"
 
+#ifdef CONFIG_LOCKDEP
+extern struct lockdep_map xe_device_mem_access_lockdep_map;
+#endif
+
 static inline struct xe_device *to_xe_device(const struct drm_device *dev)
 {
 	return container_of(dev, struct xe_device, drm);
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 04b995aa848f..cb2a00ea28e3 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -188,6 +188,29 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
 	/* Disable access_ongoing asserts and prevent recursive pm calls */
 	xe_pm_write_callback_task(xe, current);
 
+	/*
+	 * The actual xe_device_mem_access_put() is always async underneath, so
+	 * exactly where that is called should make no difference to us. However
+	 * we still need to be very careful with the locks that this callback
+	 * acquires and the locks that are acquired and held by any callers of
+	 * xe_device_mem_access_get(). We already have the matching annotation
+	 * on that side, but we also need it here. For example lockdep should be
+	 * able to tell us if the following scenario is in theory possible:
+	 *
+	 * CPU0                          | CPU1 (kworker)
+	 * lock(A)                       |
+	 *                               | xe_pm_runtime_suspend()
+	 *                               |   lock(A)
+	 * xe_device_mem_access_get()    |
+	 *
+	 * This will clearly deadlock since rpm core needs to wait for
+	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
+	 * on CPU0 which prevents CPU1 making forward progress. With the
+	 * annotation here and in xe_device_mem_access_get() lockdep will see
+	 * the potential lock inversion and give us a nice splat.
+	 */
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
 	if (xe->d3cold.allowed) {
 		err = xe_bo_evict_all(xe);
 		if (err)
@@ -202,6 +225,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
 
 	xe_irq_suspend(xe);
 out:
+	lock_map_release(&xe_device_mem_access_lockdep_map);
 	xe_pm_write_callback_task(xe, NULL);
 	return err;
 }
@@ -215,6 +239,8 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 	/* Disable access_ongoing asserts and prevent recursive pm calls */
 	xe_pm_write_callback_task(xe, current);
 
+	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+
 	/*
 	 * It can be possible that xe has allowed d3cold but other pcie devices
 	 * in gfx card soc would have blocked d3cold, therefore card has not
@@ -250,6 +276,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 			goto out;
 	}
 out:
+	lock_map_release(&xe_device_mem_access_lockdep_map);
 	xe_pm_write_callback_task(xe, NULL);
 	return err;
 }
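
For reviewers, a minimal self-contained sketch of the annotation pattern the patch relies on, assuming a toy kernel module: the demo_* names and the dummy mutex are hypothetical stand-ins for the runtime-pm callbacks and a caller-held lock(A); only the lockdep_map and the lock_map_acquire()/lock_map_release() calls mirror the real code.

/* Sketch only: hypothetical demo module, not xe code. */
#include <linux/init.h>
#include <linux/lockdep.h>
#include <linux/module.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(lock_a);	/* stands in for the caller's lock(A) */

#ifdef CONFIG_LOCKDEP
static struct lockdep_map demo_rpm_map = {
	.name = "demo_rpm_map"
};
#endif

/* Caller side, as in xe_device_mem_access_get() with lock(A) held. */
static void demo_mem_access_get(void)
{
	mutex_lock(&lock_a);
	/* Only records the dependency lock_a -> demo_rpm_map. */
	lock_map_acquire(&demo_rpm_map);
	lock_map_release(&demo_rpm_map);
	mutex_unlock(&lock_a);
}

/* Callback side, as in xe_pm_runtime_suspend() taking lock(A). */
static void demo_runtime_suspend(void)
{
	lock_map_acquire(&demo_rpm_map);
	mutex_lock(&lock_a);	/* records demo_rpm_map -> lock_a */
	mutex_unlock(&lock_a);
	lock_map_release(&demo_rpm_map);
}

static int __init demo_init(void)
{
	/*
	 * Running both paths once, even sequentially, is enough for
	 * lockdep to see the circular dependency and print a splat;
	 * no real deadlock has to happen.
	 */
	demo_mem_access_get();
	demo_runtime_suspend();
	return 0;
}
module_init(demo_init);

static void __exit demo_exit(void)
{
}
module_exit(demo_exit);

MODULE_LICENSE("GPL");

This also shows why the patch moves lock_map_release() to directly after lock_map_acquire() in xe_device_mem_access_get(): the map is never meant to be held there, the back-to-back pair only teaches lockdep which locks callers may hold while waiting on the pm callbacks, whereas the callbacks hold the map for their whole duration so every lock they take is ordered against it.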