diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 14b15ecc5617..0897719751e9 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -335,6 +335,9 @@ struct xe_device { /** @allowed: Indicates if d3cold is a valid device state */ bool allowed; + /** @power_lost: Indicates if card has really lost power. */ + bool power_lost; + /** * @vram_threshold: * diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index d44537abf7da..ed90d738d673 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -844,3 +844,30 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) xe_guc_ct_print(&guc->ct, p, false); xe_guc_submit_print(guc, p); } + +/** + * xe_guc_in_reset() - Detect if GuC MIA is in reset. + * @guc: The GuC object + * + * This function detects runtime resume from d3cold by leveraging + * GUC_STATUS, GUC doesn't get reset during d3hot, + * it strictly to be called from RPM resume handler. + * + * Return: true if failed to get forcewake or GuC MIA is in Reset, + * otherwise false. + */ +bool xe_guc_in_reset(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 status; + int err; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return true; + + status = xe_mmio_read32(gt, GUC_STATUS); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + + return status & GS_MIA_IN_RESET; +} diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 74a74051f354..f64f22e97169 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -35,6 +35,7 @@ void xe_guc_reset_wait(struct xe_guc *guc); void xe_guc_stop_prepare(struct xe_guc *guc); int xe_guc_stop(struct xe_guc *guc); int xe_guc_start(struct xe_guc *guc); +bool xe_guc_in_reset(struct xe_guc *guc); static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 6d04e570735a..ae6e1394ff31 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -818,8 +818,6 @@ static int xe_pci_runtime_idle(struct device *dev) * but maybe include some other conditions. So, before * we can re-enable the D3cold, we need to: * 1. rewrite the VRAM save / restore to avoid buffer object locks - * 2. at resume, detect if we really lost power and avoid memory - * restoration if we were only up to d3cold */ xe->d3cold.allowed = false; } diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 21964e4d09f8..f336aec7085d 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -16,6 +16,7 @@ #include "xe_device_sysfs.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_guc.h" #include "xe_irq.h" #include "xe_pcode.h" @@ -186,7 +187,15 @@ int xe_pm_runtime_resume(struct xe_device *xe) u8 id; int err; - if (xe->d3cold.allowed) { + /* + * It can be possible that xe has allowed d3cold but other pcie devices + * in gfx card soc would have blocked d3cold, therefore card has not + * really lost power. Detecting primary Gt power is sufficient. + */ + gt = xe_device_get_gt(xe, 0); + xe->d3cold.power_lost = xe_guc_in_reset(>->uc.guc); + + if (xe->d3cold.allowed && xe->d3cold.power_lost) { for_each_gt(gt, xe, id) { err = xe_pcode_init(gt); if (err) @@ -207,7 +216,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_resume(gt); - if (xe->d3cold.allowed) { + if (xe->d3cold.allowed && xe->d3cold.power_lost) { err = xe_bo_restore_user(xe); if (err) return err;