diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 122197737ef2..dd0ed941441a 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -1326,6 +1326,9 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start, ggtt->error_capture.size); + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + intel_uc_resume_mappings(>->uc); + ggtt->invalidate(ggtt); if (flush) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c index fb0984f875f9..b26f493f86fa 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c @@ -90,7 +90,12 @@ void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc) { struct intel_gt *gt = gsc_uc_to_gt(gsc); - intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC); + /* + * GSC FW needs to be copied to a dedicated memory allocations for + * loading (see gsc->local), so we don't need to GGTT map the FW image + * itself into GGTT. + */ + intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC, false); INIT_WORK(&gsc->work, gsc_work); /* we can arrive here from i915_driver_early_probe for primary diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index f9bddaa876d9..a4203018d7a3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -163,7 +163,7 @@ void intel_guc_init_early(struct intel_guc *guc) struct intel_gt *gt = guc_to_gt(guc); struct drm_i915_private *i915 = gt->i915; - intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC); + intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, true); intel_guc_ct_init_early(&guc->ct); intel_guc_log_init_early(&guc->log); intel_guc_submission_init_early(guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 04724ff56ded..268e036f8f28 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -276,7 +276,7 @@ void intel_huc_init_early(struct intel_huc *huc) struct drm_i915_private *i915 = huc_to_gt(huc)->i915; struct intel_gt *gt = huc_to_gt(huc); - intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC); + intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC, true); /* * we always init the fence as already completed, even if HuC is not diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index c8b9cbb7ba3a..1e7f5cc9d550 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -700,6 +700,12 @@ void intel_uc_suspend(struct intel_uc *uc) } } +static void __uc_resume_mappings(struct intel_uc *uc) +{ + intel_uc_fw_resume_mapping(&uc->guc.fw); + intel_uc_fw_resume_mapping(&uc->huc.fw); +} + static int __uc_resume(struct intel_uc *uc, bool enable_communication) { struct intel_guc *guc = &uc->guc; @@ -767,4 +773,6 @@ static const struct intel_uc_ops uc_ops_on = { .init_hw = __uc_init_hw, .fini_hw = __uc_fini_hw, + + .resume_mappings = __uc_resume_mappings, }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index d585524d94de..014bb7d83689 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -24,6 +24,7 @@ struct intel_uc_ops { void (*fini)(struct intel_uc *uc); int (*init_hw)(struct intel_uc *uc); void (*fini_hw)(struct intel_uc *uc); + void (*resume_mappings)(struct intel_uc *uc); }; struct intel_uc { @@ -114,6 +115,7 @@ intel_uc_ops_function(init, init, int, 0); intel_uc_ops_function(fini, fini, void, ); intel_uc_ops_function(init_hw, init_hw, int, 0); intel_uc_ops_function(fini_hw, fini_hw, void, ); +intel_uc_ops_function(resume_mappings, resume_mappings, void, ); #undef intel_uc_ops_function #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index dc5c96c503a9..31776c279f32 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -471,12 +471,14 @@ static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc * intel_uc_fw_init_early - initialize the uC object and select the firmware * @uc_fw: uC firmware * @type: type of uC + * @needs_ggtt_mapping: whether the FW needs to be GGTT mapped for loading * * Initialize the state of our uC object and relevant tracking and select the * firmware to fetch and load. */ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, - enum intel_uc_fw_type type) + enum intel_uc_fw_type type, + bool needs_ggtt_mapping) { struct intel_gt *gt = ____uc_fw_to_gt(uc_fw, type); struct drm_i915_private *i915 = gt->i915; @@ -490,6 +492,7 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, GEM_BUG_ON(uc_fw->file_selected.path); uc_fw->type = type; + uc_fw->needs_ggtt_mapping = needs_ggtt_mapping; if (HAS_GT_UC(i915)) { if (!validate_fw_table_type(i915, type)) { @@ -755,7 +758,7 @@ static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware ** if (err) return err; - if ((*fw)->size > INTEL_UC_RSVD_GGTT_PER_FW) { + if (uc_fw->needs_ggtt_mapping && (*fw)->size > INTEL_UC_RSVD_GGTT_PER_FW) { gt_err(gt, "%s firmware %s: size (%zuKB) exceeds max supported size (%uKB)\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, (*fw)->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K); @@ -940,29 +943,32 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj = uc_fw->obj; struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; - struct i915_vma_resource *dummy = &uc_fw->dummy; + struct i915_vma_resource *vma_res = &uc_fw->vma_res; u32 pte_flags = 0; - dummy->start = uc_fw_ggtt_offset(uc_fw); - dummy->node_size = obj->base.size; - dummy->bi.pages = obj->mm.pages; + if (!uc_fw->needs_ggtt_mapping) + return; + + vma_res->start = uc_fw_ggtt_offset(uc_fw); + vma_res->node_size = obj->base.size; + vma_res->bi.pages = obj->mm.pages; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); /* uc_fw->obj cache domains were not controlled across suspend */ if (i915_gem_object_has_struct_page(obj)) - drm_clflush_sg(dummy->bi.pages); + drm_clflush_sg(vma_res->bi.pages); if (i915_gem_object_is_lmem(obj)) pte_flags |= PTE_LM; if (ggtt->vm.raw_insert_entries) - ggtt->vm.raw_insert_entries(&ggtt->vm, dummy, + ggtt->vm.raw_insert_entries(&ggtt->vm, vma_res, i915_gem_get_pat_index(ggtt->vm.i915, I915_CACHE_NONE), pte_flags); else - ggtt->vm.insert_entries(&ggtt->vm, dummy, + ggtt->vm.insert_entries(&ggtt->vm, vma_res, i915_gem_get_pat_index(ggtt->vm.i915, I915_CACHE_NONE), pte_flags); @@ -970,11 +976,13 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw) { - struct drm_i915_gem_object *obj = uc_fw->obj; struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; - u64 start = uc_fw_ggtt_offset(uc_fw); + struct i915_vma_resource *vma_res = &uc_fw->vma_res; - ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size); + if (!vma_res->node_size) + return; + + ggtt->vm.clear_range(&ggtt->vm, vma_res->start, vma_res->node_size); } static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) @@ -991,7 +999,7 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); /* Set the source address for the uCode */ - offset = uc_fw_ggtt_offset(uc_fw); + offset = uc_fw->vma_res.start; GEM_BUG_ON(upper_32_bits(offset) & 0xFFFF0000); intel_uncore_write_fw(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset)); intel_uncore_write_fw(uncore, DMA_ADDR_0_HIGH, upper_32_bits(offset)); @@ -1065,9 +1073,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) return -ENOEXEC; /* Call custom loader */ - uc_fw_bind_ggtt(uc_fw); err = uc_fw_xfer(uc_fw, dst_offset, dma_flags); - uc_fw_unbind_ggtt(uc_fw); if (err) goto fail; @@ -1171,6 +1177,8 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw) goto out_unpin; } + uc_fw_bind_ggtt(uc_fw); + return 0; out_unpin: @@ -1181,6 +1189,7 @@ out: void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) { + uc_fw_unbind_ggtt(uc_fw); uc_fw_rsa_data_destroy(uc_fw); if (i915_gem_object_has_pinned_pages(uc_fw->obj)) @@ -1189,6 +1198,17 @@ void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE); } +void intel_uc_fw_resume_mapping(struct intel_uc_fw *uc_fw) +{ + if (!intel_uc_fw_is_available(uc_fw)) + return; + + if (!i915_gem_object_has_pinned_pages(uc_fw->obj)) + return; + + uc_fw_bind_ggtt(uc_fw); +} + /** * intel_uc_fw_cleanup_fetch - cleanup uC firmware * @uc_fw: uC firmware diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 6ba00e6b3975..2be9470eb712 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -99,13 +99,19 @@ struct intel_uc_fw { struct drm_i915_gem_object *obj; /** - * @dummy: A vma used in binding the uc fw to ggtt. We can't define this - * vma on the stack as it can lead to a stack overflow, so we define it - * here. Safe to have 1 copy per uc fw because the binding is single - * threaded as it done during driver load (inherently single threaded) - * or during a GT reset (mutex guarantees single threaded). + * @needs_ggtt_mapping: indicates whether the fw object needs to be + * pinned to ggtt. If true, the fw is pinned at init time and unpinned + * during driver unload. */ - struct i915_vma_resource dummy; + bool needs_ggtt_mapping; + + /** + * @vma_res: A vma resource used in binding the uc fw to ggtt. The fw is + * pinned in a reserved area of the ggtt (above the maximum address + * usable by GuC); therefore, we can't use the normal vma functions to + * do the pinning and we instead use this resource to do so. + */ + struct i915_vma_resource vma_res; struct i915_vma *rsa_data; u32 rsa_size; @@ -282,12 +288,14 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) } void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, - enum intel_uc_fw_type type); + enum intel_uc_fw_type type, + bool needs_ggtt_mapping); int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw); void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags); int intel_uc_fw_init(struct intel_uc_fw *uc_fw); void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); +void intel_uc_fw_resume_mapping(struct intel_uc_fw *uc_fw); size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len); int intel_uc_fw_mark_load_failed(struct intel_uc_fw *uc_fw, int err); void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p);