From b4ee9606378bb9520c94d8b96f0305c3696f5c29 Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Wed, 1 Mar 2023 10:21:06 -0600 Subject: [PATCH 01/18] drm/amdkfd: Fix BO offset for multi-VMA page migration svm_migrate_ram_to_vram migrates a prange from sys ram to vram. The prange may cross multiple vma. Need remember current dst vram offset in the TTM resource for each migration. v2: squash in warning fix (Alex) Signed-off-by: Xiaogang Chen Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index de8ce72344fc..391da6acb3e5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -289,7 +289,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) static int svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, - dma_addr_t *scratch) + dma_addr_t *scratch, uint64_t ttm_res_offset) { uint64_t npages = migrate->npages; struct device *dev = adev->dev; @@ -299,8 +299,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, uint64_t i, j; int r; - pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, - prange->last); + pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start, + prange->last, ttm_res_offset); src = scratch; dst = (uint64_t *)(scratch + npages); @@ -311,7 +311,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, goto out; } - amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT, + amdgpu_res_first(prange->ttm_res, ttm_res_offset, npages << PAGE_SHIFT, &cursor); for (i = j = 0; i < npages; i++) { struct page *spage; @@ -398,7 +398,7 @@ out: static long svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, - uint64_t end, uint32_t trigger) + uint64_t end, uint32_t trigger, uint64_t ttm_res_offset) { struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); uint64_t npages = (end - start) >> PAGE_SHIFT; @@ -451,7 +451,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, else pr_debug("0x%lx pages migrated\n", cpages); - r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch); + r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch, ttm_res_offset); migrate_vma_pages(&migrate); pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", @@ -499,6 +499,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long addr, start, end; struct vm_area_struct *vma; struct amdgpu_device *adev; + uint64_t ttm_res_offset; unsigned long cpages = 0; long r = 0; @@ -519,6 +520,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; + ttm_res_offset = prange->offset << PAGE_SHIFT; for (addr = start; addr < end;) { unsigned long next; @@ -528,13 +530,14 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, break; next = min(vma->vm_end, end); - r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger); + r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, ttm_res_offset); if (r < 0) { pr_debug("failed %ld to migrate\n", r); break; } else { cpages += r; } + ttm_res_offset += next - addr; addr = next; } From 8eeddc0d4200762063e1c66b9cc63afa7b24ebf0 Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Thu, 9 Mar 2023 17:44:55 -0600 Subject: [PATCH 02/18] drm/amdkfd: Get prange->offset after svm_range_vram_node_new During miration to vram prange->offset is valid after vram buffer is located, either use old one or allocate a new one. Move svm_range_vram_node_new before migrate for each vma to get valid prange->offset. v2: squash in warning fix Fixes: b4ee9606378b ("drm/amdkfd: Fix BO offset for multi-VMA page migration") Signed-off-by: Xiaogang Chen Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 391da6acb3e5..54933903bcb8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -305,12 +305,6 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, src = scratch; dst = (uint64_t *)(scratch + npages); - r = svm_range_vram_node_new(adev, prange, true); - if (r) { - dev_dbg(adev->dev, "fail %d to alloc vram\n", r); - goto out; - } - amdgpu_res_first(prange->ttm_res, ttm_res_offset, npages << PAGE_SHIFT, &cursor); for (i = j = 0; i < npages; i++) { @@ -391,7 +385,7 @@ out_free_vram_pages: migrate->dst[i + 3] = 0; } #endif -out: + return r; } @@ -520,6 +514,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; + + r = svm_range_vram_node_new(adev, prange, true); + if (r) { + dev_dbg(adev->dev, "fail %ld to alloc vram\n", r); + return r; + } ttm_res_offset = prange->offset << PAGE_SHIFT; for (addr = start; addr < end;) { @@ -543,6 +543,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) prange->actual_loc = best_loc; + else + svm_range_vram_node_free(prange); return r < 0 ? r : 0; } From b2ca5c5d416b4e72d1e9d0293fc720e2d525fd42 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Tue, 7 Mar 2023 16:19:02 -0800 Subject: [PATCH 03/18] drm/amdkfd: fix a potential double free in pqm_create_queue Set *q to NULL on errors, otherwise pqm_create_queue would free it again. Signed-off-by: Chia-I Wu Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 5137476ec18e..4236539d9f93 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -218,8 +218,8 @@ static int init_user_queue(struct process_queue_manager *pqm, return 0; cleanup: - if (dev->shared_resources.enable_mes) - uninit_queue(*q); + uninit_queue(*q); + *q = NULL; return retval; } From ab9bdb1213b4b40942af6a383f555d0c14874c1b Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 1 Mar 2023 10:53:03 +0800 Subject: [PATCH 04/18] drm/amd/pm: bump SMU 13.0.4 driver_if header version Align the SMU driver interface version with PMFW to suppress the version mismatch message on driver loading. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h index f77401709d83..2162ecd1057d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define PMFW_DRIVER_IF_VERSION 7 +#define PMFW_DRIVER_IF_VERSION 8 typedef struct { int32_t value; @@ -198,7 +198,7 @@ typedef struct { uint16_t SkinTemp; uint16_t DeviceState; uint16_t CurTemp; //[centi-Celsius] - uint16_t spare2; + uint16_t FilterAlphaValue; uint16_t AverageGfxclkFrequency; uint16_t AverageFclkFrequency; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 1c0ae2cb757b..f085cb97a620 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -29,7 +29,7 @@ #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x37 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x08 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x37 From a9386ee9681585794dbab95d4ce6826f73d19af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82a=C5=BCej=20Szczygie=C5=82?= Date: Sun, 5 Mar 2023 00:44:31 +0100 Subject: [PATCH 05/18] drm/amd/pm: Fix sienna cichlid incorrect OD volage after resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always setup overdrive tables after resume. Preserve only some user-defined settings in user_overdrive_table if they're set. Copy restored user_overdrive_table into od_table to get correct values. On cold boot, BTC was triggered and GfxVfCurve was calibrated. We got VfCurve settings (a). On resuming back, BTC will be triggered again and GfxVfCurve will be recalibrated. VfCurve settings (b) got may be different from those of cold boot. So if we reuse those VfCurve settings (a) got on cold boot on suspend, we can run into discrepencies. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1897 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2276 Reviewed-by: Evan Quan Signed-off-by: Błażej Szczygieł Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 43 ++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 697e98a0a20a..75f18681e984 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2143,16 +2143,9 @@ static int sienna_cichlid_set_default_od_settings(struct smu_context *smu) (OverDriveTable_t *)smu->smu_table.boot_overdrive_table; OverDriveTable_t *user_od_table = (OverDriveTable_t *)smu->smu_table.user_overdrive_table; + OverDriveTable_t user_od_table_bak; int ret = 0; - /* - * For S3/S4/Runpm resume, no need to setup those overdrive tables again as - * - either they already have the default OD settings got during cold bootup - * - or they have some user customized OD settings which cannot be overwritten - */ - if (smu->adev->in_suspend) - return 0; - ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)boot_od_table, false); if (ret) { @@ -2163,7 +2156,23 @@ static int sienna_cichlid_set_default_od_settings(struct smu_context *smu) sienna_cichlid_dump_od_table(smu, boot_od_table); memcpy(od_table, boot_od_table, sizeof(OverDriveTable_t)); - memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t)); + + /* + * For S3/S4/Runpm resume, we need to setup those overdrive tables again, + * but we have to preserve user defined values in "user_od_table". + */ + if (!smu->adev->in_suspend) { + memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t)); + smu->user_dpm_profile.user_od = false; + } else if (smu->user_dpm_profile.user_od) { + memcpy(&user_od_table_bak, user_od_table, sizeof(OverDriveTable_t)); + memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t)); + user_od_table->GfxclkFmin = user_od_table_bak.GfxclkFmin; + user_od_table->GfxclkFmax = user_od_table_bak.GfxclkFmax; + user_od_table->UclkFmin = user_od_table_bak.UclkFmin; + user_od_table->UclkFmax = user_od_table_bak.UclkFmax; + user_od_table->VddGfxOffset = user_od_table_bak.VddGfxOffset; + } return 0; } @@ -2373,6 +2382,20 @@ static int sienna_cichlid_od_edit_dpm_table(struct smu_context *smu, return ret; } +static int sienna_cichlid_restore_user_od_settings(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTable_t *od_table = table_context->overdrive_table; + OverDriveTable_t *user_od_table = table_context->user_overdrive_table; + int res; + + res = smu_v11_0_restore_user_od_settings(smu); + if (res == 0) + memcpy(od_table, user_od_table, sizeof(OverDriveTable_t)); + + return res; +} + static int sienna_cichlid_run_btc(struct smu_context *smu) { int res; @@ -4400,7 +4423,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .set_default_od_settings = sienna_cichlid_set_default_od_settings, .od_edit_dpm_table = sienna_cichlid_od_edit_dpm_table, - .restore_user_od_settings = smu_v11_0_restore_user_od_settings, + .restore_user_od_settings = sienna_cichlid_restore_user_od_settings, .run_btc = sienna_cichlid_run_btc, .set_power_source = smu_v11_0_set_power_source, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, From d71e38df3b730a17ab6b25cabb2ccfe8a7f04385 Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Tue, 28 Feb 2023 18:48:41 +0800 Subject: [PATCH 06/18] drm/amdgpu/vcn: custom video info caps for sriov for sriov, we added a new flag to indicate av1 support, this will override the original caps info. Signed-off-by: Jane Jian Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4 + drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 3 +- drivers/gpu/drm/amd/amdgpu/soc21.c | 103 ++++++++++++++++++-- 3 files changed, 99 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b9e9480448af..4f7bab52282a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -124,6 +124,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_PP_ONE_VF = (1 << 4), /* Indirect Reg Access enabled */ AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5), + /* AV1 Support MODE*/ + AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6), }; enum AMDGIM_REG_ACCESS_FLAG { @@ -322,6 +324,8 @@ static inline bool is_virtual_machine(void) ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug) #define amdgpu_sriov_is_normal(adev) \ ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug)) +#define amdgpu_sriov_is_av1_support(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 6c97148ca0ed..24d42d24e6a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -93,7 +93,8 @@ union amd_sriov_msg_feature_flags { uint32_t mm_bw_management : 1; uint32_t pp_one_vf_mode : 1; uint32_t reg_indirect_acc : 1; - uint32_t reserved : 26; + uint32_t av1_support : 1; + uint32_t reserved : 25; } flags; uint32_t all; }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 061793d390cc..c82b3a7ea5f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -102,6 +102,59 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 = .codec_array = vcn_4_0_0_video_codecs_decode_array_vcn1, }; +/* SRIOV SOC21, not const since data is controlled by host */ +static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn0[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn1[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, +}; + +static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn0 = { + .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0), + .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn0, +}; + +static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = { + .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1), + .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn1, +}; + +static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, +}; + +static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn0 = { + .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0), + .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn0, +}; + +static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn1 = { + .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1), + .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn1, +}; + static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { @@ -112,16 +165,31 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, case IP_VERSION(4, 0, 0): case IP_VERSION(4, 0, 2): case IP_VERSION(4, 0, 4): - if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) { - if (encode) - *codecs = &vcn_4_0_0_video_codecs_encode_vcn1; - else - *codecs = &vcn_4_0_0_video_codecs_decode_vcn1; + if (amdgpu_sriov_vf(adev)) { + if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) || + !amdgpu_sriov_is_av1_support(adev)) { + if (encode) + *codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn1; + else + *codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn1; + } else { + if (encode) + *codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn0; + else + *codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn0; + } } else { - if (encode) - *codecs = &vcn_4_0_0_video_codecs_encode_vcn0; - else - *codecs = &vcn_4_0_0_video_codecs_decode_vcn0; + if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)) { + if (encode) + *codecs = &vcn_4_0_0_video_codecs_encode_vcn1; + else + *codecs = &vcn_4_0_0_video_codecs_decode_vcn1; + } else { + if (encode) + *codecs = &vcn_4_0_0_video_codecs_encode_vcn0; + else + *codecs = &vcn_4_0_0_video_codecs_decode_vcn0; + } } return 0; default: @@ -730,8 +798,23 @@ static int soc21_common_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_get_irq(adev); + if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) || + !amdgpu_sriov_is_av1_support(adev)) { + amdgpu_virt_update_sriov_video_codec(adev, + sriov_vcn_4_0_0_video_codecs_encode_array_vcn1, + ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1), + sriov_vcn_4_0_0_video_codecs_decode_array_vcn1, + ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1)); + } else { + amdgpu_virt_update_sriov_video_codec(adev, + sriov_vcn_4_0_0_video_codecs_encode_array_vcn0, + ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0), + sriov_vcn_4_0_0_video_codecs_decode_array_vcn0, + ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0)); + } + } return 0; } From 9da050b0d9e04439d225a2ec3044af70cdfb3933 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 8 Mar 2023 13:37:24 -0800 Subject: [PATCH 07/18] drm/amdkfd: fix potential kgd_mem UAFs kgd_mem pointers returned by kfd_process_device_translate_handle are only guaranteed to be valid while p->mutex is held. As soon as the mutex is unlocked, another thread can free the BO. Signed-off-by: Chia-I Wu Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index a0e30f21e12e..de310ed367ca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1312,14 +1312,14 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, args->n_success = i+1; } - mutex_unlock(&p->mutex); - err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); goto sync_memory_failed; } + mutex_unlock(&p->mutex); + /* Flush TLBs after waiting for the page table updates to complete */ for (i = 0; i < args->n_devices; i++) { peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); @@ -1335,9 +1335,9 @@ get_process_device_data_failed: bind_process_to_device_failed: get_mem_obj_from_handle_failed: map_memory_to_gpu_failed: +sync_memory_failed: mutex_unlock(&p->mutex); copy_from_user_failed: -sync_memory_failed: kfree(devices_arr); return err; @@ -1351,6 +1351,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, void *mem; long err = 0; uint32_t *devices_arr = NULL, i; + bool flush_tlb; if (!args->n_devices) { pr_debug("Device IDs array empty\n"); @@ -1403,16 +1404,19 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, } args->n_success = i+1; } - mutex_unlock(&p->mutex); - if (kfd_flush_tlb_after_unmap(pdd->dev)) { + flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev); + if (flush_tlb) { err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev, (struct kgd_mem *) mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); goto sync_memory_failed; } + } + mutex_unlock(&p->mutex); + if (flush_tlb) { /* Flush TLBs after waiting for the page table updates to complete */ for (i = 0; i < args->n_devices; i++) { peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); @@ -1428,9 +1432,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, bind_process_to_device_failed: get_mem_obj_from_handle_failed: unmap_memory_from_gpu_failed: +sync_memory_failed: mutex_unlock(&p->mutex); copy_from_user_failed: -sync_memory_failed: kfree(devices_arr); return err; } From 728cefa53a36ba378ed4a7f31a0c08289687d824 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 17 Feb 2023 16:08:21 -0500 Subject: [PATCH 08/18] drm/amd/display: Fix HDCP failing to enable after suspend [Why] On resume some displays are not ready for HDCP, so they will fail if we start the hdcp authentintication too soon. Add a delay so that the displays can be ready before we start. NOTE: Previoulsy this delay was set to 3 seconds but it was causing issues with compliance, 2 seconds should enough for compliance and the s3 resume case. [How] Change the Delay to 2 seconds. Reviewed-by: Aurabindo Pillai Acked-by: Qingqing Zhuo Signed-off-by: Bhawanpreet Lakha Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 8e572f07ec47..4abfd2c9679f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -561,7 +561,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) link->dp.mst_enabled = config->mst_enabled; link->dp.usb4_enabled = config->usb4_enabled; display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION; - link->adjust.auth_delay = 0; + link->adjust.auth_delay = 2; link->adjust.hdcp1.disable = 0; conn_state = aconnector->base.state; From 3fadda5de8073e2cb65744803a6941736411d55b Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Thu, 9 Mar 2023 10:02:45 +0800 Subject: [PATCH 09/18] drm/amdgpu: move poll enabled/disable into non DC path Some amd asics having reliable hotplug support don't call drm_kms_helper_poll_init in driver init sequence. However, due to the unified suspend/resume path for all asics, because the output_poll_work->func is not set for these asics, a warning arrives when suspending. [ 90.656049] [ 90.656050] ? console_unlock+0x4d/0x100 [ 90.656053] ? __irq_work_queue_local+0x27/0x60 [ 90.656056] ? irq_work_queue+0x2b/0x50 [ 90.656057] ? __wake_up_klogd+0x40/0x60 [ 90.656059] __cancel_work_timer+0xed/0x180 [ 90.656061] drm_kms_helper_poll_disable.cold+0x1f/0x2c [drm_kms_helper] [ 90.656072] amdgpu_device_suspend+0x81/0x170 [amdgpu] [ 90.656180] amdgpu_pmops_runtime_suspend+0xb5/0x1b0 [amdgpu] [ 90.656269] pci_pm_runtime_suspend+0x61/0x1b0 drm_kms_helper_poll_enable/disable is valid when poll_init is called in amdgpu code, which is only used in non DC path. So move such codes into non-DC path code to get rid of such warnings. v1: introduce use_kms_poll flag in amdgpu as the poll stuff check v2: use dc_enabled as the flag to simply code v3: move code into non DC path instead of relying on any flag Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2411 Fixes: a4e771729a51 ("drm/probe_helper: sort out poll_running vs poll_enabled") Reported-by: Bert Karwatzki Suggested-by: Dmitry Baryshkov Suggested-by: Alex Deucher Signed-off-by: Guchun Chen Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c4a4e2fe6681..da5b0258a237 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4145,8 +4145,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) DRM_WARN("smart shift update failed\n"); - drm_kms_helper_poll_disable(dev); - if (fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); @@ -4243,8 +4241,6 @@ exit: if (fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false); - drm_kms_helper_poll_enable(dev); - amdgpu_ras_resume(adev); if (adev->mode_info.num_crtc) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 503f89a766c3..d60fe7eb5579 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1618,6 +1618,8 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) struct drm_connector_list_iter iter; int r; + drm_kms_helper_poll_disable(dev); + /* turn off display hw */ drm_modeset_lock_all(dev); drm_connector_list_iter_begin(dev, &iter); @@ -1694,6 +1696,8 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev) drm_modeset_unlock_all(dev); + drm_kms_helper_poll_enable(dev); + return 0; } From 751281c55579f0cb0e56c9797d4663f689909681 Mon Sep 17 00:00:00 2001 From: Benjamin Cheng Date: Sun, 12 Mar 2023 20:47:39 -0400 Subject: [PATCH 10/18] drm/amd/display: Write to correct dirty_rect When FB_DAMAGE_CLIPS are provided in a non-MPO scenario, the loop does not use the counter i. This causes the fill_dc_dity_rect() to always fill dirty_rects[0], causing graphical artifacts when a damage clip aware DRM client sends more than 1 damage clip. Instead, use the flip_addrs->dirty_rect_count which is incremented by fill_dc_dirty_rect() on a successful fill. Fixes: 30ebe41582d1 ("drm/amd/display: add FB_DAMAGE_CLIPS support") Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2453 Signed-off-by: Benjamin Cheng Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 009ef917dad4..32abbafd43fa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5105,9 +5105,9 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, for (; flip_addrs->dirty_rect_count < num_clips; clips++) fill_dc_dirty_rect(new_plane_state->plane, - &dirty_rects[i], clips->x1, - clips->y1, clips->x2 - clips->x1, - clips->y2 - clips->y1, + &dirty_rects[flip_addrs->dirty_rect_count], + clips->x1, clips->y1, + clips->x2 - clips->x1, clips->y2 - clips->y1, &flip_addrs->dirty_rect_count, false); return; From 45aa07fa832412f1de99194f37fd847915d7e0f6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Mar 2023 22:45:59 -0500 Subject: [PATCH 11/18] drm/amdgpu/nv: fix codec array for SR_IOV Copy paste error. Fixes: 384334120b66 ("drm/amdgpu/nv: don't expose AV1 if VCN0 is harvested") Reported-by: Abaci Robot Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4454 Cc: Jiapeng Chong Acked-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 855d390c41de..22e25ca285f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1055,8 +1055,8 @@ static int nv_common_late_init(void *handle) amdgpu_virt_update_sriov_video_codec(adev, sriov_sc_video_codecs_encode_array, ARRAY_SIZE(sriov_sc_video_codecs_encode_array), - sriov_sc_video_codecs_decode_array_vcn1, - ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn1)); + sriov_sc_video_codecs_decode_array_vcn0, + ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn0)); } } From 542a56e8eb4467ae654eefab31ff194569db39cd Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Sun, 12 Mar 2023 13:51:00 -0300 Subject: [PATCH 12/18] drm/amdgpu/vcn: Disable indirect SRAM on Vangogh broken BIOSes The VCN firmware loading path enables the indirect SRAM mode if it's advertised as supported. We might have some cases of FW issues that prevents this mode to working properly though, ending-up in a failed probe. An example below, observed in the Steam Deck: [...] [drm] failed to load ucode VCN0_RAM(0x3A) [drm] psp gfx command LOAD_IP_FW(0x6) failed and response status is (0xFFFF0000) amdgpu 0000:04:00.0: [drm:amdgpu_ring_test_helper [amdgpu]] *ERROR* ring vcn_dec_0 test failed (-110) [drm:amdgpu_device_init.cold [amdgpu]] *ERROR* hw_init of IP block failed -110 amdgpu 0000:04:00.0: amdgpu: amdgpu_device_ip_init failed amdgpu 0000:04:00.0: amdgpu: Fatal error during GPU init [...] Disabling the VCN block circumvents this, but it's a very invasive workaround that turns off the entire feature. So, let's add a quirk on VCN loading that checks for known problematic BIOSes on Vangogh, so we can proactively disable the indirect SRAM mode and allow the HW proper probe and VCN IP block to work fine. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2385 Fixes: 82132ecc5432 ("drm/amdgpu: enable Vangogh VCN indirect sram mode") Cc: stable@vger.kernel.org Cc: James Zhu Cc: Leo Liu Signed-off-by: Guilherme G. Piccoli Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 25217b05c0ea..e7974de8b035 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -114,6 +115,24 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) adev->vcn.indirect_sram = true; + /* + * Some Steam Deck's BIOS versions are incompatible with the + * indirect SRAM mode, leading to amdgpu being unable to get + * properly probed (and even potentially crashing the kernel). + * Hence, check for these versions here - notice this is + * restricted to Vangogh (Deck's APU). + */ + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 2)) { + const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION); + + if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) || + !strncmp("F7A0114", bios_ver, 7))) { + adev->vcn.indirect_sram = false; + dev_info(adev->dev, + "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver); + } + } + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); From 709671ffb15dcd1b4f6afe2a9d8c67c7c4ead4a1 Mon Sep 17 00:00:00 2001 From: Saaem Rizvi Date: Mon, 27 Feb 2023 18:55:07 -0500 Subject: [PATCH 13/18] drm/amd/display: Remove OTG DIV register write for Virtual signals. [WHY] Hot plugging and then hot unplugging leads to k1 and k2 values to change, as signal is detected as a virtual signal on hot unplug. Writing these values to OTG_PIXEL_RATE_DIV register might cause primary display to blank (known hw bug). [HOW] No longer write k1 and k2 values to register if signal is virtual, we have safe guards in place in the case that k1 and k2 is unassigned so that an unknown value is not written to the register either. Cc: stable@vger.kernel.org Cc: Mario Limonciello Reviewed-by: Samson Tam Reviewed-by: Alvin Lee Acked-by: Qingqing Zhuo Signed-off-by: Saaem Rizvi Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 16f892125b6f..9d14045cccd6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -1104,7 +1104,7 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign *k2_div = PIXEL_RATE_DIV_BY_2; else *k2_div = PIXEL_RATE_DIV_BY_4; - } else if (dc_is_dp_signal(stream->signal) || dc_is_virtual_signal(stream->signal)) { + } else if (dc_is_dp_signal(stream->signal)) { if (two_pix_per_container) { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_2; From 56574f89dbd84004c3fd6485bcaafb5aa9b8be14 Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Thu, 3 Nov 2022 22:29:31 -0400 Subject: [PATCH 14/18] drm/amd/display: Do not set DRR on pipe Commit [WHY] Writing to DRR registers such as OTG_V_TOTAL_MIN on the same frame as a pipe commit can cause underflow. Cc: stable@vger.kernel.org Cc: Mario Limonciello Reviewed-by: Jun Lei Acked-by: Qingqing Zhuo Signed-off-by: Wesley Chalmers Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 3b4d4d68359b..df787fcf8e86 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -998,8 +998,5 @@ void dcn30_prepare_bandwidth(struct dc *dc, dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz); dcn20_prepare_bandwidth(dc, context); - - dc_dmub_srv_p_state_delegate(dc, - context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching, context); } From cbd6c1b17d3b42b7935526a86ad5f66838767d03 Mon Sep 17 00:00:00 2001 From: Cruise Hung Date: Thu, 2 Mar 2023 10:33:51 +0800 Subject: [PATCH 15/18] drm/amd/display: Fix DP MST sinks removal issue [Why] In USB4 DP tunneling, it's possible to have this scenario that the path becomes unavailable and CM tears down the path a little bit late. So, in this case, the HPD is high but fails to read any DPCD register. That causes the link connection type to be set to sst. And not all sinks are removed behind the MST branch. [How] Restore the link connection type if it fails to read DPCD register. Cc: stable@vger.kernel.org Cc: Mario Limonciello Reviewed-by: Wenjing Liu Acked-by: Qingqing Zhuo Signed-off-by: Cruise Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/link_detection.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 38216c789d77..f70025ef7b69 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -855,6 +855,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, struct dc_sink *prev_sink = NULL; struct dpcd_caps prev_dpcd_caps; enum dc_connection_type new_connection_type = dc_connection_none; + enum dc_connection_type pre_connection_type = link->type; const uint32_t post_oui_delay = 30; // 30ms DC_LOGGER_INIT(link->ctx->logger); @@ -957,6 +958,8 @@ static bool detect_link_and_local_sink(struct dc_link *link, } if (!detect_dp(link, &sink_caps, reason)) { + link->type = pre_connection_type; + if (prev_sink) dc_sink_release(prev_sink); return false; @@ -1244,11 +1247,16 @@ bool link_detect(struct dc_link *link, enum dc_detect_reason reason) bool is_delegated_to_mst_top_mgr = false; enum dc_connection_type pre_link_type = link->type; + DC_LOGGER_INIT(link->ctx->logger); + is_local_sink_detect_success = detect_link_and_local_sink(link, reason); if (is_local_sink_detect_success && link->local_sink) verify_link_capability(link, link->local_sink, reason); + DC_LOG_DC("%s: link_index=%d is_local_sink_detect_success=%d pre_link_type=%d link_type=%d\n", __func__, + link->link_index, is_local_sink_detect_success, pre_link_type, link->type); + if (is_local_sink_detect_success && link->local_sink && dc_is_dp_signal(link->local_sink->sink_signal) && link->dpcd_caps.is_mst_capable) From 7304ee979b6b6422f41a1312391a5e505fc29ccd Mon Sep 17 00:00:00 2001 From: Ayush Gupta Date: Thu, 2 Mar 2023 09:58:05 -0500 Subject: [PATCH 16/18] drm/amd/display: disconnect MPCC only on OTG change [Why] Framedrops are observed while playing Vp9 and Av1 10 bit video on 8k resolution using VSR while playback controls are disappeared/appeared [How] Now ODM 2 to 1 is disabled for 5k or greater resolutions on VSR. Cc: stable@vger.kernel.org Cc: Mario Limonciello Reviewed-by: Alvin Lee Acked-by: Qingqing Zhuo Signed-off-by: Ayush Gupta Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 74e50c09bb62..d024007f0f65 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1915,6 +1915,7 @@ int dcn32_populate_dml_pipes_from_context( bool subvp_in_use = false; uint8_t is_pipe_split_expected[MAX_PIPES] = {0}; struct dc_crtc_timing *timing; + bool vsr_odm_support = false; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); @@ -1932,12 +1933,15 @@ int dcn32_populate_dml_pipes_from_context( timing = &pipe->stream->timing; pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + vsr_odm_support = (res_ctx->pipe_ctx[i].stream->src.width >= 5120 && + res_ctx->pipe_ctx[i].stream->src.width > res_ctx->pipe_ctx[i].stream->dst.width); if (context->stream_count == 1 && context->stream_status[0].plane_count == 1 && !dc_is_hdmi_signal(res_ctx->pipe_ctx[i].stream->signal) && is_h_timing_divisible_by_2(res_ctx->pipe_ctx[i].stream) && pipe->stream->timing.pix_clk_100hz * 100 > DCN3_2_VMIN_DISPCLK_HZ && - dc->debug.enable_single_display_2to1_odm_policy) { + dc->debug.enable_single_display_2to1_odm_policy && + !vsr_odm_support) { //excluding 2to1 ODM combine on >= 5k vsr pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; } pipe_cnt++; From 20bc9f76b6a2455c6b54b91ae7634f147f64987f Mon Sep 17 00:00:00 2001 From: David Belanger Date: Tue, 28 Feb 2023 14:11:24 -0500 Subject: [PATCH 17/18] drm/amdkfd: Fixed kfd_process cleanup on module exit. Handle case when module is unloaded (kfd_exit) before a process space (mm_struct) is released. v2: Fixed potential race conditions by removing all kfd_process from the process table first, then working on releasing the resources. v3: Fixed loop element access / synchronization. Fixed extra empty lines. Signed-off-by: David Belanger Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_module.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 67 +++++++++++++++++++++--- 3 files changed, 62 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 09b966dc3768..aee2212e52f6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -77,6 +77,7 @@ err_ioctl: static void kfd_exit(void) { + kfd_cleanup_processes(); kfd_debugfs_fini(); kfd_process_destroy_wq(); kfd_procfs_shutdown(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index bfa30d12406b..7e4d992e48b3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -928,6 +928,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev); int kfd_process_create_wq(void); void kfd_process_destroy_wq(void); +void kfd_cleanup_processes(void); struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *task); struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 7acd55a814b2..4208e0f01064 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1167,6 +1167,17 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn) kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier)); } +static void kfd_process_notifier_release_internal(struct kfd_process *p) +{ + cancel_delayed_work_sync(&p->eviction_work); + cancel_delayed_work_sync(&p->restore_work); + + /* Indicate to other users that MM is no longer valid */ + p->mm = NULL; + + mmu_notifier_put(&p->mmu_notifier); +} + static void kfd_process_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { @@ -1181,17 +1192,22 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, return; mutex_lock(&kfd_processes_mutex); + /* + * Do early return if table is empty. + * + * This could potentially happen if this function is called concurrently + * by mmu_notifier and by kfd_cleanup_pocesses. + * + */ + if (hash_empty(kfd_processes_table)) { + mutex_unlock(&kfd_processes_mutex); + return; + } hash_del_rcu(&p->kfd_processes); mutex_unlock(&kfd_processes_mutex); synchronize_srcu(&kfd_processes_srcu); - cancel_delayed_work_sync(&p->eviction_work); - cancel_delayed_work_sync(&p->restore_work); - - /* Indicate to other users that MM is no longer valid */ - p->mm = NULL; - - mmu_notifier_put(&p->mmu_notifier); + kfd_process_notifier_release_internal(p); } static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { @@ -1200,6 +1216,43 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .free_notifier = kfd_process_free_notifier, }; +/* + * This code handles the case when driver is being unloaded before all + * mm_struct are released. We need to safely free the kfd_process and + * avoid race conditions with mmu_notifier that might try to free them. + * + */ +void kfd_cleanup_processes(void) +{ + struct kfd_process *p; + struct hlist_node *p_temp; + unsigned int temp; + HLIST_HEAD(cleanup_list); + + /* + * Move all remaining kfd_process from the process table to a + * temp list for processing. Once done, callback from mmu_notifier + * release will not see the kfd_process in the table and do early return, + * avoiding double free issues. + */ + mutex_lock(&kfd_processes_mutex); + hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) { + hash_del_rcu(&p->kfd_processes); + synchronize_srcu(&kfd_processes_srcu); + hlist_add_head(&p->kfd_processes, &cleanup_list); + } + mutex_unlock(&kfd_processes_mutex); + + hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes) + kfd_process_notifier_release_internal(p); + + /* + * Ensures that all outstanding free_notifier get called, triggering + * the release of the kfd_process struct. + */ + mmu_notifier_synchronize(); +} + static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) { unsigned long offset; From f3921a9a641483784448fb982b2eb738b383d9b9 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 13 Mar 2023 20:03:08 -0400 Subject: [PATCH 18/18] drm/amdgpu: Don't resume IOMMU after incomplete init Check kfd->init_complete in kgd2kfd_iommu_resume, consistent with other kgd2kfd calls. This should fix IOMMU errors on resume from suspend when KFD IOMMU initialization failed. Reported-by: Matt Fagnani Link: https://lore.kernel.org/r/4a3b225c-2ffd-e758-4de1-447375e34cad@bell.net/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=217170 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2454 Cc: Vasant Hegde Cc: Linux regression tracking (Thorsten Leemhuis) Cc: stable@vger.kernel.org Signed-off-by: Felix Kuehling Acked-by: Alex Deucher Tested-by: Matt Fagnani Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3de7f616a001..ec70a1658dc3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -59,6 +59,7 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size); static void kfd_gtt_sa_fini(struct kfd_dev *kfd); +static int kfd_resume_iommu(struct kfd_dev *kfd); static int kfd_resume(struct kfd_dev *kfd); static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) @@ -624,7 +625,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, svm_migrate_init(kfd->adev); - if (kgd2kfd_resume_iommu(kfd)) + if (kfd_resume_iommu(kfd)) goto device_iommu_error; if (kfd_resume(kfd)) @@ -772,6 +773,14 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) } int kgd2kfd_resume_iommu(struct kfd_dev *kfd) +{ + if (!kfd->init_complete) + return 0; + + return kfd_resume_iommu(kfd); +} + +static int kfd_resume_iommu(struct kfd_dev *kfd) { int err = 0;