From 7bb91228291aa95bfee3b9d5710887673711c74c Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Wed, 7 Sep 2022 20:31:36 +0800 Subject: [PATCH 01/88] drm/amd/pm: disable BACO entry/exit completely on several sienna cichlid cards To avoid hardware intermittent failures. Signed-off-by: Guchun Chen Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 7ed4d4265797..74996a8fb671 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -369,6 +369,17 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu) smu_baco->platform_support = (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? true : false; + + /* + * Disable BACO entry/exit completely on below SKUs to + * avoid hardware intermittent failures. + */ + if (((adev->pdev->device == 0x73A1) && + (adev->pdev->revision == 0x00)) || + ((adev->pdev->device == 0x73BF) && + (adev->pdev->revision == 0xCF))) + smu_baco->platform_support = false; + } } From dd6aeb4e5f59984078275169986ff601f5d16385 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 8 Sep 2022 08:28:57 +0530 Subject: [PATCH 02/88] drm/amdgpu: Don't enable LTR if not supported As per PCIE Base Spec r4.0 Section 6.18 'Software must not enable LTR in an Endpoint unless the Root Complex and all intermediate Switches indicate support for LTR.' This fixes the Unsupported Request error reported through AER during ASPM enablement. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216455 The error was unnoticed before and got visible because of the commit referenced below. This doesn't fix anything in the commit below, rather fixes the issue in amdgpu exposed by the commit. The reference is only to associate this commit with below one so that both go together. Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()") Reported-by: Gustaw Smolarczyk Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 9 ++++++++- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 9 ++++++++- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 9 ++++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index b465baa26762..aa761ff3a5fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -380,6 +380,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, WREG32_PCIE(smnPCIE_LC_CNTL, data); } +#ifdef CONFIG_PCIEASPM static void nbio_v2_3_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; @@ -401,9 +402,11 @@ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); } +#endif static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) { +#ifdef CONFIG_PCIEASPM uint32_t def, data; def = data = RREG32_PCIE(smnPCIE_LC_CNTL); @@ -459,7 +462,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL6, data); - nbio_v2_3_program_ltr(adev); + /* Don't bother about LTR if LTR is not enabled + * in the path */ + if (adev->pdev->ltr_path) + nbio_v2_3_program_ltr(adev); def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; @@ -483,6 +489,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL3, data); +#endif } static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index f7f6ddebd3e4..37615a77287b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -282,6 +282,7 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } +#ifdef CONFIG_PCIEASPM static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; @@ -303,9 +304,11 @@ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); } +#endif static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) { +#ifdef CONFIG_PCIEASPM uint32_t def, data; def = data = RREG32_PCIE(smnPCIE_LC_CNTL); @@ -361,7 +364,10 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL6, data); - nbio_v6_1_program_ltr(adev); + /* Don't bother about LTR if LTR is not enabled + * in the path */ + if (adev->pdev->ltr_path) + nbio_v6_1_program_ltr(adev); def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; @@ -385,6 +391,7 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL3, data); +#endif } const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 11848d1e238b..19455a725939 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -673,6 +673,7 @@ struct amdgpu_nbio_ras nbio_v7_4_ras = { }; +#ifdef CONFIG_PCIEASPM static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; @@ -694,9 +695,11 @@ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); } +#endif static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) { +#ifdef CONFIG_PCIEASPM uint32_t def, data; if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4)) @@ -755,7 +758,10 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL6, data); - nbio_v7_4_program_ltr(adev); + /* Don't bother about LTR if LTR is not enabled + * in the path */ + if (adev->pdev->ltr_path) + nbio_v7_4_program_ltr(adev); def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; @@ -779,6 +785,7 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL3, data); +#endif } const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { From 542ab4917338d735e2dde6c8f4e042ac18ff5152 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 6 Sep 2022 15:01:49 -0400 Subject: [PATCH 03/88] drm/amdgpu: use dirty framebuffer helper Currently, we aren't handling DRM_IOCTL_MODE_DIRTYFB. So, use drm_atomic_helper_dirtyfb() as the dirty callback in the amdgpu_fb_funcs struct. Signed-off-by: Hamza Mahfooz Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index c20922a5af9f..5b09c8f4fe95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -496,6 +497,7 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { .destroy = drm_gem_fb_destroy, .create_handle = drm_gem_fb_create_handle, + .dirty = drm_atomic_helper_dirtyfb, }; uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, From a4a3798f0077a1584491574027b0067c13396d12 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Wed, 7 Sep 2022 14:13:54 -0400 Subject: [PATCH 04/88] drm/amdkfd: Fix CRIU restore op due to doorbell offset Recently introduced change to allocate doorbells only when the first queue is created or mapped for CPU / GPU access, did not consider Checkpoint Restore scenario completely. This fix allows the CRIU restore operation by extending the doorbell optimization to CRIU restore scenario. Fixes: 16f0013157bf ("drm/amdkfd: Allocate doorbells only when needed") Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 ++++++ drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 3 +++ drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 7 +++++++ 3 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 84da1a9ce37c..56f7307c21d2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2153,6 +2153,12 @@ static int criu_restore_devices(struct kfd_process *p, ret = PTR_ERR(pdd); goto exit; } + + if (!pdd->doorbell_index && + kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) { + ret = -ENOMEM; + goto exit; + } } /* diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index b33798f89ef0..cd4e61bf0493 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -303,6 +303,9 @@ int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_inde if (r > 0) *doorbell_index = r; + if (r < 0) + pr_err("Failed to allocate process doorbells\n"); + return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 6e3e7f54381b..5137476ec18e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -857,6 +857,13 @@ int kfd_criu_restore_queue(struct kfd_process *p, ret = -EINVAL; goto exit; } + + if (!pdd->doorbell_index && + kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) { + ret = -ENOMEM; + goto exit; + } + /* data stored in this order: mqd, ctl_stack */ mqd = q_extra_data; ctl_stack = mqd + q_data->mqd_size; From 1ed1f6be6eb3daa8097d6419dde516c9854a8790 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 8 Sep 2022 09:36:18 -0400 Subject: [PATCH 05/88] drm/amd/amdgpu: update GC 10.3.0 pwrdec The 10.3 GC headers were missing most of the pwrdec block. This patch adds the registers and bits present in the 10.1 header but based on the contents of the 10.3 specs. Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../include/asic_reg/gc/gc_10_3_0_offset.h | 100 ++ .../include/asic_reg/gc/gc_10_3_0_sh_mask.h | 1459 ++++++++++++++++- 2 files changed, 1558 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h index 1115dfc6ae1f..3973110f149c 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h @@ -9800,18 +9800,118 @@ // addressBlock: gc_pwrdec // base address: 0x3c000 +#define mmCGTS_RD_CTRL_REG 0x5004 +#define mmCGTS_RD_CTRL_REG_BASE_IDX 1 +#define mmCGTS_RD_REG 0x5005 +#define mmCGTS_RD_REG_BASE_IDX 1 #define mmCGTS_TCC_DISABLE 0x5006 #define mmCGTS_TCC_DISABLE_BASE_IDX 1 #define mmCGTS_USER_TCC_DISABLE 0x5007 #define mmCGTS_USER_TCC_DISABLE_BASE_IDX 1 +#define mmCGTS_STATUS_REG 0x5008 +#define mmCGTS_STATUS_REG_BASE_IDX 1 +#define mmCGTT_SPI_CGTSSM_CLK_CTRL 0x5009 +#define mmCGTT_SPI_CGTSSM_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_SPI_PS_CLK_CTRL 0x507d +#define mmCGTT_SPI_PS_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_SPIS_CLK_CTRL 0x507e +#define mmCGTT_SPIS_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_SPI_CLK_CTRL 0x5080 +#define mmCGTT_SPI_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_PC_CLK_CTRL 0x5081 +#define mmCGTT_PC_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_BCI_CLK_CTRL 0x5082 +#define mmCGTT_BCI_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_VGT_CLK_CTRL 0x5084 +#define mmCGTT_VGT_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_IA_CLK_CTRL 0x5085 +#define mmCGTT_IA_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_WD_CLK_CTRL 0x5086 +#define mmCGTT_WD_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_GS_NGG_CLK_CTRL 0x5087 +#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_PA_CLK_CTRL 0x5088 +#define mmCGTT_PA_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_SC_CLK_CTRL0 0x5089 +#define mmCGTT_SC_CLK_CTRL0_BASE_IDX 1 +#define mmCGTT_SC_CLK_CTRL1 0x508a +#define mmCGTT_SC_CLK_CTRL1_BASE_IDX 1 +#define mmCGTT_SC_CLK_CTRL2 0x508b +#define mmCGTT_SC_CLK_CTRL2_BASE_IDX 1 +#define mmCGTT_SQ_CLK_CTRL 0x508c +#define mmCGTT_SQ_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_SQG_CLK_CTRL 0x508d +#define mmCGTT_SQG_CLK_CTRL_BASE_IDX 1 #define mmSQ_ALU_CLK_CTRL 0x508e #define mmSQ_ALU_CLK_CTRL_BASE_IDX 1 #define mmSQ_TEX_CLK_CTRL 0x508f #define mmSQ_TEX_CLK_CTRL_BASE_IDX 1 #define mmSQ_LDS_CLK_CTRL 0x5090 #define mmSQ_LDS_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_SX_CLK_CTRL0 0x5094 +#define mmCGTT_SX_CLK_CTRL0_BASE_IDX 1 +#define mmCGTT_SX_CLK_CTRL1 0x5095 +#define mmCGTT_SX_CLK_CTRL1_BASE_IDX 1 +#define mmCGTT_SX_CLK_CTRL2 0x5096 +#define mmCGTT_SX_CLK_CTRL2_BASE_IDX 1 +#define mmCGTT_SX_CLK_CTRL3 0x5097 +#define mmCGTT_SX_CLK_CTRL3_BASE_IDX 1 +#define mmCGTT_SX_CLK_CTRL4 0x5098 +#define mmCGTT_SX_CLK_CTRL4_BASE_IDX 1 +#define mmTD_CGTT_CTRL 0x509c +#define mmTD_CGTT_CTRL_BASE_IDX 1 +#define mmTA_CGTT_CTRL 0x509d +#define mmTA_CGTT_CTRL_BASE_IDX 1 +#define mmCGTT_TCPI_CLK_CTRL 0x5109 +#define mmCGTT_TCPI_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_GDS_CLK_CTRL 0x50a0 +#define mmCGTT_GDS_CLK_CTRL_BASE_IDX 1 +#define mmDB_CGTT_CLK_CTRL_0 0x50a4 +#define mmDB_CGTT_CLK_CTRL_0_BASE_IDX 1 +#define mmCB_CGTT_SCLK_CTRL 0x50a8 +#define mmCB_CGTT_SCLK_CTRL_BASE_IDX 1 +#define mmGL2C_CGTT_SCLK_CTRL 0x50fc +#define mmGL2C_CGTT_SCLK_CTRL_BASE_IDX 1 +#define mmGL2A_CGTT_SCLK_CTRL 0x50ac +#define mmGL2A_CGTT_SCLK_CTRL_BASE_IDX 1 +#define mmGL2A_CGTT_SCLK_CTRL_1 0x50ad +#define mmGL2A_CGTT_SCLK_CTRL_1_BASE_IDX 1 +#define mmCGTT_CP_CLK_CTRL 0x50b0 +#define mmCGTT_CP_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_CPF_CLK_CTRL 0x50b1 +#define mmCGTT_CPF_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_CPC_CLK_CTRL 0x50b2 +#define mmCGTT_CPC_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_RLC_CLK_CTRL 0x50b5 +#define mmCGTT_RLC_CLK_CTRL_BASE_IDX 1 #define mmRLC_GFX_RM_CNTL 0x50b6 #define mmRLC_GFX_RM_CNTL_BASE_IDX 1 +#define mmRMI_CGTT_SCLK_CTRL 0x50c0 +#define mmRMI_CGTT_SCLK_CTRL_BASE_IDX 1 +#define mmCGTT_TCPF_CLK_CTRL 0x5111 +#define mmCGTT_TCPF_CLK_CTRL_BASE_IDX 1 +#define mmGCR_CGTT_SCLK_CTRL 0x50c2 +#define mmGCR_CGTT_SCLK_CTRL_BASE_IDX 1 +#define mmUTCL1_CGTT_CLK_CTRL 0x50c3 +#define mmUTCL1_CGTT_CLK_CTRL_BASE_IDX 1 +#define mmGCEA_CGTT_CLK_CTRL 0x50c4 +#define mmGCEA_CGTT_CLK_CTRL_BASE_IDX 1 +#define mmSE_CAC_CGTT_CLK_CTRL 0x50d0 +#define mmSE_CAC_CGTT_CLK_CTRL_BASE_IDX 1 +#define mmGC_CAC_CGTT_CLK_CTRL 0x50d8 +#define mmGC_CAC_CGTT_CLK_CTRL_BASE_IDX 1 +#define mmGRBM_CGTT_CLK_CNTL 0x50e0 +#define mmGRBM_CGTT_CLK_CNTL_BASE_IDX 1 +#define mmGUS_CGTT_CLK_CTRL 0x50f4 +#define mmGUS_CGTT_CLK_CTRL_BASE_IDX 1 +#define mmCGTT_PH_CLK_CTRL0 0x50f8 +#define mmCGTT_PH_CLK_CTRL0_BASE_IDX 1 +#define mmCGTT_PH_CLK_CTRL1 0x50f9 +#define mmCGTT_PH_CLK_CTRL1_BASE_IDX 1 +#define mmCGTT_PH_CLK_CTRL2 0x50fa +#define mmCGTT_PH_CLK_CTRL2_BASE_IDX 1 +#define mmCGTT_PH_CLK_CTRL3 0x50fb +#define mmCGTT_PH_CLK_CTRL3_BASE_IDX 1 // addressBlock: gc_hypdec diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h index 83faa276523f..d4e8ff22ecb8 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h @@ -34547,6 +34547,14 @@ // addressBlock: gc_pwrdec +//CGTS_RD_CTRL_REG +#define CGTS_RD_CTRL_REG__ROW_MUX_SEL__SHIFT 0x0 +#define CGTS_RD_CTRL_REG__REG_MUX_SEL__SHIFT 0x4 +#define CGTS_RD_CTRL_REG__ROW_MUX_SEL_MASK 0x0000000FL +#define CGTS_RD_CTRL_REG__REG_MUX_SEL_MASK 0x000000F0L +//CGTS_RD_REG +#define CGTS_RD_REG__READ_DATA__SHIFT 0x0 +#define CGTS_RD_REG__READ_DATA_MASK 0xFFFFFFFFL //CGTS_TCC_DISABLE #define CGTS_TCC_DISABLE__HI_TCC_DISABLE__SHIFT 0x8 #define CGTS_TCC_DISABLE__TCC_DISABLE__SHIFT 0x10 @@ -34557,6 +34565,485 @@ #define CGTS_USER_TCC_DISABLE__TCC_DISABLE__SHIFT 0x10 #define CGTS_USER_TCC_DISABLE__HI_TCC_DISABLE_MASK 0x0000FF00L #define CGTS_USER_TCC_DISABLE__TCC_DISABLE_MASK 0xFFFF0000L +//CGTS_STATUS_REG +#define CGTS_STATUS_REG__SA0_QUAD0_MGCG_ENABLED__SHIFT 0x0 +#define CGTS_STATUS_REG__SA0_QUAD0_CG_STATUS__SHIFT 0x1 +#define CGTS_STATUS_REG__SA1_QUAD0_MGCG_ENABLED__SHIFT 0x8 +#define CGTS_STATUS_REG__SA1_QUAD0_CG_STATUS__SHIFT 0x9 +#define CGTS_STATUS_REG__SA0_QUAD0_MGCG_ENABLED_MASK 0x00000001L +#define CGTS_STATUS_REG__SA0_QUAD0_CG_STATUS_MASK 0x00000006L +#define CGTS_STATUS_REG__SA1_QUAD0_MGCG_ENABLED_MASK 0x00000100L +#define CGTS_STATUS_REG__SA1_QUAD0_CG_STATUS_MASK 0x00000600L +//CGTT_SPI_CGTSSM_CLK_CTRL +#define CGTT_SPI_CGTSSM_CLK_CTRL__GRP3_OVERRIDE__SHIFT 0x1b +#define CGTT_SPI_CGTSSM_CLK_CTRL__GRP2_OVERRIDE__SHIFT 0x1c +#define CGTT_SPI_CGTSSM_CLK_CTRL__GRP1_OVERRIDE__SHIFT 0x1d +#define CGTT_SPI_CGTSSM_CLK_CTRL__GRP0_OVERRIDE__SHIFT 0x1e +#define CGTT_SPI_CGTSSM_CLK_CTRL__GRP3_OVERRIDE_MASK 0x08000000L +#define CGTT_SPI_CGTSSM_CLK_CTRL__GRP2_OVERRIDE_MASK 0x10000000L +#define CGTT_SPI_CGTSSM_CLK_CTRL__GRP1_OVERRIDE_MASK 0x20000000L +#define CGTT_SPI_CGTSSM_CLK_CTRL__GRP0_OVERRIDE_MASK 0x40000000L +//CGTT_SPI_PS_CLK_CTRL +#define CGTT_SPI_PS_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_SPI_PS_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x10 +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x11 +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x12 +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x13 +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x14 +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x15 +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x16 +#define CGTT_SPI_PS_CLK_CTRL__GRP6_OVERRIDE__SHIFT 0x18 +#define CGTT_SPI_PS_CLK_CTRL__GRP5_OVERRIDE__SHIFT 0x19 +#define CGTT_SPI_PS_CLK_CTRL__GRP4_OVERRIDE__SHIFT 0x1a +#define CGTT_SPI_PS_CLK_CTRL__GRP3_OVERRIDE__SHIFT 0x1b +#define CGTT_SPI_PS_CLK_CTRL__GRP2_OVERRIDE__SHIFT 0x1c +#define CGTT_SPI_PS_CLK_CTRL__GRP1_OVERRIDE__SHIFT 0x1d +#define CGTT_SPI_PS_CLK_CTRL__GRP0_OVERRIDE__SHIFT 0x1e +#define CGTT_SPI_PS_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f +#define CGTT_SPI_PS_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_SPI_PS_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00010000L +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00020000L +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00040000L +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00080000L +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00100000L +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00200000L +#define CGTT_SPI_PS_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00400000L +#define CGTT_SPI_PS_CLK_CTRL__GRP6_OVERRIDE_MASK 0x01000000L +#define CGTT_SPI_PS_CLK_CTRL__GRP5_OVERRIDE_MASK 0x02000000L +#define CGTT_SPI_PS_CLK_CTRL__GRP4_OVERRIDE_MASK 0x04000000L +#define CGTT_SPI_PS_CLK_CTRL__GRP3_OVERRIDE_MASK 0x08000000L +#define CGTT_SPI_PS_CLK_CTRL__GRP2_OVERRIDE_MASK 0x10000000L +#define CGTT_SPI_PS_CLK_CTRL__GRP1_OVERRIDE_MASK 0x20000000L +#define CGTT_SPI_PS_CLK_CTRL__GRP0_OVERRIDE_MASK 0x40000000L +#define CGTT_SPI_PS_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000L +//CGTT_SPIS_CLK_CTRL +#define CGTT_SPIS_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_SPIS_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x10 +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x11 +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x12 +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x13 +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x14 +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x15 +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x16 +#define CGTT_SPIS_CLK_CTRL__GRP6_OVERRIDE__SHIFT 0x18 +#define CGTT_SPIS_CLK_CTRL__GRP5_OVERRIDE__SHIFT 0x19 +#define CGTT_SPIS_CLK_CTRL__GRP4_OVERRIDE__SHIFT 0x1a +#define CGTT_SPIS_CLK_CTRL__GRP3_OVERRIDE__SHIFT 0x1b +#define CGTT_SPIS_CLK_CTRL__GRP2_OVERRIDE__SHIFT 0x1c +#define CGTT_SPIS_CLK_CTRL__GRP1_OVERRIDE__SHIFT 0x1d +#define CGTT_SPIS_CLK_CTRL__GRP0_OVERRIDE__SHIFT 0x1e +#define CGTT_SPIS_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f +#define CGTT_SPIS_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_SPIS_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00010000L +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00020000L +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00040000L +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00080000L +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00100000L +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00200000L +#define CGTT_SPIS_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00400000L +#define CGTT_SPIS_CLK_CTRL__GRP6_OVERRIDE_MASK 0x01000000L +#define CGTT_SPIS_CLK_CTRL__GRP5_OVERRIDE_MASK 0x02000000L +#define CGTT_SPIS_CLK_CTRL__GRP4_OVERRIDE_MASK 0x04000000L +#define CGTT_SPIS_CLK_CTRL__GRP3_OVERRIDE_MASK 0x08000000L +#define CGTT_SPIS_CLK_CTRL__GRP2_OVERRIDE_MASK 0x10000000L +#define CGTT_SPIS_CLK_CTRL__GRP1_OVERRIDE_MASK 0x20000000L +#define CGTT_SPIS_CLK_CTRL__GRP0_OVERRIDE_MASK 0x40000000L +#define CGTT_SPIS_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000L +//CGTT_SPI_CLK_CTRL +#define CGTT_SPI_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_SPI_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x10 +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x11 +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x12 +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x13 +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x14 +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x15 +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x16 +#define CGTT_SPI_CLK_CTRL__GRP6_OVERRIDE__SHIFT 0x18 +#define CGTT_SPI_CLK_CTRL__GRP5_OVERRIDE__SHIFT 0x19 +#define CGTT_SPI_CLK_CTRL__GRP4_OVERRIDE__SHIFT 0x1a +#define CGTT_SPI_CLK_CTRL__GRP3_OVERRIDE__SHIFT 0x1b +#define CGTT_SPI_CLK_CTRL__GRP2_OVERRIDE__SHIFT 0x1c +#define CGTT_SPI_CLK_CTRL__GRP1_OVERRIDE__SHIFT 0x1d +#define CGTT_SPI_CLK_CTRL__GRP0_OVERRIDE__SHIFT 0x1e +#define CGTT_SPI_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f +#define CGTT_SPI_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_SPI_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00010000L +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00020000L +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00040000L +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00080000L +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00100000L +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00200000L +#define CGTT_SPI_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00400000L +#define CGTT_SPI_CLK_CTRL__GRP6_OVERRIDE_MASK 0x01000000L +#define CGTT_SPI_CLK_CTRL__GRP5_OVERRIDE_MASK 0x02000000L +#define CGTT_SPI_CLK_CTRL__GRP4_OVERRIDE_MASK 0x04000000L +#define CGTT_SPI_CLK_CTRL__GRP3_OVERRIDE_MASK 0x08000000L +#define CGTT_SPI_CLK_CTRL__GRP2_OVERRIDE_MASK 0x10000000L +#define CGTT_SPI_CLK_CTRL__GRP1_OVERRIDE_MASK 0x20000000L +#define CGTT_SPI_CLK_CTRL__GRP0_OVERRIDE_MASK 0x40000000L +#define CGTT_SPI_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000L +//CGTT_PC_CLK_CTRL +#define CGTT_PC_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_PC_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_PC_CLK_CTRL__PC_RAM_FGCG_OVERRIDE__SHIFT 0x11 +#define CGTT_PC_CLK_CTRL__PC_WRITE_CLK_EN_OVERRIDE__SHIFT 0xd +#define CGTT_PC_CLK_CTRL__PC_READ_CLK_EN_OVERRIDE__SHIFT 0xe +#define CGTT_PC_CLK_CTRL__CORE3_OVERRIDE__SHIFT 0x1b +#define CGTT_PC_CLK_CTRL__CORE2_OVERRIDE__SHIFT 0x1c +#define CGTT_PC_CLK_CTRL__CORE1_OVERRIDE__SHIFT 0x1d +#define CGTT_PC_CLK_CTRL__CORE0_OVERRIDE__SHIFT 0x1e +#define CGTT_PC_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_PC_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_PC_CLK_CTRL__PC_RAM_FGCG_OVERRIDE_MASK 0x00020000L +#define CGTT_PC_CLK_CTRL__PC_WRITE_CLK_EN_OVERRIDE_MASK 0x00002000L +#define CGTT_PC_CLK_CTRL__PC_READ_CLK_EN_OVERRIDE_MASK 0x00004000L +#define CGTT_PC_CLK_CTRL__CORE3_OVERRIDE_MASK 0x08000000L +#define CGTT_PC_CLK_CTRL__CORE2_OVERRIDE_MASK 0x10000000L +#define CGTT_PC_CLK_CTRL__CORE1_OVERRIDE_MASK 0x20000000L +#define CGTT_PC_CLK_CTRL__CORE0_OVERRIDE_MASK 0x40000000L +//CGTT_BCI_CLK_CTRL +#define CGTT_BCI_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_BCI_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_BCI_CLK_CTRL__RESERVED__SHIFT 0xc +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_BCI_CLK_CTRL__CORE6_OVERRIDE__SHIFT 0x18 +#define CGTT_BCI_CLK_CTRL__CORE5_OVERRIDE__SHIFT 0x19 +#define CGTT_BCI_CLK_CTRL__CORE4_OVERRIDE__SHIFT 0x1a +#define CGTT_BCI_CLK_CTRL__CORE3_OVERRIDE__SHIFT 0x1b +#define CGTT_BCI_CLK_CTRL__CORE2_OVERRIDE__SHIFT 0x1c +#define CGTT_BCI_CLK_CTRL__CORE1_OVERRIDE__SHIFT 0x1d +#define CGTT_BCI_CLK_CTRL__CORE0_OVERRIDE__SHIFT 0x1e +#define CGTT_BCI_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f +#define CGTT_BCI_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_BCI_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_BCI_CLK_CTRL__RESERVED_MASK 0x0000F000L +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_BCI_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_BCI_CLK_CTRL__CORE6_OVERRIDE_MASK 0x01000000L +#define CGTT_BCI_CLK_CTRL__CORE5_OVERRIDE_MASK 0x02000000L +#define CGTT_BCI_CLK_CTRL__CORE4_OVERRIDE_MASK 0x04000000L +#define CGTT_BCI_CLK_CTRL__CORE3_OVERRIDE_MASK 0x08000000L +#define CGTT_BCI_CLK_CTRL__CORE2_OVERRIDE_MASK 0x10000000L +#define CGTT_BCI_CLK_CTRL__CORE1_OVERRIDE_MASK 0x20000000L +#define CGTT_BCI_CLK_CTRL__CORE0_OVERRIDE_MASK 0x40000000L +#define CGTT_BCI_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000L +//CGTT_VGT_CLK_CTRL +#define CGTT_VGT_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_VGT_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_VGT_CLK_CTRL__PERF_ENABLE__SHIFT 0xf +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_VGT_CLK_CTRL__TESS_OVERRIDE__SHIFT 0x1c +#define CGTT_VGT_CLK_CTRL__CORE_OVERRIDE__SHIFT 0x1d +#define CGTT_VGT_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f +#define CGTT_VGT_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_VGT_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_VGT_CLK_CTRL__PERF_ENABLE_MASK 0x00008000L +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_VGT_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_VGT_CLK_CTRL__TESS_OVERRIDE_MASK 0x10000000L +#define CGTT_VGT_CLK_CTRL__CORE_OVERRIDE_MASK 0x20000000L +#define CGTT_VGT_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000L +//CGTT_IA_CLK_CTRL +#define CGTT_IA_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_IA_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_IA_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18 +#define CGTT_IA_CLK_CTRL__PERF_ENABLE__SHIFT 0x19 +#define CGTT_IA_CLK_CTRL__CORE_OVERRIDE__SHIFT 0x1e +#define CGTT_IA_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f +#define CGTT_IA_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_IA_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_IA_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_IA_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x01000000L +#define CGTT_IA_CLK_CTRL__PERF_ENABLE_MASK 0x02000000L +#define CGTT_IA_CLK_CTRL__CORE_OVERRIDE_MASK 0x40000000L +#define CGTT_IA_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000L +//CGTT_WD_CLK_CTRL +#define CGTT_WD_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_WD_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_WD_CLK_CTRL__PERF_ENABLE__SHIFT 0xf +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_WD_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18 +#define CGTT_WD_CLK_CTRL__CORE_OVERRIDE__SHIFT 0x1d +#define CGTT_WD_CLK_CTRL__RBIU_INPUT_OVERRIDE__SHIFT 0x1e +#define CGTT_WD_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f +#define CGTT_WD_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_WD_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_WD_CLK_CTRL__PERF_ENABLE_MASK 0x00008000L +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_WD_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_WD_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x01000000L +#define CGTT_WD_CLK_CTRL__CORE_OVERRIDE_MASK 0x20000000L +#define CGTT_WD_CLK_CTRL__RBIU_INPUT_OVERRIDE_MASK 0x40000000L +#define CGTT_WD_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000L +//CGTT_GS_NGG_CLK_CTRL +#define CGTT_GS_NGG_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_GS_NGG_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_GS_NGG_CLK_CTRL__PERF_ENABLE__SHIFT 0xf +#define CGTT_GS_NGG_CLK_CTRL__DBG_ENABLE__SHIFT 0x10 +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_GS_NGG_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18 +#define CGTT_GS_NGG_CLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19 +#define CGTT_GS_NGG_CLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a +#define CGTT_GS_NGG_CLK_CTRL__PRIMGEN_OVERRIDE__SHIFT 0x1c +#define CGTT_GS_NGG_CLK_CTRL__GS1_OVERRIDE__SHIFT 0x1d +#define CGTT_GS_NGG_CLK_CTRL__GS0_OVERRIDE__SHIFT 0x1e +#define CGTT_GS_NGG_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f +#define CGTT_GS_NGG_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_GS_NGG_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_GS_NGG_CLK_CTRL__PERF_ENABLE_MASK 0x00008000L +#define CGTT_GS_NGG_CLK_CTRL__DBG_ENABLE_MASK 0x00010000L +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_GS_NGG_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_GS_NGG_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x01000000L +#define CGTT_GS_NGG_CLK_CTRL__SOFT_OVERRIDE6_MASK 0x02000000L +#define CGTT_GS_NGG_CLK_CTRL__SOFT_OVERRIDE5_MASK 0x04000000L +#define CGTT_GS_NGG_CLK_CTRL__PRIMGEN_OVERRIDE_MASK 0x10000000L +#define CGTT_GS_NGG_CLK_CTRL__GS1_OVERRIDE_MASK 0x20000000L +#define CGTT_GS_NGG_CLK_CTRL__GS0_OVERRIDE_MASK 0x40000000L +#define CGTT_GS_NGG_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000L +//CGTT_PA_CLK_CTRL +#define CGTT_PA_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_PA_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18 +#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19 +#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a +#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b +#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c +#define CGTT_PA_CLK_CTRL__SU_CLK_OVERRIDE__SHIFT 0x1d +#define CGTT_PA_CLK_CTRL__CL_CLK_OVERRIDE__SHIFT 0x1e +#define CGTT_PA_CLK_CTRL__REG_CLK_OVERRIDE__SHIFT 0x1f +#define CGTT_PA_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_PA_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_PA_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x01000000L +#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE6_MASK 0x02000000L +#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE5_MASK 0x04000000L +#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x08000000L +#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000L +#define CGTT_PA_CLK_CTRL__SU_CLK_OVERRIDE_MASK 0x20000000L +#define CGTT_PA_CLK_CTRL__CL_CLK_OVERRIDE_MASK 0x40000000L +#define CGTT_PA_CLK_CTRL__REG_CLK_OVERRIDE_MASK 0x80000000L +//CGTT_SC_CLK_CTRL0 +#define CGTT_SC_CLK_CTRL0__ON_DELAY__SHIFT 0x0 +#define CGTT_SC_CLK_CTRL0__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SC_CLK_CTRL0__PFF_ZFF_MEM_CLK_STALL_OVERRIDE__SHIFT 0x10 +#define CGTT_SC_CLK_CTRL0__SOFT_STALL_OVERRIDE5__SHIFT 0x11 +#define CGTT_SC_CLK_CTRL0__SOFT_STALL_OVERRIDE4__SHIFT 0x12 +#define CGTT_SC_CLK_CTRL0__SOFT_STALL_OVERRIDE3__SHIFT 0x13 +#define CGTT_SC_CLK_CTRL0__SOFT_STALL_OVERRIDE2__SHIFT 0x14 +#define CGTT_SC_CLK_CTRL0__SOFT_STALL_OVERRIDE1__SHIFT 0x15 +#define CGTT_SC_CLK_CTRL0__SOFT_STALL_OVERRIDE0__SHIFT 0x16 +#define CGTT_SC_CLK_CTRL0__REG_CLK_STALL_OVERRIDE__SHIFT 0x17 +#define CGTT_SC_CLK_CTRL0__PFF_ZFF_MEM_CLK_OVERRIDE__SHIFT 0x18 +#define CGTT_SC_CLK_CTRL0__SOFT_OVERRIDE5__SHIFT 0x19 +#define CGTT_SC_CLK_CTRL0__SOFT_OVERRIDE4__SHIFT 0x1a +#define CGTT_SC_CLK_CTRL0__SOFT_OVERRIDE3__SHIFT 0x1b +#define CGTT_SC_CLK_CTRL0__SOFT_OVERRIDE2__SHIFT 0x1c +#define CGTT_SC_CLK_CTRL0__SOFT_OVERRIDE1__SHIFT 0x1d +#define CGTT_SC_CLK_CTRL0__SOFT_OVERRIDE0__SHIFT 0x1e +#define CGTT_SC_CLK_CTRL0__REG_CLK_OVERRIDE__SHIFT 0x1f +#define CGTT_SC_CLK_CTRL0__ON_DELAY_MASK 0x0000000FL +#define CGTT_SC_CLK_CTRL0__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SC_CLK_CTRL0__PFF_ZFF_MEM_CLK_STALL_OVERRIDE_MASK 0x00010000L +#define CGTT_SC_CLK_CTRL0__SOFT_STALL_OVERRIDE5_MASK 0x00020000L +#define CGTT_SC_CLK_CTRL0__SOFT_STALL_OVERRIDE4_MASK 0x00040000L +#define CGTT_SC_CLK_CTRL0__SOFT_STALL_OVERRIDE3_MASK 0x00080000L +#define CGTT_SC_CLK_CTRL0__SOFT_STALL_OVERRIDE2_MASK 0x00100000L +#define CGTT_SC_CLK_CTRL0__SOFT_STALL_OVERRIDE1_MASK 0x00200000L +#define CGTT_SC_CLK_CTRL0__SOFT_STALL_OVERRIDE0_MASK 0x00400000L +#define CGTT_SC_CLK_CTRL0__REG_CLK_STALL_OVERRIDE_MASK 0x00800000L +#define CGTT_SC_CLK_CTRL0__PFF_ZFF_MEM_CLK_OVERRIDE_MASK 0x01000000L +#define CGTT_SC_CLK_CTRL0__SOFT_OVERRIDE5_MASK 0x02000000L +#define CGTT_SC_CLK_CTRL0__SOFT_OVERRIDE4_MASK 0x04000000L +#define CGTT_SC_CLK_CTRL0__SOFT_OVERRIDE3_MASK 0x08000000L +#define CGTT_SC_CLK_CTRL0__SOFT_OVERRIDE2_MASK 0x10000000L +#define CGTT_SC_CLK_CTRL0__SOFT_OVERRIDE1_MASK 0x20000000L +#define CGTT_SC_CLK_CTRL0__SOFT_OVERRIDE0_MASK 0x40000000L +#define CGTT_SC_CLK_CTRL0__REG_CLK_OVERRIDE_MASK 0x80000000L +//CGTT_SC_CLK_CTRL1 +#define CGTT_SC_CLK_CTRL1__ON_DELAY__SHIFT 0x0 +#define CGTT_SC_CLK_CTRL1__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SC_CLK_CTRL1__PBB_BINNING_CLK_STALL_OVERRIDE0__SHIFT 0x10 +#define CGTT_SC_CLK_CTRL1__PBB_BINNING_CLK_STALL_OVERRIDE__SHIFT 0x11 +#define CGTT_SC_CLK_CTRL1__PBB_SCISSOR_CLK_STALL_OVERRIDE__SHIFT 0x12 +#define CGTT_SC_CLK_CTRL1__OTHER_SPECIAL_SC_REG_CLK_STALL_OVERRIDE__SHIFT 0x13 +#define CGTT_SC_CLK_CTRL1__SCREEN_EXT_REG_CLK_STALL_OVERRIDE__SHIFT 0x14 +#define CGTT_SC_CLK_CTRL1__VPORT_REG_MEM_CLK_STALL_OVERRIDE__SHIFT 0x15 +#define CGTT_SC_CLK_CTRL1__PBB_CLK_STALL_OVERRIDE__SHIFT 0x16 +#define CGTT_SC_CLK_CTRL1__PBB_WARP_CLK_STALL_OVERRIDE__SHIFT 0x17 +#define CGTT_SC_CLK_CTRL1__PBB_BINNING_CLK_OVERRIDE0__SHIFT 0x18 +#define CGTT_SC_CLK_CTRL1__PBB_BINNING_CLK_OVERRIDE__SHIFT 0x19 +#define CGTT_SC_CLK_CTRL1__PBB_SCISSOR_CLK_OVERRIDE__SHIFT 0x1a +#define CGTT_SC_CLK_CTRL1__OTHER_SPECIAL_SC_REG_CLK_OVERRIDE__SHIFT 0x1b +#define CGTT_SC_CLK_CTRL1__SCREEN_EXT_REG_CLK_OVERRIDE__SHIFT 0x1c +#define CGTT_SC_CLK_CTRL1__VPORT_REG_MEM_CLK_OVERRIDE__SHIFT 0x1d +#define CGTT_SC_CLK_CTRL1__PBB_CLK_OVERRIDE__SHIFT 0x1e +#define CGTT_SC_CLK_CTRL1__PBB_WARP_CLK_OVERRIDE__SHIFT 0x1f +#define CGTT_SC_CLK_CTRL1__ON_DELAY_MASK 0x0000000FL +#define CGTT_SC_CLK_CTRL1__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SC_CLK_CTRL1__PBB_BINNING_CLK_STALL_OVERRIDE0_MASK 0x00010000L +#define CGTT_SC_CLK_CTRL1__PBB_BINNING_CLK_STALL_OVERRIDE_MASK 0x00020000L +#define CGTT_SC_CLK_CTRL1__PBB_SCISSOR_CLK_STALL_OVERRIDE_MASK 0x00040000L +#define CGTT_SC_CLK_CTRL1__OTHER_SPECIAL_SC_REG_CLK_STALL_OVERRIDE_MASK 0x00080000L +#define CGTT_SC_CLK_CTRL1__SCREEN_EXT_REG_CLK_STALL_OVERRIDE_MASK 0x00100000L +#define CGTT_SC_CLK_CTRL1__VPORT_REG_MEM_CLK_STALL_OVERRIDE_MASK 0x00200000L +#define CGTT_SC_CLK_CTRL1__PBB_CLK_STALL_OVERRIDE_MASK 0x00400000L +#define CGTT_SC_CLK_CTRL1__PBB_WARP_CLK_STALL_OVERRIDE_MASK 0x00800000L +#define CGTT_SC_CLK_CTRL1__PBB_BINNING_CLK_OVERRIDE0_MASK 0x01000000L +#define CGTT_SC_CLK_CTRL1__PBB_BINNING_CLK_OVERRIDE_MASK 0x02000000L +#define CGTT_SC_CLK_CTRL1__PBB_SCISSOR_CLK_OVERRIDE_MASK 0x04000000L +#define CGTT_SC_CLK_CTRL1__OTHER_SPECIAL_SC_REG_CLK_OVERRIDE_MASK 0x08000000L +#define CGTT_SC_CLK_CTRL1__SCREEN_EXT_REG_CLK_OVERRIDE_MASK 0x10000000L +#define CGTT_SC_CLK_CTRL1__VPORT_REG_MEM_CLK_OVERRIDE_MASK 0x20000000L +#define CGTT_SC_CLK_CTRL1__PBB_CLK_OVERRIDE_MASK 0x40000000L +#define CGTT_SC_CLK_CTRL1__PBB_WARP_CLK_OVERRIDE_MASK 0x80000000L +//CGTT_SC_CLK_CTRL2 +#define CGTT_SC_CLK_CTRL2__ON_DELAY__SHIFT 0x0 +#define CGTT_SC_CLK_CTRL2__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SC_CLK_CTRL2__DBR_CLK_OVERRIDE__SHIFT 0x1a +#define CGTT_SC_CLK_CTRL2__SCF_SCB_INTF_CLK_OVERRIDE__SHIFT 0x1b +#define CGTT_SC_CLK_CTRL2__SC_PKR_INTF_CLK_OVERRIDE__SHIFT 0x1c +#define CGTT_SC_CLK_CTRL2__SC_DB_INTF_CLK_OVERRIDE__SHIFT 0x1d +#define CGTT_SC_CLK_CTRL2__PA_SC_INTF_CLK_OVERRIDE__SHIFT 0x1e +#define CGTT_SC_CLK_CTRL2__ON_DELAY_MASK 0x0000000FL +#define CGTT_SC_CLK_CTRL2__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SC_CLK_CTRL2__DBR_CLK_OVERRIDE_MASK 0x04000000L +#define CGTT_SC_CLK_CTRL2__SCF_SCB_INTF_CLK_OVERRIDE_MASK 0x08000000L +#define CGTT_SC_CLK_CTRL2__SC_PKR_INTF_CLK_OVERRIDE_MASK 0x10000000L +#define CGTT_SC_CLK_CTRL2__SC_DB_INTF_CLK_OVERRIDE_MASK 0x20000000L +#define CGTT_SC_CLK_CTRL2__PA_SC_INTF_CLK_OVERRIDE_MASK 0x40000000L +//CGTT_SQ_CLK_CTRL +#define CGTT_SQ_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_SQ_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_SQ_CLK_CTRL__CORE_OVERRIDE__SHIFT 0x1e +#define CGTT_SQ_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f +#define CGTT_SQ_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_SQ_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_SQ_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_SQ_CLK_CTRL__CORE_OVERRIDE_MASK 0x40000000L +#define CGTT_SQ_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000L +//CGTT_SQG_CLK_CTRL +#define CGTT_SQG_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_SQG_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_SQG_CLK_CTRL__TTRACE_OVERRIDE__SHIFT 0x1c +#define CGTT_SQG_CLK_CTRL__PERFMON_OVERRIDE__SHIFT 0x1d +#define CGTT_SQG_CLK_CTRL__CORE_OVERRIDE__SHIFT 0x1e +#define CGTT_SQG_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f +#define CGTT_SQG_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_SQG_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_SQG_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_SQG_CLK_CTRL__TTRACE_OVERRIDE_MASK 0x10000000L +#define CGTT_SQG_CLK_CTRL__PERFMON_OVERRIDE_MASK 0x20000000L +#define CGTT_SQG_CLK_CTRL__CORE_OVERRIDE_MASK 0x40000000L +#define CGTT_SQG_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000L //SQ_ALU_CLK_CTRL #define SQ_ALU_CLK_CTRL__FORCE_WGP_ON_SA0__SHIFT 0x0 #define SQ_ALU_CLK_CTRL__FORCE_WGP_ON_SA1__SHIFT 0x10 @@ -34572,12 +35059,982 @@ #define SQ_LDS_CLK_CTRL__FORCE_WGP_ON_SA1__SHIFT 0x10 #define SQ_LDS_CLK_CTRL__FORCE_WGP_ON_SA0_MASK 0x0000FFFFL #define SQ_LDS_CLK_CTRL__FORCE_WGP_ON_SA1_MASK 0xFFFF0000L +//CGTT_SX_CLK_CTRL0 +#define CGTT_SX_CLK_CTRL0__ON_DELAY__SHIFT 0x0 +#define CGTT_SX_CLK_CTRL0__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SX_CLK_CTRL0__RESERVED__SHIFT 0xc +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE7__SHIFT 0x18 +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE6__SHIFT 0x19 +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE5__SHIFT 0x1a +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE4__SHIFT 0x1b +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE3__SHIFT 0x1c +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE2__SHIFT 0x1d +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE1__SHIFT 0x1e +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE0__SHIFT 0x1f +#define CGTT_SX_CLK_CTRL0__ON_DELAY_MASK 0x0000000FL +#define CGTT_SX_CLK_CTRL0__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SX_CLK_CTRL0__RESERVED_MASK 0x0000F000L +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_SX_CLK_CTRL0__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE7_MASK 0x01000000L +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE6_MASK 0x02000000L +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE5_MASK 0x04000000L +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE4_MASK 0x08000000L +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE3_MASK 0x10000000L +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE2_MASK 0x20000000L +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE1_MASK 0x40000000L +#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE0_MASK 0x80000000L +//CGTT_SX_CLK_CTRL1 +#define CGTT_SX_CLK_CTRL1__ON_DELAY__SHIFT 0x0 +#define CGTT_SX_CLK_CTRL1__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SX_CLK_CTRL1__RESERVED__SHIFT 0xc +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE6__SHIFT 0x19 +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE5__SHIFT 0x1a +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE4__SHIFT 0x1b +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE3__SHIFT 0x1c +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE2__SHIFT 0x1d +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE1__SHIFT 0x1e +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE0__SHIFT 0x1f +#define CGTT_SX_CLK_CTRL1__ON_DELAY_MASK 0x0000000FL +#define CGTT_SX_CLK_CTRL1__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SX_CLK_CTRL1__RESERVED_MASK 0x0000F000L +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_SX_CLK_CTRL1__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE6_MASK 0x02000000L +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE5_MASK 0x04000000L +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE4_MASK 0x08000000L +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE3_MASK 0x10000000L +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE2_MASK 0x20000000L +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE1_MASK 0x40000000L +#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE0_MASK 0x80000000L +//CGTT_SX_CLK_CTRL2 +#define CGTT_SX_CLK_CTRL2__ON_DELAY__SHIFT 0x0 +#define CGTT_SX_CLK_CTRL2__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SX_CLK_CTRL2__RESERVED__SHIFT 0xd +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE6__SHIFT 0x19 +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE5__SHIFT 0x1a +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE4__SHIFT 0x1b +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE3__SHIFT 0x1c +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE2__SHIFT 0x1d +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE1__SHIFT 0x1e +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE0__SHIFT 0x1f +#define CGTT_SX_CLK_CTRL2__ON_DELAY_MASK 0x0000000FL +#define CGTT_SX_CLK_CTRL2__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SX_CLK_CTRL2__RESERVED_MASK 0x0000E000L +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_SX_CLK_CTRL2__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE6_MASK 0x02000000L +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE5_MASK 0x04000000L +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE4_MASK 0x08000000L +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE3_MASK 0x10000000L +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE2_MASK 0x20000000L +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE1_MASK 0x40000000L +#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE0_MASK 0x80000000L +//CGTT_SX_CLK_CTRL3 +#define CGTT_SX_CLK_CTRL3__ON_DELAY__SHIFT 0x0 +#define CGTT_SX_CLK_CTRL3__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SX_CLK_CTRL3__RESERVED__SHIFT 0xd +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE6__SHIFT 0x19 +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE5__SHIFT 0x1a +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE4__SHIFT 0x1b +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE3__SHIFT 0x1c +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE2__SHIFT 0x1d +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE1__SHIFT 0x1e +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE0__SHIFT 0x1f +#define CGTT_SX_CLK_CTRL3__ON_DELAY_MASK 0x0000000FL +#define CGTT_SX_CLK_CTRL3__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SX_CLK_CTRL3__RESERVED_MASK 0x0000E000L +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_SX_CLK_CTRL3__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE6_MASK 0x02000000L +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE5_MASK 0x04000000L +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE4_MASK 0x08000000L +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE3_MASK 0x10000000L +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE2_MASK 0x20000000L +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE1_MASK 0x40000000L +#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE0_MASK 0x80000000L +//CGTT_SX_CLK_CTRL4 +#define CGTT_SX_CLK_CTRL4__ON_DELAY__SHIFT 0x0 +#define CGTT_SX_CLK_CTRL4__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_SX_CLK_CTRL4__RESERVED__SHIFT 0xc +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE6__SHIFT 0x19 +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE5__SHIFT 0x1a +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE4__SHIFT 0x1b +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE3__SHIFT 0x1c +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE2__SHIFT 0x1d +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE1__SHIFT 0x1e +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE0__SHIFT 0x1f +#define CGTT_SX_CLK_CTRL4__ON_DELAY_MASK 0x0000000FL +#define CGTT_SX_CLK_CTRL4__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_SX_CLK_CTRL4__RESERVED_MASK 0x0000F000L +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_SX_CLK_CTRL4__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE6_MASK 0x02000000L +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE5_MASK 0x04000000L +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE4_MASK 0x08000000L +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE3_MASK 0x10000000L +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE2_MASK 0x20000000L +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE1_MASK 0x40000000L +#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE0_MASK 0x80000000L +//TD_CGTT_CTRL +#define TD_CGTT_CTRL__ON_DELAY__SHIFT 0x0 +#define TD_CGTT_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define TD_CGTT_CTRL__SOFT_OVERRIDE7__SHIFT 0x18 +#define TD_CGTT_CTRL__SOFT_OVERRIDE6__SHIFT 0x19 +#define TD_CGTT_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a +#define TD_CGTT_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b +#define TD_CGTT_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c +#define TD_CGTT_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d +#define TD_CGTT_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e +#define TD_CGTT_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f +#define TD_CGTT_CTRL__ON_DELAY_MASK 0x0000000FL +#define TD_CGTT_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define TD_CGTT_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define TD_CGTT_CTRL__SOFT_OVERRIDE7_MASK 0x01000000L +#define TD_CGTT_CTRL__SOFT_OVERRIDE6_MASK 0x02000000L +#define TD_CGTT_CTRL__SOFT_OVERRIDE5_MASK 0x04000000L +#define TD_CGTT_CTRL__SOFT_OVERRIDE4_MASK 0x08000000L +#define TD_CGTT_CTRL__SOFT_OVERRIDE3_MASK 0x10000000L +#define TD_CGTT_CTRL__SOFT_OVERRIDE2_MASK 0x20000000L +#define TD_CGTT_CTRL__SOFT_OVERRIDE1_MASK 0x40000000L +#define TD_CGTT_CTRL__SOFT_OVERRIDE0_MASK 0x80000000L +//TA_CGTT_CTRL +#define TA_CGTT_CTRL__ON_DELAY__SHIFT 0x0 +#define TA_CGTT_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define TA_CGTT_CTRL__SOFT_OVERRIDE7__SHIFT 0x18 +#define TA_CGTT_CTRL__SOFT_OVERRIDE6__SHIFT 0x19 +#define TA_CGTT_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a +#define TA_CGTT_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b +#define TA_CGTT_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c +#define TA_CGTT_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d +#define TA_CGTT_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e +#define TA_CGTT_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f +#define TA_CGTT_CTRL__ON_DELAY_MASK 0x0000000FL +#define TA_CGTT_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define TA_CGTT_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define TA_CGTT_CTRL__SOFT_OVERRIDE7_MASK 0x01000000L +#define TA_CGTT_CTRL__SOFT_OVERRIDE6_MASK 0x02000000L +#define TA_CGTT_CTRL__SOFT_OVERRIDE5_MASK 0x04000000L +#define TA_CGTT_CTRL__SOFT_OVERRIDE4_MASK 0x08000000L +#define TA_CGTT_CTRL__SOFT_OVERRIDE3_MASK 0x10000000L +#define TA_CGTT_CTRL__SOFT_OVERRIDE2_MASK 0x20000000L +#define TA_CGTT_CTRL__SOFT_OVERRIDE1_MASK 0x40000000L +#define TA_CGTT_CTRL__SOFT_OVERRIDE0_MASK 0x80000000L +//CGTT_TCPI_CLK_CTRL +#define CGTT_TCPI_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_TCPI_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_TCPI_CLK_CTRL__SPARE__SHIFT 0xc +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0xf +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x10 +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x11 +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x12 +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x13 +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x14 +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x15 +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x16 +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x17 +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x18 +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x19 +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1a +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1b +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1c +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1d +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1e +#define CGTT_TCPI_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_TCPI_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_TCPI_CLK_CTRL__SPARE_MASK 0x00007000L +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00008000L +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00010000L +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00020000L +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00040000L +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00080000L +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00100000L +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00200000L +#define CGTT_TCPI_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00400000L +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x00800000L +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE6_MASK 0x01000000L +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE5_MASK 0x02000000L +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x04000000L +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x08000000L +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE2_MASK 0x10000000L +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE1_MASK 0x20000000L +#define CGTT_TCPI_CLK_CTRL__SOFT_OVERRIDE0_MASK 0x40000000L +//CGTT_GDS_CLK_CTRL +#define CGTT_GDS_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_GDS_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_GDS_CLK_CTRL__UNUSED__SHIFT 0xc +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18 +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19 +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f +#define CGTT_GDS_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_GDS_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_GDS_CLK_CTRL__UNUSED_MASK 0x0000F000L +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_GDS_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x01000000L +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE6_MASK 0x02000000L +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE5_MASK 0x04000000L +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x08000000L +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000L +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000L +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000L +#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000L +//DB_CGTT_CLK_CTRL_0 +#define DB_CGTT_CLK_CTRL_0__ON_DELAY__SHIFT 0x0 +#define DB_CGTT_CLK_CTRL_0__OFF_HYSTERESIS__SHIFT 0x4 +#define DB_CGTT_CLK_CTRL_0__RESERVED__SHIFT 0xc +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE7__SHIFT 0x18 +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE6__SHIFT 0x19 +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE5__SHIFT 0x1a +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE4__SHIFT 0x1b +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE3__SHIFT 0x1c +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE2__SHIFT 0x1d +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE1__SHIFT 0x1e +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE0__SHIFT 0x1f +#define DB_CGTT_CLK_CTRL_0__ON_DELAY_MASK 0x0000000FL +#define DB_CGTT_CLK_CTRL_0__OFF_HYSTERESIS_MASK 0x00000FF0L +#define DB_CGTT_CLK_CTRL_0__RESERVED_MASK 0x0000F000L +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define DB_CGTT_CLK_CTRL_0__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE7_MASK 0x01000000L +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE6_MASK 0x02000000L +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE5_MASK 0x04000000L +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE4_MASK 0x08000000L +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE3_MASK 0x10000000L +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE2_MASK 0x20000000L +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE1_MASK 0x40000000L +#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE0_MASK 0x80000000L +//CB_CGTT_SCLK_CTRL +#define CB_CGTT_SCLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CB_CGTT_SCLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18 +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19 +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f +#define CB_CGTT_SCLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CB_CGTT_SCLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CB_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE7_MASK 0x01000000L +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE6_MASK 0x02000000L +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE5_MASK 0x04000000L +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE4_MASK 0x08000000L +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000L +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000L +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000L +#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000L +//GL2C_CGTT_SCLK_CTRL +#define GL2C_CGTT_SCLK_CTRL__ON_DELAY__SHIFT 0x0 +#define GL2C_CGTT_SCLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18 +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19 +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f +#define GL2C_CGTT_SCLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define GL2C_CGTT_SCLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE7_MASK 0x01000000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE6_MASK 0x02000000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE5_MASK 0x04000000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE4_MASK 0x08000000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000L +#define GL2C_CGTT_SCLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000L +//GL2A_CGTT_SCLK_CTRL +#define GL2A_CGTT_SCLK_CTRL__ON_DELAY__SHIFT 0x0 +#define GL2A_CGTT_SCLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18 +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19 +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f +#define GL2A_CGTT_SCLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define GL2A_CGTT_SCLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE7_MASK 0x01000000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE6_MASK 0x02000000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE5_MASK 0x04000000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE4_MASK 0x08000000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000L +#define GL2A_CGTT_SCLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000L +//GL2A_CGTT_SCLK_CTRL_1 +#define GL2A_CGTT_SCLK_CTRL_1__ON_DELAY__SHIFT 0x0 +#define GL2A_CGTT_SCLK_CTRL_1__OFF_HYSTERESIS__SHIFT 0x4 +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE7__SHIFT 0x18 +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE6__SHIFT 0x19 +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE5__SHIFT 0x1a +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE4__SHIFT 0x1b +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE3__SHIFT 0x1c +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE2__SHIFT 0x1d +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE1__SHIFT 0x1e +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE0__SHIFT 0x1f +#define GL2A_CGTT_SCLK_CTRL_1__ON_DELAY_MASK 0x0000000FL +#define GL2A_CGTT_SCLK_CTRL_1__OFF_HYSTERESIS_MASK 0x00000FF0L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE7_MASK 0x01000000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE6_MASK 0x02000000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE5_MASK 0x04000000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE4_MASK 0x08000000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE3_MASK 0x10000000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE2_MASK 0x20000000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE1_MASK 0x40000000L +#define GL2A_CGTT_SCLK_CTRL_1__SOFT_OVERRIDE0_MASK 0x80000000L +//CGTT_CP_CLK_CTRL +#define CGTT_CP_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_CP_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_CP_CLK_CTRL__MGLS_OVERRIDE__SHIFT 0xf +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_CP_CLK_CTRL__SOFT_OVERRIDE_PERFMON__SHIFT 0x1d +#define CGTT_CP_CLK_CTRL__SOFT_OVERRIDE_DYN__SHIFT 0x1e +#define CGTT_CP_CLK_CTRL__SOFT_OVERRIDE_REG__SHIFT 0x1f +#define CGTT_CP_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_CP_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_CP_CLK_CTRL__MGLS_OVERRIDE_MASK 0x00008000L +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_CP_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_CP_CLK_CTRL__SOFT_OVERRIDE_PERFMON_MASK 0x20000000L +#define CGTT_CP_CLK_CTRL__SOFT_OVERRIDE_DYN_MASK 0x40000000L +#define CGTT_CP_CLK_CTRL__SOFT_OVERRIDE_REG_MASK 0x80000000L +//CGTT_CPF_CLK_CTRL +#define CGTT_CPF_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_CPF_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_CPF_CLK_CTRL__MGLS_OVERRIDE__SHIFT 0xf +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_PERFMON__SHIFT 0x1a +#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_PRT__SHIFT 0x1b +#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_CMP__SHIFT 0x1c +#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_GFX__SHIFT 0x1d +#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_DYN__SHIFT 0x1e +#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_REG__SHIFT 0x1f +#define CGTT_CPF_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_CPF_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_CPF_CLK_CTRL__MGLS_OVERRIDE_MASK 0x00008000L +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_CPF_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_PERFMON_MASK 0x04000000L +#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_PRT_MASK 0x08000000L +#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_CMP_MASK 0x10000000L +#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_GFX_MASK 0x20000000L +#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_DYN_MASK 0x40000000L +#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_REG_MASK 0x80000000L +//CGTT_CPC_CLK_CTRL +#define CGTT_CPC_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_CPC_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_CPC_CLK_CTRL__MGLS_OVERRIDE__SHIFT 0xf +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_CPC_CLK_CTRL__SOFT_OVERRIDE_PERFMON__SHIFT 0x1d +#define CGTT_CPC_CLK_CTRL__SOFT_OVERRIDE_DYN__SHIFT 0x1e +#define CGTT_CPC_CLK_CTRL__SOFT_OVERRIDE_REG__SHIFT 0x1f +#define CGTT_CPC_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_CPC_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_CPC_CLK_CTRL__MGLS_OVERRIDE_MASK 0x00008000L +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_CPC_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_CPC_CLK_CTRL__SOFT_OVERRIDE_PERFMON_MASK 0x20000000L +#define CGTT_CPC_CLK_CTRL__SOFT_OVERRIDE_DYN_MASK 0x40000000L +#define CGTT_CPC_CLK_CTRL__SOFT_OVERRIDE_REG_MASK 0x80000000L +//CGTT_RLC_CLK_CTRL +#define CGTT_RLC_CLK_CTRL__RESERVED__SHIFT 0x0 +#define CGTT_RLC_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define CGTT_RLC_CLK_CTRL__SOFT_OVERRIDE_DYN__SHIFT 0x1e +#define CGTT_RLC_CLK_CTRL__SOFT_OVERRIDE_REG__SHIFT 0x1f +#define CGTT_RLC_CLK_CTRL__RESERVED_MASK 0x0000000FL +#define CGTT_RLC_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_RLC_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define CGTT_RLC_CLK_CTRL__SOFT_OVERRIDE_DYN_MASK 0x40000000L +#define CGTT_RLC_CLK_CTRL__SOFT_OVERRIDE_REG_MASK 0x80000000L //RLC_GFX_RM_CNTL #define RLC_GFX_RM_CNTL__RLC_GFX_RM_VALID__SHIFT 0x0 #define RLC_GFX_RM_CNTL__RESERVED__SHIFT 0x1 #define RLC_GFX_RM_CNTL__RLC_GFX_RM_VALID_MASK 0x00000001L #define RLC_GFX_RM_CNTL__RESERVED_MASK 0xFFFFFFFEL - +//RMI_CGTT_SCLK_CTRL +#define RMI_CGTT_SCLK_CTRL__ON_DELAY__SHIFT 0x0 +#define RMI_CGTT_SCLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19 +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f +#define RMI_CGTT_SCLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define RMI_CGTT_SCLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define RMI_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE6_MASK 0x02000000L +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE5_MASK 0x04000000L +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE4_MASK 0x08000000L +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000L +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000L +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000L +#define RMI_CGTT_SCLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000L +//CGTT_TCPF_CLK_CTRL +#define CGTT_TCPF_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define CGTT_TCPF_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_TCPF_CLK_CTRL__SPARE__SHIFT 0xc +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0xf +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x10 +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x11 +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x12 +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x13 +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x14 +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x15 +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x16 +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x17 +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x18 +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x19 +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1a +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1b +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1c +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1d +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1e +#define CGTT_TCPF_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define CGTT_TCPF_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_TCPF_CLK_CTRL__SPARE_MASK 0x00007000L +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00008000L +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00010000L +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00020000L +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00040000L +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00080000L +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00100000L +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00200000L +#define CGTT_TCPF_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00400000L +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x00800000L +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE6_MASK 0x01000000L +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE5_MASK 0x02000000L +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x04000000L +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x08000000L +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE2_MASK 0x10000000L +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE1_MASK 0x20000000L +#define CGTT_TCPF_CLK_CTRL__SOFT_OVERRIDE0_MASK 0x40000000L +//GCR_CGTT_SCLK_CTRL +#define GCR_CGTT_SCLK_CTRL__ON_DELAY__SHIFT 0x0 +#define GCR_CGTT_SCLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19 +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f +#define GCR_CGTT_SCLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define GCR_CGTT_SCLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define GCR_CGTT_SCLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE6_MASK 0x02000000L +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE5_MASK 0x04000000L +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE4_MASK 0x08000000L +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000L +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000L +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000L +#define GCR_CGTT_SCLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000L +//UTCL1_CGTT_CLK_CTRL +#define UTCL1_CGTT_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define UTCL1_CGTT_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19 +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f +#define UTCL1_CGTT_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define UTCL1_CGTT_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE6_MASK 0x02000000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE5_MASK 0x04000000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x08000000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000L +#define UTCL1_CGTT_CLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000L +//GCEA_CGTT_CLK_CTRL +#define GCEA_CGTT_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define GCEA_CGTT_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define GCEA_CGTT_CLK_CTRL__SPARE0__SHIFT 0xc +#define GCEA_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_WRITE__SHIFT 0x14 +#define GCEA_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_READ__SHIFT 0x15 +#define GCEA_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_RETURN__SHIFT 0x16 +#define GCEA_CGTT_CLK_CTRL__SPARE1__SHIFT 0x17 +#define GCEA_CGTT_CLK_CTRL__SOFT_OVERRIDE_WRITE__SHIFT 0x1c +#define GCEA_CGTT_CLK_CTRL__SOFT_OVERRIDE_READ__SHIFT 0x1d +#define GCEA_CGTT_CLK_CTRL__SOFT_OVERRIDE_RETURN__SHIFT 0x1e +#define GCEA_CGTT_CLK_CTRL__SOFT_OVERRIDE_REGISTER__SHIFT 0x1f +#define GCEA_CGTT_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define GCEA_CGTT_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define GCEA_CGTT_CLK_CTRL__SPARE0_MASK 0x000FF000L +#define GCEA_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_WRITE_MASK 0x00100000L +#define GCEA_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_READ_MASK 0x00200000L +#define GCEA_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_RETURN_MASK 0x00400000L +#define GCEA_CGTT_CLK_CTRL__SPARE1_MASK 0x0F800000L +#define GCEA_CGTT_CLK_CTRL__SOFT_OVERRIDE_WRITE_MASK 0x10000000L +#define GCEA_CGTT_CLK_CTRL__SOFT_OVERRIDE_READ_MASK 0x20000000L +#define GCEA_CGTT_CLK_CTRL__SOFT_OVERRIDE_RETURN_MASK 0x40000000L +#define GCEA_CGTT_CLK_CTRL__SOFT_OVERRIDE_REGISTER_MASK 0x80000000L +//SE_CAC_CGTT_CLK_CTRL +#define SE_CAC_CGTT_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define SE_CAC_CGTT_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define SE_CAC_CGTT_CLK_CTRL__SOFT_OVERRIDE_DYN__SHIFT 0x1e +#define SE_CAC_CGTT_CLK_CTRL__SOFT_OVERRIDE_REG__SHIFT 0x1f +#define SE_CAC_CGTT_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define SE_CAC_CGTT_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define SE_CAC_CGTT_CLK_CTRL__SOFT_OVERRIDE_DYN_MASK 0x40000000L +#define SE_CAC_CGTT_CLK_CTRL__SOFT_OVERRIDE_REG_MASK 0x80000000L +//GC_CAC_CGTT_CLK_CTRL +#define GC_CAC_CGTT_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define GC_CAC_CGTT_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define GC_CAC_CGTT_CLK_CTRL__SOFT_OVERRIDE_DYN__SHIFT 0x1e +#define GC_CAC_CGTT_CLK_CTRL__SOFT_OVERRIDE_REG__SHIFT 0x1f +#define GC_CAC_CGTT_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define GC_CAC_CGTT_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define GC_CAC_CGTT_CLK_CTRL__SOFT_OVERRIDE_DYN_MASK 0x40000000L +#define GC_CAC_CGTT_CLK_CTRL__SOFT_OVERRIDE_REG_MASK 0x80000000L +//GRBM_CGTT_CLK_CNTL +#define GRBM_CGTT_CLK_CNTL__ON_DELAY__SHIFT 0x0 +#define GRBM_CGTT_CLK_CNTL__OFF_HYSTERESIS__SHIFT 0x4 +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE0__SHIFT 0x17 +#define GRBM_CGTT_CLK_CNTL__SOFT_OVERRIDE_DYN__SHIFT 0x1e +#define GRBM_CGTT_CLK_CNTL__ON_DELAY_MASK 0x0000000FL +#define GRBM_CGTT_CLK_CNTL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define GRBM_CGTT_CLK_CNTL__SOFT_STALL_OVERRIDE0_MASK 0x00800000L +#define GRBM_CGTT_CLK_CNTL__SOFT_OVERRIDE_DYN_MASK 0x40000000L +//GUS_CGTT_CLK_CTRL +#define GUS_CGTT_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define GUS_CGTT_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define GUS_CGTT_CLK_CTRL__SPARE0__SHIFT 0xc +#define GUS_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_DRAM__SHIFT 0x13 +#define GUS_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_WRITE__SHIFT 0x14 +#define GUS_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_READ__SHIFT 0x15 +#define GUS_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_RETURN__SHIFT 0x16 +#define GUS_CGTT_CLK_CTRL__SPARE1__SHIFT 0x17 +#define GUS_CGTT_CLK_CTRL__SOFT_OVERRIDE_DRAM__SHIFT 0x1b +#define GUS_CGTT_CLK_CTRL__SOFT_OVERRIDE_WRITE__SHIFT 0x1c +#define GUS_CGTT_CLK_CTRL__SOFT_OVERRIDE_READ__SHIFT 0x1d +#define GUS_CGTT_CLK_CTRL__SOFT_OVERRIDE_RETURN__SHIFT 0x1e +#define GUS_CGTT_CLK_CTRL__SOFT_OVERRIDE_REGISTER__SHIFT 0x1f +#define GUS_CGTT_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define GUS_CGTT_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define GUS_CGTT_CLK_CTRL__SPARE0_MASK 0x0007F000L +#define GUS_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_DRAM_MASK 0x00080000L +#define GUS_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_WRITE_MASK 0x00100000L +#define GUS_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_READ_MASK 0x00200000L +#define GUS_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_RETURN_MASK 0x00400000L +#define GUS_CGTT_CLK_CTRL__SPARE1_MASK 0x07800000L +#define GUS_CGTT_CLK_CTRL__SOFT_OVERRIDE_DRAM_MASK 0x08000000L +#define GUS_CGTT_CLK_CTRL__SOFT_OVERRIDE_WRITE_MASK 0x10000000L +#define GUS_CGTT_CLK_CTRL__SOFT_OVERRIDE_READ_MASK 0x20000000L +#define GUS_CGTT_CLK_CTRL__SOFT_OVERRIDE_RETURN_MASK 0x40000000L +#define GUS_CGTT_CLK_CTRL__SOFT_OVERRIDE_REGISTER_MASK 0x80000000L +//CGTT_PH_CLK_CTRL0 +#define CGTT_PH_CLK_CTRL0__ON_DELAY__SHIFT 0x0 +#define CGTT_PH_CLK_CTRL0__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE7__SHIFT 0x10 +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE6__SHIFT 0x11 +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE5__SHIFT 0x12 +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE4__SHIFT 0x13 +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE3__SHIFT 0x14 +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE2__SHIFT 0x15 +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE1__SHIFT 0x16 +#define CGTT_PH_CLK_CTRL0__SOFT_OVERRIDE7__SHIFT 0x18 +#define CGTT_PH_CLK_CTRL0__SOFT_OVERRIDE6__SHIFT 0x19 +#define CGTT_PH_CLK_CTRL0__SOFT_OVERRIDE5__SHIFT 0x1a +#define CGTT_PH_CLK_CTRL0__SOFT_OVERRIDE4__SHIFT 0x1b +#define CGTT_PH_CLK_CTRL0__SOFT_OVERRIDE3__SHIFT 0x1c +#define CGTT_PH_CLK_CTRL0__SOFT_OVERRIDE2__SHIFT 0x1d +#define CGTT_PH_CLK_CTRL0__PERFMON_CLK_OVERRIDE__SHIFT 0x1e +#define CGTT_PH_CLK_CTRL0__REG_CLK_OVERRIDE__SHIFT 0x1f +#define CGTT_PH_CLK_CTRL0__ON_DELAY_MASK 0x0000000FL +#define CGTT_PH_CLK_CTRL0__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE7_MASK 0x00010000L +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE6_MASK 0x00020000L +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE5_MASK 0x00040000L +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE4_MASK 0x00080000L +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE3_MASK 0x00100000L +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE2_MASK 0x00200000L +#define CGTT_PH_CLK_CTRL0__SOFT_STALL_OVERRIDE1_MASK 0x00400000L +#define CGTT_PH_CLK_CTRL0__SOFT_OVERRIDE7_MASK 0x01000000L +#define CGTT_PH_CLK_CTRL0__SOFT_OVERRIDE6_MASK 0x02000000L +#define CGTT_PH_CLK_CTRL0__SOFT_OVERRIDE5_MASK 0x04000000L +#define CGTT_PH_CLK_CTRL0__SOFT_OVERRIDE4_MASK 0x08000000L +#define CGTT_PH_CLK_CTRL0__SOFT_OVERRIDE3_MASK 0x10000000L +#define CGTT_PH_CLK_CTRL0__SOFT_OVERRIDE2_MASK 0x20000000L +#define CGTT_PH_CLK_CTRL0__PERFMON_CLK_OVERRIDE_MASK 0x40000000L +#define CGTT_PH_CLK_CTRL0__REG_CLK_OVERRIDE_MASK 0x80000000L +//CGTT_PH_CLK_CTRL1 +#define CGTT_PH_CLK_CTRL1__ON_DELAY__SHIFT 0x0 +#define CGTT_PH_CLK_CTRL1__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE7__SHIFT 0x18 +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE6__SHIFT 0x19 +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE5__SHIFT 0x1a +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE4__SHIFT 0x1b +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE3__SHIFT 0x1c +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE2__SHIFT 0x1d +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE1__SHIFT 0x1e +#define CGTT_PH_CLK_CTRL1__ON_DELAY_MASK 0x0000000FL +#define CGTT_PH_CLK_CTRL1__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE7_MASK 0x01000000L +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE6_MASK 0x02000000L +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE5_MASK 0x04000000L +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE4_MASK 0x08000000L +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE3_MASK 0x10000000L +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE2_MASK 0x20000000L +#define CGTT_PH_CLK_CTRL1__SOFT_OVERRIDE1_MASK 0x40000000L +//CGTT_PH_CLK_CTRL2 +#define CGTT_PH_CLK_CTRL2__ON_DELAY__SHIFT 0x0 +#define CGTT_PH_CLK_CTRL2__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE7__SHIFT 0x18 +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE6__SHIFT 0x19 +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE5__SHIFT 0x1a +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE4__SHIFT 0x1b +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE3__SHIFT 0x1c +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE2__SHIFT 0x1d +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE1__SHIFT 0x1e +#define CGTT_PH_CLK_CTRL2__ON_DELAY_MASK 0x0000000FL +#define CGTT_PH_CLK_CTRL2__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE7_MASK 0x01000000L +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE6_MASK 0x02000000L +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE5_MASK 0x04000000L +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE4_MASK 0x08000000L +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE3_MASK 0x10000000L +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE2_MASK 0x20000000L +#define CGTT_PH_CLK_CTRL2__SOFT_OVERRIDE1_MASK 0x40000000L +//CGTT_PH_CLK_CTRL3 +#define CGTT_PH_CLK_CTRL3__ON_DELAY__SHIFT 0x0 +#define CGTT_PH_CLK_CTRL3__OFF_HYSTERESIS__SHIFT 0x4 +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE7__SHIFT 0x18 +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE6__SHIFT 0x19 +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE5__SHIFT 0x1a +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE4__SHIFT 0x1b +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE3__SHIFT 0x1c +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE2__SHIFT 0x1d +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE1__SHIFT 0x1e +#define CGTT_PH_CLK_CTRL3__ON_DELAY_MASK 0x0000000FL +#define CGTT_PH_CLK_CTRL3__OFF_HYSTERESIS_MASK 0x00000FF0L +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE7_MASK 0x01000000L +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE6_MASK 0x02000000L +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE5_MASK 0x04000000L +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE4_MASK 0x08000000L +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE3_MASK 0x10000000L +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE2_MASK 0x20000000L +#define CGTT_PH_CLK_CTRL3__SOFT_OVERRIDE1_MASK 0x40000000L // addressBlock: gc_hypdec //CP_HYP_PFP_UCODE_ADDR From 46c676600c715f833b066581247cd5a461e03441 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Tue, 6 Sep 2022 23:05:11 -0400 Subject: [PATCH 06/88] drm/amdgpu: Use per device reset_domain for XGMI on sriov configuration For SRIOV configuration, host driver control the reset method(either FLR or heavier chain reset). The host will notify the guest individually with FLR message if individual GPU within the hive need to be reset. So for guest side, no need to use hive->reset_domain to replace the original per device reset_domain Signed-off-by: shaoyunl Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 22 +++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 37 ++++++++++++++-------- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 62b26f0e37b0..e21804362995 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2453,17 +2453,19 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (amdgpu_xgmi_add_device(adev) == 0) { struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); - if (!hive->reset_domain || - !amdgpu_reset_get_reset_domain(hive->reset_domain)) { - r = -ENOENT; - amdgpu_put_xgmi_hive(hive); - goto init_failed; - } + if (!amdgpu_sriov_vf(adev)) { + if (!hive->reset_domain || + !amdgpu_reset_get_reset_domain(hive->reset_domain)) { + r = -ENOENT; + amdgpu_put_xgmi_hive(hive); + goto init_failed; + } - /* Drop the early temporary reset domain we created for device */ - amdgpu_reset_put_reset_domain(adev->reset_domain); - adev->reset_domain = hive->reset_domain; - amdgpu_put_xgmi_hive(hive); + /* Drop the early temporary reset domain we created for device */ + amdgpu_reset_put_reset_domain(adev->reset_domain); + adev->reset_domain = hive->reset_domain; + amdgpu_put_xgmi_hive(hive); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index d3b483aa81f8..10477ca52a41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -391,24 +391,33 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) goto pro_end; } + /** + * Only init hive->reset_domain for none SRIOV configuration. For SRIOV, + * Host driver decide how to reset the GPU either through FLR or chain reset. + * Guest side will get individual notifications from the host for the FLR + * if necessary. + */ + if (!amdgpu_sriov_vf(adev)) { /** * Avoid recreating reset domain when hive is reconstructed for the case - * of reset the devices in the XGMI hive during probe for SRIOV + * of reset the devices in the XGMI hive during probe for passthrough GPU * See https://www.spinics.net/lists/amd-gfx/msg58836.html */ - if (adev->reset_domain->type != XGMI_HIVE) { - hive->reset_domain = amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive"); - if (!hive->reset_domain) { - dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n"); - ret = -ENOMEM; - kobject_put(&hive->kobj); - kfree(hive); - hive = NULL; - goto pro_end; - } - } else { - amdgpu_reset_get_reset_domain(adev->reset_domain); - hive->reset_domain = adev->reset_domain; + if (adev->reset_domain->type != XGMI_HIVE) { + hive->reset_domain = + amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive"); + if (!hive->reset_domain) { + dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n"); + ret = -ENOMEM; + kobject_put(&hive->kobj); + kfree(hive); + hive = NULL; + goto pro_end; + } + } else { + amdgpu_reset_get_reset_domain(adev->reset_domain); + hive->reset_domain = adev->reset_domain; + } } hive->hive_id = adev->gmc.xgmi.hive_id; From 7a3f8b7c4cdb9d678d6311ca93e6b793e8957295 Mon Sep 17 00:00:00 2001 From: wangjianli Date: Thu, 8 Sep 2022 21:17:12 +0800 Subject: [PATCH 07/88] amd/amdkfd: fix repeated words in comments Delete the redundant word 'to'. Signed-off-by: wangjianli Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 24b414cff3ec..cd5f8b219bf9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -2284,7 +2284,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, /* Fill in Subtype: IO_LINKS * Only direct links are added here which is Link from GPU to - * to its NUMA node. Indirect links are added by userspace. + * its NUMA node. Indirect links are added by userspace. */ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + cache_mem_filled); From e1f84eef313f4820cca068a238c645d0a38c6a9b Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 7 Sep 2022 12:30:12 -0400 Subject: [PATCH 08/88] drm/amdkfd: handle CPU fault on COW mapping If CPU page fault in a page with zone_device_data svm_bo from another process, that means it is COW mapping in the child process and the range is migrated to VRAM by parent process. Migrate the parent process range back to system memory to recover the CPU page fault. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 42 ++++++++++++++++-------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index b059a77b6081..6555d775a532 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -886,7 +886,7 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) { unsigned long addr = vmf->address; - struct vm_area_struct *vma; + struct svm_range_bo *svm_bo; enum svm_work_list_ops op; struct svm_range *parent; struct svm_range *prange; @@ -894,29 +894,42 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) struct mm_struct *mm; int r = 0; - vma = vmf->vma; - mm = vma->vm_mm; + svm_bo = vmf->page->zone_device_data; + if (!svm_bo) { + pr_debug("failed get device page at addr 0x%lx\n", addr); + return VM_FAULT_SIGBUS; + } + if (!mmget_not_zero(svm_bo->eviction_fence->mm)) { + pr_debug("addr 0x%lx of process mm is detroyed\n", addr); + return VM_FAULT_SIGBUS; + } - p = kfd_lookup_process_by_mm(vma->vm_mm); + mm = svm_bo->eviction_fence->mm; + if (mm != vmf->vma->vm_mm) + pr_debug("addr 0x%lx is COW mapping in child process\n", addr); + + p = kfd_lookup_process_by_mm(mm); if (!p) { pr_debug("failed find process at fault address 0x%lx\n", addr); - return VM_FAULT_SIGBUS; + r = VM_FAULT_SIGBUS; + goto out_mmput; } if (READ_ONCE(p->svms.faulting_task) == current) { pr_debug("skipping ram migration\n"); - kfd_unref_process(p); - return 0; + r = 0; + goto out_unref_process; } - addr >>= PAGE_SHIFT; + pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr); + addr >>= PAGE_SHIFT; mutex_lock(&p->svms.lock); prange = svm_range_from_addr(&p->svms, addr, &parent); if (!prange) { - pr_debug("cannot find svm range at 0x%lx\n", addr); + pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr); r = -EFAULT; - goto out; + goto out_unlock_svms; } mutex_lock(&parent->migrate_mutex); @@ -940,8 +953,8 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU); if (r) - pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r, - prange, prange->start, prange->last); + pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n", + r, prange->svms, prange, prange->start, prange->last); /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ if (p->xnack_enabled && parent == prange) @@ -955,9 +968,12 @@ out_unlock_prange: if (prange != parent) mutex_unlock(&prange->migrate_mutex); mutex_unlock(&parent->migrate_mutex); -out: +out_unlock_svms: mutex_unlock(&p->svms.lock); +out_unref_process: kfd_unref_process(p); +out_mmput: + mmput(mm); pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); From c969c5fd21fd700b126aa139ec57875697946474 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 26 Jul 2022 15:13:30 -0400 Subject: [PATCH 09/88] drm/amdkfd: Remove prefault before migrating to VRAM Prefaulting potentially allocates system memory pages before a migration. This adds unnecessary overhead. Instead we can skip unallocated pages in the migration and just point migrate->dst to a 0-initialized VRAM page directly. Then the VRAM page will be inserted to the PTE. A subsequent CPU page fault will migrate the page back to system memory. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 12 +++++------- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 22 ---------------------- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 -- 3 files changed, 5 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 6555d775a532..929dec1da0e4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -322,12 +322,13 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, for (i = j = 0; i < npages; i++) { struct page *spage; + dst[i] = cursor.start + (j << PAGE_SHIFT); + migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); + svm_migrate_get_vram_page(prange, migrate->dst[i]); + migrate->dst[i] = migrate_pfn(migrate->dst[i]); + spage = migrate_pfn_to_page(migrate->src[i]); if (spage && !is_zone_device_page(spage)) { - dst[i] = cursor.start + (j << PAGE_SHIFT); - migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); - svm_migrate_get_vram_page(prange, migrate->dst[i]); - migrate->dst[i] = migrate_pfn(migrate->dst[i]); src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_TO_DEVICE); r = dma_mapping_error(dev, src[i]); @@ -522,9 +523,6 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, prange->start, prange->last, best_loc); - /* FIXME: workaround for page locking bug with invalid pages */ - svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev)); - start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 11074cc8c333..cf5b4005534c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3181,28 +3181,6 @@ out: return best_loc; } -/* FIXME: This is a workaround for page locking bug when some pages are - * invalid during migration to VRAM - */ -void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm, - void *owner) -{ - struct hmm_range *hmm_range; - int r; - - if (prange->validated_once) - return; - - r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, - prange->start << PAGE_SHIFT, - prange->npages, &hmm_range, - false, true, owner); - if (!r) { - amdgpu_hmm_range_get_pages_done(hmm_range); - prange->validated_once = true; - } -} - /* svm_range_trigger_migration - start page migration if prefetch loc changed * @mm: current process mm_struct * @prange: svm range structure diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index cfac13ad06ef..012c53729516 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -181,8 +181,6 @@ void schedule_deferred_list_work(struct svm_range_list *svms); void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, unsigned long offset, unsigned long npages); void svm_range_free_dma_mappings(struct svm_range *prange); -void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm, - void *owner); int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges, uint64_t *svm_priv_data_size); int kfd_criu_checkpoint_svm(struct kfd_process *p, From 9dea5dd0e9dc65c9f5fd9390062381b17ef242b2 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 23 Aug 2022 09:39:56 -0400 Subject: [PATCH 10/88] drm/amd/display: update header files [why] update header files, and remove not used register access marco Reviewed-by: Hansen Dsouza Acked-by: Pavle Kotarac Signed-off-by: Charlene Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c | 3 ++- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h | 8 -------- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.h | 10 ---------- 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c index 694260c10a01..ccd91792991b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c @@ -215,7 +215,8 @@ void mmhubbub2_config_mcif_irq(struct mcif_wb *mcif_wb, REG_UPDATE(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB_BUFMGR_SW_OVERRUN_INT_EN, params->sw_overrun_int_en); REG_UPDATE(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_INT_EN, params->vce_int_en); - REG_UPDATE(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_SLICE_INT_EN, params->vce_slice_int_en); + if (mcif_wb20->mcif_wb_mask->MCIF_WB_BUFMGR_VCE_SLICE_INT_EN) + REG_UPDATE(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_SLICE_INT_EN, params->vce_slice_int_en); } void mmhubbub2_enable_mcif(struct mcif_wb *mcif_wb) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h index f2580e65196c..7446e54bf5aa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h @@ -227,11 +227,7 @@ SF(MCIF_WB0_MCIF_WB_BUF_3_ADDR_C, MCIF_WB_BUF_3_ADDR_C, mask_sh),\ SF(MCIF_WB0_MCIF_WB_BUF_4_ADDR_Y, MCIF_WB_BUF_4_ADDR_Y, mask_sh),\ SF(MCIF_WB0_MCIF_WB_BUF_4_ADDR_C, MCIF_WB_BUF_4_ADDR_C, mask_sh),\ - SF(MCIF_WB0_MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_LOCK_IGNORE, mask_sh),\ - SF(MCIF_WB0_MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_INT_EN, mask_sh),\ - SF(MCIF_WB0_MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_INT_ACK, mask_sh),\ SF(MCIF_WB0_MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_SLICE_INT_EN, mask_sh),\ - SF(MCIF_WB0_MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_LOCK, mask_sh),\ SF(MCIF_WB0_MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_SLICE_SIZE, mask_sh),\ SF(MCIF_WB_NB_PSTATE_LATENCY_WATERMARK, NB_PSTATE_CHANGE_REFRESH_WATERMARK, mask_sh),\ SF(MCIF_WB_NB_PSTATE_LATENCY_WATERMARK, NB_PSTATE_CHANGE_WATERMARK_MASK, mask_sh),\ @@ -363,11 +359,7 @@ SF(MCIF_WB_BUF_3_ADDR_C, MCIF_WB_BUF_3_ADDR_C, mask_sh),\ SF(MCIF_WB_BUF_4_ADDR_Y, MCIF_WB_BUF_4_ADDR_Y, mask_sh),\ SF(MCIF_WB_BUF_4_ADDR_C, MCIF_WB_BUF_4_ADDR_C, mask_sh),\ - SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_LOCK_IGNORE, mask_sh),\ - SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_INT_EN, mask_sh),\ - SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_INT_ACK, mask_sh),\ SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_SLICE_INT_EN, mask_sh),\ - SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_LOCK, mask_sh),\ SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_SLICE_SIZE, mask_sh),\ SF(MCIF_WB_NB_PSTATE_LATENCY_WATERMARK, NB_PSTATE_CHANGE_REFRESH_WATERMARK, mask_sh),\ SF(MCIF_WB_NB_PSTATE_LATENCY_WATERMARK, NB_PSTATE_CHANGE_WATERMARK_MASK, mask_sh),\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.h index 22355051f5f7..e460cf8d9041 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.h @@ -90,7 +90,6 @@ SF(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB_BUFMGR_SW_OVERRUN_INT_EN, mask_sh),\ SF(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB_BUFMGR_SW_LOCK, mask_sh),\ SF(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB_BUF_ADDR_FENCE_EN, mask_sh),\ - SF(MCIF_WB_BUFMGR_STATUS, MCIF_WB_BUFMGR_VCE_INT_STATUS, mask_sh),\ SF(MCIF_WB_BUFMGR_STATUS, MCIF_WB_BUFMGR_SW_INT_STATUS, mask_sh),\ SF(MCIF_WB_BUFMGR_STATUS, MCIF_WB_BUFMGR_SW_OVERRUN_INT_STATUS, mask_sh),\ SF(MCIF_WB_BUFMGR_STATUS, MCIF_WB_BUFMGR_CUR_BUF, mask_sh),\ @@ -101,7 +100,6 @@ SF(MCIF_WB_BUF_PITCH, MCIF_WB_BUF_CHROMA_PITCH, mask_sh),\ SF(MCIF_WB_BUF_1_STATUS, MCIF_WB_BUF_1_ACTIVE, mask_sh),\ SF(MCIF_WB_BUF_1_STATUS, MCIF_WB_BUF_1_SW_LOCKED, mask_sh),\ - SF(MCIF_WB_BUF_1_STATUS, MCIF_WB_BUF_1_VCE_LOCKED, mask_sh),\ SF(MCIF_WB_BUF_1_STATUS, MCIF_WB_BUF_1_OVERFLOW, mask_sh),\ SF(MCIF_WB_BUF_1_STATUS, MCIF_WB_BUF_1_DISABLE, mask_sh),\ SF(MCIF_WB_BUF_1_STATUS, MCIF_WB_BUF_1_MODE, mask_sh),\ @@ -116,7 +114,6 @@ SF(MCIF_WB_BUF_1_STATUS2, MCIF_WB_BUF_1_C_OVERRUN, mask_sh),\ SF(MCIF_WB_BUF_2_STATUS, MCIF_WB_BUF_2_ACTIVE, mask_sh),\ SF(MCIF_WB_BUF_2_STATUS, MCIF_WB_BUF_2_SW_LOCKED, mask_sh),\ - SF(MCIF_WB_BUF_2_STATUS, MCIF_WB_BUF_2_VCE_LOCKED, mask_sh),\ SF(MCIF_WB_BUF_2_STATUS, MCIF_WB_BUF_2_OVERFLOW, mask_sh),\ SF(MCIF_WB_BUF_2_STATUS, MCIF_WB_BUF_2_DISABLE, mask_sh),\ SF(MCIF_WB_BUF_2_STATUS, MCIF_WB_BUF_2_MODE, mask_sh),\ @@ -131,7 +128,6 @@ SF(MCIF_WB_BUF_2_STATUS2, MCIF_WB_BUF_2_C_OVERRUN, mask_sh),\ SF(MCIF_WB_BUF_3_STATUS, MCIF_WB_BUF_3_ACTIVE, mask_sh),\ SF(MCIF_WB_BUF_3_STATUS, MCIF_WB_BUF_3_SW_LOCKED, mask_sh),\ - SF(MCIF_WB_BUF_3_STATUS, MCIF_WB_BUF_3_VCE_LOCKED, mask_sh),\ SF(MCIF_WB_BUF_3_STATUS, MCIF_WB_BUF_3_OVERFLOW, mask_sh),\ SF(MCIF_WB_BUF_3_STATUS, MCIF_WB_BUF_3_DISABLE, mask_sh),\ SF(MCIF_WB_BUF_3_STATUS, MCIF_WB_BUF_3_MODE, mask_sh),\ @@ -146,7 +142,6 @@ SF(MCIF_WB_BUF_3_STATUS2, MCIF_WB_BUF_3_C_OVERRUN, mask_sh),\ SF(MCIF_WB_BUF_4_STATUS, MCIF_WB_BUF_4_ACTIVE, mask_sh),\ SF(MCIF_WB_BUF_4_STATUS, MCIF_WB_BUF_4_SW_LOCKED, mask_sh),\ - SF(MCIF_WB_BUF_4_STATUS, MCIF_WB_BUF_4_VCE_LOCKED, mask_sh),\ SF(MCIF_WB_BUF_4_STATUS, MCIF_WB_BUF_4_OVERFLOW, mask_sh),\ SF(MCIF_WB_BUF_4_STATUS, MCIF_WB_BUF_4_DISABLE, mask_sh),\ SF(MCIF_WB_BUF_4_STATUS, MCIF_WB_BUF_4_MODE, mask_sh),\ @@ -172,11 +167,6 @@ SF(MCIF_WB_BUF_3_ADDR_C, MCIF_WB_BUF_3_ADDR_C, mask_sh),\ SF(MCIF_WB_BUF_4_ADDR_Y, MCIF_WB_BUF_4_ADDR_Y, mask_sh),\ SF(MCIF_WB_BUF_4_ADDR_C, MCIF_WB_BUF_4_ADDR_C, mask_sh),\ - SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_LOCK_IGNORE, mask_sh),\ - SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_INT_EN, mask_sh),\ - SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_INT_ACK, mask_sh),\ - SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_SLICE_INT_EN, mask_sh),\ - SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_VCE_LOCK, mask_sh),\ SF(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB_BUFMGR_SLICE_SIZE, mask_sh),\ SF(MCIF_WB_NB_PSTATE_LATENCY_WATERMARK, NB_PSTATE_CHANGE_REFRESH_WATERMARK, mask_sh),\ SF(MCIF_WB_NB_PSTATE_LATENCY_WATERMARK, NB_PSTATE_CHANGE_WATERMARK_MASK, mask_sh),\ From d27ec594b48d9d2842af9e3fdf3159bfdac1b1f8 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Wed, 7 Sep 2022 15:56:27 +0800 Subject: [PATCH 11/88] drm/amdgpu: Rely on MCUMC_STATUS for umc v8_10 correctable error counter only Only check MCUMC_STATUS for CE counter for umc v8_10. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v8_10.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index 36a2053f2e8b..a8cbda81828d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -101,22 +101,16 @@ static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_reg_offset, unsigned long *error_count) { - uint32_t ecc_err_cnt, ecc_err_cnt_addr; uint64_t mc_umc_status; uint32_t mc_umc_status_addr; /* UMC 8_10 registers */ - ecc_err_cnt_addr = - SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); - ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); - *error_count += - (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) - - UMC_V8_10_CE_CNT_INIT); - - /* Check for SRAM correctable error, MCUMC_STATUS is a 64 bit register */ + /* Rely on MCUMC_STATUS for correctable error counter + * MCUMC_STATUS is a 64 bit register + */ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) From 34dfca890835ca9f4a59c8a2ff6be8c0513637c7 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Wed, 7 Sep 2022 15:58:59 +0800 Subject: [PATCH 12/88] drm/amdgpu: Enable full reset when RAS is supported on gc v11_0_0 Enable full reset for RAS supported configuration on gc v11_0_0. v2: simplify the code. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index a26c5723c46e..5f0d6983714a 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -421,6 +421,7 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev) { switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): + return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC); case IP_VERSION(11, 0, 2): return false; default: From cd3a49af58c23dd5ffa1d4d5a120ee2354d2e8d8 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 9 Sep 2022 11:06:50 +0800 Subject: [PATCH 13/88] drm/amdgpu: change the alignment size of TMR BO to 1M align TMR BO size TO tmr size is not necessary, modify the size to 1M to avoid re-create BO fail when serious VRAM fragmentation. v2: add new macro PSP_TMR_ALIGNMENT for TMR BO alignment size Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index cfcaf890a6a1..e430a3142310 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -766,7 +766,7 @@ static int psp_tmr_init(struct psp_context *psp) } pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; - ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE(psp->adev), + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, AMDGPU_GEM_DOMAIN_VRAM, &psp->tmr_bo, &psp->tmr_mc_addr, pptr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index c32b74bd970f..e593e8c2a54d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -36,6 +36,7 @@ #define PSP_CMD_BUFFER_SIZE 0x1000 #define PSP_1_MEG 0x100000 #define PSP_TMR_SIZE(adev) ((adev)->asic_type == CHIP_ALDEBARAN ? 0x800000 : 0x400000) +#define PSP_TMR_ALIGNMENT 0x100000 #define PSP_FW_NAME_LEN 0x24 enum psp_shared_mem_size { From 3a876060892ba52dd67d197c78b955e62657d906 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 8 Sep 2022 17:56:09 -0400 Subject: [PATCH 14/88] drm/amdkfd: Migrate in CPU page fault use current mm migrate_vma_setup shows below warning because we don't hold another process mm mmap_lock. We should use current vmf->vma->vm_mm instead, the caller already hold current mmap lock inside CPU page fault handler. WARNING: CPU: 10 PID: 3054 at include/linux/mmap_lock.h:155 find_vma Call Trace: walk_page_range+0x76/0x150 migrate_vma_setup+0x18a/0x640 svm_migrate_vram_to_ram+0x245/0xa10 [amdgpu] svm_migrate_to_ram+0x36f/0x470 [amdgpu] do_swap_page+0xcfe/0xec0 __handle_mm_fault+0x96b/0x15e0 handle_mm_fault+0x13f/0x3e0 do_user_addr_fault+0x1e7/0x690 Fixes: e1f84eef313f ("drm/amdkfd: handle CPU fault on COW mapping") Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 929dec1da0e4..7522bf2d2f57 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -949,7 +949,8 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) goto out_unlock_prange; } - r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU); + r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, + KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU); if (r) pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n", r, prange->svms, prange, prange->start, prange->last); From 2efc30f0161b064f6918316ecd486a8e3feee10e Mon Sep 17 00:00:00 2001 From: Vignesh Chander Date: Fri, 9 Sep 2022 09:28:46 -0400 Subject: [PATCH 15/88] drm/amdgpu: Fix hive reference count leak both get_xgmi_hive and put_xgmi_hive can be skipped since the reset domain is not necessary for VF Signed-off-by: Vignesh Chander Reviewed-by: Shaoyun Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e21804362995..943c9e750575 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2451,9 +2451,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) */ if (adev->gmc.xgmi.num_physical_nodes > 1) { if (amdgpu_xgmi_add_device(adev) == 0) { - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); - if (!amdgpu_sriov_vf(adev)) { + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + if (!hive->reset_domain || !amdgpu_reset_get_reset_domain(hive->reset_domain)) { r = -ENOENT; From fc0cd4cd1f7fbe6b0cbe651a8b6db6ec5476c949 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 1 Sep 2022 15:56:55 +0800 Subject: [PATCH 16/88] drm/amd/display: clean up some inconsistent indentings This if statement is the content of the for statement above it. It should be indented. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2026 Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 9dd705b985b9..7c2e3b8dc26a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -417,8 +417,8 @@ void get_subvp_visual_confirm_color( for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->stream->mall_stream_config.paired_stream && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (pipe->stream && pipe->stream->mall_stream_config.paired_stream && + pipe->stream->mall_stream_config.type == SUBVP_MAIN) { /* SubVP enable - red */ color->color_r_cr = color_value; enable_subvp = true; From 02bcbd6bfc5932d4300b017dcd2ba7e7bbbffe79 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 1 Sep 2022 16:11:31 +0800 Subject: [PATCH 17/88] drm/amd/display: Simplify bool conversion The result of relational operation is Boolean, and the question mark expression is redundant. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2027 Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index dc501ee7d01a..e4fd540dec0f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -1873,7 +1873,7 @@ void dml32_CalculateSurfaceSizeInMall( if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable) TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; } - *ExceededMALLSize = (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true); + *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024); } // CalculateSurfaceSizeInMall void dml32_CalculateVMRowAndSwath( From a92bfda8783729ac684a0f005a1680602ed1096d Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 1 Sep 2022 16:34:38 +0800 Subject: [PATCH 18/88] drm/amd/display: remove possible condition with no effect (if == else) Conditional statements have no effect to next process.So remove it. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2028 Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index e4fd540dec0f..8f1c0e12dd86 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -4663,10 +4663,6 @@ void dml32_CalculateMinAndMaxPrefetchMode( } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) { *MinPrefetchMode = 0; *MaxPrefetchMode = 0; - } else if (AllowForPStateChangeOrStutterInVBlankFinal == - dm_prefetch_support_uclk_fclk_and_stutter_if_possible) { - *MinPrefetchMode = 0; - *MaxPrefetchMode = 3; } else { *MinPrefetchMode = 0; *MaxPrefetchMode = 3; From 8a948b1b2f2bef2444c403a2884134d46100ec26 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Fri, 2 Sep 2022 07:31:48 +0000 Subject: [PATCH 19/88] drm/radeon/ci_dpm: Remove the unneeded result variable Return the value ci_load_smc_ucode() directly instead of storing it in another redundant variable. Reported-by: Zeal Robot Signed-off-by: ye xingchen Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ci_dpm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index ac006bed4743..8ef25ab305ae 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -2056,7 +2056,7 @@ static void ci_clear_vc(struct radeon_device *rdev) static int ci_upload_firmware(struct radeon_device *rdev) { struct ci_power_info *pi = ci_get_pi(rdev); - int i, ret; + int i; for (i = 0; i < rdev->usec_timeout; i++) { if (RREG32_SMC(RCU_UC_EVENTS) & BOOT_SEQ_DONE) @@ -2067,9 +2067,7 @@ static int ci_upload_firmware(struct radeon_device *rdev) ci_stop_smc_clock(rdev); ci_reset_smc(rdev); - ret = ci_load_smc_ucode(rdev, pi->sram_end); - - return ret; + return ci_load_smc_ucode(rdev, pi->sram_end); } From d4242216dd792b2af6f3e6738dfa71cd50bcd791 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Fri, 2 Sep 2022 07:33:20 +0000 Subject: [PATCH 20/88] drm/radeon: Remove the unneeded result variable Return the value radeon_drm_ioctl() directly instead of storing it in another redundant variable. Reported-by: Zeal Robot Signed-off-by: ye xingchen Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index a28d5ceab628..6cbe1ab81aba 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -512,14 +512,11 @@ long radeon_drm_ioctl(struct file *filp, static long radeon_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { unsigned int nr = DRM_IOCTL_NR(cmd); - int ret; if (nr < DRM_COMMAND_BASE) return drm_compat_ioctl(filp, cmd, arg); - ret = radeon_drm_ioctl(filp, cmd, arg); - - return ret; + return radeon_drm_ioctl(filp, cmd, arg); } #endif From 79c0d7ddcbb84b2a714620a2abc5016529fcc38c Mon Sep 17 00:00:00 2001 From: zhang songyi Date: Fri, 2 Sep 2022 08:04:01 +0000 Subject: [PATCH 21/88] drm/amdgpu: Remove the unneeded result variable Return the sdma_v6_0_start() directly instead of storing it in another redundant variable. Reported-by: Zeal Robot Signed-off-by: zhang songyi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 7ae572a08cb3..3a0b14be1a57 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1377,12 +1377,9 @@ static int sdma_v6_0_sw_fini(void *handle) static int sdma_v6_0_hw_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = sdma_v6_0_start(adev); - - return r; + return sdma_v6_0_start(adev); } static int sdma_v6_0_hw_fini(void *handle) From 556bdae320b2b6db124738eb97f2624e92cceff7 Mon Sep 17 00:00:00 2001 From: Jingyu Wang Date: Mon, 5 Sep 2022 09:14:31 +0800 Subject: [PATCH 22/88] drm/amdgpu: cleanup coding style in amdgpu_acpi.c Fix everything checkpatch.pl complained about in amdgpu_acpi.c Signed-off-by: Jingyu Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 55402d238919..3da27436922c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2012 Advanced Micro Devices, Inc. * @@ -849,6 +850,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) if (amdgpu_device_has_dc_support(adev)) { #if defined(CONFIG_DRM_AMD_DC) struct amdgpu_display_manager *dm = &adev->dm; + if (dm->backlight_dev[0]) atif->bd = dm->backlight_dev[0]; #endif @@ -863,6 +865,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) && enc->enc_priv) { struct amdgpu_encoder_atom_dig *dig = enc->enc_priv; + if (dig->bl_dev) { atif->bd = dig->bl_dev; break; @@ -919,9 +922,9 @@ static bool amdgpu_atif_pci_probe_handle(struct pci_dev *pdev) return false; status = acpi_get_handle(dhandle, "ATIF", &atif_handle); - if (ACPI_FAILURE(status)) { + if (ACPI_FAILURE(status)) return false; - } + amdgpu_acpi_priv.atif.handle = atif_handle; acpi_get_name(amdgpu_acpi_priv.atif.handle, ACPI_FULL_PATHNAME, &buffer); DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name); @@ -954,9 +957,9 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev) return false; status = acpi_get_handle(dhandle, "ATCS", &atcs_handle); - if (ACPI_FAILURE(status)) { + if (ACPI_FAILURE(status)) return false; - } + amdgpu_acpi_priv.atcs.handle = atcs_handle; acpi_get_name(amdgpu_acpi_priv.atcs.handle, ACPI_FULL_PATHNAME, &buffer); DRM_DEBUG_DRIVER("Found ATCS handle %s\n", acpi_method_name); From 826f03b8ac7a9c4660de9b65a81e8926c60b3b97 Mon Sep 17 00:00:00 2001 From: Jingyu Wang Date: Mon, 5 Sep 2022 02:33:07 +0800 Subject: [PATCH 23/88] drm/amdgpu: cleanup coding style in amdgpu_sync.c file This is a patch to the amdgpu_sync.c file that fixes some warnings found by the checkpatch.pl tool Signed-off-by: Jingyu Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 504af1b93bfa..090e66a1b284 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2014 Advanced Micro Devices, Inc. * All Rights Reserved. @@ -315,6 +316,7 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) struct hlist_node *tmp; struct dma_fence *f; int i; + hash_for_each_safe(sync->fences, i, tmp, e, node) { f = e->fence; @@ -392,7 +394,7 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) { struct amdgpu_sync_entry *e; struct hlist_node *tmp; - unsigned i; + unsigned int i; hash_for_each_safe(sync->fences, i, tmp, e, node) { hash_del(&e->node); From 2f4ca1ba6c9e7a4c2eea2ed8a378817ec1946f4f Mon Sep 17 00:00:00 2001 From: Jingyu Wang Date: Mon, 5 Sep 2022 15:56:24 +0800 Subject: [PATCH 24/88] drm/amdgpu: cleanup coding style in amdgpu_amdkfd.c Fix everything checkpatch.pl complained about in amdgpu_amdkfd.c Signed-off-by: Jingyu Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 091415a4abf0..4f5bd96000ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2014 Advanced Micro Devices, Inc. * @@ -130,6 +131,7 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) kfd.reset_work); struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); reset_context.method = AMD_RESET_METHOD_NONE; From e7c94bfb747fcec57528f89e53a1461b48ab06dc Mon Sep 17 00:00:00 2001 From: Jingyu Wang Date: Mon, 5 Sep 2022 16:38:25 +0800 Subject: [PATCH 25/88] drm/amdgpu: cleanup coding style in amdgpu_amdkfd_gpuvm.c Fix everything checkpatch.pl complained about in amdgpu_amdkfd_gpuvm.c Reviewed-by: Felix Kuehling Signed-off-by: Jingyu Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 2170db83e41d..f4c49537d837 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2014-2018 Advanced Micro Devices, Inc. * @@ -1612,6 +1613,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); size_t available; + spin_lock(&kfd_mem_limit.mem_limit_lock); available = adev->gmc.real_vram_size - adev->kfd.vram_used_aligned @@ -2216,7 +2218,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, { if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { *mem = *adev->gmc.vm_fault_info; - mb(); + mb(); /* make sure read happened */ atomic_set(&adev->gmc.vm_fault_info_updated, 0); } return 0; From 65529fa32c60ea15514e99b6ea12304aea732b67 Mon Sep 17 00:00:00 2001 From: Jilin Yuan Date: Wed, 7 Sep 2022 12:10:58 +0800 Subject: [PATCH 26/88] drm/amd/display: fix repeated words in comments Delete the redundant word 'in'. Signed-off-by: Jilin Yuan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index bdb6bac8dd97..c94a966c6612 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -300,7 +300,7 @@ static void set_high_bit_rate_capable( AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_HBR, value); } -/* set video latency in in ms/2+1 */ +/* set video latency in ms/2+1 */ static void set_video_latency( struct audio *audio, int latency_in_ms) From 49791b988ac78531ece312043d0113ad1ff9cf0f Mon Sep 17 00:00:00 2001 From: Jingyu Wang Date: Sat, 10 Sep 2022 03:53:30 +0800 Subject: [PATCH 27/88] drm/amdgpu/display: remove unneeded "default n" options Remove "default n" options. If the "default" line is removed, it defaults to 'n'. Signed-off-by: Jingyu Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 413d8c6d592f..6925e0280dbe 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -28,7 +28,6 @@ config DRM_AMD_DC_SI bool "AMD DC support for Southern Islands ASICs" depends on DRM_AMDGPU_SI depends on DRM_AMD_DC - default n help Choose this option to enable new AMD DC support for SI asics by default. This includes Tahiti, Pitcairn, Cape Verde, Oland. @@ -43,7 +42,6 @@ config DEBUG_KERNEL_DC config DRM_AMD_SECURE_DISPLAY bool "Enable secure display support" - default n depends on DEBUG_FS depends on DRM_AMD_DC_DCN help From 7a87040ca6a1dbbcf1cbddddf74e82c95c99667a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 30 Aug 2022 11:08:09 -0400 Subject: [PATCH 28/88] drm/amdgpu: add HDP remap functionality to nbio 7.7 Was missing before and would have resulted in a write to a non-existant register. Normally APUs don't use HDP, but other asics could use this code and APUs do use the HDP when used in passthrough. Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index f30bc826a878..def89379b51a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -28,6 +28,14 @@ #include "nbio/nbio_7_7_0_sh_mask.h" #include +static void nbio_v7_7_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v7_7_get_rev_id(struct amdgpu_device *adev) { u32 tmp; @@ -336,4 +344,5 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .get_clockgating_state = nbio_v7_7_get_clockgating_state, .ih_control = nbio_v7_7_ih_control, .init_registers = nbio_v7_7_init_registers, + .remap_hdp_registers = nbio_v7_7_remap_hdp_registers, }; From c3db1b9065d08dac7dd602301c42641dbbe89990 Mon Sep 17 00:00:00 2001 From: John Clements Date: Sat, 30 Jul 2022 06:08:36 +0800 Subject: [PATCH 29/88] drm/amdgpu: added support for ras driver loading copy ras driver to psp if present Signed-off-by: John Clements Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 6 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 1 + drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 7 +++++++ 4 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e430a3142310..c4848522be16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2055,6 +2055,15 @@ static int psp_hw_start(struct psp_context *psp) } } + if ((is_psp_fw_valid(psp->ras_drv)) && + (psp->funcs->bootloader_load_ras_drv != NULL)) { + ret = psp_bootloader_load_ras_drv(psp); + if (ret) { + DRM_ERROR("PSP load ras_drv failed!\n"); + return ret; + } + } + if ((is_psp_fw_valid(psp->sos)) && (psp->funcs->bootloader_load_sos != NULL)) { ret = psp_bootloader_load_sos(psp); @@ -3040,6 +3049,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp, psp->dbg_drv.size_bytes = le32_to_cpu(desc->size_bytes); psp->dbg_drv.start_addr = ucode_start_addr; break; + case PSP_FW_TYPE_PSP_RAS_DRV: + psp->ras_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->ras_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->ras_drv.start_addr = ucode_start_addr; + break; default: dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index e593e8c2a54d..58ce3ebb446c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -72,6 +72,7 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_SOCDRV = 0xB0000, PSP_BL__LOAD_DBGDRV = 0xC0000, PSP_BL__LOAD_INTFDRV = 0xD0000, + PSP_BL__LOAD_RASDRV = 0xE0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, @@ -115,6 +116,7 @@ struct psp_funcs int (*bootloader_load_soc_drv)(struct psp_context *psp); int (*bootloader_load_intf_drv)(struct psp_context *psp); int (*bootloader_load_dbg_drv)(struct psp_context *psp); + int (*bootloader_load_ras_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); int (*ring_create)(struct psp_context *psp, @@ -324,6 +326,7 @@ struct psp_context struct psp_bin_desc soc_drv; struct psp_bin_desc intf_drv; struct psp_bin_desc dbg_drv; + struct psp_bin_desc ras_drv; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -404,6 +407,9 @@ struct amdgpu_psp_funcs { ((psp)->funcs->bootloader_load_intf_drv ? (psp)->funcs->bootloader_load_intf_drv((psp)) : 0) #define psp_bootloader_load_dbg_drv(psp) \ ((psp)->funcs->bootloader_load_dbg_drv ? (psp)->funcs->bootloader_load_dbg_drv((psp)) : 0) +#define psp_bootloader_load_ras_drv(psp) \ + ((psp)->funcs->bootloader_load_ras_drv ? \ + (psp)->funcs->bootloader_load_ras_drv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 96b6cf4c4d54..3975bcaa2c89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -124,6 +124,7 @@ enum psp_fw_type { PSP_FW_TYPE_PSP_SOC_DRV, PSP_FW_TYPE_PSP_INTF_DRV, PSP_FW_TYPE_PSP_DBG_DRV, + PSP_FW_TYPE_PSP_RAS_DRV, }; /* version_major=2, version_minor=0 */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 5b5b1ef0c2b1..21d822b1d589 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -224,6 +224,12 @@ static int psp_v13_0_bootloader_load_dbg_drv(struct psp_context *psp) return psp_v13_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV); } +static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV); +} + + static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) { int ret; @@ -720,6 +726,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .bootloader_load_soc_drv = psp_v13_0_bootloader_load_soc_drv, .bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv, .bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv, + .bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv, .bootloader_load_sos = psp_v13_0_bootloader_load_sos, .ring_init = psp_v13_0_ring_init, .ring_create = psp_v13_0_ring_create, From 158225294683310566445f8477336e747b74f03f Mon Sep 17 00:00:00 2001 From: Candice Li Date: Wed, 7 Sep 2022 15:54:25 +0800 Subject: [PATCH 30/88] drm/amdgpu: Add EEPROM I2C address for smu v13_0_0 Set correct EEPROM I2C address for smu v13_0_0. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index c4283987bb1e..84c241b9a2a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -38,6 +38,7 @@ #define EEPROM_I2C_MADDR_ARCTURUS_D342 0x0 #define EEPROM_I2C_MADDR_SIENNA_CICHLID 0x0 #define EEPROM_I2C_MADDR_ALDEBARAN 0x0 +#define EEPROM_I2C_MADDR_SMU_13_0_0 (0x54UL << 16) /* * The 2 macros bellow represent the actual size in bytes that @@ -156,6 +157,15 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, return false; } + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 0): + control->i2c_address = EEPROM_I2C_MADDR_SMU_13_0_0; + break; + + default: + break; + } + return true; } From 6da15a236c8c80d9f87e4c5216e00ad8f1cace2d Mon Sep 17 00:00:00 2001 From: Candice Li Date: Wed, 7 Sep 2022 15:52:04 +0800 Subject: [PATCH 31/88] drm/amdgpu: Skip reset error status for psp v13_0_0 No need to reset error status since only umc ras supported on psp v13_0_0. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index ab9ba5a9c33d..e55f106621ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1811,7 +1811,8 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) amdgpu_ras_query_error_status(adev, &info); if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && - adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4) && + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 0)) { if (amdgpu_ras_reset_error_status(adev, info.head.block)) dev_warn(adev->dev, "Failed to reset error counter and error status"); } From 17529ea2acfa3e2118f5a9ee911e0daf2d88c13f Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sat, 20 Aug 2022 17:20:30 -0400 Subject: [PATCH 32/88] drm/amd/display: Optimizations for DML math [why] Conditionals in the DML basic math functions significantly impact mode enumeration. [how] Remove conditionals for floor/ceil operations which are used frequently in DML and add an assertion for invalid callers using zero granuality. Fix existing callers that rely on 0 granularity. Tested-by: Daniel Wheeler Reviewed-by: Nevenko Stupar Acked-by: Pavle Kotarac Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher --- .../amd/display/dc/dml/calcs/dcn_calc_auto.c | 22 ++++++------- .../amd/display/dc/dml/calcs/dcn_calc_math.c | 16 +++++----- .../dc/dml/dcn20/display_mode_vba_20v2.c | 10 +++--- .../dc/dml/dcn21/display_mode_vba_21.c | 6 ++-- .../dc/dml/dcn30/display_mode_vba_30.c | 8 ++--- .../dc/dml/dcn31/display_mode_vba_31.c | 6 ++-- .../dc/dml/dcn314/display_mode_vba_314.c | 6 ++-- .../dc/dml/dcn32/display_mode_vba_util_32.c | 31 +++++++++---------- .../drm/amd/display/dc/dml/dml_inline_defs.h | 9 ++---- 9 files changed, 53 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c index 41284e263325..288d22a16cf2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c @@ -526,10 +526,10 @@ void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v) } if (v->max_swath_height_c[k] > 0.0) { v->swath_width_granularity_c = 256.0 /dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / v->max_swath_height_c[k]; - } - v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width_yper_state[i][j][k] / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pixel_in_detc[k] * v->max_swath_height_c[k]; - if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) { - v->rounded_up_max_swath_size_bytes_c =dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256; + v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width_yper_state[i][j][k] / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pixel_in_detc[k] * v->max_swath_height_c[k]; + if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) { + v->rounded_up_max_swath_size_bytes_c = dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256; + } } if (v->rounded_up_max_swath_size_bytes_y + v->rounded_up_max_swath_size_bytes_c <= v->det_buffer_size_in_kbyte * 1024.0 / 2.0) { v->swath_height_yper_state[i][j][k] = v->max_swath_height_y[k]; @@ -552,14 +552,14 @@ void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v) v->lines_in_det_chroma = v->det_buffer_size_in_kbyte * 1024.0 / 3.0 / v->byte_per_pixel_in_dety[k] / (v->swath_width_yper_state[i][j][k] / 2.0); } v->effective_lb_latency_hiding_source_lines_luma =dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] /dcn_bw_max2(v->h_ratio[k], 1.0)), 1.0)) - (v->vtaps[k] - 1.0); - v->effective_lb_latency_hiding_source_lines_chroma =dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] / 2.0 /dcn_bw_max2(v->h_ratio[k] / 2.0, 1.0)), 1.0)) - (v->vta_pschroma[k] - 1.0); v->effective_detlb_lines_luma =dcn_bw_floor2(v->lines_in_det_luma +dcn_bw_min2(v->lines_in_det_luma * v->required_dispclk[i][j] * v->byte_per_pixel_in_dety[k] * v->pscl_factor[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_luma), v->swath_height_yper_state[i][j][k]); - v->effective_detlb_lines_chroma =dcn_bw_floor2(v->lines_in_det_chroma +dcn_bw_min2(v->lines_in_det_chroma * v->required_dispclk[i][j] * v->byte_per_pixel_in_detc[k] * v->pscl_factor_chroma[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_chroma), v->swath_height_cper_state[i][j][k]); if (v->byte_per_pixel_in_detc[k] == 0.0) { v->urgent_latency_support_us_per_state[i][j][k] = v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]); } else { - v->urgent_latency_support_us_per_state[i][j][k] =dcn_bw_min2(v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]), v->effective_detlb_lines_chroma * (v->htotal[k] / v->pixel_clock[k]) / (v->v_ratio[k] / 2.0) - v->effective_detlb_lines_chroma * v->swath_width_yper_state[i][j][k] / 2.0 *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k])); + v->effective_lb_latency_hiding_source_lines_chroma = dcn_bw_min2(v->max_line_buffer_lines, dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] / 2.0 / dcn_bw_max2(v->h_ratio[k] / 2.0, 1.0)), 1.0)) - (v->vta_pschroma[k] - 1.0); + v->effective_detlb_lines_chroma = dcn_bw_floor2(v->lines_in_det_chroma + dcn_bw_min2(v->lines_in_det_chroma * v->required_dispclk[i][j] * v->byte_per_pixel_in_detc[k] * v->pscl_factor_chroma[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_chroma), v->swath_height_cper_state[i][j][k]); + v->urgent_latency_support_us_per_state[i][j][k] = dcn_bw_min2(v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] * dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]), v->effective_detlb_lines_chroma * (v->htotal[k] / v->pixel_clock[k]) / (v->v_ratio[k] / 2.0) - v->effective_detlb_lines_chroma * v->swath_width_yper_state[i][j][k] / 2.0 * dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k])); } } } @@ -1146,10 +1146,10 @@ void display_pipe_configuration(struct dcn_bw_internal_vars *v) } if (v->maximum_swath_height_c > 0.0) { v->swath_width_granularity_c = 256.0 /dcn_bw_ceil2(v->byte_per_pix_detc, 2.0) / v->maximum_swath_height_c; - } - v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pix_detc * v->maximum_swath_height_c; - if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) { - v->rounded_up_max_swath_size_bytes_c =dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256; + v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pix_detc * v->maximum_swath_height_c; + if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) { + v->rounded_up_max_swath_size_bytes_c = dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256; + } } if (v->rounded_up_max_swath_size_bytes_y + v->rounded_up_max_swath_size_bytes_c <= v->det_buffer_size_in_kbyte * 1024.0 / 2.0) { v->swath_height_y[k] = v->maximum_swath_height_y; diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c index 07d18e78de49..cac72413a097 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c @@ -23,6 +23,7 @@ * */ +#include "os_types.h" #include "dcn_calc_math.h" #define isNaN(number) ((number) != (number)) @@ -69,8 +70,8 @@ float dcn_bw_max2(const float arg1, const float arg2) float dcn_bw_floor2(const float arg, const float significance) { - if (significance == 0) - return 0; + ASSERT(significance != 0); + return ((int) (arg / significance)) * significance; } float dcn_bw_floor(const float arg) @@ -80,17 +81,14 @@ float dcn_bw_floor(const float arg) float dcn_bw_ceil(const float arg) { - float flr = dcn_bw_floor2(arg, 1); - - return flr + 0.00001 >= arg ? arg : flr + 1; + return (int) (arg + 0.99999); } float dcn_bw_ceil2(const float arg, const float significance) { - float flr = dcn_bw_floor2(arg, significance); - if (significance == 0) - return 0; - return flr + 0.00001 >= arg ? arg : flr + significance; + ASSERT(significance != 0); + + return ((int) (arg / significance + 0.99999)) * significance; } float dcn_bw_max3(float v1, float v2, float v3) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index 63bbdf8b8678..edd098c7eb92 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -4478,17 +4478,17 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->EffectiveLBLatencyHidingSourceLinesLuma), locals->SwathHeightYPerState[i][j][k]); - locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( - locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * - locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0], - locals->EffectiveLBLatencyHidingSourceLinesChroma), - locals->SwathHeightCPerState[i][j][k]); if (locals->BytePerPixelInDETC[k] == 0) { locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]); } else { + locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( + locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * + locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0], + locals->EffectiveLBLatencyHidingSourceLinesChroma), + locals->SwathHeightCPerState[i][j][k]); locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min( locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index 8a7485e21d53..d40d32e380f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -806,10 +806,12 @@ static bool CalculatePrefetchSchedule( if (myPipe->SourceScan == dm_horz) { *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY; - *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC; + if (myPipe->BlockWidth256BytesC > 0) + *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC; } else { *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY; - *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC; + if (myPipe->BlockWidth256BytesC > 0) + *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC; } prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index b7fa003ffe06..c117a9724ae1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -6322,10 +6322,6 @@ static void CalculateSwathWidth( for (k = 0; k < NumberOfActivePlanes; ++k) { enum odm_combine_mode MainPlaneODMCombine = 0; - surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); - surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); - surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); - surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); if (SourceScan[k] != dm_vert) { SwathWidthSingleDPPY[k] = ViewportWidth[k]; @@ -6365,8 +6361,6 @@ static void CalculateSwathWidth( surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); - surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); - surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); if (SourceScan[k] != dm_vert) { MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; @@ -6374,6 +6368,7 @@ static void CalculateSwathWidth( swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); if (BytePerPixC[k] > 0) { + surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]); } else { @@ -6385,6 +6380,7 @@ static void CalculateSwathWidth( swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); if (BytePerPixC[k] > 0) { + surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); } else { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index d63b4209b14c..8753f94bdd79 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -6933,8 +6933,6 @@ static void CalculateSwathWidth( { int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); - int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); - int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); @@ -6945,6 +6943,8 @@ static void CalculateSwathWidth( MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); if (BytePerPixC[k] > 0) { + int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); + swath_width_chroma_ub[k] = dml_min( surface_width_ub_c, (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]); @@ -6956,6 +6956,8 @@ static void CalculateSwathWidth( MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); if (BytePerPixC[k] > 0) { + int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); + swath_width_chroma_ub[k] = dml_min( surface_height_ub_c, (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index fc4d7474c111..503d9ede0ac1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -7049,8 +7049,6 @@ static void CalculateSwathWidth( { int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); - int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); - int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); @@ -7061,6 +7059,8 @@ static void CalculateSwathWidth( MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); if (BytePerPixC[k] > 0) { + int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); + swath_width_chroma_ub[k] = dml_min( surface_width_ub_c, (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]); @@ -7072,6 +7072,8 @@ static void CalculateSwathWidth( MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); if (BytePerPixC[k] > 0) { + int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); + swath_width_chroma_ub[k] = dml_min( surface_height_ub_c, (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 8f1c0e12dd86..36e4a4a9296b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -719,8 +719,8 @@ void dml32_CalculateSwathWidth( unsigned int surface_width_ub_l; unsigned int surface_height_ub_l; - unsigned int surface_width_ub_c; - unsigned int surface_height_ub_c; + unsigned int surface_width_ub_c = 0; + unsigned int surface_height_ub_c = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); @@ -784,21 +784,6 @@ void dml32_CalculateSwathWidth( surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); - surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); - surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); - dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l); - dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c); - dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c); - dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]); - dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]); - dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]); - dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]); - dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]); - dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]); -#endif if (!IsVertical(SourceRotation[k])) { MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; @@ -818,6 +803,7 @@ void dml32_CalculateSwathWidth( Read256BytesBlockWidthY[k]); } if (BytePerPixC[k] > 0) { + surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); if (ViewportStationary[k] && DPPPerSurface[k] == 1) { swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, dml_floor(ViewportXStartC[k] + SwathWidthC[k] + @@ -848,6 +834,7 @@ void dml32_CalculateSwathWidth( Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); } if (BytePerPixC[k] > 0) { + surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); if (ViewportStationary[k] && DPPPerSurface[k] == 1) { swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, dml_floor(ViewportYStartC[k] + SwathWidthC[k] + @@ -866,6 +853,16 @@ void dml32_CalculateSwathWidth( } #ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); + dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l); + dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c); + dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c); + dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]); + dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]); + dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]); + dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]); + dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]); + dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]); dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]); dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]); dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h index 479d7d83220c..072bd0539605 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h @@ -76,14 +76,9 @@ static inline double dml_floor(double a, double granularity) static inline double dml_round(double a) { - double round_pt = 0.5; - double ceil = dml_ceil(a, 1); - double floor = dml_floor(a, 1); + const double round_pt = 0.5; - if (a - floor >= round_pt) - return ceil; - else - return floor; + return dml_floor(a + round_pt, 1); } /* float From 4f76da231826190658d19ec8d89ea8cd46fdfb7d Mon Sep 17 00:00:00 2001 From: Chris Park Date: Tue, 23 Aug 2022 12:21:52 -0400 Subject: [PATCH 33/88] drm/amd/display: Correct I2C register offset [Why] I2C register name starts with 1, unlike other registers that start with 0. This creates a problem with the new register macro refactoring when I2C HW objects are created in an array. [How] Correct I2C register offset by making a new macro to account for array offset. Tested-by: Daniel Wheeler Reviewed-by: Charlene Liu Acked-by: Pavle Kotarac Signed-off-by: Chris Park Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 9 ++++++++- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 17 +++++++++-------- .../drm/amd/display/dc/dcn321/dcn321_resource.c | 9 ++++++++- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index ef0a6d468a10..5a21cf041732 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -152,6 +152,13 @@ enum dcn32_clk_src_array_id { REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## reg_name +#define SR_ARR_I2C(reg_name, id) \ + REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name + +#define SRI_ARR_I2C(reg_name, block, id)\ + REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + #define SRI_ARR_ALPHABET(reg_name, block, index, id)\ REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## reg_name @@ -792,7 +799,7 @@ static struct dce_aux *dcn32_aux_engine_create( #define i2c_inst_regs_init(id)\ I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) -static struct dce_i2c_registers i2c_hw_regs[6]; +static struct dce_i2c_registers i2c_hw_regs[5]; static const struct dce_i2c_shift i2c_shifts = { I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 60d8fad16eee..dbcdf8607ee9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -1175,18 +1175,19 @@ void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_par #define I2C_HW_ENGINE_COMMON_REG_LIST_RI(id) \ ( \ - SRI_ARR(SETUP, DC_I2C_DDC, id), SRI_ARR(SPEED, DC_I2C_DDC, id), \ - SRI_ARR(HW_STATUS, DC_I2C_DDC, id), SR_ARR(DC_I2C_ARBITRATION, id), \ - SR_ARR(DC_I2C_CONTROL, id), SR_ARR(DC_I2C_SW_STATUS, id), \ - SR_ARR(DC_I2C_TRANSACTION0, id), SR_ARR(DC_I2C_TRANSACTION1, id), \ - SR_ARR(DC_I2C_TRANSACTION2, id), SR_ARR(DC_I2C_TRANSACTION3, id), \ - SR_ARR(DC_I2C_DATA, id), SR_ARR(MICROSECOND_TIME_BASE_DIV, id) \ + SRI_ARR_I2C(SETUP, DC_I2C_DDC, id), SRI_ARR_I2C(SPEED, DC_I2C_DDC, id), \ + SRI_ARR_I2C(HW_STATUS, DC_I2C_DDC, id), \ + SR_ARR_I2C(DC_I2C_ARBITRATION, id), \ + SR_ARR_I2C(DC_I2C_CONTROL, id), SR_ARR_I2C(DC_I2C_SW_STATUS, id), \ + SR_ARR_I2C(DC_I2C_TRANSACTION0, id), SR_ARR_I2C(DC_I2C_TRANSACTION1, id),\ + SR_ARR_I2C(DC_I2C_TRANSACTION2, id), SR_ARR_I2C(DC_I2C_TRANSACTION3, id),\ + SR_ARR_I2C(DC_I2C_DATA, id), SR_ARR_I2C(MICROSECOND_TIME_BASE_DIV, id) \ ) #define I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) \ ( \ - I2C_HW_ENGINE_COMMON_REG_LIST_RI(id), SR_ARR(DIO_MEM_PWR_CTRL, id), \ - SR_ARR(DIO_MEM_PWR_STATUS, id) \ + I2C_HW_ENGINE_COMMON_REG_LIST_RI(id), SR_ARR_I2C(DIO_MEM_PWR_CTRL, id), \ + SR_ARR_I2C(DIO_MEM_PWR_STATUS, id) \ ) #endif /* _DCN32_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index a93dc00ebfb5..8a89c28add0d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -159,6 +159,13 @@ enum dcn321_clk_src_array_id { REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## reg_name +#define SR_ARR_I2C(reg_name, id) \ + REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name + +#define SRI_ARR_I2C(reg_name, block, id)\ + REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + #define SRI_ARR_ALPHABET(reg_name, block, index, id)\ REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ reg ## block ## id ## _ ## reg_name @@ -796,7 +803,7 @@ static struct dce_aux *dcn321_aux_engine_create( #define i2c_inst_regs_init(id)\ I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) -static struct dce_i2c_registers i2c_hw_regs[6]; +static struct dce_i2c_registers i2c_hw_regs[5]; static const struct dce_i2c_shift i2c_shifts = { I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) From 615268d4935082ea64729fcc8a35af394ff90e7c Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 11 Aug 2022 16:42:12 -0400 Subject: [PATCH 34/88] drm/amd/display: Revert "Fallback to SW cursor if SubVP + cursor too big" This reverts commit a4f1b04216023ff0f4cd89328b59ee6890248130 since returning false in case of SubVP results in no cursor being visible on desktop as there is no sw cursor fallback path on all platforms. Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Pavle Kotarac Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index f62d50901d92..6752ca44e6e0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -328,11 +328,6 @@ bool dc_stream_set_cursor_attributes( } dc = stream->ctx->dc; - - if (attributes->height * attributes->width * 4 > 16384) - if (stream->mall_stream_config.type == SUBVP_MAIN) - return false; - stream->cursor_attributes = *attributes; dc_z10_restore(dc); From 41c81dcf599918c53e08933a0b0a522508eb6019 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 23 Aug 2022 17:14:03 -0400 Subject: [PATCH 35/88] drm/amd/display: Update MBLK calculation for SubVP [Description] Update MBLK calculation according to hardware doc. For DCC case we were not allocation enough MALL due to an inaccurate MBLK calculation. Tested-by: Daniel Wheeler Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 3 + .../display/dc/dcn32/dcn32_resource_helpers.c | 59 ++++++++++++++++--- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index dbcdf8607ee9..d1e85622dd3b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -30,6 +30,9 @@ #define DCN3_2_DET_SEG_SIZE 64 #define DCN3_2_MALL_MBLK_SIZE_BYTES 65536 // 64 * 1024 +#define DCN3_2_MBLK_WIDTH 128 +#define DCN3_2_MBLK_HEIGHT_4BPE 128 +#define DCN3_2_MBLK_HEIGHT_8BPE 64 #define TO_DCN32_RES_POOL(pool)\ container_of(pool, struct dcn32_resource_pool, base) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index ab918fe38f6a..1f195c5b3377 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -46,7 +46,6 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_state *context) { uint32_t num_ways = 0; - uint32_t mall_region_pixels = 0; uint32_t bytes_per_pixel = 0; uint32_t cache_lines_used = 0; uint32_t lines_per_way = 0; @@ -54,20 +53,64 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat uint32_t bytes_in_mall = 0; uint32_t num_mblks = 0; uint32_t cache_lines_per_plane = 0; - uint32_t i = 0; + uint32_t i = 0, j = 0; + uint32_t mblk_width = 0; + uint32_t mblk_height = 0; + uint32_t full_vp_width_blk_aligned = 0; + uint32_t full_vp_height_blk_aligned = 0; + uint32_t mall_alloc_width_blk_aligned = 0; + uint32_t mall_alloc_height_blk_aligned = 0; + uint32_t full_vp_height = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; // Find the phantom pipes - if (pipe->stream && pipe->plane_state && !pipe->top_pipe && + if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; - mall_region_pixels = pipe->plane_state->plane_size.surface_pitch * pipe->stream->timing.v_addressable; + struct pipe_ctx *main_pipe = NULL; - // For bytes required in MALL, calculate based on number of MBlks required - num_mblks = (mall_region_pixels * bytes_per_pixel + - DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES; + /* Get full viewport height from main pipe (required for MBLK calculation) */ + for (j = 0; j < dc->res_pool->pipe_count; j++) { + main_pipe = &context->res_ctx.pipe_ctx[j]; + if (main_pipe->stream == pipe->stream->mall_stream_config.paired_stream) { + full_vp_height = main_pipe->plane_res.scl_data.viewport.height; + break; + } + } + + bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; + mblk_width = DCN3_2_MBLK_WIDTH; + mblk_height = bytes_per_pixel == 4 ? DCN3_2_MBLK_HEIGHT_4BPE : DCN3_2_MBLK_HEIGHT_8BPE; + + /* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) - + * FLOOR(vp_x_start, blk_width) + */ + full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x + + pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) + + (pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width); + + /* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) - + * FLOOR(vp_y_start, blk_height) + */ + full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y + + full_vp_height + mblk_height - 1) / mblk_height * mblk_height) + + (pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height); + + /* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */ + mall_alloc_width_blk_aligned = full_vp_width_blk_aligned; + + /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */ + mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) / + mblk_height * mblk_height + mblk_height; + + /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c; + * full_mblk_height_ub_l/c = mall_alloc_height_blk_aligned_l/c; + * num_mblk_l/c = (full_mblk_width_ub_l/c / mblk_width_l/c) * (full_mblk_height_ub_l/c / mblk_height_l/c); + * (Should be divisible, but round up if not) + */ + num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) * + ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height); bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES; // cache lines used is total bytes / cache_line size. Add +2 for worst case alignment // (MALL is 64-byte aligned) From c7783a6ed4fcfad9acd353a087384d31d4cf42b2 Mon Sep 17 00:00:00 2001 From: "Nagulendran, Iswara" Date: Tue, 23 Aug 2022 12:01:56 -0400 Subject: [PATCH 36/88] drm/amd/display: Reverted DSC programming sequence change [HOW&WHY] Revert a previous commit by moving DSC programming back to before link enablement. Tested-by: Daniel Wheeler Reviewed-by: Jayendran Ramani Acked-by: Pavle Kotarac Signed-off-by: Nagulendran, Iswara Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 13 +++++++++++++ .../drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 13 ------------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 4ab27e231337..7f8da6a0f74a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -4299,6 +4299,19 @@ void core_link_enable_stream( if (pipe_ctx->stream->dpms_off) return; + /* Have to setup DSC before DIG FE and BE are connected (which happens before the + * link training). This is to make sure the bandwidth sent to DIG BE won't be + * bigger than what the link and/or DIG BE can handle. VBID[6]/CompressedStream_flag + * will be automatically set at a later time when the video is enabled + * (DP_VID_STREAM_EN = 1). + */ + if (pipe_ctx->stream->timing.flags.DSC) { + if (dc_is_dp_signal(pipe_ctx->stream->signal) || + dc_is_virtual_signal(pipe_ctx->stream->signal)) + dp_set_dsc_enable(pipe_ctx, true); + + } + status = enable_link(state, pipe_ctx); if (status != DC_OK) { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index fe346e96c2d1..801206aed63a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1577,19 +1577,6 @@ static enum dc_status apply_single_controller_ctx_to_hw( if (dc_is_dp_signal(pipe_ctx->stream->signal)) dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG); - /* Have to setup DSC before DIG FE and BE are connected (which happens before the - * link training). This is to make sure the bandwidth sent to DIG BE won't be - * bigger than what the link and/or DIG BE can handle. VBID[6]/CompressedStream_flag - * will be automatically set at a later time when the video is enabled - * (DP_VID_STREAM_EN = 1). - */ - if (pipe_ctx->stream->timing.flags.DSC) { - if (dc_is_dp_signal(pipe_ctx->stream->signal) || - dc_is_virtual_signal(pipe_ctx->stream->signal)) - dp_set_dsc_enable(pipe_ctx, true); - - } - if (!stream->dpms_off) { if (dc->hwss.update_phy_state) dc->hwss.update_phy_state(context, pipe_ctx, TX_ON_SYMCLK_ON); From d3d4211050665442ce50c7b5e837b9031ee85b33 Mon Sep 17 00:00:00 2001 From: "JinZe.Xu" Date: Tue, 23 Aug 2022 22:18:06 -0700 Subject: [PATCH 37/88] drm/amd/display: Assign link type before check dsc workaround. [Why] link type is not assigned before check dpia_mst_dsc_always_on conditions. Tested-by: Daniel Wheeler Reviewed-by: Wenjing Liu Acked-by: Pavle Kotarac Signed-off-by: JinZe.Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 7f8da6a0f74a..4bb78e356ebd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -832,8 +832,9 @@ static bool discover_dp_mst_topology(struct dc_link *link, enum dc_detect_reason LINK_INFO("link=%d, mst branch is now Connected\n", link->link_index); - apply_dpia_mst_dsc_always_on_wa(link); link->type = dc_connection_mst_branch; + apply_dpia_mst_dsc_always_on_wa(link); + dm_helpers_dp_update_branch_info(link->ctx, link); if (dm_helpers_dp_mst_start_top_mgr(link->ctx, link, (reason == DETECT_REASON_BOOT || reason == DETECT_REASON_RESUMEFROMS3S4))) { From fe77d95f7a7f2f567205ce747124efa465fb01be Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 24 Aug 2022 11:53:50 -0400 Subject: [PATCH 38/88] drm/amd/display: SW cursor fallback for SubVP [Description] Leverage SW cursor fall back for SubVP when the cursor is too big. We want to take advantage of being able to fallback to SW cursor when possible because it's not worth it to disable MCLK switching because the cursor is slightly too big. Tested-by: Daniel Wheeler Reviewed-by: Aurabindo Pillai Acked-by: Pavle Kotarac Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 5 +++++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 1 + 4 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 6752ca44e6e0..0c85ab5933b4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -328,6 +328,11 @@ bool dc_stream_set_cursor_attributes( } dc = stream->ctx->dc; + + if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384) + if (stream->mall_stream_config.type == SUBVP_MAIN) + return false; + stream->cursor_attributes = *attributes; dc_z10_restore(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 75dbc665f435..c48403d49f2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -830,6 +830,7 @@ struct dc_debug_options { bool disable_fixed_vs_aux_timeout_wa; bool force_disable_subvp; bool force_subvp_mclk_switch; + bool allow_sw_cursor_fallback; bool force_usr_allow; /* uses value at boot and disables switch */ bool disable_dtb_ref_clk_switch; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 5a21cf041732..5953083b76a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -747,6 +747,7 @@ static const struct dc_debug_options debug_defaults_drv = { .exit_idle_opt_for_cursor_updates = true, .enable_single_display_2to1_odm_policy = true, .enable_dp_dig_pixel_rate_div_policy = 1, + .allow_sw_cursor_fallback = false, }; static const struct dc_debug_options debug_defaults_diags = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 8a89c28add0d..64c225ee3a46 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -750,6 +750,7 @@ static const struct dc_debug_options debug_defaults_drv = { .exit_idle_opt_for_cursor_updates = true, .enable_single_display_2to1_odm_policy = true, .enable_dp_dig_pixel_rate_div_policy = 1, + .allow_sw_cursor_fallback = false, }; static const struct dc_debug_options debug_defaults_diags = { From 94caa423e2b5658626382ba9d98f989a47bd2b8a Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Wed, 24 Aug 2022 15:34:24 -0400 Subject: [PATCH 39/88] drm/amd/display: Fixing DIG FIFO Error [Why & How] DIG_FIFO_READ_START_LEVEL should only be set to default value (7) by software. Removed all instances of resetting the register to 0 Tested-by: Daniel Wheeler Reviewed-by: Nicholas Kazlauskas Acked-by: Pavle Kotarac Signed-off-by: Leo Chen Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c index e3351ddc566c..06d8638db696 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c @@ -67,8 +67,7 @@ static void enc314_disable_fifo(struct stream_encoder *enc) { struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); - REG_UPDATE_2(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0, - DIG_FIFO_READ_START_LEVEL, 0); + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0); } static void enc314_dp_set_odm_combine( From 827e3c9caa77d358a824c3f302032ad6c9d2ba46 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Wed, 24 Aug 2022 16:34:53 -0400 Subject: [PATCH 40/88] drm/amd/display: Fix divide by zero in DML [why] Incorrectly using MicroTileWidth instead of MacroTileWidth for calculations. [how] Remove all unused references to MicroTile and change them to MacroTile. Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher --- .../dc/dml/dcn32/display_mode_vba_32.c | 24 +++++++++---------- .../drm/amd/display/dc/dml/display_mode_vba.h | 10 ++++---- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index d8014bfbc3fe..cd6bf67035d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1806,10 +1806,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.Read256BlockHeightC[k], &mode_lib->vba.Read256BlockWidthY[k], &mode_lib->vba.Read256BlockWidthC[k], - &mode_lib->vba.MicroTileHeightY[k], - &mode_lib->vba.MicroTileHeightC[k], - &mode_lib->vba.MicroTileWidthY[k], - &mode_lib->vba.MicroTileWidthC[k]); + &mode_lib->vba.MacroTileHeightY[k], + &mode_lib->vba.MacroTileHeightC[k], + &mode_lib->vba.MacroTileWidthY[k], + &mode_lib->vba.MacroTileWidthC[k]); } /*Bandwidth Support Check*/ @@ -2661,10 +2661,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.Read256BlockWidthC, mode_lib->vba.Read256BlockHeightY, mode_lib->vba.Read256BlockHeightC, - mode_lib->vba.MicroTileWidthY, - mode_lib->vba.MicroTileWidthC, - mode_lib->vba.MicroTileHeightY, - mode_lib->vba.MicroTileHeightC, + mode_lib->vba.MacroTileWidthY, + mode_lib->vba.MacroTileWidthC, + mode_lib->vba.MacroTileHeightY, + mode_lib->vba.MacroTileHeightC, /* Output */ mode_lib->vba.SurfaceSizeInMALL, @@ -2711,10 +2711,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; - v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthY = mode_lib->vba.MicroTileWidthY[k]; - v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightY = mode_lib->vba.MicroTileHeightY[k]; - v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthC = mode_lib->vba.MicroTileWidthC[k]; - v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightC = mode_lib->vba.MicroTileHeightC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthY = mode_lib->vba.MacroTileWidthY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightY = mode_lib->vba.MacroTileHeightY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthC = mode_lib->vba.MacroTileWidthC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightC = mode_lib->vba.MacroTileHeightC[k]; v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].InterlaceEnable = mode_lib->vba.Interlace[k]; v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].HTotal = mode_lib->vba.HTotal[k]; v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCEnable = mode_lib->vba.DCCEnable[k]; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index da8acf59ccac..630f3395e90a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -652,10 +652,10 @@ struct vba_vars_st { unsigned int OutputTypeAndRatePerState[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX]; double RequiredDISPCLKPerSurface[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX]; - unsigned int MicroTileHeightY[DC__NUM_DPP__MAX]; - unsigned int MicroTileHeightC[DC__NUM_DPP__MAX]; - unsigned int MicroTileWidthY[DC__NUM_DPP__MAX]; - unsigned int MicroTileWidthC[DC__NUM_DPP__MAX]; + unsigned int MacroTileHeightY[DC__NUM_DPP__MAX]; + unsigned int MacroTileHeightC[DC__NUM_DPP__MAX]; + unsigned int MacroTileWidthY[DC__NUM_DPP__MAX]; + unsigned int MacroTileWidthC[DC__NUM_DPP__MAX]; bool ImmediateFlipRequiredFinal; bool DCCProgrammingAssumesScanDirectionUnknownFinal; bool EnoughWritebackUnits; @@ -801,8 +801,6 @@ struct vba_vars_st { double PSCL_FACTOR[DC__NUM_DPP__MAX]; double PSCL_FACTOR_CHROMA[DC__NUM_DPP__MAX]; double MaximumVStartup[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX]; - unsigned int MacroTileWidthY[DC__NUM_DPP__MAX]; - unsigned int MacroTileWidthC[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitch[DC__NUM_DPP__MAX]; double AlignedYPitch[DC__NUM_DPP__MAX]; double AlignedCPitch[DC__NUM_DPP__MAX]; From 34e205112ec078ed1b24e30d90af81e43bc12f4e Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 1 Sep 2022 16:00:52 -0400 Subject: [PATCH 41/88] drm/amd/display: Fix compilation errors on DCN314 We have some compilation errors in some DML files from DCN314 that we never noticed because we were not compiling some of the DML files. This commit fixes those syntax errors before we enable the compilation. Cc: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index 503d9ede0ac1..9fe4dc32fbe0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -61,7 +61,7 @@ // fudge factor for min dcfclk calclation #define __DML_MIN_DCFCLK_FACTOR__ 1.15 -struct { +typedef struct { double DPPCLK; double DISPCLK; double PixelClock; @@ -1599,7 +1599,7 @@ static void CalculateDCCConfiguration( int segment_order_vert_contiguous_luma; int segment_order_vert_contiguous_chroma; - enum { + typedef enum { REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA } RequestType; RequestType RequestLuma; @@ -7159,12 +7159,13 @@ static double CalculateExtraLatencyBytes( HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); else HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); - else + } else { HostVMDynamicLevels = 0; + } ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; - if (GPUVMEnable == true) + if (GPUVMEnable == true) { for (k = 0; k < NumberOfActivePlanes; ++k) ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; } From b8e13105b7fe9a0adf1dc7dd3d5d0647962adfc0 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 1 Sep 2022 16:07:19 -0400 Subject: [PATCH 42/88] drm/amd/display: Enable dlg and vba compilation for dcn314 We were not using the VBA and DLG files for DCN314, but the next sequence of changes for DCN314 will require those files. This commit adds the necessary files to the Makefile. Cc: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 86a3b5bfd699..cb81ed2fbd53 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -70,6 +70,8 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_ccflags) $(frame_warn_flag) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags) @@ -123,6 +125,7 @@ DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o +DML += dcn314/display_mode_vba_314.o dcn314/display_rq_dlg_calc_314.o DML += dcn32/display_mode_vba_32.o dcn32/display_rq_dlg_calc_32.o dcn32/display_mode_vba_util_32.o DML += dcn31/dcn31_fpu.o DML += dcn32/dcn32_fpu.o From d56e38d51cbc902a27f8b2ba244bb6a8950e8a7c Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 24 Aug 2022 09:24:18 -0400 Subject: [PATCH 43/88] drm/amd/display: Hook up DCN314 specific dml implementation [Why & How] Add support for the DML314 functions and hook up DCN314 to use them. This has some necessary additions for calculating Max VSTARTUP for future features, but there's also some changes that we have to make for pixel format/swizzle support. That will come in a following patch to make this transition easier to bisect. Tested-by: Daniel Wheeler Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c | 2 +- .../gpu/drm/amd/display/dc/dml/display_mode_lib.c | 12 ++++++++++++ .../gpu/drm/amd/display/dc/dml/display_mode_lib.h | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index 34a5d0f87b5f..fc5529fa51b3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -262,7 +262,7 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p } if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31); + dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN314); else dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c index 5d27ff0ebb5f..f5400eda07a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c @@ -35,6 +35,8 @@ #include "dcn30/display_rq_dlg_calc_30.h" #include "dcn31/display_mode_vba_31.h" #include "dcn31/display_rq_dlg_calc_31.h" +#include "dcn314/display_mode_vba_314.h" +#include "dcn314/display_rq_dlg_calc_314.h" #include "dcn32/display_mode_vba_32.h" #include "dcn32/display_rq_dlg_calc_32.h" #include "dml_logger.h" @@ -74,6 +76,13 @@ const struct dml_funcs dml31_funcs = { .rq_dlg_get_rq_reg = dml31_rq_dlg_get_rq_reg }; +const struct dml_funcs dml314_funcs = { + .validate = dml314_ModeSupportAndSystemConfigurationFull, + .recalculate = dml314_recalculate, + .rq_dlg_get_dlg_reg = dml314_rq_dlg_get_dlg_reg, + .rq_dlg_get_rq_reg = dml314_rq_dlg_get_rq_reg +}; + const struct dml_funcs dml32_funcs = { .validate = dml32_ModeSupportAndSystemConfigurationFull, .recalculate = dml32_recalculate, @@ -107,6 +116,9 @@ void dml_init_instance(struct display_mode_lib *lib, case DML_PROJECT_DCN31_FPGA: lib->funcs = dml31_funcs; break; + case DML_PROJECT_DCN314: + lib->funcs = dml314_funcs; + break; case DML_PROJECT_DCN32: lib->funcs = dml32_funcs; break; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h index 2bdd6ed22611..b1878a1440e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h @@ -41,6 +41,7 @@ enum dml_project { DML_PROJECT_DCN30, DML_PROJECT_DCN31, DML_PROJECT_DCN31_FPGA, + DML_PROJECT_DCN314, DML_PROJECT_DCN32, }; From e7dbdfa226ce621931a84f60e885d42c232fb990 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 24 Aug 2022 10:51:21 -0400 Subject: [PATCH 44/88] drm/amd/display: Relax swizzle checks for video non-RGB formats on DCN314 [Why] HW can support the display swizzle modes for video, and those are preferable over standard or linear for decode use. [How] Remove the check for DCN314. Tested-by: Daniel Wheeler Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index 9fe4dc32fbe0..2829f179f982 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -4071,9 +4071,7 @@ void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_ v->SourceFormatPixelAndScanSupport = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { - if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) - || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t - || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) { + if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) { v->SourceFormatPixelAndScanSupport = false; } } From 247a94a627dbd9fed370b575288b874ef4a01991 Mon Sep 17 00:00:00 2001 From: Duncan Ma Date: Wed, 24 Aug 2022 14:35:03 -0400 Subject: [PATCH 45/88] drm/amd/display: Correct dram channel width for dcn314 [Why] The interpretation of the number of memory channels differ by memory type, and this affects channel width for the DML input. [How] Set dram channel width according to memory type for dcn314. Tested-by: Daniel Wheeler Reviewed-by: Nicholas Kazlauskas Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Duncan Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 2 ++ drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c | 3 +++ drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 1 + 3 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 171c38fac6a3..cd86aa912e3c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -677,6 +677,8 @@ static void dcn314_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *cl } ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz); bw_params->vram_type = bios_info->memory_type; + + bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 8 : 4; bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4; for (i = 0; i < WM_SET_COUNT; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index fc5529fa51b3..4bb3b31ea7e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -194,6 +194,9 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; + if (bw_params->dram_channel_width_bytes > 0) + dcn3_14_soc.dram_channel_width_bytes = bw_params->dram_channel_width_bytes; + if (bw_params->num_channels > 0) dcn3_14_soc.num_chans = bw_params->num_channels; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 5d2b028e5dad..d9f1b0a4fbd4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -214,6 +214,7 @@ struct dummy_pstate_entry { struct clk_bw_params { unsigned int vram_type; unsigned int num_channels; + unsigned int dram_channel_width_bytes; unsigned int dispclk_vco_khz; unsigned int dc_mode_softmax_memclk; struct clk_limit_table clk_table; From 39fc82b7e031dbf5145de485d66f4a23ef711786 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Thu, 25 Aug 2022 12:33:12 -0400 Subject: [PATCH 46/88] drm/amd/display: Round cursor width up for MALL allocation [Why & How] When calculating cursor size for MALL allocation, the cursor width should be the actual width rounded up to 64 alignment. Additionally, the bit depth should vary depending on color format. Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Pavle Kotarac Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c | 24 ++++++++++++++++++- .../drm/amd/display/dc/dcn32/dcn32_hwseq.c | 24 ++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c index 6ec1c52535b9..2038cbda33f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c @@ -103,6 +103,11 @@ void hubp32_cursor_set_attributes( enum cursor_lines_per_chunk lpc = hubp2_get_lines_per_chunk( attr->width, attr->color_format); + //Round cursor width up to next multiple of 64 + uint32_t cursor_width = ((attr->width + 63) / 64) * 64; + uint32_t cursor_height = attr->height; + uint32_t cursor_size = cursor_width * cursor_height; + hubp->curs_attr = *attr; REG_UPDATE(CURSOR_SURFACE_ADDRESS_HIGH, @@ -126,7 +131,24 @@ void hubp32_cursor_set_attributes( /* used to shift the cursor chunk request deadline */ CURSOR0_CHUNK_HDL_ADJUST, 3); - if (attr->width * attr->height * 4 > 16384) + switch (attr->color_format) { + case CURSOR_MODE_MONO: + cursor_size /= 2; + break; + case CURSOR_MODE_COLOR_1BIT_AND: + case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA: + case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA: + cursor_size *= 4; + break; + + case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED: + case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED: + default: + cursor_size *= 8; + break; + } + + if (cursor_size > 16384) REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, true); else REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, false); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 769171ab8ef6..dbf2d7cc96c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -741,7 +741,29 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context) struct hubp *hubp = pipe->plane_res.hubp; if (pipe->stream && pipe->plane_state && hubp && hubp->funcs->hubp_update_mall_sel) { - if (hubp->curs_attr.width * hubp->curs_attr.height * 4 > 16384) + //Round cursor width up to next multiple of 64 + int cursor_width = ((hubp->curs_attr.width + 63) / 64) * 64; + int cursor_height = hubp->curs_attr.height; + int cursor_size = cursor_width * cursor_height; + + switch (hubp->curs_attr.color_format) { + case CURSOR_MODE_MONO: + cursor_size /= 2; + break; + case CURSOR_MODE_COLOR_1BIT_AND: + case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA: + case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA: + cursor_size *= 4; + break; + + case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED: + case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED: + default: + cursor_size *= 8; + break; + } + + if (cursor_size > 16384) cache_cursor = true; if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { From 43080c9b7536c5cbc53c5d8c74027c92c7e0110a Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 25 Aug 2022 16:04:29 -0400 Subject: [PATCH 47/88] drm/amd/display: Update viewport position for phantom pipes [Description] In some cases the viewport position of the main pipes can change without triggering a full update. In this case the subvp phantom viewports must be updated accordingly. Tested-by: Daniel Wheeler Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 7 ++-- .../drm/amd/display/dc/dcn32/dcn32_hwseq.c | 32 +++++++++++++++++++ .../drm/amd/display/dc/dcn32/dcn32_hwseq.h | 4 +++ .../gpu/drm/amd/display/dc/dcn32/dcn32_init.c | 1 + .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 5 +++ 5 files changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 6271caca4d9a..6f8c344e321d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1898,8 +1898,11 @@ void dcn20_post_unlock_program_front_end( * can underflow due to HUBP_VTG_SEL programming if done in the regular front end * programming sequence). */ - if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) - dcn20_program_pipe(dc, pipe, context); + if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (dc->hwss.update_phantom_vp_position) + dc->hwss.update_phantom_vp_position(dc, context, pipe); + dcn20_program_pipe(dc, pipe, context); + } } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index dbf2d7cc96c5..449459ca5a72 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -1301,3 +1301,35 @@ void dcn32_update_phy_state(struct dc_state *state, struct pipe_ctx *pipe_ctx, } else BREAK_TO_DEBUGGER(); } + +/* For SubVP the main pipe can have a viewport position change + * without a full update. In this case we must also update the + * viewport positions for the phantom pipe accordingly. + */ +void dcn32_update_phantom_vp_position(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *phantom_pipe) +{ + uint8_t i; + struct dc_plane_state *phantom_plane = phantom_pipe->plane_state; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_MAIN && + pipe->stream->mall_stream_config.paired_stream == phantom_pipe->stream) { + if (pipe->plane_state && pipe->plane_state->update_flags.bits.position_change) { + + phantom_plane->src_rect.x = pipe->plane_state->src_rect.x; + phantom_plane->src_rect.y = pipe->plane_state->src_rect.y; + phantom_plane->clip_rect.x = pipe->plane_state->clip_rect.x; + phantom_plane->dst_rect.x = pipe->plane_state->dst_rect.x; + phantom_plane->dst_rect.y = pipe->plane_state->dst_rect.y; + + phantom_pipe->plane_state->update_flags.bits.position_change = 1; + resource_build_scaling_params(phantom_pipe); + return; + } + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h index 221e31144d50..150d541f35b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h @@ -87,4 +87,8 @@ bool dcn32_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx); void dcn32_update_phy_state(struct dc_state *state, struct pipe_ctx *pipe_ctx, enum phy_state target_state); +void dcn32_update_phantom_vp_position(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *phantom_pipe); + #endif /* __DC_HWSS_DCN32_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c index 28d220218133..c554929471af 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c @@ -105,6 +105,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .subvp_pipe_control_lock = dcn32_subvp_pipe_control_lock, .update_visual_confirm_color = dcn20_update_visual_confirm_color, .update_phy_state = dcn32_update_phy_state, + .update_phantom_vp_position = dcn32_update_phantom_vp_position, }; static const struct hwseq_private_funcs dcn32_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 52b4350c9cd8..d7a7b2790143 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -247,6 +247,11 @@ struct hw_sequencer_funcs { void (*update_phy_state)(struct dc_state *state, struct pipe_ctx *pipe_ctx, enum phy_state target_state); + + void (*update_phantom_vp_position)(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *phantom_pipe); + void (*commit_subvp_config)(struct dc *dc, struct dc_state *context); void (*subvp_pipe_control_lock)(struct dc *dc, struct dc_state *context, From 5c1a431aaf52bbba8b6e2c4e9b4037a09509c0e3 Mon Sep 17 00:00:00 2001 From: "Lee, Alvin" Date: Thu, 25 Aug 2022 16:05:03 -0400 Subject: [PATCH 48/88] drm/amd/display: Added debug option for forcing subvp num ways [Description] Regkey option for forcing num ways for subvp for debug purposes Tested-by: Daniel Wheeler Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index c48403d49f2e..e8e8e031b9d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -831,6 +831,7 @@ struct dc_debug_options { bool force_disable_subvp; bool force_subvp_mclk_switch; bool allow_sw_cursor_fallback; + unsigned int force_subvp_num_ways; bool force_usr_allow; /* uses value at boot and disables switch */ bool disable_dtb_ref_clk_switch; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 1f195c5b3377..417dfdcf9596 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -54,13 +54,14 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat uint32_t num_mblks = 0; uint32_t cache_lines_per_plane = 0; uint32_t i = 0, j = 0; - uint32_t mblk_width = 0; - uint32_t mblk_height = 0; + uint16_t mblk_width = 0; + uint16_t mblk_height = 0; uint32_t full_vp_width_blk_aligned = 0; uint32_t full_vp_height_blk_aligned = 0; uint32_t mall_alloc_width_blk_aligned = 0; uint32_t mall_alloc_height_blk_aligned = 0; - uint32_t full_vp_height = 0; + uint16_t full_vp_height = 0; + bool subvp_in_use = false; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -70,6 +71,7 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { struct pipe_ctx *main_pipe = NULL; + subvp_in_use = true; /* Get full viewport height from main pipe (required for MBLK calculation) */ for (j = 0; j < dc->res_pool->pipe_count; j++) { main_pipe = &context->res_ctx.pipe_ctx[j]; @@ -129,6 +131,9 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat if (cache_lines_used % lines_per_way > 0) num_ways++; + if (subvp_in_use && dc->debug.force_subvp_num_ways > 0) + num_ways = dc->debug.force_subvp_num_ways; + return num_ways; } From 3b304bced035fa4e1144d657b4687238be931806 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 25 Aug 2022 15:05:58 -0400 Subject: [PATCH 49/88] drm/amd/display: add workaround for subvp cursor corruption for DCN32/321 [Why&How] Kernel does not have a means to tell the userspace to use software cursor. Due to lack of this functionality, reducing the max cursor size is the only way to ensure that power savings of Subview port feature is utilized for asics that support it. The workaround could be removed after cursor caching is fixed while a subviewport config is active. Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Pavle Kotarac Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 3 ++- drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 5953083b76a7..0b94a783d285 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2116,7 +2116,8 @@ static bool dcn32_resource_construct( dc->caps.max_downscale_ratio = 600; dc->caps.i2c_speed_in_khz = 100; dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ - dc->caps.max_cursor_size = 256; + /* TODO: Bring max_cursor_size back to 256 after subvp cursor corruption is fixed*/ + dc->caps.max_cursor_size = 64; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; dc->caps.mall_size_per_mem_channel = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 64c225ee3a46..b602fdbbaf10 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -1721,7 +1721,8 @@ static bool dcn321_resource_construct( dc->caps.max_downscale_ratio = 600; dc->caps.i2c_speed_in_khz = 100; dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ - dc->caps.max_cursor_size = 256; + /* TODO: Bring max cursor size back to 256 after subvp cursor corruption is fixed*/ + dc->caps.max_cursor_size = 64; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; dc->caps.mall_size_per_mem_channel = 0; From 61e4de65b99c616d5e074a9e2a4a1a9f821a964b Mon Sep 17 00:00:00 2001 From: Yao Wang1 Date: Mon, 22 Aug 2022 18:30:31 +0800 Subject: [PATCH 50/88] drm/amd/display: Limit user regamma to a valid value [Why] For HDR mode, we get total 512 tf_point and after switching to SDR mode we actually get 400 tf_point and the rest of points(401~512) still use dirty value from HDR mode. We should limit the rest of the points to max value. [How] Limit the value when coordinates_x.x > 1, just like what we do in translate_from_linear_space for other re-gamma build paths. Tested-by: Daniel Wheeler Reviewed-by: Krunoslav Kovac Reviewed-by: Aric Cyr Acked-by: Pavle Kotarac Signed-off-by: Yao Wang1 Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 859ffd8725c5..04f7656906ca 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1600,6 +1600,7 @@ static void interpolate_user_regamma(uint32_t hw_points_num, struct fixed31_32 lut2; struct fixed31_32 delta_lut; struct fixed31_32 delta_index; + const struct fixed31_32 one = dc_fixpt_from_int(1); i = 0; /* fixed_pt library has problems handling too small values */ @@ -1628,6 +1629,9 @@ static void interpolate_user_regamma(uint32_t hw_points_num, } else hw_x = coordinates_x[i].x; + if (dc_fixpt_le(one, hw_x)) + hw_x = one; + norm_x = dc_fixpt_mul(norm_factor, hw_x); index = dc_fixpt_floor(norm_x); if (index < 0 || index > 255) From 4bd09d7481047500fcad389251e13946c083b9fb Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Fri, 26 Aug 2022 11:29:23 -0400 Subject: [PATCH 51/88] drm/amd/display: Adding log for spread_spectrum_info [Why & How] Enable logging for spread_spectrum_percentage in spread_spectrum_info to facilitate debugging for audio compliance issues Tested-by: Daniel Wheeler Co-authored-by: Leo Chen Reviewed-by: Charlene Liu Acked-by: Pavle Kotarac Signed-off-by: Leo Chen Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/bios/bios_parser2.c | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index ead4da11a992..85ed0afb74a9 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -849,6 +849,8 @@ static enum bp_result get_ss_info_v4_1( disp_cntl_tbl->dvi_ss_rate_10hz * 10; if (disp_cntl_tbl->dvi_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) ss_info->type.CENTER_MODE = true; + + DC_LOG_BIOS("AS_SIGNAL_TYPE_DVI: %d\n", ss_info->spread_spectrum_percentage); break; case AS_SIGNAL_TYPE_HDMI: ss_info->spread_spectrum_percentage = @@ -857,6 +859,8 @@ static enum bp_result get_ss_info_v4_1( disp_cntl_tbl->hdmi_ss_rate_10hz * 10; if (disp_cntl_tbl->hdmi_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) ss_info->type.CENTER_MODE = true; + + DC_LOG_BIOS("AS_SIGNAL_TYPE_HDMI: %d\n", ss_info->spread_spectrum_percentage); break; /* TODO LVDS not support anymore? */ case AS_SIGNAL_TYPE_DISPLAY_PORT: @@ -866,6 +870,8 @@ static enum bp_result get_ss_info_v4_1( disp_cntl_tbl->dp_ss_rate_10hz * 10; if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) ss_info->type.CENTER_MODE = true; + + DC_LOG_BIOS("AS_SIGNAL_TYPE_DISPLAY_PORT: %d\n", ss_info->spread_spectrum_percentage); break; case AS_SIGNAL_TYPE_GPU_PLL: /* atom_firmware: DAL only get data from dce_info table. @@ -886,6 +892,8 @@ static enum bp_result get_ss_info_v4_1( smu_info->gpuclk_ss_rate_10hz * 10; if (smu_info->waflclk_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) ss_info->type.CENTER_MODE = true; + + DC_LOG_BIOS("AS_SIGNAL_TYPE_XGMI: %d\n", ss_info->spread_spectrum_percentage); break; default: result = BP_RESULT_UNSUPPORTED; @@ -935,6 +943,8 @@ static enum bp_result get_ss_info_v4_2( disp_cntl_tbl->dvi_ss_rate_10hz * 10; if (disp_cntl_tbl->dvi_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) ss_info->type.CENTER_MODE = true; + + DC_LOG_BIOS("AS_SIGNAL_TYPE_DVI: %d\n", ss_info->spread_spectrum_percentage); break; case AS_SIGNAL_TYPE_HDMI: ss_info->spread_spectrum_percentage = @@ -943,6 +953,8 @@ static enum bp_result get_ss_info_v4_2( disp_cntl_tbl->hdmi_ss_rate_10hz * 10; if (disp_cntl_tbl->hdmi_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) ss_info->type.CENTER_MODE = true; + + DC_LOG_BIOS("AS_SIGNAL_TYPE_HDMI: %d\n", ss_info->spread_spectrum_percentage); break; /* TODO LVDS not support anymore? */ case AS_SIGNAL_TYPE_DISPLAY_PORT: @@ -952,6 +964,8 @@ static enum bp_result get_ss_info_v4_2( smu_info->gpuclk_ss_rate_10hz * 10; if (smu_info->gpuclk_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) ss_info->type.CENTER_MODE = true; + + DC_LOG_BIOS("AS_SIGNAL_TYPE_DISPLAY_PORT: %d\n", ss_info->spread_spectrum_percentage); break; case AS_SIGNAL_TYPE_GPU_PLL: /* atom_firmware: DAL only get data from dce_info table. @@ -1000,6 +1014,8 @@ static enum bp_result get_ss_info_v4_5( disp_cntl_tbl->dvi_ss_rate_10hz * 10; if (disp_cntl_tbl->dvi_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) ss_info->type.CENTER_MODE = true; + + DC_LOG_BIOS("AS_SIGNAL_TYPE_DVI: %d\n", ss_info->spread_spectrum_percentage); break; case AS_SIGNAL_TYPE_HDMI: ss_info->spread_spectrum_percentage = @@ -1008,6 +1024,8 @@ static enum bp_result get_ss_info_v4_5( disp_cntl_tbl->hdmi_ss_rate_10hz * 10; if (disp_cntl_tbl->hdmi_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) ss_info->type.CENTER_MODE = true; + + DC_LOG_BIOS("AS_SIGNAL_TYPE_HDMI: %d\n", ss_info->spread_spectrum_percentage); break; case AS_SIGNAL_TYPE_DISPLAY_PORT: ss_info->spread_spectrum_percentage = @@ -1016,6 +1034,8 @@ static enum bp_result get_ss_info_v4_5( disp_cntl_tbl->dp_ss_rate_10hz * 10; if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) ss_info->type.CENTER_MODE = true; + + DC_LOG_BIOS("AS_SIGNAL_TYPE_DISPLAY_PORT: %d\n", ss_info->spread_spectrum_percentage); break; case AS_SIGNAL_TYPE_GPU_PLL: /* atom_smu_info_v4_0 does not have fields for SS for SMU Display PLL anymore. From 34955a1e797d074e72d0ac6a514d934d8fe80da1 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Fri, 26 Aug 2022 17:53:34 -0400 Subject: [PATCH 52/88] drm/amd/display: Don't adjust VRR unnecessarily [why] Do not need to spend time reprogramming DRR if there were no updates to the parameters. [how] Compare the current stream state to the requested one to determine if an update is required. In amdgpu_dm the timing_changed flag is set but never used so can remove it. Similarly, the stream update for VRR is done after dc_commit and should not update its adjust field until after the update is completed. The adjust field is managed by dc_stream_adjust_vmin_vmax and should not be manually updated in amdgpu_dm. Tested-by: Daniel Wheeler Reviewed-by: Anthony Koo Acked-by: Pavle Kotarac Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ---------- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 1 - drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++ 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7a93162633ae..819d61f2307b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7370,11 +7370,6 @@ static void update_freesync_state_on_stream( &vrr_infopacket, pack_sdp_v1_3); - new_crtc_state->freesync_timing_changed |= - (memcmp(&acrtc->dm_irq_params.vrr_params.adjust, - &vrr_params.adjust, - sizeof(vrr_params.adjust)) != 0); - new_crtc_state->freesync_vrr_info_changed |= (memcmp(&new_crtc_state->vrr_infopacket, &vrr_infopacket, @@ -7383,7 +7378,6 @@ static void update_freesync_state_on_stream( acrtc->dm_irq_params.vrr_params = vrr_params; new_crtc_state->vrr_infopacket = vrr_infopacket; - new_stream->adjust = acrtc->dm_irq_params.vrr_params.adjust; new_stream->vrr_infopacket = vrr_infopacket; if (new_crtc_state->freesync_vrr_info_changed) @@ -7446,10 +7440,6 @@ static void update_stream_irq_parameters( new_stream, &config, &vrr_params); - new_crtc_state->freesync_timing_changed |= - (memcmp(&acrtc->dm_irq_params.vrr_params.adjust, - &vrr_params.adjust, sizeof(vrr_params.adjust)) != 0); - new_crtc_state->freesync_config = config; /* Copy state for access from DM IRQ handler */ acrtc->dm_irq_params.freesync_config = config; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index b44faaad9b0b..b5ce15c43bcc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -681,7 +681,6 @@ struct dm_crtc_state { int crc_skip_count; - bool freesync_timing_changed; bool freesync_vrr_info_changed; bool dsc_force_changed; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 9860bf38c547..7481801c6d7c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -401,6 +401,9 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, { int i; + if (memcmp(adjust, &stream->adjust, sizeof(struct dc_crtc_timing_adjust)) == 0) + return true; + stream->adjust.v_total_max = adjust->v_total_max; stream->adjust.v_total_mid = adjust->v_total_mid; stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num; From b04fa59a51e80eae9883a419e41b1bf8179e5a42 Mon Sep 17 00:00:00 2001 From: JeromeHong Date: Thu, 25 Aug 2022 15:52:11 +0800 Subject: [PATCH 53/88] drm/amd/display: Avoid force minimal transaction in case of surface_count equal to 0 [why] Call commit_minimal_transition_state wrongly in case of surface_count equal to 0. [how] Add a condition to filter case of surface_count equal to 0. Tested-by: Daniel Wheeler Reviewed-by: Aric Cyr Acked-by: Pavle Kotarac Signed-off-by: JeromeHong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 7481801c6d7c..253dc4e35ba4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3625,11 +3625,13 @@ bool dc_update_planes_and_stream(struct dc *dc, dc->current_state->stream_count > 0 && dc->debug.pipe_split_policy != MPC_SPLIT_AVOID) { /* determine if minimal transition is required */ - if (cur_stream_status->plane_count > surface_count) { - force_minimal_pipe_splitting = true; - } else if (cur_stream_status->plane_count < surface_count) { - force_minimal_pipe_splitting = true; - is_plane_addition = true; + if (surface_count > 0) { + if (cur_stream_status->plane_count > surface_count) { + force_minimal_pipe_splitting = true; + } else if (cur_stream_status->plane_count < surface_count) { + force_minimal_pipe_splitting = true; + is_plane_addition = true; + } } } From 410e747401a3121cffba6ecb932f5df596799550 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Sat, 6 Aug 2022 01:58:00 +0800 Subject: [PATCH 54/88] drm/amd/display: Refactor SubVP calculation to remove FPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor calculation to remove floating point operations from dmub_srv. To ensure that 32-bit compilation works well, we use the div64 family of macros to do integer division for SubVP-related timing parameters. Cc: Maíra Canal Cc: Alex Deucher Cc: Isabella Basso Cc: Magali Lemes Tested-by: Daniel Wheeler Reviewed-by: Samson Tam Acked-by: Tom Chung Signed-off-by: Alvin Lee Co-developed-by: Aurabindo Pillai Signed-off-by: Aurabindo Pillai Co-developed-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 87 ++++++++++---------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 7b765efe0825..df5ad02fc4b3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -450,44 +450,42 @@ static void populate_subvp_cmd_drr_info(struct dc *dc, struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing; struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing; - int16_t drr_frame_us = 0; - int16_t min_drr_supported_us = 0; - int16_t max_drr_supported_us = 0; - int16_t max_drr_vblank_us = 0; - int16_t max_drr_mallregion_us = 0; - int16_t mall_region_us = 0; - int16_t prefetch_us = 0; - int16_t subvp_active_us = 0; - int16_t drr_active_us = 0; - int16_t min_vtotal_supported = 0; - int16_t max_vtotal_supported = 0; + uint16_t drr_frame_us = 0; + uint16_t min_drr_supported_us = 0; + uint16_t max_drr_supported_us = 0; + uint16_t max_drr_vblank_us = 0; + uint16_t max_drr_mallregion_us = 0; + uint16_t mall_region_us = 0; + uint16_t prefetch_us = 0; + uint16_t subvp_active_us = 0; + uint16_t drr_active_us = 0; + uint16_t min_vtotal_supported = 0; + uint16_t max_vtotal_supported = 0; pipe_data->pipe_config.vblank_data.drr_info.drr_in_use = true; pipe_data->pipe_config.vblank_data.drr_info.use_ramping = false; // for now don't use ramping pipe_data->pipe_config.vblank_data.drr_info.drr_window_size_ms = 4; // hardcode 4ms DRR window for now - drr_frame_us = div64_s64(drr_timing->v_total * drr_timing->h_total, - (int64_t)(drr_timing->pix_clk_100hz * 100) * 1000000); + drr_frame_us = div64_u64(((uint64_t)drr_timing->v_total * drr_timing->h_total * 1000000), + (((uint64_t)drr_timing->pix_clk_100hz * 100))); // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = div64_s64(phantom_timing->v_addressable * phantom_timing->h_total, - (int64_t)(phantom_timing->pix_clk_100hz * 100) * 1000000); + mall_region_us = div64_u64(((uint64_t)phantom_timing->v_addressable * phantom_timing->h_total * 1000000), + (((uint64_t)phantom_timing->pix_clk_100hz * 100))); min_drr_supported_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - min_vtotal_supported = div64_s64(drr_timing->pix_clk_100hz * 100 * - (div64_s64((int64_t)min_drr_supported_us, 1000000)), - (int64_t)drr_timing->h_total); + min_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * min_drr_supported_us), + (((uint64_t)drr_timing->h_total * 1000000))); - prefetch_us = div64_s64((phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total, - (int64_t)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us); - subvp_active_us = div64_s64(main_timing->v_addressable * main_timing->h_total, - (int64_t)(main_timing->pix_clk_100hz * 100) * 1000000); - drr_active_us = div64_s64(drr_timing->v_addressable * drr_timing->h_total, - (int64_t)(drr_timing->pix_clk_100hz * 100) * 1000000); - max_drr_vblank_us = div64_s64((int64_t)(subvp_active_us - prefetch_us - drr_active_us), 2) + drr_active_us; + prefetch_us = div64_u64(((uint64_t)(phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total * 1000000), + (((uint64_t)phantom_timing->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us)); + subvp_active_us = div64_u64(((uint64_t)main_timing->v_addressable * main_timing->h_total * 1000000), + (((uint64_t)main_timing->pix_clk_100hz * 100))); + drr_active_us = div64_u64(((uint64_t)drr_timing->v_addressable * drr_timing->h_total * 1000000), + (((uint64_t)drr_timing->pix_clk_100hz * 100))); + max_drr_vblank_us = div64_u64((subvp_active_us - prefetch_us - drr_active_us), 2) + drr_active_us; max_drr_mallregion_us = subvp_active_us - prefetch_us - mall_region_us; max_drr_supported_us = max_drr_vblank_us > max_drr_mallregion_us ? max_drr_vblank_us : max_drr_mallregion_us; - max_vtotal_supported = div64_s64(drr_timing->pix_clk_100hz * 100 * (div64_s64((int64_t)max_drr_supported_us, 1000000)), - (int64_t)drr_timing->h_total); + max_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * max_drr_supported_us), + (((uint64_t)drr_timing->h_total * 1000000))); pipe_data->pipe_config.vblank_data.drr_info.min_vtotal_supported = min_vtotal_supported; pipe_data->pipe_config.vblank_data.drr_info.max_vtotal_supported = max_vtotal_supported; @@ -581,10 +579,12 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc, struct dc_crtc_timing *phantom_timing1 = &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing; struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL; - subvp0_prefetch_us = div64_s64((phantom_timing0->v_total - phantom_timing0->v_front_porch) * phantom_timing0->h_total, - (int64_t)(phantom_timing0->pix_clk_100hz * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us); - subvp1_prefetch_us = div64_s64((phantom_timing1->v_total - phantom_timing1->v_front_porch) * phantom_timing1->h_total, - (int64_t)(phantom_timing1->pix_clk_100hz * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us); + subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) * + (uint64_t)phantom_timing0->h_total * 1000000), + (((uint64_t)phantom_timing0->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us)); + subvp1_prefetch_us = div64_u64(((uint64_t)(phantom_timing1->v_total - phantom_timing1->v_front_porch) * + (uint64_t)phantom_timing1->h_total * 1000000), + (((uint64_t)phantom_timing1->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us)); // Whichever SubVP PIPE has the smaller prefetch (including the prefetch end to mall start time) // should increase it's prefetch time to match the other @@ -592,16 +592,17 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc, pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[1]; prefetch_delta_us = subvp0_prefetch_us - subvp1_prefetch_us; pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines = - div64_s64(((div64_s64((int64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us), 1000000)) * - (phantom_timing1->pix_clk_100hz * 100) + phantom_timing1->h_total - 1), - (int64_t)phantom_timing1->h_total); + div64_u64(((uint64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) * + ((uint64_t)phantom_timing1->pix_clk_100hz * 100) + ((uint64_t)phantom_timing1->h_total * 1000000 - 1)), + ((uint64_t)phantom_timing1->h_total * 1000000)); + } else if (subvp1_prefetch_us > subvp0_prefetch_us) { pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[0]; prefetch_delta_us = subvp1_prefetch_us - subvp0_prefetch_us; pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines = - div64_s64(((div64_s64((int64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us), 1000000)) * - (phantom_timing0->pix_clk_100hz * 100) + phantom_timing0->h_total - 1), - (int64_t)phantom_timing0->h_total); + div64_u64(((uint64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) * + ((uint64_t)phantom_timing0->pix_clk_100hz * 100) + ((uint64_t)phantom_timing0->h_total * 1000000 - 1)), + ((uint64_t)phantom_timing0->h_total * 1000000)); } } @@ -668,13 +669,11 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, // Round up pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines = - div64_s64(((div64_s64((int64_t)dc->caps.subvp_prefetch_end_to_mall_start_us, 1000000)) * - (phantom_timing->pix_clk_100hz * 100) + phantom_timing->h_total - 1), - (int64_t)phantom_timing->h_total); + div64_u64(((uint64_t)dc->caps.subvp_prefetch_end_to_mall_start_us * ((uint64_t)phantom_timing->pix_clk_100hz * 100) + + ((uint64_t)phantom_timing->h_total * 1000000 - 1)), ((uint64_t)phantom_timing->h_total * 1000000)); pipe_data->pipe_config.subvp_data.processing_delay_lines = - div64_s64(((div64_s64((int64_t)dc->caps.subvp_fw_processing_delay_us, 1000000)) * - (phantom_timing->pix_clk_100hz * 100) + phantom_timing->h_total - 1), - (int64_t)phantom_timing->h_total); + div64_u64(((uint64_t)(dc->caps.subvp_fw_processing_delay_us) * ((uint64_t)phantom_timing->pix_clk_100hz * 100) + + ((uint64_t)phantom_timing->h_total * 1000000 - 1)), ((uint64_t)phantom_timing->h_total * 1000000)); // Find phantom pipe index based on phantom stream for (j = 0; j < dc->res_pool->pipe_count; j++) { struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j]; From 70c04ad8441a60ee65ca2e1c40fac04882ba09f8 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 1 Sep 2022 15:27:36 -0400 Subject: [PATCH 55/88] drm/amd/display: Fix register definitions for DCN32/321 [Why & How] Fix the instatiation sequence for MPC registers and add a few other missing register definitions that were ommited erroneously when copying them over to enable runtime initialization of reigster offsets for DCN32/321 Tested-by: Daniel Wheeler Reviewed-by: Rodrigo Siqueira Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 27 +-- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 216 ++++++++++++------ .../amd/display/dc/dcn321/dcn321_resource.c | 24 +- 3 files changed, 166 insertions(+), 101 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 0b94a783d285..11f1435b8c07 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -468,22 +468,17 @@ static const struct dcn20_dsc_mask dsc_mask = { }; static struct dcn30_mpc_registers mpc_regs; -#define dcn_mpc_regs_init()\ - ( \ - MPC_REG_LIST_DCN3_0_RI(0),\ - MPC_REG_LIST_DCN3_0_RI(1),\ - MPC_REG_LIST_DCN3_0_RI(2),\ - MPC_REG_LIST_DCN3_0_RI(3),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\ - MPC_MCM_REG_LIST_DCN32_RI(0),\ - MPC_MCM_REG_LIST_DCN32_RI(1),\ - MPC_MCM_REG_LIST_DCN32_RI(2),\ - MPC_MCM_REG_LIST_DCN32_RI(3),\ - MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0)\ - ) + +#define dcn_mpc_regs_init() \ + MPC_REG_LIST_DCN3_2_RI(0),\ + MPC_REG_LIST_DCN3_2_RI(1),\ + MPC_REG_LIST_DCN3_2_RI(2),\ + MPC_REG_LIST_DCN3_2_RI(3),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\ + MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0) static const struct dcn30_mpc_shift mpc_shift = { MPC_COMMON_MASK_SH_LIST_DCN32(__SHIFT) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index d1e85622dd3b..95f3517724d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -225,7 +225,8 @@ void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_par SRI_ARR(DP_MSA_TIMING_PARAM4, DP, id), \ SRI_ARR(DP_MSE_RATE_CNTL, DP, id), SRI_ARR(DP_MSE_RATE_UPDATE, DP, id), \ SRI_ARR(DP_PIXEL_FORMAT, DP, id), SRI_ARR(DP_SEC_CNTL, DP, id), \ - SRI_ARR(DP_SEC_CNTL2, DP, id), SRI_ARR(DP_SEC_CNTL6, DP, id), \ + SRI_ARR(DP_SEC_CNTL1, DP, id), SRI_ARR(DP_SEC_CNTL2, DP, id), \ + SRI_ARR(DP_SEC_CNTL5, DP, id), SRI_ARR(DP_SEC_CNTL6, DP, id), \ SRI_ARR(DP_STEER_FIFO, DP, id), SRI_ARR(DP_VID_M, DP, id), \ SRI_ARR(DP_VID_N, DP, id), SRI_ARR(DP_VID_STREAM_CNTL, DP, id), \ SRI_ARR(DP_VID_TIMING, DP, id), SRI_ARR(DP_SEC_AUD_N, DP, id), \ @@ -738,75 +739,6 @@ void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_par #define MPC_DWB_MUX_REG_LIST_DCN3_0_RI(inst) \ SRII_DWB(DWB_MUX, MUX, MPC_DWB, inst) -#define MPC_MCM_REG_LIST_DCN32_RI(inst) \ - ( \ - SRII(MPCC_MCM_SHAPER_CONTROL, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_OFFSET_R, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_OFFSET_G, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_OFFSET_B, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_SCALE_R, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_SCALE_G_B, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_LUT_INDEX, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_LUT_DATA, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_LUT_WRITE_EN_MASK, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_START_CNTL_B, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_START_CNTL_G, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_START_CNTL_R, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_END_CNTL_B, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_END_CNTL_G, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_END_CNTL_R, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_0_1, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_2_3, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_4_5, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_6_7, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_8_9, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_10_11, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_12_13, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_14_15, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_16_17, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_18_19, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_20_21, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_22_23, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_24_25, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_26_27, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_28_29, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_30_31, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_32_33, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_START_CNTL_B, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_START_CNTL_G, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_START_CNTL_R, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_END_CNTL_B, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_END_CNTL_G, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_END_CNTL_R, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_0_1, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_2_3, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_4_5, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_6_7, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_8_9, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_10_11, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_12_13, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_14_15, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_16_17, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_18_19, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_20_21, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_22_23, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_24_25, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_26_27, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_28_29, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_30_31, MPCC_MCM, inst), \ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_32_33, MPCC_MCM, inst), \ - SRII(MPCC_MCM_3DLUT_MODE, MPCC_MCM, inst), \ - SRII(MPCC_MCM_3DLUT_INDEX, MPCC_MCM, inst), \ - SRII(MPCC_MCM_3DLUT_DATA, MPCC_MCM, inst), \ - SRII(MPCC_MCM_3DLUT_DATA_30BIT, MPCC_MCM, inst), \ - SRII(MPCC_MCM_3DLUT_READ_WRITE_CONTROL, MPCC_MCM, inst), \ - SRII(MPCC_MCM_3DLUT_OUT_NORM_FACTOR, MPCC_MCM, inst), \ - SRII(MPCC_MCM_3DLUT_OUT_OFFSET_R, MPCC_MCM, inst), \ - SRII(MPCC_MCM_3DLUT_OUT_OFFSET_G, MPCC_MCM, inst), \ - SRII(MPCC_MCM_3DLUT_OUT_OFFSET_B, MPCC_MCM, inst), \ - SRII(MPCC_MCM_MEM_PWR_CTRL, MPCC_MCM, inst) \ - ) - #define MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0_RI(inst) \ ( \ SRII(MUX, MPC_OUT, inst), VUPDATE_SRII(CUR, VUPDATE_LOCK_SET, inst) \ @@ -890,6 +822,149 @@ void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_par SRII(MPCC_OGAM_LUT_CONTROL, MPCC_OGAM, inst) \ ) +#define MPC_REG_LIST_DCN3_2_RI(inst) \ + MPC_REG_LIST_DCN3_0_RI(inst),\ + SRII(MPCC_MOVABLE_CM_LOCATION_CONTROL, MPCC, inst),\ + SRII(MPCC_MCM_SHAPER_CONTROL, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_OFFSET_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_OFFSET_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_OFFSET_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_SCALE_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_SCALE_G_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_LUT_INDEX, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_LUT_DATA, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_LUT_WRITE_EN_MASK, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_START_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_START_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_START_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_END_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_END_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_END_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_0_1, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_2_3, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_4_5, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_6_7, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_8_9, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_10_11, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_12_13, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_14_15, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_16_17, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_18_19, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_20_21, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_22_23, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_24_25, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_26_27, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_28_29, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_30_31, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_32_33, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_START_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_START_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_START_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_END_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_END_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_END_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_0_1, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_2_3, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_4_5, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_6_7, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_8_9, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_10_11, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_12_13, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_14_15, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_16_17, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_18_19, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_20_21, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_22_23, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_24_25, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_26_27, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_28_29, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_30_31, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_32_33, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_MODE, MPCC_MCM, inst), /*TODO: may need to add other 3DLUT regs*/\ + SRII(MPCC_MCM_3DLUT_INDEX, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_DATA, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_DATA_30BIT, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_READ_WRITE_CONTROL, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_OUT_NORM_FACTOR, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_OUT_OFFSET_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_OUT_OFFSET_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_OUT_OFFSET_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_CONTROL, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_LUT_INDEX, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_LUT_DATA, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_LUT_CONTROL, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_SLOPE_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_SLOPE_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_SLOPE_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_BASE_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_BASE_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_BASE_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL1_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL2_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL1_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL2_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL1_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL2_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_OFFSET_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_OFFSET_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_OFFSET_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_0_1, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_2_3, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_4_5, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_6_7, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_8_9, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_10_11, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_12_13, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_14_15, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_16_17, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_18_19, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_20_21, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_22_23, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_24_25, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_26_27, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_28_29, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_30_31, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_32_33, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_SLOPE_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_SLOPE_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_SLOPE_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_BASE_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_BASE_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_BASE_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL1_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL2_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL1_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL2_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL1_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL2_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_OFFSET_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_OFFSET_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_OFFSET_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_0_1, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_2_3, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_4_5, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_6_7, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_8_9, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_10_11, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_12_13, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_14_15, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_16_17, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_18_19, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_20_21, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_22_23, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_24_25, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_26_27, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_28_29, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_30_31, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_32_33, MPCC_MCM, inst),\ + SRII(MPCC_MCM_MEM_PWR_CTRL, MPCC_MCM, inst) + /* OPTC */ #define OPTC_COMMON_REG_LIST_DCN3_2_RI(inst) \ @@ -1124,6 +1199,7 @@ void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_par SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D), SR(DCHUBBUB_DET0_CTRL), \ SR(DCHUBBUB_DET1_CTRL), SR(DCHUBBUB_DET2_CTRL), SR(DCHUBBUB_DET3_CTRL), \ SR(DCHUBBUB_COMPBUF_CTRL), SR(COMPBUF_RESERVED_SPACE), \ + SR(DCHUBBUB_DEBUG_CTRL_0), \ SR(DCHUBBUB_ARB_USR_RETRAINING_CNTL), \ SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A), \ SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B), \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index b602fdbbaf10..1bbc0bdf5dc3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -473,21 +473,15 @@ static const struct dcn20_dsc_mask dsc_mask = { static struct dcn30_mpc_registers mpc_regs; #define dcn_mpc_regs_init()\ - ( \ - MPC_REG_LIST_DCN3_0_RI(0),\ - MPC_REG_LIST_DCN3_0_RI(1),\ - MPC_REG_LIST_DCN3_0_RI(2),\ - MPC_REG_LIST_DCN3_0_RI(3),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\ - MPC_MCM_REG_LIST_DCN32_RI(0),\ - MPC_MCM_REG_LIST_DCN32_RI(1),\ - MPC_MCM_REG_LIST_DCN32_RI(2),\ - MPC_MCM_REG_LIST_DCN32_RI(3),\ - MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0)\ - ) + MPC_REG_LIST_DCN3_2_RI(0),\ + MPC_REG_LIST_DCN3_2_RI(1),\ + MPC_REG_LIST_DCN3_2_RI(2),\ + MPC_REG_LIST_DCN3_2_RI(3),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\ + MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0) static const struct dcn30_mpc_shift mpc_shift = { MPC_COMMON_MASK_SH_LIST_DCN32(__SHIFT) From 419c14521e0b090e82353556008833d58c2efde1 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sat, 27 Aug 2022 15:08:49 -0400 Subject: [PATCH 56/88] drm/amd/display: 3.2.202 This version brings along following fixes: *Fixed register definitions for DCN32/321 *Adding log for spread_spectrum_info *Reverted DSC programming sequence change *Correct I2C register offset *Updated DCN30 header files, viewport position for phantom pipes *Enable dlg and vba compilation for dcn314 Tested-by: Daniel Wheeler Acked-by: Pavle Kotarac Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index e8e8e031b9d7..53b13beff0b2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.201" +#define DC_VER "3.2.202" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 1df7e569522486e58307929a726ec8f303c5abf4 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Aug 2022 13:34:05 -0700 Subject: [PATCH 57/88] drm/amd/display: Reduce number of arguments of dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most of the arguments are identical between the two call sites and they can be accessed through the 'struct vba_vars_st' pointer created at the top of dml32_ModeSupportAndSystemConfigurationFull(). This reduces the total amount of stack space that dml32_ModeSupportAndSystemConfigurationFull() uses by 216 bytes with LLVM 16 (2152 -> 1936), helping clear up the following clang warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1721:6: error: stack frame size (2152) exceeds limit (2048) in 'dml32_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) ^ 1 error generated. Additionally, while modifying the arguments to dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(), use 'v' consistently, instead of 'v' mixed with 'mode_lib->vba'. Link: https://github.com/ClangBuiltLinux/linux/issues/1681 Reported-by: "Sudip Mukherjee (Codethink)" Tested-by: Maíra Canal Reviewed-by: Rodrigo Siqueira Signed-off-by: Nathan Chancellor Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml/dcn32/display_mode_vba_32.c | 118 +++------ .../dc/dml/dcn32/display_mode_vba_util_32.c | 246 ++++++++---------- .../dc/dml/dcn32/display_mode_vba_util_32.h | 33 +-- 3 files changed, 139 insertions(+), 258 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index cd6bf67035d4..664e22e95935 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1163,58 +1163,28 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - mode_lib->vba.USRRetrainingRequiredFinal, - mode_lib->vba.UsesMALLForPStateChange, - mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], - mode_lib->vba.NumberOfActiveSurfaces, - mode_lib->vba.MaxLineBufferLines, - mode_lib->vba.LineBufferSizeFinal, - mode_lib->vba.WritebackInterfaceBufferSize, - mode_lib->vba.DCFCLK, - mode_lib->vba.ReturnBW, - mode_lib->vba.SynchronizeTimingsFinal, - mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - mode_lib->vba.DRRDisplay, - v->dpte_group_bytes, - v->meta_row_height, - v->meta_row_height_chroma, + v, + v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb], + v->DCFCLK, + v->ReturnBW, v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters, - mode_lib->vba.WritebackChunkSize, - mode_lib->vba.SOCCLK, + v->SOCCLK, v->DCFCLKDeepSleep, - mode_lib->vba.DETBufferSizeY, - mode_lib->vba.DETBufferSizeC, - mode_lib->vba.SwathHeightY, - mode_lib->vba.SwathHeightC, - mode_lib->vba.LBBitPerPixel, + v->DETBufferSizeY, + v->DETBufferSizeC, + v->SwathHeightY, + v->SwathHeightC, v->SwathWidthY, v->SwathWidthC, - mode_lib->vba.HRatio, - mode_lib->vba.HRatioChroma, - mode_lib->vba.vtaps, - mode_lib->vba.VTAPsChroma, - mode_lib->vba.VRatio, - mode_lib->vba.VRatioChroma, - mode_lib->vba.HTotal, - mode_lib->vba.VTotal, - mode_lib->vba.VActive, - mode_lib->vba.PixelClock, - mode_lib->vba.BlendingAndTiming, - mode_lib->vba.DPPPerPlane, + v->DPPPerPlane, v->BytePerPixelDETY, v->BytePerPixelDETC, v->DSTXAfterScaler, v->DSTYAfterScaler, - mode_lib->vba.WritebackEnable, - mode_lib->vba.WritebackPixelFormat, - mode_lib->vba.WritebackDestinationWidth, - mode_lib->vba.WritebackDestinationHeight, - mode_lib->vba.WritebackSourceHeight, v->UnboundedRequestEnabled, v->CompressedBufferSizeInkByte, /* Output */ - &v->Watermark, &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_dramchange_support, v->MaxActiveDRAMClockChangeLatencySupported, v->SubViewportLinesNeededInMALL, @@ -3559,62 +3529,32 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - mode_lib->vba.USRRetrainingRequiredFinal, - mode_lib->vba.UsesMALLForPStateChange, - mode_lib->vba.PrefetchModePerState[i][j], - mode_lib->vba.NumberOfActiveSurfaces, - mode_lib->vba.MaxLineBufferLines, - mode_lib->vba.LineBufferSizeFinal, - mode_lib->vba.WritebackInterfaceBufferSize, - mode_lib->vba.DCFCLKState[i][j], - mode_lib->vba.ReturnBWPerState[i][j], - mode_lib->vba.SynchronizeTimingsFinal, - mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - mode_lib->vba.DRRDisplay, - mode_lib->vba.dpte_group_bytes, - mode_lib->vba.meta_row_height, - mode_lib->vba.meta_row_height_chroma, + v, + v->PrefetchModePerState[i][j], + v->DCFCLKState[i][j], + v->ReturnBWPerState[i][j], v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters, - mode_lib->vba.WritebackChunkSize, - mode_lib->vba.SOCCLKPerState[i], - mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j], - mode_lib->vba.DETBufferSizeYThisState, - mode_lib->vba.DETBufferSizeCThisState, - mode_lib->vba.SwathHeightYThisState, - mode_lib->vba.SwathHeightCThisState, - mode_lib->vba.LBBitPerPixel, - mode_lib->vba.SwathWidthYThisState, // 24 - mode_lib->vba.SwathWidthCThisState, - mode_lib->vba.HRatio, - mode_lib->vba.HRatioChroma, - mode_lib->vba.vtaps, - mode_lib->vba.VTAPsChroma, - mode_lib->vba.VRatio, - mode_lib->vba.VRatioChroma, - mode_lib->vba.HTotal, - mode_lib->vba.VTotal, - mode_lib->vba.VActive, - mode_lib->vba.PixelClock, - mode_lib->vba.BlendingAndTiming, - mode_lib->vba.NoOfDPPThisState, - mode_lib->vba.BytePerPixelInDETY, - mode_lib->vba.BytePerPixelInDETC, + v->SOCCLKPerState[i], + v->ProjectedDCFCLKDeepSleep[i][j], + v->DETBufferSizeYThisState, + v->DETBufferSizeCThisState, + v->SwathHeightYThisState, + v->SwathHeightCThisState, + v->SwathWidthYThisState, // 24 + v->SwathWidthCThisState, + v->NoOfDPPThisState, + v->BytePerPixelInDETY, + v->BytePerPixelInDETC, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler, - mode_lib->vba.WritebackEnable, - mode_lib->vba.WritebackPixelFormat, - mode_lib->vba.WritebackDestinationWidth, - mode_lib->vba.WritebackDestinationHeight, - mode_lib->vba.WritebackSourceHeight, - mode_lib->vba.UnboundedRequestEnabledThisState, - mode_lib->vba.CompressedBufferSizeInkByteThisState, + v->UnboundedRequestEnabledThisState, + v->CompressedBufferSizeInkByteThisState, /* Output */ - &mode_lib->vba.Watermark, // Store the values in vba - &mode_lib->vba.DRAMClockChangeSupport[i][j], + &v->DRAMClockChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[0], // double *MaxActiveDRAMClockChangeLatencySupported &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // Long SubViewportLinesNeededInMALL[] - &mode_lib->vba.FCLKChangeSupport[i][j], + &v->FCLKChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[1], // double *MinActiveFCLKChangeLatencySupported &mode_lib->vba.USRRetrainingSupport[i][j], mode_lib->vba.ActiveDRAMClockChangeLatencyMarginPerState[i][j]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 36e4a4a9296b..a34f6d52c372 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -4206,58 +4206,28 @@ void dml32_CalculateFlipSchedule( } // CalculateFlipSchedule void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - bool USRRetrainingRequiredFinal, - enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + struct vba_vars_st *v, unsigned int PrefetchMode, - unsigned int NumberOfActiveSurfaces, - unsigned int MaxLineBufferLines, - unsigned int LineBufferSize, - unsigned int WritebackInterfaceBufferSize, double DCFCLK, double ReturnBW, - bool SynchronizeTimingsFinal, - bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - bool DRRDisplay[], - unsigned int dpte_group_bytes[], - unsigned int meta_row_height[], - unsigned int meta_row_height_chroma[], SOCParametersList mmSOCParameters, - unsigned int WritebackChunkSize, double SOCCLK, double DCFClkDeepSleep, unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], - unsigned int LBBitPerPixel[], double SwathWidthY[], double SwathWidthC[], - double HRatio[], - double HRatioChroma[], - unsigned int VTaps[], - unsigned int VTapsChroma[], - double VRatio[], - double VRatioChroma[], - unsigned int HTotal[], - unsigned int VTotal[], - unsigned int VActive[], - double PixelClock[], - unsigned int BlendingAndTiming[], unsigned int DPPPerSurface[], double BytePerPixelDETY[], double BytePerPixelDETC[], double DSTXAfterScaler[], double DSTYAfterScaler[], - bool WritebackEnable[], - enum source_format_class WritebackPixelFormat[], - double WritebackDestinationWidth[], - double WritebackDestinationHeight[], - double WritebackSourceHeight[], bool UnboundedRequestEnabled, unsigned int CompressedBufferSizeInkByte, /* Output */ - Watermarks *Watermark, enum clock_change_support *DRAMClockChangeSupport, double MaxActiveDRAMClockChangeLatencySupported[], unsigned int SubViewportLinesNeededInMALL[], @@ -4299,136 +4269,136 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; - Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; - Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency + v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; + v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency; - Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark; - Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark; - Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency + v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark; + v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark; + v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; - Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency + v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; - Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency + v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; - Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time + v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency); dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency); dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency); - dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark); - dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark); - dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark); - dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark); - dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark); - dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark); - dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark); + dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark); + dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark); + dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark); + dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark); + dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark); + dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark); + dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark); dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", - __func__, Watermark->Z8StutterEnterPlusExitWatermark); + __func__, v->Watermark.Z8StutterEnterPlusExitWatermark); #endif TotalActiveWriteback = 0; - for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if (WritebackEnable[k] == true) + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if (v->WritebackEnable[k] == true) TotalActiveWriteback = TotalActiveWriteback + 1; } if (TotalActiveWriteback <= 1) { - Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; + v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; } else { - Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency - + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency + + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; } - if (USRRetrainingRequiredFinal) - Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark + if (v->USRRetrainingRequiredFinal) + v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark + mmSOCParameters.USRRetrainingLatency; if (TotalActiveWriteback <= 1) { - Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.WritebackLatency; - Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + mmSOCParameters.WritebackLatency; } else { - Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency - + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; - Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency - + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK; + v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK; } - if (USRRetrainingRequiredFinal) - Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark + if (v->USRRetrainingRequiredFinal) + v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark + mmSOCParameters.USRRetrainingLatency; - if (USRRetrainingRequiredFinal) - Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark + if (v->USRRetrainingRequiredFinal) + v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark + mmSOCParameters.USRRetrainingLatency; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", - __func__, Watermark->WritebackDRAMClockChangeWatermark); - dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark); - dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark); - dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal); + __func__, v->Watermark.WritebackDRAMClockChangeWatermark); + dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark); + dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark); + dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal); dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency); #endif - for (k = 0; k < NumberOfActiveSurfaces; ++k) { - TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + - SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]); + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]); } - for (k = 0; k < NumberOfActiveSurfaces; ++k) { + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { - LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); - LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); + LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); + LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines); - dml_print("DML::%s: k=%d, LineBufferSize = %d\n", __func__, k, LineBufferSize); - dml_print("DML::%s: k=%d, LBBitPerPixel = %d\n", __func__, k, LBBitPerPixel[k]); - dml_print("DML::%s: k=%d, HRatio = %f\n", __func__, k, HRatio[k]); - dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]); + dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines); + dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal); + dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]); + dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]); + dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]); #endif - EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); - EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); + EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); + EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); EffectiveDETBufferSizeY = DETBufferSizeY[k]; if (UnboundedRequestEnabled) { EffectiveDETBufferSizeY = EffectiveDETBufferSizeY + CompressedBufferSizeInkByte * 1024 - * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k]) - / (HTotal[k] / PixelClock[k]) / TotalPixelBW; + * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k]) + / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; } LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); - FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; + FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY - - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; + - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k]; - if (NumberOfActiveSurfaces > 1) { + if (v->NumberOfActiveSurfaces > 1) { ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY - - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k] - / PixelClock[k] / VRatio[k]; + - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k] + / v->PixelClock[k] / v->VRatio[k]; } if (BytePerPixelDETC[k] > 0) { LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); - FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) - / VRatioChroma[k]; + FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) + / v->VRatioChroma[k]; ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] - / PixelClock[k]; - if (NumberOfActiveSurfaces > 1) { + - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] + / v->PixelClock[k]; + if (v->NumberOfActiveSurfaces > 1) { ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC - - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k] - / PixelClock[k] / VRatioChroma[k]; + - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k] + / v->PixelClock[k] / v->VRatioChroma[k]; } ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, ActiveClockChangeLatencyHidingC); @@ -4436,24 +4406,24 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; } - ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - - Watermark->DRAMClockChangeWatermark; - ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - - Watermark->FCLKChangeWatermark; - USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; + ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark + - v->Watermark.DRAMClockChangeWatermark; + ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark + - v->Watermark.FCLKChangeWatermark; + USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark; - if (WritebackEnable[k]) { - WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 - / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] - / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); - if (WritebackPixelFormat[k] == dm_444_64) + if (v->WritebackEnable[k]) { + WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024 + / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] + / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); + if (v->WritebackPixelFormat[k] == dm_444_64) WritebackLatencyHiding = WritebackLatencyHiding / 2; WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding - - Watermark->WritebackDRAMClockChangeWatermark; + - v->Watermark.WritebackDRAMClockChangeWatermark; WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding - - Watermark->WritebackFCLKChangeWatermark; + - v->Watermark.WritebackFCLKChangeWatermark; ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], WritebackFCLKChangeLatencyMargin); @@ -4461,22 +4431,22 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( WritebackDRAMClockChangeLatencyMargin); } MaxActiveDRAMClockChangeLatencySupported[k] = - (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? + (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? 0 : (ActiveDRAMClockChangeLatencyMargin[k] + mmSOCParameters.DRAMClockChangeLatency); } - for (i = 0; i < NumberOfActiveSurfaces; ++i) { - for (j = 0; j < NumberOfActiveSurfaces; ++j) { + for (i = 0; i < v->NumberOfActiveSurfaces; ++i) { + for (j = 0; j < v->NumberOfActiveSurfaces; ++j) { if (i == j || - (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) || - (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) || - (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) || - (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] && - HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] && - VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal && - (DRRDisplay[i] || DRRDisplay[j]))) { + (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) || + (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) || + (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) || + (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] && + v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] && + v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && + (v->DRRDisplay[i] || v->DRRDisplay[j]))) { SynchronizedSurfaces[i][j] = true; } else { SynchronizedSurfaces[i][j] = false; @@ -4484,8 +4454,8 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( } } - for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; @@ -4497,9 +4467,9 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; SameTimingForFCLKChange = true; - for (k = 0; k < NumberOfActiveSurfaces; ++k) { + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { - if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && + if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && (SameTimingForFCLKChange || ActiveFCLKChangeLatencyMargin[k] < SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { @@ -4519,17 +4489,17 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( } *USRRetrainingSupport = true; - for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && (USRRetrainingLatencyMargin[k] < 0)) { *USRRetrainingSupport = false; } } - for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && - UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && - UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && + v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && + v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && ActiveDRAMClockChangeLatencyMargin[k] < 0) { if (PrefetchMode > 0) { DRAMClockChangeSupportNumber = 2; @@ -4543,10 +4513,10 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( } } - for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) DRAMClockChangeMethod = 1; - else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) + else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) DRAMClockChangeMethod = 2; } @@ -4573,16 +4543,16 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; } - for (k = 0; k < NumberOfActiveSurfaces; ++k) { + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { unsigned int dst_y_pstate; unsigned int src_y_pstate_l; unsigned int src_y_pstate_c; unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c; - dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1); - src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]); + dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1); + src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]); src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; - sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k]; + sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k]; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); @@ -4593,21 +4563,21 @@ dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBL dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); -dml_print("DML::%s: k=%d, meta_row_height = %d\n", __func__, k, meta_row_height[k]); +dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]); dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l); #endif SubViewportLinesNeededInMALL[k] = sub_vp_lines_l; if (BytePerPixelDETC[k] > 0) { - src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]); + src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]); src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; - sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k]; + sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k]; SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c); dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c); -dml_print("DML::%s: k=%d, meta_row_height_chroma = %d\n", __func__, k, meta_row_height_chroma[k]); +dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]); dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c); #endif } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 626f6605e2d5..7cdf0418830a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -30,6 +30,7 @@ #include "os_types.h" #include "../dc_features.h" #include "../display_mode_structs.h" +#include "dml/display_mode_vba.h" unsigned int dml32_dscceComputeDelay( unsigned int bpc, @@ -808,58 +809,28 @@ void dml32_CalculateFlipSchedule( bool *ImmediateFlipSupportedForPipe); void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - bool USRRetrainingRequiredFinal, - enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + struct vba_vars_st *v, unsigned int PrefetchMode, - unsigned int NumberOfActiveSurfaces, - unsigned int MaxLineBufferLines, - unsigned int LineBufferSize, - unsigned int WritebackInterfaceBufferSize, double DCFCLK, double ReturnBW, - bool SynchronizeTimingsFinal, - bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - bool DRRDisplay[], - unsigned int dpte_group_bytes[], - unsigned int meta_row_height[], - unsigned int meta_row_height_chroma[], SOCParametersList mmSOCParameters, - unsigned int WritebackChunkSize, double SOCCLK, double DCFClkDeepSleep, unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], - unsigned int LBBitPerPixel[], double SwathWidthY[], double SwathWidthC[], - double HRatio[], - double HRatioChroma[], - unsigned int VTaps[], - unsigned int VTapsChroma[], - double VRatio[], - double VRatioChroma[], - unsigned int HTotal[], - unsigned int VTotal[], - unsigned int VActive[], - double PixelClock[], - unsigned int BlendingAndTiming[], unsigned int DPPPerSurface[], double BytePerPixelDETY[], double BytePerPixelDETC[], double DSTXAfterScaler[], double DSTYAfterScaler[], - bool WritebackEnable[], - enum source_format_class WritebackPixelFormat[], - double WritebackDestinationWidth[], - double WritebackDestinationHeight[], - double WritebackSourceHeight[], bool UnboundedRequestEnabled, unsigned int CompressedBufferSizeInkByte, /* Output */ - Watermarks *Watermark, enum clock_change_support *DRAMClockChangeSupport, double MaxActiveDRAMClockChangeLatencySupported[], unsigned int SubViewportLinesNeededInMALL[], From 3b4e83a232244e2fe911bd39b322e0dc19b22434 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Aug 2022 13:34:06 -0700 Subject: [PATCH 58/88] drm/amd/display: Reduce number of arguments of dml32_CalculatePrefetchSchedule() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several of the arguments are identical between the two call sites and they can be accessed through the 'struct vba_vars_st' pointer. This reduces the total amount of stack space that dml32_ModeSupportAndSystemConfigurationFull() uses by 208 bytes with LLVM 16 (1936 -> 1728), helping clear up the following clang warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1721:6: error: stack frame size (2152) exceeds limit (2048) in 'dml32_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) ^ 1 error generated. Additionally, while modifying the arguments to dml32_CalculatePrefetchSchedule(), use 'v' consistently, instead of 'v' mixed with 'mode_lib->vba'. Link: https://github.com/ClangBuiltLinux/linux/issues/1681 Reported-by: "Sudip Mukherjee (Codethink)" Tested-by: Maíra Canal Reviewed-by: Rodrigo Siqueira Signed-off-by: Nathan Chancellor Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml/dcn32/display_mode_vba_32.c | 118 +++++++----------- .../dc/dml/dcn32/display_mode_vba_util_32.c | 75 +++++------ .../dc/dml/dcn32/display_mode_vba_util_32.h | 18 +-- 3 files changed, 78 insertions(+), 133 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 664e22e95935..ad100658132f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -755,30 +755,18 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; - v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, - &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k], - mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, - mode_lib->vba.DPPCLKDelaySCL, - mode_lib->vba.DPPCLKDelaySCLLBOnly, - mode_lib->vba.DPPCLKDelayCNVCCursor, - mode_lib->vba.DISPCLKDelaySubtotal, - (unsigned int) (v->SwathWidthY[k] / mode_lib->vba.HRatio[k]), - mode_lib->vba.OutputFormat[k], - mode_lib->vba.MaxInterDCNTileRepeaters, + v->ErrorResult[k] = dml32_CalculatePrefetchSchedule( + v, + k, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, + v->DSCDelay[k], + (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), dml_min(v->VStartupLines, v->MaxVStartupLines[k]), v->MaxVStartupLines[k], - mode_lib->vba.GPUVMMaxPageTableLevels, - mode_lib->vba.GPUVMEnable, - mode_lib->vba.HostVMEnable, - mode_lib->vba.HostVMMaxNonCachedPageTableLevels, - mode_lib->vba.HostVMMinPageSize, - mode_lib->vba.DynamicMetadataEnable[k], - mode_lib->vba.DynamicMetadataVMEnabled, - mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], - mode_lib->vba.DynamicMetadataTransmittedBytes[k], v->UrgentLatency, v->UrgentExtraLatency, - mode_lib->vba.TCalc, + v->TCalc, v->PDEAndMetaPTEBytesFrame[k], v->MetaRowByte[k], v->PixelPTEBytesPerRow[k], @@ -792,8 +780,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->MaxNumSwathC[k], v->swath_width_luma_ub[k], v->swath_width_chroma_ub[k], - mode_lib->vba.SwathHeightY[k], - mode_lib->vba.SwathHeightC[k], + v->SwathHeightY[k], + v->SwathHeightC[k], TWait, /* Output */ &v->DSTXAfterScaler[k], @@ -3230,63 +3218,47 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NoTimeForPrefetch[i][j][k] = dml32_CalculatePrefetchSchedule( + v, + k, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, - mode_lib->vba.DSCDelayPerState[i][k], - mode_lib->vba.DPPCLKDelaySubtotal + - mode_lib->vba.DPPCLKDelayCNVCFormater, - mode_lib->vba.DPPCLKDelaySCL, - mode_lib->vba.DPPCLKDelaySCLLBOnly, - mode_lib->vba.DPPCLKDelayCNVCCursor, - mode_lib->vba.DISPCLKDelaySubtotal, - mode_lib->vba.SwathWidthYThisState[k] / - mode_lib->vba.HRatio[k], - mode_lib->vba.OutputFormat[k], - mode_lib->vba.MaxInterDCNTileRepeaters, - dml_min(mode_lib->vba.MaxVStartup, - mode_lib->vba.MaximumVStartup[i][j][k]), - mode_lib->vba.MaximumVStartup[i][j][k], - mode_lib->vba.GPUVMMaxPageTableLevels, - mode_lib->vba.GPUVMEnable, mode_lib->vba.HostVMEnable, - mode_lib->vba.HostVMMaxNonCachedPageTableLevels, - mode_lib->vba.HostVMMinPageSize, - mode_lib->vba.DynamicMetadataEnable[k], - mode_lib->vba.DynamicMetadataVMEnabled, - mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], - mode_lib->vba.DynamicMetadataTransmittedBytes[k], - mode_lib->vba.UrgLatency[i], - mode_lib->vba.ExtraLatency, - mode_lib->vba.TimeCalc, - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k], - mode_lib->vba.MetaRowBytes[i][j][k], - mode_lib->vba.DPTEBytesPerRow[i][j][k], - mode_lib->vba.PrefetchLinesY[i][j][k], - mode_lib->vba.SwathWidthYThisState[k], - mode_lib->vba.PrefillY[k], - mode_lib->vba.MaxNumSwY[k], - mode_lib->vba.PrefetchLinesC[i][j][k], - mode_lib->vba.SwathWidthCThisState[k], - mode_lib->vba.PrefillC[k], - mode_lib->vba.MaxNumSwC[k], - mode_lib->vba.swath_width_luma_ub_this_state[k], - mode_lib->vba.swath_width_chroma_ub_this_state[k], - mode_lib->vba.SwathHeightYThisState[k], - mode_lib->vba.SwathHeightCThisState[k], mode_lib->vba.TWait, + v->DSCDelayPerState[i][k], + v->SwathWidthYThisState[k] / v->HRatio[k], + dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), + v->MaximumVStartup[i][j][k], + v->UrgLatency[i], + v->ExtraLatency, + v->TimeCalc, + v->PDEAndMetaPTEBytesPerFrame[i][j][k], + v->MetaRowBytes[i][j][k], + v->DPTEBytesPerRow[i][j][k], + v->PrefetchLinesY[i][j][k], + v->SwathWidthYThisState[k], + v->PrefillY[k], + v->MaxNumSwY[k], + v->PrefetchLinesC[i][j][k], + v->SwathWidthCThisState[k], + v->PrefillC[k], + v->MaxNumSwC[k], + v->swath_width_luma_ub_this_state[k], + v->swath_width_chroma_ub_this_state[k], + v->SwathHeightYThisState[k], + v->SwathHeightCThisState[k], v->TWait, /* Output */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler[k], - &mode_lib->vba.LineTimesForPrefetch[k], - &mode_lib->vba.PrefetchBW[k], - &mode_lib->vba.LinesForMetaPTE[k], - &mode_lib->vba.LinesForMetaAndDPTERow[k], - &mode_lib->vba.VRatioPreY[i][j][k], - &mode_lib->vba.VRatioPreC[i][j][k], - &mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0][k], - &mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0][k], - &mode_lib->vba.NoTimeForDynamicMetadata[i][j][k], - &mode_lib->vba.Tno_bw[k], - &mode_lib->vba.prefetch_vmrow_bw[k], + &v->LineTimesForPrefetch[k], + &v->PrefetchBW[k], + &v->LinesForMetaPTE[k], + &v->LinesForMetaAndDPTERow[k], + &v->VRatioPreY[i][j][k], + &v->VRatioPreC[i][j][k], + &v->RequiredPrefetchPixelDataBWLuma[0][0][k], + &v->RequiredPrefetchPixelDataBWChroma[0][0][k], + &v->NoTimeForDynamicMetadata[i][j][k], + &v->Tno_bw[k], + &v->prefetch_vmrow_bw[k], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // double *Tdmdl_vm &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // double *Tdmdl &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[2], // double *TSetup diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index a34f6d52c372..5b5b94f1024d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -3363,28 +3363,14 @@ double dml32_CalculateExtraLatency( } // CalculateExtraLatency bool dml32_CalculatePrefetchSchedule( + struct vba_vars_st *v, + unsigned int k, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, - double DPPCLKDelaySubtotalPlusCNVCFormater, - double DPPCLKDelaySCL, - double DPPCLKDelaySCLLBOnly, - double DPPCLKDelayCNVCCursor, - double DISPCLKDelaySubtotal, unsigned int DPP_RECOUT_WIDTH, - enum output_format_class OutputFormat, - unsigned int MaxInterDCNTileRepeaters, unsigned int VStartup, unsigned int MaxVStartup, - unsigned int GPUVMPageTableLevels, - bool GPUVMEnable, - bool HostVMEnable, - unsigned int HostVMMaxNonCachedPageTableLevels, - double HostVMMinPageSize, - bool DynamicMetadataEnable, - bool DynamicMetadataVMEnabled, - int DynamicMetadataLinesBeforeActiveRequired, - unsigned int DynamicMetadataTransmittedBytes, double UrgentLatency, double UrgentExtraLatency, double TCalc, @@ -3425,6 +3411,7 @@ bool dml32_CalculatePrefetchSchedule( double *VUpdateWidthPix, double *VReadyOffsetPix) { + double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; bool MyError = false; unsigned int DPPCycles, DISPCLKCycles; double DSTTotalPixelsAfterScaler; @@ -3461,27 +3448,27 @@ bool dml32_CalculatePrefetchSchedule( double Tsw_est1 = 0; double Tsw_est3 = 0; - if (GPUVMEnable == true && HostVMEnable == true) - HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + if (v->GPUVMEnable == true && v->HostVMEnable == true) + HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; else HostVMDynamicLevelsTrips = 0; #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); - dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels); + dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable); + dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels); dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable); - dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n", - __func__, HostVMEnable, HostVMInefficiencyFactor); + dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n", + __func__, v->HostVMEnable, HostVMInefficiencyFactor); #endif dml32_CalculateVUpdateAndDynamicMetadataParameters( - MaxInterDCNTileRepeaters, + v->MaxInterDCNTileRepeaters, myPipe->Dppclk, myPipe->Dispclk, myPipe->DCFClkDeepSleep, myPipe->PixelClock, myPipe->HTotal, myPipe->VBlank, - DynamicMetadataTransmittedBytes, - DynamicMetadataLinesBeforeActiveRequired, + v->DynamicMetadataTransmittedBytes[k], + v->DynamicMetadataLinesBeforeActiveRequired[k], myPipe->InterlaceEnable, myPipe->ProgressiveToInterlaceUnitInOPP, TSetup, @@ -3496,19 +3483,19 @@ bool dml32_CalculatePrefetchSchedule( LineTime = myPipe->HTotal / myPipe->PixelClock; trip_to_mem = UrgentLatency; - Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); + Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); - if (DynamicMetadataVMEnabled == true) + if (v->DynamicMetadataVMEnabled == true) *Tdmdl = TWait + Tvm_trips + trip_to_mem; else *Tdmdl = TWait + UrgentExtraLatency; #ifdef __DML_VBA_ALLOW_DELTA__ - if (DynamicMetadataEnable == false) + if (v->DynamicMetadataEnable[k] == false) *Tdmdl = 0.0; #endif - if (DynamicMetadataEnable == true) { + if (v->DynamicMetadataEnable[k] == true) { if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { *NotEnoughTimeForDynamicMetadata = true; #ifdef __DML_VBA_DEBUG__ @@ -3528,17 +3515,17 @@ bool dml32_CalculatePrefetchSchedule( *NotEnoughTimeForDynamicMetadata = false; } - *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && - GPUVMEnable == true ? TWait + Tvm_trips : 0); + *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && + v->GPUVMEnable == true ? TWait + Tvm_trips : 0); if (myPipe->ScalerEnabled) - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; else - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; - DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; + DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; - DISPCLKCycles = DISPCLKDelaySubtotal; + DISPCLKCycles = v->DISPCLKDelaySubtotal; if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) return true; @@ -3564,7 +3551,7 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); #endif - if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) + if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) *DSTYAfterScaler = 1; else *DSTYAfterScaler = 0; @@ -3581,13 +3568,13 @@ bool dml32_CalculatePrefetchSchedule( Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); - if (GPUVMEnable == true) { + if (v->GPUVMEnable == true) { Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; - if (GPUVMPageTableLevels >= 3) { + if (v->GPUVMMaxPageTableLevels >= 3) { *Tno_bw = UrgentExtraLatency + trip_to_mem * - (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); - } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) { + (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); + } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) { Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / 4.0 * LineTime; // VBA_ERROR *Tno_bw = UrgentExtraLatency; @@ -3622,7 +3609,7 @@ bool dml32_CalculatePrefetchSchedule( min_Lsw = dml_max(min_Lsw, 1.0); Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; - if (GPUVMEnable == true) { + if (v->GPUVMEnable == true) { Tvm_oto = dml_max3( Tvm_trips, *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, @@ -3630,7 +3617,7 @@ bool dml32_CalculatePrefetchSchedule( } else Tvm_oto = LineTime / 4.0; - if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { + if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { Tr0_oto = dml_max4( Tr0_trips, (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, @@ -3833,7 +3820,7 @@ bool dml32_CalculatePrefetchSchedule( #endif if (prefetch_bw_equ > 0) { - if (GPUVMEnable == true) { + if (v->GPUVMEnable == true) { Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); @@ -3841,7 +3828,7 @@ bool dml32_CalculatePrefetchSchedule( Tvm_equ = LineTime / 4; } - if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { + if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, (LineTime - Tvm_equ) / 2, LineTime / 4); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 7cdf0418830a..3dbc9cf46aad 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -714,28 +714,14 @@ double dml32_CalculateExtraLatency( unsigned int HostVMMaxNonCachedPageTableLevels); bool dml32_CalculatePrefetchSchedule( + struct vba_vars_st *v, + unsigned int k, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, - double DPPCLKDelaySubtotalPlusCNVCFormater, - double DPPCLKDelaySCL, - double DPPCLKDelaySCLLBOnly, - double DPPCLKDelayCNVCCursor, - double DISPCLKDelaySubtotal, unsigned int DPP_RECOUT_WIDTH, - enum output_format_class OutputFormat, - unsigned int MaxInterDCNTileRepeaters, unsigned int VStartup, unsigned int MaxVStartup, - unsigned int GPUVMPageTableLevels, - bool GPUVMEnable, - bool HostVMEnable, - unsigned int HostVMMaxNonCachedPageTableLevels, - double HostVMMinPageSize, - bool DynamicMetadataEnable, - bool DynamicMetadataVMEnabled, - int DynamicMetadataLinesBeforeActiveRequired, - unsigned int DynamicMetadataTransmittedBytes, double UrgentLatency, double UrgentExtraLatency, double TCalc, From ab2ac59c32dbec068954de30eda741d012be3c74 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Aug 2022 13:34:07 -0700 Subject: [PATCH 59/88] drm/amd/display: Reduce number of arguments of dml31's CalculateWatermarksAndDRAMSpeedChangeSupport() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most of the arguments are identical between the two call sites and they can be accessed through the 'struct vba_vars_st' pointer. This reduces the total amount of stack space that dml31_ModeSupportAndSystemConfigurationFull() uses by 240 bytes with LLVM 16 (2216 -> 1976), helping clear up the following clang warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:3908:6: error: stack frame size (2216) exceeds limit (2048) in 'dml31_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) ^ 1 error generated. Link: https://github.com/ClangBuiltLinux/linux/issues/1681 Reported-by: "Sudip Mukherjee (Codethink)" Tested-by: Maíra Canal Reviewed-by: Rodrigo Siqueira Signed-off-by: Nathan Chancellor Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml/dcn31/display_mode_vba_31.c | 248 ++++-------------- 1 file changed, 52 insertions(+), 196 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 8753f94bdd79..9af34cd3b125 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -311,64 +311,28 @@ static void CalculateVupdateAndDynamicMetadataParameters( static void CalculateWatermarksAndDRAMSpeedChangeSupport( struct display_mode_lib *mode_lib, unsigned int PrefetchMode, - unsigned int NumberOfActivePlanes, - unsigned int MaxLineBufferLines, - unsigned int LineBufferSize, - unsigned int WritebackInterfaceBufferSize, double DCFCLK, double ReturnBW, - bool SynchronizedVBlank, - unsigned int dpte_group_bytes[], - unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, - double WritebackLatency, - double WritebackChunkSize, double SOCCLK, - double DRAMClockChangeLatency, - double SRExitTime, - double SREnterPlusExitTime, - double SRExitZ8Time, - double SREnterPlusExitZ8Time, double DCFCLKDeepSleep, unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], - unsigned int LBBitPerPixel[], double SwathWidthY[], double SwathWidthC[], - double HRatio[], - double HRatioChroma[], - unsigned int vtaps[], - unsigned int VTAPsChroma[], - double VRatio[], - double VRatioChroma[], - unsigned int HTotal[], - double PixelClock[], - unsigned int BlendingAndTiming[], unsigned int DPPPerPlane[], double BytePerPixelDETY[], double BytePerPixelDETC[], - double DSTXAfterScaler[], - double DSTYAfterScaler[], - bool WritebackEnable[], - enum source_format_class WritebackPixelFormat[], - double WritebackDestinationWidth[], - double WritebackDestinationHeight[], - double WritebackSourceHeight[], bool UnboundedRequestEnabled, int unsigned CompressedBufferSizeInkByte, enum clock_change_support *DRAMClockChangeSupport, - double *UrgentWatermark, - double *WritebackUrgentWatermark, - double *DRAMClockChangeWatermark, - double *WritebackDRAMClockChangeWatermark, double *StutterExitWatermark, double *StutterEnterPlusExitWatermark, double *Z8StutterExitWatermark, - double *Z8StutterEnterPlusExitWatermark, - double *MinActiveDRAMClockChangeLatencySupported); + double *Z8StutterEnterPlusExitWatermark); static void CalculateDCFCLKDeepSleep( struct display_mode_lib *mode_lib, @@ -3017,64 +2981,28 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman CalculateWatermarksAndDRAMSpeedChangeSupport( mode_lib, PrefetchMode, - v->NumberOfActivePlanes, - v->MaxLineBufferLines, - v->LineBufferSize, - v->WritebackInterfaceBufferSize, v->DCFCLK, v->ReturnBW, - v->SynchronizedVBlank, - v->dpte_group_bytes, - v->MetaChunkSize, v->UrgentLatency, v->UrgentExtraLatency, - v->WritebackLatency, - v->WritebackChunkSize, v->SOCCLK, - v->DRAMClockChangeLatency, - v->SRExitTime, - v->SREnterPlusExitTime, - v->SRExitZ8Time, - v->SREnterPlusExitZ8Time, v->DCFCLKDeepSleep, v->DETBufferSizeY, v->DETBufferSizeC, v->SwathHeightY, v->SwathHeightC, - v->LBBitPerPixel, v->SwathWidthY, v->SwathWidthC, - v->HRatio, - v->HRatioChroma, - v->vtaps, - v->VTAPsChroma, - v->VRatio, - v->VRatioChroma, - v->HTotal, - v->PixelClock, - v->BlendingAndTiming, v->DPPPerPlane, v->BytePerPixelDETY, v->BytePerPixelDETC, - v->DSTXAfterScaler, - v->DSTYAfterScaler, - v->WritebackEnable, - v->WritebackPixelFormat, - v->WritebackDestinationWidth, - v->WritebackDestinationHeight, - v->WritebackSourceHeight, v->UnboundedRequestEnabled, v->CompressedBufferSizeInkByte, &DRAMClockChangeSupport, - &v->UrgentWatermark, - &v->WritebackUrgentWatermark, - &v->DRAMClockChangeWatermark, - &v->WritebackDRAMClockChangeWatermark, &v->StutterExitWatermark, &v->StutterEnterPlusExitWatermark, &v->Z8StutterExitWatermark, - &v->Z8StutterEnterPlusExitWatermark, - &v->MinActiveDRAMClockChangeLatencySupported); + &v->Z8StutterEnterPlusExitWatermark); for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->WritebackEnable[k] == true) { @@ -5384,64 +5312,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l CalculateWatermarksAndDRAMSpeedChangeSupport( mode_lib, v->PrefetchModePerState[i][j], - v->NumberOfActivePlanes, - v->MaxLineBufferLines, - v->LineBufferSize, - v->WritebackInterfaceBufferSize, v->DCFCLKState[i][j], v->ReturnBWPerState[i][j], - v->SynchronizedVBlank, - v->dpte_group_bytes, - v->MetaChunkSize, v->UrgLatency[i], v->ExtraLatency, - v->WritebackLatency, - v->WritebackChunkSize, v->SOCCLKPerState[i], - v->DRAMClockChangeLatency, - v->SRExitTime, - v->SREnterPlusExitTime, - v->SRExitZ8Time, - v->SREnterPlusExitZ8Time, v->ProjectedDCFCLKDeepSleep[i][j], v->DETBufferSizeYThisState, v->DETBufferSizeCThisState, v->SwathHeightYThisState, v->SwathHeightCThisState, - v->LBBitPerPixel, v->SwathWidthYThisState, v->SwathWidthCThisState, - v->HRatio, - v->HRatioChroma, - v->vtaps, - v->VTAPsChroma, - v->VRatio, - v->VRatioChroma, - v->HTotal, - v->PixelClock, - v->BlendingAndTiming, v->NoOfDPPThisState, v->BytePerPixelInDETY, v->BytePerPixelInDETC, - v->DSTXAfterScaler, - v->DSTYAfterScaler, - v->WritebackEnable, - v->WritebackPixelFormat, - v->WritebackDestinationWidth, - v->WritebackDestinationHeight, - v->WritebackSourceHeight, UnboundedRequestEnabledThisState, CompressedBufferSizeInkByteThisState, &v->DRAMClockChangeSupport[i][j], - &v->UrgentWatermark, - &v->WritebackUrgentWatermark, - &v->DRAMClockChangeWatermark, - &v->WritebackDRAMClockChangeWatermark, &dummy, &dummy, &dummy, - &dummy, - &v->MinActiveDRAMClockChangeLatencySupported); + &dummy); } } @@ -5566,64 +5458,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l static void CalculateWatermarksAndDRAMSpeedChangeSupport( struct display_mode_lib *mode_lib, unsigned int PrefetchMode, - unsigned int NumberOfActivePlanes, - unsigned int MaxLineBufferLines, - unsigned int LineBufferSize, - unsigned int WritebackInterfaceBufferSize, double DCFCLK, double ReturnBW, - bool SynchronizedVBlank, - unsigned int dpte_group_bytes[], - unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, - double WritebackLatency, - double WritebackChunkSize, double SOCCLK, - double DRAMClockChangeLatency, - double SRExitTime, - double SREnterPlusExitTime, - double SRExitZ8Time, - double SREnterPlusExitZ8Time, double DCFCLKDeepSleep, unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], - unsigned int LBBitPerPixel[], double SwathWidthY[], double SwathWidthC[], - double HRatio[], - double HRatioChroma[], - unsigned int vtaps[], - unsigned int VTAPsChroma[], - double VRatio[], - double VRatioChroma[], - unsigned int HTotal[], - double PixelClock[], - unsigned int BlendingAndTiming[], unsigned int DPPPerPlane[], double BytePerPixelDETY[], double BytePerPixelDETC[], - double DSTXAfterScaler[], - double DSTYAfterScaler[], - bool WritebackEnable[], - enum source_format_class WritebackPixelFormat[], - double WritebackDestinationWidth[], - double WritebackDestinationHeight[], - double WritebackSourceHeight[], bool UnboundedRequestEnabled, int unsigned CompressedBufferSizeInkByte, enum clock_change_support *DRAMClockChangeSupport, - double *UrgentWatermark, - double *WritebackUrgentWatermark, - double *DRAMClockChangeWatermark, - double *WritebackDRAMClockChangeWatermark, double *StutterExitWatermark, double *StutterEnterPlusExitWatermark, double *Z8StutterExitWatermark, - double *Z8StutterEnterPlusExitWatermark, - double *MinActiveDRAMClockChangeLatencySupported) + double *Z8StutterEnterPlusExitWatermark) { struct vba_vars_st *v = &mode_lib->vba; double EffectiveLBLatencyHidingY; @@ -5643,103 +5499,103 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double TotalPixelBW = 0.0; int k, j; - *UrgentWatermark = UrgentLatency + ExtraLatency; + v->UrgentWatermark = UrgentLatency + ExtraLatency; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); - dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark); + dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark); #endif - *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark; + v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark; #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency); - dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark); + dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency); + dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark); #endif v->TotalActiveWriteback = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (WritebackEnable[k] == true) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->WritebackEnable[k] == true) { v->TotalActiveWriteback = v->TotalActiveWriteback + 1; } } if (v->TotalActiveWriteback <= 1) { - *WritebackUrgentWatermark = WritebackLatency; + v->WritebackUrgentWatermark = v->WritebackLatency; } else { - *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; } if (v->TotalActiveWriteback <= 1) { - *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency; + v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency; } else { - *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; } - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { TotalPixelBW = TotalPixelBW - + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) - / (HTotal[k] / PixelClock[k]); + + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) + / (v->HTotal[k] / v->PixelClock[k]); } - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { double EffectiveDETBufferSizeY = DETBufferSizeY[k]; v->LBLatencyHidingSourceLinesY = dml_min( - (double) MaxLineBufferLines, - dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1); + (double) v->MaxLineBufferLines, + dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); v->LBLatencyHidingSourceLinesC = dml_min( - (double) MaxLineBufferLines, - dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1); + (double) v->MaxLineBufferLines, + dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); - EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]); + EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); - EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); + EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); if (UnboundedRequestEnabled) { EffectiveDETBufferSizeY = EffectiveDETBufferSizeY - + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW; + + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; } LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); - FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; + FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; if (BytePerPixelDETC[k] > 0) { LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); - FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; + FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; } else { LinesInDETC = 0; FullDETBufferingTimeC = 999999; } ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY - - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; + - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; - if (NumberOfActivePlanes > 1) { + if (v->NumberOfActivePlanes > 1) { ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; + - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; } if (BytePerPixelDETC[k] > 0) { ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; + - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; - if (NumberOfActivePlanes > 1) { + if (v->NumberOfActivePlanes > 1) { ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; + - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k]; } v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); } else { v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; } - if (WritebackEnable[k] == true) { - WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 - / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); - if (WritebackPixelFormat[k] == dm_444_64) { + if (v->WritebackEnable[k] == true) { + WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 + / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); + if (v->WritebackPixelFormat[k] == dm_444_64) { WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; } WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; @@ -5749,14 +5605,14 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( v->MinActiveDRAMClockChangeMargin = 999999; PlaneWithMinActiveDRAMClockChangeMargin = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; - if (BlendingAndTiming[k] == k) { + if (v->BlendingAndTiming[k] == k) { PlaneWithMinActiveDRAMClockChangeMargin = k; } else { - for (j = 0; j < NumberOfActivePlanes; ++j) { - if (BlendingAndTiming[k] == j) { + for (j = 0; j < v->NumberOfActivePlanes; ++j) { + if (v->BlendingAndTiming[k] == j) { PlaneWithMinActiveDRAMClockChangeMargin = j; } } @@ -5764,11 +5620,11 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( } } - *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency; + v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ; SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; } @@ -5776,25 +5632,25 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( v->TotalNumberOfActiveOTG = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (BlendingAndTiming[k] == k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->BlendingAndTiming[k] == k) { v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; } } if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { *DRAMClockChangeSupport = dm_dram_clock_change_vactive; - } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 + } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { *DRAMClockChangeSupport = dm_dram_clock_change_vblank; } else { *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; } - *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; - *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); - *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; - *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; + *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; + *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); + *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; + *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); From 1dbec5b4b0ef319d6961d3ecb7384b4f9ef9d358 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Aug 2022 13:34:08 -0700 Subject: [PATCH 60/88] drm/amd/display: Reduce number of arguments of dml31's CalculateFlipSchedule() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most of the arguments are identical between the two call sites and they can be accessed through the 'struct vba_vars_st' pointer. This reduces the total amount of stack space that dml31_ModeSupportAndSystemConfigurationFull() uses by 112 bytes with LLVM 16 (1976 -> 1864), helping clear up the following clang warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:3908:6: error: stack frame size (2216) exceeds limit (2048) in 'dml31_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) ^ 1 error generated. Link: https://github.com/ClangBuiltLinux/linux/issues/1681 Reported-by: "Sudip Mukherjee (Codethink)" Tested-by: Maíra Canal Reviewed-by: Rodrigo Siqueira Signed-off-by: Nathan Chancellor Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml/dcn31/display_mode_vba_31.c | 172 +++++------------- 1 file changed, 47 insertions(+), 125 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 9af34cd3b125..4e3356d12147 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -251,33 +251,13 @@ static void CalculateRowBandwidth( static void CalculateFlipSchedule( struct display_mode_lib *mode_lib, + unsigned int k, double HostVMInefficiencyFactor, double UrgentExtraLatency, double UrgentLatency, - unsigned int GPUVMMaxPageTableLevels, - bool HostVMEnable, - unsigned int HostVMMaxNonCachedPageTableLevels, - bool GPUVMEnable, - double HostVMMinPageSize, double PDEAndMetaPTEBytesPerFrame, double MetaRowBytes, - double DPTEBytesPerRow, - double BandwidthAvailableForImmediateFlip, - unsigned int TotImmediateFlipBytes, - enum source_format_class SourcePixelFormat, - double LineTime, - double VRatio, - double VRatioChroma, - double Tno_bw, - bool DCCEnable, - unsigned int dpte_row_height, - unsigned int meta_row_height, - unsigned int dpte_row_height_chroma, - unsigned int meta_row_height_chroma, - double *DestinationLinesToRequestVMInImmediateFlip, - double *DestinationLinesToRequestRowInImmediateFlip, - double *final_flip_bw, - bool *ImmediateFlipSupportedForPipe); + double DPTEBytesPerRow); static double CalculateWriteBackDelay( enum source_format_class WritebackPixelFormat, double WritebackHRatio, @@ -2868,33 +2848,13 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman for (k = 0; k < v->NumberOfActivePlanes; ++k) { CalculateFlipSchedule( mode_lib, + k, HostVMInefficiencyFactor, v->UrgentExtraLatency, v->UrgentLatency, - v->GPUVMMaxPageTableLevels, - v->HostVMEnable, - v->HostVMMaxNonCachedPageTableLevels, - v->GPUVMEnable, - v->HostVMMinPageSize, v->PDEAndMetaPTEBytesFrame[k], v->MetaRowByte[k], - v->PixelPTEBytesPerRow[k], - v->BandwidthAvailableForImmediateFlip, - v->TotImmediateFlipBytes, - v->SourcePixelFormat[k], - v->HTotal[k] / v->PixelClock[k], - v->VRatio[k], - v->VRatioChroma[k], - v->Tno_bw[k], - v->DCCEnable[k], - v->dpte_row_height[k], - v->meta_row_height[k], - v->dpte_row_height_chroma[k], - v->meta_row_height_chroma[k], - &v->DestinationLinesToRequestVMInImmediateFlip[k], - &v->DestinationLinesToRequestRowInImmediateFlip[k], - &v->final_flip_bw[k], - &v->ImmediateFlipSupportedForPipe[k]); + v->PixelPTEBytesPerRow[k]); } v->total_dcn_read_bw_with_flip = 0.0; @@ -3526,61 +3486,43 @@ static void CalculateRowBandwidth( static void CalculateFlipSchedule( struct display_mode_lib *mode_lib, + unsigned int k, double HostVMInefficiencyFactor, double UrgentExtraLatency, double UrgentLatency, - unsigned int GPUVMMaxPageTableLevels, - bool HostVMEnable, - unsigned int HostVMMaxNonCachedPageTableLevels, - bool GPUVMEnable, - double HostVMMinPageSize, double PDEAndMetaPTEBytesPerFrame, double MetaRowBytes, - double DPTEBytesPerRow, - double BandwidthAvailableForImmediateFlip, - unsigned int TotImmediateFlipBytes, - enum source_format_class SourcePixelFormat, - double LineTime, - double VRatio, - double VRatioChroma, - double Tno_bw, - bool DCCEnable, - unsigned int dpte_row_height, - unsigned int meta_row_height, - unsigned int dpte_row_height_chroma, - unsigned int meta_row_height_chroma, - double *DestinationLinesToRequestVMInImmediateFlip, - double *DestinationLinesToRequestRowInImmediateFlip, - double *final_flip_bw, - bool *ImmediateFlipSupportedForPipe) + double DPTEBytesPerRow) { + struct vba_vars_st *v = &mode_lib->vba; double min_row_time = 0.0; unsigned int HostVMDynamicLevelsTrips; double TimeForFetchingMetaPTEImmediateFlip; double TimeForFetchingRowInVBlankImmediateFlip; double ImmediateFlipBW; + double LineTime = v->HTotal[k] / v->PixelClock[k]; - if (GPUVMEnable == true && HostVMEnable == true) { - HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + if (v->GPUVMEnable == true && v->HostVMEnable == true) { + HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; } else { HostVMDynamicLevelsTrips = 0; } - if (GPUVMEnable == true || DCCEnable == true) { - ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; + if (v->GPUVMEnable == true || v->DCCEnable[k] == true) { + ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes; } - if (GPUVMEnable == true) { + if (v->GPUVMEnable == true) { TimeForFetchingMetaPTEImmediateFlip = dml_max3( - Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, - UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), + v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, + UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0); } else { TimeForFetchingMetaPTEImmediateFlip = 0; } - *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; - if ((GPUVMEnable == true || DCCEnable == true)) { + v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; + if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { TimeForFetchingRowInVBlankImmediateFlip = dml_max3( (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevelsTrips + 1), @@ -3589,54 +3531,54 @@ static void CalculateFlipSchedule( TimeForFetchingRowInVBlankImmediateFlip = 0; } - *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; + v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; - if (GPUVMEnable == true) { - *final_flip_bw = dml_max( - PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), - (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); - } else if ((GPUVMEnable == true || DCCEnable == true)) { - *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime); + if (v->GPUVMEnable == true) { + v->final_flip_bw[k] = dml_max( + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime), + (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime)); + } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { + v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime); } else { - *final_flip_bw = 0; + v->final_flip_bw[k] = 0; } - if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { - if (GPUVMEnable == true && DCCEnable != true) { - min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); - } else if (GPUVMEnable != true && DCCEnable == true) { - min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); + if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { + if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { + min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); + } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { + min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); } else { min_row_time = dml_min4( - dpte_row_height * LineTime / VRatio, - meta_row_height * LineTime / VRatio, - dpte_row_height_chroma * LineTime / VRatioChroma, - meta_row_height_chroma * LineTime / VRatioChroma); + v->dpte_row_height[k] * LineTime / v->VRatio[k], + v->meta_row_height[k] * LineTime / v->VRatio[k], + v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k], + v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); } } else { - if (GPUVMEnable == true && DCCEnable != true) { - min_row_time = dpte_row_height * LineTime / VRatio; - } else if (GPUVMEnable != true && DCCEnable == true) { - min_row_time = meta_row_height * LineTime / VRatio; + if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { + min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k]; + } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { + min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k]; } else { - min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); + min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]); } } - if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 + if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { - *ImmediateFlipSupportedForPipe = false; + v->ImmediateFlipSupportedForPipe[k] = false; } else { - *ImmediateFlipSupportedForPipe = true; + v->ImmediateFlipSupportedForPipe[k] = true; } #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip); - dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip); + dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]); + dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]); dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); - dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); + dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]); #endif } @@ -5228,33 +5170,13 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (k = 0; k < v->NumberOfActivePlanes; k++) { CalculateFlipSchedule( mode_lib, + k, HostVMInefficiencyFactor, v->ExtraLatency, v->UrgLatency[i], - v->GPUVMMaxPageTableLevels, - v->HostVMEnable, - v->HostVMMaxNonCachedPageTableLevels, - v->GPUVMEnable, - v->HostVMMinPageSize, v->PDEAndMetaPTEBytesPerFrame[i][j][k], v->MetaRowBytes[i][j][k], - v->DPTEBytesPerRow[i][j][k], - v->BandwidthAvailableForImmediateFlip, - v->TotImmediateFlipBytes, - v->SourcePixelFormat[k], - v->HTotal[k] / v->PixelClock[k], - v->VRatio[k], - v->VRatioChroma[k], - v->Tno_bw[k], - v->DCCEnable[k], - v->dpte_row_height[k], - v->meta_row_height[k], - v->dpte_row_height_chroma[k], - v->meta_row_height_chroma[k], - &v->DestinationLinesToRequestVMInImmediateFlip[k], - &v->DestinationLinesToRequestRowInImmediateFlip[k], - &v->final_flip_bw[k], - &v->ImmediateFlipSupportedForPipe[k]); + v->DPTEBytesPerRow[i][j][k]); } v->total_dcn_read_bw_with_flip = 0.0; for (k = 0; k < v->NumberOfActivePlanes; k++) { From b0f4b23fc3dbd8c5398e9ea9cf1f16a00d9006a2 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Aug 2022 13:34:09 -0700 Subject: [PATCH 61/88] drm/amd/display: Mark dml30's UseMinimumDCFCLK() as noinline for stack usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function consumes a lot of stack space and it blows up the size of dml30_ModeSupportAndSystemConfigurationFull() with clang: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3542:6: error: stack frame size (2200) exceeds limit (2048) in 'dml30_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) ^ 1 error generated. Commit a0f7e7f759cf ("drm/amd/display: fix i386 frame size warning") aimed to address this for i386 but it did not help x86_64. To reduce the amount of stack space that dml30_ModeSupportAndSystemConfigurationFull() uses, mark UseMinimumDCFCLK() as noinline, using the _for_stack variant for documentation. While this will increase the total amount of stack usage between the two functions (1632 and 1304 bytes respectively), it will make sure both stay below the limit of 2048 bytes for these files. The aforementioned change does help reduce UseMinimumDCFCLK()'s stack usage so it should not be reverted in favor of this change. Link: https://github.com/ClangBuiltLinux/linux/issues/1681 Reported-by: "Sudip Mukherjee (Codethink)" Tested-by: Maíra Canal Reviewed-by: Rodrigo Siqueira Signed-off-by: Nathan Chancellor Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index c117a9724ae1..479e2c1a1301 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -6493,7 +6493,7 @@ static double CalculateUrgentLatency( return ret; } -static void UseMinimumDCFCLK( +static noinline_for_stack void UseMinimumDCFCLK( struct display_mode_lib *mode_lib, struct vba_vars_st *v, int MaxPrefetchMode, From 9b94c609cc1757d5cd4621dc15ba73faa492b99e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 28 Feb 2022 20:46:57 +0100 Subject: [PATCH 62/88] drm/amdgpu: remove SRIOV and MCBP dependencies from the CS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should not have any different CS constrains based on the execution environment. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b7bae833c804..6f80cf2ea9ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -814,7 +814,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { + if (fpriv->csa_va) { bo_va = fpriv->csa_va; BUG_ON(!bo_va); r = amdgpu_vm_bo_update(adev, bo_va, false); @@ -898,13 +898,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, continue; if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && - (amdgpu_mcbp || amdgpu_sriov_vf(adev))) { - if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { - if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) - ce_preempt++; - else - de_preempt++; - } + chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { + if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) + ce_preempt++; + else + de_preempt++; /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */ if (ce_preempt > 1 || de_preempt > 1) From 736ec9fadd7a1fde8480df7e5cfac465c07ff6f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 1 Mar 2022 10:59:14 +0100 Subject: [PATCH 63/88] drm/amdgpu: move setting the job resources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move setting the job resources into amdgpu_job.c Signed-off-by: Christian König Reviewed-by: Andrey Grodzovsky Reviewed-by: Luben Tuikov Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 21 ++------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 17 +++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 2 ++ 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 6f80cf2ea9ae..f7bf61d96be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -495,9 +495,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_list_entry *e; struct list_head duplicates; - struct amdgpu_bo *gds; - struct amdgpu_bo *gws; - struct amdgpu_bo *oa; int r; INIT_LIST_HEAD(&p->validated); @@ -614,22 +611,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, p->bytes_moved_vis); - gds = p->bo_list->gds_obj; - gws = p->bo_list->gws_obj; - oa = p->bo_list->oa_obj; - - if (gds) { - p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; - p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT; - } - if (gws) { - p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT; - p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT; - } - if (oa) { - p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT; - p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT; - } + amdgpu_job_set_resources(p->job, p->bo_list->gds_obj, + p->bo_list->gws_obj, p->bo_list->oa_obj); if (!r && p->uf_entry.tv.bo) { struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 1062b7ed74ec..877b3c22a8a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -132,6 +132,23 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, return r; } +void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, + struct amdgpu_bo *gws, struct amdgpu_bo *oa) +{ + if (gds) { + job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; + job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT; + } + if (gws) { + job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT; + job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT; + } + if (oa) { + job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT; + job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT; + } +} + void amdgpu_job_free_resources(struct amdgpu_job *job) { struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index babc0af751c2..2a1961bf1194 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -76,6 +76,8 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_job **job, struct amdgpu_vm *vm); int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, enum amdgpu_ib_pool_type pool, struct amdgpu_job **job); +void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, + struct amdgpu_bo *gws, struct amdgpu_bo *oa); void amdgpu_job_free_resources(struct amdgpu_job *job); void amdgpu_job_free(struct amdgpu_job *job); int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, From dd80d9c8eecac8c516da5b240d01a35660ba6cb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 14 Jul 2022 10:23:38 +0200 Subject: [PATCH 64/88] drm/amdgpu: revert "partial revert "remove ctx->lock" v2" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 94f4c4965e5513ba624488f4b601d6b385635aec. We found that the bo_list is missing a protection for its list entries. Since that is fixed now this workaround can be removed again. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 21 ++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 - 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f7bf61d96be5..52ba6325944e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -128,8 +128,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs goto free_chunk; } - mutex_lock(&p->ctx->lock); - /* skip guilty context job */ if (atomic_read(&p->ctx->guilty) == 1) { ret = -ECANCELED; @@ -691,7 +689,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, dma_fence_put(parser->fence); if (parser->ctx) { - mutex_unlock(&parser->ctx->lock); amdgpu_ctx_put(parser->ctx); } if (parser->bo_list) @@ -1138,9 +1135,6 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, { int i, r; - /* TODO: Investigate why we still need the context lock */ - mutex_unlock(&p->ctx->lock); - for (i = 0; i < p->nchunks; ++i) { struct amdgpu_cs_chunk *chunk; @@ -1151,34 +1145,32 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: r = amdgpu_cs_process_fence_dep(p, chunk); if (r) - goto out; + return r; break; case AMDGPU_CHUNK_ID_SYNCOBJ_IN: r = amdgpu_cs_process_syncobj_in_dep(p, chunk); if (r) - goto out; + return r; break; case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: r = amdgpu_cs_process_syncobj_out_dep(p, chunk); if (r) - goto out; + return r; break; case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); if (r) - goto out; + return r; break; case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); if (r) - goto out; + return r; break; } } -out: - mutex_lock(&p->ctx->lock); - return r; + return 0; } static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) @@ -1340,7 +1332,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; r = amdgpu_cs_submit(&parser, cs); - out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 3ea48385fab3..f6d9d5da53cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -315,7 +315,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, kref_init(&ctx->refcount); ctx->mgr = mgr; spin_lock_init(&ctx->ring_lock); - mutex_init(&ctx->lock); ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); ctx->reset_counter_query = ctx->reset_counter; @@ -407,7 +406,6 @@ static void amdgpu_ctx_fini(struct kref *ref) drm_dev_exit(idx); } - mutex_destroy(&ctx->lock); kfree(ctx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index cc7c8afff414..0fa0e56daf67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -53,7 +53,6 @@ struct amdgpu_ctx { bool preamble_presented; int32_t init_priority; int32_t override_priority; - struct mutex lock; atomic_t guilty; unsigned long ras_counter_ce; unsigned long ras_counter_ue; From 4247084057cffcd8f9897110d8d827d550858dc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 5 Apr 2022 10:35:31 +0200 Subject: [PATCH 65/88] drm/amdgpu: use DMA_RESV_USAGE_BOOKKEEP v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use DMA_RESV_USAGE_BOOKKEEP for VM page table updates and KFD preemption fence. v2: actually update all usages for KFD Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 26 ++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 3 ++- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f4c49537d837..978d3970b5cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -298,7 +298,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, */ replacement = dma_fence_get_stub(); dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context, - replacement, DMA_RESV_USAGE_READ); + replacement, DMA_RESV_USAGE_BOOKKEEP); dma_fence_put(replacement); return 0; } @@ -1391,8 +1391,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; - amdgpu_bo_fence(vm->root.bo, - &vm->process_info->eviction_fence->base, true); + dma_resv_add_fence(vm->root.bo->tbo.base.resv, + &vm->process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); amdgpu_bo_unreserve(vm->root.bo); /* Update process info */ @@ -1989,9 +1990,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( } if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count) - amdgpu_bo_fence(bo, - &avm->process_info->eviction_fence->base, - true); + dma_resv_add_fence(bo->tbo.base.resv, + &avm->process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); ret = unreserve_bo_and_vms(&ctx, false, false); goto out; @@ -2760,15 +2761,18 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) if (mem->bo->tbo.pin_count) continue; - amdgpu_bo_fence(mem->bo, - &process_info->eviction_fence->base, true); + dma_resv_add_fence(mem->bo->tbo.base.resv, + &process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); } /* Attach eviction fence to PD / PT BOs */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { struct amdgpu_bo *bo = peer_vm->root.bo; - amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); + dma_resv_add_fence(bo->tbo.base.resv, + &process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); } validate_map_fail: @@ -2822,7 +2826,9 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; - amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); + dma_resv_add_fence(gws_bo->tbo.base.resv, + &process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); amdgpu_bo_unreserve(gws_bo); mutex_unlock(&(*mem)->process_info->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 1fd3cbca20a2..03ec099d64e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -112,7 +112,8 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, swap(p->vm->last_unlocked, tmp); dma_fence_put(tmp); } else { - amdgpu_bo_fence(p->vm->root.bo, f, true); + dma_resv_add_fence(p->vm->root.bo->tbo.base.resv, f, + DMA_RESV_USAGE_BOOKKEEP); } if (fence && !p->immediate) From 88c98d54b220dfec0b8e4a4400f6822aea5a383d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 7 Sep 2022 11:37:06 +0200 Subject: [PATCH 66/88] drm/amdgpu: cleanup CS init/fini and pass1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup the coding style and function names to represent the data they process. Only initialize and cleanup the CS structure in init/fini. Check the size of the IB chunk in pass1. v2: fix job initialisation order and use correct scheduler instance v3: try to move all functional changes into a separate patch. v4: move reordering and pass2 out of this patch as well Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 197 +++++++++++++------------ 1 file changed, 104 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 52ba6325944e..ee02fecc27b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -39,9 +39,42 @@ #include "amdgpu_gem.h" #include "amdgpu_ras.h" -static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, - struct drm_amdgpu_cs_chunk_fence *data, - uint32_t *offset) +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, + struct amdgpu_device *adev, + struct drm_file *filp, + union drm_amdgpu_cs *cs) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + + if (cs->in.num_chunks == 0) + return -EINVAL; + + memset(p, 0, sizeof(*p)); + p->adev = adev; + p->filp = filp; + + p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); + if (!p->ctx) + return -EINVAL; + + if (atomic_read(&p->ctx->guilty)) { + amdgpu_ctx_put(p->ctx); + return -ECANCELED; + } + return 0; +} + +static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, + struct drm_amdgpu_cs_chunk_ib *chunk_ib, + unsigned int *num_ibs) +{ + ++(*num_ibs); + return 0; +} + +static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p, + struct drm_amdgpu_cs_chunk_fence *data, + uint32_t *offset) { struct drm_gem_object *gobj; struct amdgpu_bo *bo; @@ -80,11 +113,11 @@ error_unref: return r; } -static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, - struct drm_amdgpu_bo_list_in *data) +static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p, + struct drm_amdgpu_bo_list_in *data) { + struct drm_amdgpu_bo_list_entry *info; int r; - struct drm_amdgpu_bo_list_entry *info = NULL; r = amdgpu_bo_create_list_entry_array(data, &info); if (r) @@ -104,7 +137,9 @@ error_free: return r; } -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) +/* Copy the data from userspace and go over it the first time */ +static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, + union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; @@ -112,28 +147,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs uint64_t *chunk_array; unsigned size, num_ibs = 0; uint32_t uf_offset = 0; - int i; int ret; + int i; - if (cs->in.num_chunks == 0) - return -EINVAL; - - chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); + chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), + GFP_KERNEL); if (!chunk_array) return -ENOMEM; - p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); - if (!p->ctx) { - ret = -EINVAL; - goto free_chunk; - } - - /* skip guilty context job */ - if (atomic_read(&p->ctx->guilty) == 1) { - ret = -ECANCELED; - goto free_chunk; - } - /* get chunks */ chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, @@ -168,7 +189,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs size = p->chunks[i].length_dw; cdata = u64_to_user_ptr(user_chunk.chunk_data); - p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); + p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), + GFP_KERNEL); if (p->chunks[i].kdata == NULL) { ret = -ENOMEM; i--; @@ -180,36 +202,35 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs goto free_partial_kdata; } + /* Assume the worst on the following checks */ + ret = -EINVAL; switch (p->chunks[i].chunk_id) { case AMDGPU_CHUNK_ID_IB: - ++num_ibs; + if (size < sizeof(struct drm_amdgpu_cs_chunk_ib)) + goto free_partial_kdata; + + ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, &num_ibs); + if (ret) + goto free_partial_kdata; break; case AMDGPU_CHUNK_ID_FENCE: - size = sizeof(struct drm_amdgpu_cs_chunk_fence); - if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { - ret = -EINVAL; + if (size < sizeof(struct drm_amdgpu_cs_chunk_fence)) goto free_partial_kdata; - } - ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata, - &uf_offset); + ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata, + &uf_offset); if (ret) goto free_partial_kdata; - break; case AMDGPU_CHUNK_ID_BO_HANDLES: - size = sizeof(struct drm_amdgpu_bo_list_in); - if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { - ret = -EINVAL; + if (size < sizeof(struct drm_amdgpu_bo_list_in)) goto free_partial_kdata; - } - ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata); + ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata); if (ret) goto free_partial_kdata; - break; case AMDGPU_CHUNK_ID_DEPENDENCIES: @@ -221,7 +242,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs break; default: - ret = -EINVAL; goto free_partial_kdata; } } @@ -660,52 +680,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return 0; } -/** - * amdgpu_cs_parser_fini() - clean parser states - * @parser: parser structure holding parsing context. - * @error: error number - * @backoff: indicator to backoff the reservation - * - * If error is set then unvalidate buffer, otherwise just free memory - * used by parsing context. - **/ -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, - bool backoff) -{ - unsigned i; - - if (error && backoff) { - ttm_eu_backoff_reservation(&parser->ticket, - &parser->validated); - mutex_unlock(&parser->bo_list->bo_list_mutex); - } - - for (i = 0; i < parser->num_post_deps; i++) { - drm_syncobj_put(parser->post_deps[i].syncobj); - kfree(parser->post_deps[i].chain); - } - kfree(parser->post_deps); - - dma_fence_put(parser->fence); - - if (parser->ctx) { - amdgpu_ctx_put(parser->ctx); - } - if (parser->bo_list) - amdgpu_bo_list_put(parser->bo_list); - - for (i = 0; i < parser->nchunks; i++) - kvfree(parser->chunks[i].kdata); - kvfree(parser->chunks); - if (parser->job) - amdgpu_job_free(parser->job); - if (parser->uf_entry.tv.bo) { - struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo); - - amdgpu_bo_unref(&uf); - } -} - static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) { struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); @@ -1280,11 +1254,47 @@ static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser) trace_amdgpu_cs(parser, i); } +/* Cleanup the parser structure */ +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, + bool backoff) +{ + unsigned i; + + if (error && backoff) { + ttm_eu_backoff_reservation(&parser->ticket, + &parser->validated); + mutex_unlock(&parser->bo_list->bo_list_mutex); + } + + for (i = 0; i < parser->num_post_deps; i++) { + drm_syncobj_put(parser->post_deps[i].syncobj); + kfree(parser->post_deps[i].chain); + } + kfree(parser->post_deps); + + dma_fence_put(parser->fence); + + if (parser->ctx) + amdgpu_ctx_put(parser->ctx); + if (parser->bo_list) + amdgpu_bo_list_put(parser->bo_list); + + for (i = 0; i < parser->nchunks; i++) + kvfree(parser->chunks[i].kdata); + kvfree(parser->chunks); + if (parser->job) + amdgpu_job_free(parser->job); + if (parser->uf_entry.tv.bo) { + struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo); + + amdgpu_bo_unref(&uf); + } +} + int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = drm_to_adev(dev); - union drm_amdgpu_cs *cs = data; - struct amdgpu_cs_parser parser = {}; + struct amdgpu_cs_parser parser; bool reserved_buffers = false; int r; @@ -1294,16 +1304,17 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!adev->accel_working) return -EBUSY; - parser.adev = adev; - parser.filp = filp; - - r = amdgpu_cs_parser_init(&parser, data); + r = amdgpu_cs_parser_init(&parser, adev, filp, data); if (r) { if (printk_ratelimit()) DRM_ERROR("Failed to initialize parser %d!\n", r); goto out; } + r = amdgpu_cs_pass1(&parser, data); + if (r) + goto out; + r = amdgpu_cs_ib_fill(adev, &parser); if (r) goto out; @@ -1331,7 +1342,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) goto out; - r = amdgpu_cs_submit(&parser, cs); + r = amdgpu_cs_submit(&parser, data); out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); From d4e8ad908b2095388a300b19d76b78c170052a8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 8 Sep 2022 08:43:15 +0200 Subject: [PATCH 67/88] drm/amdgpu: reorder CS code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sort the functions in the order they are called Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 674 +++++++++++++------------ 1 file changed, 338 insertions(+), 336 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ee02fecc27b7..e104d7ef3c3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -278,6 +278,323 @@ free_chunk: return ret; } +static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, + struct amdgpu_cs_parser *parser) +{ + struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + int r, ce_preempt = 0, de_preempt = 0; + struct amdgpu_ring *ring; + int i, j; + + for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { + struct amdgpu_cs_chunk *chunk; + struct amdgpu_ib *ib; + struct drm_amdgpu_cs_chunk_ib *chunk_ib; + struct drm_sched_entity *entity; + + chunk = &parser->chunks[i]; + ib = &parser->job->ibs[j]; + chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) + continue; + + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && + chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { + if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) + ce_preempt++; + else + de_preempt++; + + /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */ + if (ce_preempt > 1 || de_preempt > 1) + return -EINVAL; + } + + r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type, + chunk_ib->ip_instance, chunk_ib->ring, + &entity); + if (r) + return r; + + if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) + parser->job->preamble_status |= + AMDGPU_PREAMBLE_IB_PRESENT; + + if (parser->entity && parser->entity != entity) + return -EINVAL; + + /* Return if there is no run queue associated with this entity. + * Possibly because of disabled HW IP*/ + if (entity->rq == NULL) + return -EINVAL; + + parser->entity = entity; + + ring = to_amdgpu_ring(entity->rq->sched); + r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? + chunk_ib->ib_bytes : 0, + AMDGPU_IB_POOL_DELAYED, ib); + if (r) { + DRM_ERROR("Failed to get ib !\n"); + return r; + } + + ib->gpu_addr = chunk_ib->va_start; + ib->length_dw = chunk_ib->ib_bytes / 4; + ib->flags = chunk_ib->flags; + + j++; + } + + /* MM engine doesn't support user fences */ + ring = to_amdgpu_ring(parser->entity->rq->sched); + if (parser->job->uf_addr && ring->funcs->no_user_fence) + return -EINVAL; + + return 0; +} + +static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + unsigned num_deps; + int i, r; + struct drm_amdgpu_cs_chunk_dep *deps; + + deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_dep); + + for (i = 0; i < num_deps; ++i) { + struct amdgpu_ctx *ctx; + struct drm_sched_entity *entity; + struct dma_fence *fence; + + ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); + if (ctx == NULL) + return -EINVAL; + + r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type, + deps[i].ip_instance, + deps[i].ring, &entity); + if (r) { + amdgpu_ctx_put(ctx); + return r; + } + + fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); + amdgpu_ctx_put(ctx); + + if (IS_ERR(fence)) + return PTR_ERR(fence); + else if (!fence) + continue; + + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { + struct drm_sched_fence *s_fence; + struct dma_fence *old = fence; + + s_fence = to_drm_sched_fence(fence); + fence = dma_fence_get(&s_fence->scheduled); + dma_fence_put(old); + } + + r = amdgpu_sync_fence(&p->job->sync, fence); + dma_fence_put(fence); + if (r) + return r; + } + return 0; +} + +static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, + uint32_t handle, u64 point, + u64 flags) +{ + struct dma_fence *fence; + int r; + + r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); + if (r) { + DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", + handle, point, r); + return r; + } + + r = amdgpu_sync_fence(&p->job->sync, fence); + dma_fence_put(fence); + + return r; +} + +static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_sem *deps; + unsigned num_deps; + int i, r; + + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle, + 0, 0); + if (r) + return r; + } + + return 0; +} + +static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i, r; + + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add_to_sync(p, + syncobj_deps[i].handle, + syncobj_deps[i].point, + syncobj_deps[i].flags); + if (r) + return r; + } + + return 0; +} + +static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_sem *deps; + unsigned num_deps; + int i; + + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + + if (p->post_deps) + return -EINVAL; + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; + + if (!p->post_deps) + return -ENOMEM; + + + for (i = 0; i < num_deps; ++i) { + p->post_deps[i].syncobj = + drm_syncobj_find(p->filp, deps[i].handle); + if (!p->post_deps[i].syncobj) + return -EINVAL; + p->post_deps[i].chain = NULL; + p->post_deps[i].point = 0; + p->num_post_deps++; + } + + return 0; +} + + +static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i; + + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); + + if (p->post_deps) + return -EINVAL; + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; + + if (!p->post_deps) + return -ENOMEM; + + for (i = 0; i < num_deps; ++i) { + struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; + + dep->chain = NULL; + if (syncobj_deps[i].point) { + dep->chain = dma_fence_chain_alloc(); + if (!dep->chain) + return -ENOMEM; + } + + dep->syncobj = drm_syncobj_find(p->filp, + syncobj_deps[i].handle); + if (!dep->syncobj) { + dma_fence_chain_free(dep->chain); + return -EINVAL; + } + dep->point = syncobj_deps[i].point; + p->num_post_deps++; + } + + return 0; +} + +static int amdgpu_cs_dependencies(struct amdgpu_device *adev, + struct amdgpu_cs_parser *p) +{ + int i, r; + + for (i = 0; i < p->nchunks; ++i) { + struct amdgpu_cs_chunk *chunk; + + chunk = &p->chunks[i]; + + switch (chunk->chunk_id) { + case AMDGPU_CHUNK_ID_DEPENDENCIES: + case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: + r = amdgpu_cs_process_fence_dep(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: + r = amdgpu_cs_process_syncobj_in_dep(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: + r = amdgpu_cs_process_syncobj_out_dep(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: + r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); + if (r) + return r; + break; + } + } + + return 0; +} + /* Convert microseconds to bytes. */ static u64 us_to_bytes(struct amdgpu_device *adev, s64 us) { @@ -659,25 +976,15 @@ out_free_user_pages: return r; } -static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) +static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser) { - struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_bo_list_entry *e; - int r; + int i; - list_for_each_entry(e, &p->validated, tv.head) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - struct dma_resv *resv = bo->tbo.base.resv; - enum amdgpu_sync_mode sync_mode; + if (!trace_amdgpu_cs_enabled()) + return; - sync_mode = amdgpu_bo_explicit_sync(bo) ? - AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER; - r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, - &fpriv->vm); - if (r) - return r; - } - return 0; + for (i = 0; i < parser->job->num_ibs; i++) + trace_amdgpu_cs(parser, i); } static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) @@ -753,8 +1060,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) } if (!p->job->vm) - return amdgpu_cs_sync_rings(p); - + return 0; r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) @@ -826,324 +1132,27 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) } } - return amdgpu_cs_sync_rings(p); -} - -static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, - struct amdgpu_cs_parser *parser) -{ - struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; - struct amdgpu_vm *vm = &fpriv->vm; - int r, ce_preempt = 0, de_preempt = 0; - struct amdgpu_ring *ring; - int i, j; - - for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { - struct amdgpu_cs_chunk *chunk; - struct amdgpu_ib *ib; - struct drm_amdgpu_cs_chunk_ib *chunk_ib; - struct drm_sched_entity *entity; - - chunk = &parser->chunks[i]; - ib = &parser->job->ibs[j]; - chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; - - if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) - continue; - - if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && - chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { - if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) - ce_preempt++; - else - de_preempt++; - - /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */ - if (ce_preempt > 1 || de_preempt > 1) - return -EINVAL; - } - - r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type, - chunk_ib->ip_instance, chunk_ib->ring, - &entity); - if (r) - return r; - - if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) - parser->job->preamble_status |= - AMDGPU_PREAMBLE_IB_PRESENT; - - if (parser->entity && parser->entity != entity) - return -EINVAL; - - /* Return if there is no run queue associated with this entity. - * Possibly because of disabled HW IP*/ - if (entity->rq == NULL) - return -EINVAL; - - parser->entity = entity; - - ring = to_amdgpu_ring(entity->rq->sched); - r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? - chunk_ib->ib_bytes : 0, - AMDGPU_IB_POOL_DELAYED, ib); - if (r) { - DRM_ERROR("Failed to get ib !\n"); - return r; - } - - ib->gpu_addr = chunk_ib->va_start; - ib->length_dw = chunk_ib->ib_bytes / 4; - ib->flags = chunk_ib->flags; - - j++; - } - - /* MM engine doesn't support user fences */ - ring = to_amdgpu_ring(parser->entity->rq->sched); - if (parser->job->uf_addr && ring->funcs->no_user_fence) - return -EINVAL; - return 0; } -static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) +static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - unsigned num_deps; - int i, r; - struct drm_amdgpu_cs_chunk_dep *deps; - - deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_dep); - - for (i = 0; i < num_deps; ++i) { - struct amdgpu_ctx *ctx; - struct drm_sched_entity *entity; - struct dma_fence *fence; - - ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); - if (ctx == NULL) - return -EINVAL; - - r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type, - deps[i].ip_instance, - deps[i].ring, &entity); - if (r) { - amdgpu_ctx_put(ctx); - return r; - } - - fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); - amdgpu_ctx_put(ctx); - - if (IS_ERR(fence)) - return PTR_ERR(fence); - else if (!fence) - continue; - - if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { - struct drm_sched_fence *s_fence; - struct dma_fence *old = fence; - - s_fence = to_drm_sched_fence(fence); - fence = dma_fence_get(&s_fence->scheduled); - dma_fence_put(old); - } - - r = amdgpu_sync_fence(&p->job->sync, fence); - dma_fence_put(fence); - if (r) - return r; - } - return 0; -} - -static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, - uint32_t handle, u64 point, - u64 flags) -{ - struct dma_fence *fence; + struct amdgpu_bo_list_entry *e; int r; - r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); - if (r) { - DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", - handle, point, r); - return r; - } + list_for_each_entry(e, &p->validated, tv.head) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + struct dma_resv *resv = bo->tbo.base.resv; + enum amdgpu_sync_mode sync_mode; - r = amdgpu_sync_fence(&p->job->sync, fence); - dma_fence_put(fence); - - return r; -} - -static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_sem *deps; - unsigned num_deps; - int i, r; - - deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_sem); - for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle, - 0, 0); + sync_mode = amdgpu_bo_explicit_sync(bo) ? + AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER; + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, + &fpriv->vm); if (r) return r; } - - return 0; -} - - -static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; - unsigned num_deps; - int i, r; - - syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_syncobj); - for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, - syncobj_deps[i].handle, - syncobj_deps[i].point, - syncobj_deps[i].flags); - if (r) - return r; - } - - return 0; -} - -static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_sem *deps; - unsigned num_deps; - int i; - - deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_sem); - - if (p->post_deps) - return -EINVAL; - - p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), - GFP_KERNEL); - p->num_post_deps = 0; - - if (!p->post_deps) - return -ENOMEM; - - - for (i = 0; i < num_deps; ++i) { - p->post_deps[i].syncobj = - drm_syncobj_find(p->filp, deps[i].handle); - if (!p->post_deps[i].syncobj) - return -EINVAL; - p->post_deps[i].chain = NULL; - p->post_deps[i].point = 0; - p->num_post_deps++; - } - - return 0; -} - - -static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; - unsigned num_deps; - int i; - - syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_syncobj); - - if (p->post_deps) - return -EINVAL; - - p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), - GFP_KERNEL); - p->num_post_deps = 0; - - if (!p->post_deps) - return -ENOMEM; - - for (i = 0; i < num_deps; ++i) { - struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; - - dep->chain = NULL; - if (syncobj_deps[i].point) { - dep->chain = dma_fence_chain_alloc(); - if (!dep->chain) - return -ENOMEM; - } - - dep->syncobj = drm_syncobj_find(p->filp, - syncobj_deps[i].handle); - if (!dep->syncobj) { - dma_fence_chain_free(dep->chain); - return -EINVAL; - } - dep->point = syncobj_deps[i].point; - p->num_post_deps++; - } - - return 0; -} - -static int amdgpu_cs_dependencies(struct amdgpu_device *adev, - struct amdgpu_cs_parser *p) -{ - int i, r; - - for (i = 0; i < p->nchunks; ++i) { - struct amdgpu_cs_chunk *chunk; - - chunk = &p->chunks[i]; - - switch (chunk->chunk_id) { - case AMDGPU_CHUNK_ID_DEPENDENCIES: - case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: - r = amdgpu_cs_process_fence_dep(p, chunk); - if (r) - return r; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_IN: - r = amdgpu_cs_process_syncobj_in_dep(p, chunk); - if (r) - return r; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: - r = amdgpu_cs_process_syncobj_out_dep(p, chunk); - if (r) - return r; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: - r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); - if (r) - return r; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: - r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); - if (r) - return r; - break; - } - } - return 0; } @@ -1243,17 +1252,6 @@ error_unlock: return r; } -static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser) -{ - int i; - - if (!trace_amdgpu_cs_enabled()) - return; - - for (i = 0; i < parser->job->num_ibs; i++) - trace_amdgpu_cs(parser, i); -} - /* Cleanup the parser structure */ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) @@ -1342,6 +1340,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) goto out; + r = amdgpu_cs_sync_rings(&parser); + if (r) + goto out; + r = amdgpu_cs_submit(&parser, data); out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); From 221bb3a9c34f8004c10ebcd0eda9d74354d519d1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 9 Sep 2022 17:36:26 -0400 Subject: [PATCH 68/88] drm/amdgpu: fix warning about missing imu prototype for imu_v11_0_3_program_rlc_ram(). Include imu_v11_0_3.h in imu_v11_0_3.c. Reviewed-by: Hawking Zhang Reported-by: kernel test robot Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c index 536dafb57ee0..fc69c1a29e23 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" #include "amdgpu_imu.h" +#include "imu_v11_0_3.h" #include "gc/gc_11_0_3_offset.h" #include "gc/gc_11_0_3_sh_mask.h" From 7136f956c73c4ba50bfeb61653dfd6a9669ea915 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Mon, 12 Sep 2022 19:34:32 -0300 Subject: [PATCH 69/88] drm/amdgpu: Fix memory leak in hpd_rx_irq_create_workqueue() If construction of the array of work queues to handle hpd_rx_irq offload work fails, we need to unwind. Destroy all the created workqueues and the allocated memory for the hpd_rx_irq_offload_work_queue struct array. Fixes: 8e794421bc98 ("drm/amd/display: Fork thread to offload work of hpd_rx_irq") Signed-off-by: Rafael Mendonca Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 819d61f2307b..4d92e55b4f64 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1296,13 +1296,21 @@ static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct if (hpd_rx_offload_wq[i].wq == NULL) { DRM_ERROR("create amdgpu_dm_hpd_rx_offload_wq fail!"); - return NULL; + goto out_err; } spin_lock_init(&hpd_rx_offload_wq[i].offload_lock); } return hpd_rx_offload_wq; + +out_err: + for (i = 0; i < max_caps; i++) { + if (hpd_rx_offload_wq[i].wq) + destroy_workqueue(hpd_rx_offload_wq[i].wq); + } + kfree(hpd_rx_offload_wq); + return NULL; } struct amdgpu_stutter_quirk { From 1e0301e127cfc708c852278539e0bbd4c77ccb09 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 13 Sep 2022 16:38:01 +0800 Subject: [PATCH 70/88] drm/amd/display: make enc32_stream_encoder_dvi_set_stream_attribute static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This symbol is not used outside of dcn32_dio_stream_encoder.c, so marks it static. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_dio_stream_encoder.c:63:6: warning: no previous prototype for ‘enc32_stream_encoder_dvi_set_stream_attribute’. Link:https://bugzilla.openanolis.cn/show_bug.cgi?id=2119 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c index 38a48983f663..0e9dce414641 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c @@ -60,7 +60,7 @@ static void enc32_dp_set_odm_combine( } /* setup stream encoder in dvi mode */ -void enc32_stream_encoder_dvi_set_stream_attribute( +static void enc32_stream_encoder_dvi_set_stream_attribute( struct stream_encoder *enc, struct dc_crtc_timing *crtc_timing, bool is_dual_link) From 1a468dd88a13b39dace8b5eb87a274782a56cece Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 13 Sep 2022 16:38:02 +0800 Subject: [PATCH 71/88] drm/amd/display: make dscl32_calc_lb_num_partitions static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This symbol is not used outside of dcn32_dpp.c, so marks it static. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_dpp.c:34:6: warning: no previous prototype for ‘dscl32_calc_lb_num_partitions’. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2118 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dpp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dpp.c index f349cbe2a0f0..dcf12a0b031c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dpp.c @@ -31,7 +31,7 @@ #include "dcn30/dcn30_cm_common.h" /* Compute the maximum number of lines that we can fit in the line buffer */ -void dscl32_calc_lb_num_partitions( +static void dscl32_calc_lb_num_partitions( const struct scaler_data *scl_data, enum lb_memory_config lb_config, int *num_part_y, From 7f0eac6564dc0967fcd430e0b729068afb70d60e Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 13 Sep 2022 16:38:03 +0800 Subject: [PATCH 72/88] drm/amd/display: make dcn32_link_encoder_is_in_alt_mode and dcn32_link_encoder_get_max_link_cap static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These two functions are not used outside the function dcn32_dio_link_encoder.c, so the modification is defined as static. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_dio_link_encoder.c:121:6: warning: no previous prototype for ‘dcn32_link_encoder_is_in_alt_mode’. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_dio_link_encoder.c:136:6: warning: no previous prototype for ‘dcn32_link_encoder_get_max_link_cap’. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2117 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c index d6855d4f749b..fdae6aa89908 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c @@ -118,7 +118,7 @@ void dcn32_link_encoder_enable_dp_output( } } -bool dcn32_link_encoder_is_in_alt_mode(struct link_encoder *enc) +static bool dcn32_link_encoder_is_in_alt_mode(struct link_encoder *enc) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); uint32_t dp_alt_mode_disable = 0; @@ -133,7 +133,7 @@ bool dcn32_link_encoder_is_in_alt_mode(struct link_encoder *enc) return is_usb_c_alt_mode; } -void dcn32_link_encoder_get_max_link_cap(struct link_encoder *enc, +static void dcn32_link_encoder_get_max_link_cap(struct link_encoder *enc, struct dc_link_settings *link_settings) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); From 0cdbffd55f11ab39e5f8902860cc48ae967cd1b9 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 13 Sep 2022 16:38:04 +0800 Subject: [PATCH 73/88] drm/amd/display: make mpc32_program_shaper and mpc32_program_3dlut static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These two functions are not used outside the function dcn32_mpc.c, so the modification is defined as static. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_mpc.c:704:6: warning: no previous prototype for ‘mpc32_program_shaper’. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_mpc.c:900:6: warning: no previous prototype for ‘mpc32_program_3dlut’. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2115 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index 357bd2461bc9..f4bc77972c4e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -701,7 +701,7 @@ static void mpc32_power_on_shaper_3dlut( } -bool mpc32_program_shaper( +static bool mpc32_program_shaper( struct mpc *mpc, const struct pwl_params *params, uint32_t mpcc_id) @@ -897,7 +897,7 @@ static void mpc32_set_3dlut_mode( } -bool mpc32_program_3dlut( +static bool mpc32_program_3dlut( struct mpc *mpc, const struct tetrahedral_params *params, int mpcc_id) From bd7352332b60a5d2fff625332acfa2ad96e0ca1c Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 13 Sep 2022 16:38:05 +0800 Subject: [PATCH 74/88] drm/amd/display: Remove the unused function copy_stream_update_to_stream() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function copy_stream_update_to_stream() is defined in the notif.c file, but not called elsewhere, so delete this unused function. drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc.c:2852:6: warning: no previous prototype for ‘dc_reset_state’. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2113 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 253dc4e35ba4..bc7325831c91 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2852,16 +2852,6 @@ static void copy_stream_update_to_stream(struct dc *dc, } } -void dc_reset_state(struct dc *dc, struct dc_state *context) -{ - dc_resource_state_destruct(context); - - /* clear the structure, but don't reset the reference count */ - memset(context, 0, offsetof(struct dc_state, refcount)); - - init_state(dc, context); -} - static bool update_planes_and_stream_state(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_state *stream, From ee10818362d81ca1796cacdfb8a6c6f857cdb34a Mon Sep 17 00:00:00 2001 From: Xu Panda Date: Mon, 12 Sep 2022 03:22:42 +0000 Subject: [PATCH 75/88] drm/amd/display/amdgpu_dm: remove duplicate included header files soc15_common.h is included more than once. Reported-by: Zeal Robot Signed-off-by: Xu Panda Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4d92e55b4f64..5da2a1d97b81 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -98,8 +98,6 @@ #include "soc15_common.h" #include "vega10_ip_offset.h" -#include "soc15_common.h" - #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" From 0d6516efff2cf275591c57faadce249257d58980 Mon Sep 17 00:00:00 2001 From: Li Ma Date: Mon, 5 Sep 2022 17:20:58 +0800 Subject: [PATCH 76/88] drm/amd/pm:add new gpu_metrics_v2_3 to acquire average temperature info Add new gpu_metrics_v2_3 to acquire average temperature info from SMU metrics. To acquire average temp info from gpu_metrics interface, but gpu_metrics_v2_2 only has members to show current temp info. --- v1: Only add average_temperature_gfx in gpu_metrics_v2_3. v2: Add average temp members for soc, core and l3 in gpu_metrics_v2_3 and put these new members at the end of gpu_metrics_v2_3. Add operation to read average temp info from metrics table. v3: Merge v1 and v2 and rename the patch. v4: Merge v3. Add firmware version judgment in vangogh_common_get_gpu_metrics to maintain backward compatibility and rename the patch. "return ret" on error scenario in smu_cmn_get_smc_version. Signed-off-by: Li Ma Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- .../gpu/drm/amd/include/kgd_pp_interface.h | 58 +++++++ .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 160 ++++++++++++++++-- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 3 + 3 files changed, 209 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 7e3231c2191c..a40ead44778a 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -824,4 +824,62 @@ struct gpu_metrics_v2_2 { uint64_t indep_throttle_status; }; +struct gpu_metrics_v2_3 { + struct metrics_table_header common_header; + + /* Temperature */ + uint16_t temperature_gfx; // gfx temperature on APUs + uint16_t temperature_soc; // soc temperature on APUs + uint16_t temperature_core[8]; // CPU core temperature on APUs + uint16_t temperature_l3[2]; + + /* Utilization */ + uint16_t average_gfx_activity; + uint16_t average_mm_activity; // UVD or VCN + + /* Driver attached timestamp (in ns) */ + uint64_t system_clock_counter; + + /* Power/Energy */ + uint16_t average_socket_power; // dGPU + APU power on A + A platform + uint16_t average_cpu_power; + uint16_t average_soc_power; + uint16_t average_gfx_power; + uint16_t average_core_power[8]; // CPU core power on APUs + + /* Average clocks */ + uint16_t average_gfxclk_frequency; + uint16_t average_socclk_frequency; + uint16_t average_uclk_frequency; + uint16_t average_fclk_frequency; + uint16_t average_vclk_frequency; + uint16_t average_dclk_frequency; + + /* Current clocks */ + uint16_t current_gfxclk; + uint16_t current_socclk; + uint16_t current_uclk; + uint16_t current_fclk; + uint16_t current_vclk; + uint16_t current_dclk; + uint16_t current_coreclk[8]; // CPU core clocks + uint16_t current_l3clk[2]; + + /* Throttle status (ASIC dependent) */ + uint32_t throttle_status; + + /* Fans */ + uint16_t fan_pwm; + + uint16_t padding[3]; + + /* Throttle status (ASIC independent) */ + uint64_t indep_throttle_status; + + /* Average Temperature */ + uint16_t average_temperature_gfx; // average gfx temperature on APUs + uint16_t average_temperature_soc; // average soc temperature on APUs + uint16_t average_temperature_core[8]; // average CPU core temperature on APUs + uint16_t average_temperature_l3[2]; +}; #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 847990145dcd..cb10c7e31264 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -223,14 +223,13 @@ static int vangogh_tables_init(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *tables = smu_table->tables; - struct amdgpu_device *adev = smu->adev; uint32_t if_version; + uint32_t smu_version; uint32_t ret = 0; - ret = smu_cmn_get_smc_version(smu, &if_version, NULL); + ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); if (ret) { - dev_err(adev->dev, "Failed to get smu if version!\n"); - goto err0_out; + return ret; } SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t), @@ -255,7 +254,10 @@ static int vangogh_tables_init(struct smu_context *smu) goto err0_out; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2); + if (smu_version >= 0x043F3E00) + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_3); + else + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) goto err1_out; @@ -1648,6 +1650,63 @@ static int vangogh_set_watermarks_table(struct smu_context *smu, return 0; } +static ssize_t vangogh_get_legacy_gpu_metrics_v2_3(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v2_3 *gpu_metrics = + (struct gpu_metrics_v2_3 *)smu_table->gpu_metrics_table; + SmuMetrics_legacy_t metrics; + int ret = 0; + + ret = smu_cmn_get_metrics_table(smu, &metrics, true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 3); + + gpu_metrics->temperature_gfx = metrics.GfxTemperature; + gpu_metrics->temperature_soc = metrics.SocTemperature; + memcpy(&gpu_metrics->temperature_core[0], + &metrics.CoreTemperature[0], + sizeof(uint16_t) * 4); + gpu_metrics->temperature_l3[0] = metrics.L3Temperature[0]; + + gpu_metrics->average_gfx_activity = metrics.GfxActivity; + gpu_metrics->average_mm_activity = metrics.UvdActivity; + + gpu_metrics->average_socket_power = metrics.CurrentSocketPower; + gpu_metrics->average_cpu_power = metrics.Power[0]; + gpu_metrics->average_soc_power = metrics.Power[1]; + gpu_metrics->average_gfx_power = metrics.Power[2]; + memcpy(&gpu_metrics->average_core_power[0], + &metrics.CorePower[0], + sizeof(uint16_t) * 4); + + gpu_metrics->average_gfxclk_frequency = metrics.GfxclkFrequency; + gpu_metrics->average_socclk_frequency = metrics.SocclkFrequency; + gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency; + gpu_metrics->average_fclk_frequency = metrics.MemclkFrequency; + gpu_metrics->average_vclk_frequency = metrics.VclkFrequency; + gpu_metrics->average_dclk_frequency = metrics.DclkFrequency; + + memcpy(&gpu_metrics->current_coreclk[0], + &metrics.CoreFrequency[0], + sizeof(uint16_t) * 4); + gpu_metrics->current_l3clk[0] = metrics.L3Frequency[0]; + + gpu_metrics->throttle_status = metrics.ThrottlerStatus; + gpu_metrics->indep_throttle_status = + smu_cmn_get_indep_throttler_status(metrics.ThrottlerStatus, + vangogh_throttler_map); + + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v2_3); +} + static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context *smu, void **table) { @@ -1705,6 +1764,77 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v2_2); } +static ssize_t vangogh_get_gpu_metrics_v2_3(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v2_3 *gpu_metrics = + (struct gpu_metrics_v2_3 *)smu_table->gpu_metrics_table; + SmuMetrics_t metrics; + int ret = 0; + + ret = smu_cmn_get_metrics_table(smu, &metrics, true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 3); + + gpu_metrics->temperature_gfx = metrics.Current.GfxTemperature; + gpu_metrics->temperature_soc = metrics.Current.SocTemperature; + memcpy(&gpu_metrics->temperature_core[0], + &metrics.Current.CoreTemperature[0], + sizeof(uint16_t) * 4); + gpu_metrics->temperature_l3[0] = metrics.Current.L3Temperature[0]; + + gpu_metrics->average_temperature_gfx = metrics.Average.GfxTemperature; + gpu_metrics->average_temperature_soc = metrics.Average.SocTemperature; + memcpy(&gpu_metrics->average_temperature_core[0], + &metrics.Average.CoreTemperature[0], + sizeof(uint16_t) * 4); + gpu_metrics->average_temperature_l3[0] = metrics.Average.L3Temperature[0]; + + gpu_metrics->average_gfx_activity = metrics.Current.GfxActivity; + gpu_metrics->average_mm_activity = metrics.Current.UvdActivity; + + gpu_metrics->average_socket_power = metrics.Current.CurrentSocketPower; + gpu_metrics->average_cpu_power = metrics.Current.Power[0]; + gpu_metrics->average_soc_power = metrics.Current.Power[1]; + gpu_metrics->average_gfx_power = metrics.Current.Power[2]; + memcpy(&gpu_metrics->average_core_power[0], + &metrics.Average.CorePower[0], + sizeof(uint16_t) * 4); + + gpu_metrics->average_gfxclk_frequency = metrics.Average.GfxclkFrequency; + gpu_metrics->average_socclk_frequency = metrics.Average.SocclkFrequency; + gpu_metrics->average_uclk_frequency = metrics.Average.MemclkFrequency; + gpu_metrics->average_fclk_frequency = metrics.Average.MemclkFrequency; + gpu_metrics->average_vclk_frequency = metrics.Average.VclkFrequency; + gpu_metrics->average_dclk_frequency = metrics.Average.DclkFrequency; + + gpu_metrics->current_gfxclk = metrics.Current.GfxclkFrequency; + gpu_metrics->current_socclk = metrics.Current.SocclkFrequency; + gpu_metrics->current_uclk = metrics.Current.MemclkFrequency; + gpu_metrics->current_fclk = metrics.Current.MemclkFrequency; + gpu_metrics->current_vclk = metrics.Current.VclkFrequency; + gpu_metrics->current_dclk = metrics.Current.DclkFrequency; + + memcpy(&gpu_metrics->current_coreclk[0], + &metrics.Current.CoreFrequency[0], + sizeof(uint16_t) * 4); + gpu_metrics->current_l3clk[0] = metrics.Current.L3Frequency[0]; + + gpu_metrics->throttle_status = metrics.Current.ThrottlerStatus; + gpu_metrics->indep_throttle_status = + smu_cmn_get_indep_throttler_status(metrics.Current.ThrottlerStatus, + vangogh_throttler_map); + + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v2_3); +} + static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu, void **table) { @@ -1772,20 +1902,26 @@ static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu, static ssize_t vangogh_common_get_gpu_metrics(struct smu_context *smu, void **table) { - struct amdgpu_device *adev = smu->adev; uint32_t if_version; + uint32_t smu_version; int ret = 0; - ret = smu_cmn_get_smc_version(smu, &if_version, NULL); + ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); if (ret) { - dev_err(adev->dev, "Failed to get smu if version!\n"); return ret; } - if (if_version < 0x3) - ret = vangogh_get_legacy_gpu_metrics(smu, table); - else - ret = vangogh_get_gpu_metrics(smu, table); + if (smu_version >= 0x043F3E00) { + if (if_version < 0x3) + ret = vangogh_get_legacy_gpu_metrics_v2_3(smu, table); + else + ret = vangogh_get_gpu_metrics_v2_3(smu, table); + } else { + if (if_version < 0x3) + ret = vangogh_get_legacy_gpu_metrics(smu, table); + else + ret = vangogh_get_gpu_metrics(smu, table); + } return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 15e4298c7cc8..e4f8f90ac5aa 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -969,6 +969,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev) case METRICS_VERSION(2, 2): structure_size = sizeof(struct gpu_metrics_v2_2); break; + case METRICS_VERSION(2, 3): + structure_size = sizeof(struct gpu_metrics_v2_3); + break; default: return; } From db10109793bbd20c47d9cd324b27e1466dc236fb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 9 Sep 2022 11:47:20 -0400 Subject: [PATCH 77/88] drm/amdgpu: move nbio ih_doorbell_range() into ih code for vega MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This mirrors what we do for other asics and this way we are sure the ih doorbell range is properly initialized. There is a comment about the way doorbells on gfx9 work that requires that they are initialized for other IPs before GFX is initialized. In this case IH is initialized before GFX, so there should be no issue. This is a prerequisite for fixing the Unsupported Request error reported through AER during driver load. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216373 The error was unnoticed before and got visible because of the commit referenced below. This doesn't fix anything in the commit below, rather fixes the issue in amdgpu exposed by the commit. The reference is only to associate this commit with below one so that both go together. Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()") Acked-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 --- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 4 ++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index fde6154f2009..7324e304288e 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1224,9 +1224,6 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev) ring->use_doorbell, ring->doorbell_index, adev->doorbell_index.sdma_doorbell_range); } - - adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, - adev->irq.ih.doorbell_index); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 03b7066471f9..1e83db0c5438 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -289,6 +289,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) } } + if (!amdgpu_sriov_vf(adev)) + adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->irq.ih.doorbell_index); + pci_set_master(adev->pdev); /* enable interrupts */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 2022ffbb8dba..59dfca093155 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -340,6 +340,10 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) } } + if (!amdgpu_sriov_vf(adev)) + adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->irq.ih.doorbell_index); + pci_set_master(adev->pdev); /* enable interrupts */ From 59c43748c7c82de9fb537fe790cbfc71734333ad Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 9 Sep 2022 11:53:27 -0400 Subject: [PATCH 78/88] drm/amdgpu: move nbio sdma_doorbell_range() into sdma code for vega MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This mirrors what we do for other asics and this way we are sure the sdma doorbell range is properly initialized. There is a comment about the way doorbells on gfx9 work that requires that they are initialized for other IPs before GFX is initialized. However, the statement says that it applies to multimedia as well, but the VCN code currently initializes doorbells after GFX and there are no known issues there. In my testing at least I don't see any problems on SDMA. This is a prerequisite for fixing the Unsupported Request error reported through AER during driver load. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216373 The error was unnoticed before and got visible because of the commit referenced below. This doesn't fix anything in the commit below, rather fixes the issue in amdgpu exposed by the commit. The reference is only to associate this commit with below one so that both go together. Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()") Acked-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/soc15.c | 22 ---------------------- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 0cf9d3b486b2..7fe8bf3417db 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1504,6 +1504,11 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) WREG32_SDMA(i, mmSDMA0_CNTL, temp); if (!amdgpu_sriov_vf(adev)) { + ring = &adev->sdma.instance[i].ring; + adev->nbio.funcs->sdma_doorbell_range(adev, i, + ring->use_doorbell, ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); + /* unhalt engine */ temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL); temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 7324e304288e..183024d7c184 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1211,22 +1211,6 @@ static int soc15_common_sw_fini(void *handle) return 0; } -static void soc15_doorbell_range_init(struct amdgpu_device *adev) -{ - int i; - struct amdgpu_ring *ring; - - /* sdma/ih doorbell range are programed by hypervisor */ - if (!amdgpu_sriov_vf(adev)) { - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; - adev->nbio.funcs->sdma_doorbell_range(adev, i, - ring->use_doorbell, ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); - } - } -} - static int soc15_common_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1246,12 +1230,6 @@ static int soc15_common_hw_init(void *handle) /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); - /* HW doorbell routing policy: doorbell writing not - * in SDMA/IH/MM/ACV range will be routed to CP. So - * we need to init SDMA/IH/MM/ACV doorbell range prior - * to CP ip block init and ring test. - */ - soc15_doorbell_range_init(adev); return 0; } From c1c39032a0748be08f6bc33a8dad70b83ef311a6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 30 Aug 2022 10:59:49 -0400 Subject: [PATCH 79/88] drm/amdgpu: make sure to init common IP before gmc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move common IP init before GMC init so that HDP gets remapped before GMC init which uses it. This fixes the Unsupported Request error reported through AER during driver load. The error happens as a write happens to the remap offset before real remapping is done. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216373 The error was unnoticed before and got visible because of the commit referenced below. This doesn't fix anything in the commit below, rather fixes the issue in amdgpu exposed by the commit. The reference is only to associate this commit with below one so that both go together. Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()") Acked-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 943c9e750575..47aac179951a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2365,8 +2365,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } adev->ip_blocks[i].status.sw = true; - /* need to do gmc hw init early so we can allocate gpu mem */ - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { + /* need to do common hw init early so everything is set up for gmc */ + r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); + if (r) { + DRM_ERROR("hw_init %d failed %d\n", i, r); + goto init_failed; + } + adev->ip_blocks[i].status.hw = true; + } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { + /* need to do gmc hw init early so we can allocate gpu mem */ /* Try to reserve bad pages early */ if (amdgpu_sriov_vf(adev)) amdgpu_virt_exchange_data(adev); @@ -3054,8 +3062,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) int i, r; static enum amd_ip_block_type ip_order[] = { - AMD_IP_BLOCK_TYPE_GMC, AMD_IP_BLOCK_TYPE_COMMON, + AMD_IP_BLOCK_TYPE_GMC, AMD_IP_BLOCK_TYPE_PSP, AMD_IP_BLOCK_TYPE_IH, }; From e0f1d483b85bd35ed10985e7d3e64386e4e14e50 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Sep 2022 13:27:35 +0800 Subject: [PATCH 80/88] drm/amd/display: make some functions static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions are not used outside the file dcn30_resource.c, so the modification is defined as static. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_resource.c:1949:6: warning: no previous prototype for ‘is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch’. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_resource.c:1919:5: warning: no previous prototype for ‘get_frame_rate_at_max_stretch_100hz’. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2143 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 64320e0ca446..f6f3878c99b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1916,7 +1916,7 @@ static int get_refresh_rate(struct dc_state *context) */ #define V_SCALE (10000 / MAX_STRETCHED_V_BLANK) -int get_frame_rate_at_max_stretch_100hz(struct dc_state *context) +static int get_frame_rate_at_max_stretch_100hz(struct dc_state *context) { struct dc_crtc_timing *timing = NULL; uint32_t sec_per_100_lines; @@ -1946,7 +1946,7 @@ int get_frame_rate_at_max_stretch_100hz(struct dc_state *context) return scaled_refresh_rate; } -bool is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(struct dc_state *context) +static bool is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(struct dc_state *context) { int refresh_rate_max_stretch_100hz; int min_refresh_100hz; From 668562f49dc67a16e3649432cef341d1649c1109 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Sep 2022 13:27:36 +0800 Subject: [PATCH 81/88] drm/amd/display: make enc314_stream_encoder_dvi_set_stream_attribute static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This symbol is not used outside of dcn314_dio_stream_encoder.c, so marks it static. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn314/dcn314_dio_stream_encoder.c:84:6: warning: no previous prototype for ‘enc314_stream_encoder_dvi_set_stream_attribute’. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2146 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c index 06d8638db696..914c5da737ed 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c @@ -80,7 +80,7 @@ static void enc314_dp_set_odm_combine( } /* setup stream encoder in dvi mode */ -void enc314_stream_encoder_dvi_set_stream_attribute( +static void enc314_stream_encoder_dvi_set_stream_attribute( struct stream_encoder *enc, struct dc_crtc_timing *crtc_timing, bool is_dual_link) From d67927e4483811a23bca1a5508de53d289df3209 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Sep 2022 13:27:37 +0800 Subject: [PATCH 82/88] drm/amd/display: make optc314_phantom_crtc_post_enable static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This symbol is not used outside of dcn314_optc.c, so marks it static. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn314/dcn314_optc.c:153:6: warning: no previous prototype for ‘optc314_phantom_crtc_post_enable’. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2147 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c index 38aa28ec6b13..47eb162f1a75 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c @@ -150,7 +150,7 @@ static bool optc314_disable_crtc(struct timing_generator *optc) return true; } -void optc314_phantom_crtc_post_enable(struct timing_generator *optc) +static void optc314_phantom_crtc_post_enable(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); From b70cfd91a980fb423299d1c48f4127d4b7fe79c4 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Sep 2022 13:27:38 +0800 Subject: [PATCH 83/88] drm/amd/display: make some functions static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions are not used outside the file dcn32_hubbub.c, so the modification is defined as static. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn314/dcn314_dccg.c:223:6: warning: no previous prototype for ‘dccg314_set_valid_pixel_rate’. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn314/dcn314_dccg.c:184:6: warning: no previous prototype for ‘dccg314_set_dpstreamclk’. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn314/dcn314_dccg.c:140:6: warning: no previous prototype for ‘dccg314_set_dtbclk_dto’. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2144 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index 232cc15979dd..36630d532c18 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -137,7 +137,7 @@ static void dccg314_set_dtbclk_p_src( } /* Controls the generation of pixel valid for OTG in (OTG -> HPO case) */ -void dccg314_set_dtbclk_dto( +static void dccg314_set_dtbclk_dto( struct dccg *dccg, const struct dtbclk_dto_params *params) { @@ -181,7 +181,7 @@ void dccg314_set_dtbclk_dto( } } -void dccg314_set_dpstreamclk( +static void dccg314_set_dpstreamclk( struct dccg *dccg, enum streamclk_source src, int otg_inst, @@ -220,7 +220,7 @@ void dccg314_set_dpstreamclk( } } -void dccg314_set_valid_pixel_rate( +static void dccg314_set_valid_pixel_rate( struct dccg *dccg, int ref_dtbclk_khz, int otg_inst, From 9e33e951ecafea581a5d346c56cbdd00f58814ca Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Sep 2022 13:27:39 +0800 Subject: [PATCH 84/88] drm/amd/display: make mmhubbub32_config_mcif_buf static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This symbol is not used outside of dcn32_mmhubbub.c, so marks it static. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_mmhubbub.c:103:6: warning: no previous prototype for ‘mmhubbub32_config_mcif_buf’. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2145 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c index adf93cc8359c..41b0baf8e183 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c @@ -100,7 +100,7 @@ static void mmhubbub32_warmup_mcif(struct mcif_wb *mcif_wb, REG_UPDATE(MMHUBBUB_WARMUP_CONTROL_STATUS, MMHUBBUB_WARMUP_EN, false); } -void mmhubbub32_config_mcif_buf(struct mcif_wb *mcif_wb, +static void mmhubbub32_config_mcif_buf(struct mcif_wb *mcif_wb, struct mcif_buf_params *params, unsigned int dest_height) { From c1e48e36c120df7e3a3cb39d17d64c753485f225 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Sep 2022 13:27:40 +0800 Subject: [PATCH 85/88] drm/amd/display: make some functions static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions are not used outside the file dcn32_hubbub.c, so the modification is defined as static. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_hubbub.c:912:6: warning: no previous prototype for ‘hubbub32_force_wm_propagate_to_pipes’. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_hubbub.c:823:6: warning: no previous prototype for ‘hubbub32_wm_read_state’. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_hubbub.c:772:6: warning: no previous prototype for ‘hubbub32_init_watermarks’. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_hubbub.c:712:6: warning: no previous prototype for ‘hubbub32_force_usr_retraining_allow’. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2141 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c index 99eb239bbc7b..d4963959426c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c @@ -709,7 +709,7 @@ static bool hubbub32_program_usr_watermarks( return wm_pending; } -void hubbub32_force_usr_retraining_allow(struct hubbub *hubbub, bool allow) +static void hubbub32_force_usr_retraining_allow(struct hubbub *hubbub, bool allow) { struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); @@ -769,7 +769,7 @@ static bool hubbub32_program_watermarks( } /* Copy values from WM set A to all other sets */ -void hubbub32_init_watermarks(struct hubbub *hubbub) +static void hubbub32_init_watermarks(struct hubbub *hubbub) { struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); uint32_t reg; @@ -820,7 +820,7 @@ void hubbub32_init_watermarks(struct hubbub *hubbub) REG_WRITE(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D, reg); } -void hubbub32_wm_read_state(struct hubbub *hubbub, +static void hubbub32_wm_read_state(struct hubbub *hubbub, struct dcn_hubbub_wm *wm) { struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); @@ -909,7 +909,7 @@ void hubbub32_wm_read_state(struct hubbub *hubbub, DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D, &s->fclk_pstate_change); } -void hubbub32_force_wm_propagate_to_pipes(struct hubbub *hubbub) +static void hubbub32_force_wm_propagate_to_pipes(struct hubbub *hubbub) { struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); uint32_t refclk_mhz = hubbub->ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; From 73258e916a4e7e8adc9402da68ad9bc40dd38686 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Sep 2022 13:27:41 +0800 Subject: [PATCH 86/88] drm/amd/display: make some functions static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions are not used outside the file dcn32_dccg.c, so the modification is defined as static. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_dccg.c:257:6: warning: no previous prototype for ‘dccg32_otg_drop_pixel’. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_dccg.c:248:6: warning: no previous prototype for ‘dccg32_otg_add_pixel’. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_dccg.c:211:6: warning: no previous prototype for ‘dccg32_set_dpstreamclk’. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_dccg.c:136:6: warning: no previous prototype for ‘dccg32_set_dtbclk_dto’. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2142 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index 0d5e8a441512..26eb04ea472c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -133,7 +133,7 @@ static void dccg32_set_dtbclk_p_src( } /* Controls the generation of pixel valid for OTG in (OTG -> HPO case) */ -void dccg32_set_dtbclk_dto( +static void dccg32_set_dtbclk_dto( struct dccg *dccg, const struct dtbclk_dto_params *params) { @@ -208,7 +208,7 @@ static void dccg32_get_dccg_ref_freq(struct dccg *dccg, return; } -void dccg32_set_dpstreamclk( +static void dccg32_set_dpstreamclk( struct dccg *dccg, enum streamclk_source src, int otg_inst, @@ -245,7 +245,7 @@ void dccg32_set_dpstreamclk( } } -void dccg32_otg_add_pixel(struct dccg *dccg, +static void dccg32_otg_add_pixel(struct dccg *dccg, uint32_t otg_inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -254,7 +254,7 @@ void dccg32_otg_add_pixel(struct dccg *dccg, OTG_ADD_PIXEL[otg_inst], 1); } -void dccg32_otg_drop_pixel(struct dccg *dccg, +static void dccg32_otg_drop_pixel(struct dccg *dccg, uint32_t otg_inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); From 04206ff06a849834e1feb21b51735fe673a3e484 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Sep 2022 13:27:42 +0800 Subject: [PATCH 87/88] drm/amd/display: make optc32_phantom_crtc_post_enable, optc32_setup_manual_trigger and optc32_set_drr static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These three functions are not used outside the function dcn32_optc.c, so the modification is defined as static. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_optc.c:159:6: warning: no previous prototype for function 'optc32_phantom_crtc_post_enable'. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_optc.c:218:6: warning: no previous prototype for ‘optc32_set_drr’. drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_optc.c:193:6: warning: no previous prototype for ‘optc32_setup_manual_trigger’. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2140 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index 1fad7b48bd5b..ec3989d37086 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -156,7 +156,7 @@ static bool optc32_disable_crtc(struct timing_generator *optc) return true; } -void optc32_phantom_crtc_post_enable(struct timing_generator *optc) +static void optc32_phantom_crtc_post_enable(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -190,7 +190,7 @@ static void optc32_set_odm_bypass(struct timing_generator *optc, optc1->opp_count = 1; } -void optc32_setup_manual_trigger(struct timing_generator *optc) +static void optc32_setup_manual_trigger(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); struct dc *dc = optc->ctx->dc; @@ -215,7 +215,7 @@ void optc32_setup_manual_trigger(struct timing_generator *optc) } } -void optc32_set_drr( +static void optc32_set_drr( struct timing_generator *optc, const struct drr_params *params) { From dacd2d2d9d800b7ab2ee2734578112532cba8105 Mon Sep 17 00:00:00 2001 From: Yihao Han Date: Wed, 14 Sep 2022 00:47:49 -0700 Subject: [PATCH 88/88] drm/amd/display: fix boolconv.cocci warning ./drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c:729:63-68: WARNING: conversion to bool not needed here Generated by: scripts/coccinelle/misc/boolconv.cocci Signed-off-by: Yihao Han Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index f4bc77972c4e..4edd0655965b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -726,7 +726,7 @@ static bool mpc32_program_shaper( else next_mode = LUT_RAM_A; - mpc32_configure_shaper_lut(mpc, next_mode == LUT_RAM_A ? true:false, mpcc_id); + mpc32_configure_shaper_lut(mpc, next_mode == LUT_RAM_A, mpcc_id); if (next_mode == LUT_RAM_A) mpc32_program_shaper_luta_settings(mpc, params, mpcc_id);