From 7c6fb61a400bf3218c6504cb2d48858f98822c9d Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Wed, 7 Sep 2022 20:31:36 +0800 Subject: [PATCH 01/29] drm/amd/pm: disable BACO entry/exit completely on several sienna cichlid cards To avoid hardware intermittent failures. Signed-off-by: Guchun Chen Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 6db67f082d91..644ea150e075 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -368,6 +368,17 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu) smu_baco->platform_support = (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? true : false; + + /* + * Disable BACO entry/exit completely on below SKUs to + * avoid hardware intermittent failures. + */ + if (((adev->pdev->device == 0x73A1) && + (adev->pdev->revision == 0x00)) || + ((adev->pdev->device == 0x73BF) && + (adev->pdev->revision == 0xCF))) + smu_baco->platform_support = false; + } } From 6c20490663553cd7e07d8de8af482012329ab9d6 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 8 Sep 2022 08:28:57 +0530 Subject: [PATCH 02/29] drm/amdgpu: Don't enable LTR if not supported As per PCIE Base Spec r4.0 Section 6.18 'Software must not enable LTR in an Endpoint unless the Root Complex and all intermediate Switches indicate support for LTR.' This fixes the Unsupported Request error reported through AER during ASPM enablement. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216455 The error was unnoticed before and got visible because of the commit referenced below. This doesn't fix anything in the commit below, rather fixes the issue in amdgpu exposed by the commit. The reference is only to associate this commit with below one so that both go together. Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()") Reported-by: Gustaw Smolarczyk Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 9 ++++++++- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 9 ++++++++- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 9 ++++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index b465baa26762..aa761ff3a5fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -380,6 +380,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, WREG32_PCIE(smnPCIE_LC_CNTL, data); } +#ifdef CONFIG_PCIEASPM static void nbio_v2_3_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; @@ -401,9 +402,11 @@ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); } +#endif static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) { +#ifdef CONFIG_PCIEASPM uint32_t def, data; def = data = RREG32_PCIE(smnPCIE_LC_CNTL); @@ -459,7 +462,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL6, data); - nbio_v2_3_program_ltr(adev); + /* Don't bother about LTR if LTR is not enabled + * in the path */ + if (adev->pdev->ltr_path) + nbio_v2_3_program_ltr(adev); def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; @@ -483,6 +489,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL3, data); +#endif } static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index f7f6ddebd3e4..37615a77287b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -282,6 +282,7 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } +#ifdef CONFIG_PCIEASPM static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; @@ -303,9 +304,11 @@ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); } +#endif static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) { +#ifdef CONFIG_PCIEASPM uint32_t def, data; def = data = RREG32_PCIE(smnPCIE_LC_CNTL); @@ -361,7 +364,10 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL6, data); - nbio_v6_1_program_ltr(adev); + /* Don't bother about LTR if LTR is not enabled + * in the path */ + if (adev->pdev->ltr_path) + nbio_v6_1_program_ltr(adev); def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; @@ -385,6 +391,7 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL3, data); +#endif } const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 11848d1e238b..19455a725939 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -673,6 +673,7 @@ struct amdgpu_nbio_ras nbio_v7_4_ras = { }; +#ifdef CONFIG_PCIEASPM static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; @@ -694,9 +695,11 @@ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); } +#endif static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) { +#ifdef CONFIG_PCIEASPM uint32_t def, data; if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4)) @@ -755,7 +758,10 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL6, data); - nbio_v7_4_program_ltr(adev); + /* Don't bother about LTR if LTR is not enabled + * in the path */ + if (adev->pdev->ltr_path) + nbio_v7_4_program_ltr(adev); def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; @@ -779,6 +785,7 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL3, data); +#endif } const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { From 66f99628eb24409cb8feb5061f78283c8b65f820 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 6 Sep 2022 15:01:49 -0400 Subject: [PATCH 03/29] drm/amdgpu: use dirty framebuffer helper Currently, we aren't handling DRM_IOCTL_MODE_DIRTYFB. So, use drm_atomic_helper_dirtyfb() as the dirty callback in the amdgpu_fb_funcs struct. Signed-off-by: Hamza Mahfooz Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index c20922a5af9f..5b09c8f4fe95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -496,6 +497,7 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { .destroy = drm_gem_fb_destroy, .create_handle = drm_gem_fb_create_handle, + .dirty = drm_atomic_helper_dirtyfb, }; uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, From df2c6e0c95ca22db5d6bea7e8169841c95426f8d Mon Sep 17 00:00:00 2001 From: Candice Li Date: Wed, 7 Sep 2022 15:58:59 +0800 Subject: [PATCH 04/29] drm/amdgpu: Enable full reset when RAS is supported on gc v11_0_0 Enable full reset for RAS supported configuration on gc v11_0_0. v2: simplify the code. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 55284b24f113..2e50db3b761e 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -421,6 +421,7 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev) { switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): + return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC); case IP_VERSION(11, 0, 2): return false; default: From 36de13fdb04abef3ee03ade5129ab146de63983b Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 9 Sep 2022 11:06:50 +0800 Subject: [PATCH 05/29] drm/amdgpu: change the alignment size of TMR BO to 1M align TMR BO size TO tmr size is not necessary, modify the size to 1M to avoid re-create BO fail when serious VRAM fragmentation. v2: add new macro PSP_TMR_ALIGNMENT for TMR BO alignment size Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 35738db12cd1..c9dec2434f37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -756,7 +756,7 @@ static int psp_tmr_init(struct psp_context *psp) } pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; - ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE(psp->adev), + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, AMDGPU_GEM_DOMAIN_VRAM, &psp->tmr_bo, &psp->tmr_mc_addr, pptr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index c32b74bd970f..e593e8c2a54d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -36,6 +36,7 @@ #define PSP_CMD_BUFFER_SIZE 0x1000 #define PSP_1_MEG 0x100000 #define PSP_TMR_SIZE(adev) ((adev)->asic_type == CHIP_ALDEBARAN ? 0x800000 : 0x400000) +#define PSP_TMR_ALIGNMENT 0x100000 #define PSP_FW_NAME_LEN 0x24 enum psp_shared_mem_size { From 8c5708d3da37b8c7c3c22c7e945b9a76a7c9539b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 30 Aug 2022 11:08:09 -0400 Subject: [PATCH 06/29] drm/amdgpu: add HDP remap functionality to nbio 7.7 Was missing before and would have resulted in a write to a non-existant register. Normally APUs don't use HDP, but other asics could use this code and APUs do use the HDP when used in passthrough. Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index f30bc826a878..def89379b51a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -28,6 +28,14 @@ #include "nbio/nbio_7_7_0_sh_mask.h" #include +static void nbio_v7_7_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v7_7_get_rev_id(struct amdgpu_device *adev) { u32 tmp; @@ -336,4 +344,5 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .get_clockgating_state = nbio_v7_7_get_clockgating_state, .ih_control = nbio_v7_7_ih_control, .init_registers = nbio_v7_7_init_registers, + .remap_hdp_registers = nbio_v7_7_remap_hdp_registers, }; From 86875d558b91cb46f43be112799c06ecce60ec1e Mon Sep 17 00:00:00 2001 From: Candice Li Date: Wed, 7 Sep 2022 15:52:04 +0800 Subject: [PATCH 07/29] drm/amdgpu: Skip reset error status for psp v13_0_0 No need to reset error status since only umc ras supported on psp v13_0_0. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index ff5361f5c2d4..12c6f97945a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1811,7 +1811,8 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) amdgpu_ras_query_error_status(adev, &info); if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && - adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4) && + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 0)) { if (amdgpu_ras_reset_error_status(adev, info.head.block)) dev_warn(adev->dev, "Failed to reset error counter and error status"); } From 42ff33e63b83d0fd40985ccbb50ff54e320a3bd5 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 11 Aug 2022 16:42:12 -0400 Subject: [PATCH 08/29] drm/amd/display: Revert "Fallback to SW cursor if SubVP + cursor too big" This reverts commit a4f1b04216023ff0f4cd89328b59ee6890248130 since returning false in case of SubVP results in no cursor being visible on desktop as there is no sw cursor fallback path on all platforms. Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Pavle Kotarac Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index f62d50901d92..6752ca44e6e0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -328,11 +328,6 @@ bool dc_stream_set_cursor_attributes( } dc = stream->ctx->dc; - - if (attributes->height * attributes->width * 4 > 16384) - if (stream->mall_stream_config.type == SUBVP_MAIN) - return false; - stream->cursor_attributes = *attributes; dc_z10_restore(dc); From 58d97c99c99f4559072a5410ec0135271e24e95d Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 23 Aug 2022 17:14:03 -0400 Subject: [PATCH 09/29] drm/amd/display: Update MBLK calculation for SubVP [Description] Update MBLK calculation according to hardware doc. For DCC case we were not allocation enough MALL due to an inaccurate MBLK calculation. Tested-by: Daniel Wheeler Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 3 + .../display/dc/dcn32/dcn32_resource_helpers.c | 59 ++++++++++++++++--- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 1e7e6201c880..cf15d0e5e9b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -30,6 +30,9 @@ #define DCN3_2_DET_SEG_SIZE 64 #define DCN3_2_MALL_MBLK_SIZE_BYTES 65536 // 64 * 1024 +#define DCN3_2_MBLK_WIDTH 128 +#define DCN3_2_MBLK_HEIGHT_4BPE 128 +#define DCN3_2_MBLK_HEIGHT_8BPE 64 #define TO_DCN32_RES_POOL(pool)\ container_of(pool, struct dcn32_resource_pool, base) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index ab918fe38f6a..1f195c5b3377 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -46,7 +46,6 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_state *context) { uint32_t num_ways = 0; - uint32_t mall_region_pixels = 0; uint32_t bytes_per_pixel = 0; uint32_t cache_lines_used = 0; uint32_t lines_per_way = 0; @@ -54,20 +53,64 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat uint32_t bytes_in_mall = 0; uint32_t num_mblks = 0; uint32_t cache_lines_per_plane = 0; - uint32_t i = 0; + uint32_t i = 0, j = 0; + uint32_t mblk_width = 0; + uint32_t mblk_height = 0; + uint32_t full_vp_width_blk_aligned = 0; + uint32_t full_vp_height_blk_aligned = 0; + uint32_t mall_alloc_width_blk_aligned = 0; + uint32_t mall_alloc_height_blk_aligned = 0; + uint32_t full_vp_height = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; // Find the phantom pipes - if (pipe->stream && pipe->plane_state && !pipe->top_pipe && + if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; - mall_region_pixels = pipe->plane_state->plane_size.surface_pitch * pipe->stream->timing.v_addressable; + struct pipe_ctx *main_pipe = NULL; - // For bytes required in MALL, calculate based on number of MBlks required - num_mblks = (mall_region_pixels * bytes_per_pixel + - DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES; + /* Get full viewport height from main pipe (required for MBLK calculation) */ + for (j = 0; j < dc->res_pool->pipe_count; j++) { + main_pipe = &context->res_ctx.pipe_ctx[j]; + if (main_pipe->stream == pipe->stream->mall_stream_config.paired_stream) { + full_vp_height = main_pipe->plane_res.scl_data.viewport.height; + break; + } + } + + bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; + mblk_width = DCN3_2_MBLK_WIDTH; + mblk_height = bytes_per_pixel == 4 ? DCN3_2_MBLK_HEIGHT_4BPE : DCN3_2_MBLK_HEIGHT_8BPE; + + /* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) - + * FLOOR(vp_x_start, blk_width) + */ + full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x + + pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) + + (pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width); + + /* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) - + * FLOOR(vp_y_start, blk_height) + */ + full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y + + full_vp_height + mblk_height - 1) / mblk_height * mblk_height) + + (pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height); + + /* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */ + mall_alloc_width_blk_aligned = full_vp_width_blk_aligned; + + /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */ + mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) / + mblk_height * mblk_height + mblk_height; + + /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c; + * full_mblk_height_ub_l/c = mall_alloc_height_blk_aligned_l/c; + * num_mblk_l/c = (full_mblk_width_ub_l/c / mblk_width_l/c) * (full_mblk_height_ub_l/c / mblk_height_l/c); + * (Should be divisible, but round up if not) + */ + num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) * + ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height); bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES; // cache lines used is total bytes / cache_line size. Add +2 for worst case alignment // (MALL is 64-byte aligned) From 269aad0919c7aedffc18dcf46393a1bec457af0a Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Wed, 24 Aug 2022 15:34:24 -0400 Subject: [PATCH 10/29] drm/amd/display: Fixing DIG FIFO Error [Why & How] DIG_FIFO_READ_START_LEVEL should only be set to default value (7) by software. Removed all instances of resetting the register to 0 Tested-by: Daniel Wheeler Reviewed-by: Nicholas Kazlauskas Acked-by: Pavle Kotarac Signed-off-by: Leo Chen Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c index e3351ddc566c..06d8638db696 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c @@ -67,8 +67,7 @@ static void enc314_disable_fifo(struct stream_encoder *enc) { struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); - REG_UPDATE_2(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0, - DIG_FIFO_READ_START_LEVEL, 0); + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0); } static void enc314_dp_set_odm_combine( From f9c182056b8ff7402a46c39c34d5c91133fdf9a4 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Wed, 24 Aug 2022 16:34:53 -0400 Subject: [PATCH 11/29] drm/amd/display: Fix divide by zero in DML [why] Incorrectly using MicroTileWidth instead of MacroTileWidth for calculations. [how] Remove all unused references to MicroTile and change them to MacroTile. Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher --- .../dc/dml/dcn32/display_mode_vba_32.c | 24 +++++++++---------- .../drm/amd/display/dc/dml/display_mode_vba.h | 10 ++++---- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index cb2025771646..db8a019ddfe0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1806,10 +1806,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.Read256BlockHeightC[k], &mode_lib->vba.Read256BlockWidthY[k], &mode_lib->vba.Read256BlockWidthC[k], - &mode_lib->vba.MicroTileHeightY[k], - &mode_lib->vba.MicroTileHeightC[k], - &mode_lib->vba.MicroTileWidthY[k], - &mode_lib->vba.MicroTileWidthC[k]); + &mode_lib->vba.MacroTileHeightY[k], + &mode_lib->vba.MacroTileHeightC[k], + &mode_lib->vba.MacroTileWidthY[k], + &mode_lib->vba.MacroTileWidthC[k]); } /*Bandwidth Support Check*/ @@ -2659,10 +2659,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.Read256BlockWidthC, mode_lib->vba.Read256BlockHeightY, mode_lib->vba.Read256BlockHeightC, - mode_lib->vba.MicroTileWidthY, - mode_lib->vba.MicroTileWidthC, - mode_lib->vba.MicroTileHeightY, - mode_lib->vba.MicroTileHeightC, + mode_lib->vba.MacroTileWidthY, + mode_lib->vba.MacroTileWidthC, + mode_lib->vba.MacroTileHeightY, + mode_lib->vba.MacroTileHeightC, /* Output */ mode_lib->vba.SurfaceSizeInMALL, @@ -2709,10 +2709,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; - v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthY = mode_lib->vba.MicroTileWidthY[k]; - v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightY = mode_lib->vba.MicroTileHeightY[k]; - v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthC = mode_lib->vba.MicroTileWidthC[k]; - v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightC = mode_lib->vba.MicroTileHeightC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthY = mode_lib->vba.MacroTileWidthY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightY = mode_lib->vba.MacroTileHeightY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthC = mode_lib->vba.MacroTileWidthC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightC = mode_lib->vba.MacroTileHeightC[k]; v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].InterlaceEnable = mode_lib->vba.Interlace[k]; v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].HTotal = mode_lib->vba.HTotal[k]; v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCEnable = mode_lib->vba.DCCEnable[k]; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 492aec634b68..2051ddaa641a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -651,10 +651,10 @@ struct vba_vars_st { unsigned int OutputTypeAndRatePerState[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX]; double RequiredDISPCLKPerSurface[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX]; - unsigned int MicroTileHeightY[DC__NUM_DPP__MAX]; - unsigned int MicroTileHeightC[DC__NUM_DPP__MAX]; - unsigned int MicroTileWidthY[DC__NUM_DPP__MAX]; - unsigned int MicroTileWidthC[DC__NUM_DPP__MAX]; + unsigned int MacroTileHeightY[DC__NUM_DPP__MAX]; + unsigned int MacroTileHeightC[DC__NUM_DPP__MAX]; + unsigned int MacroTileWidthY[DC__NUM_DPP__MAX]; + unsigned int MacroTileWidthC[DC__NUM_DPP__MAX]; bool ImmediateFlipRequiredFinal; bool DCCProgrammingAssumesScanDirectionUnknownFinal; bool EnoughWritebackUnits; @@ -800,8 +800,6 @@ struct vba_vars_st { double PSCL_FACTOR[DC__NUM_DPP__MAX]; double PSCL_FACTOR_CHROMA[DC__NUM_DPP__MAX]; double MaximumVStartup[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX]; - unsigned int MacroTileWidthY[DC__NUM_DPP__MAX]; - unsigned int MacroTileWidthC[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitch[DC__NUM_DPP__MAX]; double AlignedYPitch[DC__NUM_DPP__MAX]; double AlignedCPitch[DC__NUM_DPP__MAX]; From 18aefea7fc71759a2405bc65eae057ffda3c429c Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 1 Sep 2022 16:00:52 -0400 Subject: [PATCH 12/29] drm/amd/display: Fix compilation errors on DCN314 We have some compilation errors in some DML files from DCN314 that we never noticed because we were not compiling some of the DML files. This commit fixes those syntax errors before we enable the compilation. Cc: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index fc4d7474c111..a58723c3725b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -61,7 +61,7 @@ // fudge factor for min dcfclk calclation #define __DML_MIN_DCFCLK_FACTOR__ 1.15 -struct { +typedef struct { double DPPCLK; double DISPCLK; double PixelClock; @@ -1599,7 +1599,7 @@ static void CalculateDCCConfiguration( int segment_order_vert_contiguous_luma; int segment_order_vert_contiguous_chroma; - enum { + typedef enum { REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA } RequestType; RequestType RequestLuma; @@ -7157,12 +7157,13 @@ static double CalculateExtraLatencyBytes( HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); else HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); - else + } else { HostVMDynamicLevels = 0; + } ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; - if (GPUVMEnable == true) + if (GPUVMEnable == true) { for (k = 0; k < NumberOfActivePlanes; ++k) ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; } From af2f2a256e048f1b83605eaae49948e4a6811ac1 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 1 Sep 2022 16:07:19 -0400 Subject: [PATCH 13/29] drm/amd/display: Enable dlg and vba compilation for dcn314 We were not using the VBA and DLG files for DCN314, but the next sequence of changes for DCN314 will require those files. This commit adds the necessary files to the Makefile. Cc: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 86a3b5bfd699..cb81ed2fbd53 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -70,6 +70,8 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_ccflags) $(frame_warn_flag) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags) @@ -123,6 +125,7 @@ DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o +DML += dcn314/display_mode_vba_314.o dcn314/display_rq_dlg_calc_314.o DML += dcn32/display_mode_vba_32.o dcn32/display_rq_dlg_calc_32.o dcn32/display_mode_vba_util_32.o DML += dcn31/dcn31_fpu.o DML += dcn32/dcn32_fpu.o From ea45405d704e20826a899380c19ec163336f42ab Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 24 Aug 2022 09:24:18 -0400 Subject: [PATCH 14/29] drm/amd/display: Hook up DCN314 specific dml implementation [Why & How] Add support for the DML314 functions and hook up DCN314 to use them. This has some necessary additions for calculating Max VSTARTUP for future features, but there's also some changes that we have to make for pixel format/swizzle support. That will come in a following patch to make this transition easier to bisect. Tested-by: Daniel Wheeler Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c | 2 +- .../gpu/drm/amd/display/dc/dml/display_mode_lib.c | 12 ++++++++++++ .../gpu/drm/amd/display/dc/dml/display_mode_lib.h | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index 34a5d0f87b5f..fc5529fa51b3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -262,7 +262,7 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p } if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31); + dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN314); else dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c index 5d27ff0ebb5f..f5400eda07a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c @@ -35,6 +35,8 @@ #include "dcn30/display_rq_dlg_calc_30.h" #include "dcn31/display_mode_vba_31.h" #include "dcn31/display_rq_dlg_calc_31.h" +#include "dcn314/display_mode_vba_314.h" +#include "dcn314/display_rq_dlg_calc_314.h" #include "dcn32/display_mode_vba_32.h" #include "dcn32/display_rq_dlg_calc_32.h" #include "dml_logger.h" @@ -74,6 +76,13 @@ const struct dml_funcs dml31_funcs = { .rq_dlg_get_rq_reg = dml31_rq_dlg_get_rq_reg }; +const struct dml_funcs dml314_funcs = { + .validate = dml314_ModeSupportAndSystemConfigurationFull, + .recalculate = dml314_recalculate, + .rq_dlg_get_dlg_reg = dml314_rq_dlg_get_dlg_reg, + .rq_dlg_get_rq_reg = dml314_rq_dlg_get_rq_reg +}; + const struct dml_funcs dml32_funcs = { .validate = dml32_ModeSupportAndSystemConfigurationFull, .recalculate = dml32_recalculate, @@ -107,6 +116,9 @@ void dml_init_instance(struct display_mode_lib *lib, case DML_PROJECT_DCN31_FPGA: lib->funcs = dml31_funcs; break; + case DML_PROJECT_DCN314: + lib->funcs = dml314_funcs; + break; case DML_PROJECT_DCN32: lib->funcs = dml32_funcs; break; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h index 2bdd6ed22611..b1878a1440e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h @@ -41,6 +41,7 @@ enum dml_project { DML_PROJECT_DCN30, DML_PROJECT_DCN31, DML_PROJECT_DCN31_FPGA, + DML_PROJECT_DCN314, DML_PROJECT_DCN32, }; From 82c4018479fba63db8db7c7fbfd9e4afba95603a Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 24 Aug 2022 10:51:21 -0400 Subject: [PATCH 15/29] drm/amd/display: Relax swizzle checks for video non-RGB formats on DCN314 [Why] HW can support the display swizzle modes for video, and those are preferable over standard or linear for decode use. [How] Remove the check for DCN314. Tested-by: Daniel Wheeler Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index a58723c3725b..01f3fad172f3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -4071,9 +4071,7 @@ void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_ v->SourceFormatPixelAndScanSupport = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { - if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) - || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t - || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) { + if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) { v->SourceFormatPixelAndScanSupport = false; } } From 0b15b1ec8b74bd5c9a4e4cbadab82c0657832799 Mon Sep 17 00:00:00 2001 From: Duncan Ma Date: Wed, 24 Aug 2022 14:35:03 -0400 Subject: [PATCH 16/29] drm/amd/display: Correct dram channel width for dcn314 [Why] The interpretation of the number of memory channels differ by memory type, and this affects channel width for the DML input. [How] Set dram channel width according to memory type for dcn314. Tested-by: Daniel Wheeler Reviewed-by: Nicholas Kazlauskas Reviewed-by: Jun Lei Acked-by: Pavle Kotarac Signed-off-by: Duncan Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 2 ++ drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c | 3 +++ drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 1 + 3 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index beb025cd3dc2..9781a8dbc238 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -670,6 +670,8 @@ static void dcn314_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *cl } ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz); bw_params->vram_type = bios_info->memory_type; + + bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 8 : 4; bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4; for (i = 0; i < WM_SET_COUNT; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index fc5529fa51b3..4bb3b31ea7e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -194,6 +194,9 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; + if (bw_params->dram_channel_width_bytes > 0) + dcn3_14_soc.dram_channel_width_bytes = bw_params->dram_channel_width_bytes; + if (bw_params->num_channels > 0) dcn3_14_soc.num_chans = bw_params->num_channels; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 5d2b028e5dad..d9f1b0a4fbd4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -214,6 +214,7 @@ struct dummy_pstate_entry { struct clk_bw_params { unsigned int vram_type; unsigned int num_channels; + unsigned int dram_channel_width_bytes; unsigned int dispclk_vco_khz; unsigned int dc_mode_softmax_memclk; struct clk_limit_table clk_table; From 6acc6196a7320b3d2a391925c4c884fc07f0b3df Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Thu, 25 Aug 2022 12:33:12 -0400 Subject: [PATCH 17/29] drm/amd/display: Round cursor width up for MALL allocation [Why & How] When calculating cursor size for MALL allocation, the cursor width should be the actual width rounded up to 64 alignment. Additionally, the bit depth should vary depending on color format. Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Pavle Kotarac Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c | 24 ++++++++++++++++++- .../drm/amd/display/dc/dcn32/dcn32_hwseq.c | 24 ++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c index 6ec1c52535b9..2038cbda33f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c @@ -103,6 +103,11 @@ void hubp32_cursor_set_attributes( enum cursor_lines_per_chunk lpc = hubp2_get_lines_per_chunk( attr->width, attr->color_format); + //Round cursor width up to next multiple of 64 + uint32_t cursor_width = ((attr->width + 63) / 64) * 64; + uint32_t cursor_height = attr->height; + uint32_t cursor_size = cursor_width * cursor_height; + hubp->curs_attr = *attr; REG_UPDATE(CURSOR_SURFACE_ADDRESS_HIGH, @@ -126,7 +131,24 @@ void hubp32_cursor_set_attributes( /* used to shift the cursor chunk request deadline */ CURSOR0_CHUNK_HDL_ADJUST, 3); - if (attr->width * attr->height * 4 > 16384) + switch (attr->color_format) { + case CURSOR_MODE_MONO: + cursor_size /= 2; + break; + case CURSOR_MODE_COLOR_1BIT_AND: + case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA: + case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA: + cursor_size *= 4; + break; + + case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED: + case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED: + default: + cursor_size *= 8; + break; + } + + if (cursor_size > 16384) REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, true); else REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, false); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 8d9d96c39808..344fe7535df5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -741,7 +741,29 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context) struct hubp *hubp = pipe->plane_res.hubp; if (pipe->stream && pipe->plane_state && hubp && hubp->funcs->hubp_update_mall_sel) { - if (hubp->curs_attr.width * hubp->curs_attr.height * 4 > 16384) + //Round cursor width up to next multiple of 64 + int cursor_width = ((hubp->curs_attr.width + 63) / 64) * 64; + int cursor_height = hubp->curs_attr.height; + int cursor_size = cursor_width * cursor_height; + + switch (hubp->curs_attr.color_format) { + case CURSOR_MODE_MONO: + cursor_size /= 2; + break; + case CURSOR_MODE_COLOR_1BIT_AND: + case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA: + case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA: + cursor_size *= 4; + break; + + case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED: + case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED: + default: + cursor_size *= 8; + break; + } + + if (cursor_size > 16384) cache_cursor = true; if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { From 1bb8df66920a5549db2be92c23ab81fd06992e5d Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 24 Aug 2022 11:53:50 -0400 Subject: [PATCH 18/29] drm/amd/display: SW cursor fallback for SubVP [Description] Leverage SW cursor fall back for SubVP when the cursor is too big. We want to take advantage of being able to fallback to SW cursor when possible because it's not worth it to disable MCLK switching because the cursor is slightly too big. Tested-by: Daniel Wheeler Reviewed-by: Aurabindo Pillai Acked-by: Pavle Kotarac Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 5 +++++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 1 + 4 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 6752ca44e6e0..0c85ab5933b4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -328,6 +328,11 @@ bool dc_stream_set_cursor_attributes( } dc = stream->ctx->dc; + + if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384) + if (stream->mall_stream_config.type == SUBVP_MAIN) + return false; + stream->cursor_attributes = *attributes; dc_z10_restore(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 5908b60db313..dbf8158b832e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -745,6 +745,7 @@ struct dc_debug_options { bool disable_fixed_vs_aux_timeout_wa; bool force_disable_subvp; bool force_subvp_mclk_switch; + bool allow_sw_cursor_fallback; bool force_usr_allow; /* uses value at boot and disables switch */ bool disable_dtb_ref_clk_switch; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 8b887b552f2c..9b8be00a23ed 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -871,6 +871,7 @@ static const struct dc_debug_options debug_defaults_drv = { .exit_idle_opt_for_cursor_updates = true, .enable_single_display_2to1_odm_policy = true, .enable_dp_dig_pixel_rate_div_policy = 1, + .allow_sw_cursor_fallback = false, }; static const struct dc_debug_options debug_defaults_diags = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index c8b7d6ff38f4..c8ed01bbe659 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -872,6 +872,7 @@ static const struct dc_debug_options debug_defaults_drv = { .exit_idle_opt_for_cursor_updates = true, .enable_single_display_2to1_odm_policy = true, .enable_dp_dig_pixel_rate_div_policy = 1, + .allow_sw_cursor_fallback = false, }; static const struct dc_debug_options debug_defaults_diags = { From ceb756004a30239c3a50dc237313e234b667077e Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 25 Aug 2022 15:05:58 -0400 Subject: [PATCH 19/29] drm/amd/display: add workaround for subvp cursor corruption for DCN32/321 [Why&How] Kernel does not have a means to tell the userspace to use software cursor. Due to lack of this functionality, reducing the max cursor size is the only way to ensure that power savings of Subview port feature is utilized for asics that support it. The workaround could be removed after cursor caching is fixed while a subviewport config is active. Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Pavle Kotarac Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 3 ++- drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 9b8be00a23ed..c3b783cea8a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2040,7 +2040,8 @@ static bool dcn32_resource_construct( dc->caps.max_downscale_ratio = 600; dc->caps.i2c_speed_in_khz = 100; dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ - dc->caps.max_cursor_size = 256; + /* TODO: Bring max_cursor_size back to 256 after subvp cursor corruption is fixed*/ + dc->caps.max_cursor_size = 64; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; dc->caps.mall_size_per_mem_channel = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index c8ed01bbe659..7309eed33a61 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -1652,7 +1652,8 @@ static bool dcn321_resource_construct( dc->caps.max_downscale_ratio = 600; dc->caps.i2c_speed_in_khz = 100; dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ - dc->caps.max_cursor_size = 256; + /* TODO: Bring max cursor size back to 256 after subvp cursor corruption is fixed*/ + dc->caps.max_cursor_size = 64; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; dc->caps.mall_size_per_mem_channel = 0; From 3601d620f22e37740cf73f8278eabf9f2aa19eb7 Mon Sep 17 00:00:00 2001 From: Yao Wang1 Date: Mon, 22 Aug 2022 18:30:31 +0800 Subject: [PATCH 20/29] drm/amd/display: Limit user regamma to a valid value [Why] For HDR mode, we get total 512 tf_point and after switching to SDR mode we actually get 400 tf_point and the rest of points(401~512) still use dirty value from HDR mode. We should limit the rest of the points to max value. [How] Limit the value when coordinates_x.x > 1, just like what we do in translate_from_linear_space for other re-gamma build paths. Tested-by: Daniel Wheeler Reviewed-by: Krunoslav Kovac Reviewed-by: Aric Cyr Acked-by: Pavle Kotarac Signed-off-by: Yao Wang1 Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 859ffd8725c5..04f7656906ca 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1600,6 +1600,7 @@ static void interpolate_user_regamma(uint32_t hw_points_num, struct fixed31_32 lut2; struct fixed31_32 delta_lut; struct fixed31_32 delta_index; + const struct fixed31_32 one = dc_fixpt_from_int(1); i = 0; /* fixed_pt library has problems handling too small values */ @@ -1628,6 +1629,9 @@ static void interpolate_user_regamma(uint32_t hw_points_num, } else hw_x = coordinates_x[i].x; + if (dc_fixpt_le(one, hw_x)) + hw_x = one; + norm_x = dc_fixpt_mul(norm_factor, hw_x); index = dc_fixpt_floor(norm_x); if (index < 0 || index > 255) From d978c51f8d1f4314c84cf50291156862a4c34fc8 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Sat, 6 Aug 2022 01:58:00 +0800 Subject: [PATCH 21/29] drm/amd/display: Refactor SubVP calculation to remove FPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor calculation to remove floating point operations from dmub_srv. To ensure that 32-bit compilation works well, we use the div64 family of macros to do integer division for SubVP-related timing parameters. Cc: Maíra Canal Cc: Alex Deucher Cc: Isabella Basso Cc: Magali Lemes Tested-by: Daniel Wheeler Reviewed-by: Samson Tam Acked-by: Tom Chung Signed-off-by: Alvin Lee Co-developed-by: Aurabindo Pillai Signed-off-by: Aurabindo Pillai Co-developed-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 87 ++++++++++---------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 09b304507bad..52a61b3e5a8b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -417,44 +417,42 @@ static void populate_subvp_cmd_drr_info(struct dc *dc, struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing; struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing; - int16_t drr_frame_us = 0; - int16_t min_drr_supported_us = 0; - int16_t max_drr_supported_us = 0; - int16_t max_drr_vblank_us = 0; - int16_t max_drr_mallregion_us = 0; - int16_t mall_region_us = 0; - int16_t prefetch_us = 0; - int16_t subvp_active_us = 0; - int16_t drr_active_us = 0; - int16_t min_vtotal_supported = 0; - int16_t max_vtotal_supported = 0; + uint16_t drr_frame_us = 0; + uint16_t min_drr_supported_us = 0; + uint16_t max_drr_supported_us = 0; + uint16_t max_drr_vblank_us = 0; + uint16_t max_drr_mallregion_us = 0; + uint16_t mall_region_us = 0; + uint16_t prefetch_us = 0; + uint16_t subvp_active_us = 0; + uint16_t drr_active_us = 0; + uint16_t min_vtotal_supported = 0; + uint16_t max_vtotal_supported = 0; pipe_data->pipe_config.vblank_data.drr_info.drr_in_use = true; pipe_data->pipe_config.vblank_data.drr_info.use_ramping = false; // for now don't use ramping pipe_data->pipe_config.vblank_data.drr_info.drr_window_size_ms = 4; // hardcode 4ms DRR window for now - drr_frame_us = div64_s64(drr_timing->v_total * drr_timing->h_total, - (int64_t)(drr_timing->pix_clk_100hz * 100) * 1000000); + drr_frame_us = div64_u64(((uint64_t)drr_timing->v_total * drr_timing->h_total * 1000000), + (((uint64_t)drr_timing->pix_clk_100hz * 100))); // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = div64_s64(phantom_timing->v_addressable * phantom_timing->h_total, - (int64_t)(phantom_timing->pix_clk_100hz * 100) * 1000000); + mall_region_us = div64_u64(((uint64_t)phantom_timing->v_addressable * phantom_timing->h_total * 1000000), + (((uint64_t)phantom_timing->pix_clk_100hz * 100))); min_drr_supported_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - min_vtotal_supported = div64_s64(drr_timing->pix_clk_100hz * 100 * - (div64_s64((int64_t)min_drr_supported_us, 1000000)), - (int64_t)drr_timing->h_total); + min_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * min_drr_supported_us), + (((uint64_t)drr_timing->h_total * 1000000))); - prefetch_us = div64_s64((phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total, - (int64_t)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us); - subvp_active_us = div64_s64(main_timing->v_addressable * main_timing->h_total, - (int64_t)(main_timing->pix_clk_100hz * 100) * 1000000); - drr_active_us = div64_s64(drr_timing->v_addressable * drr_timing->h_total, - (int64_t)(drr_timing->pix_clk_100hz * 100) * 1000000); - max_drr_vblank_us = div64_s64((int64_t)(subvp_active_us - prefetch_us - drr_active_us), 2) + drr_active_us; + prefetch_us = div64_u64(((uint64_t)(phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total * 1000000), + (((uint64_t)phantom_timing->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us)); + subvp_active_us = div64_u64(((uint64_t)main_timing->v_addressable * main_timing->h_total * 1000000), + (((uint64_t)main_timing->pix_clk_100hz * 100))); + drr_active_us = div64_u64(((uint64_t)drr_timing->v_addressable * drr_timing->h_total * 1000000), + (((uint64_t)drr_timing->pix_clk_100hz * 100))); + max_drr_vblank_us = div64_u64((subvp_active_us - prefetch_us - drr_active_us), 2) + drr_active_us; max_drr_mallregion_us = subvp_active_us - prefetch_us - mall_region_us; max_drr_supported_us = max_drr_vblank_us > max_drr_mallregion_us ? max_drr_vblank_us : max_drr_mallregion_us; - max_vtotal_supported = div64_s64(drr_timing->pix_clk_100hz * 100 * (div64_s64((int64_t)max_drr_supported_us, 1000000)), - (int64_t)drr_timing->h_total); + max_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * max_drr_supported_us), + (((uint64_t)drr_timing->h_total * 1000000))); pipe_data->pipe_config.vblank_data.drr_info.min_vtotal_supported = min_vtotal_supported; pipe_data->pipe_config.vblank_data.drr_info.max_vtotal_supported = max_vtotal_supported; @@ -548,10 +546,12 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc, struct dc_crtc_timing *phantom_timing1 = &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing; struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL; - subvp0_prefetch_us = div64_s64((phantom_timing0->v_total - phantom_timing0->v_front_porch) * phantom_timing0->h_total, - (int64_t)(phantom_timing0->pix_clk_100hz * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us); - subvp1_prefetch_us = div64_s64((phantom_timing1->v_total - phantom_timing1->v_front_porch) * phantom_timing1->h_total, - (int64_t)(phantom_timing1->pix_clk_100hz * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us); + subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) * + (uint64_t)phantom_timing0->h_total * 1000000), + (((uint64_t)phantom_timing0->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us)); + subvp1_prefetch_us = div64_u64(((uint64_t)(phantom_timing1->v_total - phantom_timing1->v_front_porch) * + (uint64_t)phantom_timing1->h_total * 1000000), + (((uint64_t)phantom_timing1->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us)); // Whichever SubVP PIPE has the smaller prefetch (including the prefetch end to mall start time) // should increase it's prefetch time to match the other @@ -559,16 +559,17 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc, pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[1]; prefetch_delta_us = subvp0_prefetch_us - subvp1_prefetch_us; pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines = - div64_s64(((div64_s64((int64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us), 1000000)) * - (phantom_timing1->pix_clk_100hz * 100) + phantom_timing1->h_total - 1), - (int64_t)phantom_timing1->h_total); + div64_u64(((uint64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) * + ((uint64_t)phantom_timing1->pix_clk_100hz * 100) + ((uint64_t)phantom_timing1->h_total * 1000000 - 1)), + ((uint64_t)phantom_timing1->h_total * 1000000)); + } else if (subvp1_prefetch_us > subvp0_prefetch_us) { pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[0]; prefetch_delta_us = subvp1_prefetch_us - subvp0_prefetch_us; pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines = - div64_s64(((div64_s64((int64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us), 1000000)) * - (phantom_timing0->pix_clk_100hz * 100) + phantom_timing0->h_total - 1), - (int64_t)phantom_timing0->h_total); + div64_u64(((uint64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) * + ((uint64_t)phantom_timing0->pix_clk_100hz * 100) + ((uint64_t)phantom_timing0->h_total * 1000000 - 1)), + ((uint64_t)phantom_timing0->h_total * 1000000)); } } @@ -630,13 +631,11 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, // Round up pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines = - div64_s64(((div64_s64((int64_t)dc->caps.subvp_prefetch_end_to_mall_start_us, 1000000)) * - (phantom_timing->pix_clk_100hz * 100) + phantom_timing->h_total - 1), - (int64_t)phantom_timing->h_total); + div64_u64(((uint64_t)dc->caps.subvp_prefetch_end_to_mall_start_us * ((uint64_t)phantom_timing->pix_clk_100hz * 100) + + ((uint64_t)phantom_timing->h_total * 1000000 - 1)), ((uint64_t)phantom_timing->h_total * 1000000)); pipe_data->pipe_config.subvp_data.processing_delay_lines = - div64_s64(((div64_s64((int64_t)dc->caps.subvp_fw_processing_delay_us, 1000000)) * - (phantom_timing->pix_clk_100hz * 100) + phantom_timing->h_total - 1), - (int64_t)phantom_timing->h_total); + div64_u64(((uint64_t)(dc->caps.subvp_fw_processing_delay_us) * ((uint64_t)phantom_timing->pix_clk_100hz * 100) + + ((uint64_t)phantom_timing->h_total * 1000000 - 1)), ((uint64_t)phantom_timing->h_total * 1000000)); // Find phantom pipe index based on phantom stream for (j = 0; j < dc->res_pool->pipe_count; j++) { struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j]; From c4be0ac987f21e12e7ad23bc480e826d8c30de20 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Aug 2022 13:34:05 -0700 Subject: [PATCH 22/29] drm/amd/display: Reduce number of arguments of dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most of the arguments are identical between the two call sites and they can be accessed through the 'struct vba_vars_st' pointer created at the top of dml32_ModeSupportAndSystemConfigurationFull(). This reduces the total amount of stack space that dml32_ModeSupportAndSystemConfigurationFull() uses by 216 bytes with LLVM 16 (2152 -> 1936), helping clear up the following clang warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1721:6: error: stack frame size (2152) exceeds limit (2048) in 'dml32_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) ^ 1 error generated. Additionally, while modifying the arguments to dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(), use 'v' consistently, instead of 'v' mixed with 'mode_lib->vba'. Link: https://github.com/ClangBuiltLinux/linux/issues/1681 Reported-by: "Sudip Mukherjee (Codethink)" Tested-by: Maíra Canal Reviewed-by: Rodrigo Siqueira Signed-off-by: Nathan Chancellor Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml/dcn32/display_mode_vba_32.c | 118 +++------ .../dc/dml/dcn32/display_mode_vba_util_32.c | 246 ++++++++---------- .../dc/dml/dcn32/display_mode_vba_util_32.h | 33 +-- 3 files changed, 139 insertions(+), 258 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index db8a019ddfe0..eea2f0ba7367 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1163,58 +1163,28 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - mode_lib->vba.USRRetrainingRequiredFinal, - mode_lib->vba.UsesMALLForPStateChange, - mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], - mode_lib->vba.NumberOfActiveSurfaces, - mode_lib->vba.MaxLineBufferLines, - mode_lib->vba.LineBufferSizeFinal, - mode_lib->vba.WritebackInterfaceBufferSize, - mode_lib->vba.DCFCLK, - mode_lib->vba.ReturnBW, - mode_lib->vba.SynchronizeTimingsFinal, - mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - mode_lib->vba.DRRDisplay, - v->dpte_group_bytes, - v->meta_row_height, - v->meta_row_height_chroma, + v, + v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb], + v->DCFCLK, + v->ReturnBW, v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters, - mode_lib->vba.WritebackChunkSize, - mode_lib->vba.SOCCLK, + v->SOCCLK, v->DCFCLKDeepSleep, - mode_lib->vba.DETBufferSizeY, - mode_lib->vba.DETBufferSizeC, - mode_lib->vba.SwathHeightY, - mode_lib->vba.SwathHeightC, - mode_lib->vba.LBBitPerPixel, + v->DETBufferSizeY, + v->DETBufferSizeC, + v->SwathHeightY, + v->SwathHeightC, v->SwathWidthY, v->SwathWidthC, - mode_lib->vba.HRatio, - mode_lib->vba.HRatioChroma, - mode_lib->vba.vtaps, - mode_lib->vba.VTAPsChroma, - mode_lib->vba.VRatio, - mode_lib->vba.VRatioChroma, - mode_lib->vba.HTotal, - mode_lib->vba.VTotal, - mode_lib->vba.VActive, - mode_lib->vba.PixelClock, - mode_lib->vba.BlendingAndTiming, - mode_lib->vba.DPPPerPlane, + v->DPPPerPlane, v->BytePerPixelDETY, v->BytePerPixelDETC, v->DSTXAfterScaler, v->DSTYAfterScaler, - mode_lib->vba.WritebackEnable, - mode_lib->vba.WritebackPixelFormat, - mode_lib->vba.WritebackDestinationWidth, - mode_lib->vba.WritebackDestinationHeight, - mode_lib->vba.WritebackSourceHeight, v->UnboundedRequestEnabled, v->CompressedBufferSizeInkByte, /* Output */ - &v->Watermark, &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_dramchange_support, v->MaxActiveDRAMClockChangeLatencySupported, v->SubViewportLinesNeededInMALL, @@ -3557,62 +3527,32 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - mode_lib->vba.USRRetrainingRequiredFinal, - mode_lib->vba.UsesMALLForPStateChange, - mode_lib->vba.PrefetchModePerState[i][j], - mode_lib->vba.NumberOfActiveSurfaces, - mode_lib->vba.MaxLineBufferLines, - mode_lib->vba.LineBufferSizeFinal, - mode_lib->vba.WritebackInterfaceBufferSize, - mode_lib->vba.DCFCLKState[i][j], - mode_lib->vba.ReturnBWPerState[i][j], - mode_lib->vba.SynchronizeTimingsFinal, - mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - mode_lib->vba.DRRDisplay, - mode_lib->vba.dpte_group_bytes, - mode_lib->vba.meta_row_height, - mode_lib->vba.meta_row_height_chroma, + v, + v->PrefetchModePerState[i][j], + v->DCFCLKState[i][j], + v->ReturnBWPerState[i][j], v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters, - mode_lib->vba.WritebackChunkSize, - mode_lib->vba.SOCCLKPerState[i], - mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j], - mode_lib->vba.DETBufferSizeYThisState, - mode_lib->vba.DETBufferSizeCThisState, - mode_lib->vba.SwathHeightYThisState, - mode_lib->vba.SwathHeightCThisState, - mode_lib->vba.LBBitPerPixel, - mode_lib->vba.SwathWidthYThisState, // 24 - mode_lib->vba.SwathWidthCThisState, - mode_lib->vba.HRatio, - mode_lib->vba.HRatioChroma, - mode_lib->vba.vtaps, - mode_lib->vba.VTAPsChroma, - mode_lib->vba.VRatio, - mode_lib->vba.VRatioChroma, - mode_lib->vba.HTotal, - mode_lib->vba.VTotal, - mode_lib->vba.VActive, - mode_lib->vba.PixelClock, - mode_lib->vba.BlendingAndTiming, - mode_lib->vba.NoOfDPPThisState, - mode_lib->vba.BytePerPixelInDETY, - mode_lib->vba.BytePerPixelInDETC, + v->SOCCLKPerState[i], + v->ProjectedDCFCLKDeepSleep[i][j], + v->DETBufferSizeYThisState, + v->DETBufferSizeCThisState, + v->SwathHeightYThisState, + v->SwathHeightCThisState, + v->SwathWidthYThisState, // 24 + v->SwathWidthCThisState, + v->NoOfDPPThisState, + v->BytePerPixelInDETY, + v->BytePerPixelInDETC, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler, - mode_lib->vba.WritebackEnable, - mode_lib->vba.WritebackPixelFormat, - mode_lib->vba.WritebackDestinationWidth, - mode_lib->vba.WritebackDestinationHeight, - mode_lib->vba.WritebackSourceHeight, - mode_lib->vba.UnboundedRequestEnabledThisState, - mode_lib->vba.CompressedBufferSizeInkByteThisState, + v->UnboundedRequestEnabledThisState, + v->CompressedBufferSizeInkByteThisState, /* Output */ - &mode_lib->vba.Watermark, // Store the values in vba - &mode_lib->vba.DRAMClockChangeSupport[i][j], + &v->DRAMClockChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[0], // double *MaxActiveDRAMClockChangeLatencySupported &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // Long SubViewportLinesNeededInMALL[] - &mode_lib->vba.FCLKChangeSupport[i][j], + &v->FCLKChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[1], // double *MinActiveFCLKChangeLatencySupported &mode_lib->vba.USRRetrainingSupport[i][j], mode_lib->vba.ActiveDRAMClockChangeLatencyMargin); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 05fc14a47fba..de1ce3b21b2a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -4206,58 +4206,28 @@ void dml32_CalculateFlipSchedule( } // CalculateFlipSchedule void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - bool USRRetrainingRequiredFinal, - enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + struct vba_vars_st *v, unsigned int PrefetchMode, - unsigned int NumberOfActiveSurfaces, - unsigned int MaxLineBufferLines, - unsigned int LineBufferSize, - unsigned int WritebackInterfaceBufferSize, double DCFCLK, double ReturnBW, - bool SynchronizeTimingsFinal, - bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - bool DRRDisplay[], - unsigned int dpte_group_bytes[], - unsigned int meta_row_height[], - unsigned int meta_row_height_chroma[], SOCParametersList mmSOCParameters, - unsigned int WritebackChunkSize, double SOCCLK, double DCFClkDeepSleep, unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], - unsigned int LBBitPerPixel[], double SwathWidthY[], double SwathWidthC[], - double HRatio[], - double HRatioChroma[], - unsigned int VTaps[], - unsigned int VTapsChroma[], - double VRatio[], - double VRatioChroma[], - unsigned int HTotal[], - unsigned int VTotal[], - unsigned int VActive[], - double PixelClock[], - unsigned int BlendingAndTiming[], unsigned int DPPPerSurface[], double BytePerPixelDETY[], double BytePerPixelDETC[], double DSTXAfterScaler[], double DSTYAfterScaler[], - bool WritebackEnable[], - enum source_format_class WritebackPixelFormat[], - double WritebackDestinationWidth[], - double WritebackDestinationHeight[], - double WritebackSourceHeight[], bool UnboundedRequestEnabled, unsigned int CompressedBufferSizeInkByte, /* Output */ - Watermarks *Watermark, enum clock_change_support *DRAMClockChangeSupport, double MaxActiveDRAMClockChangeLatencySupported[], unsigned int SubViewportLinesNeededInMALL[], @@ -4299,136 +4269,136 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; - Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; - Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency + v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; + v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency; - Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark; - Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark; - Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency + v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark; + v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark; + v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; - Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency + v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; - Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency + v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; - Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time + v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency); dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency); dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency); - dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark); - dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark); - dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark); - dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark); - dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark); - dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark); - dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark); + dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark); + dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark); + dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark); + dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark); + dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark); + dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark); + dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark); dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", - __func__, Watermark->Z8StutterEnterPlusExitWatermark); + __func__, v->Watermark.Z8StutterEnterPlusExitWatermark); #endif TotalActiveWriteback = 0; - for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if (WritebackEnable[k] == true) + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if (v->WritebackEnable[k] == true) TotalActiveWriteback = TotalActiveWriteback + 1; } if (TotalActiveWriteback <= 1) { - Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; + v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; } else { - Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency - + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency + + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; } - if (USRRetrainingRequiredFinal) - Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark + if (v->USRRetrainingRequiredFinal) + v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark + mmSOCParameters.USRRetrainingLatency; if (TotalActiveWriteback <= 1) { - Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.WritebackLatency; - Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + mmSOCParameters.WritebackLatency; } else { - Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency - + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; - Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency - + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK; + v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK; } - if (USRRetrainingRequiredFinal) - Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark + if (v->USRRetrainingRequiredFinal) + v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark + mmSOCParameters.USRRetrainingLatency; - if (USRRetrainingRequiredFinal) - Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark + if (v->USRRetrainingRequiredFinal) + v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark + mmSOCParameters.USRRetrainingLatency; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", - __func__, Watermark->WritebackDRAMClockChangeWatermark); - dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark); - dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark); - dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal); + __func__, v->Watermark.WritebackDRAMClockChangeWatermark); + dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark); + dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark); + dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal); dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency); #endif - for (k = 0; k < NumberOfActiveSurfaces; ++k) { - TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + - SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]); + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]); } - for (k = 0; k < NumberOfActiveSurfaces; ++k) { + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { - LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); - LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); + LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); + LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines); - dml_print("DML::%s: k=%d, LineBufferSize = %d\n", __func__, k, LineBufferSize); - dml_print("DML::%s: k=%d, LBBitPerPixel = %d\n", __func__, k, LBBitPerPixel[k]); - dml_print("DML::%s: k=%d, HRatio = %f\n", __func__, k, HRatio[k]); - dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]); + dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines); + dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal); + dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]); + dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]); + dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]); #endif - EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); - EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); + EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); + EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); EffectiveDETBufferSizeY = DETBufferSizeY[k]; if (UnboundedRequestEnabled) { EffectiveDETBufferSizeY = EffectiveDETBufferSizeY + CompressedBufferSizeInkByte * 1024 - * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k]) - / (HTotal[k] / PixelClock[k]) / TotalPixelBW; + * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k]) + / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; } LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); - FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; + FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY - - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; + - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k]; - if (NumberOfActiveSurfaces > 1) { + if (v->NumberOfActiveSurfaces > 1) { ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY - - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k] - / PixelClock[k] / VRatio[k]; + - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k] + / v->PixelClock[k] / v->VRatio[k]; } if (BytePerPixelDETC[k] > 0) { LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); - FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) - / VRatioChroma[k]; + FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) + / v->VRatioChroma[k]; ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] - / PixelClock[k]; - if (NumberOfActiveSurfaces > 1) { + - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] + / v->PixelClock[k]; + if (v->NumberOfActiveSurfaces > 1) { ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC - - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k] - / PixelClock[k] / VRatioChroma[k]; + - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k] + / v->PixelClock[k] / v->VRatioChroma[k]; } ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, ActiveClockChangeLatencyHidingC); @@ -4436,24 +4406,24 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; } - ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - - Watermark->DRAMClockChangeWatermark; - ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - - Watermark->FCLKChangeWatermark; - USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; + ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark + - v->Watermark.DRAMClockChangeWatermark; + ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark + - v->Watermark.FCLKChangeWatermark; + USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark; - if (WritebackEnable[k]) { - WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 - / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] - / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); - if (WritebackPixelFormat[k] == dm_444_64) + if (v->WritebackEnable[k]) { + WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024 + / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] + / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); + if (v->WritebackPixelFormat[k] == dm_444_64) WritebackLatencyHiding = WritebackLatencyHiding / 2; WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding - - Watermark->WritebackDRAMClockChangeWatermark; + - v->Watermark.WritebackDRAMClockChangeWatermark; WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding - - Watermark->WritebackFCLKChangeWatermark; + - v->Watermark.WritebackFCLKChangeWatermark; ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], WritebackFCLKChangeLatencyMargin); @@ -4461,22 +4431,22 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( WritebackDRAMClockChangeLatencyMargin); } MaxActiveDRAMClockChangeLatencySupported[k] = - (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? + (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? 0 : (ActiveDRAMClockChangeLatencyMargin[k] + mmSOCParameters.DRAMClockChangeLatency); } - for (i = 0; i < NumberOfActiveSurfaces; ++i) { - for (j = 0; j < NumberOfActiveSurfaces; ++j) { + for (i = 0; i < v->NumberOfActiveSurfaces; ++i) { + for (j = 0; j < v->NumberOfActiveSurfaces; ++j) { if (i == j || - (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) || - (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) || - (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) || - (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] && - HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] && - VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal && - (DRRDisplay[i] || DRRDisplay[j]))) { + (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) || + (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) || + (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) || + (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] && + v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] && + v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && + (v->DRRDisplay[i] || v->DRRDisplay[j]))) { SynchronizedSurfaces[i][j] = true; } else { SynchronizedSurfaces[i][j] = false; @@ -4484,8 +4454,8 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( } } - for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; @@ -4497,9 +4467,9 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; SameTimingForFCLKChange = true; - for (k = 0; k < NumberOfActiveSurfaces; ++k) { + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { - if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && + if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && (SameTimingForFCLKChange || ActiveFCLKChangeLatencyMargin[k] < SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { @@ -4519,17 +4489,17 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( } *USRRetrainingSupport = true; - for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && (USRRetrainingLatencyMargin[k] < 0)) { *USRRetrainingSupport = false; } } - for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && - UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && - UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && + v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && + v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && ActiveDRAMClockChangeLatencyMargin[k] < 0) { if (PrefetchMode > 0) { DRAMClockChangeSupportNumber = 2; @@ -4543,10 +4513,10 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( } } - for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { + if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) DRAMClockChangeMethod = 1; - else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) + else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) DRAMClockChangeMethod = 2; } @@ -4573,16 +4543,16 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; } - for (k = 0; k < NumberOfActiveSurfaces; ++k) { + for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { unsigned int dst_y_pstate; unsigned int src_y_pstate_l; unsigned int src_y_pstate_c; unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c; - dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1); - src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]); + dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1); + src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]); src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; - sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k]; + sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k]; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); @@ -4593,21 +4563,21 @@ dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBL dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); -dml_print("DML::%s: k=%d, meta_row_height = %d\n", __func__, k, meta_row_height[k]); +dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]); dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l); #endif SubViewportLinesNeededInMALL[k] = sub_vp_lines_l; if (BytePerPixelDETC[k] > 0) { - src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]); + src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]); src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; - sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k]; + sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k]; SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c); dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c); -dml_print("DML::%s: k=%d, meta_row_height_chroma = %d\n", __func__, k, meta_row_height_chroma[k]); +dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]); dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c); #endif } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index d293856ba906..1b1085454f4d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -30,6 +30,7 @@ #include "os_types.h" #include "../dc_features.h" #include "../display_mode_structs.h" +#include "dml/display_mode_vba.h" unsigned int dml32_dscceComputeDelay( unsigned int bpc, @@ -807,58 +808,28 @@ void dml32_CalculateFlipSchedule( bool *ImmediateFlipSupportedForPipe); void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - bool USRRetrainingRequiredFinal, - enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + struct vba_vars_st *v, unsigned int PrefetchMode, - unsigned int NumberOfActiveSurfaces, - unsigned int MaxLineBufferLines, - unsigned int LineBufferSize, - unsigned int WritebackInterfaceBufferSize, double DCFCLK, double ReturnBW, - bool SynchronizeTimingsFinal, - bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - bool DRRDisplay[], - unsigned int dpte_group_bytes[], - unsigned int meta_row_height[], - unsigned int meta_row_height_chroma[], SOCParametersList mmSOCParameters, - unsigned int WritebackChunkSize, double SOCCLK, double DCFClkDeepSleep, unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], - unsigned int LBBitPerPixel[], double SwathWidthY[], double SwathWidthC[], - double HRatio[], - double HRatioChroma[], - unsigned int VTaps[], - unsigned int VTapsChroma[], - double VRatio[], - double VRatioChroma[], - unsigned int HTotal[], - unsigned int VTotal[], - unsigned int VActive[], - double PixelClock[], - unsigned int BlendingAndTiming[], unsigned int DPPPerSurface[], double BytePerPixelDETY[], double BytePerPixelDETC[], double DSTXAfterScaler[], double DSTYAfterScaler[], - bool WritebackEnable[], - enum source_format_class WritebackPixelFormat[], - double WritebackDestinationWidth[], - double WritebackDestinationHeight[], - double WritebackSourceHeight[], bool UnboundedRequestEnabled, unsigned int CompressedBufferSizeInkByte, /* Output */ - Watermarks *Watermark, enum clock_change_support *DRAMClockChangeSupport, double MaxActiveDRAMClockChangeLatencySupported[], unsigned int SubViewportLinesNeededInMALL[], From a3fef74b1d48d89d4d911fcd7c2630d0eb6a0012 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Aug 2022 13:34:06 -0700 Subject: [PATCH 23/29] drm/amd/display: Reduce number of arguments of dml32_CalculatePrefetchSchedule() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several of the arguments are identical between the two call sites and they can be accessed through the 'struct vba_vars_st' pointer. This reduces the total amount of stack space that dml32_ModeSupportAndSystemConfigurationFull() uses by 208 bytes with LLVM 16 (1936 -> 1728), helping clear up the following clang warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1721:6: error: stack frame size (2152) exceeds limit (2048) in 'dml32_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) ^ 1 error generated. Additionally, while modifying the arguments to dml32_CalculatePrefetchSchedule(), use 'v' consistently, instead of 'v' mixed with 'mode_lib->vba'. Link: https://github.com/ClangBuiltLinux/linux/issues/1681 Reported-by: "Sudip Mukherjee (Codethink)" Tested-by: Maíra Canal Reviewed-by: Rodrigo Siqueira Signed-off-by: Nathan Chancellor Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml/dcn32/display_mode_vba_32.c | 118 +++++++----------- .../dc/dml/dcn32/display_mode_vba_util_32.c | 75 +++++------ .../dc/dml/dcn32/display_mode_vba_util_32.h | 18 +-- 3 files changed, 78 insertions(+), 133 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index eea2f0ba7367..9a60f27eceaa 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -755,30 +755,18 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; - v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, - &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k], - mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, - mode_lib->vba.DPPCLKDelaySCL, - mode_lib->vba.DPPCLKDelaySCLLBOnly, - mode_lib->vba.DPPCLKDelayCNVCCursor, - mode_lib->vba.DISPCLKDelaySubtotal, - (unsigned int) (v->SwathWidthY[k] / mode_lib->vba.HRatio[k]), - mode_lib->vba.OutputFormat[k], - mode_lib->vba.MaxInterDCNTileRepeaters, + v->ErrorResult[k] = dml32_CalculatePrefetchSchedule( + v, + k, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, + v->DSCDelay[k], + (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), dml_min(v->VStartupLines, v->MaxVStartupLines[k]), v->MaxVStartupLines[k], - mode_lib->vba.GPUVMMaxPageTableLevels, - mode_lib->vba.GPUVMEnable, - mode_lib->vba.HostVMEnable, - mode_lib->vba.HostVMMaxNonCachedPageTableLevels, - mode_lib->vba.HostVMMinPageSize, - mode_lib->vba.DynamicMetadataEnable[k], - mode_lib->vba.DynamicMetadataVMEnabled, - mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], - mode_lib->vba.DynamicMetadataTransmittedBytes[k], v->UrgentLatency, v->UrgentExtraLatency, - mode_lib->vba.TCalc, + v->TCalc, v->PDEAndMetaPTEBytesFrame[k], v->MetaRowByte[k], v->PixelPTEBytesPerRow[k], @@ -792,8 +780,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->MaxNumSwathC[k], v->swath_width_luma_ub[k], v->swath_width_chroma_ub[k], - mode_lib->vba.SwathHeightY[k], - mode_lib->vba.SwathHeightC[k], + v->SwathHeightY[k], + v->SwathHeightC[k], TWait, /* Output */ &v->DSTXAfterScaler[k], @@ -3228,63 +3216,47 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NoTimeForPrefetch[i][j][k] = dml32_CalculatePrefetchSchedule( + v, + k, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, - mode_lib->vba.DSCDelayPerState[i][k], - mode_lib->vba.DPPCLKDelaySubtotal + - mode_lib->vba.DPPCLKDelayCNVCFormater, - mode_lib->vba.DPPCLKDelaySCL, - mode_lib->vba.DPPCLKDelaySCLLBOnly, - mode_lib->vba.DPPCLKDelayCNVCCursor, - mode_lib->vba.DISPCLKDelaySubtotal, - mode_lib->vba.SwathWidthYThisState[k] / - mode_lib->vba.HRatio[k], - mode_lib->vba.OutputFormat[k], - mode_lib->vba.MaxInterDCNTileRepeaters, - dml_min(mode_lib->vba.MaxVStartup, - mode_lib->vba.MaximumVStartup[i][j][k]), - mode_lib->vba.MaximumVStartup[i][j][k], - mode_lib->vba.GPUVMMaxPageTableLevels, - mode_lib->vba.GPUVMEnable, mode_lib->vba.HostVMEnable, - mode_lib->vba.HostVMMaxNonCachedPageTableLevels, - mode_lib->vba.HostVMMinPageSize, - mode_lib->vba.DynamicMetadataEnable[k], - mode_lib->vba.DynamicMetadataVMEnabled, - mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], - mode_lib->vba.DynamicMetadataTransmittedBytes[k], - mode_lib->vba.UrgLatency[i], - mode_lib->vba.ExtraLatency, - mode_lib->vba.TimeCalc, - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k], - mode_lib->vba.MetaRowBytes[i][j][k], - mode_lib->vba.DPTEBytesPerRow[i][j][k], - mode_lib->vba.PrefetchLinesY[i][j][k], - mode_lib->vba.SwathWidthYThisState[k], - mode_lib->vba.PrefillY[k], - mode_lib->vba.MaxNumSwY[k], - mode_lib->vba.PrefetchLinesC[i][j][k], - mode_lib->vba.SwathWidthCThisState[k], - mode_lib->vba.PrefillC[k], - mode_lib->vba.MaxNumSwC[k], - mode_lib->vba.swath_width_luma_ub_this_state[k], - mode_lib->vba.swath_width_chroma_ub_this_state[k], - mode_lib->vba.SwathHeightYThisState[k], - mode_lib->vba.SwathHeightCThisState[k], mode_lib->vba.TWait, + v->DSCDelayPerState[i][k], + v->SwathWidthYThisState[k] / v->HRatio[k], + dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), + v->MaximumVStartup[i][j][k], + v->UrgLatency[i], + v->ExtraLatency, + v->TimeCalc, + v->PDEAndMetaPTEBytesPerFrame[i][j][k], + v->MetaRowBytes[i][j][k], + v->DPTEBytesPerRow[i][j][k], + v->PrefetchLinesY[i][j][k], + v->SwathWidthYThisState[k], + v->PrefillY[k], + v->MaxNumSwY[k], + v->PrefetchLinesC[i][j][k], + v->SwathWidthCThisState[k], + v->PrefillC[k], + v->MaxNumSwC[k], + v->swath_width_luma_ub_this_state[k], + v->swath_width_chroma_ub_this_state[k], + v->SwathHeightYThisState[k], + v->SwathHeightCThisState[k], v->TWait, /* Output */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler[k], - &mode_lib->vba.LineTimesForPrefetch[k], - &mode_lib->vba.PrefetchBW[k], - &mode_lib->vba.LinesForMetaPTE[k], - &mode_lib->vba.LinesForMetaAndDPTERow[k], - &mode_lib->vba.VRatioPreY[i][j][k], - &mode_lib->vba.VRatioPreC[i][j][k], - &mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0][k], - &mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0][k], - &mode_lib->vba.NoTimeForDynamicMetadata[i][j][k], - &mode_lib->vba.Tno_bw[k], - &mode_lib->vba.prefetch_vmrow_bw[k], + &v->LineTimesForPrefetch[k], + &v->PrefetchBW[k], + &v->LinesForMetaPTE[k], + &v->LinesForMetaAndDPTERow[k], + &v->VRatioPreY[i][j][k], + &v->VRatioPreC[i][j][k], + &v->RequiredPrefetchPixelDataBWLuma[0][0][k], + &v->RequiredPrefetchPixelDataBWChroma[0][0][k], + &v->NoTimeForDynamicMetadata[i][j][k], + &v->Tno_bw[k], + &v->prefetch_vmrow_bw[k], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // double *Tdmdl_vm &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // double *Tdmdl &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[2], // double *TSetup diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index de1ce3b21b2a..59c2547d01b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -3363,28 +3363,14 @@ double dml32_CalculateExtraLatency( } // CalculateExtraLatency bool dml32_CalculatePrefetchSchedule( + struct vba_vars_st *v, + unsigned int k, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, - double DPPCLKDelaySubtotalPlusCNVCFormater, - double DPPCLKDelaySCL, - double DPPCLKDelaySCLLBOnly, - double DPPCLKDelayCNVCCursor, - double DISPCLKDelaySubtotal, unsigned int DPP_RECOUT_WIDTH, - enum output_format_class OutputFormat, - unsigned int MaxInterDCNTileRepeaters, unsigned int VStartup, unsigned int MaxVStartup, - unsigned int GPUVMPageTableLevels, - bool GPUVMEnable, - bool HostVMEnable, - unsigned int HostVMMaxNonCachedPageTableLevels, - double HostVMMinPageSize, - bool DynamicMetadataEnable, - bool DynamicMetadataVMEnabled, - int DynamicMetadataLinesBeforeActiveRequired, - unsigned int DynamicMetadataTransmittedBytes, double UrgentLatency, double UrgentExtraLatency, double TCalc, @@ -3425,6 +3411,7 @@ bool dml32_CalculatePrefetchSchedule( double *VUpdateWidthPix, double *VReadyOffsetPix) { + double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; bool MyError = false; unsigned int DPPCycles, DISPCLKCycles; double DSTTotalPixelsAfterScaler; @@ -3461,27 +3448,27 @@ bool dml32_CalculatePrefetchSchedule( double Tsw_est1 = 0; double Tsw_est3 = 0; - if (GPUVMEnable == true && HostVMEnable == true) - HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + if (v->GPUVMEnable == true && v->HostVMEnable == true) + HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; else HostVMDynamicLevelsTrips = 0; #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); - dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels); + dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable); + dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels); dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable); - dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n", - __func__, HostVMEnable, HostVMInefficiencyFactor); + dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n", + __func__, v->HostVMEnable, HostVMInefficiencyFactor); #endif dml32_CalculateVUpdateAndDynamicMetadataParameters( - MaxInterDCNTileRepeaters, + v->MaxInterDCNTileRepeaters, myPipe->Dppclk, myPipe->Dispclk, myPipe->DCFClkDeepSleep, myPipe->PixelClock, myPipe->HTotal, myPipe->VBlank, - DynamicMetadataTransmittedBytes, - DynamicMetadataLinesBeforeActiveRequired, + v->DynamicMetadataTransmittedBytes[k], + v->DynamicMetadataLinesBeforeActiveRequired[k], myPipe->InterlaceEnable, myPipe->ProgressiveToInterlaceUnitInOPP, TSetup, @@ -3496,19 +3483,19 @@ bool dml32_CalculatePrefetchSchedule( LineTime = myPipe->HTotal / myPipe->PixelClock; trip_to_mem = UrgentLatency; - Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); + Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); - if (DynamicMetadataVMEnabled == true) + if (v->DynamicMetadataVMEnabled == true) *Tdmdl = TWait + Tvm_trips + trip_to_mem; else *Tdmdl = TWait + UrgentExtraLatency; #ifdef __DML_VBA_ALLOW_DELTA__ - if (DynamicMetadataEnable == false) + if (v->DynamicMetadataEnable[k] == false) *Tdmdl = 0.0; #endif - if (DynamicMetadataEnable == true) { + if (v->DynamicMetadataEnable[k] == true) { if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { *NotEnoughTimeForDynamicMetadata = true; #ifdef __DML_VBA_DEBUG__ @@ -3528,17 +3515,17 @@ bool dml32_CalculatePrefetchSchedule( *NotEnoughTimeForDynamicMetadata = false; } - *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && - GPUVMEnable == true ? TWait + Tvm_trips : 0); + *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && + v->GPUVMEnable == true ? TWait + Tvm_trips : 0); if (myPipe->ScalerEnabled) - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; else - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; - DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; + DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; - DISPCLKCycles = DISPCLKDelaySubtotal; + DISPCLKCycles = v->DISPCLKDelaySubtotal; if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) return true; @@ -3564,7 +3551,7 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); #endif - if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) + if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) *DSTYAfterScaler = 1; else *DSTYAfterScaler = 0; @@ -3581,13 +3568,13 @@ bool dml32_CalculatePrefetchSchedule( Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); - if (GPUVMEnable == true) { + if (v->GPUVMEnable == true) { Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; - if (GPUVMPageTableLevels >= 3) { + if (v->GPUVMMaxPageTableLevels >= 3) { *Tno_bw = UrgentExtraLatency + trip_to_mem * - (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); - } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) { + (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); + } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) { Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / 4.0 * LineTime; // VBA_ERROR *Tno_bw = UrgentExtraLatency; @@ -3622,7 +3609,7 @@ bool dml32_CalculatePrefetchSchedule( min_Lsw = dml_max(min_Lsw, 1.0); Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; - if (GPUVMEnable == true) { + if (v->GPUVMEnable == true) { Tvm_oto = dml_max3( Tvm_trips, *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, @@ -3630,7 +3617,7 @@ bool dml32_CalculatePrefetchSchedule( } else Tvm_oto = LineTime / 4.0; - if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { + if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { Tr0_oto = dml_max4( Tr0_trips, (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, @@ -3833,7 +3820,7 @@ bool dml32_CalculatePrefetchSchedule( #endif if (prefetch_bw_equ > 0) { - if (GPUVMEnable == true) { + if (v->GPUVMEnable == true) { Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); @@ -3841,7 +3828,7 @@ bool dml32_CalculatePrefetchSchedule( Tvm_equ = LineTime / 4; } - if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { + if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, (LineTime - Tvm_equ) / 2, LineTime / 4); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 1b1085454f4d..924e361ad243 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -713,28 +713,14 @@ double dml32_CalculateExtraLatency( unsigned int HostVMMaxNonCachedPageTableLevels); bool dml32_CalculatePrefetchSchedule( + struct vba_vars_st *v, + unsigned int k, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, - double DPPCLKDelaySubtotalPlusCNVCFormater, - double DPPCLKDelaySCL, - double DPPCLKDelaySCLLBOnly, - double DPPCLKDelayCNVCCursor, - double DISPCLKDelaySubtotal, unsigned int DPP_RECOUT_WIDTH, - enum output_format_class OutputFormat, - unsigned int MaxInterDCNTileRepeaters, unsigned int VStartup, unsigned int MaxVStartup, - unsigned int GPUVMPageTableLevels, - bool GPUVMEnable, - bool HostVMEnable, - unsigned int HostVMMaxNonCachedPageTableLevels, - double HostVMMinPageSize, - bool DynamicMetadataEnable, - bool DynamicMetadataVMEnabled, - int DynamicMetadataLinesBeforeActiveRequired, - unsigned int DynamicMetadataTransmittedBytes, double UrgentLatency, double UrgentExtraLatency, double TCalc, From 37934d4118e22bceb80141804391975078f31734 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Aug 2022 13:34:07 -0700 Subject: [PATCH 24/29] drm/amd/display: Reduce number of arguments of dml31's CalculateWatermarksAndDRAMSpeedChangeSupport() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most of the arguments are identical between the two call sites and they can be accessed through the 'struct vba_vars_st' pointer. This reduces the total amount of stack space that dml31_ModeSupportAndSystemConfigurationFull() uses by 240 bytes with LLVM 16 (2216 -> 1976), helping clear up the following clang warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:3908:6: error: stack frame size (2216) exceeds limit (2048) in 'dml31_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) ^ 1 error generated. Link: https://github.com/ClangBuiltLinux/linux/issues/1681 Reported-by: "Sudip Mukherjee (Codethink)" Tested-by: Maíra Canal Reviewed-by: Rodrigo Siqueira Signed-off-by: Nathan Chancellor Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml/dcn31/display_mode_vba_31.c | 248 ++++-------------- 1 file changed, 52 insertions(+), 196 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index d63b4209b14c..6790a18eebc8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -311,64 +311,28 @@ static void CalculateVupdateAndDynamicMetadataParameters( static void CalculateWatermarksAndDRAMSpeedChangeSupport( struct display_mode_lib *mode_lib, unsigned int PrefetchMode, - unsigned int NumberOfActivePlanes, - unsigned int MaxLineBufferLines, - unsigned int LineBufferSize, - unsigned int WritebackInterfaceBufferSize, double DCFCLK, double ReturnBW, - bool SynchronizedVBlank, - unsigned int dpte_group_bytes[], - unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, - double WritebackLatency, - double WritebackChunkSize, double SOCCLK, - double DRAMClockChangeLatency, - double SRExitTime, - double SREnterPlusExitTime, - double SRExitZ8Time, - double SREnterPlusExitZ8Time, double DCFCLKDeepSleep, unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], - unsigned int LBBitPerPixel[], double SwathWidthY[], double SwathWidthC[], - double HRatio[], - double HRatioChroma[], - unsigned int vtaps[], - unsigned int VTAPsChroma[], - double VRatio[], - double VRatioChroma[], - unsigned int HTotal[], - double PixelClock[], - unsigned int BlendingAndTiming[], unsigned int DPPPerPlane[], double BytePerPixelDETY[], double BytePerPixelDETC[], - double DSTXAfterScaler[], - double DSTYAfterScaler[], - bool WritebackEnable[], - enum source_format_class WritebackPixelFormat[], - double WritebackDestinationWidth[], - double WritebackDestinationHeight[], - double WritebackSourceHeight[], bool UnboundedRequestEnabled, int unsigned CompressedBufferSizeInkByte, enum clock_change_support *DRAMClockChangeSupport, - double *UrgentWatermark, - double *WritebackUrgentWatermark, - double *DRAMClockChangeWatermark, - double *WritebackDRAMClockChangeWatermark, double *StutterExitWatermark, double *StutterEnterPlusExitWatermark, double *Z8StutterExitWatermark, - double *Z8StutterEnterPlusExitWatermark, - double *MinActiveDRAMClockChangeLatencySupported); + double *Z8StutterEnterPlusExitWatermark); static void CalculateDCFCLKDeepSleep( struct display_mode_lib *mode_lib, @@ -3017,64 +2981,28 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman CalculateWatermarksAndDRAMSpeedChangeSupport( mode_lib, PrefetchMode, - v->NumberOfActivePlanes, - v->MaxLineBufferLines, - v->LineBufferSize, - v->WritebackInterfaceBufferSize, v->DCFCLK, v->ReturnBW, - v->SynchronizedVBlank, - v->dpte_group_bytes, - v->MetaChunkSize, v->UrgentLatency, v->UrgentExtraLatency, - v->WritebackLatency, - v->WritebackChunkSize, v->SOCCLK, - v->DRAMClockChangeLatency, - v->SRExitTime, - v->SREnterPlusExitTime, - v->SRExitZ8Time, - v->SREnterPlusExitZ8Time, v->DCFCLKDeepSleep, v->DETBufferSizeY, v->DETBufferSizeC, v->SwathHeightY, v->SwathHeightC, - v->LBBitPerPixel, v->SwathWidthY, v->SwathWidthC, - v->HRatio, - v->HRatioChroma, - v->vtaps, - v->VTAPsChroma, - v->VRatio, - v->VRatioChroma, - v->HTotal, - v->PixelClock, - v->BlendingAndTiming, v->DPPPerPlane, v->BytePerPixelDETY, v->BytePerPixelDETC, - v->DSTXAfterScaler, - v->DSTYAfterScaler, - v->WritebackEnable, - v->WritebackPixelFormat, - v->WritebackDestinationWidth, - v->WritebackDestinationHeight, - v->WritebackSourceHeight, v->UnboundedRequestEnabled, v->CompressedBufferSizeInkByte, &DRAMClockChangeSupport, - &v->UrgentWatermark, - &v->WritebackUrgentWatermark, - &v->DRAMClockChangeWatermark, - &v->WritebackDRAMClockChangeWatermark, &v->StutterExitWatermark, &v->StutterEnterPlusExitWatermark, &v->Z8StutterExitWatermark, - &v->Z8StutterEnterPlusExitWatermark, - &v->MinActiveDRAMClockChangeLatencySupported); + &v->Z8StutterEnterPlusExitWatermark); for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->WritebackEnable[k] == true) { @@ -5384,64 +5312,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l CalculateWatermarksAndDRAMSpeedChangeSupport( mode_lib, v->PrefetchModePerState[i][j], - v->NumberOfActivePlanes, - v->MaxLineBufferLines, - v->LineBufferSize, - v->WritebackInterfaceBufferSize, v->DCFCLKState[i][j], v->ReturnBWPerState[i][j], - v->SynchronizedVBlank, - v->dpte_group_bytes, - v->MetaChunkSize, v->UrgLatency[i], v->ExtraLatency, - v->WritebackLatency, - v->WritebackChunkSize, v->SOCCLKPerState[i], - v->DRAMClockChangeLatency, - v->SRExitTime, - v->SREnterPlusExitTime, - v->SRExitZ8Time, - v->SREnterPlusExitZ8Time, v->ProjectedDCFCLKDeepSleep[i][j], v->DETBufferSizeYThisState, v->DETBufferSizeCThisState, v->SwathHeightYThisState, v->SwathHeightCThisState, - v->LBBitPerPixel, v->SwathWidthYThisState, v->SwathWidthCThisState, - v->HRatio, - v->HRatioChroma, - v->vtaps, - v->VTAPsChroma, - v->VRatio, - v->VRatioChroma, - v->HTotal, - v->PixelClock, - v->BlendingAndTiming, v->NoOfDPPThisState, v->BytePerPixelInDETY, v->BytePerPixelInDETC, - v->DSTXAfterScaler, - v->DSTYAfterScaler, - v->WritebackEnable, - v->WritebackPixelFormat, - v->WritebackDestinationWidth, - v->WritebackDestinationHeight, - v->WritebackSourceHeight, UnboundedRequestEnabledThisState, CompressedBufferSizeInkByteThisState, &v->DRAMClockChangeSupport[i][j], - &v->UrgentWatermark, - &v->WritebackUrgentWatermark, - &v->DRAMClockChangeWatermark, - &v->WritebackDRAMClockChangeWatermark, &dummy, &dummy, &dummy, - &dummy, - &v->MinActiveDRAMClockChangeLatencySupported); + &dummy); } } @@ -5566,64 +5458,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l static void CalculateWatermarksAndDRAMSpeedChangeSupport( struct display_mode_lib *mode_lib, unsigned int PrefetchMode, - unsigned int NumberOfActivePlanes, - unsigned int MaxLineBufferLines, - unsigned int LineBufferSize, - unsigned int WritebackInterfaceBufferSize, double DCFCLK, double ReturnBW, - bool SynchronizedVBlank, - unsigned int dpte_group_bytes[], - unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, - double WritebackLatency, - double WritebackChunkSize, double SOCCLK, - double DRAMClockChangeLatency, - double SRExitTime, - double SREnterPlusExitTime, - double SRExitZ8Time, - double SREnterPlusExitZ8Time, double DCFCLKDeepSleep, unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], - unsigned int LBBitPerPixel[], double SwathWidthY[], double SwathWidthC[], - double HRatio[], - double HRatioChroma[], - unsigned int vtaps[], - unsigned int VTAPsChroma[], - double VRatio[], - double VRatioChroma[], - unsigned int HTotal[], - double PixelClock[], - unsigned int BlendingAndTiming[], unsigned int DPPPerPlane[], double BytePerPixelDETY[], double BytePerPixelDETC[], - double DSTXAfterScaler[], - double DSTYAfterScaler[], - bool WritebackEnable[], - enum source_format_class WritebackPixelFormat[], - double WritebackDestinationWidth[], - double WritebackDestinationHeight[], - double WritebackSourceHeight[], bool UnboundedRequestEnabled, int unsigned CompressedBufferSizeInkByte, enum clock_change_support *DRAMClockChangeSupport, - double *UrgentWatermark, - double *WritebackUrgentWatermark, - double *DRAMClockChangeWatermark, - double *WritebackDRAMClockChangeWatermark, double *StutterExitWatermark, double *StutterEnterPlusExitWatermark, double *Z8StutterExitWatermark, - double *Z8StutterEnterPlusExitWatermark, - double *MinActiveDRAMClockChangeLatencySupported) + double *Z8StutterEnterPlusExitWatermark) { struct vba_vars_st *v = &mode_lib->vba; double EffectiveLBLatencyHidingY; @@ -5643,103 +5499,103 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double TotalPixelBW = 0.0; int k, j; - *UrgentWatermark = UrgentLatency + ExtraLatency; + v->UrgentWatermark = UrgentLatency + ExtraLatency; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); - dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark); + dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark); #endif - *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark; + v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark; #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency); - dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark); + dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency); + dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark); #endif v->TotalActiveWriteback = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (WritebackEnable[k] == true) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->WritebackEnable[k] == true) { v->TotalActiveWriteback = v->TotalActiveWriteback + 1; } } if (v->TotalActiveWriteback <= 1) { - *WritebackUrgentWatermark = WritebackLatency; + v->WritebackUrgentWatermark = v->WritebackLatency; } else { - *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; } if (v->TotalActiveWriteback <= 1) { - *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency; + v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency; } else { - *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; } - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { TotalPixelBW = TotalPixelBW - + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) - / (HTotal[k] / PixelClock[k]); + + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) + / (v->HTotal[k] / v->PixelClock[k]); } - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { double EffectiveDETBufferSizeY = DETBufferSizeY[k]; v->LBLatencyHidingSourceLinesY = dml_min( - (double) MaxLineBufferLines, - dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1); + (double) v->MaxLineBufferLines, + dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); v->LBLatencyHidingSourceLinesC = dml_min( - (double) MaxLineBufferLines, - dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1); + (double) v->MaxLineBufferLines, + dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); - EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]); + EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); - EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); + EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); if (UnboundedRequestEnabled) { EffectiveDETBufferSizeY = EffectiveDETBufferSizeY - + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW; + + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; } LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); - FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; + FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; if (BytePerPixelDETC[k] > 0) { LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); - FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; + FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; } else { LinesInDETC = 0; FullDETBufferingTimeC = 999999; } ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY - - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; + - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; - if (NumberOfActivePlanes > 1) { + if (v->NumberOfActivePlanes > 1) { ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; + - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; } if (BytePerPixelDETC[k] > 0) { ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; + - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; - if (NumberOfActivePlanes > 1) { + if (v->NumberOfActivePlanes > 1) { ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; + - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k]; } v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); } else { v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; } - if (WritebackEnable[k] == true) { - WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 - / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); - if (WritebackPixelFormat[k] == dm_444_64) { + if (v->WritebackEnable[k] == true) { + WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 + / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); + if (v->WritebackPixelFormat[k] == dm_444_64) { WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; } WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; @@ -5749,14 +5605,14 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( v->MinActiveDRAMClockChangeMargin = 999999; PlaneWithMinActiveDRAMClockChangeMargin = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; - if (BlendingAndTiming[k] == k) { + if (v->BlendingAndTiming[k] == k) { PlaneWithMinActiveDRAMClockChangeMargin = k; } else { - for (j = 0; j < NumberOfActivePlanes; ++j) { - if (BlendingAndTiming[k] == j) { + for (j = 0; j < v->NumberOfActivePlanes; ++j) { + if (v->BlendingAndTiming[k] == j) { PlaneWithMinActiveDRAMClockChangeMargin = j; } } @@ -5764,11 +5620,11 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( } } - *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency; + v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ; SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; } @@ -5776,25 +5632,25 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( v->TotalNumberOfActiveOTG = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (BlendingAndTiming[k] == k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->BlendingAndTiming[k] == k) { v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; } } if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { *DRAMClockChangeSupport = dm_dram_clock_change_vactive; - } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 + } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { *DRAMClockChangeSupport = dm_dram_clock_change_vblank; } else { *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; } - *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; - *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); - *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; - *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; + *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; + *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); + *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; + *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); From 21485d3da659b66c37d99071623af83ee1c6733d Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Aug 2022 13:34:08 -0700 Subject: [PATCH 25/29] drm/amd/display: Reduce number of arguments of dml31's CalculateFlipSchedule() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most of the arguments are identical between the two call sites and they can be accessed through the 'struct vba_vars_st' pointer. This reduces the total amount of stack space that dml31_ModeSupportAndSystemConfigurationFull() uses by 112 bytes with LLVM 16 (1976 -> 1864), helping clear up the following clang warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:3908:6: error: stack frame size (2216) exceeds limit (2048) in 'dml31_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) ^ 1 error generated. Link: https://github.com/ClangBuiltLinux/linux/issues/1681 Reported-by: "Sudip Mukherjee (Codethink)" Tested-by: Maíra Canal Reviewed-by: Rodrigo Siqueira Signed-off-by: Nathan Chancellor Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml/dcn31/display_mode_vba_31.c | 172 +++++------------- 1 file changed, 47 insertions(+), 125 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 6790a18eebc8..8ca66f1644dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -251,33 +251,13 @@ static void CalculateRowBandwidth( static void CalculateFlipSchedule( struct display_mode_lib *mode_lib, + unsigned int k, double HostVMInefficiencyFactor, double UrgentExtraLatency, double UrgentLatency, - unsigned int GPUVMMaxPageTableLevels, - bool HostVMEnable, - unsigned int HostVMMaxNonCachedPageTableLevels, - bool GPUVMEnable, - double HostVMMinPageSize, double PDEAndMetaPTEBytesPerFrame, double MetaRowBytes, - double DPTEBytesPerRow, - double BandwidthAvailableForImmediateFlip, - unsigned int TotImmediateFlipBytes, - enum source_format_class SourcePixelFormat, - double LineTime, - double VRatio, - double VRatioChroma, - double Tno_bw, - bool DCCEnable, - unsigned int dpte_row_height, - unsigned int meta_row_height, - unsigned int dpte_row_height_chroma, - unsigned int meta_row_height_chroma, - double *DestinationLinesToRequestVMInImmediateFlip, - double *DestinationLinesToRequestRowInImmediateFlip, - double *final_flip_bw, - bool *ImmediateFlipSupportedForPipe); + double DPTEBytesPerRow); static double CalculateWriteBackDelay( enum source_format_class WritebackPixelFormat, double WritebackHRatio, @@ -2868,33 +2848,13 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman for (k = 0; k < v->NumberOfActivePlanes; ++k) { CalculateFlipSchedule( mode_lib, + k, HostVMInefficiencyFactor, v->UrgentExtraLatency, v->UrgentLatency, - v->GPUVMMaxPageTableLevels, - v->HostVMEnable, - v->HostVMMaxNonCachedPageTableLevels, - v->GPUVMEnable, - v->HostVMMinPageSize, v->PDEAndMetaPTEBytesFrame[k], v->MetaRowByte[k], - v->PixelPTEBytesPerRow[k], - v->BandwidthAvailableForImmediateFlip, - v->TotImmediateFlipBytes, - v->SourcePixelFormat[k], - v->HTotal[k] / v->PixelClock[k], - v->VRatio[k], - v->VRatioChroma[k], - v->Tno_bw[k], - v->DCCEnable[k], - v->dpte_row_height[k], - v->meta_row_height[k], - v->dpte_row_height_chroma[k], - v->meta_row_height_chroma[k], - &v->DestinationLinesToRequestVMInImmediateFlip[k], - &v->DestinationLinesToRequestRowInImmediateFlip[k], - &v->final_flip_bw[k], - &v->ImmediateFlipSupportedForPipe[k]); + v->PixelPTEBytesPerRow[k]); } v->total_dcn_read_bw_with_flip = 0.0; @@ -3526,61 +3486,43 @@ static void CalculateRowBandwidth( static void CalculateFlipSchedule( struct display_mode_lib *mode_lib, + unsigned int k, double HostVMInefficiencyFactor, double UrgentExtraLatency, double UrgentLatency, - unsigned int GPUVMMaxPageTableLevels, - bool HostVMEnable, - unsigned int HostVMMaxNonCachedPageTableLevels, - bool GPUVMEnable, - double HostVMMinPageSize, double PDEAndMetaPTEBytesPerFrame, double MetaRowBytes, - double DPTEBytesPerRow, - double BandwidthAvailableForImmediateFlip, - unsigned int TotImmediateFlipBytes, - enum source_format_class SourcePixelFormat, - double LineTime, - double VRatio, - double VRatioChroma, - double Tno_bw, - bool DCCEnable, - unsigned int dpte_row_height, - unsigned int meta_row_height, - unsigned int dpte_row_height_chroma, - unsigned int meta_row_height_chroma, - double *DestinationLinesToRequestVMInImmediateFlip, - double *DestinationLinesToRequestRowInImmediateFlip, - double *final_flip_bw, - bool *ImmediateFlipSupportedForPipe) + double DPTEBytesPerRow) { + struct vba_vars_st *v = &mode_lib->vba; double min_row_time = 0.0; unsigned int HostVMDynamicLevelsTrips; double TimeForFetchingMetaPTEImmediateFlip; double TimeForFetchingRowInVBlankImmediateFlip; double ImmediateFlipBW; + double LineTime = v->HTotal[k] / v->PixelClock[k]; - if (GPUVMEnable == true && HostVMEnable == true) { - HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + if (v->GPUVMEnable == true && v->HostVMEnable == true) { + HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; } else { HostVMDynamicLevelsTrips = 0; } - if (GPUVMEnable == true || DCCEnable == true) { - ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; + if (v->GPUVMEnable == true || v->DCCEnable[k] == true) { + ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes; } - if (GPUVMEnable == true) { + if (v->GPUVMEnable == true) { TimeForFetchingMetaPTEImmediateFlip = dml_max3( - Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, - UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), + v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, + UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0); } else { TimeForFetchingMetaPTEImmediateFlip = 0; } - *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; - if ((GPUVMEnable == true || DCCEnable == true)) { + v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; + if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { TimeForFetchingRowInVBlankImmediateFlip = dml_max3( (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevelsTrips + 1), @@ -3589,54 +3531,54 @@ static void CalculateFlipSchedule( TimeForFetchingRowInVBlankImmediateFlip = 0; } - *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; + v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; - if (GPUVMEnable == true) { - *final_flip_bw = dml_max( - PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), - (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); - } else if ((GPUVMEnable == true || DCCEnable == true)) { - *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime); + if (v->GPUVMEnable == true) { + v->final_flip_bw[k] = dml_max( + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime), + (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime)); + } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { + v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime); } else { - *final_flip_bw = 0; + v->final_flip_bw[k] = 0; } - if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { - if (GPUVMEnable == true && DCCEnable != true) { - min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); - } else if (GPUVMEnable != true && DCCEnable == true) { - min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); + if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { + if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { + min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); + } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { + min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); } else { min_row_time = dml_min4( - dpte_row_height * LineTime / VRatio, - meta_row_height * LineTime / VRatio, - dpte_row_height_chroma * LineTime / VRatioChroma, - meta_row_height_chroma * LineTime / VRatioChroma); + v->dpte_row_height[k] * LineTime / v->VRatio[k], + v->meta_row_height[k] * LineTime / v->VRatio[k], + v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k], + v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); } } else { - if (GPUVMEnable == true && DCCEnable != true) { - min_row_time = dpte_row_height * LineTime / VRatio; - } else if (GPUVMEnable != true && DCCEnable == true) { - min_row_time = meta_row_height * LineTime / VRatio; + if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { + min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k]; + } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { + min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k]; } else { - min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); + min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]); } } - if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 + if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { - *ImmediateFlipSupportedForPipe = false; + v->ImmediateFlipSupportedForPipe[k] = false; } else { - *ImmediateFlipSupportedForPipe = true; + v->ImmediateFlipSupportedForPipe[k] = true; } #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip); - dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip); + dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]); + dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]); dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); - dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); + dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]); #endif } @@ -5228,33 +5170,13 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (k = 0; k < v->NumberOfActivePlanes; k++) { CalculateFlipSchedule( mode_lib, + k, HostVMInefficiencyFactor, v->ExtraLatency, v->UrgLatency[i], - v->GPUVMMaxPageTableLevels, - v->HostVMEnable, - v->HostVMMaxNonCachedPageTableLevels, - v->GPUVMEnable, - v->HostVMMinPageSize, v->PDEAndMetaPTEBytesPerFrame[i][j][k], v->MetaRowBytes[i][j][k], - v->DPTEBytesPerRow[i][j][k], - v->BandwidthAvailableForImmediateFlip, - v->TotImmediateFlipBytes, - v->SourcePixelFormat[k], - v->HTotal[k] / v->PixelClock[k], - v->VRatio[k], - v->VRatioChroma[k], - v->Tno_bw[k], - v->DCCEnable[k], - v->dpte_row_height[k], - v->meta_row_height[k], - v->dpte_row_height_chroma[k], - v->meta_row_height_chroma[k], - &v->DestinationLinesToRequestVMInImmediateFlip[k], - &v->DestinationLinesToRequestRowInImmediateFlip[k], - &v->final_flip_bw[k], - &v->ImmediateFlipSupportedForPipe[k]); + v->DPTEBytesPerRow[i][j][k]); } v->total_dcn_read_bw_with_flip = 0.0; for (k = 0; k < v->NumberOfActivePlanes; k++) { From 41012d715d5d7b9751ae84b8fb255e404ac9c5d0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Aug 2022 13:34:09 -0700 Subject: [PATCH 26/29] drm/amd/display: Mark dml30's UseMinimumDCFCLK() as noinline for stack usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function consumes a lot of stack space and it blows up the size of dml30_ModeSupportAndSystemConfigurationFull() with clang: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3542:6: error: stack frame size (2200) exceeds limit (2048) in 'dml30_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) ^ 1 error generated. Commit a0f7e7f759cf ("drm/amd/display: fix i386 frame size warning") aimed to address this for i386 but it did not help x86_64. To reduce the amount of stack space that dml30_ModeSupportAndSystemConfigurationFull() uses, mark UseMinimumDCFCLK() as noinline, using the _for_stack variant for documentation. While this will increase the total amount of stack usage between the two functions (1632 and 1304 bytes respectively), it will make sure both stay below the limit of 2048 bytes for these files. The aforementioned change does help reduce UseMinimumDCFCLK()'s stack usage so it should not be reverted in favor of this change. Link: https://github.com/ClangBuiltLinux/linux/issues/1681 Reported-by: "Sudip Mukherjee (Codethink)" Tested-by: Maíra Canal Reviewed-by: Rodrigo Siqueira Signed-off-by: Nathan Chancellor Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 876b321b30ca..1cb858dd6ea0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -6610,8 +6610,7 @@ static double CalculateUrgentLatency( return ret; } - -static void UseMinimumDCFCLK( +static noinline_for_stack void UseMinimumDCFCLK( struct display_mode_lib *mode_lib, int MaxInterDCNTileRepeaters, int MaxPrefetchMode, From dc1d85cb790f2091eea074cee24a704b2d6c4a06 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 9 Sep 2022 11:47:20 -0400 Subject: [PATCH 27/29] drm/amdgpu: move nbio ih_doorbell_range() into ih code for vega MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This mirrors what we do for other asics and this way we are sure the ih doorbell range is properly initialized. There is a comment about the way doorbells on gfx9 work that requires that they are initialized for other IPs before GFX is initialized. In this case IH is initialized before GFX, so there should be no issue. This is a prerequisite for fixing the Unsupported Request error reported through AER during driver load. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216373 The error was unnoticed before and got visible because of the commit referenced below. This doesn't fix anything in the commit below, rather fixes the issue in amdgpu exposed by the commit. The reference is only to associate this commit with below one so that both go together. Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()") Acked-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 --- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 4 ++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index fde6154f2009..7324e304288e 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1224,9 +1224,6 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev) ring->use_doorbell, ring->doorbell_index, adev->doorbell_index.sdma_doorbell_range); } - - adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, - adev->irq.ih.doorbell_index); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 03b7066471f9..1e83db0c5438 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -289,6 +289,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) } } + if (!amdgpu_sriov_vf(adev)) + adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->irq.ih.doorbell_index); + pci_set_master(adev->pdev); /* enable interrupts */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 2022ffbb8dba..59dfca093155 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -340,6 +340,10 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) } } + if (!amdgpu_sriov_vf(adev)) + adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->irq.ih.doorbell_index); + pci_set_master(adev->pdev); /* enable interrupts */ From e3163bc8ffdfdb405e10530b140135b2ee487f89 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 9 Sep 2022 11:53:27 -0400 Subject: [PATCH 28/29] drm/amdgpu: move nbio sdma_doorbell_range() into sdma code for vega MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This mirrors what we do for other asics and this way we are sure the sdma doorbell range is properly initialized. There is a comment about the way doorbells on gfx9 work that requires that they are initialized for other IPs before GFX is initialized. However, the statement says that it applies to multimedia as well, but the VCN code currently initializes doorbells after GFX and there are no known issues there. In my testing at least I don't see any problems on SDMA. This is a prerequisite for fixing the Unsupported Request error reported through AER during driver load. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216373 The error was unnoticed before and got visible because of the commit referenced below. This doesn't fix anything in the commit below, rather fixes the issue in amdgpu exposed by the commit. The reference is only to associate this commit with below one so that both go together. Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()") Acked-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/soc15.c | 22 ---------------------- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 65181efba50e..56424f75dd2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1504,6 +1504,11 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) WREG32_SDMA(i, mmSDMA0_CNTL, temp); if (!amdgpu_sriov_vf(adev)) { + ring = &adev->sdma.instance[i].ring; + adev->nbio.funcs->sdma_doorbell_range(adev, i, + ring->use_doorbell, ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); + /* unhalt engine */ temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL); temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 7324e304288e..183024d7c184 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1211,22 +1211,6 @@ static int soc15_common_sw_fini(void *handle) return 0; } -static void soc15_doorbell_range_init(struct amdgpu_device *adev) -{ - int i; - struct amdgpu_ring *ring; - - /* sdma/ih doorbell range are programed by hypervisor */ - if (!amdgpu_sriov_vf(adev)) { - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; - adev->nbio.funcs->sdma_doorbell_range(adev, i, - ring->use_doorbell, ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); - } - } -} - static int soc15_common_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1246,12 +1230,6 @@ static int soc15_common_hw_init(void *handle) /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); - /* HW doorbell routing policy: doorbell writing not - * in SDMA/IH/MM/ACV range will be routed to CP. So - * we need to init SDMA/IH/MM/ACV doorbell range prior - * to CP ip block init and ring test. - */ - soc15_doorbell_range_init(adev); return 0; } From a8671493d2074950553da3cf07d1be43185ef6c6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 30 Aug 2022 10:59:49 -0400 Subject: [PATCH 29/29] drm/amdgpu: make sure to init common IP before gmc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move common IP init before GMC init so that HDP gets remapped before GMC init which uses it. This fixes the Unsupported Request error reported through AER during driver load. The error happens as a write happens to the remap offset before real remapping is done. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216373 The error was unnoticed before and got visible because of the commit referenced below. This doesn't fix anything in the commit below, rather fixes the issue in amdgpu exposed by the commit. The reference is only to associate this commit with below one so that both go together. Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()") Acked-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1400abee9f40..be7aff2d4a57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2365,8 +2365,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } adev->ip_blocks[i].status.sw = true; - /* need to do gmc hw init early so we can allocate gpu mem */ - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { + /* need to do common hw init early so everything is set up for gmc */ + r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); + if (r) { + DRM_ERROR("hw_init %d failed %d\n", i, r); + goto init_failed; + } + adev->ip_blocks[i].status.hw = true; + } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { + /* need to do gmc hw init early so we can allocate gpu mem */ /* Try to reserve bad pages early */ if (amdgpu_sriov_vf(adev)) amdgpu_virt_exchange_data(adev); @@ -3052,8 +3060,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) int i, r; static enum amd_ip_block_type ip_order[] = { - AMD_IP_BLOCK_TYPE_GMC, AMD_IP_BLOCK_TYPE_COMMON, + AMD_IP_BLOCK_TYPE_GMC, AMD_IP_BLOCK_TYPE_PSP, AMD_IP_BLOCK_TYPE_IH, };