From 1fc636c9b3b251d065dbf04eb2293fb6de9fdcf9 Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Mon, 13 Jan 2020 08:15:19 -0500 Subject: [PATCH 001/113] drm/amdgpu/display: Use u64 divide macro for round up division [why] Fix the build failure (link error) on i386 architecture: undefined reference to `__udivdi3' [how] Switch DIV_ROUND_UP to DIV64_U64_ROUND_UP Reported-by: Randy Dunlap Reviewed-by: Harry Wentland Signed-off-by: Mikita Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 52fb207393ef..96b391e4b3e7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -534,7 +534,7 @@ static int kbps_to_peak_pbn(int kbps) peak_kbps *= 1006; peak_kbps = div_u64(peak_kbps, 1000); - return (int) DIV_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); + return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *params, From a434b94c5a6c56ea7078d43d932284005e08ed62 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 13 Dec 2019 11:31:48 -0500 Subject: [PATCH 002/113] drm/amdkfd: Improve function get_sdma_rlc_reg_offset() (v2) The SOC15_REG_OFFSET() macro needs to dereference the adev->reg_offset[IP] pointer, which is sometimes NULL when there are fewer than 8 sdma engines. Avoid that by no longer initializing the whole array unconditionally; compute only the requested engine's register base in a switch statement instead. 
v2: squash in warning fixes Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 66 +++++++++++++------ 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 3c119407dc34..86e71fd6ddea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -71,32 +71,56 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t sdma_engine_reg_base[8] = { - SOC15_REG_OFFSET(SDMA0, 0, - mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA1, 0, - mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA2, 0, - mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA3, 0, - mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA4, 0, - mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA5, 0, - mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA6, 0, - mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA7, 0, - mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL - }; + uint32_t sdma_engine_reg_base = 0; + uint32_t sdma_rlc_reg_offset; - uint32_t retval = sdma_engine_reg_base[engine_id] + switch (engine_id) { + default: + dev_warn(adev->dev, + "Invalid sdma engine id (%d), using engine id 0\n", + engine_id); + /* fall through */ + case 0: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; + break; + case 1: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL; + break; + case 2: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0, + mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL; + break; + case 3: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0, + 
mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL; + break; + case 4: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0, + mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL; + break; + case 5: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0, + mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL; + break; + case 6: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0, + mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL; + break; + case 7: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0, + mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL; + break; + } + + sdma_rlc_reg_offset = sdma_engine_reg_base + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, - queue_id, retval); + queue_id, sdma_rlc_reg_offset); - return retval; + return sdma_rlc_reg_offset; } static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, From 9530273ec90cc0614f6ac56d0c024e2f39886419 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 7 Jan 2020 16:57:39 +0800 Subject: [PATCH 003/113] drm/amd/powerplay: cover the powerplay implementation details V3 This saves callers a lot of trouble, as they no longer need to care whether the swSMU or the traditional powerplay routine should be used. 
V2: apply the fixes to vi.c and cik.c also V3: squash in oops fix Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 12 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 53 +------ drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c | 160 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 24 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 - drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 8 +- drivers/gpu/drm/amd/amdgpu/cik.c | 12 +- drivers/gpu/drm/amd/amdgpu/cik.h | 2 - drivers/gpu/drm/amd/amdgpu/nv.c | 8 +- drivers/gpu/drm/amd/amdgpu/soc15.c | 85 ++--------- drivers/gpu/drm/amd/amdgpu/vi.c | 48 +------ drivers/gpu/drm/amd/amdgpu/vi.h | 2 - 12 files changed, 212 insertions(+), 208 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d3da9dde4ee1..88e10b956413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -613,15 +613,9 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (is_support_sw_smu(adev)) - smu_switch_power_profile(&adev->smu, - PP_SMC_POWER_PROFILE_COMPUTE, - !idle); - else if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->switch_power_profile) - amdgpu_dpm_switch_power_profile(adev, - PP_SMC_POWER_PROFILE_COMPUTE, - !idle); + amdgpu_dpm_switch_power_profile(adev, + PP_SMC_POWER_PROFILE_COMPUTE, + !idle); } bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9b4c18b3546f..1bbea9669204 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2345,14 +2345,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; /* handle putting the SMC in 
the appropriate state */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { - if (is_support_sw_smu(adev)) { - r = smu_set_mp1_state(&adev->smu, adev->mp1_state); - } else if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_mp1_state) { - r = adev->powerplay.pp_funcs->set_mp1_state( - adev->powerplay.pp_handle, - adev->mp1_state); - } + r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); if (r) { DRM_ERROR("SMC failed to set mp1 state %d, %d\n", adev->mp1_state, r); @@ -4359,55 +4352,21 @@ int amdgpu_device_baco_enter(struct drm_device *dev) if (ras && ras->supported) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - int ret; - - ret = smu_baco_enter(smu); - if (ret) - return ret; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; - } - - return 0; + return amdgpu_dpm_baco_enter(adev); } int amdgpu_device_baco_exit(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; if (!amdgpu_device_supports_baco(adev->ddev)) return -ENOTSUPP; - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - int ret; - - ret = smu_baco_exit(smu); - if (ret) - return ret; - - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; - } + ret = amdgpu_dpm_baco_exit(adev); + if (ret) + return ret; if (ras && ras->supported) 
adev->nbio.funcs->enable_doorbell_interrupt(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index cd76fbf4385d..6c7dca1da992 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -983,3 +983,163 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block return ret; } + +int amdgpu_dpm_baco_enter(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_baco_enter(smu); + } else { + if (!pp_funcs || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 1); + } + + return ret; +} + +int amdgpu_dpm_baco_exit(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_baco_exit(smu); + } else { + if (!pp_funcs || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* exit BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 0); + } + + return ret; +} + +int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, + enum pp_mp1_state mp1_state) +{ + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_set_mp1_state(&adev->smu, mp1_state); + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_mp1_state) { + ret = adev->powerplay.pp_funcs->set_mp1_state( + adev->powerplay.pp_handle, + mp1_state); + } + + return ret; +} + +bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + bool baco_cap; + + if 
(is_support_sw_smu(adev)) { + return smu_baco_is_support(smu); + } else { + if (!pp_funcs || !pp_funcs->get_asic_baco_capability) + return false; + + if (pp_funcs->get_asic_baco_capability(pp_handle, &baco_cap)) + return false; + + return baco_cap ? true : false; + } +} + +int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + + if (is_support_sw_smu(adev)) { + return smu_mode2_reset(smu); + } else { + if (!pp_funcs || !pp_funcs->asic_reset_mode_2) + return -ENOENT; + + return pp_funcs->asic_reset_mode_2(pp_handle); + } +} + +int amdgpu_dpm_baco_reset(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + dev_info(adev->dev, "GPU BACO reset\n"); + + if (is_support_sw_smu(adev)) { + ret = smu_baco_enter(smu); + if (ret) + return ret; + + ret = smu_baco_exit(smu); + if (ret) + return ret; + } else { + if (!pp_funcs + || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 1); + if (ret) + return ret; + + /* exit BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 0); + if (ret) + return ret; + } + + return 0; +} + +int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, + enum PP_SMC_POWER_PROFILE type, + bool en) +{ + int ret = 0; + + if (is_support_sw_smu(adev)) + ret = smu_switch_power_profile(&adev->smu, type, en); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->switch_power_profile) + ret = adev->powerplay.pp_funcs->switch_power_profile( + adev->powerplay.pp_handle, type, en); + + return ret; +} + +int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, + uint32_t pstate) +{ + int ret = 0; + + if (is_support_sw_smu_xgmi(adev)) + ret = 
smu_set_xgmi_pstate(&adev->smu, pstate); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_xgmi_pstate) + ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, + pstate); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 2cfb677272af..902ca6c00cca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -341,10 +341,6 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ (adev)->powerplay.pp_handle, request)) -#define amdgpu_dpm_switch_power_profile(adev, type, en) \ - ((adev)->powerplay.pp_funcs->switch_power_profile(\ - (adev)->powerplay.pp_handle, type, en)) - #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \ ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ (adev)->powerplay.pp_handle, msg_id)) @@ -517,4 +513,24 @@ extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low); extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low); +int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, + uint32_t pstate); + +int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, + enum PP_SMC_POWER_PROFILE type, + bool en); + +int amdgpu_dpm_baco_reset(struct amdgpu_device *adev); + +int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev); + +bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); + +int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, + enum pp_mp1_state mp1_state); + +int amdgpu_dpm_baco_exit(struct amdgpu_device *adev); + +int amdgpu_dpm_baco_enter(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index db7b2b3f9966..b88b8b82bb64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -543,12 +543,6 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (!(adev->pm.pp_feature 
& PP_GFXOFF_MASK)) return; - if (!is_support_sw_smu(adev) && - (!adev->powerplay.pp_funcs || - !adev->powerplay.pp_funcs->set_powergating_by_smu)) - return; - - mutex_lock(&adev->gfx.gfx_off_mutex); if (!enable) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 5cf920d9358b..c626f3e59ff9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -291,13 +291,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); - if (is_support_sw_smu_xgmi(adev)) - ret = smu_set_xgmi_pstate(&adev->smu, pstate); - else if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_xgmi_pstate) - ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, - pstate); - + ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate); if (ret) { dev_err(adev->dev, "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index e9822ea8bb19..006f21ef7ddf 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1312,19 +1312,13 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev) static bool cik_asic_supports_baco(struct amdgpu_device *adev) { - bool baco_support; - switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: - smu7_asic_get_baco_capability(adev, &baco_support); - break; + return amdgpu_dpm_is_baco_supported(adev); default: - baco_support = false; - break; + return false; } - - return baco_support; } static enum amd_reset_method @@ -1366,7 +1360,7 @@ static int cik_asic_reset(struct amdgpu_device *adev) if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); - r = smu7_asic_baco_reset(adev); + r = amdgpu_dpm_baco_reset(adev); } else { r = cik_asic_pci_config_reset(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h 
b/drivers/gpu/drm/amd/amdgpu/cik.h index 9870bf27870e..f91ab4c246b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.h +++ b/drivers/gpu/drm/amd/amdgpu/cik.h @@ -31,7 +31,5 @@ void cik_srbm_select(struct amdgpu_device *adev, int cik_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap); -int smu7_asic_baco_reset(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index b0229543e887..42ede3aa6dbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -478,7 +478,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -489,7 +489,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); @@ -502,7 +502,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if 
(adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -513,7 +513,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 714cf4dfd0a7..25cfc636c732 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -479,62 +479,18 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) return ret; } -static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) -{ - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - - *cap = smu_baco_is_support(smu); - return 0; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { - *cap = false; - return -ENOENT; - } - - return pp_funcs->get_asic_baco_capability(pp_handle, cap); - } -} - static int soc15_asic_baco_reset(struct amdgpu_device *adev) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; /* avoid NBIF got stuck when do RAS recovery in BACO reset */ if (ras && ras->supported) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); - dev_info(adev->dev, "GPU BACO reset\n"); - - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - int ret; - - ret = smu_baco_enter(smu); - if (ret) - return ret; - - ret = smu_baco_exit(smu); - if (ret) - return ret; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct 
amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; - - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; - } + ret = amdgpu_dpm_baco_reset(adev); + if (ret) + return ret; /* re-enable doorbell interrupt after BACO exit */ if (ras && ras->supported) @@ -543,17 +499,6 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) return 0; } -static int soc15_mode2_reset(struct amdgpu_device *adev) -{ - if (is_support_sw_smu(adev)) - return smu_mode2_reset(&adev->smu); - if (!adev->powerplay.pp_funcs || - !adev->powerplay.pp_funcs->asic_reset_mode_2) - return -ENOENT; - - return adev->powerplay.pp_funcs->asic_reset_mode_2(adev->powerplay.pp_handle); -} - static enum amd_reset_method soc15_asic_reset_method(struct amdgpu_device *adev) { @@ -567,11 +512,11 @@ soc15_asic_reset_method(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_ARCTURUS: - soc15_asic_get_baco_capability(adev, &baco_reset); + baco_reset = amdgpu_dpm_is_baco_supported(adev); break; case CHIP_VEGA20: if (adev->psp.sos_fw_version >= 0x80067) - soc15_asic_get_baco_capability(adev, &baco_reset); + baco_reset = amdgpu_dpm_is_baco_supported(adev); /* * 1. 
PMFW version > 0x284300: all cases use baco @@ -598,7 +543,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev) amdgpu_inc_vram_lost(adev); return soc15_asic_baco_reset(adev); case AMD_RESET_METHOD_MODE2: - return soc15_mode2_reset(adev); + return amdgpu_dpm_mode2_reset(adev); default: if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); @@ -608,25 +553,18 @@ static int soc15_asic_reset(struct amdgpu_device *adev) static bool soc15_supports_baco(struct amdgpu_device *adev) { - bool baco_support; - switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_ARCTURUS: - soc15_asic_get_baco_capability(adev, &baco_support); - break; + return amdgpu_dpm_is_baco_supported(adev); case CHIP_VEGA20: if (adev->psp.sos_fw_version >= 0x80067) - soc15_asic_get_baco_capability(adev, &baco_support); - else - baco_support = false; - break; + return amdgpu_dpm_is_baco_supported(adev); + return false; default: return false; } - - return baco_support; } /*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock, @@ -846,8 +784,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block); - if (is_support_sw_smu(adev)) - amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index e4f4201b3c34..78b35901643b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -689,40 +689,6 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev) return -EINVAL; } -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) -{ - void *pp_handle = 
adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { - *cap = false; - return -ENOENT; - } - - return pp_funcs->get_asic_baco_capability(pp_handle, cap); -} - -int smu7_asic_baco_reset(struct amdgpu_device *adev) -{ - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; - - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; - - dev_info(adev->dev, "GPU BACO reset\n"); - - return 0; -} - /** * vi_asic_pci_config_reset - soft reset GPU * @@ -747,8 +713,6 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev) static bool vi_asic_supports_baco(struct amdgpu_device *adev) { - bool baco_support; - switch (adev->asic_type) { case CHIP_FIJI: case CHIP_TONGA: @@ -756,14 +720,10 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev) case CHIP_POLARIS11: case CHIP_POLARIS12: case CHIP_TOPAZ: - smu7_asic_get_baco_capability(adev, &baco_support); - break; + return amdgpu_dpm_is_baco_supported(adev); default: - baco_support = false; - break; + return false; } - - return baco_support; } static enum amd_reset_method @@ -778,7 +738,7 @@ vi_asic_reset_method(struct amdgpu_device *adev) case CHIP_POLARIS11: case CHIP_POLARIS12: case CHIP_TOPAZ: - smu7_asic_get_baco_capability(adev, &baco_reset); + baco_reset = amdgpu_dpm_is_baco_supported(adev); break; default: baco_reset = false; @@ -807,7 +767,7 @@ static int vi_asic_reset(struct amdgpu_device *adev) if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); - r = smu7_asic_baco_reset(adev); + r = amdgpu_dpm_baco_reset(adev); } else { r = vi_asic_pci_config_reset(adev); } diff 
--git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h index 40d4174913a4..defb4aaf929a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.h +++ b/drivers/gpu/drm/amd/amdgpu/vi.h @@ -31,7 +31,5 @@ void vi_srbm_select(struct amdgpu_device *adev, int vi_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap); -int smu7_asic_baco_reset(struct amdgpu_device *adev); #endif From 5021e9a83160dab64ccc0460e820340d71fafe14 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 8 Jan 2020 10:32:55 +0100 Subject: [PATCH 004/113] drm/amdgpu: catch amdgpu_irq_add_id failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not ignore amdgpu_irq_add_id return value while registering VMC page fault interrupt. Signed-off-by: Nirmoy Das Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index da9765ff45d6..7dc8c068c62a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -720,6 +720,10 @@ static int gmc_v10_0_sw_init(void *handle) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT, &adev->gmc.vm_fault); + + if (r) + return r; + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, &adev->gmc.vm_fault); From 2d6605911d1246764ade5e9eee1f9c1cfb4955b5 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Tue, 7 Jan 2020 15:45:01 -0500 Subject: [PATCH 005/113] drm/amdgpu/vcn2.5: fix PSP FW loading for the second instance ucodes for instances are from different location Signed-off-by: Leo Liu Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 4ea8e20ed15d..fa9024988918 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -384,9 +384,9 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo)); WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi)); WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); offset = 0; } else { From e7ddb878484dcef0abfe318f9f64bfe013d42c04 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Tue, 7 Jan 2020 15:47:26 -0500 Subject: [PATCH 006/113] drm/amdgpu: enable VCN2.5 IP block for Arcturus With default PSP FW loading Signed-off-by: Leo Liu Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 25cfc636c732..6c75021dda0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -772,8 +772,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); } else { - if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) - amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); } if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block); From 93070deb58c964185acace9fcfdc916a6cea6644 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 9 Jan 2020 00:45:18 +0800 
Subject: [PATCH 007/113] drm/amdgpu: add query_ras_error_count function for sdma v4 query_ras_error_count function will be invoked to query single bit error count detected in sdma ip block Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 6 + drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 163 +++++++++++++++++++++++ 2 files changed, 169 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 346dcb1f7146..3cdf12284ef9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -50,6 +50,11 @@ struct amdgpu_sdma_instance { bool burst_nop; }; +struct amdgpu_sdma_ras_funcs { + int (*query_ras_error_count)(struct amdgpu_device *adev, + uint32_t instance, void *ras_error_status); +}; + struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; struct drm_gpu_scheduler *sdma_sched[AMDGPU_MAX_SDMA_INSTANCES]; @@ -61,6 +66,7 @@ struct amdgpu_sdma { uint32_t srbm_soft_reset; bool has_page_queue; struct ras_common_if *ras_if; + const struct amdgpu_sdma_ras_funcs *funcs; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ce0753a9d241..be5f7f61b863 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -82,6 +82,7 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); +static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev); static const struct soc15_reg_golden golden_settings_sdma_4[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), @@ -257,6 +258,105 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { 
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000) }; +static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = { + { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED), + 0, 0, + }, + { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED), + 0, 0, + }, + { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED), + 0, 0, + }, + { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED), + 0, 0, + }, + { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED), + 0, 0, + }, + { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED), + 0, 0, + }, 
+ { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED), + 0, 0, + }, + { "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED), + 0, 0, + }, + { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED), + 0, 0, + }, +}; + static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) { @@ 
-1686,6 +1786,7 @@ static int sdma_v4_0_early_init(void *handle) sdma_v4_0_set_buffer_funcs(adev); sdma_v4_0_set_vm_pte_funcs(adev); sdma_v4_0_set_irq_funcs(adev); + sdma_v4_0_set_ras_funcs(adev); return 0; } @@ -2414,6 +2515,68 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } +static void sdma_v4_0_get_ras_error_count(uint32_t value, + uint32_t instance, + uint32_t *sec_count) +{ + uint32_t i; + uint32_t sec_cnt; + + /* double bits error (multiple bits) error detection is not supported */ + for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) { + /* the SDMA_EDC_COUNTER register in each sdma instance + * shares the same sed shift_mask + * */ + sec_cnt = (value & + sdma_v4_0_ras_fields[i].sec_count_mask) >> + sdma_v4_0_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("Detected %s in SDMA%d, SED %d\n", + sdma_v4_0_ras_fields[i].name, + instance, sec_cnt); + *sec_count += sec_cnt; + } + } +} + +static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, + uint32_t instance, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0; + uint32_t reg_value = 0; + + reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER); + /* double bit error is not supported */ + if (reg_value) + sdma_v4_0_get_ras_error_count(reg_value, + instance, &sec_count); + /* err_data->ce_count should be initialized to 0 + * before calling into this function */ + err_data->ce_count += sec_count; + /* double bit error is not supported + * set ue count to 0 */ + err_data->ue_count = 0; + + return 0; +}; + +static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = { + .query_ras_error_count = sdma_v4_0_query_ras_error_count, +}; + +static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA20: + case CHIP_ARCTURUS: + adev->sdma.funcs = &sdma_v4_0_ras_funcs; + break; + 
default: + break; + } +} + const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 4, From 3e81ee9a78e5df7df46329e0dcfa751b59573bb7 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 9 Jan 2020 00:48:46 +0800 Subject: [PATCH 008/113] drm/amdgpu: support error reporting for sdma ip block invoke sdma query_ras_error_count to get sdma single bit error count Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 96fc538ec824..991c4eaac244 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -686,6 +686,7 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data = {0, 0, 0, NULL}; + int i; if (!obj) return -EINVAL; @@ -700,6 +701,13 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, if (adev->umc.funcs->query_ras_error_address) adev->umc.funcs->query_ras_error_address(adev, &err_data); break; + case AMDGPU_RAS_BLOCK__SDMA: + if (adev->sdma.funcs->query_ras_error_count) { + for (i = 0; i < adev->sdma.num_instances; i++) + adev->sdma.funcs->query_ras_error_count(adev, i, + &err_data); + } + break; case AMDGPU_RAS_BLOCK__GFX: if (adev->gfx.funcs->query_ras_error_count) adev->gfx.funcs->query_ras_error_count(adev, &err_data); From 1dd5ead2940903b2cf36f6725f1d6670abd6f14b Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 8 Jan 2020 21:33:49 +0800 Subject: [PATCH 009/113] drm/amdgpu: add ras_late_init and ras_fini for sdma v4 move ras_late_init and ras_fini to sdma_ras_funcs table Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 3 +++ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 6 
++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 3cdf12284ef9..485335267d78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -51,6 +51,9 @@ struct amdgpu_sdma_instance { }; struct amdgpu_sdma_ras_funcs { + int (*ras_late_init)(struct amdgpu_device *adev, + void *ras_ih_info); + void (*ras_fini)(struct amdgpu_device *adev); int (*query_ras_error_count)(struct amdgpu_device *adev, uint32_t instance, void *ras_error_status); }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index be5f7f61b863..1e0767e88d19 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1802,7 +1802,7 @@ static int sdma_v4_0_late_init(void *handle) .cb = sdma_v4_0_process_ras_data_cb, }; - return amdgpu_sdma_ras_late_init(adev, &ih_info); + return adev->sdma.funcs->ras_late_init(adev, &ih_info); } static int sdma_v4_0_sw_init(void *handle) @@ -1874,7 +1874,7 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - amdgpu_sdma_ras_fini(adev); + adev->sdma.funcs->ras_fini(adev); for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); @@ -2562,6 +2562,8 @@ static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, }; static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = { + .ras_late_init = amdgpu_sdma_ras_late_init, + .ras_fini = amdgpu_sdma_ras_fini, .query_ras_error_count = sdma_v4_0_query_ras_error_count, }; From 5e62db9df684673f4ce7187c3c02e6a995c5cde9 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 8 Jan 2020 23:28:05 +0800 Subject: [PATCH 010/113] drm/amdgpu: read sdma edc counter to clear the counters SDMA edc counter registers were added in gfx edc counters array. 
When querying the gfx error counters in that array, there is no way to differentiate the sdma instance number for different ASICs, which results in a NULL pointer access when trying to read the sdma register base address for instances greater than 2 on Vega20. In addition, this also results in wrong gfx error counters, since the sdma edc counters are actually added to them. Therefore, the sdma edc counter registers should be separated from the gfx edc counter register array and only get initialized when the driver tries to enable sdma ras. Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11 +---------- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 7 +++++++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a5492e375f29..89c04cfcfe12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4021,14 +4021,6 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA2, 0, mmSDMA2_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA3, 0, mmSDMA3_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA4, 0, mmSDMA4_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA5, 0, mmSDMA5_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA6, 0, mmSDMA6_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA7, 0, mmSDMA7_EDC_COUNTER), 0, 1, 1}, }; static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) @@ -4092,7 +4084,6 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) adev->gfx.config.max_sh_per_se; int sgpr_work_group_size = 5; int gpr_reg_size = compute_dim_x / 16 + 6; - int
sec_ded_counter_reg_size = adev->sdma.num_instances + 34; /* only support when RAS is enabled */ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) @@ -4232,7 +4223,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* read back registers to clear the counters */ mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < sec_ded_counter_reg_size; i++) { + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { gfx_v9_0_select_se_sh(adev, j, 0x0, k); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 1e0767e88d19..ec9d7873ed42 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1801,6 +1801,13 @@ static int sdma_v4_0_late_init(void *handle) struct ras_ih_if ih_info = { .cb = sdma_v4_0_process_ras_data_cb, }; + int i; + + /* read back edc counter registers to clear the counters */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + for (i = 0; i < adev->sdma.num_instances; i++) + RREG32_SDMA(i, mmSDMA0_EDC_COUNTER); + } return adev->sdma.funcs->ras_late_init(adev, &ih_info); } From 2d5ef0b42c0bca33aaaff2f1b98855e4e3225a0f Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Mon, 23 Dec 2019 15:37:24 -0500 Subject: [PATCH 011/113] drm/amd/display: Don't disable DP PHY when link loss happens [Why] There is a use case that link loss happens accidentally, and we need to recover that link loss as soon as possible. Under this circumstance, we will perform link training, and try to recover the link that's just lost. However, if link PHY is disabled before link training happens, then DP display will never come back again. Also, please note that dropping this disable_phy function call won't break USB-C hotplug functionality. 
(This line of code was firstly introduced associated with a patch to fix USB-C hotplug issue) [How] Don't disable DP transmitter and its encoder before link training happens, even if link loss is detected. Signed-off-by: Zhan Liu Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 38b0f4347383..49f48d432923 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2876,7 +2876,6 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd return false; previous_link_settings = link->cur_link_settings; - dp_disable_link_phy(link, pipe_ctx->stream->signal); perform_link_training_with_retries(&previous_link_settings, true, LINK_TRAINING_ATTEMPTS, From df5e984c8bd414561c320d6cbbb66d53abf4c7e2 Mon Sep 17 00:00:00 2001 From: Tiecheng Zhou Date: Wed, 8 Jan 2020 13:44:29 +0800 Subject: [PATCH 012/113] drm/amdgpu/sriov: workaround on rev_id for Navi12 under sriov guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0, as a consequence, the rev_id and external_rev_id are wrong. workaround it by hardcoding the rev_id to 0, which is the default value. v2. 
add comment in the code Signed-off-by: Tiecheng Zhou Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 42ede3aa6dbd..2e0f8933410e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -726,6 +726,12 @@ static int nv_common_early_init(void *handle) AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_ATHUB; + /* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0, + * as a consequence, the rev_id and external_rev_id are wrong. + * workaround it by hardcoding rev_id to 0 (default value). + */ + if (amdgpu_sriov_vf(adev)) + adev->rev_id = 0; adev->external_rev_id = adev->rev_id + 0xa; break; default: From b1ffd1e309c7b75b193ac934296f68ef8ce6f5c0 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Thu, 9 Jan 2020 15:01:41 +0800 Subject: [PATCH 013/113] drm/amd/powerplay: sw ctf for arcturus change the sw ctf setting to smu_v11_0_set_thermal_range() since software_shutdown_temp shares the same definition and name in all the smu11 project. 
Signed-off-by: Kenneth Feng Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index e804f9854027..76bddd582c60 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1154,11 +1154,12 @@ static int smu_v11_0_set_thermal_range(struct smu_context *smu, int low = SMU_THERMAL_MINIMUM_ALERT_TEMP; int high = SMU_THERMAL_MAXIMUM_ALERT_TEMP; uint32_t val; + struct smu_table_context *table_context = &smu->smu_table; + struct smu_11_0_powerplay_table *powerplay_table = table_context->power_play_table; low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP, range.min / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES); - high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, - range.max / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES); + high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, powerplay_table->software_shutdown_temp); if (low > high) return -EINVAL; From 40c9e7b5783c99bb65ab0f81ccedba783f8ffb3b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 9 Jan 2020 15:41:10 -0500 Subject: [PATCH 014/113] drm/amdgpu/powerplay: fix warning in smu_v11_0.c Cast to make min() happy. The values are well within range. 
Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 76bddd582c60..02f8c9cb89d9 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1159,7 +1159,7 @@ static int smu_v11_0_set_thermal_range(struct smu_context *smu, low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP, range.min / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES); - high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, powerplay_table->software_shutdown_temp); + high = min((uint16_t)SMU_THERMAL_MAXIMUM_ALERT_TEMP, powerplay_table->software_shutdown_temp); if (low > high) return -EINVAL; From 817396dc9f6ab2481b94071de2e586aae876e89c Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Wed, 8 Jan 2020 13:50:10 +0800 Subject: [PATCH 015/113] drm/amdgpu: add MCUMC_ADDRT0 offset to ip header file Both are needed on vega20 and arcturus chip. 
Signed-off-by: Guchun Chen Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h index 043aa695d63f..0d6b594be775 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h @@ -27,5 +27,7 @@ #define mmUMCCH0_0_EccErrCnt_BASE_IDX 0 #define mmMCA_UMC_UMC0_MCUMC_STATUST0 0x03c2 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_BASE_IDX 0 +#define mmMCA_UMC_UMC0_MCUMC_ADDRT0 0x03c4 +#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_BASE_IDX 0 #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h index 03be415e9555..ce005c674a18 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h @@ -27,5 +27,7 @@ #define mmUMCCH0_0_EccErrCnt_ARCT_BASE_IDX 1 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT 0x03c2 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT_BASE_IDX 1 +#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT 0x03c4 +#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT_BASE_IDX 1 #endif From 5d4667ec33a0d2978afb999eedc81ac54d76fe5c Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Wed, 8 Jan 2020 13:52:00 +0800 Subject: [PATCH 016/113] drm/amdgpu: calculate MCUMC_ADDRT0 per asic's UMC offset Hardcoded offset is not friendly. Another benefit of this patch is that it keeps read and write access to this register consistent with the other similar UMC registers in this file.
Signed-off-by: Guchun Chen Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 11e924dd88ff..11428b66c74e 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -30,8 +30,6 @@ #include "umc/umc_6_1_1_sh_mask.h" #include "umc/umc_6_1_2_offset.h" -#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10 - #define UMC_6_INST_DIST 0x40000 /* @@ -186,7 +184,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, uint32_t umc_inst) { uint32_t lsb, mc_umc_status_addr; - uint64_t mc_umc_status, err_addr, retired_page; + uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; struct eeprom_table_record *err_rec; uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; @@ -194,10 +192,14 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, /* UMC 6_1_2 registers */ mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT); } else { /* UMC 6_1_1 registers */ mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0); } /* skip error address process if -ENOMEM */ @@ -214,7 +216,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { - err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4); + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); /* the lowest lsb bits should be ignored */ lsb = 
REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); From 49da2ccd2d6e449b1a5cefd2b3f90e4d9a5c1adb Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 9 Jan 2020 19:38:25 +0800 Subject: [PATCH 017/113] drm/amdgpu: check sdma ras funcs pointer before accessing sdma ras funcs are not supported by ASIC prior to vega20 Signed-off-by: Hawking Zhang Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ec9d7873ed42..5d9597f5ac5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1809,7 +1809,10 @@ static int sdma_v4_0_late_init(void *handle) RREG32_SDMA(i, mmSDMA0_EDC_COUNTER); } - return adev->sdma.funcs->ras_late_init(adev, &ih_info); + if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) + return adev->sdma.funcs->ras_late_init(adev, &ih_info); + else + return 0; } static int sdma_v4_0_sw_init(void *handle) @@ -1881,7 +1884,8 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - adev->sdma.funcs->ras_fini(adev); + if (adev->sdma.funcs && adev->sdma.funcs->ras_fini) + adev->sdma.funcs->ras_fini(adev); for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); From 351d5ac55c72ee6f6ad018e2a756b5fe09fceed3 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 9 Jan 2020 10:37:56 -0500 Subject: [PATCH 018/113] drm/amd/amdgpu: add missing umc_6_1_2_sh_mask.h header file (v2) (v2): Fix preprocessor tag Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../include/asic_reg/umc/umc_6_1_2_sh_mask.h | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h diff --git 
a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h new file mode 100644 index 000000000000..a5a8c993ec3a --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _umc_6_1_2_SH_MASK_HEADER +#define _umc_6_1_2_SH_MASK_HEADER + +//UMCCH0_0_EccErrCntSel_ARCT +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntCsSel__SHIFT 0x0 +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrInt__SHIFT 0xc +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntEn__SHIFT 0xf +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntCsSel_MASK 0x0000000FL +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrInt_MASK 0x00003000L +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntEn_MASK 0x00008000L +//UMCCH0_0_EccErrCnt_ARCT +#define UMCCH0_0_EccErrCnt_ARCT__EccErrCnt__SHIFT 0x0 +#define UMCCH0_0_EccErrCnt_ARCT__EccErrCnt_MASK 0x0000FFFFL +//MCA_UMC_UMC0_MCUMC_STATUST0_ARCT +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCode__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCodeExt__SHIFT 0x10 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV0__SHIFT 0x16 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreId__SHIFT 0x20 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV1__SHIFT 0x26 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Scrub__SHIFT 0x28 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV2__SHIFT 0x29 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Poison__SHIFT 0x2b +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Deferred__SHIFT 0x2c +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UECC__SHIFT 0x2d +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__CECC__SHIFT 0x2e +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV3__SHIFT 0x2f +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Transparent__SHIFT 0x34 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__SyndV__SHIFT 0x35 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV4__SHIFT 0x36 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__TCC__SHIFT 0x37 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreIdVal__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__PCC__SHIFT 0x39 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__AddrV__SHIFT 0x3a +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__MiscV__SHIFT 0x3b +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__En__SHIFT 0x3c +#define 
MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UC__SHIFT 0x3d +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Overflow__SHIFT 0x3e +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Val__SHIFT 0x3f +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCode_MASK 0x000000000000FFFFL +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCodeExt_MASK 0x00000000003F0000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV0_MASK 0x00000000FFC00000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreId_MASK 0x0000003F00000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV1_MASK 0x000000C000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Scrub_MASK 0x0000010000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV2_MASK 0x0000060000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Poison_MASK 0x0000080000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Deferred_MASK 0x0000100000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UECC_MASK 0x0000200000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__CECC_MASK 0x0000400000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV3_MASK 0x000F800000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Transparent_MASK 0x0010000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__SyndV_MASK 0x0020000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV4_MASK 0x0040000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__TCC_MASK 0x0080000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreIdVal_MASK 0x0100000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__PCC_MASK 0x0200000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__AddrV_MASK 0x0400000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__MiscV_MASK 0x0800000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__En_MASK 0x1000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UC_MASK 0x2000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Overflow_MASK 0x4000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Val_MASK 0x8000000000000000L 
+//MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__ErrorAddr__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__LSB__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__Reserved__SHIFT 0x3e +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__ErrorAddr_MASK 0x00FFFFFFFFFFFFFFL +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__LSB_MASK 0x3F00000000000000L +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__Reserved_MASK 0xC000000000000000L + +#endif From d44394a9e18f37a17a70b2d2d81594c66151f523 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jan 2020 17:33:51 -0500 Subject: [PATCH 019/113] drm/amdgpu/gfx9: remove unused sdma headers All of the sdma stuff these were used for moves to the sdma code, so remove them. Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 89c04cfcfe12..9b94e9d15f7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -48,15 +48,6 @@ #include "amdgpu_ras.h" -#include "sdma0/sdma0_4_2_offset.h" -#include "sdma1/sdma1_4_2_offset.h" -#include "sdma2/sdma2_4_2_2_offset.h" -#include "sdma3/sdma3_4_2_2_offset.h" -#include "sdma4/sdma4_4_2_2_offset.h" -#include "sdma5/sdma5_4_2_2_offset.h" -#include "sdma6/sdma6_4_2_2_offset.h" -#include "sdma7/sdma7_4_2_2_offset.h" - #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L From 60fb100b3d1676490a9e5afef9fbd3f514f7d63d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jan 2020 17:43:55 -0500 Subject: [PATCH 020/113] drm/amdgpu/display: set gpu vm flag for all asics which support it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It won't get used unless the driver allows the gtt domain for display buffers which is controlled elsewhere. 
Acked-by: Huang Rui Acked-by: Christian König Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 76673c7234ed..490c73eada8f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -906,13 +906,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.dce_environment = DCE_ENV_PRODUCTION_DRV; - /* - * TODO debug why this doesn't work on Raven - */ - if (adev->flags & AMD_IS_APU && - adev->asic_type >= CHIP_CARRIZO && - adev->asic_type < CHIP_RAVEN) + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: + case CHIP_RAVEN: init_data.flags.gpu_vm_support = true; + break; + default: + break; + } if (amdgpu_dc_feature_mask & DC_FBC_MASK) init_data.flags.fbc_support = true; From 403c1ef0d2cce0be8ab5cf29d9e3d97d587e8582 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jan 2020 17:46:02 -0500 Subject: [PATCH 021/113] drm/amdgpu: enable S/G display on PCO and RV2 (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should work on all Raven variants, but some users have reported issues with original Raven with IOMMU enabled. So far there have been no issues observed with PCO or RV2. v2: split out the dm init and domain changes into separate patches. 
Acked-by: Harry Wentland Acked-by: Huang Rui Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 22 +++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 4e699071d144..6d520a3eec40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -513,13 +513,23 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, * will not allow USWC mappings. * Also, don't allow GTT domain if the BO doens't have USWC falg set. */ - if (adev->asic_type >= CHIP_CARRIZO && - adev->asic_type < CHIP_RAVEN && - (adev->flags & AMD_IS_APU) && - (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && + if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && amdgpu_bo_support_uswc(bo_flags) && - amdgpu_device_asic_has_dc_support(adev->asic_type)) - domain |= AMDGPU_GEM_DOMAIN_GTT; + amdgpu_device_asic_has_dc_support(adev->asic_type)) { + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: + domain |= AMDGPU_GEM_DOMAIN_GTT; + break; + case CHIP_RAVEN: + /* enable S/G on PCO and RV2 */ + if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + domain |= AMDGPU_GEM_DOMAIN_GTT; + break; + default: + break; + } + } #endif return domain; From fe3db43742390044990999076d41f5cbffa9b814 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jan 2020 17:47:23 -0500 Subject: [PATCH 022/113] drm/amdgpu/display: set gpu vm flag for renoir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It won't get used unless the driver allows the gtt domain for display buffers which is controlled elsewhere. 
Reviewed-by: Harry Wentland Acked-by: Huang Rui Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 490c73eada8f..504278d94c22 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -910,6 +910,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) case CHIP_CARRIZO: case CHIP_STONEY: case CHIP_RAVEN: + case CHIP_RENOIR: init_data.flags.gpu_vm_support = true; break; default: From bbde7162f7085884476a4a17b9792748e3c69b64 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jan 2020 22:10:00 -0500 Subject: [PATCH 023/113] drm/amdgpu/gmc10: remove dead code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Leftover from bring up. We look up the actual pre-OS memory usage value later in the same function. Reviewed-by: Huang Rui Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 7dc8c068c62a..faa310b76b50 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -736,15 +736,6 @@ static int gmc_v10_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - /* - * Reserve 8M stolen memory for navi10 like vega10 - * TODO: will check if it's really needed on asic. 
- */ - if (amdgpu_emu_mode == 1) - adev->gmc.stolen_size = 0; - else - adev->gmc.stolen_size = 9 * 1024 *1024; - r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); From 1499bcc7a216770c9946f1ec1aa331604338f7e2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jan 2020 22:14:24 -0500 Subject: [PATCH 024/113] drm/amdgpu/gmc10: free stolen memory in late_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to store the pre-OS console memory after the driver has loaded so free it. Reviewed-by: Huang Rui Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index faa310b76b50..5ad89bb6f3ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -566,6 +566,8 @@ static int gmc_v10_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; + amdgpu_bo_late_init(adev); + r = amdgpu_gmc_allocate_vm_inv_eng(adev); if (r) return r; From 6ca476bab8fd3d1db82626a5aab1bf8ebae996c9 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Thu, 9 Jan 2020 19:26:40 +0800 Subject: [PATCH 025/113] drm/amd/powerplay: update SMU12_DRIVER_IF_VERSION to 11 This patch updates SMU12_DRIVER_IF_VERSION to 11. 
Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h index c27c82851468..2f85a34c0591 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define SMU12_DRIVER_IF_VERSION 10 +#define SMU12_DRIVER_IF_VERSION 11 typedef struct { int32_t value; @@ -192,6 +192,11 @@ typedef struct { uint16_t SocTemperature; //[centi-Celsius] uint16_t ThrottlerStatus; uint16_t spare; + + uint16_t StapmOriginalLimit; //[mW] + uint16_t StapmCurrentLimit; //[mW] + uint16_t ApuPower; //[mW] + uint16_t dGpuPower; //[mW] } SmuMetrics_t; From d8459d1b7f689daaaa32ceef74d387f354a461ce Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Thu, 9 Jan 2020 19:37:10 +0800 Subject: [PATCH 026/113] drm/amdgpu: update goldensetting for renoir Update mmSDMA0_UTCL1_WATERMK golden setting for renoir. 
Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 5d9597f5ac5f..27c7001be1ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -255,7 +255,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000) + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe) }; static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = { From 9c8c81fe7d2c6ef732b50d456a627fdf4383c1d5 Mon Sep 17 00:00:00 2001 From: John Clements Date: Wed, 8 Jan 2020 16:36:14 +0800 Subject: [PATCH 027/113] drm/amdgpu: disable XGMI TA unload for arcturus in event of GPU reset, XGMI TA unload causes unrecoverable GPU hang Acked-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 281d89640344..3a1570dafe34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -529,6 +529,11 @@ static int psp_xgmi_unload(struct psp_context *psp) { int ret; struct psp_gfx_cmd_resp *cmd; + struct amdgpu_device *adev = psp->adev; + + /* XGMI TA unload currently is not supported on Arcturus */ + if (adev->asic_type == CHIP_ARCTURUS) + return 0; /* * TODO: bypass the unloading in sriov for now From 
eee2eabafe1da1da05927fafbc6d49ec6cf5ca44 Mon Sep 17 00:00:00 2001 From: John Clements Date: Fri, 10 Jan 2020 17:38:31 +0800 Subject: [PATCH 028/113] drm/amdgpu: preserve RSMU UMC index mode state between UMC RAS err register access restore previous RSMU UMC index mode state Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 43 +++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 11428b66c74e..793bf70e64b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -54,12 +54,30 @@ const uint32_t {9, 25, 0, 16}, {15, 31, 6, 22} }; +static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev) +{ + WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 1); +} + static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) { WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, RSMU_UMC_INDEX_MODE_EN, 0); } +static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev) +{ + uint32_t rsmu_umc_index; + + rsmu_umc_index = RREG32_SOC15(RSMU, 0, + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + + return REG_GET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN); +} + static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev, uint32_t umc_inst, uint32_t ch_inst) @@ -163,6 +181,11 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, uint32_t ch_inst = 0; uint32_t umc_reg_offset = 0; + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { umc_reg_offset = get_umc_6_reg_offset(adev, umc_inst, @@ -175,6 +198,9 @@ static void umc_v6_1_query_ras_error_count(struct 
amdgpu_device *adev, umc_reg_offset, &(err_data->ue_count)); } + + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); } static void umc_v6_1_query_error_address(struct amdgpu_device *adev, @@ -216,8 +242,8 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { - err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); /* the lowest lsb bits should be ignored */ lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); @@ -257,6 +283,11 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, uint32_t ch_inst = 0; uint32_t umc_reg_offset = 0; + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { umc_reg_offset = get_umc_6_reg_offset(adev, umc_inst, @@ -269,6 +300,8 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, umc_inst); } + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); } static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, @@ -315,7 +348,10 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) uint32_t ch_inst = 0; uint32_t umc_reg_offset = 0; - umc_v6_1_disable_umc_index_mode(adev); + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { umc_reg_offset = get_umc_6_reg_offset(adev, @@ -324,6 +360,9 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) umc_v6_1_err_cnt_init_per_channel(adev, 
umc_reg_offset); } + + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); } const struct amdgpu_umc_funcs umc_v6_1_funcs = { From 61e50646f0bbfb24002c4935e1ed9bf04ae4266e Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Mon, 4 Nov 2019 13:39:20 -0500 Subject: [PATCH 029/113] drm/amd/display: add Pollock IDs, fix Pollock & Dali clk mgr construct [WHY] Only a single voltage level should be available to Pollock (min level) Pollock & Dali get misidentified as Renoir, use wrong clk mgr constructor [HOW] Add provided Pollock IDs to ASIC Rev. ID list. Create new Pollock ASIC RID check, fix RV2 & Dali ASIC checks. Check RID and set max voltage level to 0 if Pollock is detected. Work around broken ASICREV_IS_RENOIR, IS_RAVEN2, etc. checks by performing Dali/Pollock checks before they can be misidentified as RN. Signed-off-by: Michael Strauss Signed-off-by: Bhawanpreet Lakha Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 4 ++-- drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 6 +++--- drivers/gpu/drm/amd/display/include/dal_asic_id.h | 10 ++++++++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index e6c22345f0ea..a27d84ca15a5 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -705,8 +705,8 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v, unsigned int get_highest_allowed_voltage_level(uint32_t hw_internal_rev) { - /* for dali, the highest voltage level we want is 0 */ - if (ASICREV_IS_DALI(hw_internal_rev)) + /* for dali & pollock, the highest voltage level we want is 0 */ + if (ASICREV_IS_POLLOCK(hw_internal_rev) || ASICREV_IS_DALI(hw_internal_rev)) return 0; /* we are ok with all levels */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 6d60ef822619..a78e5c74c79c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -134,13 +134,13 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p #if defined(CONFIG_DRM_AMD_DC_DCN) case FAMILY_RV: - if (ASICREV_IS_DALI(asic_id.hw_internal_rev)) { + if (ASICREV_IS_DALI(asic_id.hw_internal_rev) || + ASICREV_IS_POLLOCK(asic_id.hw_internal_rev)) { /* TEMP: this check has to come before ASICREV_IS_RENOIR */ - /* which also incorrectly returns true for Dali */ + /* which also incorrectly returns true for Dali/Pollock*/ rv2_clk_mgr_construct(ctx, clk_mgr, pp_smu); break; } - if (ASICREV_IS_RENOIR(asic_id.hw_internal_rev)) { rn_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); break; diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 11d7daf6f076..a2903985b9e8 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -134,8 +134,13 @@ #define PICASSO_A0 0x41 /* DCN1_01 */ #define RAVEN2_A0 0x81 +#define RAVEN2_15D8_REV_94 0x94 +#define RAVEN2_15D8_REV_95 0x95 #define RAVEN2_15D8_REV_E3 0xE3 #define RAVEN2_15D8_REV_E4 0xE4 +#define RAVEN2_15D8_REV_E9 0xE9 +#define RAVEN2_15D8_REV_EA 0xEA +#define RAVEN2_15D8_REV_EB 0xEB #define RAVEN1_F0 0xF0 #define RAVEN_UNKNOWN 0xFF #ifndef ASICREV_IS_RAVEN @@ -149,6 +154,11 @@ #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN)) #define ASICREV_IS_DALI(eChipRev) ((eChipRev == RAVEN2_15D8_REV_E3) \ || (eChipRev == RAVEN2_15D8_REV_E4)) +#define ASICREV_IS_POLLOCK(eChipRev) (eChipRev == RAVEN2_15D8_REV_94 \ + || eChipRev == RAVEN2_15D8_REV_95 \ + || eChipRev == RAVEN2_15D8_REV_E9 \ + || eChipRev == RAVEN2_15D8_REV_EA \ + || eChipRev == RAVEN2_15D8_REV_EB) #define FAMILY_RV 142 /* DCN 1*/ From 
bdf84a80e0ce5f60f4f41a27b4cf74bcfabcea56 Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Tue, 14 Jan 2020 10:05:21 -0500 Subject: [PATCH 030/113] drm/amdgpu: Create generic DF struct in adev The only data fabric information the adev struct currently contains is a function pointer table. In the near future, we will be adding some cached DF information into adev. As such, this patch creates a new amdgpu_df struct for adev. Right now, it only contains the old function pointer table, but new stuff will be added soon. Signed-off-by: Joseph Greathouse Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 29 ++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_df.h | 62 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c | 12 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 6 +-- drivers/gpu/drm/amd/amdgpu/df_v1_7.c | 6 +-- drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 6 +-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 6 +-- drivers/gpu/drm/amd/amdgpu/soc15.c | 12 ++--- 8 files changed, 90 insertions(+), 49 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_df.h diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f42e8d467c12..b1bb10625cd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -90,6 +90,7 @@ #include "amdgpu_mes.h" #include "amdgpu_umc.h" #include "amdgpu_mmhub.h" +#include "amdgpu_df.h" #define MAX_GPU_INSTANCE 16 @@ -664,29 +665,6 @@ struct amdgpu_mmio_remap { resource_size_t bus_addr; }; -struct amdgpu_df_funcs { - void (*sw_init)(struct amdgpu_device *adev); - void (*sw_fini)(struct amdgpu_device *adev); - void (*enable_broadcast_mode)(struct amdgpu_device *adev, - bool enable); - u32 (*get_fb_channel_number)(struct amdgpu_device *adev); - u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); - void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, - bool enable); - void 
(*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); - void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, - bool enable); - int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, - int is_enable); - int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, - int is_disable); - void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, - uint64_t *count); - uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); - void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, - uint32_t ficadl_val, uint32_t ficadh_val); -}; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { GC_HWIP = 1, @@ -930,6 +908,9 @@ struct amdgpu_device { bool enable_mes; struct amdgpu_mes mes; + /* df */ + struct amdgpu_df df; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; int num_ip_blocks; struct mutex mn_lock; @@ -943,8 +924,6 @@ struct amdgpu_device { /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; - const struct amdgpu_df_funcs *df_funcs; - /* delayed work_func for deferring clockgating during resume */ struct delayed_work delayed_init_work; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h new file mode 100644 index 000000000000..61a26c15c8dd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -0,0 +1,62 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __AMDGPU_DF_H__ +#define __AMDGPU_DF_H__ + +struct amdgpu_df_hash_status { + bool hash_64k; + bool hash_2m; + bool hash_1g; +}; + +struct amdgpu_df_funcs { + void (*sw_init)(struct amdgpu_device *adev); + void (*sw_fini)(struct amdgpu_device *adev); + void (*enable_broadcast_mode)(struct amdgpu_device *adev, + bool enable); + u32 (*get_fb_channel_number)(struct amdgpu_device *adev); + u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, + bool enable); + int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, + int is_enable); + int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, + int is_disable); + void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, + uint64_t *count); + uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); + void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, + uint32_t ficadl_val, uint32_t ficadh_val); +}; + +struct amdgpu_df { + struct amdgpu_df_hash_status hash_status; + const struct amdgpu_df_funcs *funcs; +}; + +#endif /* __AMDGPU_DF_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index cf21ad0cad9a..07914e34bc25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: if (!(flags & PERF_EF_RELOAD)) - pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1); - pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 0); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 0); break; default: break; @@ -101,7 +101,7 @@ static void amdgpu_perf_read(struct 
perf_event *event) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_get_count(pe->adev, hwc->conf, + pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->conf, &count); break; default: @@ -126,7 +126,7 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 0); + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 0); break; default: break; @@ -156,7 +156,7 @@ static int amdgpu_perf_add(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - retval = pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + retval = pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1); break; default: return 0; @@ -184,7 +184,7 @@ static void amdgpu_perf_del(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 1); + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 1); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index c626f3e59ff9..a97af422575a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -146,16 +146,16 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev, ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200); ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208); - fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in); + fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in); if (fica_out != 0x1f) pr_err("xGMI error counters not enabled!\n"); - fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in); + fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in); if ((fica_out & 0xffff) == 2) error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63); - adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); + adev->df.funcs->set_fica(adev, 
ficaa_pie_status_in, 0, 0); return snprintf(buf, PAGE_SIZE, "%d\n", error_count); } diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index d6221298b477..03fdeef568d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -66,7 +66,7 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev) { int fb_channel_number; - fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + fb_channel_number = adev->df.funcs->get_fb_channel_number(adev); return df_v1_7_channel_number[fb_channel_number]; } @@ -77,7 +77,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, u32 tmp; /* Put DF on broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, true); + adev->df.funcs->enable_broadcast_mode(adev, true); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); @@ -92,7 +92,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, } /* Exit boradcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, false); + adev->df.funcs->enable_broadcast_mode(adev, false); } static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 2f884d941e8d..7bd29d97adfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -311,7 +311,7 @@ static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev) { int fb_channel_number; - fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + fb_channel_number = adev->df.funcs->get_fb_channel_number(adev); if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number)) fb_channel_number = 0; @@ -325,7 +325,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) { /* Put DF on broadcast mode */ - 
adev->df_funcs->enable_broadcast_mode(adev, true); + adev->df.funcs->enable_broadcast_mode(adev, true); if (enable) { tmp = RREG32_SOC15(DF, 0, @@ -344,7 +344,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, } /* Exit broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, false); + adev->df.funcs->enable_broadcast_mode(adev, false); } } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 26194ac9af98..b83c8d745f42 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -817,8 +817,8 @@ static int gmc_v9_0_late_init(void *handle) r = amdgpu_atomfirmware_mem_ecc_supported(adev); if (!r) { DRM_INFO("ECC is not present.\n"); - if (adev->df_funcs->enable_ecc_force_par_wr_rmw) - adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); + if (adev->df.funcs->enable_ecc_force_par_wr_rmw) + adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); } else { DRM_INFO("ECC is active.\n"); } @@ -1023,7 +1023,7 @@ static int gmc_v9_0_sw_init(void *handle) else chansize = 128; - numchan = adev->df_funcs->get_hbm_channel_number(adev); + numchan = adev->df.funcs->get_hbm_channel_number(adev); adev->gmc.vram_width = numchan * chansize; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 6c75021dda0f..317803f6a561 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -677,9 +677,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) } if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) - adev->df_funcs = &df_v3_6_funcs; + adev->df.funcs = &df_v3_6_funcs; else - adev->df_funcs = &df_v1_7_funcs; + adev->df.funcs = &df_v1_7_funcs; adev->rev_id = soc15_get_rev_id(adev); adev->nbio.funcs->detect_hw_virt(adev); @@ -1247,7 +1247,7 @@ static int soc15_common_sw_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_add_irq_id(adev); - 
adev->df_funcs->sw_init(adev); + adev->df.funcs->sw_init(adev); return 0; } @@ -1257,7 +1257,7 @@ static int soc15_common_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_nbio_ras_fini(adev); - adev->df_funcs->sw_fini(adev); + adev->df.funcs->sw_fini(adev); return 0; } @@ -1478,7 +1478,7 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE ? true : false); soc15_update_rom_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - adev->df_funcs->update_medium_grain_clock_gating(adev, + adev->df.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; case CHIP_RAVEN: @@ -1536,7 +1536,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) *flags |= AMD_CG_SUPPORT_ROM_MGCG; - adev->df_funcs->get_clockgating_state(adev, flags); + adev->df.funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, From ae99fc35ceea9ae9c496987d113187464b33b2b8 Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Thu, 9 Jan 2020 17:07:12 -0600 Subject: [PATCH 031/113] drm/amdgpu: add defines for DF and TCP Hashing On Arcturus, we need TC channel hashing, which is set by the driver, to match DF hashing, which is set by VBIOS. To match these, we plan to query the DF information and then properly set the TC configuration bits to match them. This patch adds the required fields to register definitions in preparation for a future patch which will use them. 
Signed-off-by: Joseph Greathouse Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h | 3 +++ drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h | 8 ++++++++ drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h | 6 ++++++ 3 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h index f301e5fe2109..87c84691b5be 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h @@ -27,6 +27,9 @@ #define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc #define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0 +#define mmDF_CS_UMC_AON0_DfGlobalCtrl 0x00fe +#define mmDF_CS_UMC_AON0_DfGlobalCtrl_BASE_IDX 0 + #define mmDF_CS_UMC_AON0_DramBaseAddress0 0x0044 #define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX 0 diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h index 06fac509e987..65e9f756e86e 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h @@ -33,6 +33,14 @@ #define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0 #define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL +/* DF_CS_UMC_AON0_DfGlobalCtrl */ +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl64K__SHIFT 0x14 +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl2M__SHIFT 0x15 +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl1G__SHIFT 0x16 +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl64K_MASK 0x00100000L +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl2M_MASK 0x00200000L +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl1G_MASK 0x00400000L + /* DF_CS_AON0_DramBaseAddress0 */ #define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0 #define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1 diff 
--git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h index d4c613a85352..c9e3f6d849a8 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h @@ -8739,10 +8739,16 @@ #define TCP_ADDR_CONFIG__NUM_BANKS__SHIFT 0x4 #define TCP_ADDR_CONFIG__COLHI_WIDTH__SHIFT 0x6 #define TCP_ADDR_CONFIG__RB_SPLIT_COLHI__SHIFT 0x9 +#define TCP_ADDR_CONFIG__ENABLE64KHASH__SHIFT 0xb +#define TCP_ADDR_CONFIG__ENABLE2MHASH__SHIFT 0xc +#define TCP_ADDR_CONFIG__ENABLE1GHASH__SHIFT 0xd #define TCP_ADDR_CONFIG__NUM_TCC_BANKS_MASK 0x0000000FL #define TCP_ADDR_CONFIG__NUM_BANKS_MASK 0x00000030L #define TCP_ADDR_CONFIG__COLHI_WIDTH_MASK 0x000001C0L #define TCP_ADDR_CONFIG__RB_SPLIT_COLHI_MASK 0x00000200L +#define TCP_ADDR_CONFIG__ENABLE64KHASH_MASK 0x00000800L +#define TCP_ADDR_CONFIG__ENABLE2MHASH_MASK 0x00001000L +#define TCP_ADDR_CONFIG__ENABLE1GHASH_MASK 0x00002000L //TCP_CREDIT #define TCP_CREDIT__LFIFO_CREDIT__SHIFT 0x0 #define TCP_CREDIT__REQ_FIFO_CREDIT__SHIFT 0x10 From 22d39fe729fdd21acf9c29c5892a71ede4b7c619 Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Thu, 9 Jan 2020 17:41:43 -0600 Subject: [PATCH 032/113] drm/amdgpu: Match TC hash settings to DF settings (v2) On Arcturus, data fabric hashing is set by the VBIOS, and affects which addresses map to which memory channels. The gfx core's caches also need to know this mapping, but the hash settings for these caches are set by the driver. This change queries the DF to understand how the VBIOS configured DF, then matches the TC hash configuration bits to do the same thing. 
v2: squash in warning fix Signed-off-by: Joseph Greathouse Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/df_v1_7.c | 3 +++ drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 28 +++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 19 ++++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index 03fdeef568d9..d6aca1c08068 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -31,6 +31,9 @@ static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; static void df_v1_7_sw_init(struct amdgpu_device *adev) { + adev->df.hash_status.hash_64k = false; + adev->df.hash_status.hash_2m = false; + adev->df.hash_status.hash_1g = false; } static void df_v1_7_sw_fini(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 7bd29d97adfe..f51326598a8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -262,6 +262,32 @@ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, /* device attr for available perfmon counters */ static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL); +static void df_v3_6_query_hashes(struct amdgpu_device *adev) +{ + u32 tmp; + + adev->df.hash_status.hash_64k = false; + adev->df.hash_status.hash_2m = false; + adev->df.hash_status.hash_1g = false; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + /* encoding for hash-enabled on Arcturus */ + if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) { + tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl); + adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl64K); + adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl2M); + adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp, + 
DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl1G); + } +} + /* init perfmons */ static void df_v3_6_sw_init(struct amdgpu_device *adev) { @@ -273,6 +299,8 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++) adev->df_perfmon_config_assign_mask[i] = 0; + + df_v3_6_query_hashes(adev); } static void df_v3_6_sw_fini(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9b94e9d15f7a..ac4153c235ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3637,6 +3637,23 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) return 0; } +static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) +{ + u32 tmp; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, + adev->df.hash_status.hash_64k); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, + adev->df.hash_status.hash_2m); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, + adev->df.hash_status.hash_1g); + WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); +} + static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) { if (adev->asic_type != CHIP_ARCTURUS) @@ -3654,6 +3671,8 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_constants_init(adev); + gfx_v9_0_init_tcp_config(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; From a2e4b418c6714e2dfe3d0026fdc6905186f55001 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 9 Jan 2020 18:27:16 -0500 Subject: [PATCH 033/113] drm/amdgpu/psp: declare navi1x ta firmware So that it gets included in the initrd. At the moment this is optional firmware that contains support for HDCP. 
Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index a57f3d737677..685dd9754c67 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -43,10 +43,13 @@ MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); MODULE_FIRMWARE("amdgpu/navi10_sos.bin"); MODULE_FIRMWARE("amdgpu/navi10_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi10_ta.bin"); MODULE_FIRMWARE("amdgpu/navi14_sos.bin"); MODULE_FIRMWARE("amdgpu/navi14_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ta.bin"); MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi12_ta.bin"); MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); From 5d593d682ffa533615f81a0680d4aa18dbeb97f7 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Thu, 9 Jan 2020 16:20:27 +0100 Subject: [PATCH 034/113] drm/amd/display: Reorder detect_edp_sink_caps before link settings read. read_current_link_settings_on_detect() on eDP 1.4+ may use the edp_supported_link_rates table which is set up by detect_edp_sink_caps(), so that function needs to be called first. 
Reviewed-by: Harry Wentland Signed-off-by: Mario Kleiner Cc: Martin Leung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index cef8c1ba9797..5ea4a1675259 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -817,8 +817,8 @@ static bool dc_link_detect_helper(struct dc_link *link, } case SIGNAL_TYPE_EDP: { - read_current_link_settings_on_detect(link); detect_edp_sink_caps(link); + read_current_link_settings_on_detect(link); sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C_OVER_AUX; sink_caps.signal = SIGNAL_TYPE_EDP; break; From b239b59bf4345da904b3d96006d8d994c5b7b996 Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Fri, 10 Jan 2020 15:16:16 +0800 Subject: [PATCH 035/113] drm/amd/display: remove unnecessary conversion to bool The conversion to bool is not needed, remove it. Reviewed-by: Harry Wentland Signed-off-by: Chen Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 49f48d432923..5d0e7abb2b98 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3268,7 +3268,7 @@ static bool retrieve_link_cap(struct dc_link *link) dpcd_data[DP_TRAINING_AUX_RD_INTERVAL]; link->dpcd_caps.ext_receiver_cap_field_present = - aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1 ? 
true:false; + aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1; if (aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1) { uint8_t ext_cap_data[16]; From f81110b8520a4ed1062b044b1704b4fe31593ebe Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Thu, 9 Jan 2020 10:21:39 +0800 Subject: [PATCH 036/113] drm/amdgpu: add header file for macro SZ_1M Fixes: 4dee6e4ca50a ("drm/amdgpu: use linux size macro to simplify ONE_Kib & One_Mib") Signed-off-by: Flora Cui Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 3114d8a47e88..dee446278417 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include From b9a9294b91efd4388db415d657626c1431316379 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Jan 2020 15:31:27 -0500 Subject: [PATCH 037/113] drm/amdgpu/pm: properly handle runtime pm If power management sysfs or debugfs files are accessed, power up the GPU when necessary. 
Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 830 ++++++++++++++++++------- 1 file changed, 618 insertions(+), 212 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 285d460624c8..806e731c1ff4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "hwmgr.h" #define WIDTH_4K 3840 @@ -158,10 +159,15 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type pm; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { if (adev->smu.ppt_funcs->get_current_power_state) pm = smu_get_current_power_state(&adev->smu); @@ -173,6 +179,9 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, pm = adev->pm.dpm.user_state; } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%s\n", (pm == POWER_STATE_TYPE_BATTERY) ? "battery" : (pm == POWER_STATE_TYPE_BALANCED) ? 
"balanced" : "performance"); @@ -186,6 +195,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type state; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; @@ -201,6 +211,10 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, goto fail; } + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { mutex_lock(&adev->pm.mutex); adev->pm.dpm.user_state = state; @@ -212,11 +226,12 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, adev->pm.dpm.user_state = state; mutex_unlock(&adev->pm.mutex); - /* Can't set dpm state when the card is off */ - if (!(adev->flags & AMD_IS_PX) || - (ddev->switch_power_state == DRM_SWITCH_POWER_ON)) - amdgpu_pm_compute_clocks(adev); + amdgpu_pm_compute_clocks(adev); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + fail: return count; } @@ -288,13 +303,14 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_dpm_forced_level level = 0xff; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return snprintf(buf, PAGE_SIZE, "off\n"); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) level = smu_get_performance_level(&adev->smu); @@ -303,6 +319,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, else level = adev->pm.dpm.forced_level; + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%s\n", (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" : (level == AMD_DPM_FORCED_LEVEL_LOW) ? 
"low" : @@ -329,11 +348,6 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; - /* Can't force performance level when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - if (strncmp("low", buf, strlen("low")) == 0) { level = AMD_DPM_FORCED_LEVEL_LOW; } else if (strncmp("high", buf, strlen("high")) == 0) { @@ -353,17 +367,23 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, } else if (strncmp("profile_peak", buf, strlen("profile_peak")) == 0) { level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; } else { - count = -EINVAL; - goto fail; + return -EINVAL; } + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) current_level = smu_get_performance_level(&adev->smu); else if (adev->powerplay.pp_funcs->get_performance_level) current_level = amdgpu_dpm_get_performance_level(adev); - if (current_level == level) + if (current_level == level) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; + } /* profile_exit setting is valid only when current mode is in profile mode */ if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | @@ -372,6 +392,8 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) && (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) { pr_err("Currently not in any profile mode!\n"); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; } @@ -382,9 +404,10 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, } else if (adev->powerplay.pp_funcs->force_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->pm.dpm.thermal_active) { - count = -EINVAL; mutex_unlock(&adev->pm.mutex); - goto fail; + pm_runtime_mark_last_busy(ddev->dev); + 
pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; } ret = amdgpu_dpm_force_performance_level(adev, level); if (ret) @@ -393,8 +416,9 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, adev->pm.dpm.forced_level = level; mutex_unlock(&adev->pm.mutex); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); -fail: return count; } @@ -407,6 +431,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, struct pp_states_info data; int i, buf_len, ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_get_power_num_states(&adev->smu, &data); if (ret) @@ -414,6 +442,9 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, } else if (adev->powerplay.pp_funcs->get_pp_num_states) amdgpu_dpm_get_pp_num_states(adev, &data); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); for (i = 0; i < data.nums; i++) buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i, @@ -439,6 +470,10 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { pm = smu_get_current_power_state(smu); ret = smu_get_power_num_states(smu, &data); @@ -450,6 +485,9 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, amdgpu_dpm_get_pp_num_states(adev, &data); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + for (i = 0; i < data.nums; i++) { if (pm == data.states[i]) break; @@ -500,14 +538,18 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, struct pp_states_info data; ret = kstrtoul(buf, 0, &idx); - if (ret || idx >= ARRAY_SIZE(data.states)) { - count = -EINVAL; - goto fail; - } + if (ret || idx >= ARRAY_SIZE(data.states)) + return -EINVAL; + 
idx = array_index_nospec(idx, ARRAY_SIZE(data.states)); amdgpu_dpm_get_pp_num_states(adev, &data); state = data.states[idx]; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + /* only set user selected power states */ if (state != POWER_STATE_TYPE_INTERNAL_BOOT && state != POWER_STATE_TYPE_DEFAULT) { @@ -515,8 +557,10 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, AMD_PP_TASK_ENABLE_USER_STATE, &state); adev->pp_force_state_enabled = true; } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); } -fail: + return count; } @@ -538,20 +582,32 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; char *table = NULL; - int size; + int size, ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { size = smu_sys_get_pp_table(&adev->smu, (void **)&table); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (size < 0) return size; - } - else if (adev->powerplay.pp_funcs->get_pp_table) + } else if (adev->powerplay.pp_funcs->get_pp_table) { size = amdgpu_dpm_get_pp_table(adev, &table); - else + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (size < 0) + return size; + } else { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return 0; + } if (size >= PAGE_SIZE) size = PAGE_SIZE - 1; @@ -573,13 +629,23 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + 
pm_runtime_put_autosuspend(ddev->dev); return ret; + } } else if (adev->powerplay.pp_funcs->set_pp_table) amdgpu_dpm_set_pp_table(adev, buf, count); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -703,18 +769,28 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, tmp_str++; } + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_od_edit_dpm_table(&adev->smu, type, parameter, parameter_size); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } else { if (adev->powerplay.pp_funcs->odn_edit_dpm_table) { ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, parameter_size); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } if (type == PP_OD_COMMIT_DPM_TABLE) { @@ -722,12 +798,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } else { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; } } } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } @@ -738,27 +820,33 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - uint32_t size = 0; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf); size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size); size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size); size += 
smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size); - return size; } else if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size); - return size; } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return size; } /** @@ -796,15 +884,21 @@ static ssize_t amdgpu_set_pp_feature_status(struct device *dev, pr_debug("featuremask = 0x%llx\n", featuremask); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask); if (ret) - return -EINVAL; + count = -EINVAL; } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask); if (ret) - return -EINVAL; + count = -EINVAL; } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } @@ -815,16 +909,27 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev)) return 0; - if (is_support_sw_smu(adev)) { - return smu_sys_get_pp_feature_mask(&adev->smu, buf); - } else if (adev->powerplay.pp_funcs->get_ppfeature_status) - return amdgpu_dpm_get_ppfeature_status(adev, buf); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; - return snprintf(buf, PAGE_SIZE, "\n"); + if (is_support_sw_smu(adev)) + size = smu_sys_get_pp_feature_mask(&adev->smu, buf); + else if (adev->powerplay.pp_funcs->get_ppfeature_status) + size = amdgpu_dpm_get_ppfeature_status(adev, buf); + else + size = snprintf(buf, 
PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } /** @@ -863,16 +968,27 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } /* @@ -928,11 +1044,18 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (ret) return -EINVAL; @@ -945,16 +1068,27 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); else if 
(adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, @@ -964,8 +1098,8 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - int ret; uint32_t mask = 0; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; @@ -974,11 +1108,18 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (ret) return -EINVAL; @@ -991,16 +1132,27 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + 
pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, @@ -1020,10 +1172,19 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1037,16 +1198,27 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, @@ -1066,10 +1238,19 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); + else + ret = 0; + + 
pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1083,16 +1264,27 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, @@ -1112,10 +1304,19 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1129,16 +1330,27 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); + size = smu_print_clk_levels(&adev->smu, 
SMU_PCIE, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, @@ -1158,10 +1370,19 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1176,15 +1397,23 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; + int ret; if (amdgpu_sriov_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK); else if (adev->powerplay.pp_funcs->get_sclk_od) value = amdgpu_dpm_get_sclk_od(adev); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%d\n", value); } @@ -1203,10 +1432,12 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, ret = kstrtol(buf, 0, &value); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value); @@ -1222,7 +1453,9 @@ static ssize_t amdgpu_set_pp_sclk_od(struct 
device *dev, } } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -1233,15 +1466,23 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; + int ret; if (amdgpu_sriov_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK); else if (adev->powerplay.pp_funcs->get_mclk_od) value = amdgpu_dpm_get_mclk_od(adev); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%d\n", value); } @@ -1260,10 +1501,12 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, ret = kstrtol(buf, 0, &value); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value); @@ -1279,7 +1522,9 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, } } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -1309,16 +1554,27 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; - if (is_support_sw_smu(adev)) - return smu_get_power_profile_mode(&adev->smu, buf); - else if (adev->powerplay.pp_funcs->get_power_profile_mode) - return amdgpu_dpm_get_power_profile_mode(adev, buf); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; - return snprintf(buf, PAGE_SIZE, "\n"); + if (is_support_sw_smu(adev)) + size = smu_get_power_profile_mode(&adev->smu, buf); + else if 
(adev->powerplay.pp_funcs->get_power_profile_mode) + size = amdgpu_dpm_get_power_profile_mode(adev, buf); + else + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } @@ -1343,7 +1599,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, tmp[1] = '\0'; ret = kstrtol(tmp, 0, &profile_mode); if (ret) - goto fail; + return -EINVAL; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; @@ -1358,23 +1614,30 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, while (tmp_str[0]) { sub_str = strsep(&tmp_str, delimiter); ret = kstrtol(sub_str, 0, &parameter[parameter_size]); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; parameter_size++; while (isspace(*tmp_str)) tmp_str++; } } parameter[parameter_size] = profile_mode; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true); else if (adev->powerplay.pp_funcs->set_power_profile_mode) ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (!ret) return count; -fail: + return -EINVAL; } @@ -1397,10 +1660,17 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + r = pm_runtime_get_sync(ddev->dev); + if (r < 0) + return r; + /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (r) return r; @@ -1426,10 +1696,17 @@ static ssize_t amdgpu_get_memory_busy_percent(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + r = pm_runtime_get_sync(ddev->dev); + if (r < 0)
+ return r; + /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (r) return r; @@ -1455,11 +1732,20 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint64_t count0, count1; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + amdgpu_asic_get_pcie_usage(adev, &count0, &count1); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n", count0, count1, pcie_get_mps(adev->pdev)); } @@ -1547,42 +1833,43 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; int channel = to_sensor_dev_attr(attr)->index; int r, temp = 0, size = sizeof(temp); - /* Can't get temperature when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - if (channel >= PP_TEMP_MAX) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + switch (channel) { case PP_TEMP_JUNCTION: /* get current junction temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, (void *)&temp, &size); - if (r) - return r; break; case PP_TEMP_EDGE: /* get current edge temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, (void *)&temp, &size); - if (r) - return r; break; case PP_TEMP_MEM: /* get current memory temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, (void *)&temp, &size); - if (r) - return r; + break; + default: + r = -EINVAL; break; } + pm_runtime_mark_last_busy(adev->ddev->dev); + 
pm_runtime_put_autosuspend(adev->ddev->dev); + + if (r) + return r; + return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ -1678,16 +1965,27 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; + int ret; + + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { - if (!adev->powerplay.pp_funcs->get_fan_control_mode) + if (!adev->powerplay.pp_funcs->get_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return sprintf(buf, "%i\n", pwm_mode); } @@ -1697,27 +1995,32 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, size_t count) { struct amdgpu_device *adev = dev_get_drvdata(dev); - int err; + int err, ret; int value; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - err = kstrtoint(buf, 10, &value); if (err) return err; + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, value); } else { - if (!adev->powerplay.pp_funcs->set_fan_control_mode) + if (!adev->powerplay.pp_funcs->set_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } amdgpu_dpm_set_fan_control_mode(adev, value); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return count; } @@ -1744,34 +2047,43 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, u32 value; u32 pwm_mode; - /* Can't adjust fan when the card is off */ - if ((adev->flags & 
AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { pr_info("manual fan speed control should be enabled first\n"); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; } err = kstrtou32(buf, 10, &value); - if (err) + if (err) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } value = (value * 100) / 255; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_set_fan_speed_percent(&adev->smu, value); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->set_fan_speed_percent) { + else if (adev->powerplay.pp_funcs->set_fan_speed_percent) err = amdgpu_dpm_set_fan_speed_percent(adev, value); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return count; } @@ -1784,20 +2096,22 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, int err; u32 speed = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_percent(&adev->smu, &speed); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_percent) { + else if (adev->powerplay.pp_funcs->get_fan_speed_percent) err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + 
pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; speed = (speed * 255) / 100; @@ -1812,20 +2126,22 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, int err; u32 speed = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &speed); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return sprintf(buf, "%i\n", speed); } @@ -1839,8 +2155,16 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, u32 size = sizeof(min_rpm); int r; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, (void *)&min_rpm, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1856,8 +2180,16 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, u32 size = sizeof(max_rpm); int r; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, (void *)&max_rpm, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1872,20 +2204,22 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, int err; u32 rpm = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - 
return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &rpm); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return sprintf(buf, "%i\n", rpm); } @@ -1899,33 +2233,41 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, u32 value; u32 pwm_mode; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); - if (pwm_mode != AMD_FAN_CTRL_MANUAL) + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -ENODATA; - - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + } err = kstrtou32(buf, 10, &value); + if (err) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return err; + } + + if (is_support_sw_smu(adev)) + err = smu_set_fan_speed_rpm(&adev->smu, value); + else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) + err = amdgpu_dpm_set_fan_speed_rpm(adev, value); + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (err) return err; - if (is_support_sw_smu(adev)) { - err = smu_set_fan_speed_rpm(&adev->smu, value); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) { - err = amdgpu_dpm_set_fan_speed_rpm(adev, value); - if (err) 
- return err; - } - return count; } @@ -1935,15 +2277,27 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; + int ret; + + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { - if (!adev->powerplay.pp_funcs->get_fan_control_mode) + if (!adev->powerplay.pp_funcs->get_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1); } @@ -1957,12 +2311,6 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, int value; u32 pwm_mode; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - - err = kstrtoint(buf, 10, &value); if (err) return err; @@ -1974,14 +2322,24 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, else return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, pwm_mode); } else { - if (!adev->powerplay.pp_funcs->set_fan_control_mode) + if (!adev->powerplay.pp_funcs->set_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return count; } @@ -1990,18 +2348,20 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 
vddgfx; int r, size = sizeof(vddgfx); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&vddgfx, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2020,7 +2380,6 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 vddnb; int r, size = sizeof(vddnb); @@ -2028,14 +2387,17 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, if (!(adev->flags & AMD_IS_APU)) return -EINVAL; - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&vddnb, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2054,19 +2416,21 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 query = 0; int r, size = sizeof(u32); unsigned uw; - /* Can't get power when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2089,16 +2453,27 @@ static 
ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; + ssize_t size; + int r; + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; if (is_support_sw_smu(adev)) { smu_get_power_limit(&adev->smu, &limit, true, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + return size; } static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, @@ -2107,16 +2482,27 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; + ssize_t size; + int r; + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; if (is_support_sw_smu(adev)) { smu_get_power_limit(&adev->smu, &limit, false, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + return size; } @@ -2138,13 +2524,20 @@ static ssize_t 
amdgpu_hwmon_set_power_cap(struct device *dev, value = value / 1000000; /* convert to Watt */ - if (is_support_sw_smu(adev)) { + + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + + if (is_support_sw_smu(adev)) err = smu_set_power_limit(&adev->smu, value); - } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { + else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); - } else { + else err = -EINVAL; - } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); if (err) return err; @@ -2157,18 +2550,20 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; uint32_t sclk; int r, size = sizeof(sclk); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&sclk, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2187,18 +2582,20 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; uint32_t mclk; int r, size = sizeof(mclk); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, (void *)&mclk, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + 
pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -3220,8 +3617,12 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - struct drm_device *ddev = adev->ddev; u32 flags = 0; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; amdgpu_device_ip_get_clockgating_state(adev, &flags); seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags); @@ -3230,23 +3631,28 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) if (!adev->pm.dpm_enabled) { seq_printf(m, "dpm not enabled\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); return 0; } - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { - seq_printf(m, "PX asic powered off\n"); - } else if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { + + if (!is_support_sw_smu(adev) && + adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level) adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m); else seq_printf(m, "Debugfs support not implemented for this asic\n"); mutex_unlock(&adev->pm.mutex); + r = 0; } else { - return amdgpu_debugfs_pm_info_pp(m, adev); + r = amdgpu_debugfs_pm_info_pp(m, adev); } - return 0; + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + return r; } static const struct drm_info_list amdgpu_pm_info_list[] = { From a9ffe2a9833835b157a53dcc4d725f71426f55ff Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Jan 2020 15:58:52 -0500 Subject: [PATCH 038/113] drm/amdgpu/debugfs: properly handle runtime pm If driver debugfs files are accessed, power up the GPU when necessary. 
Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 133 ++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 8 ++ 2 files changed, 134 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 63343bb43049..f24ed9a1a3e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -144,10 +145,17 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, *pos &= (1UL << 22) - 1; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || - (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) + (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se_bank, sh_bank, instance_bank); @@ -193,6 +201,9 @@ end: if (pm_pg_lock) mutex_unlock(&adev->pm.mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -237,13 +248,20 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_PCIE(*pos >> 2); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -251,6 +269,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + 
pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -276,12 +297,19 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_PCIE(*pos >> 2, value); @@ -291,6 +319,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -316,13 +347,20 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_DIDT(*pos >> 2); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -330,6 +368,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -355,12 +396,19 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_DIDT(*pos >> 2, value); @@ -370,6 +418,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user size -= 4; } + 
pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -395,13 +446,20 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_SMC(*pos); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -409,6 +467,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -434,12 +495,19 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_SMC(*pos, value); @@ -449,6 +517,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -572,7 +643,16 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, idx = *pos >> 2; valuesize = sizeof(values); + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -633,6 +713,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, wave = (*pos & GENMASK_ULL(36, 31)) >> 31; simd = 
(*pos & GENMASK_ULL(44, 37)) >> 37; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -644,6 +728,9 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); mutex_unlock(&adev->grbm_idx_mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (!x) return -EINVAL; @@ -711,6 +798,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, if (!data) return -ENOMEM; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -726,6 +817,9 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); mutex_unlock(&adev->grbm_idx_mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + while (size) { uint32_t value; @@ -859,6 +953,10 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; int r = 0, i; + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; + /* Avoid accidently unparking the sched thread during GPU reset */ mutex_lock(&adev->lock_reset); @@ -889,6 +987,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) mutex_unlock(&adev->lock_reset); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } @@ -907,8 +1008,17 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) 
+ return r; seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev)); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } @@ -917,8 +1027,17 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT)); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index e9efee04ca23..3c01252b1e0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -741,10 +741,18 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return 0; seq_printf(m, "gpu recover\n"); amdgpu_device_gpu_recover(adev, NULL); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } From a269e44989f3e77fb2ebfb995b8d501924ed07e2 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 17 Dec 2019 16:57:30 -0600 Subject: [PATCH 039/113] drm/amdgpu: Avoid reclaim fs while eviction lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] Avoid reclaim filesystem while eviction lock is held called from MMU notifier. [How] Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked. Using memalloc_nofs_save / memalloc_nofs_restore API. 
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 +++++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 +++- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 4dc75eda1d91..d16231d6a790 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -82,6 +82,32 @@ struct amdgpu_prt_cb { struct dma_fence_cb cb; }; +/** + * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS + * happens while holding this lock anywhere to prevent deadlocks when + * an MMU notifier runs in reclaim-FS context. + */ +static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm) +{ + mutex_lock(&vm->eviction_lock); + vm->saved_flags = memalloc_nofs_save(); +} + +static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm) +{ + if (mutex_trylock(&vm->eviction_lock)) { + vm->saved_flags = memalloc_nofs_save(); + return 1; + } + return 0; +} + +static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) +{ + memalloc_nofs_restore(vm->saved_flags); + mutex_unlock(&vm->eviction_lock); +} + /** * amdgpu_vm_level_shift - return the addr shift for each level * @@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } } - mutex_lock(&vm->eviction_lock); + amdgpu_vm_eviction_lock(vm); vm->evicting = false; - mutex_unlock(&vm->eviction_lock); + amdgpu_vm_eviction_unlock(vm); return 0; } @@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_KFD; - mutex_lock(&vm->eviction_lock); + amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; goto error_unlock; @@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = 
vm->update_funcs->commit(&params, fence); error_unlock: - mutex_unlock(&vm->eviction_lock); + amdgpu_vm_eviction_unlock(vm); return r; } @@ -2533,18 +2559,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return false; /* Try to block ongoing updates */ - if (!mutex_trylock(&bo_base->vm->eviction_lock)) + if (!amdgpu_vm_eviction_trylock(bo_base->vm)) return false; /* Don't evict VM page tables while they are updated */ if (!dma_fence_is_signaled(bo_base->vm->last_direct) || !dma_fence_is_signaled(bo_base->vm->last_delayed)) { - mutex_unlock(&bo_base->vm->eviction_lock); + amdgpu_vm_eviction_unlock(bo_base->vm); return false; } bo_base->vm->evicting = true; - mutex_unlock(&bo_base->vm->eviction_lock); + amdgpu_vm_eviction_unlock(bo_base->vm); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fade4f45320c..b4640ab38c95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -30,6 +30,7 @@ #include #include #include +#include #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -239,9 +240,12 @@ struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; - /* Lock to prevent eviction while we are updating page tables */ + /* Lock to prevent eviction while we are updating page tables + * use vm_eviction_lock/unlock(vm) + */ struct mutex eviction_lock; bool evicting; + unsigned int saved_flags; /* BOs who needs a validation */ struct list_head evicted; From f167ea6a14a24308a34c50c28aec1e1ffe666e22 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 9 Jan 2020 15:36:37 -0600 Subject: [PATCH 040/113] drm/amdgpu: kiq pm4 function implementation for gfx_v9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Functions implemented from kiq_pm4_funcs struct members for gfx_v9 version.
Signed-off-by: Alex Sierra Acked-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 115 ++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ac4153c235ad..1883b09dadfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -739,6 +739,120 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if); +static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, + PACKET3_SET_RESOURCES_VMID_MASK(0) | + /* vmid_mask:0* queue_type:0 (KIQ) */ + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); + amdgpu_ring_write(kiq_ring, + lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, + upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ +} + +static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | + /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | + /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + /* num_queues: must be 1 */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { + .kiq_set_resources = gfx_v9_0_kiq_set_resources, + .kiq_map_queues = gfx_v9_0_kiq_map_queues, + .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, + .kiq_query_status = gfx_v9_0_kiq_query_status, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, +}; + +static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + adev->gfx.kiq.pmf = 
&gfx_v9_0_kiq_pm4_funcs; +} + static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { @@ -4260,6 +4374,7 @@ static int gfx_v9_0_early_init(void *handle) else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + gfx_v9_0_set_kiq_pm4_funcs(adev); gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); gfx_v9_0_set_gds_init(adev); From 58e508b6be82387d22785b8a4e54e50554a91594 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 9 Jan 2020 16:16:28 -0600 Subject: [PATCH 041/113] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10 tlbs invalidate pointer function added to kiq_pm4_funcs struct. This way, tlb flush can be done through kiq member. TLB invalidation implemented for gfx9 and gfx10. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 ++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8e88e0411662..af4bd279f42f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -76,11 +76,15 @@ struct kiq_pm4_funcs { struct amdgpu_ring *ring, u64 addr, u64 seq); + void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub); /* Packet sizes */ int set_resources_size; int map_queues_size; int unmap_queues_size; int query_status_size; + int invalidate_tlbs_size; }; struct amdgpu_kiq { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 6bc3b937fba2..1a500bbcdd1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -40,6 +40,7 @@ #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" #include "soc15.h" +#include
"soc15d.h" #include "soc15_common.h" #include "clearstate_gfx10.h" #include "v10_structs.h" @@ -345,15 +346,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); } +static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { .kiq_set_resources = gfx10_kiq_set_resources, .kiq_map_queues = gfx10_kiq_map_queues, .kiq_unmap_queues = gfx10_kiq_unmap_queues, .kiq_query_status = gfx10_kiq_query_status, + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, + .invalidate_tlbs_size = 12, }; static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1883b09dadfc..84b7c14fdc27 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -837,15 +837,29 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); } +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_0_kiq_set_resources, 
.kiq_map_queues = gfx_v9_0_kiq_map_queues, .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, .kiq_query_status = gfx_v9_0_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, + .invalidate_tlbs_size = 12, }; static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) From 4f01f1e58e073d35d49ca15460d00671c6244323 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 9 Jan 2020 18:14:06 -0600 Subject: [PATCH 042/113] drm/amdgpu: replace kcq enable/disable functions on gfx_v9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] There are HW-independent functions that enable and disable kcq. These functions use the kiq_pm4_funcs implementation. [How] Local kcq enable and disable functions are removed and replaced by the generic kcq enable/disable functions under amdgpu_gfx. Signed-off-by: Alex Sierra Acked-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 102 +------------------------- 1 file changed, 2 insertions(+), 100 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 84b7c14fdc27..388a38febb4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3234,74 +3234,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } -static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) -{ - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - uint64_t queue_mask = 0; - int r, i; - - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { - if (!test_bit(i, adev->gfx.mec.queue_bitmap)) - continue; - - /* This situation may be hit in the future if a new HW - * generation exposes more than 64 queues.
If so, the - * definition of queue_mask needs updating */ - if (WARN_ON(i >= (sizeof(queue_mask)*8))) { - DRM_ERROR("Invalid KCQ enabled: %d\n", i); - break; - } - - queue_mask |= (1ull << i); - } - - r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - return r; - } - - /* set resources */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | - PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ - amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* oac mask */ - amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ - PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ - PACKET3_MAP_QUEUES_QUEUE(ring->queue) | - PACKET3_MAP_QUEUES_PIPE(ring->pipe) | - PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 
0 : 1)) | - PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ - PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ - PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ - amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); - } - - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ enable failed\n"); - - return r; -} - static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -3708,7 +3640,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; } - r = gfx_v9_0_kiq_kcq_enable(adev); + r = amdgpu_gfx_enable_kcq(adev); done: return r; } @@ -3812,36 +3744,6 @@ static int gfx_v9_0_hw_init(void *handle) return r; } -static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) -{ - int r, i; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - - r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); - if (r) - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ - PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | - PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | - PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); - amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - } - r = amdgpu_ring_test_helper(kiq_ring); - if 
(r) - DRM_ERROR("KCQ disable failed\n"); - - return r; -} - static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3853,7 +3755,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) /* disable KCQ to avoid CPC touch memory not valid anymore */ - gfx_v9_0_kcq_disable(adev); + amdgpu_gfx_disable_kcq(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); From ea930000a6dcd161346c957545f7e67b3f77ec06 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 19 Dec 2019 23:40:19 -0600 Subject: [PATCH 043/113] drm/amdgpu: export function to flush TLB via pasid This can be used directly from amdgpu and amdkfd to invalidate TLB through pasid. It supports gmc v7, v8, v9 and v10. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 74 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 +++++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 +++++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 76 +++++++++++++++++++++++++ 5 files changed, 223 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index c91dd602d5f1..d3c27a3c43f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type); + /* flush the vm tlb via pasid */ + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type, bool all_hub); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -216,6 +219,9 @@ struct amdgpu_gmc { }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, 
vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ + ((adev), (pasid), (type), (allhub))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 5ad89bb6f3ba..8afd05834714 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -30,6 +30,8 @@ #include "hdp/hdp_5_0_0_sh_mask.h" #include "gc/gc_10_1_0_sh_mask.h" #include "mmhub/mmhub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_offset.h" #include "dcn/dcn_2_0_0_offset.h" #include "dcn/dcn_2_0_0_sh_mask.h" #include "oss/osssys_5_0_0_offset.h" @@ -37,6 +39,7 @@ #include "navi10_enum.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "nbio_v2_3.h" @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, (!amdgpu_sriov_vf(adev))); } +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( + struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. 
@@ -380,6 +396,63 @@ error_alloc: DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); } +/** + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid, i; + signed long r; + uint32_t seq; + uint16_t queried_pasid; + bool ret; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + if (amdgpu_emu_mode == 0 && ring->sched.ready) { + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, flush_type, all_hub); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; + } + + for (vmid = 1; vmid < 16; vmid++) { + + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + if (all_hub) { + for (i = 0; i < adev->num_vmhubs; i++) + gmc_v10_0_flush_gpu_tlb(adev, vmid, + i, 0); + } else { + gmc_v10_0_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); + } + break; + } + } + + return 0; +} + static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, .map_mtype = gmc_v10_0_map_mtype, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index f08e5330642d..19d5b133e1d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) return 0; } +/** + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid; + unsigned int tmp; + + if (adev->in_gpu_reset) + return -EIO; + + for (vmid = 1; vmid < 16; vmid++) { + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, .set_prt = gmc_v7_0_set_prt, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 6d96d40fbcb8..27d83204fa2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) return 0; } +/** + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. 
+ */ +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid; + unsigned int tmp; + + if (adev->in_gpu_reset) + return -EIO; + + for (vmid = 1; vmid < 16; vmid++) { + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; + +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, .set_prt = gmc_v8_0_set_prt, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b83c8d745f42..40a496804356 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -38,10 +38,12 @@ #include "dce/dce_12_0_sh_mask.h" #include "vega10_enum.h" #include "mmhub/mmhub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" #include "athub/athub_1_0_offset.h" #include "oss/osssys_4_0_offset.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "umc/umc_6_0_sh_mask.h" @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, adev->pdev->device == 0x15d8))); } +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + /* * GART * VMID 
0 is the physical GPU addresses as used by the kernel. @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } +/** + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid, i; + signed long r; + uint32_t seq; + uint16_t queried_pasid; + bool ret; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + if (adev->in_gpu_reset) + return -EIO; + + if (ring->sched.ready) { + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, flush_type, all_hub); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; + } + + for (vmid = 1; vmid < 16; vmid++) { + + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + if (all_hub) { + for (i = 0; i < adev->num_vmhubs; i++) + gmc_v9_0_flush_gpu_tlb(adev, vmid, + i, 0); + } else { + gmc_v9_0_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); + } + break; + } + } + + return 0; + +} + static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, .emit_pasid_mapping 
= gmc_v9_0_emit_pasid_mapping, .map_mtype = gmc_v9_0_map_mtype, From ffa022696f46fb6c13e30cad59b7271a687f96b8 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 19 Dec 2019 23:57:03 -0600 Subject: [PATCH 044/113] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] TLB flush method has been deprecated using kfd2kgd interface. This implementation is now on the amdgpu_amdkfd API. [How] TLB flush functions now implemented in amdgpu_amdkfd. Signed-off-by: Alex Sierra Acked-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 ++++-- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 88e10b956413..8609287620ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -628,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + if (adev->family == AMDGPU_FAMILY_AI) { + int i; + + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); + } else { + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); + } + + return 0; +} + +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + uint32_t flush_type = 0; + bool all_hub = false; + + if (adev->gmc.xgmi.num_physical_nodes && + adev->asic_type == CHIP_VEGA20) + flush_type = 2; + + if (adev->family == AMDGPU_FAMILY_AI) + all_hub = true; + + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, 
flush_type, all_hub); +} + bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 069d5d230810..47b0f2957d1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid); +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 536a153ac9a4..25b90f70aecd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -32,6 +32,7 @@ #include #include #include "amdgpu_amdkfd.h" +#include "amdgpu.h" struct mm_struct; @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, void kfd_flush_tlb(struct kfd_process_device *pdd) { struct kfd_dev *dev = pdd->dev; - const struct kfd2kgd_calls *f2g = dev->kfd2kgd; if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { /* Nothing to flush until a VMID is assigned, which * only happens when the first queue is created. 
*/ if (pdd->qpd.vmid) - f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid); + amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd, + pdd->qpd.vmid); } else { - f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid); + amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, + pdd->process->pasid); } } From d175e9acf658334a4a0559a30c39c368a38d9dcd Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Fri, 20 Dec 2019 00:00:35 -0600 Subject: [PATCH 045/113] drm/amdgpu: flush TLB functions removal from kfd2kgd interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] kfd2kgd interface will be deprecated. This removal only covers TLB invalidation for now. They have been replaced in amdgpu_amdkfd API. [How] TLB invalidate functions removed from the different amdkfd_gfx_v* versions. Signed-off-by: Alex Sierra Acked-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 - .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 67 ------------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 41 -------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 41 -------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 96 ------------------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 2 - .../gpu/drm/amd/include/kgd_kfd_interface.h | 2 - 7 files changed, 251 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 86e71fd6ddea..e3cd73cac353 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -320,7 +320,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base, - .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, - .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = 
amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 61cd707158e4..6132b4874498 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -686,71 +686,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) -{ - signed long r; - uint32_t seq; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - spin_lock(&adev->gfx.kiq.ring_lock); - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); - amdgpu_ring_write(ring, - PACKET3_INVALIDATE_TLBS_DST_SEL(1) | - PACKET3_INVALIDATE_TLBS_PASID(pasid)); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); - - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); - if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; -} - -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - uint16_t queried_pasid; - bool ret; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - if (amdgpu_emu_mode == 0 && ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid); - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - ret = get_atc_vmid_pasid_mapping_info(kgd, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, 0); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - 
pr_err("non kfd vmid %d\n", vmid); - return 0; - } - - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); - return 0; -} - static int kgd_address_watch_disable(struct kgd_dev *kgd) { return 0; @@ -832,7 +767,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { get_atc_vmid_pasid_mapping_info, .get_tile_config = amdgpu_amdkfd_get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 6e6f0a99ec06..8f052e98a3c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -696,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, lower_32_bits(page_table_base)); } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - unsigned int tmp; - - if (adev->in_gpu_reset) - return -EIO; - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid\n"); - return 0; - } - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - return 0; -} - /** * read_vmid_from_vmfault_reg - read vmid from register * @@ -771,7 +732,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { 
.set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index bfbddedb2380..19a10db93d68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -657,45 +657,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, lower_32_bits(page_table_base)); } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - unsigned int tmp; - - if (adev->in_gpu_reset) - return -EIO; - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return -EINVAL; - } - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - return 0; -} - const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -717,6 +678,4 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .set_vm_context_page_table_base = 
set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index e7861f0ef415..932ae85d97e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -617,100 +617,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, - uint32_t flush_type) -{ - signed long r; - uint32_t seq; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - spin_lock(&adev->gfx.kiq.ring_lock); - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); - amdgpu_ring_write(ring, - PACKET3_INVALIDATE_TLBS_DST_SEL(1) | - PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | - PACKET3_INVALIDATE_TLBS_PASID(pasid) | - PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); - - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); - if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; -} - -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid, i; - uint16_t queried_pasid; - bool ret; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - uint32_t flush_type = 0; - - if (adev->in_gpu_reset) - return -EIO; - if (adev->gmc.xgmi.num_physical_nodes && - adev->asic_type == CHIP_VEGA20) - flush_type = 2; - - if (ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid, flush_type); - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - ret = 
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - i, flush_type); - break; - } - } - - return 0; -} - -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int i; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return 0; - } - - /* Use legacy mode tlb invalidation. - * - * Currently on Raven the code below is broken for anything but - * legacy mode due to a MMHUB power gating problem. A workaround - * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ - * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack - * bit. - * - * TODO 1: agree on the right set of invalidation registers for - * KFD use. Use the last one for now. Invalidate both GC and - * MMHUB. - * - * TODO 2: support range-based invalidation, requires kfg2kgd - * interface change - */ - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); - - return 0; -} - int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) { return 0; @@ -793,7 +699,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, - .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, - .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 02b1426d17d1..dfafa28b7559 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -57,7 +57,5 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, bool 
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, uint8_t vmid, uint16_t *p_pasid); -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, struct tile_config *config); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 2cd217e60125..a01ef836ad58 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -307,8 +307,6 @@ struct kfd2kgd_calls { void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); - int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); - int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid); uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); uint64_t (*get_hive_id)(struct kgd_dev *kgd); From 35cd89d5a658dc26687a7a6909d35fee19a6b173 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 25 Dec 2019 15:50:51 +0800 Subject: [PATCH 046/113] drm/amdkfd: use kiq to load the mqd of hiq queue for gfx v9 (v6) There is an issue that CP will check the HIQ queue to be configured and mapped with KIQ ring, otherwise, it will be unable to read back the secure buffer while the gfxoff is enabled even with trusted IP blocks. v1 -> v2: - Fix to remove surplus set_resources packets. - Fill the whole configuration in MQD. - Change the author as Aaron because he addressed the key point of this issue. - Add kiq ring lock. v2 -> v3: - Free the lock while in error return case. - Remove the programming only needed by the queue is unmapped. v3 -> v4: - Remove doorbell programming because it's used for restarting queue. - Remove CP scheduler programming because map_queue packet will handle this. v4 -> v5: - Remove cp_hqd_active because mec ucode will enable it while use map_queues. - Revise goto out_unlock. 
- Correct the right doorbell offset for HIQ that kfd driver assigned in the packet. v5 -> v6: - Merge Arcturus fix into this patch because it will get oops in Arcturus platform. Reported-by: Lisa Saturday Signed-off-by: Aaron Liu Signed-off-by: Huang Rui Reviewed-and-Tested-by: Aaron Liu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 79 ++++++++++++++----- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 3 + .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 10 ++- .../gpu/drm/amd/include/kgd_kfd_interface.h | 4 + 5 files changed, 76 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index e3cd73cac353..4bcc175a149d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -305,6 +305,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, .init_interrupts = kgd_gfx_v9_init_interrupts, .hqd_load = kgd_gfx_v9_hqd_load, + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_dump = kgd_gfx_v9_hqd_dump, .hqd_sdma_dump = kgd_hqd_sdma_dump, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 932ae85d97e2..8562afe5b761 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -103,13 +103,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, lock_srbm(kgd, mec, pipe, queue_id, 0); } -static uint32_t get_queue_mask(struct amdgpu_device *adev, +static uint64_t get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + - queue_id) & 31; + unsigned int bit = pipe_id * 
adev->gfx.mec.num_queue_per_pipe + + queue_id; - return ((uint32_t)1) << bit; + return 1ull << bit; } static void release_queue(struct kgd_dev *kgd) @@ -258,21 +258,6 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, acquire_queue(kgd, pipe_id, queue_id); - /* HIQ is set during driver init period with vmid set to 0*/ - if (m->cp_hqd_vmid == 0) { - uint32_t value, mec, pipe; - - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; - pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", - mec, pipe, queue_id); - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); - } - /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); @@ -323,7 +308,7 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), - get_queue_mask(adev, pipe_id, queue_id)); + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ @@ -339,6 +324,59 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } +int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct v9_mqd *m; + uint32_t mec, pipe; + int r; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, 
pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + + spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 7); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(queue_id) | + PACKET3_MAP_QUEUES_PIPE(pipe) | + PACKET3_MAP_QUEUES_ME((mec - 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq.ring_lock); + release_queue(kgd); + + return r; +} + int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) @@ -684,6 +722,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, .init_interrupts = kgd_gfx_v9_init_interrupts, .hqd_load = kgd_gfx_v9_hqd_load, + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_dump = kgd_gfx_v9_hqd_dump, .hqd_sdma_dump = kgd_hqd_sdma_dump, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index dfafa28b7559..63d3e6683dfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -33,6 +33,9 @@ 
int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); +int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off); int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index aa9010995eaf..436b7f518979 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -191,6 +191,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, wptr_shift, 0, mms); } +static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, + queue_id, p->doorbell_off); +} + static void update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { @@ -449,7 +457,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->allocate_mqd = allocate_hiq_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd_hiq_sdma; - mqd->load_mqd = load_mqd; + mqd->load_mqd = hiq_load_mqd_kiq; mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index a01ef836ad58..a607b1034962 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -256,6 +256,10 @@ struct kfd2kgd_calls { uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); + int (*hiq_mqd_load)(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off); + int (*hqd_sdma_load)(struct 
kgd_dev *kgd, void *mqd, uint32_t __user *wptr, struct mm_struct *mm); From 8eee00f615f830c0569923ef3047cf9535f82c39 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Fri, 10 Jan 2020 13:54:29 +0800 Subject: [PATCH 047/113] drm/amdkfd: use map_queues for hiq on gfx v10 as well To align with gfx v9, we use the map_queues packet to load hiq MQD. Signed-off-by: Huang Rui Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 82 ++++++++++++++----- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 10 ++- 2 files changed, 70 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 6132b4874498..a7b17c8deb00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -107,13 +107,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, lock_srbm(kgd, mec, pipe, queue_id, 0); } -static uint32_t get_queue_mask(struct amdgpu_device *adev, +static uint64_t get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + - queue_id) & 31; + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id; - return ((uint32_t)1) << bit; + return 1ull << bit; } static void release_queue(struct kgd_dev *kgd) @@ -268,21 +268,6 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); acquire_queue(kgd, pipe_id, queue_id); - /* HIQ is set during driver init period with vmid set to 0*/ - if (m->cp_hqd_vmid == 0) { - uint32_t value, mec, pipe; - - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; - pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", - mec, pipe, queue_id); - value = RREG32(SOC15_REG_OFFSET(GC, 0, 
mmRLC_CP_SCHEDULERS)); - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); - } - /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); @@ -332,9 +317,10 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, lower_32_bits((uint64_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uint64_t)wptr)); - pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id)); + pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), - get_queue_mask(adev, pipe_id, queue_id)); + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ @@ -350,6 +336,59 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } +static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct v10_compute_mqd *m; + uint32_t mec, pipe; + int r; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + + spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 7); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + 
PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(queue_id) | + PACKET3_MAP_QUEUES_PIPE(pipe) | + PACKET3_MAP_QUEUES_ME((mec - 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq.ring_lock); + release_queue(kgd); + + return r; +} + static int kgd_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) @@ -752,6 +791,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, + .hiq_mqd_load = kgd_hiq_mqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_dump = kgd_hqd_dump, .hqd_sdma_dump = kgd_hqd_sdma_dump, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 7832ec6e480b..d1d68a51bfb8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -153,6 +153,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, return r; } +static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, + queue_id, p->doorbell_off); +} + static void update_mqd(struct mqd_manager *mm, void *mqd, 
struct queue_properties *q) { @@ -409,7 +417,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, mqd->allocate_mqd = allocate_hiq_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd_hiq_sdma; - mqd->load_mqd = load_mqd; + mqd->load_mqd = hiq_load_mqd_kiq; mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; From 55414ad5c983ed708d778ea7b29e80f89750ff73 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 9 Jan 2020 12:07:22 +0100 Subject: [PATCH 048/113] drm/amdgpu: error out on entity with no run queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disabled HW IP's entity initialized with NULL rq. We should not process any submit request from userspace for a disabled HW IP. Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5b330f69194b..7a8772b7caf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -909,6 +909,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (parser->entity && parser->entity != entity) return -EINVAL; + /* Return if there is no run queue associated with this entity. + * Possibly because of disabled HW IP*/ + if (entity->rq == NULL) + return -EINVAL; + parser->entity = entity; ring = to_amdgpu_ring(entity->rq->sched); From 971fe55545de2f67463def381df57d803dddf61d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 16 Dec 2019 14:54:31 +0100 Subject: [PATCH 049/113] drm/amdgpu: drop amdgpu_job.owner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Entirely unused. 
Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 7a8772b7caf7..a52a084158b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1234,7 +1234,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, goto error_abort; } - job->owner = p->filp; p->fence = dma_fence_get(&job->base.s_fence->finished); amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 73328d0c741d..d42be880a236 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -153,7 +153,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, if (r) return r; - job->owner = owner; *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); priority = job->base.s_priority; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index dc7ee9358dcd..3f7b8433d179 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -49,7 +49,6 @@ struct amdgpu_job { uint32_t preamble_status; uint32_t preemption_status; uint32_t num_ibs; - void *owner; bool vm_needs_flush; uint64_t vm_pd_addr; unsigned vmid; From b4df2823ec10c695ae4218965d55ac8909c8842c Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Fri, 10 Jan 2020 13:27:23 -0500 Subject: [PATCH 050/113] drm/amdgpu: check rlc_g firmware pointer is valid before using it In SRIOV, the rlc_g firmware is loaded by the host; the guest driver does not load it, so the rlc_fw pointer is NULL. Signed-off-by: shaoyunl Reviewed-by: Hawking Zhang Signed-off-by:
Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 1a500bbcdd1d..95f56b541e75 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -822,10 +822,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; info->ucode_id = AMDGPU_UCODE_ID_RLC_G; info->fw = adev->gfx.rlc_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - + if (info->fw) { + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } if (adev->gfx.rlc.is_rlc_v2_1 && adev->gfx.rlc.save_restore_list_cntl_size_bytes && adev->gfx.rlc.save_restore_list_gpm_size_bytes && From 7b7041f89218d9d81de48a9dc365667f0b9e56b4 Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Tue, 14 Jan 2020 19:17:19 +0800 Subject: [PATCH 051/113] drm/amdgpu/gfx10: update gfx golden settings remove registers: mmSPI_CONFIG_CNTL add registers: mmSPI_CONFIG_CNTL_1 Reviewed-by: Alex Deucher Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 95f56b541e75..5317cc156dab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -121,7 +121,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), - 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), From 6a1094ab6882cc734af406a4815bd70b75d9b989 Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Tue, 14 Jan 2020 19:19:40 +0800 Subject: [PATCH 052/113] drm/amdgpu/gfx10: update gfx golden settings for navi14 remove registers: mmSPI_CONFIG_CNTL add registers: mmSPI_CONFIG_CNTL_1 Reviewed-by: Alex Deucher Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 5317cc156dab..468f1b1d41e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -169,7 +169,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070105), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), From 9e441478623fd913d4340654682b19f0c24e629d Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Thu, 9 Jan 2020 15:38:39 +0800 Subject: [PATCH 053/113] drm/amdgpu: fix modprobe failure of the secondary GPU when GDDR6 training enabled(V5) MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [why] In a dual-GPU scenario, stolen_size is set to zero on the secondary GPU, since no pre-OS console is using that memory. The bottom region of VRAM is then allocated as GTT. Unfortunately, a small region at the bottom of VRAM is encroached on by the UMC firmware during GDDR6 BIST training, which causes a page fault. [how] Force stolen_size to at least 3MB so that the bottom region of VRAM is allocated as stolen memory, which avoids the GTT corruption. Reviewed-by: Christian König Reviewed-by: Feifei Xu Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 5 +++++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 27 ++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index d3c27a3c43f6..86267baca07c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -60,6 +60,11 @@ */ #define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL +/* + * Default stolen memory size, 1024 * 768 * 4 + */ +#define AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE 0x300000ULL + struct firmware; /* diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 8afd05834714..bbede09983e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -640,7 +640,12 @@ static int gmc_v10_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - amdgpu_bo_late_init(adev); + /* + * Can't free the stolen VGA memory when it might be used for memory + * training again.
+ */ + if (!adev->fw_vram_usage.mem_train_support) + amdgpu_bo_late_init(adev); r = amdgpu_gmc_allocate_vm_inv_eng(adev); if (r) @@ -824,6 +829,19 @@ static int gmc_v10_0_sw_init(void *handle) adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev); + /* + * In dual GPUs scenario, stolen_size is assigned to zero on the + * secondary GPU, since there is no pre-OS console using that memory. + * Then the bottom region of VRAM was allocated as GTT, unfortunately a + * small region of bottom VRAM was encroached by UMC firmware during + * GDDR6 BIST training, this cause page fault. + * The page fault can be fixed by forcing stolen_size to 3MB, then the + * bottom region of VRAM was allocated as stolen memory, GTT corruption + * avoid. + */ + adev->gmc.stolen_size = max(adev->gmc.stolen_size, + AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE); + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) @@ -863,6 +881,13 @@ static void gmc_v10_0_gart_fini(struct amdgpu_device *adev) static int gmc_v10_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + void *stolen_vga_buf; + + /* + * Free the stolen memory if it wasn't already freed in late_init + * because of memory training. + */ + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); amdgpu_vm_manager_fini(adev); gmc_v10_0_gart_fini(adev); From 1291bd70a2ee4507549bbc6379efbfb290e8f85a Mon Sep 17 00:00:00 2001 From: Jack Zhang Date: Wed, 15 Jan 2020 17:03:31 +0800 Subject: [PATCH 054/113] drm/amdgpu/sriov skip the update of SMU_TABLE_ACTIVITY_MONITOR_COEFF There's no need to dump ACTIVITY_MONITOR_COEFF under VF. Therefore, Skip the update of SMU_TABLE_ACTIVITY_MONITOR_COEFF under SRIOV VF. 
Signed-off-by: Jack Zhang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 1c15c6fbe3b9..14ba6aa876e2 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1318,6 +1318,7 @@ static int arcturus_get_power_limit(struct smu_context *smu, static int arcturus_get_power_profile_mode(struct smu_context *smu, char *buf) { + struct amdgpu_device *adev = smu->adev; DpmActivityMonitorCoeffInt_t activity_monitor; static const char *profile_name[] = { "BOOTUP_DEFAULT", @@ -1351,7 +1352,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, if (result) return result; - if (smu_version >= 0x360d00) + if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n", title[0], title[1], title[2], title[3], title[4], title[5], title[6], title[7], title[8], title[9], title[10]); @@ -1368,7 +1369,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, if (workload_type < 0) continue; - if (smu_version >= 0x360d00) { + if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) { result = smu_update_table(smu, SMU_TABLE_ACTIVITY_MONITOR_COEFF, workload_type, @@ -1383,7 +1384,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, size += sprintf(buf + size, "%2d %14s%s\n", i, profile_name[i], (i == smu->power_profile_mode) ? 
"*" : " "); - if (smu_version >= 0x360d00) { + if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) { size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 0, From 597e6ac3a776c0bf86038a627b3392a52e8a51ab Mon Sep 17 00:00:00 2001 From: James Zhu Date: Mon, 13 Jan 2020 16:40:00 -0500 Subject: [PATCH 055/113] drm/amdgpu/vcn: support multiple-instance dpg pause mode Add multiple-instance dpg pause mode support for VCN2.5 Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index ed106d99d078..99df6930ddf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -298,7 +298,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, j, &new_state); } fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); @@ -341,7 +341,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) new_state.fw_based = VCN_DPG_STATE__PAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, ring->me, &new_state); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index e6dee8224d33..26c6623b4b4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -199,7 +199,7 @@ struct amdgpu_vcn { unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct 
dpg_pause_state *new_state); }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 3b025a3f8c7d..a70351f2740c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -50,7 +50,7 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); static void vcn_v1_0_idle_work_handler(struct work_struct *work); @@ -1199,7 +1199,7 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev) } static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) + int inst_idx, struct dpg_pause_state *new_state) { int ret_code; uint32_t reg_data = 0; @@ -1786,7 +1786,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work) else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, 0, &new_state); } fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec); @@ -1840,7 +1840,7 @@ void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring) else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) new_state.jpeg = VCN_DPG_STATE__PAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, 0, &new_state); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index d76ece38c97b..dcdc7adfd884 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -58,7 +58,7 @@ static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v2_0_set_powergating_state(void *handle, enum amd_powergating_state state); static 
int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); /** * vcn_v2_0_early_init - set function pointers @@ -1135,7 +1135,7 @@ power_off: } static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) + int inst_idx, struct dpg_pause_state *new_state) { struct amdgpu_ring *ring; uint32_t reg_data = 0; From 5db86843e8bf0e14f648195a26dad54a9c32f9bd Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 15 Jan 2020 11:36:49 -0500 Subject: [PATCH 056/113] drm/amdgpu/vcn: support multiple instance direct SRAM read and write (v2) Add multiple instance direct SRAM read and write support for vcn2.5 v2: squash in indexing fix Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 27 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 46 ++++++------ drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 94 ++++++++++++------------- 3 files changed, 83 insertions(+), 84 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 99df6930ddf1..18a6c357a3ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -165,15 +165,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); return r; } - } - if (adev->vcn.indirect_sram) { - r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r); - return r; + if (adev->vcn.indirect_sram) { + r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo, + &adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr); + if (r) { + dev_err(adev->dev, "VCN %d (%d) 
failed to allocate DPG bo\n", i, r); + return r; + } } } @@ -186,15 +186,14 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) cancel_delayed_work_sync(&adev->vcn.idle_work); - if (adev->vcn.indirect_sram) { - amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, - (void **)&adev->vcn.dpg_sram_cpu_addr); - } - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { if (adev->vcn.harvest_config & (1 << j)) continue; + if (adev->vcn.indirect_sram) { + amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo, + &adev->vcn.inst[j].dpg_sram_gpu_addr, + (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr); + } kvfree(adev->vcn.inst[j].saved_bo); amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 26c6623b4b4f..5ce13c06b1ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -104,27 +104,27 @@ internal_reg_offset >>= 2; \ }) -#define RREG32_SOC15_DPG_MODE_2_0(offset, mask_en) \ - ({ \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ - (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ - mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ - offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ - RREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA); \ +#define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) \ + ({ \ + WREG32_SOC15(VCN, inst, mmUVD_DPG_LMA_CTL, \ + (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \ }) -#define WREG32_SOC15_DPG_MODE_2_0(offset, value, mask_en, indirect) \ - do { \ - if (!indirect) { \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ - (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ - mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ - offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ - } else { \ - 
*adev->vcn.dpg_sram_curr_addr++ = offset; \ - *adev->vcn.dpg_sram_curr_addr++ = value; \ - } \ +#define WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \ + } \ } while (0) enum engine_status_constants { @@ -173,6 +173,10 @@ struct amdgpu_vcn_inst { struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; struct amdgpu_irq_src irq; struct amdgpu_vcn_reg external; + struct amdgpu_bo *dpg_sram_bo; + void *dpg_sram_cpu_addr; + uint64_t dpg_sram_gpu_addr; + uint32_t *dpg_sram_curr_addr; }; struct amdgpu_vcn { @@ -184,10 +188,6 @@ struct amdgpu_vcn { struct dpg_pause_state pause_state; bool indirect_sram; - struct amdgpu_bo *dpg_sram_bo; - void *dpg_sram_cpu_addr; - uint64_t dpg_sram_gpu_addr; - uint32_t *dpg_sram_curr_addr; uint8_t num_vcn_inst; struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index dcdc7adfd884..9ff59ac9b5fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -356,88 +356,88 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, 
SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } offset = 0; } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); offset = size; - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); } if (!indirect) - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); else - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); /* cache window 1: stack */ if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, 
SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); /* cache window 2: context */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + 
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); /* non-cache window */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } @@ -583,19 +583,19 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, UVD_CGC_CTRL__WCB_MODE_MASK | UVD_CGC_CTRL__VCPU_MODE_MASK | UVD_CGC_CTRL__SCPU_MODE_MASK); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); /* turn off clock gating */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); /* turn on SUVD clock gating */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); /* turn on sw mode in UVD_SUVD_CGC_CTRL */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, 
sram_sel, indirect); } @@ -759,7 +759,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp); if (indirect) - adev->vcn.dpg_sram_curr_addr = (uint32_t*)adev->vcn.dpg_sram_cpu_addr; + adev->vcn.inst->dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst->dpg_sram_cpu_addr; /* enable clock gating */ vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect); @@ -768,11 +768,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK; - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); /* disable master interupt */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect); /* setup mmUVD_LMI_CTRL */ @@ -784,28 +784,28 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | 0x00100000L); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_CNTL), 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUXA0), ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + 
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUXB0), ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUX), ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | @@ -813,29 +813,29 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) vcn_v2_0_mc_resume_dpg_mode(adev, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); /* release VCPU reset to boot */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect); /* enable LMI MC and UMC channels */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_CTRL2), 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect); /* enable master interrupt */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) - psp_update_vcn_sram(adev, 0, adev->vcn.dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.dpg_sram_curr_addr - - (uintptr_t)adev->vcn.dpg_sram_cpu_addr)); + psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr)); /* force RBC into idle 
state */ rb_bufsz = order_base_2(ring->ring_size); From 45cec87cd6987eb17d897ceceb153cc449261289 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 15 Jan 2020 11:38:57 -0500 Subject: [PATCH 057/113] drm/amdgpu/vcn: move macro from vcn2.0 to share amdgpu_vcn (v2) Move macro from vcn2.0 to amdgpu_vcn to share with vcn2.5 v2: squash in macro fix Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 5 +++++ drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 14 +++++++------- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 5 ----- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 5ce13c06b1ed..c4984c5fb2db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -57,6 +57,11 @@ #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 +#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b +#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 +#define mmUVD_REG_XX_MASK 0x026c +#define mmUVD_REG_XX_MASK_BASE_IDX 1 + /* 1 second timeout */ #define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index a70351f2740c..e654938f6cca 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -39,10 +39,10 @@ #include "ivsrcid/vcn/irqsrcs_vcn_1_0.h" #include "jpeg_v1_0.h" -#define mmUVD_RBC_XX_IB_REG_CHECK 0x05ab -#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 -#define mmUVD_REG_XX_MASK 0x05ac -#define mmUVD_REG_XX_MASK_BASE_IDX 1 +#define mmUVD_RBC_XX_IB_REG_CHECK_1_0 0x05ab +#define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX 1 +#define mmUVD_REG_XX_MASK_1_0 0x05ac +#define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1 static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); @@ -835,9 +835,9 @@ static int 
vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) vcn_v1_0_mc_resume_spg_mode(adev); - WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK, 0x10); - WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, - RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK) | 0x3); + WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10); + WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0, + RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0) | 0x3); /* enable VCPU clock */ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 9ff59ac9b5fb..f4db8af6536b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -47,11 +47,6 @@ #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 -#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b -#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 -#define mmUVD_REG_XX_MASK 0x026c -#define mmUVD_REG_XX_MASK_BASE_IDX 1 - static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); From d2a2c64f53718a995fd9a12171ba7ea10e80e96c Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 18 Dec 2019 14:17:56 -0500 Subject: [PATCH 058/113] drm/amdgpu/vcn2.5: add DPG mode start and stop Add DPG mode start and stop functions for vcn2.5 v2: Correct firmware ucode index in vcn_v2_5_mc_resume_dpg_mode Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 290 +++++++++++++++++++++++++- 1 file changed, 288 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index fa9024988918..2345eb3acb8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -286,7 +286,8 @@ static int vcn_v2_5_hw_init(void 
*handle) done: if (!r) - DRM_INFO("VCN decode and encode initialized successfully.\n"); + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); return r; } @@ -309,7 +310,9 @@ static int vcn_v2_5_hw_fini(void *handle) continue; ring = &adev->vcn.inst[i].ring_dec; - if (RREG32_SOC15(VCN, i, mmUVD_STATUS)) + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, mmUVD_STATUS))) vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); ring->sched.ready = false; @@ -418,6 +421,78 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) } } +static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + offset = 0; + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, 
inst_idx, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + + /* cache window 1: stack */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, 
SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + /** * vcn_v2_5_disable_clock_gating - disable VCN clock gating * @@ -536,6 +611,54 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) } } +static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev, + uint8_t sram_sel, int inst_idx, uint8_t indirect) +{ + uint32_t reg_data = 0; + + /* enable sw clock gating control */ + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); + + /* turn off clock gating */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, 
mmUVD_CGC_GATE), 0, sram_sel, indirect); + + /* turn on SUVD clock gating */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); + + /* turn on sw mode in UVD_SUVD_CGC_CTRL */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); +} + /** * vcn_v2_5_enable_clock_gating - enable VCN clock gating * @@ -598,6 +721,130 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) } } +static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + struct amdgpu_ring *ring; + uint32_t rb_bufsz, tmp; + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp); + + /* enable clock gating */ + vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + tmp |= UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interupt */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect); + + /* setup mmUVD_LMI_CTRL */ + tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, 
SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MPC_CNTL), + 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MPC_SET_MUXA0), + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MPC_SET_MUXB0), + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MPC_SET_MUX), + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); + + vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); + + /* enable LMI MC and UMC channels */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect); + + /* unblock VCPU register access */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable master interrupt */ + 
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + ring = &adev->vcn.inst[inst_idx].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_CNTL, tmp); + + /* set the write pointer delay */ + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR_ADDR, + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0); + + WREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2, 0); + + ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + return 0; +} + static int vcn_v2_5_start(struct amdgpu_device *adev) { struct amdgpu_ring *ring; @@ -610,6 +857,9 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v2_5_start_dpg_mode(adev, i, 0); + /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); @@ -973,6 +1223,35 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) 
return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table); } +static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + int ret_code = 0; + uint32_t tmp; + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2); + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + return 0; +} + static int vcn_v2_5_stop(struct amdgpu_device *adev) { uint32_t tmp; @@ -981,6 +1260,12 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v2_5_stop_dpg_mode(adev, i); + goto power_off; + } + /* wait for vcn idle */ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); if (r) @@ -1030,6 +1315,7 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } +power_off: if (adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, false); From 8484df9601d52b548e29763638163391e175ea9f Mon Sep 17 00:00:00 2001 From: James Zhu Date: Mon, 13 Jan 2020 17:12:04 -0500 Subject: [PATCH 059/113] drm/amdgpu/vcn2.5: add dpg pause mode Add dpg pause mode support for vcn2.5 Signed-off-by: James Zhu Reviewed-by: 
Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 70 +++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 2345eb3acb8f..c09490d0765f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -55,6 +55,8 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v2_5_set_powergating_state(void *handle, enum amd_powergating_state state); +static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); static int amdgpu_ih_clientid_vcns[] = { @@ -212,6 +214,9 @@ static int vcn_v2_5_sw_init(void *handle) return r; } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode; + return 0; } @@ -1322,6 +1327,67 @@ power_off: return 0; } +static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state) +{ + struct amdgpu_ring *ring; + uint32_t reg_data = 0; + int ret_code; + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d -> %d", + adev->vcn.pause_state.fw_based, new_state->fw_based); + reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_DPG_PAUSE, + 
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.inst[inst_idx].ring_enc[1]; + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); + + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, + 0x0, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + /** * vcn_v2_5_dec_ring_get_rptr - get read pointer * @@ -1364,6 +1430,10 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + WREG32_SOC15(UVD, ring->me, mmUVD_SCRATCH2, + lower_32_bits(ring->wptr) | 0x80000000); + if (ring->use_doorbell) { adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); From 0c0dab86d9792576301444af211eb2b2c9131bcc Mon Sep 17 00:00:00 2001 From: James Zhu Date: Mon, 13 Jan 2020 17:15:36 -0500 
Subject: [PATCH 060/113] drm/amdgpu/vcn2.5: implement indirect DPG SRAM mode Implement indirect DPG SRAM mode for vcn2.5 Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 3 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 69 ++++++++++++++++++------- 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 18a6c357a3ce..f96464e2c157 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -75,6 +75,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case CHIP_ARCTURUS: fw_name = FIRMWARE_ARCTURUS; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; break; case CHIP_RENOIR: fw_name = FIRMWARE_RENOIR; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index c09490d0765f..c8b63d57a541 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -433,14 +433,23 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + 
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } offset = 0; } else { WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( @@ -455,19 +464,31 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); } - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + if (!indirect) + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); /* cache window 1: stack */ - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( - UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 
0, 0, indirect); - + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); @@ -740,6 +761,9 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp); + if (indirect) + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + /* enable clock gating */ vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); @@ -815,6 +839,11 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo UVD, inst_idx, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + if (indirect) + psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); + ring = &adev->vcn.inst[inst_idx].ring_dec; /* force RBC into idle state */ 
rb_bufsz = order_base_2(ring->ring_size); @@ -863,7 +892,7 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - return vcn_v2_5_start_dpg_mode(adev, i, 0); + return vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram); /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, From 56822db194232c089601728d68ed078dccb97f8b Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 15 Jan 2020 15:06:04 +0100 Subject: [PATCH 061/113] drm/scheduler: improve job distribution with multiple queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch uses score based logic to select a new rq for better loadbalance between multiple rq/scheds instead of num_jobs. Below are test results after running amdgpu_test from mesa drm Before this patch: sched_name num of many times it got scheduled ========= ================================== sdma0 314 sdma1 32 comp_1.0.0 56 comp_1.0.1 0 comp_1.1.0 0 comp_1.1.1 0 comp_1.2.0 0 comp_1.2.1 0 comp_1.3.0 0 comp_1.3.1 0 After this patch: sched_name num of many times it got scheduled ========= ================================== sdma0 216 sdma1 185 comp_1.0.0 39 comp_1.0.1 9 comp_1.1.0 12 comp_1.1.1 0 comp_1.2.0 12 comp_1.2.1 0 comp_1.3.0 12 comp_1.3.1 0 Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/sched_entity.c | 10 +++++----- drivers/gpu/drm/scheduler/sched_main.c | 6 ++++-- include/drm/gpu_scheduler.h | 6 +++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 2e3a058fc239..33e2cd1089a2 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -130,7 +130,7 @@ static struct drm_sched_rq * drm_sched_entity_get_free_sched(struct 
drm_sched_entity *entity) { struct drm_sched_rq *rq = NULL; - unsigned int min_jobs = UINT_MAX, num_jobs; + unsigned int min_score = UINT_MAX, num_score; int i; for (i = 0; i < entity->num_sched_list; ++i) { @@ -141,9 +141,9 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity) continue; } - num_jobs = atomic_read(&sched->num_jobs); - if (num_jobs < min_jobs) { - min_jobs = num_jobs; + num_score = atomic_read(&sched->score); + if (num_score < min_score) { + min_score = num_score; rq = &entity->sched_list[i]->sched_rq[entity->priority]; } } @@ -498,7 +498,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job, bool first; trace_drm_sched_job(sched_job, entity); - atomic_inc(&entity->rq->sched->num_jobs); + atomic_inc(&entity->rq->sched->score); WRITE_ONCE(entity->last_user, current->group_leader); first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 3fad5876a13f..71ce6215956f 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -92,6 +92,7 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq, if (!list_empty(&entity->list)) return; spin_lock(&rq->lock); + atomic_inc(&rq->sched->score); list_add_tail(&entity->list, &rq->entities); spin_unlock(&rq->lock); } @@ -110,6 +111,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, if (list_empty(&entity->list)) return; spin_lock(&rq->lock); + atomic_dec(&rq->sched->score); list_del_init(&entity->list); if (rq->current_entity == entity) rq->current_entity = NULL; @@ -655,7 +657,7 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) struct drm_gpu_scheduler *sched = s_fence->sched; atomic_dec(&sched->hw_rq_count); - atomic_dec(&sched->num_jobs); + atomic_dec(&sched->score); trace_drm_sched_process_job(s_fence); @@ -830,7 +832,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, 
spin_lock_init(&sched->job_list_lock); atomic_set(&sched->hw_rq_count, 0); INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); - atomic_set(&sched->num_jobs, 0); + atomic_set(&sched->score, 0); atomic64_set(&sched->job_id_count, 0); /* Each scheduler will run on a seperate kernel thread */ diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 96a1a1b7526e..537f7a4655a5 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -261,7 +261,7 @@ struct drm_sched_backend_ops { * @job_list_lock: lock to protect the ring_mirror_list. * @hang_limit: once the hangs by a job crosses this limit then it is marked * guilty and it will be considered for scheduling further. - * @num_jobs: the number of jobs in queue in the scheduler + * @score: score to help loadbalancer pick a idle sched * @ready: marks if the underlying HW is ready to work * @free_guilty: A hit to time out handler to free the guilty job. * @@ -282,8 +282,8 @@ struct drm_gpu_scheduler { struct list_head ring_mirror_list; spinlock_t job_list_lock; int hang_limit; - atomic_t num_jobs; - bool ready; + atomic_t score; + bool ready; bool free_guilty; }; From 27414cd42a48a4b12897d8e427dc919e4cf41378 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Jan 2020 11:04:27 -0500 Subject: [PATCH 062/113] drm/amdgpu/pm: clean up return types count is size_t so don't use negative values. 
Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 39 ++++++++++++++++---------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 806e731c1ff4..b03b1eb7ba04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -206,10 +206,8 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, state = POWER_STATE_TYPE_BALANCED; else if (strncmp("performance", buf, strlen("performance")) == 0) state = POWER_STATE_TYPE_PERFORMANCE; - else { - count = -EINVAL; - goto fail; - } + else + return -EINVAL; ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) @@ -231,8 +229,6 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - -fail: return count; } @@ -399,8 +395,11 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, if (is_support_sw_smu(adev)) { ret = smu_force_performance_level(&adev->smu, level); - if (ret) - count = -EINVAL; + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } } else if (adev->powerplay.pp_funcs->force_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->pm.dpm.thermal_active) { @@ -410,10 +409,14 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, return -EINVAL; } ret = amdgpu_dpm_force_performance_level(adev, level); - if (ret) - count = -EINVAL; - else + if (ret) { + mutex_unlock(&adev->pm.mutex); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } else { adev->pm.dpm.forced_level = level; + } mutex_unlock(&adev->pm.mutex); } pm_runtime_mark_last_busy(ddev->dev); @@ -890,12 +893,18 @@ static ssize_t amdgpu_set_pp_feature_status(struct device *dev, if (is_support_sw_smu(adev)) { ret = 
smu_sys_set_pp_feature_mask(&adev->smu, featuremask); - if (ret) - count = -EINVAL; + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask); - if (ret) - count = -EINVAL; + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } } pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); From 52055039297622f164ed2cead43954bb3e29e1b2 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 10 Jan 2020 14:15:52 -0500 Subject: [PATCH 063/113] drm/amdkfd: Add a message when SW scheduler is used SW scheduler is previously called non HW scheduler, or non HWS. This message is useful when triaging issues from dmesg. Signed-off-by: Yong Zhao Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d7eb6ac37f62..2870553a2ce0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -934,6 +934,7 @@ static void uninitialize(struct device_queue_manager *dqm) static int start_nocpsch(struct device_queue_manager *dqm) { + pr_info("SW scheduler is used"); init_interrupts(dqm); if (dqm->dev->device_info->asic_family == CHIP_HAWAII) From 0e5b7a952818e20a6cd0f90096e7c968616b1418 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Fri, 10 Jan 2020 11:40:36 +0800 Subject: [PATCH 064/113] drm/amdgpu: only set cp active field for kiq queue The mec ucode will set the CP_HQD_ACTIVE bit while the queue is mapped by MAP_QUEUES packet. So we only need set cp active field for kiq queue. 
Signed-off-by: Huang Rui Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 +++++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 468f1b1d41e7..874f641de281 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3337,8 +3337,11 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index cfc1403fc855..46f0533ba43f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4558,8 +4558,11 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. 
+ */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 388a38febb4e..46ab46757b25 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3370,8 +3370,11 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } From 2ac0d686971aad123bb5be07183b7623e99e4e68 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 15 Jan 2020 17:07:21 +0800 Subject: [PATCH 065/113] drm/amd/powerplay: a quick fix for the deadlock issue below NFO: task ocltst:2028 blocked for more than 120 seconds. Tainted: G OE 5.0.0-37-generic #40~18.04.1-Ubuntu echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. cltst D 0 2028 2026 0x00000000 all Trace: __schedule+0x2c0/0x870 schedule+0x2c/0x70 schedule_preempt_disabled+0xe/0x10 __mutex_lock.isra.9+0x26d/0x4e0 __mutex_lock_slowpath+0x13/0x20 ? 
__mutex_lock_slowpath+0x13/0x20 mutex_lock+0x2f/0x40 amdgpu_dpm_set_powergating_by_smu+0x64/0xe0 [amdgpu] gfx_v8_0_enable_gfx_static_mg_power_gating+0x3c/0x70 [amdgpu] gfx_v8_0_set_powergating_state+0x66/0x260 [amdgpu] amdgpu_device_ip_set_powergating_state+0x62/0xb0 [amdgpu] pp_dpm_force_performance_level+0xe7/0x100 [amdgpu] amdgpu_set_dpm_forced_performance_level+0x129/0x330 [amdgpu] Fixes: a64c9e15e624 ("drm/amd/powerplay: cleanup the interfaces for powergate setting through SMU") Signed-off-by: Evan Quan Reported-by: Rui Teng Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c | 58 ++++++++++++++++++------- 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 6c7dca1da992..a2e8c3dfb4f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -946,23 +946,54 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block bool swsmu = is_support_sw_smu(adev); switch (block_type) { - case AMD_IP_BLOCK_TYPE_GFX: case AMD_IP_BLOCK_TYPE_UVD: - case AMD_IP_BLOCK_TYPE_VCN: case AMD_IP_BLOCK_TYPE_VCE: - case AMD_IP_BLOCK_TYPE_SDMA: if (swsmu) { ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); - } else { - if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_powergating_by_smu) { - mutex_lock(&adev->pm.mutex); - ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( - (adev)->powerplay.pp_handle, block_type, gate)); - mutex_unlock(&adev->pm.mutex); - } + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) { + /* + * TODO: need a better lock mechanism + * + * Here adev->pm.mutex lock protection is enforced on + * UVD and VCE cases only. Since for other cases, there + * may be already lock protection in amdgpu_pm.c. + * This is a quick fix for the deadlock issue below. 
+ * NFO: task ocltst:2028 blocked for more than 120 seconds. + * Tainted: G OE 5.0.0-37-generic #40~18.04.1-Ubuntu + * echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. + * cltst D 0 2028 2026 0x00000000 + * all Trace: + * __schedule+0x2c0/0x870 + * schedule+0x2c/0x70 + * schedule_preempt_disabled+0xe/0x10 + * __mutex_lock.isra.9+0x26d/0x4e0 + * __mutex_lock_slowpath+0x13/0x20 + * ? __mutex_lock_slowpath+0x13/0x20 + * mutex_lock+0x2f/0x40 + * amdgpu_dpm_set_powergating_by_smu+0x64/0xe0 [amdgpu] + * gfx_v8_0_enable_gfx_static_mg_power_gating+0x3c/0x70 [amdgpu] + * gfx_v8_0_set_powergating_state+0x66/0x260 [amdgpu] + * amdgpu_device_ip_set_powergating_state+0x62/0xb0 [amdgpu] + * pp_dpm_force_performance_level+0xe7/0x100 [amdgpu] + * amdgpu_set_dpm_forced_performance_level+0x129/0x330 [amdgpu] + */ + mutex_lock(&adev->pm.mutex); + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate)); + mutex_unlock(&adev->pm.mutex); } break; + case AMD_IP_BLOCK_TYPE_GFX: + case AMD_IP_BLOCK_TYPE_VCN: + case AMD_IP_BLOCK_TYPE_SDMA: + if (swsmu) + ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate)); + break; case AMD_IP_BLOCK_TYPE_JPEG: if (swsmu) ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); @@ -970,12 +1001,9 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block case AMD_IP_BLOCK_TYPE_GMC: case AMD_IP_BLOCK_TYPE_ACP: if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_powergating_by_smu) { - mutex_lock(&adev->pm.mutex); + adev->powerplay.pp_funcs->set_powergating_by_smu) ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( (adev)->powerplay.pp_handle, block_type, gate)); - mutex_unlock(&adev->pm.mutex); - } break; default: break; From 
93af20f74e8eb4077fecdcc6e8093f13f0059bc9 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 16 Jan 2020 12:39:15 +0800 Subject: [PATCH 066/113] drm/amdgpu: check if driver should try recovery in ras recovery path To allow the flexibility for user to disable gpu recovery in RAS recovery path by module parameter amdgpu_gpu_recovery Signed-off-by: Hawking Zhang Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 991c4eaac244..766be7f18282 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1353,7 +1353,8 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_ras *ras = container_of(work, struct amdgpu_ras, recovery_work); - amdgpu_device_gpu_recover(ras->adev, 0); + if (amdgpu_device_should_recover_gpu(ras->adev)) + amdgpu_device_gpu_recover(ras->adev, 0); atomic_set(&ras->in_recovery, 0); } From e9d4cf918f70c6df87265d561aeab8d73397771b Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 16 Jan 2020 12:39:50 +0800 Subject: [PATCH 067/113] drm/amdgpu: add arcturus to gpu recovery check code path support check if driver should try gpu recovery for arcturus Signed-off-by: Hawking Zhang Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1bbea9669204..d3eff4c6289a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3758,6 +3758,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_RAVEN: + case CHIP_ARCTURUS: break; default: goto disabled; From 9e3e90c50dd34fe961dc662f37ee9640e04cba97 Mon
Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 14 Jan 2020 10:38:42 +0100 Subject: [PATCH 068/113] drm/scheduler: fix documentation by replacing rq_list with sched_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This also replaces old artifacts with a correct one in drm_sched_entity_init() declaration Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/sched_entity.c | 2 +- include/drm/gpu_scheduler.h | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 33e2cd1089a2..ec79e8e5ad3c 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -45,7 +45,7 @@ * @guilty: atomic_t set to 1 when a job on this queue * is found to be guilty causing a timeout * - * Note: the rq_list should have atleast one element to schedule + * Note: the sched_list should have atleast one element to schedule * the entity * * Returns 0 on success or a negative error code on failure. diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 537f7a4655a5..9e71be129c30 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -52,9 +52,9 @@ enum drm_sched_priority { * @list: used to append this struct to the list of entities in the * runqueue. * @rq: runqueue on which this entity is currently scheduled. - * @rq_list: a list of run queues on which jobs from this entity can - * be scheduled - * @num_rq_list: number of run queues in the rq_list + * @sched_list: a list of drm_gpu_schedulers on which jobs from this entity can + * be scheduled + * @num_sched_list: number of drm_gpu_schedulers in the sched_list. * @rq_lock: lock to modify the runqueue to which this entity belongs. * @job_queue: the list of jobs of this entity. 
* @fence_seq: a linearly increasing seqno incremented with each @@ -81,8 +81,8 @@ enum drm_sched_priority { struct drm_sched_entity { struct list_head list; struct drm_sched_rq *rq; - unsigned int num_sched_list; struct drm_gpu_scheduler **sched_list; + unsigned int num_sched_list; enum drm_sched_priority priority; spinlock_t rq_lock; @@ -315,7 +315,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, int drm_sched_entity_init(struct drm_sched_entity *entity, enum drm_sched_priority priority, struct drm_gpu_scheduler **sched_list, - unsigned int num_rq_list, + unsigned int num_sched_list, atomic_t *guilty); long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout); void drm_sched_entity_fini(struct drm_sched_entity *entity); From d1dcb05f0e45f97061d5c1f921aa038f1ae92c7b Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 7 Jan 2020 15:28:43 -0500 Subject: [PATCH 069/113] drm/amd/include: Add OCSC registers Add registers for handling Post Gamma Color Blending (OCSC), which is useful for conversion from RGB->YUV for HDMI. 
Reviewed-by: Leo Li Acked-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h | 4 ++++ .../gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h | 9 ++++++++- .../gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h | 5 ++++- .../gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h | 8 ++++++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h index cff8f91555d3..e9b2bd84cfed 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h @@ -8134,6 +8134,10 @@ #define mmMPC_OUT5_CSC_C33_C34_B 0x1604 #define mmMPC_OUT5_CSC_C33_C34_B_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_INDEX 0x163b +#define mmMPC_OCSC_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_DATA_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_DATA 0x163c // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec // base address: 0x5964 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h index 10c83fecd147..dc8ce7aaa0cf 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h @@ -28263,7 +28263,14 @@ #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C34_B__SHIFT 0x10 #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C33_B_MASK 0x0000FFFFL #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C34_B_MASK 0xFFFF0000L - +//MPC_OCSC_TEST_DEBUG_INDEX +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX__SHIFT 0x0 +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN__SHIFT 0x8 +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX_MASK 0x000000FFL +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN_MASK 0x00000100L 
+//MPC_OCSC_TEST_DEBUG_DATA +#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA__SHIFT 0x0 +#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec //DC_PERFMON17_PERFCOUNTER_CNTL diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h index eddf83ec1c39..7cd0ee61c030 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h @@ -7103,7 +7103,10 @@ #define mmMPC_OUT3_CSC_C31_C32_B_BASE_IDX 2 #define mmMPC_OUT3_CSC_C33_C34_B 0x15ea #define mmMPC_OUT3_CSC_C33_C34_B_BASE_IDX 2 - +#define mmMPC_OCSC_TEST_DEBUG_INDEX 0x163b +#define mmMPC_OCSC_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_DATA_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_DATA 0x163c // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec // base address: 0x5964 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h index faa0e76e32b4..2f780aefc722 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h @@ -56634,5 +56634,13 @@ #define AZF0INPUTENDPOINT7_AZALIA_F0_CODEC_INPUT_PIN_CONTROL_INFOFRAME__INFOFRAME_BYTE_5_MASK 0x00FF0000L #define AZF0INPUTENDPOINT7_AZALIA_F0_CODEC_INPUT_PIN_CONTROL_INFOFRAME__INFOFRAME_VALID_MASK 0x80000000L +//MPC_OCSC_TEST_DEBUG_INDEX +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX__SHIFT 0x0 +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN__SHIFT 0x8 +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX_MASK 0x000000FFL +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN_MASK 0x00000100L +//MPC_OCSC_TEST_DEBUG_DATA +#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA__SHIFT 0x0 +#define 
MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL #endif From 6f8f76444baf405bacb0591d97549a71a9aaa1ac Mon Sep 17 00:00:00 2001 From: Amanda Liu Date: Fri, 6 Dec 2019 15:12:30 -0500 Subject: [PATCH 070/113] drm/amd/display: Clear state after exiting fixed active VRR state [why] Upon exiting a fixed active VRR state, the state isn't cleared. This leads to the variable VRR range to be calculated incorrectly. [how] Set fixed active state to false when updating vrr params Signed-off-by: Amanda Liu Reviewed-by: Anthony Koo Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index fa57885503d4..832bc9b3b7d8 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -812,6 +812,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, 2 * in_out_vrr->min_refresh_in_uhz) in_out_vrr->btr.btr_enabled = false; + in_out_vrr->fixed.fixed_active = false; in_out_vrr->btr.btr_active = false; in_out_vrr->btr.inserted_duration_in_us = 0; in_out_vrr->btr.frames_to_insert = 0; @@ -832,6 +833,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, in_out_vrr->adjust.v_total_max = stream->timing.v_total; } else if (in_out_vrr->state == VRR_STATE_ACTIVE_VARIABLE && refresh_range >= MIN_REFRESH_RANGE_IN_US) { + in_out_vrr->adjust.v_total_min = calc_v_total_from_refresh(stream, in_out_vrr->max_refresh_in_uhz); From 30221e6217ebd761e5977c98daab1de0cb70880d Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 9 Dec 2019 10:21:43 -0500 Subject: [PATCH 071/113] drm/amd/display: 3.2.65 Signed-off-by: Aric Cyr Reviewed-by: Aric Cyr Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- 
drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 039004344dc6..c4e6c696f06f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.64" +#define DC_VER "3.2.65" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 93a8955b109608459c35b4a774f21b3099f0051f Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Mon, 9 Dec 2019 16:18:21 -0500 Subject: [PATCH 072/113] drm/amd/display: fix chroma vp wa corner case [Why] Previous implementation we may have residual chroma address offset if transition from wa enable -> wa disable. [How] Clear address offset cache when viewport updates. Also update the vp size check condition to account for rotation angle Signed-off-by: Eric Yang Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index 216ae170bc50..4d2564f79395 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -251,10 +251,20 @@ static void hubp21_apply_PLAT_54186_wa( ROTATION_ANGLE, &rotation_angle, H_MIRROR_EN, &h_mirror_en); - /* apply wa only for NV12 surface with scatter gather enabled with view port > 512 */ + /* reset persistent cached data */ + hubp21->PLAT_54186_wa_chroma_addr_offset = 0; + /* apply wa only for NV12 surface with scatter gather enabled with viewport > 512 along + * the vertical direction*/ if (address->type != PLN_ADDR_TYPE_VIDEO_PROGRESSIVE || - address->video_progressive.luma_addr.high_part == 0xf4 - || viewport_c_height <= 512) + 
address->video_progressive.luma_addr.high_part == 0xf4) + return; + + if ((rotation_angle == 0 || rotation_angle == 180) + && viewport_c_height <= 512) + return; + + if ((rotation_angle == 90 || rotation_angle == 270) + && viewport_c_width <= 512) return; switch (rotation_angle) { From 1075735ecce96b34ba9aee529dd2253da6d21ceb Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 5 Dec 2019 17:12:17 -0500 Subject: [PATCH 073/113] drm/amd/display: Fix 300Hz Freesync bug Needed to reprogram vblank_start in dml properly in order to get the correct dlg params to program VTG. Signed-off-by: Alvin Lee Reviewed-by: Martin Leung Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 1 - drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 4 +++- drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 3d89904003f0..46f2b54c3526 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -287,7 +287,6 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->stream == stream && pipe->stream_res.tg) { - pipe->stream->adjust = *adjust; dc->hwss.set_drr(&pipe, 1, adjust->v_total_min, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 2dafa20d769d..b715ecc1b5e6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1883,12 +1883,14 @@ int dcn20_populate_dml_pipes_from_context( for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing; unsigned int v_total; + unsigned int front_porch; int output_bpc; if 
(!res_ctx->pipe_ctx[i].stream) continue; v_total = timing->v_total; + front_porch = timing->v_front_porch; /* todo: pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0; pipes[pipe_cnt].pipe.src.dcc = 0; @@ -1915,7 +1917,7 @@ int dcn20_populate_dml_pipes_from_context( - timing->h_addressable - timing->h_border_left - timing->h_border_right; - pipes[pipe_cnt].pipe.dest.vblank_start = v_total - timing->v_front_porch; + pipes[pipe_cnt].pipe.dest.vblank_start = v_total - front_porch; pipes[pipe_cnt].pipe.dest.vblank_end = pipes[pipe_cnt].pipe.dest.vblank_start - timing->v_addressable - timing->v_border_top diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 832bc9b3b7d8..6e5ecefe7d9d 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -812,11 +812,12 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, 2 * in_out_vrr->min_refresh_in_uhz) in_out_vrr->btr.btr_enabled = false; - in_out_vrr->fixed.fixed_active = false; in_out_vrr->btr.btr_active = false; in_out_vrr->btr.inserted_duration_in_us = 0; in_out_vrr->btr.frames_to_insert = 0; in_out_vrr->btr.frame_counter = 0; + in_out_vrr->fixed.fixed_active = false; + in_out_vrr->fixed.target_refresh_in_uhz = 0; in_out_vrr->btr.mid_point_in_us = (in_out_vrr->min_duration_in_us + From 6d822156d0b6f529aa95d55286742c908f5b4e35 Mon Sep 17 00:00:00 2001 From: Nikola Cornij Date: Mon, 9 Dec 2019 18:54:26 -0500 Subject: [PATCH 074/113] drm/amd/display: Disable secondary link for certain monitors [why] If the specific monitor supports DSC, the secondary link should be disabled, and the other way around, too: if either that monitor or our ASIC doesn't support DSC, the secondary link should be enabled. [how] Add a monitor patch and disable secondary link if that monitor is detected and if ASIC supports DSC, or otherwise enable secondary link.
Signed-off-by: Nikola Cornij Reviewed-by: Joshua Aberback Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 10 +++++++--- drivers/gpu/drm/amd/display/dc/dc.h | 2 ++ drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 64a0e08fd019..a0eb9e533a61 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2066,6 +2066,13 @@ void dc_resource_state_construct( dst_ctx->clk_mgr = dc->clk_mgr; } + +bool dc_resource_is_dsc_encoding_supported(const struct dc *dc) +{ + return dc->res_pool->res_cap->num_dsc > 0; +} + + /** * dc_validate_global_state() - Determine if HW can support a given state * Checks HW resource availability and bandwidth requirement. @@ -2897,6 +2904,3 @@ void get_audio_check(struct audio_info *aud_modes, } } - - - diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index c4e6c696f06f..29762b0531fc 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -910,6 +910,8 @@ void dc_resource_state_copy_construct_current( void dc_resource_state_destruct(struct dc_state *context); +bool dc_resource_is_dsc_encoding_supported(const struct dc *dc); + /* * TODO update to make it about validation sets * Set up streams and links associated to drive sinks diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index b1a372c8df83..4b5b97520733 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -229,6 +229,7 @@ struct dc_panel_patch { unsigned int extra_t12_ms; unsigned int extra_delay_backlight_off; unsigned int extra_t7_ms; + unsigned int manage_secondary_link; }; struct dc_edid_caps { From 
87f24027728f268fdf8669984f0afba599731717 Mon Sep 17 00:00:00 2001 From: Martin Leung Date: Wed, 11 Dec 2019 14:59:36 -0500 Subject: [PATCH 075/113] drm/amd/display: Adding forgotten hubbub func [why] While doing seamless boot I made some changes to dcn2 hubbub functions, missed a link [how] link hubbub1 func to hubbub2 usage. It has already been successfully linked in dcn1 and 3. Signed-off-by: Martin Leung Reviewed-by: Alvin Lee Acked-by: Anthony Koo Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c index 8b8438566101..836af0f2bbda 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c @@ -601,7 +601,8 @@ static const struct hubbub_funcs hubbub2_funcs = { .wm_read_state = hubbub2_wm_read_state, .get_dchub_ref_freq = hubbub2_get_dchub_ref_freq, .program_watermarks = hubbub2_program_watermarks, - .allow_self_refresh_control = hubbub1_allow_self_refresh_control + .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled, + .allow_self_refresh_control = hubbub1_allow_self_refresh_control, }; void hubbub2_construct(struct dcn20_hubbub *hubbub, From b7408a06733f839c98a9903e204010204452fcd3 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 11 Dec 2019 20:53:22 -0500 Subject: [PATCH 076/113] drm/amd/display: Flush framebuffer data before passing to DMCUB [Why] There's a data race that can occur between when we update the inbox write pointer vs when the memory for the command actually gets flushed from the map to the framebuffer. DMCUB can read stale or partially invalid data when this race occurs. 
[How] Before updating the write pointer we can read back all pending commands to ensure that we stall for the writes to be flushed to framebuffer. We don't need to worry about choosing HDP vs VM flush with this mechanism. Drop the dmub_srv_cmd_submit() while we're updating this to work correctly since nothing was actually using this API and the caller should be explicit about the API flow here - by doing this on execute we can give some extra time for the flush to finish while preparing other commands. We should try to avoid writing single commands because of this overhead. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dmub/inc/dmub_rb.h | 17 +++++++++++ .../gpu/drm/amd/display/dmub/inc/dmub_srv.h | 19 ------------ .../gpu/drm/amd/display/dmub/src/dmub_srv.c | 30 +++++-------------- 3 files changed, 24 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h index ac22744eaa94..ade688fd32f0 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h @@ -113,6 +113,23 @@ static inline bool dmub_rb_pop_front(struct dmub_rb *rb) return true; } +static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) +{ + uint8_t buf[DMUB_RB_CMD_SIZE]; + uint32_t rptr = rb->rptr; + uint32_t wptr = rb->wrpt; + + while (rptr != wptr) { + const uint8_t *data = (const uint8_t *)rb->base_address + rptr; + + dmub_memcpy(buf, data, DMUB_RB_CMD_SIZE); + + rptr += DMUB_RB_CMD_SIZE; + if (rptr >= rb->capacity) + rptr %= rb->capacity; + } +} + static inline void dmub_rb_init(struct dmub_rb *rb, struct dmub_rb_init_params *init_params) { diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h index f34a50dd36ea..8e23a7017588 100644 --- 
a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h @@ -444,25 +444,6 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, */ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub); -/** - * dmub_srv_cmd_submit() - submits a command to the DMUB immediately - * @dmub: the dmub service - * @cmd: the command to submit - * @timeout_us: the maximum number of microseconds to wait - * - * Submits a command to the DMUB with an optional timeout. - * If timeout_us is given then the service will attempt to - * resubmit for the given number of microseconds. - * - * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_TIMEOUT - wait for submit timed out - * DMUB_STATUS_INVALID - unspecified error - */ -enum dmub_status dmub_srv_cmd_submit(struct dmub_srv *dmub, - const struct dmub_cmd_header *cmd, - uint32_t timeout_us); - /** * dmub_srv_wait_for_auto_load() - Waits for firmware auto load to complete * @dmub: the dmub service diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 9a959f871f11..23ca1fe97757 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -405,33 +405,17 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) if (!dmub->hw_init) return DMUB_STATUS_INVALID; + /** + * Read back all the queued commands to ensure that they've + * been flushed to framebuffer memory. Otherwise DMCUB might + * read back stale, fully invalid or partially invalid data. 
+ */ + dmub_rb_flush_pending(&dmub->inbox1_rb); + dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_cmd_submit(struct dmub_srv *dmub, - const struct dmub_cmd_header *cmd, - uint32_t timeout_us) -{ - uint32_t i = 0; - - if (!dmub->hw_init) - return DMUB_STATUS_INVALID; - - for (i = 0; i <= timeout_us; ++i) { - dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - if (dmub_rb_push_front(&dmub->inbox1_rb, cmd)) { - dmub->hw_funcs.set_inbox1_wptr(dmub, - dmub->inbox1_rb.wrpt); - return DMUB_STATUS_OK; - } - - udelay(1); - } - - return DMUB_STATUS_TIMEOUT; -} - enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, uint32_t timeout_us) { From dee5d542836a911b0175f9cccc25b8f5733e2527 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 11 Dec 2019 21:14:04 -0500 Subject: [PATCH 077/113] drm/amd/display: Read inst_fb data back during DMUB loading [Why] The inst firmware isn't necessarily fully flushed to framebuffer memory and the DMCUB can hang due to invalid inst being parsed. [How] Like the fix to flush ringbuffer commands before updating the inbox write pointer we need to read back inst memory before writing the CW0 registers. Add a helper to read 64-byte chunks to avoid a large temporary buffer. Read the remaining leftover bytes if the inst_fb isn't fully aligned. 
Signed-off-by: Nicholas Kazlauskas Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dmub/src/dmub_srv.c | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 23ca1fe97757..dee676335d73 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -67,6 +67,26 @@ static inline uint32_t dmub_align(uint32_t val, uint32_t factor) return (val + factor - 1) / factor * factor; } +static void dmub_flush_buffer_mem(const struct dmub_fb *fb) +{ + const uint8_t *base = (const uint8_t *)fb->cpu_addr; + uint8_t buf[64]; + uint32_t pos, end; + + /** + * Read 64-byte chunks since we don't want to store a + * large temporary buffer for this purpose. + */ + end = fb->size / sizeof(buf) * sizeof(buf); + + for (pos = 0; pos < end; pos += sizeof(buf)) + dmub_memcpy(buf, base + pos, sizeof(buf)); + + /* Read anything leftover into the buffer. */ + if (end < fb->size) + dmub_memcpy(buf, base + pos, fb->size - end); +} + static const struct dmub_fw_meta_info * dmub_get_fw_meta_info(const uint8_t *fw_bss_data, uint32_t fw_bss_data_size) { @@ -329,6 +349,13 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, cw1.region.base = DMUB_CW1_BASE; cw1.region.top = cw1.region.base + stack_fb->size - 1; + /** + * Read back all the instruction memory so we don't hang the + * DMCUB when backdoor loading if the write from x86 hasn't been + * flushed yet. This only occurs in backdoor loading. 
+ */ + dmub_flush_buffer_mem(inst_fb); + if (params->load_inst_const && dmub->hw_funcs.backdoor_load) dmub->hw_funcs.backdoor_load(dmub, &cw0, &cw1); } From 1295524e6fee9a978ea2d97a0efc269252b07487 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 11 Dec 2019 21:26:40 -0500 Subject: [PATCH 078/113] drm/amd/display: Soft reset DMUIF during DMUB reset [Why] We need to ensure that the DMUIF in MMHUBBUB is also in reset so we aren't generating requests while the DMCUB is in reset. [How] Set DMUIF_SOFT_RESET=1 on reset and DMUIF_SOFT_RESET=0 on reset release. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c | 2 ++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c index f45e14ada685..cd51c6138894 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c @@ -66,10 +66,12 @@ void dmub_dcn20_reset(struct dmub_srv *dmub) { REG_UPDATE(DMCUB_CNTL, DMCUB_SOFT_RESET, 1); REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); + REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); } void dmub_dcn20_reset_release(struct dmub_srv *dmub) { + REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 0); REG_WRITE(DMCUB_SCRATCH15, dmub->psp_version & 0x001100FF); REG_UPDATE_2(DMCUB_CNTL, DMCUB_ENABLE, 1, DMCUB_TRACEPORT_EN, 1); REG_UPDATE(DMCUB_CNTL, DMCUB_SOFT_RESET, 0); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h index 68af9b190288..53bfd4da69ad 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h @@ -91,7 +91,8 @@ struct dmub_srv; DMUB_SR(DMCUB_SCRATCH13) \ DMUB_SR(DMCUB_SCRATCH14) \ 
DMUB_SR(DMCUB_SCRATCH15) \ - DMUB_SR(CC_DC_PIPE_DIS) + DMUB_SR(CC_DC_PIPE_DIS) \ + DMUB_SR(MMHUBBUB_SOFT_RESET) #define DMUB_COMMON_FIELDS() \ DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \ @@ -119,7 +120,8 @@ struct dmub_srv; DMUB_SF(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_ENABLE) \ DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_TOP_ADDRESS) \ DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \ - DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) + DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \ + DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) struct dmub_srv_common_reg_offset { #define DMUB_SR(reg) uint32_t reg; From e8027e08843f8934f9701cbeea43268c65ade55b Mon Sep 17 00:00:00 2001 From: Noah Abradjian Date: Wed, 11 Dec 2019 13:34:04 -0500 Subject: [PATCH 079/113] drm/amd/display: Add double buffering to dcn20 OCSC [Why] When rapidly adjusting colour properties (e.g. brightness), screen tearing was observed. This was due to overwritten values in OCSC registers. In dcn10, this issue had been fixed by implementing double buffering by alternating OCSC modes. [How] Alternate which OCSC registers are used by switching modes each time. This double buffers the CSC writes. 
Signed-off-by: Noah Abradjian Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c | 45 ++++++++++++++++--- .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h | 16 +++++++ .../drm/amd/display/dc/dcn20/dcn20_resource.c | 1 + .../drm/amd/display/dc/dcn21/dcn21_resource.c | 3 +- 4 files changed, 59 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index f90031ed58a6..c38f7fdb43a8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -33,6 +33,9 @@ #define REG(reg)\ mpc20->mpc_regs->reg +#define IND_REG(index) \ + (index) + #define CTX \ mpc20->base.ctx @@ -132,19 +135,32 @@ void mpc2_set_output_csc( const uint16_t *regval, enum mpc_output_csc_mode ocsc_mode) { + uint32_t cur_mode; struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc); struct color_matrices_reg ocsc_regs; - REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); - - if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) + if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) { + REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); return; + } if (regval == NULL) { BREAK_TO_DEBUGGER(); return; } + /* determine which CSC coefficients (A or B) we are using + * currently. 
select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + cur_mode = IX_REG_READ(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA, + MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE_IDX); + + if (cur_mode != MPC_OUTPUT_CSC_COEF_A) + ocsc_mode = MPC_OUTPUT_CSC_COEF_A; + else + ocsc_mode = MPC_OUTPUT_CSC_COEF_B; + ocsc_regs.shifts.csc_c11 = mpc20->mpc_shift->MPC_OCSC_C11_A; ocsc_regs.masks.csc_c11 = mpc20->mpc_mask->MPC_OCSC_C11_A; ocsc_regs.shifts.csc_c12 = mpc20->mpc_shift->MPC_OCSC_C12_A; @@ -157,10 +173,13 @@ void mpc2_set_output_csc( ocsc_regs.csc_c11_c12 = REG(CSC_C11_C12_B[opp_id]); ocsc_regs.csc_c33_c34 = REG(CSC_C33_C34_B[opp_id]); } + cm_helper_program_color_matrices( mpc20->base.ctx, regval, &ocsc_regs); + + REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); } void mpc2_set_ocsc_default( @@ -169,14 +188,16 @@ void mpc2_set_ocsc_default( enum dc_color_space color_space, enum mpc_output_csc_mode ocsc_mode) { + uint32_t cur_mode; struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc); uint32_t arr_size; struct color_matrices_reg ocsc_regs; const uint16_t *regval = NULL; - REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); - if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) + if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) { + REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); return; + } regval = find_color_matrix(color_space, &arr_size); @@ -185,6 +206,18 @@ void mpc2_set_ocsc_default( return; } + /* determine which CSC coefficients (A or B) we are using + * currently. 
select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + cur_mode = IX_REG_READ(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA, + MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE_IDX); + + if (cur_mode != MPC_OUTPUT_CSC_COEF_A) + ocsc_mode = MPC_OUTPUT_CSC_COEF_A; + else + ocsc_mode = MPC_OUTPUT_CSC_COEF_B; + ocsc_regs.shifts.csc_c11 = mpc20->mpc_shift->MPC_OCSC_C11_A; ocsc_regs.masks.csc_c11 = mpc20->mpc_mask->MPC_OCSC_C11_A; ocsc_regs.shifts.csc_c12 = mpc20->mpc_shift->MPC_OCSC_C12_A; @@ -203,6 +236,8 @@ void mpc2_set_ocsc_default( mpc20->base.ctx, regval, &ocsc_regs); + + REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); } static void mpc2_ogam_get_reg_field( diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h index 9f53192da2dc..8c77e78e2df5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h @@ -80,6 +80,10 @@ SRII(DENORM_CLAMP_G_Y, MPC_OUT, inst),\ SRII(DENORM_CLAMP_B_CB, MPC_OUT, inst) +#define MPC_DBG_REG_LIST_DCN2_0() \ + SR(MPC_OCSC_TEST_DEBUG_DATA),\ + SR(MPC_OCSC_TEST_DEBUG_INDEX) + #define MPC_REG_VARIABLE_LIST_DCN2_0 \ MPC_COMMON_REG_VARIABLE_LIST \ uint32_t MPCC_TOP_GAIN[MAX_MPCC]; \ @@ -118,6 +122,8 @@ uint32_t MPCC_OGAM_LUT_RAM_CONTROL[MAX_MPCC];\ uint32_t MPCC_OGAM_LUT_DATA[MAX_MPCC];\ uint32_t MPCC_OGAM_MODE[MAX_MPCC];\ + uint32_t MPC_OCSC_TEST_DEBUG_DATA;\ + uint32_t MPC_OCSC_TEST_DEBUG_INDEX;\ uint32_t CSC_MODE[MAX_OPP]; \ uint32_t CSC_C11_C12_A[MAX_OPP]; \ uint32_t CSC_C33_C34_A[MAX_OPP]; \ @@ -134,6 +140,8 @@ SF(MPCC0_MPCC_TOP_GAIN, MPCC_TOP_GAIN, mask_sh),\ SF(MPCC0_MPCC_BOT_GAIN_INSIDE, MPCC_BOT_GAIN_INSIDE, mask_sh),\ SF(MPCC0_MPCC_BOT_GAIN_OUTSIDE, MPCC_BOT_GAIN_OUTSIDE, mask_sh),\ + SF(MPC_OCSC_TEST_DEBUG_DATA, MPC_OCSC_TEST_DEBUG_DATA, mask_sh),\ + SF(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_INDEX, mask_sh),\ SF(MPC_OUT0_CSC_MODE, MPC_OCSC_MODE, mask_sh),\ 
SF(MPC_OUT0_CSC_C11_C12_A, MPC_OCSC_C11_A, mask_sh),\ SF(MPC_OUT0_CSC_C11_C12_A, MPC_OCSC_C12_A, mask_sh),\ @@ -174,6 +182,12 @@ SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MAX_B_CB, mask_sh),\ SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MIN_B_CB, mask_sh) +/* + * DCN2 MPC_OCSC debug status register: + * + * Field describing current OCSC Mode has index 1 [1..0] + */ +#define MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE_IDX 1 #define MPC_REG_FIELD_LIST_DCN2_0(type) \ MPC_REG_FIELD_LIST(type)\ @@ -182,6 +196,8 @@ type MPCC_TOP_GAIN;\ type MPCC_BOT_GAIN_INSIDE;\ type MPCC_BOT_GAIN_OUTSIDE;\ + type MPC_OCSC_TEST_DEBUG_DATA;\ + type MPC_OCSC_TEST_DEBUG_INDEX;\ type MPC_OCSC_MODE;\ type MPC_OCSC_C11_A;\ type MPC_OCSC_C12_A;\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index b715ecc1b5e6..a05746142ec8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -706,6 +706,7 @@ static const struct dcn20_mpc_registers mpc_regs = { MPC_OUT_MUX_REG_LIST_DCN2_0(3), MPC_OUT_MUX_REG_LIST_DCN2_0(4), MPC_OUT_MUX_REG_LIST_DCN2_0(5), + MPC_DBG_REG_LIST_DCN2_0() }; static const struct dcn20_mpc_shift mpc_shift = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index c76449f58064..5a6e7ac5f785 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -465,7 +465,8 @@ static const struct dcn20_mpc_registers mpc_regs = { MPC_OUT_MUX_REG_LIST_DCN2_0(0), MPC_OUT_MUX_REG_LIST_DCN2_0(1), MPC_OUT_MUX_REG_LIST_DCN2_0(2), - MPC_OUT_MUX_REG_LIST_DCN2_0(3) + MPC_OUT_MUX_REG_LIST_DCN2_0(3), + MPC_DBG_REG_LIST_DCN2_0() }; static const struct dcn20_mpc_shift mpc_shift = { From a166f86e8a3c91adb303fc511acbfa9d9f2899a1 Mon Sep 17 00:00:00 2001 From: abdoulaye berthe Date: Wed, 11 Dec 2019 15:18:08 -0500 
Subject: [PATCH 080/113] drm/amd/display: store lttpr mode with dpcd Make sure that lttpr_caps has the mode set to repeater. Signed-off-by: abdoulaye berthe Reviewed-by: Wenjing Liu Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 5d0e7abb2b98..f703b3998644 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1217,24 +1217,33 @@ static void configure_lttpr_mode(struct dc_link *link) uint8_t repeater_cnt; uint32_t aux_interval_address; uint8_t repeater_id; + enum dc_status result = DC_ERROR_UNEXPECTED; uint8_t repeater_mode = DP_PHY_REPEATER_MODE_TRANSPARENT; DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Transparent Mode\n", __func__); - core_link_write_dpcd(link, + result = core_link_write_dpcd(link, DP_PHY_REPEATER_MODE, (uint8_t *)&repeater_mode, sizeof(repeater_mode)); + if (result == DC_OK) { + link->dpcd_caps.lttpr_caps.mode = repeater_mode; + } + if (!link->is_lttpr_mode_transparent) { DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Non Transparent Mode\n", __func__); repeater_mode = DP_PHY_REPEATER_MODE_NON_TRANSPARENT; - core_link_write_dpcd(link, + result = core_link_write_dpcd(link, DP_PHY_REPEATER_MODE, (uint8_t *)&repeater_mode, sizeof(repeater_mode)); + if (result == DC_OK) { + link->dpcd_caps.lttpr_caps.mode = repeater_mode; + } + repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); for (repeater_id = repeater_cnt; repeater_id > 0; repeater_id--) { aux_interval_address = DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 + From 30c9b7a1339bd194c63e0b4412037bd577248864 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 12 Dec 2019 16:23:24 -0500 Subject: [PATCH 081/113] drm/amd/display: Don't always set pstate 
true if dummy latency = 0 [Why] If dummy pstate latency is 0 we should report the actual pstate support, and not that pstate is always supported. [How] Don't always program pstate support true Signed-off-by: Alvin Lee Reviewed-by: Jun Lei Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index a05746142ec8..eac4c3cecadf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2918,7 +2918,7 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 || (voltage_supported && full_pstate_supported)) { - context->bw_ctx.bw.dcn.clk.p_state_change_support = true; + context->bw_ctx.bw.dcn.clk.p_state_change_support = full_pstate_supported; goto restore_dml_state; } From 95e0b96d84b44663f6b2b8bf1c81b3bfbfdb8f38 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Fri, 13 Dec 2019 11:52:24 -0500 Subject: [PATCH 082/113] drm/amd/display: Update HDMI hang w/a to apply to all TMDS signals [WHY] 48mhz turn off feature does not work on HDMI and DVI, but the feature was only blocked on HDMI. This change applies the same wa on DVI [HOW] Apply workaround for all TMDS signal types (HDMI, DVI single/dual link) Signed-off-by: Michael Strauss Reviewed-by: Eric Yang Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index ffed7207c099..7ae4c06232dd 100644 ---
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -59,14 +59,16 @@ int rn_get_active_display_cnt_wa( struct dc_state *context) { int i, display_count; - bool hdmi_present = false; + bool tmds_present = false; display_count = 0; for (i = 0; i < context->stream_count; i++) { const struct dc_stream_state *stream = context->streams[i]; - if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) - hdmi_present = true; + if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A || + stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK || + stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) + tmds_present = true; } for (i = 0; i < dc->link_count; i++) { @@ -85,7 +87,7 @@ int rn_get_active_display_cnt_wa( } /* WA for hang on HDMI after display off back back on*/ - if (display_count == 0 && hdmi_present) + if (display_count == 0 && tmds_present) display_count = 1; return display_count; From ed581a0ace44fc4f454e7765a1625a46258080c7 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Fri, 13 Dec 2019 11:54:29 -0500 Subject: [PATCH 083/113] drm/amd/display: wait for update when setting dpg test pattern Test pattern should be applied to hardware when exiting set test pattern function. 
Signed-off-by: Wenjing Liu Reviewed-by: Nikola Cornij Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index f703b3998644..e415f7730f43 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3611,6 +3611,7 @@ static void set_crtc_test_pattern(struct dc_link *link, struct pipe_ctx *odm_pipe; enum controller_dp_color_space controller_color_space; int opp_cnt = 1; + uint8_t count = 0; switch (test_pattern_color_space) { case DP_TEST_PATTERN_COLOR_SPACE_RGB: @@ -3646,6 +3647,12 @@ static void set_crtc_test_pattern(struct dc_link *link, NULL, width, height); + /* wait for dpg to blank pixel data with test pattern */ + for (count = 0; count < 1000; count++) + if (odm_opp->funcs->dpg_is_blanked(odm_opp)) + break; + else + udelay(100); } opp->funcs->opp_set_disp_pattern_generator(opp, controller_test_pattern, @@ -3654,6 +3661,12 @@ static void set_crtc_test_pattern(struct dc_link *link, NULL, width, height); + /* wait for dpg to blank pixel data with test pattern */ + for (count = 0; count < 1000; count++) + if (opp->funcs->dpg_is_blanked(opp)) + break; + else + udelay(100); } } break; From 09178aaa2311cbef76c72dad4a0da165945fe75e Mon Sep 17 00:00:00 2001 From: Joseph Gravenor Date: Thu, 12 Dec 2019 15:40:50 -0500 Subject: [PATCH 084/113] drm/amd/display: stop doing unnecessary detection when going to D3 [Why] Don't want to start HW discovery unless we have lost power, as doing rediscovery otherwise is both unnecessary and time consuming. Before this change it takes 40 seconds to go in to suspend, after it takes 27 seconds [How] Accelerated mode gets cleared if we lose power. 
Only do detection if this register is cleared Signed-off-by: Joseph Gravenor Reviewed-by: Eric Yang Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 6 ++++++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 46f2b54c3526..c918a0cd8c60 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1318,6 +1318,12 @@ bool dc_commit_state(struct dc *dc, struct dc_state *context) return (result == DC_OK); } +bool dc_is_hw_initialized(struct dc *dc) +{ + struct dc_bios *dcb = dc->ctx->dc_bios; + return dcb->funcs->is_accelerated_mode(dcb); +} + bool dc_post_update_surfaces_to_stream(struct dc *dc) { int i; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 29762b0531fc..d1d57432bc7e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1069,6 +1069,7 @@ unsigned int dc_get_current_backlight_pwm(struct dc *dc); unsigned int dc_get_target_backlight_pwm(struct dc *dc); bool dc_is_dmcu_initialized(struct dc *dc); +bool dc_is_hw_initialized(struct dc *dc); enum dc_status dc_set_clock(struct dc *dc, enum dc_clock_type clock_type, uint32_t clk_khz, uint32_t stepping); void dc_get_clock(struct dc *dc, enum dc_clock_type clock_type, struct dc_clock_config *clock_cfg); From 5fc11598166db9e5858164bb105c09f0350e7486 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Wed, 30 Oct 2019 15:25:19 -0400 Subject: [PATCH 085/113] drm/amd/display: expand dml structs Add more fields to support upcoming dml versions Signed-off-by: Dmytro Laktyushkin Reviewed-by: Jun Lei Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 35 +++- .../dc/dml/dcn20/display_mode_vba_20.c | 165 
+++++++++-------- .../dc/dml/dcn20/display_mode_vba_20v2.c | 167 +++++++++--------- .../dc/dml/dcn21/display_mode_vba_21.c | 157 ++++++++-------- .../amd/display/dc/dml/display_mode_enums.h | 16 ++ .../amd/display/dc/dml/display_mode_structs.h | 7 +- .../drm/amd/display/dc/dml/display_mode_vba.c | 22 ++- .../drm/amd/display/dc/dml/display_mode_vba.h | 128 ++++++++++---- 8 files changed, 402 insertions(+), 295 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index eac4c3cecadf..9233144ae74e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1858,6 +1858,22 @@ void dcn20_populate_dml_writeback_from_context( } +static int get_num_odm_heads(struct pipe_ctx *pipe) +{ + int odm_head_count = 0; + struct pipe_ctx *next_pipe = pipe->next_odm_pipe; + while (next_pipe) { + odm_head_count++; + next_pipe = next_pipe->next_odm_pipe; + } + pipe = pipe->prev_odm_pipe; + while (pipe) { + odm_head_count++; + pipe = pipe->prev_odm_pipe; + } + return odm_head_count ? 
odm_head_count + 1 : 0; +} + int dcn20_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes) { @@ -1897,6 +1913,8 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.dcc = 0; pipes[pipe_cnt].pipe.src.vm = 0;*/ + pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; + pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC; /* todo: rotation?*/ pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h; @@ -1935,8 +1953,13 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].dout.dp_lanes = 4; pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min; pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max; - pipes[pipe_cnt].pipe.dest.odm_combine = res_ctx->pipe_ctx[i].prev_odm_pipe - || res_ctx->pipe_ctx[i].next_odm_pipe; + switch (get_num_odm_heads(&res_ctx->pipe_ctx[i])) { + case 2: + pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_2to1; + break; + default: + pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_disabled; + } pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx; if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state == res_ctx->pipe_ctx[i].plane_state) @@ -2046,6 +2069,9 @@ int dcn20_populate_dml_pipes_from_context( if (pipes[pipe_cnt].pipe.src.viewport_height > 1080) pipes[pipe_cnt].pipe.src.viewport_height = 1080; pipes[pipe_cnt].pipe.src.surface_height_y = pipes[pipe_cnt].pipe.src.viewport_height; + pipes[pipe_cnt].pipe.src.surface_width_y = pipes[pipe_cnt].pipe.src.viewport_width; + pipes[pipe_cnt].pipe.src.surface_height_c = pipes[pipe_cnt].pipe.src.viewport_height; + pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width; pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 63) / 64) * 64; /* 
linear sw only */ pipes[pipe_cnt].pipe.src.source_format = dm_444_32; pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/ @@ -2079,7 +2105,10 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c.width; pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport.height; pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c.height; + pipes[pipe_cnt].pipe.src.surface_width_y = pln->plane_size.surface_size.width; pipes[pipe_cnt].pipe.src.surface_height_y = pln->plane_size.surface_size.height; + pipes[pipe_cnt].pipe.src.surface_width_c = pln->plane_size.chroma_size.width; + pipes[pipe_cnt].pipe.src.surface_height_c = pln->plane_size.chroma_size.height; if (pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch; pipes[pipe_cnt].pipe.src.data_pitch_c = pln->plane_size.chroma_pitch; @@ -2493,7 +2522,7 @@ int dcn20_validate_apply_pipe_split_flags( split[i] = true; if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { split[i] = true; - context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx] = true; + context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx] = dm_odm_combine_mode_2to1; } context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx] = context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx]; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c index 3b224b155e8c..16559f7fb952 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c @@ -1335,11 +1335,11 @@ static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPer else mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; - if (mode_lib->vba.ODMCombineEnabled[k] == true) + if 
(mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; if (MainPlaneDoesODMCombine == true) @@ -2848,12 +2848,12 @@ static void dml20_DisplayPipeConfiguration(struct display_mode_lib *mode_lib) SwathWidth = mode_lib->vba.ViewportHeight[k]; } - if (mode_lib->vba.ODMCombineEnabled[k] == true) { + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) { + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } } @@ -3446,10 +3446,10 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->FabricAndDRAMBandwidthPerState[i] * 1000) * locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; - locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState; + locals->ReturnBWPerState[i][0] = locals->ReturnBWToDCNPerState; if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] @@ -3460,7 +3460,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); if 
(locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], 4 * locals->ReturnBWToDCNPerState * (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / @@ -3472,7 +3472,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000); if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] @@ -3483,7 +3483,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], 4 * locals->ReturnBWToDCNPerState * (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / @@ -3521,12 +3521,12 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] - + 
locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; - if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] + + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0] > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { - locals->ROBSupport[i] = true; + locals->ROBSupport[i][0] = true; } else { - locals->ROBSupport[i] = false; + locals->ROBSupport[i][0] = false; } } /*Writeback Mode Support Check*/ @@ -3903,7 +3903,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] - && locals->ODMCombineEnablePerState[i][k] == false) { + && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->NoOfDPP[i][j][k] = 1; locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); @@ -3992,16 +3992,16 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*Viewport Size Check*/ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - locals->ViewportSizeSupport[i] = true; + locals->ViewportSizeSupport[i][0] = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + 
locals->ViewportSizeSupport[i][0] = false; } } else { if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } } @@ -4183,8 +4183,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DSCFormatFactor = 1; } if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] - == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { locals->DSCCLKRequiredMoreThanSupported[i] = @@ -4207,7 +4206,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.TotalDSCUnitsRequired = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { mode_lib->vba.TotalDSCUnitsRequired = mode_lib->vba.TotalDSCUnitsRequired + 2.0; } else { @@ -4249,7 +4248,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; } if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { - if (locals->ODMCombineEnablePerState[i][k] == false) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->DSCDelayPerState[i][k] = dscceComputeDelay( mode_lib->vba.DSCInputBitPerComponent[k], @@ -4292,7 +4291,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { for (j = 0; j < 2; j++) { for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) + if 
(locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k])); else locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; @@ -4345,28 +4344,28 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma + dml_min( locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] * - locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i], + locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesLuma), locals->SwathHeightYPerState[i][j][k]); locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * - locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i], + locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesChroma), locals->SwathHeightCPerState[i][j][k]); if (locals->BytePerPixelInDETC[k] == 0) { locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * - dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]); + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]); } else { locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min( locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * - dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / 
locals->NoOfDPP[i][j][k]), + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]), locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) - locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 * - dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k])); + dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k])); } } } @@ -4406,14 +4405,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k]; locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k]; locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k]; - mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], mode_lib->vba.PixelClock[k] / 16.0); if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4423,9 +4422,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4436,9 +4435,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } else { if (mode_lib->vba.VRatio[k] 
<= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4448,9 +4447,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4460,9 +4459,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l * mode_lib->vba.RequiredDPPCLK[i][j][k]); } if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETC[k], @@ -4473,9 +4472,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETC[k], @@ -4511,7 +4510,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k], &mode_lib->vba.dpte_row_height[k], &mode_lib->vba.meta_row_height[k]); - mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines( + mode_lib->vba.PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k], mode_lib->vba.vtaps[k], @@ -4550,7 +4549,7 @@ void 
dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k], &mode_lib->vba.dpte_row_height_chroma[k], &mode_lib->vba.meta_row_height_chroma[k]); - mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines( + mode_lib->vba.PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k] / 2.0, mode_lib->vba.VTAPsChroma[k], @@ -4564,14 +4563,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; mode_lib->vba.MetaRowBytesC = 0.0; mode_lib->vba.DPTEBytesPerRowC = 0.0; - locals->PrefetchLinesC[k] = 0.0; + locals->PrefetchLinesC[0][0][k] = 0.0; locals->PTEBufferSizeNotExceededC[i][j][k] = true; locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; } - locals->PDEAndMetaPTEBytesPerFrame[k] = + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] = mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; - locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; - locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; CalculateActiveRowBandwidth( mode_lib->vba.GPUVMEnable, @@ -4598,14 +4597,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] * mode_lib->vba.MetaChunkSize) * 1024.0 - / mode_lib->vba.ReturnBWPerState[i]; + / mode_lib->vba.ReturnBWPerState[i][0]; if (mode_lib->vba.GPUVMEnable == true) { mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency + mode_lib->vba.TotalNumberOfActiveDPP[i][j] * mode_lib->vba.PTEGroupSize - / 
mode_lib->vba.ReturnBWPerState[i]; + / mode_lib->vba.ReturnBWPerState[i][0]; } - mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { @@ -4655,7 +4654,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); } @@ -4700,7 +4699,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], @@ -4718,7 +4717,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l - mode_lib->vba.VActive[k], mode_lib->vba.HTotal[k], mode_lib->vba.MaxInterDCNTileRepeaters, - mode_lib->vba.MaximumVStartup[k], + mode_lib->vba.MaximumVStartup[0][0][k], mode_lib->vba.GPUVMMaxPageTableLevels, mode_lib->vba.GPUVMEnable, mode_lib->vba.DynamicMetadataEnable[k], @@ -4728,15 +4727,15 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.ExtraLatency, mode_lib->vba.TimeCalc, - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], - mode_lib->vba.PrefetchLinesY[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + 
mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], + mode_lib->vba.PrefetchLinesY[0][0][k], mode_lib->vba.SwathWidthYPerState[i][j][k], mode_lib->vba.BytePerPixelInDETY[k], mode_lib->vba.PrefillY[k], mode_lib->vba.MaxNumSwY[k], - mode_lib->vba.PrefetchLinesC[k], + mode_lib->vba.PrefetchLinesC[0][0][k], mode_lib->vba.BytePerPixelInDETC[k], mode_lib->vba.PrefillC[k], mode_lib->vba.MaxNumSwC[k], @@ -4767,19 +4766,19 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->prefetch_vm_bw_valid = true; locals->prefetch_row_bw_valid = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0) + if (locals->PDEAndMetaPTEBytesPerFrame[0][0][k] == 0) locals->prefetch_vm_bw[k] = 0; else if (locals->LinesForMetaPTE[k] > 0) - locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k] + locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[0][0][k] / (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]); else { locals->prefetch_vm_bw[k] = 0; locals->prefetch_vm_bw_valid = false; } - if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0) + if (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k] == 0) locals->prefetch_row_bw[k] = 0; else if (locals->LinesForMetaAndDPTERow[k] > 0) - locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]) + locals->prefetch_row_bw[k] = (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k]) / (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]); else { locals->prefetch_row_bw[k] = 0; @@ -4798,13 +4797,13 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k]) + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]); } - locals->BandwidthWithoutPrefetchSupported[i] = true; - if 
(mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) { - locals->BandwidthWithoutPrefetchSupported[i] = false; + locals->BandwidthWithoutPrefetchSupported[i][0] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]) { + locals->BandwidthWithoutPrefetchSupported[i][0] = false; } locals->PrefetchSupported[i][j] = true; - if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) { + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]) { locals->PrefetchSupported[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4829,7 +4828,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.PrefetchSupported[i][j] == true && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) { mode_lib->vba.BandwidthAvailableForImmediateFlip = - mode_lib->vba.ReturnBWPerState[i]; + mode_lib->vba.ReturnBWPerState[i][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip @@ -4843,9 +4842,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { mode_lib->vba.ImmediateFlipBytes[k] = - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] - + mode_lib->vba.MetaRowBytes[k] - + mode_lib->vba.DPTEBytesPerRow[k]; + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k] + + mode_lib->vba.MetaRowBytes[0][0][k] + + mode_lib->vba.DPTEBytesPerRow[0][0][k]; } } mode_lib->vba.TotImmediateFlipBytes = 0.0; @@ -4873,9 +4872,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l / mode_lib->vba.PixelClock[k], mode_lib->vba.VRatio[k], mode_lib->vba.Tno_bw[k], - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - 
mode_lib->vba.DPTEBytesPerRow[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], mode_lib->vba.DCCEnable[k], mode_lib->vba.dpte_row_height[k], mode_lib->vba.meta_row_height[k], @@ -4900,7 +4899,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true; if (mode_lib->vba.total_dcn_read_bw_with_flip - > mode_lib->vba.ReturnBWPerState[i]) { + > mode_lib->vba.ReturnBWPerState[i][0]) { mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4919,13 +4918,13 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) mode_lib->vba.MaxTotalVActiveRDBandwidth = mode_lib->vba.MaxTotalVActiveRDBandwidth + mode_lib->vba.ReadBandwidth[k]; for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth * + mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) * mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100; - if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i]) - mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true; + if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0]) + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = true; else - mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false; + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = false; } /*PTE Buffer Size Check*/ @@ -5013,7 +5012,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l status = DML_FAIL_SCALE_RATIO_TAP; } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { status = DML_FAIL_SOURCE_PIXEL_FORMAT; - } else if (locals->ViewportSizeSupport[i] != true) { + } else if (locals->ViewportSizeSupport[i][0] != true) { status = DML_FAIL_VIEWPORT_SIZE; } else if (locals->DIOSupport[i] != true) { status = DML_FAIL_DIO_SUPPORT; @@ -5023,7 +5022,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_DSC_CLK_REQUIRED; } else if (locals->UrgentLatencySupport[i][j] != true) { status = DML_FAIL_URGENT_LATENCY; - } else if (locals->ROBSupport[i] != true) { + } else if (locals->ROBSupport[i][0] != true) { status = DML_FAIL_REORDERING_BUFFER; } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { status = DML_FAIL_DISPCLK_DPPCLK; @@ -5043,7 +5042,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_PITCH_SUPPORT; } else if (locals->PrefetchSupported[i][j] != true) { status = DML_FAIL_PREFETCH_SUPPORT; - } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { + } else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) { status = DML_FAIL_TOTAL_V_ACTIVE_BW; } else if (locals->VRatioInPrefetchSupported[i][j] != true) { status = DML_FAIL_V_RATIO_PREFETCH; @@ -5089,7 +5088,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; for (k = 0; k <= 
mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index 6482d7b99bae..d6fedae03dc8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -1395,11 +1395,11 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP else mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; - if (mode_lib->vba.ODMCombineEnabled[k] == true) + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; if (MainPlaneDoesODMCombine == true) @@ -2885,12 +2885,12 @@ static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib) SwathWidth = mode_lib->vba.ViewportHeight[k]; } - if (mode_lib->vba.ODMCombineEnabled[k] == true) { + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) { + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } } @@ -3483,10 +3483,10 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->FabricAndDRAMBandwidthPerState[i] * 1000) * locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; - locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState; + locals->ReturnBWPerState[i][0] = locals->ReturnBWToDCNPerState; if 
(locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] @@ -3497,7 +3497,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], 4 * locals->ReturnBWToDCNPerState * (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / @@ -3509,7 +3509,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000); if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] @@ -3520,7 +3520,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { 
- locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], 4 * locals->ReturnBWToDCNPerState * (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / @@ -3558,12 +3558,12 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] - + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; - if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] + + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0] > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { - locals->ROBSupport[i] = true; + locals->ROBSupport[i][0] = true; } else { - locals->ROBSupport[i] = false; + locals->ROBSupport[i][0] = false; } } /*Writeback Mode Support Check*/ @@ -3946,7 +3946,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode } if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] - && locals->ODMCombineEnablePerState[i][k] == false) { + && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->NoOfDPP[i][j][k] = 1; locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); @@ -4035,16 +4035,16 @@ void 
dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode /*Viewport Size Check*/ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - locals->ViewportSizeSupport[i] = true; + locals->ViewportSizeSupport[i][0] = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } else { if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } } @@ -4226,8 +4226,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.DSCFormatFactor = 1; } if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] - == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { locals->DSCCLKRequiredMoreThanSupported[i] = @@ -4250,7 +4249,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.TotalDSCUnitsRequired = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { mode_lib->vba.TotalDSCUnitsRequired = mode_lib->vba.TotalDSCUnitsRequired + 2.0; } else { @@ -4292,7 +4291,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.bpp = 
locals->OutputBppPerState[i][k]; } if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { - if (locals->ODMCombineEnablePerState[i][k] == false) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->DSCDelayPerState[i][k] = dscceComputeDelay( mode_lib->vba.DSCInputBitPerComponent[k], @@ -4335,7 +4334,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { for (j = 0; j < 2; j++) { for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k])); else locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; @@ -4388,28 +4387,28 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma + dml_min( locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] * - locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i], + locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesLuma), locals->SwathHeightYPerState[i][j][k]); locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * - locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i], + locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesChroma), locals->SwathHeightCPerState[i][j][k]); if (locals->BytePerPixelInDETC[k] == 0) { locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / 
locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * - dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]); + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]); } else { locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min( locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * - dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]), + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]), locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) - locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 * - dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k])); + dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k])); } } } @@ -4454,14 +4453,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k]; locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k]; locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k]; - mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], mode_lib->vba.PixelClock[k] / 16.0); if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + 
mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4471,9 +4470,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4484,9 +4483,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode } } else { if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4496,9 +4495,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4508,9 +4507,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode * mode_lib->vba.RequiredDPPCLK[i][j][k]); } if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETC[k], @@ -4521,9 +4520,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + 
mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETC[k], @@ -4559,7 +4558,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode &mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k], &mode_lib->vba.dpte_row_height[k], &mode_lib->vba.meta_row_height[k]); - mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines( + mode_lib->vba.PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k], mode_lib->vba.vtaps[k], @@ -4598,7 +4597,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode &mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k], &mode_lib->vba.dpte_row_height_chroma[k], &mode_lib->vba.meta_row_height_chroma[k]); - mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines( + mode_lib->vba.PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k] / 2.0, mode_lib->vba.VTAPsChroma[k], @@ -4612,14 +4611,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; mode_lib->vba.MetaRowBytesC = 0.0; mode_lib->vba.DPTEBytesPerRowC = 0.0; - locals->PrefetchLinesC[k] = 0.0; + locals->PrefetchLinesC[0][0][k] = 0.0; locals->PTEBufferSizeNotExceededC[i][j][k] = true; locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; } - locals->PDEAndMetaPTEBytesPerFrame[k] = + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] = mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; - locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; - locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + 
mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; CalculateActiveRowBandwidth( mode_lib->vba.GPUVMEnable, @@ -4646,14 +4645,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] * mode_lib->vba.MetaChunkSize) * 1024.0 - / mode_lib->vba.ReturnBWPerState[i]; + / mode_lib->vba.ReturnBWPerState[i][0]; if (mode_lib->vba.GPUVMEnable == true) { mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency + mode_lib->vba.TotalNumberOfActiveDPP[i][j] * mode_lib->vba.PTEGroupSize - / mode_lib->vba.ReturnBWPerState[i]; + / mode_lib->vba.ReturnBWPerState[i][0]; } - mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { @@ -4703,7 +4702,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); } @@ -4743,7 +4742,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0; } - CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth, + CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i][0], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth, 
mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k], mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k], mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal, @@ -4757,14 +4756,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.NumberOfCursors[k], mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k], mode_lib->vba.HTotal[k], mode_lib->vba.MaxInterDCNTileRepeaters, - mode_lib->vba.MaximumVStartup[k], + mode_lib->vba.MaximumVStartup[0][0][k], mode_lib->vba.GPUVMMaxPageTableLevels, mode_lib->vba.GPUVMEnable, mode_lib->vba.DynamicMetadataEnable[k], @@ -4774,15 +4773,15 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.ExtraLatency, mode_lib->vba.TimeCalc, - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], - mode_lib->vba.PrefetchLinesY[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], + mode_lib->vba.PrefetchLinesY[0][0][k], mode_lib->vba.SwathWidthYPerState[i][j][k], mode_lib->vba.BytePerPixelInDETY[k], mode_lib->vba.PrefillY[k], mode_lib->vba.MaxNumSwY[k], - mode_lib->vba.PrefetchLinesC[k], + mode_lib->vba.PrefetchLinesC[0][0][k], mode_lib->vba.BytePerPixelInDETC[k], 
mode_lib->vba.PrefillC[k], mode_lib->vba.MaxNumSwC[k], @@ -4812,19 +4811,19 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->prefetch_vm_bw_valid = true; locals->prefetch_row_bw_valid = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0) + if (locals->PDEAndMetaPTEBytesPerFrame[0][0][k] == 0) locals->prefetch_vm_bw[k] = 0; else if (locals->LinesForMetaPTE[k] > 0) - locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k] + locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[0][0][k] / (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]); else { locals->prefetch_vm_bw[k] = 0; locals->prefetch_vm_bw_valid = false; } - if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0) + if (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k] == 0) locals->prefetch_row_bw[k] = 0; else if (locals->LinesForMetaAndDPTERow[k] > 0) - locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]) + locals->prefetch_row_bw[k] = (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k]) / (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]); else { locals->prefetch_row_bw[k] = 0; @@ -4843,13 +4842,13 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k]) + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]); } - locals->BandwidthWithoutPrefetchSupported[i] = true; - if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) { - locals->BandwidthWithoutPrefetchSupported[i] = false; + locals->BandwidthWithoutPrefetchSupported[i][0] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]) { + locals->BandwidthWithoutPrefetchSupported[i][0] = false; } locals->PrefetchSupported[i][j] = true; - if 
(mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) { + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]) { locals->PrefetchSupported[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4874,7 +4873,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode if (mode_lib->vba.PrefetchSupported[i][j] == true && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) { mode_lib->vba.BandwidthAvailableForImmediateFlip = - mode_lib->vba.ReturnBWPerState[i]; + mode_lib->vba.ReturnBWPerState[i][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip @@ -4888,9 +4887,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { mode_lib->vba.ImmediateFlipBytes[k] = - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] - + mode_lib->vba.MetaRowBytes[k] - + mode_lib->vba.DPTEBytesPerRow[k]; + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k] + + mode_lib->vba.MetaRowBytes[0][0][k] + + mode_lib->vba.DPTEBytesPerRow[0][0][k]; } } mode_lib->vba.TotImmediateFlipBytes = 0.0; @@ -4918,9 +4917,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode / mode_lib->vba.PixelClock[k], mode_lib->vba.VRatio[k], mode_lib->vba.Tno_bw[k], - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], mode_lib->vba.DCCEnable[k], mode_lib->vba.dpte_row_height[k], mode_lib->vba.meta_row_height[k], @@ -4945,7 +4944,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode } 
mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true; if (mode_lib->vba.total_dcn_read_bw_with_flip - > mode_lib->vba.ReturnBWPerState[i]) { + > mode_lib->vba.ReturnBWPerState[i][0]) { mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4961,13 +4960,13 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode /*Vertical Active BW support*/ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth * + mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) * mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100; - if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i]) - mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true; + if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0]) + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = true; else - mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false; + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = false; } /*PTE Buffer Size Check*/ @@ -5055,7 +5054,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode status = DML_FAIL_SCALE_RATIO_TAP; } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { status = DML_FAIL_SOURCE_PIXEL_FORMAT; - } else if (locals->ViewportSizeSupport[i] != true) { + } else if (locals->ViewportSizeSupport[i][0] != true) { status = DML_FAIL_VIEWPORT_SIZE; } else if (locals->DIOSupport[i] != true) { status = DML_FAIL_DIO_SUPPORT; @@ -5065,7 +5064,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode status = 
DML_FAIL_DSC_CLK_REQUIRED; } else if (locals->UrgentLatencySupport[i][j] != true) { status = DML_FAIL_URGENT_LATENCY; - } else if (locals->ROBSupport[i] != true) { + } else if (locals->ROBSupport[i][0] != true) { status = DML_FAIL_REORDERING_BUFFER; } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { status = DML_FAIL_DISPCLK_DPPCLK; @@ -5085,7 +5084,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode status = DML_FAIL_PITCH_SUPPORT; } else if (locals->PrefetchSupported[i][j] != true) { status = DML_FAIL_PREFETCH_SUPPORT; - } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { + } else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) { status = DML_FAIL_TOTAL_V_ACTIVE_BW; } else if (locals->VRatioInPrefetchSupported[i][j] != true) { status = DML_FAIL_V_RATIO_PREFETCH; @@ -5131,7 +5130,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index b6d34669cddf..5dcfbb0af825 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -197,7 +197,7 @@ static unsigned int CalculateVMAndRowBytes( unsigned int 
*meta_row_width, unsigned int *meta_row_height, unsigned int *vm_group_bytes, - long *dpte_group_bytes, + unsigned int *dpte_group_bytes, unsigned int *PixelPTEReqWidth, unsigned int *PixelPTEReqHeight, unsigned int *PTERequestSize, @@ -295,7 +295,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double UrgentOutOfOrderReturn, double ReturnBW, bool GPUVMEnable, - long dpte_group_bytes[], + int dpte_group_bytes[], unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, @@ -309,13 +309,13 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( int DPPPerPlane[], bool DCCEnable[], double DPPCLK[], - unsigned int SwathWidthSingleDPPY[], + double SwathWidthSingleDPPY[], unsigned int SwathHeightY[], double ReadBandwidthPlaneLuma[], unsigned int SwathHeightC[], double ReadBandwidthPlaneChroma[], unsigned int LBBitPerPixel[], - unsigned int SwathWidthY[], + double SwathWidthY[], double HRatio[], unsigned int vtaps[], unsigned int VTAPsChroma[], @@ -344,7 +344,7 @@ static void CalculateDCFCLKDeepSleep( double BytePerPixelDETY[], double BytePerPixelDETC[], double VRatio[], - unsigned int SwathWidthY[], + double SwathWidthY[], int DPPPerPlane[], double HRatio[], double PixelClock[], @@ -435,7 +435,7 @@ static void CalculateMetaAndPTETimes( unsigned int meta_row_height[], unsigned int meta_req_width[], unsigned int meta_req_height[], - long dpte_group_bytes[], + int dpte_group_bytes[], unsigned int PTERequestSizeY[], unsigned int PTERequestSizeC[], unsigned int PixelPTEReqWidthY[], @@ -477,7 +477,7 @@ static double CalculateExtraLatency( bool HostVMEnable, int NumberOfActivePlanes, int NumberOfDPP[], - long dpte_group_bytes[], + int dpte_group_bytes[], double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, int HostVMMaxPageTableLevels, @@ -1280,7 +1280,7 @@ static unsigned int CalculateVMAndRowBytes( unsigned int *meta_row_width, 
unsigned int *meta_row_height, unsigned int *vm_group_bytes, - long *dpte_group_bytes, + unsigned int *dpte_group_bytes, unsigned int *PixelPTEReqWidth, unsigned int *PixelPTEReqHeight, unsigned int *PTERequestSize, @@ -1683,11 +1683,11 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman else locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; - if (mode_lib->vba.ODMCombineEnabled[k] == true) + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; if (MainPlaneDoesODMCombine == true) @@ -2940,12 +2940,12 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) SwathWidth = mode_lib->vba.ViewportHeight[k]; } - if (mode_lib->vba.ODMCombineEnabled[k] == true) { + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) { + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } } @@ -3542,17 +3542,17 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - locals->IdealSDPPortBandwidthPerState[i] = dml_min3( + locals->IdealSDPPortBandwidthPerState[i][0] = dml_min3( mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels * mode_lib->vba.DRAMChannelWidth, mode_lib->vba.FabricClockPerState[i] * mode_lib->vba.FabricDatapathToDCNDataReturn); if (mode_lib->vba.HostVMEnable == false) { - locals->ReturnBWPerState[i] = 
locals->IdealSDPPortBandwidthPerState[i] + locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0] * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0; } else { - locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i] + locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0] * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0; } } @@ -3589,12 +3589,12 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly) - * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; - if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] + * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0] > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { - locals->ROBSupport[i] = true; + locals->ROBSupport[i][0] = true; } else { - locals->ROBSupport[i] = false; + locals->ROBSupport[i][0] = false; } } /*Writeback Mode Support Check*/ @@ -3982,7 +3982,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] - && locals->ODMCombineEnablePerState[i][k] == false) { + && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->NoOfDPP[i][j][k] = 1; locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + 
mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); @@ -4071,16 +4071,16 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*Viewport Size Check*/ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - locals->ViewportSizeSupport[i] = true; + locals->ViewportSizeSupport[i][0] = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } else { if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } } @@ -4269,8 +4269,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DSCFormatFactor = 1; } if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] - == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { locals->DSCCLKRequiredMoreThanSupported[i] = @@ -4293,7 +4292,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.TotalDSCUnitsRequired = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { mode_lib->vba.TotalDSCUnitsRequired = mode_lib->vba.TotalDSCUnitsRequired + 2.0; } else { @@ -4335,7 +4334,7 @@ void 
dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; } if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { - if (locals->ODMCombineEnablePerState[i][k] == false) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->DSCDelayPerState[i][k] = dscceComputeDelay( mode_lib->vba.DSCInputBitPerComponent[k], @@ -4399,7 +4398,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k]; locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k]; - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { locals->SwathWidthYThisState[k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])); } else { @@ -4451,7 +4450,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->PSCL_FACTOR, locals->PSCL_FACTOR_CHROMA, locals->RequiredDPPCLKThisState, - &mode_lib->vba.ProjectedDCFCLKDeepSleep); + &mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]); for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 @@ -4496,7 +4495,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->PTERequestSizeC, locals->dpde0_bytes_per_frame_ub_c, locals->meta_pte_bytes_per_frame_ub_c); - locals->PrefetchLinesC[k] = CalculatePrefetchSourceLines( + locals->PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k]/2, mode_lib->vba.VTAPsChroma[k], @@ -4511,7 +4510,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; mode_lib->vba.MetaRowBytesC = 0.0; 
mode_lib->vba.DPTEBytesPerRowC = 0.0; - locals->PrefetchLinesC[k] = 0.0; + locals->PrefetchLinesC[0][0][k] = 0.0; locals->PTEBufferSizeNotExceededC[i][j][k] = true; locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; } @@ -4552,7 +4551,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->PTERequestSizeY, locals->dpde0_bytes_per_frame_ub_l, locals->meta_pte_bytes_per_frame_ub_l); - locals->PrefetchLinesY[k] = CalculatePrefetchSourceLines( + locals->PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k], mode_lib->vba.vtaps[k], @@ -4562,10 +4561,10 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.ViewportYStartY[k], &locals->PrefillY[k], &locals->MaxNumSwY[k]); - locals->PDEAndMetaPTEBytesPerFrame[k] = + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] = mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; - locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; - locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; CalculateActiveRowBandwidth( mode_lib->vba.GPUVMEnable, @@ -4591,7 +4590,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.PixelChunkSizeInKByte, locals->TotalNumberOfDCCActiveDPP[i][j], mode_lib->vba.MetaChunkSize, - locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0], mode_lib->vba.GPUVMEnable, mode_lib->vba.HostVMEnable, mode_lib->vba.NumberOfActivePlanes, @@ -4602,7 +4601,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.HostVMMaxPageTableLevels, 
mode_lib->vba.HostVMCachedPageTableLevels); - mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { if (mode_lib->vba.WritebackEnable[k] == true) { @@ -4644,15 +4643,15 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } } - mode_lib->vba.MaxMaxVStartup = 0; + mode_lib->vba.MaxMaxVStartup[0][0] = 0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); - mode_lib->vba.MaxMaxVStartup = dml_max(mode_lib->vba.MaxMaxVStartup, locals->MaximumVStartup[k]); + mode_lib->vba.MaxMaxVStartup[0][0] = dml_max(mode_lib->vba.MaxMaxVStartup[0][0], locals->MaximumVStartup[0][0][k]); } mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode; - mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup; + mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0]; do { mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode; mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup; @@ -4693,7 +4692,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k]; myPipe.DISPCLK = locals->RequiredDISPCLK[i][j]; myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep; + myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k]; myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; myPipe.SourceScan = mode_lib->vba.SourceScan[k]; @@ -4727,8 
+4726,8 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.MaxInterDCNTileRepeaters, - dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[k]), - locals->MaximumVStartup[k], + dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]), + locals->MaximumVStartup[0][0][k], mode_lib->vba.GPUVMMaxPageTableLevels, mode_lib->vba.GPUVMEnable, &myHostVM, @@ -4739,15 +4738,15 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentLatency, mode_lib->vba.ExtraLatency, mode_lib->vba.TimeCalc, - locals->PDEAndMetaPTEBytesPerFrame[k], - locals->MetaRowBytes[k], - locals->DPTEBytesPerRow[k], - locals->PrefetchLinesY[k], + locals->PDEAndMetaPTEBytesPerFrame[0][0][k], + locals->MetaRowBytes[0][0][k], + locals->DPTEBytesPerRow[0][0][k], + locals->PrefetchLinesY[0][0][k], locals->SwathWidthYThisState[k], locals->BytePerPixelInDETY[k], locals->PrefillY[k], locals->MaxNumSwY[k], - locals->PrefetchLinesC[k], + locals->PrefetchLinesC[0][0][k], locals->BytePerPixelInDETC[k], locals->PrefillC[k], locals->MaxNumSwC[k], @@ -4836,14 +4835,14 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); } - locals->BandwidthWithoutPrefetchSupported[i] = true; - if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i] + locals->BandwidthWithoutPrefetchSupported[i][0] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0] || locals->NotEnoughUrgentLatencyHiding == 1) { - locals->BandwidthWithoutPrefetchSupported[i] = false; + locals->BandwidthWithoutPrefetchSupported[i][0] = false; } locals->PrefetchSupported[i][j] = true; - if 
(mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i] + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0] || locals->NotEnoughUrgentLatencyHiding == 1 || locals->NotEnoughUrgentLatencyHidingPre == 1) { locals->PrefetchSupported[i][j] = false; @@ -4872,17 +4871,17 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) { - mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup; + mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0]; mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1; } else { mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1; } } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) - && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup + && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0] || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode)); if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) { - mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i]; + mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip - dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] @@ -4895,7 +4894,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.TotImmediateFlipBytes = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes - + locals->PDEAndMetaPTEBytesPerFrame[k] + locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]; + + 
locals->PDEAndMetaPTEBytesPerFrame[0][0][k] + locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k]; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4910,9 +4909,9 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.HostVMMaxPageTableLevels, mode_lib->vba.HostVMCachedPageTableLevels, mode_lib->vba.GPUVMEnable, - locals->PDEAndMetaPTEBytesPerFrame[k], - locals->MetaRowBytes[k], - locals->DPTEBytesPerRow[k], + locals->PDEAndMetaPTEBytesPerFrame[0][0][k], + locals->MetaRowBytes[0][0][k], + locals->DPTEBytesPerRow[0][0][k], mode_lib->vba.BandwidthAvailableForImmediateFlip, mode_lib->vba.TotImmediateFlipBytes, mode_lib->vba.SourcePixelFormat[k], @@ -4943,7 +4942,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } locals->ImmediateFlipSupportedForState[i][j] = true; if (mode_lib->vba.total_dcn_read_bw_with_flip - > locals->ReturnBWPerState[i]) { + > locals->ReturnBWPerState[i][0]) { locals->ImmediateFlipSupportedForState[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4970,7 +4969,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.WritebackInterfaceChromaBufferSize, mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels, - locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0], mode_lib->vba.GPUVMEnable, locals->dpte_group_bytes, mode_lib->vba.MetaChunkSize, @@ -4982,7 +4981,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DRAMClockChangeLatency, mode_lib->vba.SRExitTime, mode_lib->vba.SREnterPlusExitTime, - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], locals->NoOfDPPThisState, mode_lib->vba.DCCEnable, locals->RequiredDPPCLKThisState, @@ -5025,8 +5024,8 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k]; } for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) { - locals->MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min( - locals->IdealSDPPortBandwidthPerState[i] * + locals->MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min( + locals->IdealSDPPortBandwidthPerState[i][0] * mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100.0, mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels * @@ -5034,10 +5033,10 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100.0); - if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i]) { - locals->TotalVerticalActiveBandwidthSupport[i] = true; + if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i][0]) { + locals->TotalVerticalActiveBandwidthSupport[i][0] = true; } else { - locals->TotalVerticalActiveBandwidthSupport[i] = false; + locals->TotalVerticalActiveBandwidthSupport[i][0] = false; } } } @@ -5116,7 +5115,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_SCALE_RATIO_TAP; } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { status = DML_FAIL_SOURCE_PIXEL_FORMAT; - } else if (locals->ViewportSizeSupport[i] != true) { + } else if (locals->ViewportSizeSupport[i][0] != true) { status = DML_FAIL_VIEWPORT_SIZE; } else if (locals->DIOSupport[i] != true) { status = DML_FAIL_DIO_SUPPORT; @@ -5124,7 +5123,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_NOT_ENOUGH_DSC; } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) { status = DML_FAIL_DSC_CLK_REQUIRED; - } else if (locals->ROBSupport[i] != true) { + } else if (locals->ROBSupport[i][0] != true) { status = 
DML_FAIL_REORDERING_BUFFER; } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { status = DML_FAIL_DISPCLK_DPPCLK; @@ -5142,7 +5141,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_CURSOR_SUPPORT; } else if (mode_lib->vba.PitchSupport != true) { status = DML_FAIL_PITCH_SUPPORT; - } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { + } else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) { status = DML_FAIL_TOTAL_V_ACTIVE_BW; } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) { status = DML_FAIL_PTE_BUFFER_SIZE; @@ -5198,7 +5197,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { mode_lib->vba.ODMCombineEnabled[k] = @@ -5227,7 +5226,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double UrgentOutOfOrderReturn, double ReturnBW, bool GPUVMEnable, - long dpte_group_bytes[], + int dpte_group_bytes[], unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, @@ -5241,13 +5240,13 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( int DPPPerPlane[], bool DCCEnable[], double DPPCLK[], - unsigned int SwathWidthSingleDPPY[], + double SwathWidthSingleDPPY[], unsigned int SwathHeightY[], double ReadBandwidthPlaneLuma[], unsigned int SwathHeightC[], double ReadBandwidthPlaneChroma[], unsigned int LBBitPerPixel[], - unsigned int SwathWidthY[], + double SwathWidthY[], double HRatio[], 
unsigned int vtaps[], unsigned int VTAPsChroma[], @@ -5503,7 +5502,7 @@ static void CalculateDCFCLKDeepSleep( double BytePerPixelDETY[], double BytePerPixelDETC[], double VRatio[], - unsigned int SwathWidthY[], + double SwathWidthY[], int DPPPerPlane[], double HRatio[], double PixelClock[], @@ -5831,7 +5830,7 @@ static void CalculateMetaAndPTETimes( unsigned int meta_row_height[], unsigned int meta_req_width[], unsigned int meta_req_height[], - long dpte_group_bytes[], + int dpte_group_bytes[], unsigned int PTERequestSizeY[], unsigned int PTERequestSizeC[], unsigned int PixelPTEReqWidthY[], @@ -6087,7 +6086,7 @@ static double CalculateExtraLatency( bool HostVMEnable, int NumberOfActivePlanes, int NumberOfDPP[], - long dpte_group_bytes[], + int dpte_group_bytes[], double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, int HostVMMaxPageTableLevels, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h index 55d4cb23a073..658e0733b99d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h @@ -119,6 +119,10 @@ enum mpc_combine_affinity { dm_mpc_never }; +enum RequestType { + REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA +}; + enum self_refresh_affinity { dm_try_to_allow_self_refresh_and_mclk_switch, dm_allow_self_refresh_and_mclk_switch, @@ -165,4 +169,16 @@ enum odm_combine_mode { dm_odm_combine_mode_4to1, }; +enum odm_combine_policy { + dm_odm_combine_policy_dal, + dm_odm_combine_policy_none, + dm_odm_combine_policy_2to1, + dm_odm_combine_policy_4to1, +}; + +enum immediate_flip_requirement { + dm_immediate_flip_not_required, + dm_immediate_flip_required, +}; + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h 
b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index dbf6a021d0d8..658f81e757e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -63,6 +63,7 @@ struct _vcs_dpi_voltage_scaling_st { double dispclk_mhz; double phyclk_mhz; double dppclk_mhz; + double dtbclk_mhz; }; struct _vcs_dpi_soc_bounding_box_st { @@ -214,6 +215,7 @@ struct _vcs_dpi_display_pipe_source_params_st { int source_format; unsigned char dcc; unsigned int dcc_rate; + unsigned int dcc_rate_chroma; unsigned char dcc_use_global; unsigned char vm; bool gpuvm; // gpuvm enabled @@ -225,7 +227,10 @@ struct _vcs_dpi_display_pipe_source_params_st { int source_scan; int sw_mode; int macro_tile_size; + unsigned int surface_width_y; unsigned int surface_height_y; + unsigned int surface_width_c; + unsigned int surface_height_c; unsigned int viewport_width; unsigned int viewport_height; unsigned int viewport_y_y; @@ -324,7 +329,7 @@ struct _vcs_dpi_display_pipe_dest_params_st { double pixel_rate_mhz; unsigned char synchronized_vblank_all_planes; unsigned char otg_inst; - unsigned char odm_combine; + unsigned int odm_combine; unsigned char use_maximum_vstartup; unsigned int vtotal_max; unsigned int vtotal_min; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 66ca014a6b92..b3c96d9b472f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -264,7 +264,10 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib) mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mts; //mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mhz; mode_lib->vba.MaxDispclk[i] = soc->clock_limits[i].dispclk_mhz; + mode_lib->vba.DTBCLKPerState[i] = soc->clock_limits[i].dtbclk_mhz; } + mode_lib->vba.MinVoltageLevel = 0; + 
mode_lib->vba.MaxVoltageLevel = mode_lib->vba.soc.num_states; mode_lib->vba.DoUrgentLatencyAdjustment = soc->do_urgent_latency_adjustment; @@ -306,8 +309,6 @@ static void fetch_ip_params(struct display_mode_lib *mode_lib) mode_lib->vba.WritebackInterfaceBufferSize = ip->writeback_interface_buffer_size_kbytes; mode_lib->vba.WritebackLineBufferSize = ip->writeback_line_buffer_buffer_size; - mode_lib->vba.MinVoltageLevel = 0; - mode_lib->vba.MaxVoltageLevel = 5; mode_lib->vba.WritebackChromaLineBufferWidth = ip->writeback_chroma_line_buffer_width_pixels; @@ -423,8 +424,8 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) ip->dcc_supported : src->dcc && ip->dcc_supported; mode_lib->vba.DCCRate[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate; /* TODO: Needs to be set based on src->dcc_rate_luma/chroma */ - mode_lib->vba.DCCRateLuma[mode_lib->vba.NumberOfActivePlanes] = 0; - mode_lib->vba.DCCRateChroma[mode_lib->vba.NumberOfActivePlanes] = 0; + mode_lib->vba.DCCRateLuma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate; + mode_lib->vba.DCCRateChroma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate_chroma; mode_lib->vba.SourcePixelFormat[mode_lib->vba.NumberOfActivePlanes] = (enum source_format_class) (src->source_format); @@ -436,8 +437,6 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) dst->recout_width; // TODO: or should this be full_recout_width???...maybe only when in hsplit mode? 
mode_lib->vba.ODMCombineEnabled[mode_lib->vba.NumberOfActivePlanes] = dst->odm_combine; - mode_lib->vba.ODMCombineTypeEnabled[mode_lib->vba.NumberOfActivePlanes] = - dst->odm_combine; mode_lib->vba.OutputFormat[mode_lib->vba.NumberOfActivePlanes] = (enum output_format_class) (dout->output_format); mode_lib->vba.OutputBpp[mode_lib->vba.NumberOfActivePlanes] = @@ -590,6 +589,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) for (k = j + 1; k < mode_lib->vba.cache_num_pipes; ++k) { display_pipe_source_params_st *src_k = &pipes[k].pipe.src; display_pipe_dest_params_st *dst_k = &pipes[k].pipe.dest; + display_output_params_st *dout_k = &pipes[j].dout; if (src_k->is_hsplit && !visited[k] && src->hsplit_grp == src_k->hsplit_grp) { @@ -600,12 +600,18 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) == dm_horz) { mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] += src_k->viewport_width; + mode_lib->vba.ViewportWidthChroma[mode_lib->vba.NumberOfActivePlanes] += + src_k->viewport_width; mode_lib->vba.ScalerRecoutWidth[mode_lib->vba.NumberOfActivePlanes] += dst_k->recout_width; } else { mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] += src_k->viewport_height; + mode_lib->vba.ViewportHeightChroma[mode_lib->vba.NumberOfActivePlanes] += + src_k->viewport_height; } + mode_lib->vba.NumberOfDSCSlices[mode_lib->vba.NumberOfActivePlanes] += + dout_k->dsc_slices; visited[k] = true; } @@ -811,7 +817,9 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib) unsigned int total_pipes = 0; mode_lib->vba.VoltageLevel = mode_lib->vba.cache_pipes[0].clks_cfg.voltage; - mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]; + if (mode_lib->vba.ReturnBW == 0) + mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; 
mode_lib->vba.FabricAndDRAMBandwidth = mode_lib->vba.FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; fetch_socbb_params(mode_lib); diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 3eb657ed5714..e7a44df676ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -157,6 +157,7 @@ struct vba_vars_st { bool DummyPStateCheck; bool DRAMClockChangeSupportsVActive; bool PrefetchModeSupported; + bool PrefetchAndImmediateFlipSupported; enum self_refresh_affinity AllowDRAMSelfRefreshOrDRAMClockChangeInVblank; // Mode Support only double XFCRemoteSurfaceFlipDelay; double TInitXFill; @@ -318,8 +319,7 @@ struct vba_vars_st { unsigned int DynamicMetadataTransmittedBytes[DC__NUM_DPP__MAX]; double DCCRate[DC__NUM_DPP__MAX]; double AverageDCCCompressionRate; - bool ODMCombineEnabled[DC__NUM_DPP__MAX]; - enum odm_combine_mode ODMCombineTypeEnabled[DC__NUM_DPP__MAX]; + enum odm_combine_mode ODMCombineEnabled[DC__NUM_DPP__MAX]; double OutputBpp[DC__NUM_DPP__MAX]; bool DSCEnabled[DC__NUM_DPP__MAX]; unsigned int DSCInputBitPerComponent[DC__NUM_DPP__MAX]; @@ -347,6 +347,7 @@ struct vba_vars_st { unsigned int EffectiveLBLatencyHidingSourceLinesChroma; double BandwidthAvailableForImmediateFlip; unsigned int PrefetchMode[DC__VOLTAGE_STATES + 1][2]; + unsigned int PrefetchModePerState[DC__VOLTAGE_STATES + 1][2]; unsigned int MinPrefetchMode; unsigned int MaxPrefetchMode; bool AnyLinesForVMOrRowTooLarge; @@ -396,6 +397,7 @@ struct vba_vars_st { bool WritebackLumaAndChromaScalingSupported; bool Cursor64BppSupport; double DCFCLKPerState[DC__VOLTAGE_STATES + 1]; + double DCFCLKState[DC__VOLTAGE_STATES + 1][2]; double FabricClockPerState[DC__VOLTAGE_STATES + 1]; double SOCCLKPerState[DC__VOLTAGE_STATES + 1]; double PHYCLKPerState[DC__VOLTAGE_STATES + 1]; @@ -444,7 +446,7 @@ struct vba_vars_st { double 
OutputLinkDPLanes[DC__NUM_DPP__MAX]; double ForcedOutputLinkBPP[DC__NUM_DPP__MAX]; // Mode Support only double ImmediateFlipBW[DC__NUM_DPP__MAX]; - double MaxMaxVStartup; + double MaxMaxVStartup[DC__VOLTAGE_STATES + 1][2]; double WritebackLumaVExtra; double WritebackChromaVExtra; @@ -471,7 +473,7 @@ struct vba_vars_st { double RoundedUpMaxSwathSizeBytesC; double EffectiveDETLBLinesLuma; double EffectiveDETLBLinesChroma; - double ProjectedDCFCLKDeepSleep; + double ProjectedDCFCLKDeepSleep[DC__VOLTAGE_STATES + 1][2]; double PDEAndMetaPTEBytesPerFrameY; double PDEAndMetaPTEBytesPerFrameC; unsigned int MetaRowBytesY; @@ -489,12 +491,11 @@ struct vba_vars_st { double FractionOfUrgentBandwidthImmediateFlip; // Mode Support debugging output /* ms locals */ - double IdealSDPPortBandwidthPerState[DC__VOLTAGE_STATES + 1]; + double IdealSDPPortBandwidthPerState[DC__VOLTAGE_STATES + 1][2]; unsigned int NoOfDPP[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; int NoOfDPPThisState[DC__NUM_DPP__MAX]; - bool ODMCombineEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - enum odm_combine_mode ODMCombineTypeEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - unsigned int SwathWidthYThisState[DC__NUM_DPP__MAX]; + enum odm_combine_mode ODMCombineEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; + double SwathWidthYThisState[DC__NUM_DPP__MAX]; unsigned int SwathHeightCPerState[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; unsigned int SwathHeightYThisState[DC__NUM_DPP__MAX]; unsigned int SwathHeightCThisState[DC__NUM_DPP__MAX]; @@ -506,7 +507,7 @@ struct vba_vars_st { double RequiredDPPCLKThisState[DC__NUM_DPP__MAX]; bool PTEBufferSizeNotExceededY[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; bool PTEBufferSizeNotExceededC[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; - bool BandwidthWithoutPrefetchSupported[DC__VOLTAGE_STATES + 1]; + bool BandwidthWithoutPrefetchSupported[DC__VOLTAGE_STATES + 1][2]; bool PrefetchSupported[DC__VOLTAGE_STATES + 1][2]; bool 
VRatioInPrefetchSupported[DC__VOLTAGE_STATES + 1][2]; double RequiredDISPCLK[DC__VOLTAGE_STATES + 1][2]; @@ -515,22 +516,22 @@ struct vba_vars_st { unsigned int TotalNumberOfActiveDPP[DC__VOLTAGE_STATES + 1][2]; unsigned int TotalNumberOfDCCActiveDPP[DC__VOLTAGE_STATES + 1][2]; bool ModeSupport[DC__VOLTAGE_STATES + 1][2]; - double ReturnBWPerState[DC__VOLTAGE_STATES + 1]; + double ReturnBWPerState[DC__VOLTAGE_STATES + 1][2]; bool DIOSupport[DC__VOLTAGE_STATES + 1]; bool NotEnoughDSCUnits[DC__VOLTAGE_STATES + 1]; bool DSCCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1]; bool DTBCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1]; double UrgentRoundTripAndOutOfOrderLatencyPerState[DC__VOLTAGE_STATES + 1]; - bool ROBSupport[DC__VOLTAGE_STATES + 1]; + bool ROBSupport[DC__VOLTAGE_STATES + 1][2]; bool PTEBufferSizeNotExceeded[DC__VOLTAGE_STATES + 1][2]; - bool TotalVerticalActiveBandwidthSupport[DC__VOLTAGE_STATES + 1]; - double MaxTotalVerticalActiveAvailableBandwidth[DC__VOLTAGE_STATES + 1]; + bool TotalVerticalActiveBandwidthSupport[DC__VOLTAGE_STATES + 1][2]; + double MaxTotalVerticalActiveAvailableBandwidth[DC__VOLTAGE_STATES + 1][2]; double PrefetchBW[DC__NUM_DPP__MAX]; - double PDEAndMetaPTEBytesPerFrame[DC__NUM_DPP__MAX]; - double MetaRowBytes[DC__NUM_DPP__MAX]; - double DPTEBytesPerRow[DC__NUM_DPP__MAX]; - double PrefetchLinesY[DC__NUM_DPP__MAX]; - double PrefetchLinesC[DC__NUM_DPP__MAX]; + double PDEAndMetaPTEBytesPerFrame[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double MetaRowBytes[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double DPTEBytesPerRow[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double PrefetchLinesY[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double PrefetchLinesC[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; unsigned int MaxNumSwY[DC__NUM_DPP__MAX]; unsigned int MaxNumSwC[DC__NUM_DPP__MAX]; double PrefillY[DC__NUM_DPP__MAX]; @@ -539,7 +540,7 @@ struct vba_vars_st { double LinesForMetaPTE[DC__NUM_DPP__MAX]; double 
LinesForMetaAndDPTERow[DC__NUM_DPP__MAX]; double MinDPPCLKUsingSingleDPP[DC__NUM_DPP__MAX]; - unsigned int SwathWidthYSingleDPP[DC__NUM_DPP__MAX]; + double SwathWidthYSingleDPP[DC__NUM_DPP__MAX]; double BytePerPixelInDETY[DC__NUM_DPP__MAX]; double BytePerPixelInDETC[DC__NUM_DPP__MAX]; bool RequiresDSC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; @@ -547,7 +548,7 @@ struct vba_vars_st { double RequiresFEC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; double OutputBppPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; double DSCDelayPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - bool ViewportSizeSupport[DC__VOLTAGE_STATES + 1]; + bool ViewportSizeSupport[DC__VOLTAGE_STATES + 1][2]; unsigned int Read256BlockHeightY[DC__NUM_DPP__MAX]; unsigned int Read256BlockWidthY[DC__NUM_DPP__MAX]; unsigned int Read256BlockHeightC[DC__NUM_DPP__MAX]; @@ -562,7 +563,7 @@ struct vba_vars_st { double WriteBandwidth[DC__NUM_DPP__MAX]; double PSCL_FACTOR[DC__NUM_DPP__MAX]; double PSCL_FACTOR_CHROMA[DC__NUM_DPP__MAX]; - double MaximumVStartup[DC__NUM_DPP__MAX]; + double MaximumVStartup[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; unsigned int MacroTileWidthY[DC__NUM_DPP__MAX]; unsigned int MacroTileWidthC[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitch[DC__NUM_DPP__MAX]; @@ -579,7 +580,7 @@ struct vba_vars_st { bool ImmediateFlipSupportedForState[DC__VOLTAGE_STATES + 1][2]; double WritebackDelay[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; unsigned int vm_group_bytes[DC__NUM_DPP__MAX]; - long dpte_group_bytes[DC__NUM_DPP__MAX]; + unsigned int dpte_group_bytes[DC__NUM_DPP__MAX]; unsigned int dpte_row_height[DC__NUM_DPP__MAX]; unsigned int meta_req_height[DC__NUM_DPP__MAX]; unsigned int meta_req_width[DC__NUM_DPP__MAX]; @@ -605,14 +606,14 @@ struct vba_vars_st { double UrgentBurstFactorChroma[DC__NUM_DPP__MAX]; double UrgentBurstFactorChromaPre[DC__NUM_DPP__MAX]; + bool MPCCombine[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; double SwathWidthCSingleDPP[DC__NUM_DPP__MAX]; double 
MaximumSwathWidthInLineBufferLuma; double MaximumSwathWidthInLineBufferChroma; double MaximumSwathWidthLuma[DC__NUM_DPP__MAX]; double MaximumSwathWidthChroma[DC__NUM_DPP__MAX]; - bool odm_combine_dummy[DC__NUM_DPP__MAX]; - enum odm_combine_mode odm_combine_mode_dummy[DC__NUM_DPP__MAX]; + enum odm_combine_mode odm_combine_dummy[DC__NUM_DPP__MAX]; double dummy1[DC__NUM_DPP__MAX]; double dummy2[DC__NUM_DPP__MAX]; double dummy3[DC__NUM_DPP__MAX]; @@ -622,9 +623,9 @@ struct vba_vars_st { double dummy7[DC__NUM_DPP__MAX]; double dummy8[DC__NUM_DPP__MAX]; unsigned int dummyinteger1ms[DC__NUM_DPP__MAX]; - unsigned int dummyinteger2ms[DC__NUM_DPP__MAX]; + double dummyinteger2ms[DC__NUM_DPP__MAX]; unsigned int dummyinteger3[DC__NUM_DPP__MAX]; - unsigned int dummyinteger4; + unsigned int dummyinteger4[DC__NUM_DPP__MAX]; unsigned int dummyinteger5; unsigned int dummyinteger6; unsigned int dummyinteger7; @@ -637,7 +638,6 @@ struct vba_vars_st { unsigned int dummyintegerarr2[DC__NUM_DPP__MAX]; unsigned int dummyintegerarr3[DC__NUM_DPP__MAX]; unsigned int dummyintegerarr4[DC__NUM_DPP__MAX]; - long dummylongarr1[DC__NUM_DPP__MAX]; bool dummysinglestring; bool SingleDPPViewportSizeSupportPerPlane[DC__NUM_DPP__MAX]; double PlaneRequiredDISPCLKWithODMCombine2To1; @@ -645,20 +645,19 @@ struct vba_vars_st { unsigned int TotalNumberOfSingleDPPPlanes[DC__VOLTAGE_STATES + 1][2]; bool LinkDSCEnable; bool ODMCombine4To1SupportCheckOK[DC__VOLTAGE_STATES + 1]; - bool ODMCombineEnableThisState[DC__NUM_DPP__MAX]; - enum odm_combine_mode ODMCombineEnableTypeThisState[DC__NUM_DPP__MAX]; - unsigned int SwathWidthCThisState[DC__NUM_DPP__MAX]; + enum odm_combine_mode ODMCombineEnableThisState[DC__NUM_DPP__MAX]; + double SwathWidthCThisState[DC__NUM_DPP__MAX]; bool ViewportSizeSupportPerPlane[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitchY[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitchC[DC__NUM_DPP__MAX]; unsigned int NotEnoughUrgentLatencyHiding; unsigned int NotEnoughUrgentLatencyHidingPre; - long 
PTEBufferSizeInRequestsForLuma; - long PTEBufferSizeInRequestsForChroma; + int PTEBufferSizeInRequestsForLuma; + int PTEBufferSizeInRequestsForChroma; // Missing from VBA - long dpte_group_bytes_chroma; + int dpte_group_bytes_chroma; unsigned int vm_group_bytes_chroma; double dst_x_after_scaler; double dst_y_after_scaler; @@ -683,8 +682,8 @@ struct vba_vars_st { double MinTTUVBlank[DC__NUM_DPP__MAX]; double BytePerPixelDETY[DC__NUM_DPP__MAX]; double BytePerPixelDETC[DC__NUM_DPP__MAX]; - unsigned int SwathWidthY[DC__NUM_DPP__MAX]; - unsigned int SwathWidthSingleDPPY[DC__NUM_DPP__MAX]; + double SwathWidthY[DC__NUM_DPP__MAX]; + double SwathWidthSingleDPPY[DC__NUM_DPP__MAX]; double CursorRequestDeliveryTime[DC__NUM_DPP__MAX]; double CursorRequestDeliveryTimePrefetch[DC__NUM_DPP__MAX]; double ReadBandwidthPlaneLuma[DC__NUM_DPP__MAX]; @@ -760,8 +759,8 @@ struct vba_vars_st { double LinesInDETY[DC__NUM_DPP__MAX]; double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; - unsigned int SwathWidthSingleDPPC[DC__NUM_DPP__MAX]; - unsigned int SwathWidthC[DC__NUM_DPP__MAX]; + double SwathWidthSingleDPPC[DC__NUM_DPP__MAX]; + double SwathWidthC[DC__NUM_DPP__MAX]; unsigned int BytePerPixelY[DC__NUM_DPP__MAX]; unsigned int BytePerPixelC[DC__NUM_DPP__MAX]; long dummyinteger1; @@ -779,6 +778,7 @@ struct vba_vars_st { unsigned int DCCCMaxCompressedBlock[DC__NUM_DPP__MAX]; unsigned int DCCCIndependent64ByteBlock[DC__NUM_DPP__MAX]; double VStartupMargin; + bool NotEnoughTimeForDynamicMetadata; /* Missing from VBA */ unsigned int MaximumMaxVStartupLines; @@ -814,7 +814,7 @@ struct vba_vars_st { unsigned int ViewportHeightChroma[DC__NUM_DPP__MAX]; double HRatioChroma[DC__NUM_DPP__MAX]; double VRatioChroma[DC__NUM_DPP__MAX]; - long WritebackSourceWidth[DC__NUM_DPP__MAX]; + int WritebackSourceWidth[DC__NUM_DPP__MAX]; bool ModeIsSupported; bool ODMCombine4To1Supported; @@ -850,6 +850,58 @@ struct vba_vars_st { unsigned int MaxNumHDMIFRLOutputs; int AudioSampleRate[DC__NUM_DPP__MAX]; int 
AudioSampleLayout[DC__NUM_DPP__MAX]; + + int PercentMarginOverMinimumRequiredDCFCLK; + bool DynamicMetadataSupported[DC__VOLTAGE_STATES + 1][2]; + enum immediate_flip_requirement ImmediateFlipRequirement; + double DETBufferSizeYThisState[DC__NUM_DPP__MAX]; + double DETBufferSizeCThisState[DC__NUM_DPP__MAX]; + bool NoUrgentLatencyHiding[DC__NUM_DPP__MAX]; + bool NoUrgentLatencyHidingPre[DC__NUM_DPP__MAX]; + int swath_width_luma_ub_this_state[DC__NUM_DPP__MAX]; + int swath_width_chroma_ub_this_state[DC__NUM_DPP__MAX]; + double UrgLatency[DC__VOLTAGE_STATES + 1]; + double VActiveCursorBandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double VActivePixelBandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + bool NoTimeForPrefetch[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + bool NoTimeForDynamicMetadata[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double dpte_row_bandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double meta_row_bandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double DETBufferSizeYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double DETBufferSizeCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + int swath_width_luma_ub_all_states[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + int swath_width_chroma_ub_all_states[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + bool NotUrgentLatencyHiding[DC__VOLTAGE_STATES + 1][2]; + unsigned int SwathHeightYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + unsigned int SwathHeightCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + unsigned int SwathWidthYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + unsigned int SwathWidthCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double TotalDPTERowBandwidth[DC__VOLTAGE_STATES + 1][2]; + double TotalMetaRowBandwidth[DC__VOLTAGE_STATES + 1][2]; + double TotalVActiveCursorBandwidth[DC__VOLTAGE_STATES + 1][2]; + double TotalVActivePixelBandwidth[DC__VOLTAGE_STATES + 1][2]; + bool 
UseMinimumRequiredDCFCLK; + double WritebackDelayTime[DC__NUM_DPP__MAX]; + unsigned int DCCYIndependentBlock[DC__NUM_DPP__MAX]; + unsigned int DCCCIndependentBlock[DC__NUM_DPP__MAX]; + unsigned int dummyinteger15; + unsigned int dummyinteger16; + unsigned int dummyinteger17; + unsigned int dummyinteger18; + unsigned int dummyinteger19; + unsigned int dummyinteger20; + unsigned int dummyinteger21; + unsigned int dummyinteger22; + unsigned int dummyinteger23; + unsigned int dummyinteger24; + unsigned int dummyinteger25; + unsigned int dummyinteger26; + unsigned int dummyinteger27; + unsigned int dummyinteger28; + unsigned int dummyinteger29; + bool dummystring[DC__NUM_DPP__MAX]; + double BPP; + enum odm_combine_policy ODMCombinePolicy; }; bool CalculateMinAndMaxPrefetchMode( From 5fb3a1a5a03837ef1036383f943434870d3ed588 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 16 Dec 2019 11:08:35 -0500 Subject: [PATCH 086/113] drm/amd/display: 3.2.66 Signed-off-by: Aric Cyr Reviewed-by: Aric Cyr Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index d1d57432bc7e..dfc66954a24b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.65" +#define DC_VER "3.2.66" #define MAX_SURFACES 3 #define MAX_PLANES 6 From d9eb70ae610fea5ff41b9849cc541c8d5f0146db Mon Sep 17 00:00:00 2001 From: Noah Abradjian Date: Fri, 13 Dec 2019 09:31:20 -0500 Subject: [PATCH 087/113] drm/amd/display: Fix double buffering in dcn2 ICSC [Why] When rapidly adjusting video brightness, screen tearing was observed. This was due to overwritten values in ICSC registers. In dcn10, this issue had been fixed by implementing double buffering via alternating ICSC modes. 
However, the second register set used in dcn1 doesn't exist in dcn2. [How] Create new program_input_csc for dcn20. Use ICSC_B registers instead of COMA registers as second set. Signed-off-by: Noah Abradjian Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_dpp_cm.c | 20 ----- .../gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c | 24 +++--- .../gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h | 31 ++++++- .../drm/amd/display/dc/dcn20/dcn20_dpp_cm.c | 83 ++++++++++++++++++- .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c | 10 ++- .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h | 6 +- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 1 + .../drm/amd/display/dc/dcn21/dcn21_resource.c | 7 +- drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h | 20 +++++ 9 files changed, 160 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c index 935c892622a0..4d3f7d5e1473 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c @@ -88,26 +88,6 @@ enum dscl_mode_sel { DSCL_MODE_DSCL_BYPASS = 6 }; -static const struct dpp_input_csc_matrix dpp_input_csc_matrix[] = { - {COLOR_SPACE_SRGB, - {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, - {COLOR_SPACE_SRGB_LIMITED, - {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, - {COLOR_SPACE_YCBCR601, - {0x2cdd, 0x2000, 0, 0xe991, 0xe926, 0x2000, 0xf4fd, 0x10ef, - 0, 0x2000, 0x38b4, 0xe3a6} }, - {COLOR_SPACE_YCBCR601_LIMITED, - {0x3353, 0x2568, 0, 0xe400, 0xe5dc, 0x2568, 0xf367, 0x1108, - 0, 0x2568, 0x40de, 0xdd3a} }, - {COLOR_SPACE_YCBCR709, - {0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0, - 0x2000, 0x3b61, 0xe24f} }, - - {COLOR_SPACE_YCBCR709_LIMITED, - {0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0, - 0x2568, 0x43ee, 0xdbb2} } -}; - static void program_gamut_remap( struct dcn10_dpp 
*dpp, const uint16_t *regval, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c index 4d7e45892f08..bbdab5000a7c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c @@ -104,7 +104,7 @@ static void dpp2_cnv_setup ( uint32_t pixel_format = 0; uint32_t alpha_en = 1; enum dc_color_space color_space = COLOR_SPACE_SRGB; - enum dcn10_input_csc_select select = INPUT_CSC_SELECT_BYPASS; + enum dcn20_input_csc_select select = DCN2_ICSC_SELECT_BYPASS; bool force_disable_cursor = false; struct out_csc_color_matrix tbl_entry; uint32_t is_2bit = 0; @@ -145,25 +145,25 @@ static void dpp2_cnv_setup ( force_disable_cursor = false; pixel_format = 65; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: force_disable_cursor = true; pixel_format = 64; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: force_disable_cursor = true; pixel_format = 67; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: force_disable_cursor = true; pixel_format = 66; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: pixel_format = 22; @@ -177,7 +177,7 @@ static void dpp2_cnv_setup ( case SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888: pixel_format = 12; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX: pixel_format = 112; @@ -188,13 +188,13 @@ static void dpp2_cnv_setup ( case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010: pixel_format = 114; color_space = 
COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; is_2bit = 1; break; case SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102: pixel_format = 115; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; is_2bit = 1; break; case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT: @@ -227,13 +227,13 @@ static void dpp2_cnv_setup ( tbl_entry.color_space = input_color_space; if (color_space >= COLOR_SPACE_YCBCR601) - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; else - select = INPUT_CSC_SELECT_BYPASS; + select = DCN2_ICSC_SELECT_BYPASS; - dpp1_program_input_csc(dpp_base, color_space, select, &tbl_entry); + dpp2_program_input_csc(dpp_base, color_space, select, &tbl_entry); } else - dpp1_program_input_csc(dpp_base, color_space, select, NULL); + dpp2_program_input_csc(dpp_base, color_space, select, NULL); if (force_disable_cursor) { REG_UPDATE(CURSOR_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h index 5b03b737b1d6..d9ce60b6aa6e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h @@ -150,6 +150,10 @@ SRI(CM_SHAPER_RAMA_REGION_32_33, CM, id), \ SRI(CM_SHAPER_LUT_INDEX, CM, id) +#define TF_REG_LIST_DCN20_COMMON_APPEND(id) \ + SRI(CM_ICSC_B_C11_C12, CM, id), \ + SRI(CM_ICSC_B_C33_C34, CM, id) + #define TF_REG_LIST_DCN20(id) \ TF_REG_LIST_DCN(id), \ TF_REG_LIST_DCN20_COMMON(id), \ @@ -572,6 +576,14 @@ TF_SF(DSCL0_OBUF_MEM_PWR_CTRL, OBUF_MEM_PWR_FORCE, mask_sh),\ TF_SF(DSCL0_DSCL_MEM_PWR_CTRL, LUT_MEM_PWR_FORCE, mask_sh) +/* DPP CM debug status register: + * + * Status index including current ICSC, Gamut Remap Mode is 9 + * ICSC Mode: [5..4] + */ +#define CM_TEST_DEBUG_DATA_STATUS_IDX 9 +#define CM_TEST_DEBUG_DATA_ICSC_MODE_SH 4 +#define CM_TEST_DEBUG_DATA_ICSC_MODE_MASK 0x3 #define TF_REG_FIELD_LIST_DCN2_0(type) \ TF_REG_FIELD_LIST(type) \ @@ 
-630,11 +642,16 @@ struct dcn2_dpp_mask { uint32_t COLOR_KEYER_RED; \ uint32_t COLOR_KEYER_GREEN; \ uint32_t COLOR_KEYER_BLUE; \ - uint32_t OBUF_MEM_PWR_CTRL;\ + uint32_t OBUF_MEM_PWR_CTRL; \ uint32_t DSCL_MEM_PWR_CTRL +#define DPP_DCN2_REG_VARIABLE_LIST_CM_APPEND \ + uint32_t CM_ICSC_B_C11_C12; \ + uint32_t CM_ICSC_B_C33_C34 + struct dcn2_dpp_registers { DPP_DCN2_REG_VARIABLE_LIST; + DPP_DCN2_REG_VARIABLE_LIST_CM_APPEND; }; struct dcn20_dpp { @@ -656,6 +673,12 @@ struct dcn20_dpp { struct pwl_params pwl_data; }; +enum dcn20_input_csc_select { + DCN2_ICSC_SELECT_BYPASS = 0, + DCN2_ICSC_SELECT_ICSC_A = 1, + DCN2_ICSC_SELECT_ICSC_B = 2 +}; + void dpp20_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s); @@ -667,6 +690,12 @@ void dpp2_set_degamma( struct dpp *dpp_base, enum ipp_degamma_mode mode); +void dpp2_program_input_csc( + struct dpp *dpp_base, + enum dc_color_space color_space, + enum dcn20_input_csc_select input_select, + const struct out_csc_color_matrix *tbl_entry); + bool dpp20_program_blnd_lut( struct dpp *dpp_base, const struct pwl_params *params); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c index 05a3e7f97ef0..423f3daa177f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c @@ -36,6 +36,9 @@ #define REG(reg)\ dpp->tf_regs->reg +#define IND_REG(index) \ + (index) + #define CTX \ dpp->base.ctx @@ -44,9 +47,6 @@ dpp->tf_shift->field_name, dpp->tf_mask->field_name - - - static void dpp2_enable_cm_block( struct dpp *dpp_base) { @@ -158,6 +158,83 @@ void dpp2_set_degamma( } } +void dpp2_program_input_csc( + struct dpp *dpp_base, + enum dc_color_space color_space, + enum dcn20_input_csc_select input_select, + const struct out_csc_color_matrix *tbl_entry) +{ + struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); + int i; + int arr_size = sizeof(dpp_input_csc_matrix)/sizeof(struct dpp_input_csc_matrix); + const 
uint16_t *regval = NULL; + uint32_t cur_select = 0; + enum dcn20_input_csc_select select; + struct color_matrices_reg icsc_regs; + + if (input_select == DCN2_ICSC_SELECT_BYPASS) { + REG_SET(CM_ICSC_CONTROL, 0, CM_ICSC_MODE, 0); + return; + } + + if (tbl_entry == NULL) { + for (i = 0; i < arr_size; i++) + if (dpp_input_csc_matrix[i].color_space == color_space) { + regval = dpp_input_csc_matrix[i].regval; + break; + } + + if (regval == NULL) { + BREAK_TO_DEBUGGER(); + return; + } + } else { + regval = tbl_entry->regval; + } + + /* determine which CSC coefficients (A or B) we are using + * currently. select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + cur_select = IX_REG_READ(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA, + CM_TEST_DEBUG_DATA_STATUS_IDX); + + /* IX_REG_READ reads whole reg, so isolate part we want [5..4] */ + cur_select = (cur_select >> CM_TEST_DEBUG_DATA_ICSC_MODE_SH) + & CM_TEST_DEBUG_DATA_ICSC_MODE_MASK; + + /* value stored in dbg reg will be 1 greater than mode we want */ + if (cur_select - 1 != DCN2_ICSC_SELECT_ICSC_A) + select = DCN2_ICSC_SELECT_ICSC_A; + else + select = DCN2_ICSC_SELECT_ICSC_B; + + icsc_regs.shifts.csc_c11 = dpp->tf_shift->CM_ICSC_C11; + icsc_regs.masks.csc_c11 = dpp->tf_mask->CM_ICSC_C11; + icsc_regs.shifts.csc_c12 = dpp->tf_shift->CM_ICSC_C12; + icsc_regs.masks.csc_c12 = dpp->tf_mask->CM_ICSC_C12; + + if (select == DCN2_ICSC_SELECT_ICSC_A) { + + icsc_regs.csc_c11_c12 = REG(CM_ICSC_C11_C12); + icsc_regs.csc_c33_c34 = REG(CM_ICSC_C33_C34); + + } else { + + icsc_regs.csc_c11_c12 = REG(CM_ICSC_B_C11_C12); + icsc_regs.csc_c33_c34 = REG(CM_ICSC_B_C33_C34); + + } + + cm_helper_program_color_matrices( + dpp->base.ctx, + regval, + &icsc_regs); + + REG_SET(CM_ICSC_CONTROL, 0, + CM_ICSC_MODE, select); +} + static void dpp20_power_on_blnd_lut( struct dpp *dpp_base, bool power_on) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c 
index c38f7fdb43a8..ce95e7db4814 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -154,7 +154,10 @@ void mpc2_set_output_csc( * the CSC update so CSC is updated on frame boundary */ cur_mode = IX_REG_READ(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA, - MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE_IDX); + MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX); + + /* Isolate part of reg data we want [1..0] */ + cur_mode = cur_mode & MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE_MASK; if (cur_mode != MPC_OUTPUT_CSC_COEF_A) ocsc_mode = MPC_OUTPUT_CSC_COEF_A; @@ -211,7 +214,10 @@ void mpc2_set_ocsc_default( * the CSC update so CSC is updated on frame boundary */ cur_mode = IX_REG_READ(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA, - MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE_IDX); + MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX); + + /* Isolate part of reg data we want [1..0] */ + cur_mode = cur_mode & MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE_MASK; if (cur_mode != MPC_OUTPUT_CSC_COEF_A) ocsc_mode = MPC_OUTPUT_CSC_COEF_A; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h index 8c77e78e2df5..950e6f9cd23e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h @@ -185,9 +185,11 @@ /* * DCN2 MPC_OCSC debug status register: * - * Field describing current OCSC Mode has index 1 [1..0] + * Status index including current OCSC Mode is 1 + * OCSC Mode: [1..0] */ -#define MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE_IDX 1 +#define MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX 1 +#define MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE_MASK 0x3 #define MPC_REG_FIELD_LIST_DCN2_0(type) \ MPC_REG_FIELD_LIST(type)\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 9233144ae74e..2ef6d0e2b0ed 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -638,6 +638,7 @@ static const struct dce110_aux_registers aux_engine_regs[] = { #define tf_regs(id)\ [id] = {\ TF_REG_LIST_DCN20(id),\ + TF_REG_LIST_DCN20_COMMON_APPEND(id),\ } static const struct dcn2_dpp_registers tf_regs[] = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 5a6e7ac5f785..81e087e85721 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -609,6 +609,7 @@ static const struct dce110_aux_registers aux_engine_regs[] = { #define tf_regs(id)\ [id] = {\ TF_REG_LIST_DCN20(id),\ + TF_REG_LIST_DCN20_COMMON_APPEND(id),\ } static const struct dcn2_dpp_registers tf_regs[] = { @@ -619,11 +620,13 @@ static const struct dcn2_dpp_registers tf_regs[] = { }; static const struct dcn2_dpp_shift tf_shift = { - TF_REG_LIST_SH_MASK_DCN20(__SHIFT) + TF_REG_LIST_SH_MASK_DCN20(__SHIFT), + TF_DEBUG_REG_LIST_SH_DCN10 }; static const struct dcn2_dpp_mask tf_mask = { - TF_REG_LIST_SH_MASK_DCN20(_MASK) + TF_REG_LIST_SH_MASK_DCN20(_MASK), + TF_DEBUG_REG_LIST_MASK_DCN10 }; #define stream_enc_regs(id)\ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 125e42dbd3c5..45ef390ae052 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -47,6 +47,26 @@ struct dpp_input_csc_matrix { uint16_t regval[12]; }; +static const struct dpp_input_csc_matrix dpp_input_csc_matrix[] = { + {COLOR_SPACE_SRGB, + {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, + {COLOR_SPACE_SRGB_LIMITED, + {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, + {COLOR_SPACE_YCBCR601, + {0x2cdd, 0x2000, 0, 0xe991, 0xe926, 0x2000, 0xf4fd, 0x10ef, + 0, 0x2000, 0x38b4, 0xe3a6} }, + {COLOR_SPACE_YCBCR601_LIMITED, + {0x3353, 0x2568, 0, 0xe400, 0xe5dc, 0x2568, 0xf367, 0x1108, + 0, 0x2568, 
0x40de, 0xdd3a} }, + {COLOR_SPACE_YCBCR709, + {0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0, + 0x2000, 0x3b61, 0xe24f} }, + + {COLOR_SPACE_YCBCR709_LIMITED, + {0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0, + 0x2568, 0x43ee, 0xdbb2} } +}; + struct dpp_grph_csc_adjustment { struct fixed31_32 temperature_matrix[CSC_TEMPERATURE_MATRIX_SIZE]; enum graphics_gamut_adjust_type gamut_adjust_type; From 4c1a1335dfe0d771908a63950bee67b9b465fd06 Mon Sep 17 00:00:00 2001 From: Wyatt Wood Date: Wed, 11 Dec 2019 21:42:03 -0500 Subject: [PATCH 088/113] drm/amd/display: Driverside changes to support PSR in DMCUB [Why] Moving PSR from DMCU to DMCUB. [How] Add driverside PSR changes required to send inbox messages to fw. These changes are non-functional until the psr structure allocation is uncommented. Signed-off-by: Wyatt Wood Reviewed-by: Nicholas Kazlauskas Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 22 +- drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 220 ++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h | 47 ++++ .../drm/amd/display/dc/dcn21/dcn21_resource.c | 5 + .../gpu/drm/amd/display/dc/inc/core_types.h | 1 + .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 34 ++- 6 files changed, 313 insertions(+), 16 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c create mode 100644 drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 5ea4a1675259..17f00cbbdc44 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -45,6 +45,7 @@ #include "dpcd_defs.h" #include "dmcu.h" #include "hw/clk_mgr.h" +#include "../dce/dmub_psr.h" #define DC_LOGGER_INIT(logger) @@ -2404,10 +2405,11 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, bool allow_active, bool { struct dc 
*dc = link->ctx->dc; struct dmcu *dmcu = dc->res_pool->dmcu; + struct dmub_psr *psr = dc->res_pool->psr; - - - if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_feature_enabled) + if ((psr != NULL) && link->psr_feature_enabled) + psr->funcs->set_psr_enable(psr, allow_active); + else if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_feature_enabled) dmcu->funcs->set_psr_enable(dmcu, allow_active, wait); link->psr_allow_active = allow_active; @@ -2419,8 +2421,11 @@ bool dc_link_get_psr_state(const struct dc_link *link, uint32_t *psr_state) { struct dc *dc = link->ctx->dc; struct dmcu *dmcu = dc->res_pool->dmcu; + struct dmub_psr *psr = dc->res_pool->psr; - if (dmcu != NULL && link->psr_feature_enabled) + if (psr != NULL && link->psr_feature_enabled) + psr->funcs->get_psr_state(psr_state); + else if (dmcu != NULL && link->psr_feature_enabled) dmcu->funcs->get_psr_state(dmcu, psr_state); return true; @@ -2467,6 +2472,7 @@ bool dc_link_setup_psr(struct dc_link *link, { struct dc *dc; struct dmcu *dmcu; + struct dmub_psr *psr; int i; /* updateSinkPsrDpcdConfig*/ union dpcd_psr_configuration psr_configuration; @@ -2478,8 +2484,9 @@ bool dc_link_setup_psr(struct dc_link *link, dc = link->ctx->dc; dmcu = dc->res_pool->dmcu; + psr = dc->res_pool->psr; - if (!dmcu) + if (!dmcu && !psr) return false; @@ -2588,7 +2595,10 @@ bool dc_link_setup_psr(struct dc_link *link, */ psr_context->frame_delay = 0; - link->psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context); + if (psr) + link->psr_feature_enabled = psr->funcs->setup_psr(psr, link, psr_context); + else + link->psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context); /* psr_enabled == 0 indicates setup_psr did not succeed, but this * should not happen since firmware should be running at this point diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c new file mode 100644 index 
000000000000..225955ec6d39 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -0,0 +1,220 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dmub_psr.h" +#include "dc.h" +#include "dc_dmub_srv.h" +#include "../../dmub/inc/dmub_srv.h" +#include "dmub_fw_state.h" +#include "core_types.h" +#include "ipp.h" + +#define MAX_PIPES 6 + +/** + * Get PSR state from firmware. + */ +static void dmub_get_psr_state(uint32_t *psr_state) +{ + // Not yet implemented + // Trigger GPINT interrupt from firmware +} + +/** + * Enable/Disable PSR. 
+ */ +static void dmub_set_psr_enable(struct dmub_psr *dmub, bool enable) +{ + union dmub_rb_cmd cmd; + struct dc_context *dc = dmub->ctx; + + cmd.psr_enable.header.type = DMUB_CMD__PSR; + + if (enable) + cmd.psr_enable.header.sub_type = DMUB_CMD__PSR_ENABLE; + else + cmd.psr_enable.header.sub_type = DMUB_CMD__PSR_DISABLE; + + cmd.psr_enable.header.payload_bytes = 0; // Send header only + + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_enable.header); + dc_dmub_srv_cmd_execute(dc->dmub_srv); + dc_dmub_srv_wait_idle(dc->dmub_srv); +} + +/** + * Set PSR level. + */ +static void dmub_set_psr_level(struct dmub_psr *dmub, uint16_t psr_level) +{ + union dmub_rb_cmd cmd; + uint32_t psr_state = 0; + struct dc_context *dc = dmub->ctx; + + dmub_get_psr_state(&psr_state); + + if (psr_state == 0) + return; + + cmd.psr_set_level.header.type = DMUB_CMD__PSR; + cmd.psr_set_level.header.sub_type = DMUB_CMD__PSR_SET_LEVEL; + cmd.psr_set_level.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_level_data); + cmd.psr_set_level.psr_set_level_data.psr_level = psr_level; + + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_set_level.header); + dc_dmub_srv_cmd_execute(dc->dmub_srv); + dc_dmub_srv_wait_idle(dc->dmub_srv); +} + +/** + * Setup PSR by programming phy registers and sending psr hw context values to firmware. 
+ */ +static bool dmub_setup_psr(struct dmub_psr *dmub, + struct dc_link *link, + struct psr_context *psr_context) +{ + union dmub_rb_cmd cmd; + struct dc_context *dc = dmub->ctx; + struct dmub_cmd_psr_copy_settings_data *copy_settings_data + = &cmd.psr_copy_settings.psr_copy_settings_data; + struct pipe_ctx *pipe_ctx = NULL; + struct resource_context *res_ctx = &link->ctx->dc->current_state->res_ctx; + int i; + + for (i = 0; i < MAX_PIPES; i++) { + if (res_ctx && + res_ctx->pipe_ctx[i].stream && + res_ctx->pipe_ctx[i].stream->link && + res_ctx->pipe_ctx[i].stream->link == link && + res_ctx->pipe_ctx[i].stream->link->connector_signal == SIGNAL_TYPE_EDP) { + pipe_ctx = &res_ctx->pipe_ctx[i]; + break; + } + } + + // No eDP pipe drives this link (plane_res/stream_res are embedded members and cannot be NULL) + if (!pipe_ctx) + return false; + + // Program DP DPHY fast training registers + link->link_enc->funcs->psr_program_dp_dphy_fast_training(link->link_enc, + psr_context->psrExitLinkTrainingRequired); + + // Program DP_SEC_CNTL1 register to set transmission GPS0 line num and priority to high + link->link_enc->funcs->psr_program_secondary_packet(link->link_enc, + psr_context->sdpTransmitLineNumDeadline); + + cmd.psr_copy_settings.header.type = DMUB_CMD__PSR; + cmd.psr_copy_settings.header.sub_type = DMUB_CMD__PSR_COPY_SETTINGS; + cmd.psr_copy_settings.header.payload_bytes = sizeof(struct dmub_cmd_psr_copy_settings_data); + + // Hw insts + copy_settings_data->dpphy_inst = psr_context->phyType; + copy_settings_data->aux_inst = psr_context->channel; + copy_settings_data->digfe_inst = psr_context->engineId; + copy_settings_data->digbe_inst = psr_context->transmitterId; + + copy_settings_data->mpcc_inst = pipe_ctx->plane_res.mpcc_inst; + + if (pipe_ctx->plane_res.hubp) + copy_settings_data->hubp_inst = pipe_ctx->plane_res.hubp->inst; + else + copy_settings_data->hubp_inst = 0; + if (pipe_ctx->plane_res.dpp) + copy_settings_data->dpp_inst = pipe_ctx->plane_res.dpp->inst; + else + copy_settings_data->dpp_inst = 0; + if
(pipe_ctx->stream_res.opp) + copy_settings_data->opp_inst = pipe_ctx->stream_res.opp->inst; + else + copy_settings_data->opp_inst = 0; + if (pipe_ctx->stream_res.tg) + copy_settings_data->otg_inst = pipe_ctx->stream_res.tg->inst; + else + copy_settings_data->otg_inst = 0; + + // Misc + copy_settings_data->psr_level = psr_context->psr_level.u32all; + copy_settings_data->hyst_frames = psr_context->timehyst_frames; + copy_settings_data->hyst_lines = psr_context->hyst_lines; + copy_settings_data->phy_type = psr_context->phyType; + copy_settings_data->aux_repeat = psr_context->aux_repeats; + copy_settings_data->smu_optimizations_en = psr_context->allow_smu_optimizations; + copy_settings_data->skip_wait_for_pll_lock = psr_context->skipPsrWaitForPllLock; + copy_settings_data->frame_delay = psr_context->frame_delay; + copy_settings_data->smu_phy_id = psr_context->smuPhyId; + copy_settings_data->num_of_controllers = psr_context->numberOfControllers; + copy_settings_data->frame_cap_ind = psr_context->psrFrameCaptureIndicationReq; + copy_settings_data->phy_num = psr_context->frame_delay & 0x7; + copy_settings_data->link_rate = psr_context->frame_delay & 0xF; + + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_copy_settings.header); + dc_dmub_srv_cmd_execute(dc->dmub_srv); + dc_dmub_srv_wait_idle(dc->dmub_srv); + + return true; +} + +static const struct dmub_psr_funcs psr_funcs = { + .set_psr_enable = dmub_set_psr_enable, + .setup_psr = dmub_setup_psr, + .get_psr_state = dmub_get_psr_state, + .set_psr_level = dmub_set_psr_level, +}; + +/** + * Construct PSR object. + */ +static void dmub_psr_construct(struct dmub_psr *psr, struct dc_context *ctx) +{ + psr->ctx = ctx; + psr->funcs = &psr_funcs; +} + +/** + * Allocate and initialize PSR object.
+ */ +struct dmub_psr *dmub_psr_create(struct dc_context *ctx) +{ + struct dmub_psr *psr = kzalloc(sizeof(struct dmub_psr), GFP_KERNEL); + + if (psr == NULL) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dmub_psr_construct(psr, ctx); + + return psr; +} + +/** + * Deallocate PSR object and reset the caller's pointer to NULL. + */ +void dmub_psr_destroy(struct dmub_psr **dmub) +{ + kfree(*dmub); + *dmub = NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h new file mode 100644 index 000000000000..229958de3035 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h @@ -0,0 +1,47 @@ +/* + * Copyright 2012-16 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE.
+ * + * Authors: AMD + * + */ + +#ifndef _DMUB_PSR_H_ +#define _DMUB_PSR_H_ + +#include "os_types.h" + +struct dmub_psr { + struct dc_context *ctx; + const struct dmub_psr_funcs *funcs; +}; + +struct dmub_psr_funcs { + void (*set_psr_enable)(struct dmub_psr *dmub, bool enable); + bool (*setup_psr)(struct dmub_psr *dmub, struct dc_link *link, struct psr_context *psr_context); + void (*get_psr_state)(uint32_t *psr_state); + void (*set_psr_level)(struct dmub_psr *dmub, uint16_t psr_level); +}; + +struct dmub_psr *dmub_psr_create(struct dc_context *ctx); +void dmub_psr_destroy(struct dmub_psr **dmub); + + +#endif /* _DMUB_PSR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 81e087e85721..0dd724de201a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -83,6 +83,7 @@ #include "dcn21_resource.h" #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" +#include "../dce/dmub_psr.h" #define SOC_BOUNDING_BOX_VALID false #define DC_LOGGER_INIT(logger) @@ -1744,6 +1745,10 @@ static bool dcn21_resource_construct( goto create_fail; } + // Leave as NULL to not affect current dmcu psr programming sequence + // Will be uncommented when functionality is confirmed to be working + pool->base.psr = NULL; + pool->base.abm = dce_abm_create(ctx, &abm_regs, &abm_shift, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 16f6ef22367b..f285b76888fb 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -212,6 +212,7 @@ struct resource_pool { struct abm *abm; struct dmcu *dmcu; + struct dmub_psr *psr; const struct resource_funcs *funcs; const struct resource_caps *res_cap; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index
b10728f33f62..d8fdf7e76ded 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -187,9 +187,28 @@ struct dmub_rb_cmd_dpphy_init { }; struct dmub_cmd_psr_copy_settings_data { - uint32_t reg1; - uint32_t reg2; - uint32_t reg3; + uint16_t psr_level; + uint8_t hubp_inst; + uint8_t dpp_inst; + uint8_t mpcc_inst; + uint8_t opp_inst; + uint8_t otg_inst; + uint8_t digfe_inst; + uint8_t digbe_inst; + uint8_t dpphy_inst; + uint8_t aux_inst; + uint8_t hyst_frames; + uint8_t hyst_lines; + uint8_t phy_num; + uint8_t phy_type; + uint8_t aux_repeat; + uint8_t smu_optimizations_en; + uint8_t skip_wait_for_pll_lock; + uint8_t frame_delay; + uint8_t smu_phy_id; + uint8_t num_of_controllers; + uint8_t link_rate; + uint8_t frame_cap_ind; }; struct dmub_rb_cmd_psr_copy_settings { @@ -206,10 +225,6 @@ struct dmub_rb_cmd_psr_set_level { struct dmub_cmd_psr_set_level_data psr_set_level_data; }; -struct dmub_rb_cmd_psr_disable { - struct dmub_cmd_header header; -}; - struct dmub_rb_cmd_psr_enable { struct dmub_cmd_header header; }; @@ -224,8 +239,8 @@ struct dmub_rb_cmd_notify_vblank { }; struct dmub_cmd_psr_notify_static_state_data { - uint32_t ss_int; // Which static screen interrupt was triggered - uint32_t ss_enter; // Enter (1) or exit (0) static screen + uint32_t ss_int; // Which static screen interrupt was triggered + uint32_t ss_enter; // Enter (1) or exit (0) static screen }; struct dmub_rb_cmd_psr_notify_static_state { @@ -245,7 +260,6 @@ union dmub_rb_cmd { struct dmub_rb_cmd_dpphy_init dpphy_init; struct dmub_rb_cmd_dig1_transmitter_control dig1_transmitter_control; struct dmub_rb_cmd_psr_enable psr_enable; - struct dmub_rb_cmd_psr_disable psr_disable; struct dmub_rb_cmd_psr_copy_settings psr_copy_settings; struct dmub_rb_cmd_psr_set_level psr_set_level; }; From 2c1a180ac12d76d2be3586262552619c0fc1daab Mon Sep 17 00:00:00 2001 From: Noah Abradjian Date: Mon, 16 Dec 2019 10:50:53 -0500 Subject: [PATCH 089/113] 
drm/amd/display: Double buffer dcn2 Gamut Remap [Why] When rapidly adjusting color temperature, screen tearing was observed. This was due to overwritten values in gamut remap registers. This issue was solved for OCSC and ICSC by alternating between "A" and "B" registers to double buffer the writes. [How] Create new set_gamut_remap and program_gamut_remap for dcn20. Alternate which registers are written to by switching modes each time. Also fixes ICSC mode reg read to use proper data offset. Signed-off-by: Noah Abradjian Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c | 2 +- .../gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h | 29 ++++++- .../drm/amd/display/dc/dcn20/dcn20_dpp_cm.c | 84 ++++++++++++++++++- 3 files changed, 109 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c index bbdab5000a7c..13e057d7ee93 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c @@ -458,7 +458,7 @@ static struct dpp_funcs dcn20_dpp_funcs = { .dpp_reset = dpp_reset, .dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale, .dpp_get_optimal_number_of_taps = dpp1_get_optimal_number_of_taps, - .dpp_set_gamut_remap = dpp1_cm_set_gamut_remap, + .dpp_set_gamut_remap = dpp2_cm_set_gamut_remap, .dpp_set_csc_adjustment = NULL, .dpp_set_csc_default = NULL, .dpp_program_regamma_pwl = oppn20_dummy_program_regamma_pwl, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h index d9ce60b6aa6e..141dea963db9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h @@ -151,6 +151,12 @@ SRI(CM_SHAPER_LUT_INDEX, CM, id) #define TF_REG_LIST_DCN20_COMMON_APPEND(id) \ + SRI(CM_GAMUT_REMAP_B_C11_C12, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C13_C14, CM, 
id),\ + SRI(CM_GAMUT_REMAP_B_C21_C22, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C23_C24, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C31_C32, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C33_C34, CM, id),\ SRI(CM_ICSC_B_C11_C12, CM, id), \ SRI(CM_ICSC_B_C33_C34, CM, id) @@ -579,11 +585,14 @@ /* DPP CM debug status register: * * Status index including current ICSC, Gamut Remap Mode is 9 - * ICSC Mode: [5..4] + * ICSC Mode: [4..3] + * Gamut Remap Mode: [10..9] */ #define CM_TEST_DEBUG_DATA_STATUS_IDX 9 -#define CM_TEST_DEBUG_DATA_ICSC_MODE_SH 4 +#define CM_TEST_DEBUG_DATA_ICSC_MODE_SH 3 #define CM_TEST_DEBUG_DATA_ICSC_MODE_MASK 0x3 +#define CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE_SH 9 +#define CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE_MASK 0x3 #define TF_REG_FIELD_LIST_DCN2_0(type) \ TF_REG_FIELD_LIST(type) \ @@ -646,6 +655,12 @@ struct dcn2_dpp_mask { uint32_t DSCL_MEM_PWR_CTRL #define DPP_DCN2_REG_VARIABLE_LIST_CM_APPEND \ + uint32_t CM_GAMUT_REMAP_B_C11_C12; \ + uint32_t CM_GAMUT_REMAP_B_C13_C14; \ + uint32_t CM_GAMUT_REMAP_B_C21_C22; \ + uint32_t CM_GAMUT_REMAP_B_C23_C24; \ + uint32_t CM_GAMUT_REMAP_B_C31_C32; \ + uint32_t CM_GAMUT_REMAP_B_C33_C34; \ uint32_t CM_ICSC_B_C11_C12; \ uint32_t CM_ICSC_B_C33_C34 @@ -679,6 +694,12 @@ enum dcn20_input_csc_select { DCN2_ICSC_SELECT_ICSC_B = 2 }; +enum dcn20_gamut_remap_select { + DCN2_GAMUT_REMAP_BYPASS = 0, + DCN2_GAMUT_REMAP_COEF_A = 1, + DCN2_GAMUT_REMAP_COEF_B = 2 +}; + void dpp20_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s); @@ -690,6 +711,10 @@ void dpp2_set_degamma( struct dpp *dpp_base, enum ipp_degamma_mode mode); +void dpp2_cm_set_gamut_remap( + struct dpp *dpp_base, + const struct dpp_grph_csc_adjustment *adjust); + void dpp2_program_input_csc( struct dpp *dpp_base, enum dc_color_space color_space, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c index 423f3daa177f..4047d406a74c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c +++ 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c @@ -158,6 +158,85 @@ void dpp2_set_degamma( } } +static void program_gamut_remap( + struct dcn20_dpp *dpp, + const uint16_t *regval, + enum dcn20_gamut_remap_select select) +{ + uint32_t cur_select = 0; + struct color_matrices_reg gam_regs; + + if (regval == NULL || select == DCN2_GAMUT_REMAP_BYPASS) { + REG_SET(CM_GAMUT_REMAP_CONTROL, 0, + CM_GAMUT_REMAP_MODE, 0); + return; + } + + /* determine which gamut_remap coefficients (A or B) we are using + * currently. select the alternate set to double buffer + * the update so gamut_remap is updated on frame boundary + */ + cur_select = IX_REG_READ(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA, + CM_TEST_DEBUG_DATA_STATUS_IDX); + + /* IX_REG_READ reads whole reg, so isolate part we want [10..9] */ + cur_select = (cur_select >> CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE_SH) + & CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE_MASK; + + /* use set B only when A is current; NOTE(review): field is compared as-is, no +1 offset applied -- confirm */ + if (cur_select != DCN2_GAMUT_REMAP_COEF_A) + select = DCN2_GAMUT_REMAP_COEF_A; + else + select = DCN2_GAMUT_REMAP_COEF_B; + + gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_GAMUT_REMAP_C11; + gam_regs.masks.csc_c11 = dpp->tf_mask->CM_GAMUT_REMAP_C11; + gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_GAMUT_REMAP_C12; + gam_regs.masks.csc_c12 = dpp->tf_mask->CM_GAMUT_REMAP_C12; + + if (select == DCN2_GAMUT_REMAP_COEF_A) { + gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_C11_C12); + gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_C33_C34); + } else { + gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_B_C11_C12); + gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_B_C33_C34); + } + + cm_helper_program_color_matrices( + dpp->base.ctx, + regval, + &gam_regs); + + REG_SET( + CM_GAMUT_REMAP_CONTROL, 0, + CM_GAMUT_REMAP_MODE, select); + +} + +void dpp2_cm_set_gamut_remap( + struct dpp *dpp_base, + const struct dpp_grph_csc_adjustment *adjust) +{ + struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); + int i = 0; + + if
(adjust->gamut_adjust_type != GRAPHICS_GAMUT_ADJUST_TYPE_SW) + /* Bypass if type is bypass or hw */ + program_gamut_remap(dpp, NULL, DCN2_GAMUT_REMAP_BYPASS); + else { + struct fixed31_32 arr_matrix[12]; + uint16_t arr_reg_val[12]; + + for (i = 0; i < 12; i++) + arr_matrix[i] = adjust->temperature_matrix[i]; + + convert_float_matrix( + arr_reg_val, arr_matrix, 12); + + program_gamut_remap(dpp, arr_reg_val, DCN2_GAMUT_REMAP_COEF_A); + } +} + void dpp2_program_input_csc( struct dpp *dpp_base, enum dc_color_space color_space, @@ -199,12 +278,11 @@ void dpp2_program_input_csc( cur_select = IX_REG_READ(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA, CM_TEST_DEBUG_DATA_STATUS_IDX); - /* IX_REG_READ reads whole reg, so isolate part we want [5..4] */ + /* IX_REG_READ reads whole reg, so isolate part we want [4..3] */ cur_select = (cur_select >> CM_TEST_DEBUG_DATA_ICSC_MODE_SH) & CM_TEST_DEBUG_DATA_ICSC_MODE_MASK; - /* value stored in dbg reg will be 1 greater than mode we want */ - if (cur_select - 1 != DCN2_ICSC_SELECT_ICSC_A) + if (cur_select != DCN2_ICSC_SELECT_ICSC_A) select = DCN2_ICSC_SELECT_ICSC_A; else select = DCN2_ICSC_SELECT_ICSC_B; From 8c0192533c39660ae229d7b80adeeb3bc63a3eba Mon Sep 17 00:00:00 2001 From: Yongqiang Sun Date: Mon, 16 Dec 2019 17:21:10 -0500 Subject: [PATCH 090/113] drm/amd/display: programing surface flip by dmcub. Programming surface flip addresses via dmcub uC for optimizing the data flush. 
Signed-off-by: Yongqiang Sun Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 116 ++++++++++++++++++ .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 36 +++++- .../gpu/drm/amd/display/dmub/inc/dmub_rb.h | 18 ++- 4 files changed, 165 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index dfc66954a24b..ecd2257de80b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -420,6 +420,7 @@ struct dc_debug_options { bool nv12_iflip_vm_wa; bool disable_dram_clock_change_vactive_support; bool validate_dml_output; + bool enable_dmcub_surface_flip; }; struct dc_debug_data { diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index 4d2564f79395..1f4e2cd08d4c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -29,6 +29,8 @@ #include "dm_services.h" #include "reg_helper.h" +#include "dc_dmub_srv.h" + #define DC_LOGGER_INIT(logger) #define REG(reg)\ @@ -688,6 +690,113 @@ void hubp21_validate_dml_output(struct hubp *hubp, dml_dlg_attr->refcyc_per_meta_chunk_flip_l, dlg_attr.refcyc_per_meta_chunk_flip_l); } +static void program_video_progressive_dmcub( + struct dc_dmub_srv *dmcub, + struct hubp *hubp, + const struct dc_plane_address *address, + bool flip_immediate) +{ + struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); + struct dmub_rb_cmd_flip surface_flip = { 0 }; + + surface_flip.header.type = DMUB_CMD__SURFACE_FLIP; + + surface_flip.flip.addr_type = address->type; + surface_flip.flip.immediate = flip_immediate; + surface_flip.flip.vmid = address->vmid; + + surface_flip.flip.hubp_inst = hubp->inst; + surface_flip.flip.tmz_surface = address->tmz_surface; + + switch (address->type) { + case 
PLN_ADDR_TYPE_GRAPHICS: + if (address->grph.addr.quad_part == 0) + return; + + if (address->grph.meta_addr.quad_part != 0) { + surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS = + address->grph.meta_addr.low_part; + surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = + address->grph.meta_addr.high_part; + } + + surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = + address->grph.addr.low_part; + surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + address->grph.addr.high_part; + break; + case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE: + if (address->video_progressive.luma_addr.quad_part == 0 + || address->video_progressive.chroma_addr.quad_part == 0) + return; + + if (address->video_progressive.luma_meta_addr.quad_part != 0) { + surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS = + address->video_progressive.luma_meta_addr.low_part; + surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = + address->video_progressive.luma_meta_addr.high_part; + + surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_C = + address->video_progressive.chroma_meta_addr.low_part; + surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C = + address->video_progressive.chroma_meta_addr.high_part; + } + + surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = + address->video_progressive.luma_addr.low_part; + surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + address->video_progressive.luma_addr.high_part; + + surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = + address->video_progressive.chroma_addr.low_part; + surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = + address->video_progressive.chroma_addr.high_part; + + break; + case PLN_ADDR_TYPE_GRPH_STEREO: + if (address->grph_stereo.left_addr.quad_part == 0) + return; + if (address->grph_stereo.right_addr.quad_part == 0) + return; + + surface_flip.flip.grph_stereo = true; + + if (address->grph_stereo.right_meta_addr.quad_part != 0) { + surface_flip.flip.DCSURF_SECONDARY_META_SURFACE_ADDRESS = + 
address->grph_stereo.right_meta_addr.low_part; + surface_flip.flip.DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH = + address->grph_stereo.right_meta_addr.high_part; + } + + if (address->grph_stereo.left_meta_addr.quad_part != 0) { + surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS = + address->grph_stereo.left_meta_addr.low_part; + surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = + address->grph_stereo.left_meta_addr.high_part; + } + + surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = + address->grph_stereo.left_addr.low_part; + surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + address->grph_stereo.left_addr.high_part; + + surface_flip.flip.DCSURF_SECONDARY_SURFACE_ADDRESS = + address->grph_stereo.right_addr.low_part; + surface_flip.flip.DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH = + address->grph_stereo.right_addr.high_part; + + break; + + } + + PERF_TRACE(); // TODO: remove after performance is stable. + dc_dmub_srv_cmd_queue(dmcub, &surface_flip.header); + PERF_TRACE(); // TODO: remove after performance is stable. + dc_dmub_srv_cmd_execute(dmcub); + PERF_TRACE(); // TODO: remove after performance is stable. 
+ dc_dmub_srv_wait_idle(dmcub); +} + bool hubp21_program_surface_flip_and_addr( struct hubp *hubp, const struct dc_plane_address *address, @@ -696,6 +805,13 @@ bool hubp21_program_surface_flip_and_addr( struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); struct dc_debug_options *debug = &hubp->ctx->dc->debug; + + if (hubp->ctx->dc->debug.enable_dmcub_surface_flip) { + program_video_progressive_dmcub(hubp->ctx->dmub_srv, hubp, address, flip_immediate); + hubp->request_address = *address; + return true; + } + //program flip type REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_TYPE, flip_immediate); diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index d8fdf7e76ded..919323257edb 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -30,12 +30,13 @@ #include "dmub_cmd_dal.h" #include "dmub_cmd_vbios.h" #include "atomfirmware.h" - +#include "dc_hw_types.h" #define DMUB_RB_CMD_SIZE 64 #define DMUB_RB_MAX_ENTRY 128 #define DMUB_RB_SIZE (DMUB_RB_CMD_SIZE * DMUB_RB_MAX_ENTRY) #define REG_SET_MASK 0xFFFF + /* * Command IDs should be treated as stable ABI. * Do not reuse or modify IDs. 
@@ -47,6 +48,7 @@ enum dmub_cmd_type { DMUB_CMD__REG_SEQ_FIELD_UPDATE_SEQ = 2, DMUB_CMD__REG_SEQ_BURST_WRITE = 3, DMUB_CMD__REG_REG_WAIT = 4, + DMUB_CMD__SURFACE_FLIP = 5, DMUB_CMD__PSR = 64, DMUB_CMD__VBIOS = 128, }; @@ -145,6 +147,37 @@ struct dmub_rb_cmd_reg_wait { struct dmub_cmd_reg_wait_data reg_wait; }; +#ifndef PHYSICAL_ADDRESS_LOC +#define PHYSICAL_ADDRESS_LOC union large_integer +#endif + +struct dmub_cmd_surface_flip { + uint32_t DCSURF_SURFACE_CONTROL; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; + uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS; + uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS; + enum dc_plane_addr_type addr_type; + uint8_t hubp_inst; + bool tmz_surface; + bool immediate; + uint8_t vmid; + bool grph_stereo; +}; + +struct dmub_rb_cmd_flip { + struct dmub_cmd_header header; + struct dmub_cmd_surface_flip flip; +}; + struct dmub_cmd_digx_encoder_control_data { union dig_encoder_control_parameters_v1_5 dig; }; @@ -262,6 +295,7 @@ union dmub_rb_cmd { struct dmub_rb_cmd_psr_enable psr_enable; struct dmub_rb_cmd_psr_copy_settings psr_copy_settings; struct dmub_rb_cmd_psr_set_level psr_set_level; + struct dmub_rb_cmd_flip surface_flip; }; #pragma pack(pop) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h index ade688fd32f0..df875fdd2ab0 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h @@ -73,12 +73,17 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) static inline bool 
dmub_rb_push_front(struct dmub_rb *rb, const struct dmub_cmd_header *cmd) { - uint8_t *wt_ptr = (uint8_t *)(rb->base_address) + rb->wrpt; + uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t); + const uint64_t *src = (const uint64_t *)cmd; + int i; if (dmub_rb_full(rb)) return false; - dmub_memcpy(wt_ptr, cmd, DMUB_RB_CMD_SIZE); + // copying data + for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) + *dst++ = *src++; + rb->wrpt += DMUB_RB_CMD_SIZE; if (rb->wrpt >= rb->capacity) @@ -115,14 +120,17 @@ static inline bool dmub_rb_pop_front(struct dmub_rb *rb) static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) { - uint8_t buf[DMUB_RB_CMD_SIZE]; uint32_t rptr = rb->rptr; uint32_t wptr = rb->wrpt; while (rptr != wptr) { - const uint8_t *data = (const uint8_t *)rb->base_address + rptr; + uint64_t volatile *data = (uint64_t volatile *)rb->base_address + rptr / sizeof(uint64_t); + //uint64_t volatile *p = (uint64_t volatile *)data; + uint64_t temp; + int i; - dmub_memcpy(buf, data, DMUB_RB_CMD_SIZE); + for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) + temp = *data++; rptr += DMUB_RB_CMD_SIZE; if (rptr >= rb->capacity) From ec256f449c07b2498f624ec7b9ca41177c989d7c Mon Sep 17 00:00:00 2001 From: Wyatt Wood Date: Wed, 11 Dec 2019 14:16:57 -0500 Subject: [PATCH 091/113] drm/amd/display: DMCUB FW Changes to support PSR [Why] Moving PSR from DMCU to DMCUB. [How] Cleanup psr spec files and add PSR hw programming files. No functionality is included in this change. 
Signed-off-by: Wyatt Wood Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 19 +++++-------------- .../drm/amd/display/dmub/inc/dmub_cmd_dal.h | 6 ++++++ 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 919323257edb..3b79079ec9b8 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -262,23 +262,13 @@ struct dmub_rb_cmd_psr_enable { struct dmub_cmd_header header; }; -struct dmub_cmd_psr_notify_vblank_data { - uint32_t vblank_int; // Which vblank interrupt was triggered +struct dmub_cmd_psr_setup_data { + enum psr_version version; // PSR version 1 or 2 }; -struct dmub_rb_cmd_notify_vblank { +struct dmub_rb_cmd_psr_setup { struct dmub_cmd_header header; - struct dmub_cmd_psr_notify_vblank_data psr_notify_vblank_data; -}; - -struct dmub_cmd_psr_notify_static_state_data { - uint32_t ss_int; // Which static screen interrupt was triggered - uint32_t ss_enter; // Enter (1) or exit (0) static screen -}; - -struct dmub_rb_cmd_psr_notify_static_state { - struct dmub_cmd_header header; - struct dmub_cmd_psr_notify_static_state_data psr_notify_static_state_data; + struct dmub_cmd_psr_setup_data psr_setup_data; }; union dmub_rb_cmd { @@ -296,6 +286,7 @@ union dmub_rb_cmd { struct dmub_rb_cmd_psr_copy_settings psr_copy_settings; struct dmub_rb_cmd_psr_set_level psr_set_level; struct dmub_rb_cmd_flip surface_flip; + struct dmub_rb_cmd_psr_setup psr_setup; }; #pragma pack(pop) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h index 14f13e8a6f3b..20b47649f991 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h @@ -36,6 +36,12 @@ enum dmub_cmd_psr_type { 
DMUB_CMD__PSR_DISABLE = 1, DMUB_CMD__PSR_COPY_SETTINGS = 2, DMUB_CMD__PSR_SET_LEVEL = 3, + DMUB_CMD__PSR_SETUP = 4, +}; + +enum psr_version { + PSR_VERSION_1 = 0x0, + PSR_VERSION_2 = 0x10, }; #endif /* _DMUB_CMD_DAL_H_ */ From c1e3417558beda21fd41ed870ca16b36a69188d5 Mon Sep 17 00:00:00 2001 From: Noah Abradjian Date: Tue, 17 Dec 2019 15:49:14 -0500 Subject: [PATCH 092/113] drm/amd/display: Indirect reg read macro with shift and mask [Why] Recent double buffering changes for dcn2 use IX_REG_READ. However, this macro returns the full register value, with the need to manually shift and mask it to retrieve field data. [How] Create new IX_REG_GET macro that handles shift and mask. Use this for double buffering reads instead of IX_REG_READ. Signed-off-by: Noah Abradjian Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_helper.c | 30 +++++++++++++++++++ .../gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h | 16 +++++++--- .../drm/amd/display/dc/dcn20/dcn20_dpp_cm.c | 18 ++++------- .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c | 16 ++++------ .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h | 10 +++++-- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 10 ++++--- .../drm/amd/display/dc/dcn21/dcn21_resource.c | 10 ++++--- .../gpu/drm/amd/display/dc/inc/reg_helper.h | 13 ++++++++ 8 files changed, 86 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 02a63e9cb62f..737048d8a96c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -552,6 +552,36 @@ uint32_t generic_read_indirect_reg(const struct dc_context *ctx, return value; } +uint32_t generic_indirect_reg_get(const struct dc_context *ctx, + uint32_t addr_index, uint32_t addr_data, + uint32_t index, int n, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + ...) 
+{ + uint32_t shift, mask, *field_value; + uint32_t value = 0; + int i = 1; + + va_list ap; + + va_start(ap, field_value1); + + value = generic_read_indirect_reg(ctx, addr_index, addr_data, index); + *field_value1 = get_reg_field_value_ex(value, mask1, shift1); + + while (i < n) { + shift = va_arg(ap, uint32_t); + mask = va_arg(ap, uint32_t); + field_value = va_arg(ap, uint32_t *); + + *field_value = get_reg_field_value_ex(value, mask, shift); + i++; + } + + va_end(ap); + + return value; +} uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx, uint32_t addr_index, uint32_t addr_data, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h index 141dea963db9..27610251c57f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h @@ -589,14 +589,22 @@ * Gamut Remap Mode: [10..9] */ #define CM_TEST_DEBUG_DATA_STATUS_IDX 9 -#define CM_TEST_DEBUG_DATA_ICSC_MODE_SH 3 -#define CM_TEST_DEBUG_DATA_ICSC_MODE_MASK 0x3 -#define CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE_SH 9 -#define CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE_MASK 0x3 + +#define TF_DEBUG_REG_LIST_SH_DCN20 \ + TF_DEBUG_REG_LIST_SH_DCN10, \ + .CM_TEST_DEBUG_DATA_ICSC_MODE = 3, \ + .CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE = 9 + +#define TF_DEBUG_REG_LIST_MASK_DCN20 \ + TF_DEBUG_REG_LIST_MASK_DCN10, \ + .CM_TEST_DEBUG_DATA_ICSC_MODE = 0x18, \ + .CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE = 0x600 #define TF_REG_FIELD_LIST_DCN2_0(type) \ TF_REG_FIELD_LIST(type) \ type CM_BLNDGAM_LUT_DATA; \ + type CM_TEST_DEBUG_DATA_ICSC_MODE; \ + type CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE; \ type FORMAT_CNV16; \ type CNVC_BYPASS_MSB_ALIGN; \ type CLAMP_POSITIVE; \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c index 4047d406a74c..8dc3d1f73984 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c +++ 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c @@ -176,12 +176,9 @@ static void program_gamut_remap( * currently. select the alternate set to double buffer * the update so gamut_remap is updated on frame boundary */ - cur_select = IX_REG_READ(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA, - CM_TEST_DEBUG_DATA_STATUS_IDX); - - /* IX_REG_READ reads whole reg, so isolate part we want [10..9] */ - cur_select = (cur_select >> CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE_SH) - & CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE_MASK; + IX_REG_GET(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA, + CM_TEST_DEBUG_DATA_STATUS_IDX, + CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE, &cur_select); /* value stored in dbg reg will be 1 greater than mode we want */ if (cur_select != DCN2_GAMUT_REMAP_COEF_A) @@ -275,12 +272,9 @@ void dpp2_program_input_csc( * currently. select the alternate set to double buffer * the CSC update so CSC is updated on frame boundary */ - cur_select = IX_REG_READ(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA, - CM_TEST_DEBUG_DATA_STATUS_IDX); - - /* IX_REG_READ reads whole reg, so isolate part we want [4..3] */ - cur_select = (cur_select >> CM_TEST_DEBUG_DATA_ICSC_MODE_SH) - & CM_TEST_DEBUG_DATA_ICSC_MODE_MASK; + IX_REG_GET(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA, + CM_TEST_DEBUG_DATA_STATUS_IDX, + CM_TEST_DEBUG_DATA_ICSC_MODE, &cur_select); if (cur_select != DCN2_ICSC_SELECT_ICSC_A) select = DCN2_ICSC_SELECT_ICSC_A; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index ce95e7db4814..de9c857ab3e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -153,11 +153,9 @@ void mpc2_set_output_csc( * currently. 
select the alternate set to double buffer * the CSC update so CSC is updated on frame boundary */ - cur_mode = IX_REG_READ(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA, - MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX); - - /* Isolate part of reg data we want [1..0] */ - cur_mode = cur_mode & MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE_MASK; + IX_REG_GET(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA, + MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX, + MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE, &cur_mode); if (cur_mode != MPC_OUTPUT_CSC_COEF_A) ocsc_mode = MPC_OUTPUT_CSC_COEF_A; @@ -213,11 +211,9 @@ void mpc2_set_ocsc_default( * currently. select the alternate set to double buffer * the CSC update so CSC is updated on frame boundary */ - cur_mode = IX_REG_READ(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA, - MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX); - - /* Isolate part of reg data we want [1..0] */ - cur_mode = cur_mode & MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE_MASK; + IX_REG_GET(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA, + MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX, + MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE, &cur_mode); if (cur_mode != MPC_OUTPUT_CSC_COEF_A) ocsc_mode = MPC_OUTPUT_CSC_COEF_A; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h index 950e6f9cd23e..c78fd5123497 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h @@ -140,7 +140,6 @@ SF(MPCC0_MPCC_TOP_GAIN, MPCC_TOP_GAIN, mask_sh),\ SF(MPCC0_MPCC_BOT_GAIN_INSIDE, MPCC_BOT_GAIN_INSIDE, mask_sh),\ SF(MPCC0_MPCC_BOT_GAIN_OUTSIDE, MPCC_BOT_GAIN_OUTSIDE, mask_sh),\ - SF(MPC_OCSC_TEST_DEBUG_DATA, MPC_OCSC_TEST_DEBUG_DATA, mask_sh),\ SF(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_INDEX, mask_sh),\ SF(MPC_OUT0_CSC_MODE, MPC_OCSC_MODE, mask_sh),\ SF(MPC_OUT0_CSC_C11_C12_A, MPC_OCSC_C11_A, mask_sh),\ @@ -189,7 +188,12 @@ * OCSC Mode: [1..0] */ #define MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX 1 -#define 
MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE_MASK 0x3 + +#define MPC_DEBUG_REG_LIST_SH_DCN20 \ + .MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE = 0 + +#define MPC_DEBUG_REG_LIST_MASK_DCN20 \ + .MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE = 0x3 #define MPC_REG_FIELD_LIST_DCN2_0(type) \ MPC_REG_FIELD_LIST(type)\ @@ -198,7 +202,7 @@ type MPCC_TOP_GAIN;\ type MPCC_BOT_GAIN_INSIDE;\ type MPCC_BOT_GAIN_OUTSIDE;\ - type MPC_OCSC_TEST_DEBUG_DATA;\ + type MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE;\ type MPC_OCSC_TEST_DEBUG_INDEX;\ type MPC_OCSC_MODE;\ type MPC_OCSC_C11_A;\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 2ef6d0e2b0ed..85f90f3e24cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -652,12 +652,12 @@ static const struct dcn2_dpp_registers tf_regs[] = { static const struct dcn2_dpp_shift tf_shift = { TF_REG_LIST_SH_MASK_DCN20(__SHIFT), - TF_DEBUG_REG_LIST_SH_DCN10 + TF_DEBUG_REG_LIST_SH_DCN20 }; static const struct dcn2_dpp_mask tf_mask = { TF_REG_LIST_SH_MASK_DCN20(_MASK), - TF_DEBUG_REG_LIST_MASK_DCN10 + TF_DEBUG_REG_LIST_MASK_DCN20 }; #define dwbc_regs_dcn2(id)\ @@ -711,11 +711,13 @@ static const struct dcn20_mpc_registers mpc_regs = { }; static const struct dcn20_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) + MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT), + MPC_DEBUG_REG_LIST_SH_DCN20 }; static const struct dcn20_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK) + MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK), + MPC_DEBUG_REG_LIST_MASK_DCN20 }; #define tg_regs(id)\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 0dd724de201a..f1ec3448c0c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -471,11 +471,13 @@ static const struct dcn20_mpc_registers mpc_regs = { 
}; static const struct dcn20_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) + MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT), + MPC_DEBUG_REG_LIST_SH_DCN20 }; static const struct dcn20_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK) + MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK), + MPC_DEBUG_REG_LIST_MASK_DCN20 }; #define hubp_regs(id)\ @@ -622,12 +624,12 @@ static const struct dcn2_dpp_registers tf_regs[] = { static const struct dcn2_dpp_shift tf_shift = { TF_REG_LIST_SH_MASK_DCN20(__SHIFT), - TF_DEBUG_REG_LIST_SH_DCN10 + TF_DEBUG_REG_LIST_SH_DCN20 }; static const struct dcn2_dpp_mask tf_mask = { TF_REG_LIST_SH_MASK_DCN20(_MASK), - TF_DEBUG_REG_LIST_MASK_DCN10 + TF_DEBUG_REG_LIST_MASK_DCN20 }; #define stream_enc_regs(id)\ diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h index 47e307388581..2470405e996b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h +++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h @@ -458,7 +458,14 @@ uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr, #define IX_REG_READ(index_reg_name, data_reg_name, index) \ generic_read_indirect_reg(CTX, REG(index_reg_name), REG(data_reg_name), IND_REG(index)) +#define IX_REG_GET_N(index_reg_name, data_reg_name, index, n, ...) \ + generic_indirect_reg_get(CTX, REG(index_reg_name), REG(data_reg_name), \ + IND_REG(index), \ + n, __VA_ARGS__) +#define IX_REG_GET(index_reg_name, data_reg_name, index, field, val) \ + IX_REG_GET_N(index_reg_name, data_reg_name, index, 1, \ + FN(data_reg_name, field), val) #define IX_REG_UPDATE_N(index_reg_name, data_reg_name, index, n, ...) 
\ generic_indirect_reg_update_ex(CTX, \ @@ -479,6 +486,12 @@ uint32_t generic_read_indirect_reg(const struct dc_context *ctx, uint32_t addr_index, uint32_t addr_data, uint32_t index); +uint32_t generic_indirect_reg_get(const struct dc_context *ctx, + uint32_t addr_index, uint32_t addr_data, + uint32_t index, int n, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + ...); + uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx, uint32_t addr_index, uint32_t addr_data, uint32_t index, uint32_t reg_val, int n, From 22aa56145f71a271ac1f0151bc90d0366b41928e Mon Sep 17 00:00:00 2001 From: Yongqiang Sun Date: Tue, 17 Dec 2019 16:26:48 -0500 Subject: [PATCH 093/113] drm/amd/display: Refactor surface flip programming Rework surface programming for RN to separate preparing parameters and register programming. Signed-off-by: Yongqiang Sun Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 324 +++++++----------- .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 16 +- 2 files changed, 127 insertions(+), 213 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index 1f4e2cd08d4c..12396c371569 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -690,28 +690,114 @@ void hubp21_validate_dml_output(struct hubp *hubp, dml_dlg_attr->refcyc_per_meta_chunk_flip_l, dlg_attr.refcyc_per_meta_chunk_flip_l); } -static void program_video_progressive_dmcub( - struct dc_dmub_srv *dmcub, +static void program_surface_flip_and_addr(struct hubp *hubp, struct dmub_rb_cmd_flip *surface_flip) +{ + struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); + + REG_UPDATE_3(DCSURF_FLIP_CONTROL, + SURFACE_FLIP_TYPE, surface_flip->flip.flip_params.immediate, + SURFACE_FLIP_MODE_FOR_STEREOSYNC, surface_flip->flip.flip_params.grph_stereo, + 
SURFACE_FLIP_IN_STEREOSYNC, surface_flip->flip.flip_params.grph_stereo); + + REG_UPDATE(VMID_SETTINGS_0, + VMID, surface_flip->flip.flip_params.vmid); + + REG_UPDATE_8(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_TMZ, surface_flip->flip.flip_params.tmz_surface, + PRIMARY_SURFACE_TMZ_C, surface_flip->flip.flip_params.tmz_surface, + PRIMARY_META_SURFACE_TMZ, surface_flip->flip.flip_params.tmz_surface, + PRIMARY_META_SURFACE_TMZ_C, surface_flip->flip.flip_params.tmz_surface, + SECONDARY_SURFACE_TMZ, surface_flip->flip.flip_params.tmz_surface, + SECONDARY_SURFACE_TMZ_C, surface_flip->flip.flip_params.tmz_surface, + SECONDARY_META_SURFACE_TMZ, surface_flip->flip.flip_params.tmz_surface, + SECONDARY_META_SURFACE_TMZ_C, surface_flip->flip.flip_params.tmz_surface); + + REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0, + PRIMARY_META_SURFACE_ADDRESS_HIGH_C, + surface_flip->flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C); + + REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0, + PRIMARY_META_SURFACE_ADDRESS_C, + surface_flip->flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_C); + + REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, + PRIMARY_META_SURFACE_ADDRESS_HIGH, + surface_flip->flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH); + + REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, + PRIMARY_META_SURFACE_ADDRESS, + surface_flip->flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS); + + REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, 0, + SECONDARY_META_SURFACE_ADDRESS_HIGH, + surface_flip->flip.DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH); + + REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS, 0, + SECONDARY_META_SURFACE_ADDRESS, + surface_flip->flip.DCSURF_SECONDARY_META_SURFACE_ADDRESS); + + + REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0, + SECONDARY_SURFACE_ADDRESS_HIGH, + surface_flip->flip.DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH); + + REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0, + SECONDARY_SURFACE_ADDRESS, + surface_flip->flip.DCSURF_SECONDARY_SURFACE_ADDRESS); + + + 
REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, + PRIMARY_SURFACE_ADDRESS_HIGH_C, + surface_flip->flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, + PRIMARY_SURFACE_ADDRESS_C, + surface_flip->flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, + PRIMARY_SURFACE_ADDRESS_HIGH, + surface_flip->flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, + PRIMARY_SURFACE_ADDRESS, + surface_flip->flip.DCSURF_PRIMARY_SURFACE_ADDRESS); +} + +void program_surface_flip_and_addr_dmcub(struct hubp *hubp, struct dmub_rb_cmd_flip *surface_flip) +{ + struct dc_dmub_srv *dmcub = hubp->ctx->dmub_srv; + struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); + + PERF_TRACE(); // TODO: remove after performance is stable. + dc_dmub_srv_cmd_queue(dmcub, &surface_flip->header); + PERF_TRACE(); // TODO: remove after performance is stable. + dc_dmub_srv_cmd_execute(dmcub); + PERF_TRACE(); // TODO: remove after performance is stable. + dc_dmub_srv_wait_idle(dmcub); + PERF_TRACE(); // TODO: remove after performance is stable. 
+} + +bool hubp21_program_surface_flip_and_addr( struct hubp *hubp, const struct dc_plane_address *address, bool flip_immediate) { - struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); struct dmub_rb_cmd_flip surface_flip = { 0 }; + bool grph_stereo = false; + struct dc_debug_options *debug = &hubp->ctx->dc->debug; + struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); surface_flip.header.type = DMUB_CMD__SURFACE_FLIP; - surface_flip.flip.addr_type = address->type; - surface_flip.flip.immediate = flip_immediate; - surface_flip.flip.vmid = address->vmid; - - surface_flip.flip.hubp_inst = hubp->inst; - surface_flip.flip.tmz_surface = address->tmz_surface; + surface_flip.flip.flip_params.vmid = address->vmid; + surface_flip.flip.flip_params.hubp_inst = hubp->inst; switch (address->type) { case PLN_ADDR_TYPE_GRAPHICS: - if (address->grph.addr.quad_part == 0) - return; + if (address->grph.addr.quad_part == 0) { + BREAK_TO_DEBUGGER(); + break; + } if (address->grph.meta_addr.quad_part != 0) { surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS = @@ -728,7 +814,7 @@ static void program_video_progressive_dmcub( case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE: if (address->video_progressive.luma_addr.quad_part == 0 || address->video_progressive.chroma_addr.quad_part == 0) - return; + break; if (address->video_progressive.luma_meta_addr.quad_part != 0) { surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS = @@ -747,19 +833,24 @@ static void program_video_progressive_dmcub( surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = address->video_progressive.luma_addr.high_part; - surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = - address->video_progressive.chroma_addr.low_part; + if (debug->nv12_iflip_vm_wa) { + surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = + address->video_progressive.chroma_addr.low_part + hubp21->PLAT_54186_wa_chroma_addr_offset; + } else + surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = + address->video_progressive.chroma_addr.low_part; + 
surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = address->video_progressive.chroma_addr.high_part; break; case PLN_ADDR_TYPE_GRPH_STEREO: if (address->grph_stereo.left_addr.quad_part == 0) - return; + break; if (address->grph_stereo.right_addr.quad_part == 0) - return; + break; - surface_flip.flip.grph_stereo = true; + grph_stereo = true; if (address->grph_stereo.right_meta_addr.quad_part != 0) { surface_flip.flip.DCSURF_SECONDARY_META_SURFACE_ADDRESS = @@ -786,200 +877,21 @@ static void program_video_progressive_dmcub( address->grph_stereo.right_addr.high_part; break; - - } - - PERF_TRACE(); // TODO: remove after performance is stable. - dc_dmub_srv_cmd_queue(dmcub, &surface_flip.header); - PERF_TRACE(); // TODO: remove after performance is stable. - dc_dmub_srv_cmd_execute(dmcub); - PERF_TRACE(); // TODO: remove after performance is stable. - dc_dmub_srv_wait_idle(dmcub); -} - -bool hubp21_program_surface_flip_and_addr( - struct hubp *hubp, - const struct dc_plane_address *address, - bool flip_immediate) -{ - struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); - struct dc_debug_options *debug = &hubp->ctx->dc->debug; - - - if (hubp->ctx->dc->debug.enable_dmcub_surface_flip) { - program_video_progressive_dmcub(hubp->ctx->dmub_srv, hubp, address, flip_immediate); - hubp->request_address = *address; - return true; - } - - //program flip type - REG_UPDATE(DCSURF_FLIP_CONTROL, - SURFACE_FLIP_TYPE, flip_immediate); - - // Program VMID reg - REG_UPDATE(VMID_SETTINGS_0, - VMID, address->vmid); - - if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) { - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1); - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1); - - } else { - // turn off stereo if not in stereo - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x0); - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x0); - } - - - - /* HW automatically latch rest of address register on write to - * 
DCSURF_PRIMARY_SURFACE_ADDRESS if SURFACE_UPDATE_LOCK is not used - * - * program high first and then the low addr, order matters! - */ - switch (address->type) { - case PLN_ADDR_TYPE_GRAPHICS: - /* DCN1.0 does not support const color - * TODO: program DCHUBBUB_RET_PATH_DCC_CFGx_0/1 - * base on address->grph.dcc_const_color - * x = 0, 2, 4, 6 for pipe 0, 1, 2, 3 for rgb and luma - * x = 1, 3, 5, 7 for pipe 0, 1, 2, 3 for chroma - */ - - if (address->grph.addr.quad_part == 0) - break; - - REG_UPDATE_2(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ, address->tmz_surface); - - if (address->grph.meta_addr.quad_part != 0) { - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_META_SURFACE_ADDRESS_HIGH, - address->grph.meta_addr.high_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, - PRIMARY_META_SURFACE_ADDRESS, - address->grph.meta_addr.low_part); - } - - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_SURFACE_ADDRESS_HIGH, - address->grph.addr.high_part); - - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, - PRIMARY_SURFACE_ADDRESS, - address->grph.addr.low_part); - break; - case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE: - if (address->video_progressive.luma_addr.quad_part == 0 - || address->video_progressive.chroma_addr.quad_part == 0) - break; - - REG_UPDATE_4(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, address->tmz_surface, - PRIMARY_SURFACE_TMZ_C, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface); - - if (address->video_progressive.luma_meta_addr.quad_part != 0) { - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0, - PRIMARY_META_SURFACE_ADDRESS_HIGH_C, - address->video_progressive.chroma_meta_addr.high_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0, - PRIMARY_META_SURFACE_ADDRESS_C, - address->video_progressive.chroma_meta_addr.low_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, - 
PRIMARY_META_SURFACE_ADDRESS_HIGH, - address->video_progressive.luma_meta_addr.high_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, - PRIMARY_META_SURFACE_ADDRESS, - address->video_progressive.luma_meta_addr.low_part); - } - - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, - PRIMARY_SURFACE_ADDRESS_HIGH_C, - address->video_progressive.chroma_addr.high_part); - - if (debug->nv12_iflip_vm_wa) { - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, - PRIMARY_SURFACE_ADDRESS_C, - address->video_progressive.chroma_addr.low_part + hubp21->PLAT_54186_wa_chroma_addr_offset); - } else { - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, - PRIMARY_SURFACE_ADDRESS_C, - address->video_progressive.chroma_addr.low_part); - } - - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_SURFACE_ADDRESS_HIGH, - address->video_progressive.luma_addr.high_part); - - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, - PRIMARY_SURFACE_ADDRESS, - address->video_progressive.luma_addr.low_part); - break; - case PLN_ADDR_TYPE_GRPH_STEREO: - if (address->grph_stereo.left_addr.quad_part == 0) - break; - if (address->grph_stereo.right_addr.quad_part == 0) - break; - - REG_UPDATE_8(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, address->tmz_surface, - PRIMARY_SURFACE_TMZ_C, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface, - SECONDARY_SURFACE_TMZ, address->tmz_surface, - SECONDARY_SURFACE_TMZ_C, address->tmz_surface, - SECONDARY_META_SURFACE_TMZ, address->tmz_surface, - SECONDARY_META_SURFACE_TMZ_C, address->tmz_surface); - - if (address->grph_stereo.right_meta_addr.quad_part != 0) { - - REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, 0, - SECONDARY_META_SURFACE_ADDRESS_HIGH, - address->grph_stereo.right_meta_addr.high_part); - - REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS, 0, - SECONDARY_META_SURFACE_ADDRESS, - address->grph_stereo.right_meta_addr.low_part); - } - if (address->grph_stereo.left_meta_addr.quad_part != 
0) { - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_META_SURFACE_ADDRESS_HIGH, - address->grph_stereo.left_meta_addr.high_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, - PRIMARY_META_SURFACE_ADDRESS, - address->grph_stereo.left_meta_addr.low_part); - } - - REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0, - SECONDARY_SURFACE_ADDRESS_HIGH, - address->grph_stereo.right_addr.high_part); - - REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0, - SECONDARY_SURFACE_ADDRESS, - address->grph_stereo.right_addr.low_part); - - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_SURFACE_ADDRESS_HIGH, - address->grph_stereo.left_addr.high_part); - - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, - PRIMARY_SURFACE_ADDRESS, - address->grph_stereo.left_addr.low_part); - break; default: BREAK_TO_DEBUGGER(); break; } + surface_flip.flip.flip_params.vmid = address->vmid; + surface_flip.flip.flip_params.grph_stereo = grph_stereo; + surface_flip.flip.flip_params.tmz_surface = address->tmz_surface; + surface_flip.flip.flip_params.immediate = flip_immediate; + + if (hubp->ctx->dc->debug.enable_dmcub_surface_flip) + program_surface_flip_and_addr_dmcub(hubp, &surface_flip); + else + program_surface_flip_and_addr(hubp, &surface_flip); + hubp->request_address = *address; return true; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 3b79079ec9b8..0c3ae02148a6 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -30,7 +30,7 @@ #include "dmub_cmd_dal.h" #include "dmub_cmd_vbios.h" #include "atomfirmware.h" -#include "dc_hw_types.h" + #define DMUB_RB_CMD_SIZE 64 #define DMUB_RB_MAX_ENTRY 128 #define DMUB_RB_SIZE (DMUB_RB_CMD_SIZE * DMUB_RB_MAX_ENTRY) @@ -165,12 +165,14 @@ struct dmub_cmd_surface_flip { uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS; uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH; uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS; 
- enum dc_plane_addr_type addr_type; - uint8_t hubp_inst; - bool tmz_surface; - bool immediate; - uint8_t vmid; - bool grph_stereo; + struct { + uint8_t hubp_inst : 4; + uint8_t tmz_surface : 1; + uint8_t immediate :1; + uint8_t vmid : 4; + uint8_t grph_stereo : 1; + uint32_t reserved : 21; + } flip_params; }; struct dmub_rb_cmd_flip { From 5adc673c775c8dd41cdfda098279a96f954c6ce9 Mon Sep 17 00:00:00 2001 From: Wyatt Wood Date: Wed, 18 Dec 2019 15:11:55 -0500 Subject: [PATCH 094/113] drm/amd/display: Fix DMUB PSR command IDs [Why] The DMUB PSR command IDs do not have the correct values. [How] Fix the command IDs and cleanup the formatting. Signed-off-by: Wyatt Wood Reviewed-by: Aric Cyr Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h index 20b47649f991..7b69eb37f762 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h @@ -32,16 +32,17 @@ */ enum dmub_cmd_psr_type { - DMUB_CMD__PSR_ENABLE = 0, - DMUB_CMD__PSR_DISABLE = 1, - DMUB_CMD__PSR_COPY_SETTINGS = 2, - DMUB_CMD__PSR_SET_LEVEL = 3, - DMUB_CMD__PSR_SETUP = 4, + DMUB_CMD__PSR_SETUP = 0, + DMUB_CMD__PSR_COPY_SETTINGS = 1, + DMUB_CMD__PSR_ENABLE = 2, + DMUB_CMD__PSR_DISABLE = 3, + DMUB_CMD__PSR_SET_LEVEL = 4, }; enum psr_version { - PSR_VERSION_1 = 0x0, - PSR_VERSION_2 = 0x10, + PSR_VERSION_1 = 0x10, // PSR Version 1 + PSR_VERSION_2 = 0x20, // PSR Version 2, includes selective update + PSR_VERSION_2_Y_COORD = 0x21, // PSR Version 2, includes Y-coordinate support for SU }; #endif /* _DMUB_CMD_DAL_H_ */ From ee76592482d34c56c17dd8b75bf8e30deeb1e6af Mon Sep 17 00:00:00 2001 From: George Shen Date: Tue, 17 Dec 2019 14:34:33 -0500 Subject: [PATCH 095/113] drm/amd/display: Add w/a to 
reset PHY before link training in verify_link_cap [Why] PHY will sometimes be in bad state on hotplugging display from USB-C dongle. [How] Add additional calls to disable and then enable PHY before link training starts during verify_link_cap. Signed-off-by: George Shen Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 10 ++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 3 ++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index e415f7730f43..75dc387bbb2b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1892,6 +1892,16 @@ bool dp_verify_link_cap( /* disable PHY done possible by BIOS, will be done by driver itself */ dp_disable_link_phy(link, link->connector_signal); + /* Temporary Renoir-specific workaround for SWDEV-215184; + * PHY will sometimes be in bad state on hotplugging display from certain USB-C dongle, + * so add extra cycle of enabling and disabling the PHY before first link training. 
+ */ + if (link->link_enc->features.flags.bits.DP_IS_USB_C && + link->dc->debug.usbc_combo_phy_reset_wa) { + dp_enable_link_phy(link, link->connector_signal, dp_cs_id, cur); + dp_disable_link_phy(link, link->connector_signal); + } + dp_cs_id = get_clock_source_id(link); /* link training starts with the maximum common settings diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index ecd2257de80b..8c7ff9e322f1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -421,6 +421,7 @@ struct dc_debug_options { bool disable_dram_clock_change_vactive_support; bool validate_dml_output; bool enable_dmcub_surface_flip; + bool usbc_combo_phy_reset_wa; }; struct dc_debug_data { diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index f1ec3448c0c0..0c02120311b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -835,7 +835,8 @@ static const struct dc_debug_options debug_defaults_drv = { .scl_reset_length10 = true, .sanity_checks = true, .disable_48mhz_pwrdwn = false, - .nv12_iflip_vm_wa = true + .nv12_iflip_vm_wa = true, + .usbc_combo_phy_reset_wa = true }; static const struct dc_debug_options debug_defaults_diags = { From 51f2af1d839e95e2769cc70c3b01e82e2e10ba1c Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Mon, 16 Dec 2019 12:46:49 -0500 Subject: [PATCH 096/113] drm/amd/display: rename _lvp to l_vp Signed-off-by: Charlene Liu Reviewed-by: Charlene Liu Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c | 4 ++-- .../gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c | 4 ++-- .../gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff 
--git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c index 16559f7fb952..e7a8ac7a1f22 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c @@ -937,7 +937,7 @@ static unsigned int CalculateVMAndRowBytes( *MetaRowByte = 0; } - if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) { MacroTileSizeBytes = 256; MacroTileHeight = BlockHeight256Bytes; } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x @@ -3348,7 +3348,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l == dm_420_10)) || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl || mode_lib->vba.SurfaceTiling[k] - == dm_sw_gfx7_2d_thin_lvp) + == dm_sw_gfx7_2d_thin_l_vp) && !((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 || mode_lib->vba.SourcePixelFormat[k] diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index d6fedae03dc8..22f3b5a4b3b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -997,7 +997,7 @@ static unsigned int CalculateVMAndRowBytes( *MetaRowByte = 0; } - if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) { MacroTileSizeBytes = 256; MacroTileHeight = BlockHeight256Bytes; } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x @@ -3385,7 +3385,7 @@ void 
dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode == dm_420_10)) || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl || mode_lib->vba.SurfaceTiling[k] - == dm_sw_gfx7_2d_thin_lvp) + == dm_sw_gfx7_2d_thin_l_vp) && !((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 || mode_lib->vba.SourcePixelFormat[k] diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index 5dcfbb0af825..af35b3bea909 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -1338,7 +1338,7 @@ static unsigned int CalculateVMAndRowBytes( *MetaRowByte = 0; } - if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) { MacroTileSizeBytes = 256; MacroTileHeight = BlockHeight256Bytes; } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x @@ -3453,7 +3453,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l == dm_420_10)) || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl || mode_lib->vba.SurfaceTiling[k] - == dm_sw_gfx7_2d_thin_lvp) + == dm_sw_gfx7_2d_thin_l_vp) && !((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 || mode_lib->vba.SourcePixelFormat[k] diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h index 658e0733b99d..bfc2f39bd1ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h @@ -85,7 +85,7 @@ enum dm_swizzle_mode { dm_sw_var_s_x = 29, dm_sw_var_d_x = 30, dm_sw_64kb_r_x, - dm_sw_gfx7_2d_thin_lvp, + dm_sw_gfx7_2d_thin_l_vp, dm_sw_gfx7_2d_thin_gl, }; enum lb_depth { From 
08f6c859211cc0af1b32e7fa7ec583699a06d6c3 Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Thu, 19 Dec 2019 11:37:49 -0500 Subject: [PATCH 097/113] drm/amd/display: Use SMU ClockTable Values for DML Calculations [Why] DML Initialization was previously done on dcn21_resource initialization. This meant that DML soc struct was populated with hardcoded values. [How] Move DML initialization to after updating bounding box, to use clock table values from SMU. Signed-off-by: Sung Lee Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 0c02120311b6..1d741bca2211 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1349,6 +1349,10 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param } dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i - 1]; dcn2_1_soc.num_states = i; + + // diags does not retrieve proper values from SMU, do not update DML instance for diags + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) && !IS_DIAG_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21); } /* Temporary Place holder until we can get them from fuse */ From d905c33a43dc8014e21e2445ae16ee79e562b91f Mon Sep 17 00:00:00 2001 From: Chris Park Date: Fri, 20 Dec 2019 15:30:05 -0500 Subject: [PATCH 098/113] drm/amd/display: Add default switch case for DCC Signed-off-by: Chris Park Reviewed-by: Aric Cyr Reviewed-by: Jordan Lazare Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c | 3 +++ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c | 3 +++ 2 files changed, 6 insertions(+) diff 
--git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index a02c10e23e0d..f36a0d8cedfe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -930,6 +930,9 @@ static bool hubbub1_get_dcc_compression_cap(struct hubbub *hubbub, output->grph.rgb.max_compressed_blk_size = 64; output->grph.rgb.independent_64b_blks = true; break; + default: + ASSERT(false); + break; } output->capable = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c index 836af0f2bbda..9235f7d29454 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c @@ -293,6 +293,9 @@ bool hubbub2_get_dcc_compression_cap(struct hubbub *hubbub, output->grph.rgb.max_compressed_blk_size = 64; output->grph.rgb.independent_64b_blks = true; break; + default: + ASSERT(false); + break; } output->capable = true; output->const_color_support = true; From 38a20a458080b6ecb071f33908e0eb4851ee7f19 Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Wed, 18 Dec 2019 17:37:43 -0500 Subject: [PATCH 099/113] drm/amd/display: implement fw-driver interface for abm 2.4 [Why] IRAM definition needed for versions of DMCU containing ABM 2.4 [How] Pass ABM 2.3 IRAM definition, which is compatible with ABM 2.4, to DMCU when ABM 2.4 FW is detected Signed-off-by: Josip Pavic Reviewed-by: Aric Cyr Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 4e2f615c3566..e75a4bb94488 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ 
b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -662,7 +662,11 @@ bool dmcu_load_iram(struct dmcu *dmcu, memset(&ram_table, 0, sizeof(ram_table)); - if (dmcu->dmcu_version.abm_version == 0x23) { + if (dmcu->dmcu_version.abm_version == 0x24) { + fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params); + result = dmcu->funcs->load_iram( + dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START_V2_2); + } else if (dmcu->dmcu_version.abm_version == 0x23) { fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params); result = dmcu->funcs->load_iram( @@ -687,3 +691,4 @@ bool dmcu_load_iram(struct dmcu *dmcu, return result; } + From 6e0ca5ca83bef235778cbf16af37f58b93650dd6 Mon Sep 17 00:00:00 2001 From: Lewis Huang Date: Fri, 20 Dec 2019 14:17:54 +0800 Subject: [PATCH 100/113] drm/amd/display: remove psr state condition when psr exit case [Why] DMCU need to wait a vblank to handle psr enter command. When psr exit coming before vblank, the psr exit command will be skip because current psr state is disable. 
[How] remove psr state condition when psr exit case Signed-off-by: Lewis Huang Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c index e619e67e6b51..30d953acd016 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c @@ -537,9 +537,6 @@ static void dcn10_dmcu_set_psr_enable(struct dmcu *dmcu, bool enable, bool wait) if (dmcu->dmcu_state != DMCU_RUNNING) return; - dcn10_get_dmcu_psr_state(dmcu, &psr_state); - if (psr_state == 0 && !enable) - return; /* waitDMCUReadyForCmd */ REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, dmcu_wait_reg_ready_interval, From 9a25e13b91f1e7e11dfb3e3f617a1fbf5c5e9608 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 23 Dec 2019 01:07:49 -0500 Subject: [PATCH 101/113] drm/amd/display: 3.2.67 Signed-off-by: Aric Cyr Reviewed-by: Aric Cyr Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 8c7ff9e322f1..e1c11af2bcaf 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.66" +#define DC_VER "3.2.67" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 5b5abe9526073ccbf3032d27b5864520829cdd9c Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Mon, 9 Dec 2019 17:26:34 -0500 Subject: [PATCH 102/113] drm/amd/display: make PSR static screen entry within 30 ms [Why] With different refresh rate panels, the PSR entry/exit time is different since it is dependent on 2 frame entry time today [How] Make 
static screen num frame entry time to be calculated such that entry time is within 30 ms instead of fixed num frames. Signed-off-by: Anthony Koo Reviewed-by: Tony Cheng Acked-by: Aric Cyr Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 32 +++++++++++++---- drivers/gpu/drm/amd/display/dc/core/dc.c | 6 ++-- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +- drivers/gpu/drm/amd/display/dc/dc.h | 13 ++++--- drivers/gpu/drm/amd/display/dc/dc_stream.h | 4 +-- drivers/gpu/drm/amd/display/dc/dc_types.h | 2 +- .../display/dc/dce110/dce110_hw_sequencer.c | 35 +++++++++++-------- .../dc/dce110/dce110_timing_generator.c | 11 ++++-- .../dc/dce110/dce110_timing_generator.h | 3 +- .../dc/dce120/dce120_timing_generator.c | 11 ++++-- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 23 ++++++------ .../amd/display/dc/dcn10/dcn10_hw_sequencer.h | 2 +- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 15 +++++--- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.h | 3 +- .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 6 +++- .../amd/display/dc/inc/hw/timing_generator.h | 3 +- .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 4 +-- .../amd/display/dc/inc/hw_sequencer_private.h | 2 +- 18 files changed, 116 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 504278d94c22..9402374d2466 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8393,17 +8393,37 @@ static bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) { struct dc_link *link = stream->link; - struct dc_static_screen_events triggers = {0}; + unsigned int vsync_rate_hz = 0; + struct dc_static_screen_params params = {0}; + /* Calculate number of static frames before generating interrupt to + * enter PSR. 
+ */ + unsigned int frame_time_microsec = 1000000 / vsync_rate_hz; + // Init fail safe of 2 frames static + unsigned int num_frames_static = 2; DRM_DEBUG_DRIVER("Enabling psr...\n"); - triggers.cursor_update = true; - triggers.overlay_update = true; - triggers.surface_update = true; + vsync_rate_hz = div64_u64(div64_u64(( + stream->timing.pix_clk_100hz * 100), + stream->timing.v_total), + stream->timing.h_total); - dc_stream_set_static_screen_events(link->ctx->dc, + /* Round up + * Calculate number of frames such that at least 30 ms of time has + * passed. + */ + if (vsync_rate_hz != 0) + num_frames_static = (30000 / frame_time_microsec) + 1; + + params.triggers.cursor_update = true; + params.triggers.overlay_update = true; + params.triggers.surface_update = true; + params.num_frames = num_frames_static; + + dc_stream_set_static_screen_params(link->ctx->dc, &stream, 1, - &triggers); + &params); return dc_link_set_psr_allow_active(link, true, false); } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c918a0cd8c60..6c797fac189d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -510,10 +510,10 @@ bool dc_stream_program_csc_matrix(struct dc *dc, struct dc_stream_state *stream) return ret; } -void dc_stream_set_static_screen_events(struct dc *dc, +void dc_stream_set_static_screen_params(struct dc *dc, struct dc_stream_state **streams, int num_streams, - const struct dc_static_screen_events *events) + const struct dc_static_screen_params *params) { int i = 0; int j = 0; @@ -532,7 +532,7 @@ void dc_stream_set_static_screen_events(struct dc *dc, } } - dc->hwss.set_static_screen_control(pipes_affected, num_pipes_affected, events); + dc->hwss.set_static_screen_control(pipes_affected, num_pipes_affected, params); } static void dc_destruct(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index
17f00cbbdc44..26dce397c34c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2542,7 +2542,7 @@ bool dc_link_setup_psr(struct dc_link *link, transmitter_to_phy_id(link->link_enc->transmitter); psr_context->crtcTimingVerticalTotal = stream->timing.v_total; - psr_context->vsyncRateHz = div64_u64(div64_u64((stream-> + psr_context->vsync_rate_hz = div64_u64(div64_u64((stream-> timing.pix_clk_100hz * 100), stream->timing.v_total), stream->timing.h_total); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index e1c11af2bcaf..7639fa074c6e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -157,11 +157,14 @@ struct dc_surface_dcc_cap { bool const_color_support; }; -struct dc_static_screen_events { - bool force_trigger; - bool cursor_update; - bool surface_update; - bool overlay_update; +struct dc_static_screen_params { + struct { + bool force_trigger; + bool cursor_update; + bool surface_update; + bool overlay_update; + } triggers; + unsigned int num_frames; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 37c10dbf269e..92096de79dec 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -439,10 +439,10 @@ bool dc_stream_get_crc(struct dc *dc, uint32_t *g_y, uint32_t *b_cb); -void dc_stream_set_static_screen_events(struct dc *dc, +void dc_stream_set_static_screen_params(struct dc *dc, struct dc_stream_state **stream, int num_streams, - const struct dc_static_screen_events *events); + const struct dc_static_screen_params *params); void dc_stream_set_dyn_expansion(struct dc *dc, struct dc_stream_state *stream, enum dc_dynamic_expansion option); diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 4b5b97520733..e59532d98cb4 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -729,7 +729,7 @@ struct psr_context { /* The VSync rate in Hz used to calculate the * step size for smooth brightness feature */ - unsigned int vsyncRateHz; + unsigned int vsync_rate_hz; unsigned int skipPsrWaitForPllLock; unsigned int numberOfControllers; /* Unused, for future use. To indicate that first changed frame from diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 4939cf3b316f..5b689273ff44 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1373,9 +1373,13 @@ static enum dc_status apply_single_controller_ctx_to_hw( // DRR should set trigger event to monitor surface update event if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) event_triggers = 0x80; + /* Event triggers and num frames initialized for DRR, but can be + * later updated for PSR use. Note DRR trigger events are generated + * regardless of whether num frames met. + */ if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control) pipe_ctx->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx->stream_res.tg, event_triggers); + pipe_ctx->stream_res.tg, event_triggers, 2); if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg( @@ -1706,6 +1710,8 @@ static void set_drr(struct pipe_ctx **pipe_ctx, struct drr_params params = {0}; // DRR should set trigger event to monitor surface update event unsigned int event_triggers = 0x80; + // Note DRR trigger events are generated regardless of whether num frames met. 
+ unsigned int num_frames = 2; params.vertical_total_max = vmax; params.vertical_total_min = vmin; @@ -1721,7 +1727,7 @@ static void set_drr(struct pipe_ctx **pipe_ctx, if (vmax != 0 && vmin != 0) pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( pipe_ctx[i]->stream_res.tg, - event_triggers); + event_triggers, num_frames); } } @@ -1738,30 +1744,31 @@ static void get_position(struct pipe_ctx **pipe_ctx, } static void set_static_screen_control(struct pipe_ctx **pipe_ctx, - int num_pipes, const struct dc_static_screen_events *events) + int num_pipes, const struct dc_static_screen_params *params) { unsigned int i; - unsigned int value = 0; + unsigned int triggers = 0; - if (events->overlay_update) - value |= 0x100; - if (events->surface_update) - value |= 0x80; - if (events->cursor_update) - value |= 0x2; - if (events->force_trigger) - value |= 0x1; + if (params->triggers.overlay_update) + triggers |= 0x100; + if (params->triggers.surface_update) + triggers |= 0x80; + if (params->triggers.cursor_update) + triggers |= 0x2; + if (params->triggers.force_trigger) + triggers |= 0x1; if (num_pipes) { struct dc *dc = pipe_ctx[0]->stream->ctx->dc; if (dc->fbc_compressor) - value |= 0x84; + triggers |= 0x84; } for (i = 0; i < num_pipes; i++) pipe_ctx[i]->stream_res.tg->funcs-> - set_static_screen_control(pipe_ctx[i]->stream_res.tg, value); + set_static_screen_control(pipe_ctx[i]->stream_res.tg, + triggers, params->num_frames); } /* diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 5f7c2c5641c4..1ea7db8eeb98 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -469,22 +469,27 @@ void dce110_timing_generator_set_drr( void dce110_timing_generator_set_static_screen_control( struct timing_generator *tg, - uint32_t value) + uint32_t event_triggers, + uint32_t num_frames) { struct 
dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); uint32_t static_screen_cntl = 0; uint32_t addr = 0; + // By register spec, it only takes 8 bit value + if (num_frames > 0xFF) + num_frames = 0xFF; + addr = CRTC_REG(mmCRTC_STATIC_SCREEN_CONTROL); static_screen_cntl = dm_read_reg(tg->ctx, addr); set_reg_field_value(static_screen_cntl, - value, + event_triggers, CRTC_STATIC_SCREEN_CONTROL, CRTC_STATIC_SCREEN_EVENT_MASK); set_reg_field_value(static_screen_cntl, - 2, + num_frames, CRTC_STATIC_SCREEN_CONTROL, CRTC_STATIC_SCREEN_FRAME_COUNT); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index 768ccf27ada9..d8a5ed7b485d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -231,7 +231,8 @@ void dce110_timing_generator_set_drr( void dce110_timing_generator_set_static_screen_control( struct timing_generator *tg, - uint32_t value); + uint32_t event_triggers, + uint32_t num_frames); void dce110_timing_generator_get_crtc_scanoutpos( struct timing_generator *tg, diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index 098e56962f2a..82bc4e192bbf 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -819,13 +819,18 @@ void dce120_tg_set_colors(struct timing_generator *tg, static void dce120_timing_generator_set_static_screen_control( struct timing_generator *tg, - uint32_t value) + uint32_t event_triggers, + uint32_t num_frames) { struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + // By register spec, it only takes 8 bit value + if (num_frames > 0xFF) + num_frames = 0xFF; + CRTC_REG_UPDATE_2(CRTC0_CRTC_STATIC_SCREEN_CONTROL, - CRTC_STATIC_SCREEN_EVENT_MASK, value, - 
CRTC_STATIC_SCREEN_FRAME_COUNT, 2); + CRTC_STATIC_SCREEN_EVENT_MASK, event_triggers, + CRTC_STATIC_SCREEN_FRAME_COUNT, num_frames); } void dce120_timing_generator_set_test_pattern( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 2baff3cd0ae5..5347a85f10d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2704,6 +2704,8 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, struct drr_params params = {0}; // DRR set trigger event mapped to OTG_TRIG_A (bit 11) for manual control flow unsigned int event_triggers = 0x800; + // Note DRR trigger events are generated regardless of whether num frames met. + unsigned int num_frames = 2; params.vertical_total_max = vmax; params.vertical_total_min = vmin; @@ -2720,7 +2722,7 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, if (vmax != 0 && vmin != 0) pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( pipe_ctx[i]->stream_res.tg, - event_triggers); + event_triggers, num_frames); } } @@ -2737,21 +2739,22 @@ void dcn10_get_position(struct pipe_ctx **pipe_ctx, } void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx, - int num_pipes, const struct dc_static_screen_events *events) + int num_pipes, const struct dc_static_screen_params *params) { unsigned int i; - unsigned int value = 0; + unsigned int triggers = 0; - if (events->surface_update) - value |= 0x80; - if (events->cursor_update) - value |= 0x2; - if (events->force_trigger) - value |= 0x1; + if (params->triggers.surface_update) + triggers |= 0x80; + if (params->triggers.cursor_update) + triggers |= 0x2; + if (params->triggers.force_trigger) + triggers |= 0x1; for (i = 0; i < num_pipes; i++) pipe_ctx[i]->stream_res.tg->funcs-> - set_static_screen_control(pipe_ctx[i]->stream_res.tg, value); + set_static_screen_control(pipe_ctx[i]->stream_res.tg, + triggers, params->num_frames); } static 
void dcn10_config_stereo_parameters( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index 55b8f3b2fc4e..4d20f6586bb5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -132,7 +132,7 @@ void dcn10_get_position(struct pipe_ctx **pipe_ctx, int num_pipes, struct crtc_position *position); void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx, - int num_pipes, const struct dc_static_screen_events *events); + int num_pipes, const struct dc_static_screen_params *params); void dcn10_setup_stereo(struct pipe_ctx *pipe_ctx, struct dc *dc); void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool enable); void dcn10_log_hw_state(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index cd7412dc42d1..a9a43b397db9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -789,21 +789,26 @@ void optc1_set_early_control( void optc1_set_static_screen_control( struct timing_generator *optc, - uint32_t value) + uint32_t event_triggers, + uint32_t num_frames) { struct optc *optc1 = DCN10TG_FROM_TG(optc); + // By register spec, it only takes 8 bit value + if (num_frames > 0xFF) + num_frames = 0xFF; + /* Bit 8 is no longer applicable in RV for PSR case, * set bit 8 to 0 if given */ - if ((value & STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN) + if ((event_triggers & STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN) != 0) - value = value & + event_triggers = event_triggers & ~STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN; REG_SET_2(OTG_STATIC_SCREEN_CONTROL, 0, - OTG_STATIC_SCREEN_EVENT_MASK, value, - OTG_STATIC_SCREEN_FRAME_COUNT, 2); + OTG_STATIC_SCREEN_EVENT_MASK, event_triggers, + OTG_STATIC_SCREEN_FRAME_COUNT, num_frames); } 
void optc1_setup_manual_trigger(struct timing_generator *optc) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index 3afeb1a30f21..f277656d5464 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -625,7 +625,8 @@ void optc1_set_drr( void optc1_set_static_screen_control( struct timing_generator *optc, - uint32_t value); + uint32_t event_triggers, + uint32_t num_frames); void optc1_program_stereo(struct timing_generator *optc, const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 5b9cbedaa0de..89920924a154 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -686,9 +686,13 @@ enum dc_status dcn20_enable_stream_timing( // DRR should set trigger event to monitor surface update event if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) event_triggers = 0x80; + /* Event triggers and num frames initialized for DRR, but can be + * later updated for PSR use. Note DRR trigger events are generated + * regardless of whether num frames met. 
+ */ if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control) pipe_ctx->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx->stream_res.tg, event_triggers); + pipe_ctx->stream_res.tg, event_triggers, 2); /* TODO program crtc source select for non-virtual signal*/ /* TODO program FMT */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 2d3efd71fa51..e5e7d94026fc 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -208,7 +208,8 @@ struct timing_generator_funcs { bool enable, const struct dc_crtc_timing *timing); void (*set_drr)(struct timing_generator *tg, const struct drr_params *params); void (*set_static_screen_control)(struct timing_generator *tg, - uint32_t value); + uint32_t event_triggers, + uint32_t num_frames); void (*set_test_pattern)( struct timing_generator *tg, enum controller_dp_test_pattern test_pattern, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index df3204645c6b..209118f9f193 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -42,7 +42,7 @@ struct dc_state; struct dc_stream_status; struct dc_writeback_info; struct dchub_init_data; -struct dc_static_screen_events; +struct dc_static_screen_params; struct resource_pool; struct dc_phy_addr_space_config; struct dc_virtual_addr_space_config; @@ -102,7 +102,7 @@ struct hw_sequencer_funcs { unsigned int vmid, unsigned int vmid_frame_number); void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx, int num_pipes, - const struct dc_static_screen_events *events); + const struct dc_static_screen_params *events); /* Stream Related */ void (*enable_stream)(struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h 
b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h index 8ba06f015975..ecf566378ccd 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h @@ -51,7 +51,7 @@ struct dc_state; struct dc_stream_status; struct dc_writeback_info; struct dchub_init_data; -struct dc_static_screen_events; +struct dc_static_screen_params; struct resource_pool; struct resource_context; struct stream_resource; From 4134aaa11bd9c3e65ec07a1fcd59f57d4c58c434 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Tue, 17 Dec 2019 12:38:54 -0500 Subject: [PATCH 103/113] drm/amd/display: wait for test pattern after when all pipes are programmed [why] Currently we wait for test pattern after each pipe is programmed. For the ODM combined scenario this causes the test pattern to be shown on only half of the screen for 1 frame. This is not desirable. [how] No wait between odm pipe programming, only wait after all pipes are programmed. Signed-off-by: Wenjing Liu Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 75dc387bbb2b..8c257a9f4c7f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3657,12 +3657,6 @@ static void set_crtc_test_pattern(struct dc_link *link, NULL, width, height); - /* wait for dpg to blank pixel data with test pattern */ - for (count = 0; count < 1000; count++) - if (odm_opp->funcs->dpg_is_blanked(odm_opp)) - break; - else - udelay(100); } opp->funcs->opp_set_disp_pattern_generator(opp, controller_test_pattern, From 8547058b17f1f4fba10f389191dd7e08bf95b791 Mon Sep 17 00:00:00 2001 From: Lewis Huang Date: Tue, 24 Dec 2019 09:50:21 +0800 Subject: [PATCH 104/113] drm/amd/display: Add
monitor patch for AUO dpcd issue [Why] dpcd cap mismatch in 2200 vs base [How] Add monitor patch which uses base caps to overwrite 2200 Signed-off-by: Lewis Huang Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 7 +++ .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 62 +++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc_link.h | 3 + .../gpu/drm/amd/display/dc/inc/dc_link_dp.h | 2 + 4 files changed, 74 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 26dce397c34c..e5807be78edb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3371,3 +3371,10 @@ const struct dc_link_settings *dc_link_get_link_cap( return &link->preferred_link_setting; return &link->verified_link_cap; } + +void dc_link_overwrite_extended_receiver_cap( + struct dc_link *link) +{ + dp_overwrite_extended_receiver_cap(link); +} + diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 8c257a9f4c7f..2124bc10fc53 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3446,6 +3446,68 @@ static bool retrieve_link_cap(struct dc_link *link) return true; } +bool dp_overwrite_extended_receiver_cap(struct dc_link *link) +{ + uint8_t dpcd_data[16]; + uint32_t read_dpcd_retry_cnt = 3; + enum dc_status status = DC_ERROR_UNEXPECTED; + union dp_downstream_port_present ds_port = { 0 }; + union down_stream_port_count down_strm_port_count; + union edp_configuration_cap edp_config_cap; + + int i; + + for (i = 0; i < read_dpcd_retry_cnt; i++) { + status = core_link_read_dpcd( + link, + DP_DPCD_REV, + dpcd_data, + sizeof(dpcd_data)); + if (status == DC_OK) + break; + } + + link->dpcd_caps.dpcd_rev.raw = + dpcd_data[DP_DPCD_REV - DP_DPCD_REV]; + + if
(dpcd_data[DP_MAX_LANE_COUNT - DP_DPCD_REV] == 0) + return false; + + ds_port.byte = dpcd_data[DP_DOWNSTREAMPORT_PRESENT - + DP_DPCD_REV]; + + get_active_converter_info(ds_port.byte, link); + + down_strm_port_count.raw = dpcd_data[DP_DOWN_STREAM_PORT_COUNT - + DP_DPCD_REV]; + + link->dpcd_caps.allow_invalid_MSA_timing_param = + down_strm_port_count.bits.IGNORE_MSA_TIMING_PARAM; + + link->dpcd_caps.max_ln_count.raw = dpcd_data[ + DP_MAX_LANE_COUNT - DP_DPCD_REV]; + + link->dpcd_caps.max_down_spread.raw = dpcd_data[ + DP_MAX_DOWNSPREAD - DP_DPCD_REV]; + + link->reported_link_cap.lane_count = + link->dpcd_caps.max_ln_count.bits.MAX_LANE_COUNT; + link->reported_link_cap.link_rate = dpcd_data[ + DP_MAX_LINK_RATE - DP_DPCD_REV]; + link->reported_link_cap.link_spread = + link->dpcd_caps.max_down_spread.bits.MAX_DOWN_SPREAD ? + LINK_SPREAD_05_DOWNSPREAD_30KHZ : LINK_SPREAD_DISABLED; + + edp_config_cap.raw = dpcd_data[ + DP_EDP_CONFIGURATION_CAP - DP_DPCD_REV]; + link->dpcd_caps.panel_mode_edp = + edp_config_cap.bits.ALT_SCRAMBLER_RESET; + link->dpcd_caps.dpcd_display_control_capable = + edp_config_cap.bits.DPCD_DISPLAY_CONTROL_CAPABLE; + + return true; +} + bool detect_dp_sink_caps(struct dc_link *link) { return retrieve_link_cap(link); diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index f420aeac7fbd..f9aae35aec92 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -301,6 +301,9 @@ uint32_t dc_link_bandwidth_kbps( const struct dc_link_settings *dc_link_get_link_cap( const struct dc_link *link); +void dc_link_overwrite_extended_receiver_cap( + struct dc_link *link); + bool dc_submit_i2c( struct dc *dc, uint32_t link_index, diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index 6198bccd6199..8b1f0ce6c2a7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ 
b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -76,6 +76,8 @@ void dp_enable_mst_on_sink(struct dc_link *link, bool enable); enum dp_panel_mode dp_get_panel_mode(struct dc_link *link); void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode); +bool dp_overwrite_extended_receiver_cap(struct dc_link *link); + void dp_set_fec_ready(struct dc_link *link, bool ready); void dp_set_fec_enable(struct dc_link *link, bool enable); bool dp_set_dsc_enable(struct pipe_ctx *pipe_ctx, bool enable); From 9f21f3792ffb669623cbad8b084f75aa49c98786 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 30 Dec 2019 14:29:06 -0500 Subject: [PATCH 105/113] drm/amd/display: Enable double buffer for OTG_BLANK [Why] Currently if seamless boot is enabled, we will skip double buffer enable for OTG_BLANK. However, we need the double buffer enable in order to block global sync signals when OTG becomes blanked (for PSR). Blocking global sync signals prevents the pipe from requesting data. [How] Call tg_init in the seamless boot path before skipping the pipe. Signed-off-by: Alvin Lee Reviewed-by: Jun Lei Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 5347a85f10d7..f2127afb37b2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1188,8 +1188,14 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) if (can_apply_seamless_boot && pipe_ctx->stream != NULL && pipe_ctx->stream_res.tg->funcs->is_tg_enabled( - pipe_ctx->stream_res.tg)) + pipe_ctx->stream_res.tg)) { + // Enable double buffering for OTG_BLANK no matter if + // seamless boot is enabled or not to suppress global sync + // signals when OTG blanked.
This is to prevent pipe from + // requesting data while in PSR. + tg->funcs->tg_init(tg); continue; + } /* Disable on the current state so the new one isn't cleared. */ pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; From bae9c49bf0703ecb214e84f889e84fe447e4eda9 Mon Sep 17 00:00:00 2001 From: Yongqiang Sun Date: Wed, 18 Dec 2019 15:01:17 -0500 Subject: [PATCH 106/113] drm/amd/display: Only program surface flip for video plane via dmcub Only need to do surface flip for video plane via dmcub. Signed-off-by: Yongqiang Sun Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 130 +++++++++--------- drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 20 +++ .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 19 +-- 3 files changed, 94 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index 12396c371569..da63fc53cc4a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -690,86 +690,98 @@ void hubp21_validate_dml_output(struct hubp *hubp, dml_dlg_attr->refcyc_per_meta_chunk_flip_l, dlg_attr.refcyc_per_meta_chunk_flip_l); } -static void program_surface_flip_and_addr(struct hubp *hubp, struct dmub_rb_cmd_flip *surface_flip) +static void program_surface_flip_and_addr(struct hubp *hubp, struct surface_flip_registers *flip_regs) { struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); REG_UPDATE_3(DCSURF_FLIP_CONTROL, - SURFACE_FLIP_TYPE, surface_flip->flip.flip_params.immediate, - SURFACE_FLIP_MODE_FOR_STEREOSYNC, surface_flip->flip.flip_params.grph_stereo, - SURFACE_FLIP_IN_STEREOSYNC, surface_flip->flip.flip_params.grph_stereo); + SURFACE_FLIP_TYPE, flip_regs->immediate, + SURFACE_FLIP_MODE_FOR_STEREOSYNC, flip_regs->grph_stereo, + SURFACE_FLIP_IN_STEREOSYNC, flip_regs->grph_stereo); REG_UPDATE(VMID_SETTINGS_0, - VMID, 
surface_flip->flip.flip_params.vmid); + VMID, flip_regs->vmid); REG_UPDATE_8(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, surface_flip->flip.flip_params.tmz_surface, - PRIMARY_SURFACE_TMZ_C, surface_flip->flip.flip_params.tmz_surface, - PRIMARY_META_SURFACE_TMZ, surface_flip->flip.flip_params.tmz_surface, - PRIMARY_META_SURFACE_TMZ_C, surface_flip->flip.flip_params.tmz_surface, - SECONDARY_SURFACE_TMZ, surface_flip->flip.flip_params.tmz_surface, - SECONDARY_SURFACE_TMZ_C, surface_flip->flip.flip_params.tmz_surface, - SECONDARY_META_SURFACE_TMZ, surface_flip->flip.flip_params.tmz_surface, - SECONDARY_META_SURFACE_TMZ_C, surface_flip->flip.flip_params.tmz_surface); + PRIMARY_SURFACE_TMZ, flip_regs->tmz_surface, + PRIMARY_SURFACE_TMZ_C, flip_regs->tmz_surface, + PRIMARY_META_SURFACE_TMZ, flip_regs->tmz_surface, + PRIMARY_META_SURFACE_TMZ_C, flip_regs->tmz_surface, + SECONDARY_SURFACE_TMZ, flip_regs->tmz_surface, + SECONDARY_SURFACE_TMZ_C, flip_regs->tmz_surface, + SECONDARY_META_SURFACE_TMZ, flip_regs->tmz_surface, + SECONDARY_META_SURFACE_TMZ_C, flip_regs->tmz_surface); REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0, PRIMARY_META_SURFACE_ADDRESS_HIGH_C, - surface_flip->flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C); + flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C); REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0, PRIMARY_META_SURFACE_ADDRESS_C, - surface_flip->flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_C); + flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_C); REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, PRIMARY_META_SURFACE_ADDRESS_HIGH, - surface_flip->flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH); + flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH); REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, PRIMARY_META_SURFACE_ADDRESS, - surface_flip->flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS); + flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS); REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, 0, SECONDARY_META_SURFACE_ADDRESS_HIGH, - 
surface_flip->flip.DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH); + flip_regs->DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH); REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS, 0, SECONDARY_META_SURFACE_ADDRESS, - surface_flip->flip.DCSURF_SECONDARY_META_SURFACE_ADDRESS); + flip_regs->DCSURF_SECONDARY_META_SURFACE_ADDRESS); REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0, SECONDARY_SURFACE_ADDRESS_HIGH, - surface_flip->flip.DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH); + flip_regs->DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH); REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0, SECONDARY_SURFACE_ADDRESS, - surface_flip->flip.DCSURF_SECONDARY_SURFACE_ADDRESS); + flip_regs->DCSURF_SECONDARY_SURFACE_ADDRESS); REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, PRIMARY_SURFACE_ADDRESS_HIGH_C, - surface_flip->flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C); + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C); REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, PRIMARY_SURFACE_ADDRESS_C, - surface_flip->flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C); + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C); REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, PRIMARY_SURFACE_ADDRESS_HIGH, - surface_flip->flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH); + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH); REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, PRIMARY_SURFACE_ADDRESS, - surface_flip->flip.DCSURF_PRIMARY_SURFACE_ADDRESS); + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS); } -void program_surface_flip_and_addr_dmcub(struct hubp *hubp, struct dmub_rb_cmd_flip *surface_flip) +void dmcub_PLAT_54186_wa(struct hubp *hubp, struct surface_flip_registers *flip_regs) { struct dc_dmub_srv *dmcub = hubp->ctx->dmub_srv; struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); + struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa = { 0 }; + + PLAT_54186_wa.header.type = DMUB_CMD__PLAT_54186_WA; + PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS; + PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = 
flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C; + PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; + PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; + PLAT_54186_wa.flip.flip_params.grph_stereo = flip_regs->grph_stereo; + PLAT_54186_wa.flip.flip_params.hubp_inst = hubp->inst; + PLAT_54186_wa.flip.flip_params.immediate = flip_regs->immediate; + PLAT_54186_wa.flip.flip_params.tmz_surface = flip_regs->tmz_surface; + PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid; PERF_TRACE(); // TODO: remove after performance is stable. - dc_dmub_srv_cmd_queue(dmcub, &surface_flip->header); + dc_dmub_srv_cmd_queue(dmcub, &PLAT_54186_wa.header); PERF_TRACE(); // TODO: remove after performance is stable. dc_dmub_srv_cmd_execute(dmcub); PERF_TRACE(); // TODO: remove after performance is stable. @@ -782,15 +794,11 @@ bool hubp21_program_surface_flip_and_addr( const struct dc_plane_address *address, bool flip_immediate) { - struct dmub_rb_cmd_flip surface_flip = { 0 }; - bool grph_stereo = false; struct dc_debug_options *debug = &hubp->ctx->dc->debug; struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); + struct surface_flip_registers flip_regs = { 0 }; - surface_flip.header.type = DMUB_CMD__SURFACE_FLIP; - - surface_flip.flip.flip_params.vmid = address->vmid; - surface_flip.flip.flip_params.hubp_inst = hubp->inst; + flip_regs.vmid = address->vmid; switch (address->type) { case PLN_ADDR_TYPE_GRAPHICS: @@ -800,15 +808,15 @@ bool hubp21_program_surface_flip_and_addr( } if (address->grph.meta_addr.quad_part != 0) { - surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS = + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS = address->grph.meta_addr.low_part; - surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = address->grph.meta_addr.high_part; } - surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = + 
flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS = address->grph.addr.low_part; - surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = address->grph.addr.high_part; break; case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE: @@ -817,30 +825,30 @@ bool hubp21_program_surface_flip_and_addr( break; if (address->video_progressive.luma_meta_addr.quad_part != 0) { - surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS = + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS = address->video_progressive.luma_meta_addr.low_part; - surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = address->video_progressive.luma_meta_addr.high_part; - surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_C = + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_C = address->video_progressive.chroma_meta_addr.low_part; - surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C = + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C = address->video_progressive.chroma_meta_addr.high_part; } - surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS = address->video_progressive.luma_addr.low_part; - surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = address->video_progressive.luma_addr.high_part; if (debug->nv12_iflip_vm_wa) { - surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_C = address->video_progressive.chroma_addr.low_part + hubp21->PLAT_54186_wa_chroma_addr_offset; } else - surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_C = address->video_progressive.chroma_addr.low_part; - surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = address->video_progressive.chroma_addr.high_part; break; @@ -850,30 +858,30 @@ bool hubp21_program_surface_flip_and_addr( if 
(address->grph_stereo.right_addr.quad_part == 0) break; - grph_stereo = true; + flip_regs.grph_stereo = true; if (address->grph_stereo.right_meta_addr.quad_part != 0) { - surface_flip.flip.DCSURF_SECONDARY_META_SURFACE_ADDRESS = + flip_regs.DCSURF_SECONDARY_META_SURFACE_ADDRESS = address->grph_stereo.right_meta_addr.low_part; - surface_flip.flip.DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH = + flip_regs.DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH = address->grph_stereo.right_meta_addr.high_part; } if (address->grph_stereo.left_meta_addr.quad_part != 0) { - surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS = + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS = address->grph_stereo.left_meta_addr.low_part; - surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = address->grph_stereo.left_meta_addr.high_part; } - surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS = address->grph_stereo.left_addr.low_part; - surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = address->grph_stereo.left_addr.high_part; - surface_flip.flip.DCSURF_SECONDARY_SURFACE_ADDRESS = + flip_regs.DCSURF_SECONDARY_SURFACE_ADDRESS = address->grph_stereo.right_addr.low_part; - surface_flip.flip.DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH = + flip_regs.DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH = address->grph_stereo.right_addr.high_part; break; @@ -882,15 +890,13 @@ bool hubp21_program_surface_flip_and_addr( break; } - surface_flip.flip.flip_params.vmid = address->vmid; - surface_flip.flip.flip_params.grph_stereo = grph_stereo; - surface_flip.flip.flip_params.tmz_surface = address->tmz_surface; - surface_flip.flip.flip_params.immediate = flip_immediate; + flip_regs.tmz_surface = address->tmz_surface; + flip_regs.immediate = flip_immediate; - if (hubp->ctx->dc->debug.enable_dmcub_surface_flip) - program_surface_flip_and_addr_dmcub(hubp, &surface_flip); + if 
(hubp->ctx->dc->debug.enable_dmcub_surface_flip && address->type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) + dmcub_PLAT_54186_wa(hubp, &flip_regs); else - program_surface_flip_and_addr(hubp, &surface_flip); + program_surface_flip_and_addr(hubp, &flip_regs); hubp->request_address = *address; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 686145933335..2cb8466e657b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -63,6 +63,26 @@ struct hubp { bool power_gated; }; +struct surface_flip_registers { + uint32_t DCSURF_SURFACE_CONTROL; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; + uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS; + uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS; + bool tmz_surface; + bool immediate; + uint8_t vmid; + bool grph_stereo; +}; + struct hubp_funcs { void (*hubp_setup)( struct hubp *hubp, diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 0c3ae02148a6..cd9532b4f14d 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -48,7 +48,7 @@ enum dmub_cmd_type { DMUB_CMD__REG_SEQ_FIELD_UPDATE_SEQ = 2, DMUB_CMD__REG_SEQ_BURST_WRITE = 3, DMUB_CMD__REG_REG_WAIT = 4, - DMUB_CMD__SURFACE_FLIP = 5, + DMUB_CMD__PLAT_54186_WA = 5, DMUB_CMD__PSR = 64, DMUB_CMD__VBIOS = 128, }; @@ -151,20 +151,12 @@ struct dmub_rb_cmd_reg_wait { #define PHYSICAL_ADDRESS_LOC union large_integer 
#endif -struct dmub_cmd_surface_flip { +struct dmub_cmd_PLAT_54186_wa { uint32_t DCSURF_SURFACE_CONTROL; - uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH; - uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS; uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS; - uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C; - uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C; uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; - uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH; - uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS; - uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH; - uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS; struct { uint8_t hubp_inst : 4; uint8_t tmz_surface : 1; @@ -173,11 +165,12 @@ struct dmub_cmd_surface_flip { uint8_t grph_stereo : 1; uint32_t reserved : 21; } flip_params; + uint32_t reserved[9]; }; -struct dmub_rb_cmd_flip { +struct dmub_rb_cmd_PLAT_54186_wa { struct dmub_cmd_header header; - struct dmub_cmd_surface_flip flip; + struct dmub_cmd_PLAT_54186_wa flip; }; struct dmub_cmd_digx_encoder_control_data { @@ -287,7 +280,7 @@ union dmub_rb_cmd { struct dmub_rb_cmd_psr_enable psr_enable; struct dmub_rb_cmd_psr_copy_settings psr_copy_settings; struct dmub_rb_cmd_psr_set_level psr_set_level; - struct dmub_rb_cmd_flip surface_flip; + struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa; struct dmub_rb_cmd_psr_setup psr_setup; }; From 38259bacd727a2a9e0c767bc6bc50ebb61501023 Mon Sep 17 00:00:00 2001 From: Martin Tsai Date: Thu, 26 Dec 2019 16:19:47 +0800 Subject: [PATCH 107/113] drm/amd/display: Use mdelay to avoid context switch [why] The rapid msleep operation causes the white line garbage when DAL check flip pending status in SetVidPnSourceVisibility. To execute this msleep will induce context switch, and longer delay could cause worse garbage situation. [how] To replace msleep with mdelay. 
Signed-off-by: Martin Tsai Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 89920924a154..f07899e270f5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1642,9 +1642,9 @@ void dcn20_program_front_end_for_ctx( struct hubp *hubp = pipe->plane_res.hubp; int j = 0; - for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS + for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS*1000 && hubp->funcs->hubp_is_flip_pending(hubp); j++) - msleep(1); + mdelay(1); } } From 34ad0230062c39cdcba564d16d122c0fb467a7d6 Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Wed, 18 Dec 2019 14:40:56 -0500 Subject: [PATCH 108/113] drm/amd/display: fixup DML dependencies [why] Need to fix DML portability issues to enable SW unit testing around DML [how] Move calcs into dc include folder since multiple components reference it Remove relative paths to external dependencies Signed-off-by: Jun Lei Reviewed-by: Anthony Koo Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c | 2 +- drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h | 2 +- drivers/gpu/drm/amd/display/dc/{calcs => inc}/dcn_calc_math.h | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename drivers/gpu/drm/amd/display/dc/{calcs => inc}/dcn_calc_math.h (100%) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c index b953b02a1512..723af0b2dda0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c @@ -24,7 +24,7 @@ */ #include "dml_common_defs.h" -#include 
"../calcs/dcn_calc_math.h" +#include "dcn_calc_math.h" #include "dml_inline_defs.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h index eca140da13d8..ded71ea82413 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h @@ -27,7 +27,7 @@ #define __DML_INLINE_DEFS_H__ #include "dml_common_defs.h" -#include "../calcs/dcn_calc_math.h" +#include "dcn_calc_math.h" #include "dml_logger.h" static inline double dml_min(double a, double b) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.h rename to drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h From ffdaeb1f45ee4414e7ecc2b841bea18bec35d1c0 Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Fri, 27 Dec 2019 11:35:33 +0800 Subject: [PATCH 109/113] drm/amd/display: reallocate MST payload when link loss [Why] Receiving an HPD short pulse with link loss while the MST payload is being allocated causes the driver to allocate the payload twice. As a result the monitor cannot light up successfully. [How] When the driver receives an HPD short pulse with link loss, we need to deallocate the payload and then allocate it again. This way the payload is never allocated twice for the same sink. 
Signed-off-by: Paul Hsieh Reviewed-by: Tony Cheng Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 33 +++++++++++++++++++ .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 7 ++-- drivers/gpu/drm/amd/display/dc/dc_link.h | 1 + 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index e5807be78edb..260c0b62d37d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2873,6 +2873,39 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) return DC_OK; } + +enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link) +{ + int i; + struct pipe_ctx *pipe_ctx; + + // Clear all of MST payload then reallocate + for (i = 0; i < MAX_PIPES; i++) { + pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link && + pipe_ctx->stream->dpms_off == false && + pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + deallocate_mst_payload(pipe_ctx); + } + } + + for (i = 0; i < MAX_PIPES; i++) { + pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link && + pipe_ctx->stream->dpms_off == false && + pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + /* enable/disable PHY will clear connection between BE and FE + * need to restore it. 
+ */ + link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc, + pipe_ctx->stream_res.stream_enc->id, true); + dc_link_allocate_mst_payload(pipe_ctx); + } + } + + return DC_OK; +} + #if defined(CONFIG_DRM_AMD_DC_HDCP) static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 2124bc10fc53..6ab298c65247 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2901,11 +2901,8 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd pipe_ctx, pipe_ctx->stream->signal); - if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link && - pipe_ctx->stream->dpms_off == false && - pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { - dc_link_allocate_mst_payload(pipe_ctx); - } + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) + dc_link_reallocate_mst_payload(link); status = false; if (out_link_loss) diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index f9aae35aec92..d25603128394 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -205,6 +205,7 @@ enum dc_detect_reason { bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason); bool dc_link_get_hpd_state(struct dc_link *dc_link); enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx); +enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link); /* Notify DC about DP RX Interrupt (aka Short Pulse Interrupt). 
* Return: From f7ce7d617033248428db49879b0d20595c46d4e6 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 6 Jan 2020 00:25:33 -0500 Subject: [PATCH 110/113] drm/amd/display: 3.2.68 Signed-off-by: Aric Cyr Reviewed-by: Aric Cyr Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7639fa074c6e..3fa85a54360f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.67" +#define DC_VER "3.2.68" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 31635887cb8d899584484620b597e401b2350a79 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Mon, 23 Dec 2019 16:05:05 -0500 Subject: [PATCH 111/113] drm/amd/display: skip opp blank or unblank if test pattern enabled [why] DPG is used to generate both blank and test automation test pattern. When test automation is running the requested test pattern can be overridden by the blank or unblank call because it is using the same hardware. [how] When test pattern is requested, skip blank or unblank. 
Signed-off-by: Wenjing Liu Reviewed-by: Nikola Cornij Acked-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index f07899e270f5..cfbbaffa8654 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -945,6 +945,9 @@ void dcn20_blank_pixel_data( int width = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; int height = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; + if (stream->link->test_pattern_enabled) + return; + /* get opp dpg blank color */ color_space_to_black_color(dc, color_space, &black_color); From bd0522112332663e386df1b8642052463ea9b3b9 Mon Sep 17 00:00:00 2001 From: "Pan, Xinhui" Date: Thu, 16 Jan 2020 06:09:41 +0000 Subject: [PATCH 112/113] drm/amdgpu: add the lost mutex_init back MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize notifier_lock. 
Bug: https://gitlab.freedesktop.org/drm/amd/issues/1016 Reviewed-by: Feifei Xu Reviewed-by: Christian König Signed-off-by: xinhui pan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d3eff4c6289a..53d882000101 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2848,6 +2848,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, hash_init(adev->mn_hash); mutex_init(&adev->lock_reset); mutex_init(&adev->psp.mutex); + mutex_init(&adev->notifier_lock); r = amdgpu_device_check_arguments(adev); if (r) From 7b19914383fc008a6b51871f18da72cf9aa43cae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 16 Jan 2020 17:58:34 -0800 Subject: [PATCH 113/113] drm/mst: Don't do atomic checks over disabled managers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a main MST port is disconnected drivers should call drm_dp_mst_topology_mgr_set_mst() disabling the MST manager, this function will set manager mst_primary to NULL and it will cause the crash below on the next atomic check when trying to access mst_primary->port. As there is no use in running checks over managers that are not active this patch will skip it. 
[ 305.616450] [drm:drm_dp_mst_atomic_check] [MST PORT:00000000cc2049e9] releases all VCPI slots [ 305.625085] [drm:drm_dp_mst_atomic_check] [MST PORT:00000000020ab43e] releases all VCPI slots [ 305.633729] [drm:drm_dp_mst_atomic_check] [MST MGR:00000000cdd467d4] mst state 00000000b67672eb VCPI avail=63 used=0 [ 305.644405] BUG: kernel NULL pointer dereference, address: 0000000000000030 [ 305.651448] #PF: supervisor read access in kernel mode [ 305.656640] #PF: error_code(0x0000) - not-present page [ 305.661807] PGD 0 P4D 0 [ 305.664396] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 305.668789] CPU: 3 PID: 183 Comm: kworker/3:2 Not tainted 5.5.0-rc6+ #1404 [ 305.675703] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP TLC, BIOS ICLSFWR1.R00.3201.A00.1905140358 05/14/2019 [ 305.689425] Workqueue: events drm_dp_delayed_destroy_work [ 305.694874] RIP: 0010:drm_dp_mst_atomic_check+0x138/0x2c0 [ 305.700306] Code: 00 00 00 41 29 d9 41 89 d8 4c 89 fa 4c 89 f1 48 c7 c6 b0 b1 34 82 bf 10 00 00 00 45 31 ed e8 3f 99 02 00 4d 8b bf 80 04 00 00 <49> 8b 47 30 49 8d 5f 30 4c 8d 60 e8 48 39 c3 74 35 49 8b 7c 24 28 [ 305.719169] RSP: 0018:ffffc90001687b58 EFLAGS: 00010246 [ 305.724434] RAX: 0000000000000000 RBX: 000000000000003f RCX: 0000000000000000 [ 305.731611] RDX: 0000000000000000 RSI: ffff88849fba8cb8 RDI: 00000000ffffffff [ 305.738785] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000001 [ 305.745962] R10: ffffc900016879a0 R11: ffffc900016879a5 R12: 0000000000000000 [ 305.753139] R13: 0000000000000000 R14: ffff8884905c9bc0 R15: 0000000000000000 [ 305.760315] FS: 0000000000000000(0000) GS:ffff88849fb80000(0000) knlGS:0000000000000000 [ 305.768452] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 305.774263] CR2: 0000000000000030 CR3: 0000000005610006 CR4: 0000000000760ee0 [ 305.781441] PKRU: 55555554 [ 305.784228] Call Trace: [ 305.786739] intel_atomic_check+0xb2e/0x2560 [i915] [ 305.791678] ? printk+0x53/0x6a [ 305.794856] ? 
drm_atomic_check_only+0x3e/0x810 [ 305.799417] ? __drm_dbg+0x82/0x90 [ 305.802848] drm_atomic_check_only+0x56a/0x810 [ 305.807322] drm_atomic_commit+0xe/0x50 [ 305.811185] drm_client_modeset_commit_atomic+0x1e2/0x250 [ 305.816619] drm_client_modeset_commit_force+0x4d/0x180 [ 305.821921] drm_fb_helper_restore_fbdev_mode_unlocked+0x46/0xa0 [ 305.827963] drm_fb_helper_set_par+0x2b/0x40 [ 305.832265] drm_fb_helper_hotplug_event.part.0+0xb2/0xd0 [ 305.837755] drm_kms_helper_hotplug_event+0x21/0x30 [ 305.842694] process_one_work+0x25b/0x5b0 [ 305.846735] worker_thread+0x4b/0x3b0 [ 305.850439] kthread+0x100/0x140 [ 305.853690] ? process_one_work+0x5b0/0x5b0 [ 305.857901] ? kthread_park+0x80/0x80 [ 305.861588] ret_from_fork+0x24/0x50 [ 305.865202] Modules linked in: snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic i915 btusb btrtl btbcm btintel bluetooth prime_numbers snd_hda_intel snd_intel_dspcfg snd_hda_codec e1000e snd_hwdep snd_hda_core thunderbolt mei_hdcp mei_me asix cdc_ether x86_pkg_temp_thermal r8152 mei coretemp usbnet snd_pcm mii crct10dif_pclmul ptp crc32_pclmul ecdh_generic ghash_clmulni_intel pps_core ecc i2c_i801 intel_lpss_pci [ 305.903096] CR2: 0000000000000030 [ 305.906431] ---[ end trace 70ee364eed801cb0 ]--- [ 305.940816] RIP: 0010:drm_dp_mst_atomic_check+0x138/0x2c0 [ 305.946261] Code: 00 00 00 41 29 d9 41 89 d8 4c 89 fa 4c 89 f1 48 c7 c6 b0 b1 34 82 bf 10 00 00 00 45 31 ed e8 3f 99 02 00 4d 8b bf 80 04 00 00 <49> 8b 47 30 49 8d 5f 30 4c 8d 60 e8 48 39 c3 74 35 49 8b 7c 24 28 [ 305.965125] RSP: 0018:ffffc90001687b58 EFLAGS: 00010246 [ 305.970382] RAX: 0000000000000000 RBX: 000000000000003f RCX: 0000000000000000 [ 305.977571] RDX: 0000000000000000 RSI: ffff88849fba8cb8 RDI: 00000000ffffffff [ 305.984747] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000001 [ 305.991921] R10: ffffc900016879a0 R11: ffffc900016879a5 R12: 0000000000000000 [ 305.999099] R13: 0000000000000000 R14: ffff8884905c9bc0 R15: 0000000000000000 [ 
306.006271] FS: 0000000000000000(0000) GS:ffff88849fb80000(0000) knlGS:0000000000000000 [ 306.014407] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 306.020185] CR2: 0000000000000030 CR3: 000000048b3aa003 CR4: 0000000000760ee0 [ 306.027404] PKRU: 55555554 [ 306.030127] BUG: sleeping function called from invalid context at include/linux/percpu-rwsem.h:38 [ 306.039049] in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 183, name: kworker/3:2 [ 306.047272] INFO: lockdep is turned off. [ 306.051217] irq event stamp: 77505 [ 306.054647] hardirqs last enabled at (77505): [] _raw_spin_unlock_irqrestore+0x47/0x60 [ 306.064270] hardirqs last disabled at (77504): [] _raw_spin_lock_irqsave+0xf/0x50 [ 306.073404] softirqs last enabled at (77402): [] __do_softirq+0x389/0x47f [ 306.081885] softirqs last disabled at (77395): [] irq_exit+0xa9/0xc0 [ 306.089859] CPU: 3 PID: 183 Comm: kworker/3:2 Tainted: G D 5.5.0-rc6+ #1404 [ 306.098167] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP TLC, BIOS ICLSFWR1.R00.3201.A00.1905140358 05/14/2019 [ 306.111882] Workqueue: events drm_dp_delayed_destroy_work [ 306.117314] Call Trace: [ 306.119780] dump_stack+0x71/0xa0 [ 306.123135] ___might_sleep.cold+0xf7/0x10b [ 306.127399] exit_signals+0x2b/0x360 [ 306.131014] do_exit+0xa7/0xc70 [ 306.134189] ? 
kthread+0x100/0x140 [ 306.137615] rewind_stack_do_exit+0x17/0x20 Fixes: cd82d82cbc04 ("drm/dp_mst: Add branch bandwidth validation to MST atomic check") Cc: Mikita Lipski Cc: Alex Deucher Cc: Lyude Paul Acked-by: Mikita Lipski Reviewed-by: Lyude Paul Signed-off-by: José Roberto de Souza Signed-off-by: Alex Deucher --- drivers/gpu/drm/drm_dp_mst_topology.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 5d3c1d379277..021c5a98db09 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -5001,6 +5001,9 @@ int drm_dp_mst_atomic_check(struct drm_atomic_state *state) int i, ret = 0; for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) { + if (!mgr->mst_state) + continue; + ret = drm_dp_mst_atomic_check_vcpi_alloc_limit(mgr, mst_state); if (ret) break;