diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f42e8d467c12..b1bb10625cd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -90,6 +90,7 @@ #include "amdgpu_mes.h" #include "amdgpu_umc.h" #include "amdgpu_mmhub.h" +#include "amdgpu_df.h" #define MAX_GPU_INSTANCE 16 @@ -664,29 +665,6 @@ struct amdgpu_mmio_remap { resource_size_t bus_addr; }; -struct amdgpu_df_funcs { - void (*sw_init)(struct amdgpu_device *adev); - void (*sw_fini)(struct amdgpu_device *adev); - void (*enable_broadcast_mode)(struct amdgpu_device *adev, - bool enable); - u32 (*get_fb_channel_number)(struct amdgpu_device *adev); - u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); - void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, - bool enable); - void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); - void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, - bool enable); - int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, - int is_enable); - int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, - int is_disable); - void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, - uint64_t *count); - uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); - void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, - uint32_t ficadl_val, uint32_t ficadh_val); -}; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { GC_HWIP = 1, @@ -930,6 +908,9 @@ struct amdgpu_device { bool enable_mes; struct amdgpu_mes mes; + /* df */ + struct amdgpu_df df; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; int num_ip_blocks; struct mutex mn_lock; @@ -943,8 +924,6 @@ struct amdgpu_device { /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; - const struct amdgpu_df_funcs *df_funcs; - /* delayed work_func for deferring 
clockgating during resume */ struct delayed_work delayed_init_work; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d3da9dde4ee1..8609287620ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -613,15 +613,9 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (is_support_sw_smu(adev)) - smu_switch_power_profile(&adev->smu, - PP_SMC_POWER_PROFILE_COMPUTE, - !idle); - else if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->switch_power_profile) - amdgpu_dpm_switch_power_profile(adev, - PP_SMC_POWER_PROFILE_COMPUTE, - !idle); + amdgpu_dpm_switch_power_profile(adev, + PP_SMC_POWER_PROFILE_COMPUTE, + !idle); } bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) @@ -634,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + if (adev->family == AMDGPU_FAMILY_AI) { + int i; + + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); + } else { + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); + } + + return 0; +} + +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + uint32_t flush_type = 0; + bool all_hub = false; + + if (adev->gmc.xgmi.num_physical_nodes && + adev->asic_type == CHIP_VEGA20) + flush_type = 2; + + if (adev->family == AMDGPU_FAMILY_AI) + all_hub = true; + + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); +} + bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 069d5d230810..47b0f2957d1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid); +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 3c119407dc34..4bcc175a149d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -71,32 +71,56 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t sdma_engine_reg_base[8] = { - SOC15_REG_OFFSET(SDMA0, 0, - mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA1, 0, - mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA2, 0, - mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA3, 0, - mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA4, 0, - mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA5, 0, - mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA6, 0, - mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA7, 0, - mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL - }; + uint32_t sdma_engine_reg_base = 0; + uint32_t sdma_rlc_reg_offset; - uint32_t retval = sdma_engine_reg_base[engine_id] + switch (engine_id) { + default: + dev_warn(adev->dev, + "Invalid sdma engine id (%d), using engine id 0\n", + engine_id); + /* fall 
through */ + case 0: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; + break; + case 1: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL; + break; + case 2: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0, + mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL; + break; + case 3: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0, + mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL; + break; + case 4: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0, + mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL; + break; + case 5: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0, + mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL; + break; + case 6: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0, + mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL; + break; + case 7: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0, + mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL; + break; + } + + sdma_rlc_reg_offset = sdma_engine_reg_base + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, - queue_id, retval); + queue_id, sdma_rlc_reg_offset); - return retval; + return sdma_rlc_reg_offset; } static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, @@ -281,6 +305,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, .init_interrupts = kgd_gfx_v9_init_interrupts, .hqd_load = kgd_gfx_v9_hqd_load, + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_dump = kgd_gfx_v9_hqd_dump, .hqd_sdma_dump = kgd_hqd_sdma_dump, @@ -296,7 +321,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base, - .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, - .invalidate_tlbs_vmid = 
kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 61cd707158e4..a7b17c8deb00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -107,13 +107,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, lock_srbm(kgd, mec, pipe, queue_id, 0); } -static uint32_t get_queue_mask(struct amdgpu_device *adev, +static uint64_t get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + - queue_id) & 31; + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id; - return ((uint32_t)1) << bit; + return 1ull << bit; } static void release_queue(struct kgd_dev *kgd) @@ -268,21 +268,6 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); acquire_queue(kgd, pipe_id, queue_id); - /* HIQ is set during driver init period with vmid set to 0*/ - if (m->cp_hqd_vmid == 0) { - uint32_t value, mec, pipe; - - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; - pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", - mec, pipe, queue_id); - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); - } - /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. 
*/ mqd_hqd = &m->cp_mqd_base_addr_lo; hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); @@ -332,9 +317,10 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, lower_32_bits((uint64_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uint64_t)wptr)); - pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id)); + pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), - get_queue_mask(adev, pipe_id, queue_id)); + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ @@ -350,6 +336,59 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } +static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct v10_compute_mqd *m; + uint32_t mec, pipe; + int r; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + + spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 7); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(queue_id) | + PACKET3_MAP_QUEUES_PIPE(pipe) | + PACKET3_MAP_QUEUES_ME((mec - 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: 
all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq.ring_lock); + release_queue(kgd); + + return r; +} + static int kgd_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) @@ -686,71 +725,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) -{ - signed long r; - uint32_t seq; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - spin_lock(&adev->gfx.kiq.ring_lock); - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); - amdgpu_ring_write(ring, - PACKET3_INVALIDATE_TLBS_DST_SEL(1) | - PACKET3_INVALIDATE_TLBS_PASID(pasid)); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); - - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); - if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; -} - -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - uint16_t queried_pasid; - bool ret; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - if (amdgpu_emu_mode == 0 && ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid); - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) 
- continue; - - ret = get_atc_vmid_pasid_mapping_info(kgd, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, 0); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return 0; - } - - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); - return 0; -} - static int kgd_address_watch_disable(struct kgd_dev *kgd) { return 0; @@ -817,6 +791,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, + .hiq_mqd_load = kgd_hiq_mqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_dump = kgd_hqd_dump, .hqd_sdma_dump = kgd_hqd_sdma_dump, @@ -832,7 +807,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { get_atc_vmid_pasid_mapping_info, .get_tile_config = amdgpu_amdkfd_get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 6e6f0a99ec06..8f052e98a3c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -696,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, lower_32_bits(page_table_base)); } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - unsigned int tmp; - - if (adev->in_gpu_reset) - return -EIO; - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - tmp = 
RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - break; - } - } - - return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid\n"); - return 0; - } - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - return 0; -} - /** * read_vmid_from_vmfault_reg - read vmid from register * @@ -771,7 +732,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index bfbddedb2380..19a10db93d68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -657,45 +657,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, lower_32_bits(page_table_base)); } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; - unsigned int tmp; - - if (adev->in_gpu_reset) - return -EIO; - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - break; - } - } - - 
return 0; -} - -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return -EINVAL; - } - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - return 0; -} - const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -717,6 +678,4 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index e7861f0ef415..8562afe5b761 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -103,13 +103,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, lock_srbm(kgd, mec, pipe, queue_id, 0); } -static uint32_t get_queue_mask(struct amdgpu_device *adev, +static uint64_t get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + - queue_id) & 31; + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id; - return ((uint32_t)1) << bit; + return 1ull << bit; } static void release_queue(struct kgd_dev *kgd) @@ -258,21 +258,6 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, acquire_queue(kgd, pipe_id, queue_id); - /* HIQ is set during driver init period with vmid set to 0*/ - if (m->cp_hqd_vmid == 0) { - uint32_t value, mec, pipe; - - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; - pipe = (pipe_id % 
adev->gfx.mec.num_pipe_per_mec); - - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", - mec, pipe, queue_id); - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); - } - /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); @@ -323,7 +308,7 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), - get_queue_mask(adev, pipe_id, queue_id)); + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ @@ -339,6 +324,59 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } +int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct v9_mqd *m; + uint32_t mec, pipe; + int r; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + + spin_lock(&adev->gfx.kiq.ring_lock); + r = amdgpu_ring_alloc(kiq_ring, 7); + if (r) { + pr_err("Failed to alloc KIQ (%d).\n", r); + goto out_unlock; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(queue_id) | + PACKET3_MAP_QUEUES_PIPE(pipe) | + 
PACKET3_MAP_QUEUES_ME((mec - 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); + amdgpu_ring_commit(kiq_ring); + +out_unlock: + spin_unlock(&adev->gfx.kiq.ring_lock); + release_queue(kgd); + + return r; +} + int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) @@ -617,100 +655,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, - uint32_t flush_type) -{ - signed long r; - uint32_t seq; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - - spin_lock(&adev->gfx.kiq.ring_lock); - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); - amdgpu_ring_write(ring, - PACKET3_INVALIDATE_TLBS_DST_SEL(1) | - PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | - PACKET3_INVALIDATE_TLBS_PASID(pasid) | - PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); - - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); - if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; -} - -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) 
kgd; - int vmid, i; - uint16_t queried_pasid; - bool ret; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - uint32_t flush_type = 0; - - if (adev->in_gpu_reset) - return -EIO; - if (adev->gmc.xgmi.num_physical_nodes && - adev->asic_type == CHIP_VEGA20) - flush_type = 2; - - if (ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid, flush_type); - - for (vmid = 0; vmid < 16; vmid++) { - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) - continue; - - ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - i, flush_type); - break; - } - } - - return 0; -} - -int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int i; - - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { - pr_err("non kfd vmid %d\n", vmid); - return 0; - } - - /* Use legacy mode tlb invalidation. - * - * Currently on Raven the code below is broken for anything but - * legacy mode due to a MMHUB power gating problem. A workaround - * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ - * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack - * bit. - * - * TODO 1: agree on the right set of invalidation registers for - * KFD use. Use the last one for now. Invalidate both GC and - * MMHUB. 
- * - * TODO 2: support range-based invalidation, requires kfg2kgd - * interface change - */ - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); - - return 0; -} - int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) { return 0; @@ -778,6 +722,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, .init_interrupts = kgd_gfx_v9_init_interrupts, .hqd_load = kgd_gfx_v9_hqd_load, + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_dump = kgd_gfx_v9_hqd_dump, .hqd_sdma_dump = kgd_hqd_sdma_dump, @@ -793,7 +738,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, - .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, - .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 02b1426d17d1..63d3e6683dfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -33,6 +33,9 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); +int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off); int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs); @@ -57,7 +60,5 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, uint8_t vmid, uint16_t *p_pasid); -int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -int 
kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, struct tile_config *config); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5b330f69194b..a52a084158b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -909,6 +909,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (parser->entity && parser->entity != entity) return -EINVAL; + /* Return if there is no run queue associated with this entity. + * Possibly because of disabled HW IP*/ + if (entity->rq == NULL) + return -EINVAL; + parser->entity = entity; ring = to_amdgpu_ring(entity->rq->sched); @@ -1229,7 +1234,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, goto error_abort; } - job->owner = p->filp; p->fence = dma_fence_get(&job->base.s_fence->finished); amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 63343bb43049..f24ed9a1a3e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -144,10 +145,17 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, *pos &= (1UL << 22) - 1; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || - (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) + (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se_bank, sh_bank, instance_bank); @@ -193,6 +201,9 @@ end: if (pm_pg_lock) mutex_unlock(&adev->pm.mutex); + 
pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -237,13 +248,20 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_PCIE(*pos >> 2); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -251,6 +269,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -276,12 +297,19 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_PCIE(*pos >> 2, value); @@ -291,6 +319,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -316,13 +347,20 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_DIDT(*pos >> 2); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -330,6 +368,9 @@ static ssize_t 
amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -355,12 +396,19 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_DIDT(*pos >> 2, value); @@ -370,6 +418,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -395,13 +446,20 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; value = RREG32_SMC(*pos); r = put_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } result += 4; buf += 4; @@ -409,6 +467,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -434,12 +495,19 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * if (size & 0x3 || *pos & 0x3) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + while (size) { uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) + if (r) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return r; + } WREG32_SMC(*pos, value); @@ -449,6 
+517,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * size -= 4; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return result; } @@ -572,7 +643,16 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, idx = *pos >> 2; valuesize = sizeof(values); + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -633,6 +713,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, wave = (*pos & GENMASK_ULL(36, 31)) >> 31; simd = (*pos & GENMASK_ULL(44, 37)) >> 37; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -644,6 +728,9 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); mutex_unlock(&adev->grbm_idx_mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (!x) return -EINVAL; @@ -711,6 +798,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, if (!data) return -ENOMEM; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -726,6 +817,9 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); mutex_unlock(&adev->grbm_idx_mutex); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + while (size) { uint32_t value; @@ -859,6 +953,10 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, 
void *data) struct amdgpu_device *adev = dev->dev_private; int r = 0, i; + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; + /* Avoid accidently unparking the sched thread during GPU reset */ mutex_lock(&adev->lock_reset); @@ -889,6 +987,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) mutex_unlock(&adev->lock_reset); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } @@ -907,8 +1008,17 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev)); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } @@ -917,8 +1027,17 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT)); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9b4c18b3546f..53d882000101 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2345,14 +2345,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; /* handle putting the SMC in the appropriate state */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { - if (is_support_sw_smu(adev)) { - r = smu_set_mp1_state(&adev->smu, adev->mp1_state); - } else if 
(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_mp1_state) { - r = adev->powerplay.pp_funcs->set_mp1_state( - adev->powerplay.pp_handle, - adev->mp1_state); - } + r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); if (r) { DRM_ERROR("SMC failed to set mp1 state %d, %d\n", adev->mp1_state, r); @@ -2855,6 +2848,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, hash_init(adev->mn_hash); mutex_init(&adev->lock_reset); mutex_init(&adev->psp.mutex); + mutex_init(&adev->notifier_lock); r = amdgpu_device_check_arguments(adev); if (r) @@ -3765,6 +3759,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_RAVEN: + case CHIP_ARCTURUS: break; default: goto disabled; @@ -4359,55 +4354,21 @@ int amdgpu_device_baco_enter(struct drm_device *dev) if (ras && ras->supported) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - int ret; - - ret = smu_baco_enter(smu); - if (ret) - return ret; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; - } - - return 0; + return amdgpu_dpm_baco_enter(adev); } int amdgpu_device_baco_exit(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; if (!amdgpu_device_supports_baco(adev->ddev)) return -ENOTSUPP; - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - int ret; - - ret = smu_baco_exit(smu); - if (ret) - return ret; - - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state 
||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; - } + ret = amdgpu_dpm_baco_exit(adev); + if (ret) + return ret; if (ras && ras->supported) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h new file mode 100644 index 000000000000..61a26c15c8dd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -0,0 +1,62 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __AMDGPU_DF_H__ +#define __AMDGPU_DF_H__ + +struct amdgpu_df_hash_status { + bool hash_64k; + bool hash_2m; + bool hash_1g; +}; + +struct amdgpu_df_funcs { + void (*sw_init)(struct amdgpu_device *adev); + void (*sw_fini)(struct amdgpu_device *adev); + void (*enable_broadcast_mode)(struct amdgpu_device *adev, + bool enable); + u32 (*get_fb_channel_number)(struct amdgpu_device *adev); + u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, + bool enable); + int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, + int is_enable); + int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, + int is_disable); + void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, + uint64_t *count); + uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); + void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, + uint32_t ficadl_val, uint32_t ficadh_val); +}; + +struct amdgpu_df { + struct amdgpu_df_hash_status hash_status; + const struct amdgpu_df_funcs *funcs; +}; + +#endif /* __AMDGPU_DF_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 4e699071d144..6d520a3eec40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -513,13 +513,23 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, * will not allow USWC mappings. * Also, don't allow GTT domain if the BO doens't have USWC falg set. 
*/ - if (adev->asic_type >= CHIP_CARRIZO && - adev->asic_type < CHIP_RAVEN && - (adev->flags & AMD_IS_APU) && - (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && + if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && amdgpu_bo_support_uswc(bo_flags) && - amdgpu_device_asic_has_dc_support(adev->asic_type)) - domain |= AMDGPU_GEM_DOMAIN_GTT; + amdgpu_device_asic_has_dc_support(adev->asic_type)) { + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: + domain |= AMDGPU_GEM_DOMAIN_GTT; + break; + case CHIP_RAVEN: + /* enable S/G on PCO and RV2 */ + if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + domain |= AMDGPU_GEM_DOMAIN_GTT; + break; + default: + break; + } + } #endif return domain; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index cd76fbf4385d..a2e8c3dfb4f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -946,23 +946,54 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block bool swsmu = is_support_sw_smu(adev); switch (block_type) { - case AMD_IP_BLOCK_TYPE_GFX: case AMD_IP_BLOCK_TYPE_UVD: - case AMD_IP_BLOCK_TYPE_VCN: case AMD_IP_BLOCK_TYPE_VCE: - case AMD_IP_BLOCK_TYPE_SDMA: if (swsmu) { ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); - } else { - if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_powergating_by_smu) { - mutex_lock(&adev->pm.mutex); - ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( - (adev)->powerplay.pp_handle, block_type, gate)); - mutex_unlock(&adev->pm.mutex); - } + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) { + /* + * TODO: need a better lock mechanism + * + * Here adev->pm.mutex lock protection is enforced on + * UVD and VCE cases only. Since for other cases, there + * may be already lock protection in amdgpu_pm.c. + * This is a quick fix for the deadlock issue below. 
+ * INFO: task ocltst:2028 blocked for more than 120 seconds. + * Tainted: G OE 5.0.0-37-generic #40~18.04.1-Ubuntu + * "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. + * ocltst D 0 2028 2026 0x00000000 + * Call Trace: + * __schedule+0x2c0/0x870 + * schedule+0x2c/0x70 + * schedule_preempt_disabled+0xe/0x10 + * __mutex_lock.isra.9+0x26d/0x4e0 + * __mutex_lock_slowpath+0x13/0x20 + * ? __mutex_lock_slowpath+0x13/0x20 + * mutex_lock+0x2f/0x40 + * amdgpu_dpm_set_powergating_by_smu+0x64/0xe0 [amdgpu] + * gfx_v8_0_enable_gfx_static_mg_power_gating+0x3c/0x70 [amdgpu] + * gfx_v8_0_set_powergating_state+0x66/0x260 [amdgpu] + * amdgpu_device_ip_set_powergating_state+0x62/0xb0 [amdgpu] + * pp_dpm_force_performance_level+0xe7/0x100 [amdgpu] + * amdgpu_set_dpm_forced_performance_level+0x129/0x330 [amdgpu] + */ + mutex_lock(&adev->pm.mutex); + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate)); + mutex_unlock(&adev->pm.mutex); } break; + case AMD_IP_BLOCK_TYPE_GFX: + case AMD_IP_BLOCK_TYPE_VCN: + case AMD_IP_BLOCK_TYPE_SDMA: + if (swsmu) + ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate)); + break; case AMD_IP_BLOCK_TYPE_JPEG: if (swsmu) ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); @@ -970,12 +1001,9 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block case AMD_IP_BLOCK_TYPE_GMC: case AMD_IP_BLOCK_TYPE_ACP: if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_powergating_by_smu) { - mutex_lock(&adev->pm.mutex); + adev->powerplay.pp_funcs->set_powergating_by_smu) ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( (adev)->powerplay.pp_handle, block_type, gate)); - mutex_unlock(&adev->pm.mutex); - } break; default: break; @@ -983,3 
+1011,163 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block return ret; } + +int amdgpu_dpm_baco_enter(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_baco_enter(smu); + } else { + if (!pp_funcs || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 1); + } + + return ret; +} + +int amdgpu_dpm_baco_exit(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_baco_exit(smu); + } else { + if (!pp_funcs || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* exit BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 0); + } + + return ret; +} + +int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, + enum pp_mp1_state mp1_state) +{ + int ret = 0; + + if (is_support_sw_smu(adev)) { + ret = smu_set_mp1_state(&adev->smu, mp1_state); + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_mp1_state) { + ret = adev->powerplay.pp_funcs->set_mp1_state( + adev->powerplay.pp_handle, + mp1_state); + } + + return ret; +} + +bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + bool baco_cap; + + if (is_support_sw_smu(adev)) { + return smu_baco_is_support(smu); + } else { + if (!pp_funcs || !pp_funcs->get_asic_baco_capability) + return false; + + if (pp_funcs->get_asic_baco_capability(pp_handle, &baco_cap)) + return false; + + return baco_cap ? 
true : false; + } +} + +int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + + if (is_support_sw_smu(adev)) { + return smu_mode2_reset(smu); + } else { + if (!pp_funcs || !pp_funcs->asic_reset_mode_2) + return -ENOENT; + + return pp_funcs->asic_reset_mode_2(pp_handle); + } +} + +int amdgpu_dpm_baco_reset(struct amdgpu_device *adev) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + void *pp_handle = adev->powerplay.pp_handle; + struct smu_context *smu = &adev->smu; + int ret = 0; + + dev_info(adev->dev, "GPU BACO reset\n"); + + if (is_support_sw_smu(adev)) { + ret = smu_baco_enter(smu); + if (ret) + return ret; + + ret = smu_baco_exit(smu); + if (ret) + return ret; + } else { + if (!pp_funcs + || !pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 1); + if (ret) + return ret; + + /* exit BACO state */ + ret = pp_funcs->set_asic_baco_state(pp_handle, 0); + if (ret) + return ret; + } + + return 0; +} + +int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, + enum PP_SMC_POWER_PROFILE type, + bool en) +{ + int ret = 0; + + if (is_support_sw_smu(adev)) + ret = smu_switch_power_profile(&adev->smu, type, en); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->switch_power_profile) + ret = adev->powerplay.pp_funcs->switch_power_profile( + adev->powerplay.pp_handle, type, en); + + return ret; +} + +int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, + uint32_t pstate) +{ + int ret = 0; + + if (is_support_sw_smu_xgmi(adev)) + ret = smu_set_xgmi_pstate(&adev->smu, pstate); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_xgmi_pstate) + ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, + pstate); + + return ret; +} diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 2cfb677272af..902ca6c00cca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -341,10 +341,6 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ (adev)->powerplay.pp_handle, request)) -#define amdgpu_dpm_switch_power_profile(adev, type, en) \ - ((adev)->powerplay.pp_funcs->switch_power_profile(\ - (adev)->powerplay.pp_handle, type, en)) - #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \ ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ (adev)->powerplay.pp_handle, msg_id)) @@ -517,4 +513,24 @@ extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low); extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low); +int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, + uint32_t pstate); + +int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, + enum PP_SMC_POWER_PROFILE type, + bool en); + +int amdgpu_dpm_baco_reset(struct amdgpu_device *adev); + +int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev); + +bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); + +int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, + enum pp_mp1_state mp1_state); + +int amdgpu_dpm_baco_exit(struct amdgpu_device *adev); + +int amdgpu_dpm_baco_enter(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index e9efee04ca23..3c01252b1e0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -741,10 +741,18 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return 0; seq_printf(m, "gpu 
recover\n"); amdgpu_device_gpu_recover(adev, NULL); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index db7b2b3f9966..b88b8b82bb64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -543,12 +543,6 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return; - if (!is_support_sw_smu(adev) && - (!adev->powerplay.pp_funcs || - !adev->powerplay.pp_funcs->set_powergating_by_smu)) - return; - - mutex_lock(&adev->gfx.gfx_off_mutex); if (!enable) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8e88e0411662..af4bd279f42f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -76,11 +76,15 @@ struct kiq_pm4_funcs { struct amdgpu_ring *ring, u64 addr, u64 seq); + void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub); /* Packet sizes */ int set_resources_size; int map_queues_size; int unmap_queues_size; int query_status_size; + int invalidate_tlbs_size; }; struct amdgpu_kiq { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index c91dd602d5f1..86267baca07c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -60,6 +60,11 @@ */ #define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL +/* + * Default stolen memory size, 1024 * 768 * 4 + */ +#define AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE 0x300000ULL + struct firmware; /* @@ -92,6 +97,9 @@ struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type); + /* flush the vm tlb via pasid */ + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t 
pasid, + uint32_t flush_type, bool all_hub); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -216,6 +224,9 @@ struct amdgpu_gmc { }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ + ((adev), (pasid), (type), (allhub))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 73328d0c741d..d42be880a236 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -153,7 +153,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, if (r) return r; - job->owner = owner; *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); priority = job->base.s_priority; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index dc7ee9358dcd..3f7b8433d179 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -49,7 +49,6 @@ struct amdgpu_job { uint32_t preamble_status; uint32_t preemption_status; uint32_t num_ibs; - void *owner; bool vm_needs_flush; uint64_t vm_pd_addr; unsigned vmid; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 285d460624c8..b03b1eb7ba04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -37,6 +37,7 @@ #include #include #include +#include 
#include "hwmgr.h" #define WIDTH_4K 3840 @@ -158,10 +159,15 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type pm; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { if (adev->smu.ppt_funcs->get_current_power_state) pm = smu_get_current_power_state(&adev->smu); @@ -173,6 +179,9 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, pm = adev->pm.dpm.user_state; } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%s\n", (pm == POWER_STATE_TYPE_BATTERY) ? "battery" : (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance"); @@ -186,6 +195,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type state; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; @@ -196,10 +206,12 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, state = POWER_STATE_TYPE_BALANCED; else if (strncmp("performance", buf, strlen("performance")) == 0) state = POWER_STATE_TYPE_PERFORMANCE; - else { - count = -EINVAL; - goto fail; - } + else + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { mutex_lock(&adev->pm.mutex); @@ -212,12 +224,11 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, adev->pm.dpm.user_state = state; mutex_unlock(&adev->pm.mutex); - /* Can't set dpm state when the card is off */ - if (!(adev->flags & AMD_IS_PX) || - (ddev->switch_power_state == DRM_SWITCH_POWER_ON)) - amdgpu_pm_compute_clocks(adev); + amdgpu_pm_compute_clocks(adev); } -fail: + pm_runtime_mark_last_busy(ddev->dev); + 
pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -288,13 +299,14 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_dpm_forced_level level = 0xff; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return snprintf(buf, PAGE_SIZE, "off\n"); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) level = smu_get_performance_level(&adev->smu); @@ -303,6 +315,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, else level = adev->pm.dpm.forced_level; + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%s\n", (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" : (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : @@ -329,11 +344,6 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; - /* Can't force performance level when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - if (strncmp("low", buf, strlen("low")) == 0) { level = AMD_DPM_FORCED_LEVEL_LOW; } else if (strncmp("high", buf, strlen("high")) == 0) { @@ -353,17 +363,23 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, } else if (strncmp("profile_peak", buf, strlen("profile_peak")) == 0) { level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; } else { - count = -EINVAL; - goto fail; + return -EINVAL; } + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) current_level = smu_get_performance_level(&adev->smu); else if (adev->powerplay.pp_funcs->get_performance_level) current_level = 
amdgpu_dpm_get_performance_level(adev); - if (current_level == level) + if (current_level == level) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; + } /* profile_exit setting is valid only when current mode is in profile mode */ if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | @@ -372,29 +388,40 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) && (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) { pr_err("Currently not in any profile mode!\n"); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; } if (is_support_sw_smu(adev)) { ret = smu_force_performance_level(&adev->smu, level); - if (ret) - count = -EINVAL; + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } } else if (adev->powerplay.pp_funcs->force_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->pm.dpm.thermal_active) { - count = -EINVAL; mutex_unlock(&adev->pm.mutex); - goto fail; + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; } ret = amdgpu_dpm_force_performance_level(adev, level); - if (ret) - count = -EINVAL; - else + if (ret) { + mutex_unlock(&adev->pm.mutex); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } else { adev->pm.dpm.forced_level = level; + } mutex_unlock(&adev->pm.mutex); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); -fail: return count; } @@ -407,6 +434,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, struct pp_states_info data; int i, buf_len, ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_get_power_num_states(&adev->smu, &data); if (ret) @@ -414,6 +445,9 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, } else 
if (adev->powerplay.pp_funcs->get_pp_num_states) amdgpu_dpm_get_pp_num_states(adev, &data); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); for (i = 0; i < data.nums; i++) buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i, @@ -439,6 +473,10 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { pm = smu_get_current_power_state(smu); ret = smu_get_power_num_states(smu, &data); @@ -450,6 +488,9 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, amdgpu_dpm_get_pp_num_states(adev, &data); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + for (i = 0; i < data.nums; i++) { if (pm == data.states[i]) break; @@ -500,14 +541,18 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, struct pp_states_info data; ret = kstrtoul(buf, 0, &idx); - if (ret || idx >= ARRAY_SIZE(data.states)) { - count = -EINVAL; - goto fail; - } + if (ret || idx >= ARRAY_SIZE(data.states)) + return -EINVAL; + idx = array_index_nospec(idx, ARRAY_SIZE(data.states)); amdgpu_dpm_get_pp_num_states(adev, &data); state = data.states[idx]; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + /* only set user selected power states */ if (state != POWER_STATE_TYPE_INTERNAL_BOOT && state != POWER_STATE_TYPE_DEFAULT) { @@ -515,8 +560,10 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, AMD_PP_TASK_ENABLE_USER_STATE, &state); adev->pp_force_state_enabled = true; } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); } -fail: + return count; } @@ -538,20 +585,32 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; 
char *table = NULL; - int size; + int size, ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { size = smu_sys_get_pp_table(&adev->smu, (void **)&table); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (size < 0) return size; - } - else if (adev->powerplay.pp_funcs->get_pp_table) + } else if (adev->powerplay.pp_funcs->get_pp_table) { size = amdgpu_dpm_get_pp_table(adev, &table); - else + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (size < 0) + return size; + } else { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return 0; + } if (size >= PAGE_SIZE) size = PAGE_SIZE - 1; @@ -573,13 +632,23 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return ret; + } } else if (adev->powerplay.pp_funcs->set_pp_table) amdgpu_dpm_set_pp_table(adev, buf, count); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -703,18 +772,28 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, tmp_str++; } + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { ret = smu_od_edit_dpm_table(&adev->smu, type, parameter, parameter_size); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } else { if (adev->powerplay.pp_funcs->odn_edit_dpm_table) { ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, parameter_size); - if (ret) + 
if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } if (type == PP_OD_COMMIT_DPM_TABLE) { @@ -722,12 +801,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } else { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; } } } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } @@ -738,27 +823,33 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - uint32_t size = 0; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf); size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size); size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size); size += smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size); - return size; } else if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size); - return size; } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return size; } /** @@ -796,15 +887,27 @@ static ssize_t amdgpu_set_pp_feature_status(struct device *dev, pr_debug("featuremask = 0x%llx\n", featuremask); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if 
(is_support_sw_smu(adev)) { ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask); - if (ret) + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return -EINVAL; + } } + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); return count; } @@ -815,16 +918,27 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev)) return 0; - if (is_support_sw_smu(adev)) { - return smu_sys_get_pp_feature_mask(&adev->smu, buf); - } else if (adev->powerplay.pp_funcs->get_ppfeature_status) - return amdgpu_dpm_get_ppfeature_status(adev, buf); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; - return snprintf(buf, PAGE_SIZE, "\n"); + if (is_support_sw_smu(adev)) + size = smu_sys_get_pp_feature_mask(&adev->smu, buf); + else if (adev->powerplay.pp_funcs->get_ppfeature_status) + size = amdgpu_dpm_get_ppfeature_status(adev, buf); + else + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } /** @@ -863,16 +977,27 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); else if 
(adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } /* @@ -928,11 +1053,18 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (ret) return -EINVAL; @@ -945,16 +1077,27 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, @@ -964,8 +1107,8 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - int ret; uint32_t mask = 0; + int ret; if 
(amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; @@ -974,11 +1117,18 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (ret) return -EINVAL; @@ -991,16 +1141,27 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, @@ -1020,10 +1181,19 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ 
-1037,16 +1207,27 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, @@ -1066,10 +1247,19 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1083,16 +1273,27 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); + size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, 
buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, @@ -1112,10 +1313,19 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1129,16 +1339,27 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); + size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) - return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); + size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); else - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, @@ -1158,10 +1379,19 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, if (ret) return ret; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, 
mask, true); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); if (ret) return -EINVAL; @@ -1176,15 +1406,23 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; + int ret; if (amdgpu_sriov_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK); else if (adev->powerplay.pp_funcs->get_sclk_od) value = amdgpu_dpm_get_sclk_od(adev); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%d\n", value); } @@ -1203,10 +1441,12 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, ret = kstrtol(buf, 0, &value); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value); @@ -1222,7 +1462,9 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, } } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -1233,15 +1475,23 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; + int ret; if (amdgpu_sriov_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK); else if (adev->powerplay.pp_funcs->get_mclk_od) value = amdgpu_dpm_get_mclk_od(adev); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + 
return snprintf(buf, PAGE_SIZE, "%d\n", value); } @@ -1260,10 +1510,12 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, ret = kstrtol(buf, 0, &value); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value); @@ -1279,7 +1531,9 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, } } -fail: + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return count; } @@ -1309,16 +1563,27 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + ssize_t size; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; - if (is_support_sw_smu(adev)) - return smu_get_power_profile_mode(&adev->smu, buf); - else if (adev->powerplay.pp_funcs->get_power_profile_mode) - return amdgpu_dpm_get_power_profile_mode(adev, buf); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; - return snprintf(buf, PAGE_SIZE, "\n"); + if (is_support_sw_smu(adev)) + size = smu_get_power_profile_mode(&adev->smu, buf); + else if (adev->powerplay.pp_funcs->get_power_profile_mode) + size = amdgpu_dpm_get_power_profile_mode(adev, buf); + else + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; } @@ -1343,7 +1608,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, tmp[1] = '\0'; ret = kstrtol(tmp, 0, &profile_mode); if (ret) - goto fail; + return -EINVAL; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return -EINVAL; @@ -1358,23 +1623,30 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, while (tmp_str[0]) { sub_str = strsep(&tmp_str, delimiter); ret = kstrtol(sub_str, 0, 
&parameter[parameter_size]); - if (ret) { - count = -EINVAL; - goto fail; - } + if (ret) + return -EINVAL; parameter_size++; while (isspace(*tmp_str)) tmp_str++; } } parameter[parameter_size] = profile_mode; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true); else if (adev->powerplay.pp_funcs->set_power_profile_mode) ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (!ret) return count; -fail: + return -EINVAL; } @@ -1397,10 +1669,17 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + r = pm_runtime_get_sync(ddev->dev); + if (r < 0) + return r; + /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (r) return r; @@ -1426,10 +1705,17 @@ static ssize_t amdgpu_get_memory_busy_percent(struct device *dev, if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + r = pm_runtime_get_sync(ddev->dev); + if (r < 0) + return r; + /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size); + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + if (r) return r; @@ -1455,11 +1741,20 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; uint64_t count0, count1; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) + return ret; + amdgpu_asic_get_pcie_usage(adev, &count0, &count1); + + pm_runtime_mark_last_busy(ddev->dev); +
pm_runtime_put_autosuspend(ddev->dev); + return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n", count0, count1, pcie_get_mps(adev->pdev)); } @@ -1547,42 +1842,43 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; int channel = to_sensor_dev_attr(attr)->index; int r, temp = 0, size = sizeof(temp); - /* Can't get temperature when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - if (channel >= PP_TEMP_MAX) return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + switch (channel) { case PP_TEMP_JUNCTION: /* get current junction temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, (void *)&temp, &size); - if (r) - return r; break; case PP_TEMP_EDGE: /* get current edge temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, (void *)&temp, &size); - if (r) - return r; break; case PP_TEMP_MEM: /* get current memory temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, (void *)&temp, &size); - if (r) - return r; + break; + default: + r = -EINVAL; break; } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (r) + return r; + return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ -1678,16 +1974,27 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; + int ret; + + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { - if (!adev->powerplay.pp_funcs->get_fan_control_mode) + if (!adev->powerplay.pp_funcs->get_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } pwm_mode = 
amdgpu_dpm_get_fan_control_mode(adev); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return sprintf(buf, "%i\n", pwm_mode); } @@ -1697,27 +2004,32 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, size_t count) { struct amdgpu_device *adev = dev_get_drvdata(dev); - int err; + int err, ret; int value; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - err = kstrtoint(buf, 10, &value); if (err) return err; + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, value); } else { - if (!adev->powerplay.pp_funcs->set_fan_control_mode) + if (!adev->powerplay.pp_funcs->set_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } amdgpu_dpm_set_fan_control_mode(adev, value); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return count; } @@ -1744,34 +2056,43 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, u32 value; u32 pwm_mode; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { pr_info("manual fan speed control should be enabled first\n"); + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; } err = kstrtou32(buf, 10, &value); - if (err) + if (err) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return err; + } value = 
(value * 100) / 255; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_set_fan_speed_percent(&adev->smu, value); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->set_fan_speed_percent) { + else if (adev->powerplay.pp_funcs->set_fan_speed_percent) err = amdgpu_dpm_set_fan_speed_percent(adev, value); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return count; } @@ -1784,20 +2105,22 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, int err; u32 speed = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_percent(&adev->smu, &speed); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_percent) { + else if (adev->powerplay.pp_funcs->get_fan_speed_percent) err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; speed = (speed * 255) / 100; @@ -1812,20 +2135,22 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, int err; u32 speed = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &speed); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) err = 
amdgpu_dpm_get_fan_speed_rpm(adev, &speed); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return sprintf(buf, "%i\n", speed); } @@ -1839,8 +2164,16 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, u32 size = sizeof(min_rpm); int r; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, (void *)&min_rpm, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1856,8 +2189,16 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, u32 size = sizeof(max_rpm); int r; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, (void *)&max_rpm, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -1872,20 +2213,22 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, int err; u32 rpm = 0; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; - if (is_support_sw_smu(adev)) { + if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &rpm); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm); - if (err) - return err; - } + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + if (err) + return err; return sprintf(buf, "%i\n", rpm); } @@ -1899,33 +2242,41 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, u32 value; u32 
pwm_mode; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); - if (pwm_mode != AMD_FAN_CTRL_MANUAL) + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -ENODATA; - - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + } err = kstrtou32(buf, 10, &value); + if (err) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return err; + } + + if (is_support_sw_smu(adev)) + err = smu_set_fan_speed_rpm(&adev->smu, value); + else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) + err = amdgpu_dpm_set_fan_speed_rpm(adev, value); + else + err = -EINVAL; + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (err) return err; - if (is_support_sw_smu(adev)) { - err = smu_set_fan_speed_rpm(&adev->smu, value); - if (err) - return err; - } else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) { - err = amdgpu_dpm_set_fan_speed_rpm(adev, value); - if (err) - return err; - } - return count; } @@ -1935,15 +2286,27 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; + int ret; + + ret = pm_runtime_get_sync(adev->ddev->dev); + if (ret < 0) + return ret; if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { - if (!adev->powerplay.pp_funcs->get_fan_control_mode) + if (!adev->powerplay.pp_funcs->get_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + 
pm_runtime_put_autosuspend(adev->ddev->dev); + return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1); } @@ -1957,12 +2320,6 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, int value; u32 pwm_mode; - /* Can't adjust fan when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - - err = kstrtoint(buf, 10, &value); if (err) return err; @@ -1974,14 +2331,24 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, else return -EINVAL; + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, pwm_mode); } else { - if (!adev->powerplay.pp_funcs->set_fan_control_mode) + if (!adev->powerplay.pp_funcs->set_fan_control_mode) { + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); return -EINVAL; + } amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); } + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + return count; } @@ -1990,18 +2357,20 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 vddgfx; int r, size = sizeof(vddgfx); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&vddgfx, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2020,7 +2389,6 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 vddnb; int r, size = 
sizeof(vddnb); @@ -2028,14 +2396,17 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, if (!(adev->flags & AMD_IS_APU)) return -EINVAL; - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&vddnb, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2054,19 +2425,21 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; u32 query = 0; int r, size = sizeof(u32); unsigned uw; - /* Can't get power when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2089,16 +2462,27 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; + ssize_t size; + int r; + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; if (is_support_sw_smu(adev)) { smu_get_power_limit(&adev->smu, &limit, true, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, 
"%u\n", limit * 1000000); } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + return size; } static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, @@ -2107,16 +2491,27 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; + ssize_t size; + int r; + + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; if (is_support_sw_smu(adev)) { smu_get_power_limit(&adev->smu, &limit, false, true); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false); - return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { - return snprintf(buf, PAGE_SIZE, "\n"); + size = snprintf(buf, PAGE_SIZE, "\n"); } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + + return size; } @@ -2138,13 +2533,20 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, value = value / 1000000; /* convert to Watt */ - if (is_support_sw_smu(adev)) { + + err = pm_runtime_get_sync(adev->ddev->dev); + if (err < 0) + return err; + + if (is_support_sw_smu(adev)) err = smu_set_power_limit(&adev->smu, value); - } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { + else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); - } else { + else err = -EINVAL; - } + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); if (err) return err; @@ -2157,18 +2559,20 
@@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; uint32_t sclk; int r, size = sizeof(sclk); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&sclk, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -2187,18 +2591,20 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); - struct drm_device *ddev = adev->ddev; uint32_t mclk; int r, size = sizeof(mclk); - /* Can't get voltage when the card is off */ - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; + r = pm_runtime_get_sync(adev->ddev->dev); + if (r < 0) + return r; /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, (void *)&mclk, &size); + + pm_runtime_mark_last_busy(adev->ddev->dev); + pm_runtime_put_autosuspend(adev->ddev->dev); + if (r) return r; @@ -3220,8 +3626,12 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - struct drm_device *ddev = adev->ddev; u32 flags = 0; + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) + return r; amdgpu_device_ip_get_clockgating_state(adev, &flags); seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags); @@ -3230,23 +3640,28 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) if (!adev->pm.dpm_enabled) { seq_printf(m, "dpm not enabled\n"); + pm_runtime_mark_last_busy(dev->dev); + 
pm_runtime_put_autosuspend(dev->dev); return 0; } - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { - seq_printf(m, "PX asic powered off\n"); - } else if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { + + if (!is_support_sw_smu(adev) && + adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level) adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m); else seq_printf(m, "Debugfs support not implemented for this asic\n"); mutex_unlock(&adev->pm.mutex); + r = 0; } else { - return amdgpu_debugfs_pm_info_pp(m, adev); + r = amdgpu_debugfs_pm_info_pp(m, adev); } - return 0; + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + return r; } static const struct drm_info_list amdgpu_pm_info_list[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index cf21ad0cad9a..07914e34bc25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: if (!(flags & PERF_EF_RELOAD)) - pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1); - pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 0); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 0); break; default: break; @@ -101,7 +101,7 @@ static void amdgpu_perf_read(struct perf_event *event) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_get_count(pe->adev, hwc->conf, + pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->conf, &count); break; default: @@ -126,7 +126,7 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case 
PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 0); + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 0); break; default: break; @@ -156,7 +156,7 @@ static int amdgpu_perf_add(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - retval = pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + retval = pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1); break; default: return 0; @@ -184,7 +184,7 @@ static void amdgpu_perf_del(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 1); + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 1); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 281d89640344..3a1570dafe34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -529,6 +529,11 @@ static int psp_xgmi_unload(struct psp_context *psp) { int ret; struct psp_gfx_cmd_resp *cmd; + struct amdgpu_device *adev = psp->adev; + + /* XGMI TA unload currently is not supported on Arcturus */ + if (adev->asic_type == CHIP_ARCTURUS) + return 0; /* * TODO: bypass the unloading in sriov for now diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 96fc538ec824..766be7f18282 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -686,6 +686,7 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data = {0, 0, 0, NULL}; + int i; if (!obj) return -EINVAL; @@ -700,6 +701,13 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, if (adev->umc.funcs->query_ras_error_address) adev->umc.funcs->query_ras_error_address(adev, &err_data); break; + case AMDGPU_RAS_BLOCK__SDMA: + if (adev->sdma.funcs->query_ras_error_count) { + 
for (i = 0; i < adev->sdma.num_instances; i++) + adev->sdma.funcs->query_ras_error_count(adev, i, + &err_data); + } + break; case AMDGPU_RAS_BLOCK__GFX: if (adev->gfx.funcs->query_ras_error_count) adev->gfx.funcs->query_ras_error_count(adev, &err_data); @@ -1345,7 +1353,8 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_ras *ras = container_of(work, struct amdgpu_ras, recovery_work); - amdgpu_device_gpu_recover(ras->adev, 0); + if (amdgpu_device_should_recover_gpu(ras->adev)) + amdgpu_device_gpu_recover(ras->adev, 0); atomic_set(&ras->in_recovery, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 346dcb1f7146..485335267d78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -50,6 +50,14 @@ struct amdgpu_sdma_instance { bool burst_nop; }; +struct amdgpu_sdma_ras_funcs { + int (*ras_late_init)(struct amdgpu_device *adev, + void *ras_ih_info); + void (*ras_fini)(struct amdgpu_device *adev); + int (*query_ras_error_count)(struct amdgpu_device *adev, + uint32_t instance, void *ras_error_status); +}; + struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; struct drm_gpu_scheduler *sdma_sched[AMDGPU_MAX_SDMA_INSTANCES]; @@ -61,6 +69,7 @@ struct amdgpu_sdma { uint32_t srbm_soft_reset; bool has_page_queue; struct ras_common_if *ras_if; + const struct amdgpu_sdma_ras_funcs *funcs; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 3114d8a47e88..dee446278417 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index ed106d99d078..f96464e2c157 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ 
-75,6 +75,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case CHIP_ARCTURUS: fw_name = FIRMWARE_ARCTURUS; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; break; case CHIP_RENOIR: fw_name = FIRMWARE_RENOIR; @@ -165,15 +168,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); return r; } - } - if (adev->vcn.indirect_sram) { - r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r); - return r; + if (adev->vcn.indirect_sram) { + r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo, + &adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr); + if (r) { + dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r); + return r; + } } } @@ -186,15 +189,14 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) cancel_delayed_work_sync(&adev->vcn.idle_work); - if (adev->vcn.indirect_sram) { - amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, - (void **)&adev->vcn.dpg_sram_cpu_addr); - } - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { if (adev->vcn.harvest_config & (1 << j)) continue; + if (adev->vcn.indirect_sram) { + amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo, + &adev->vcn.inst[j].dpg_sram_gpu_addr, + (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr); + } kvfree(adev->vcn.inst[j].saved_bo); amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, @@ -298,7 +300,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, j, &new_state); } fence[j] += 
amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); @@ -341,7 +343,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) new_state.fw_based = VCN_DPG_STATE__PAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, ring->me, &new_state); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index e6dee8224d33..c4984c5fb2db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -57,6 +57,11 @@ #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 +#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b +#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 +#define mmUVD_REG_XX_MASK 0x026c +#define mmUVD_REG_XX_MASK_BASE_IDX 1 + /* 1 second timeout */ #define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) @@ -104,27 +109,27 @@ internal_reg_offset >>= 2; \ }) -#define RREG32_SOC15_DPG_MODE_2_0(offset, mask_en) \ - ({ \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ - (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ - mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ - offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ - RREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA); \ +#define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) \ + ({ \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \ }) -#define WREG32_SOC15_DPG_MODE_2_0(offset, value, mask_en, indirect) \ - do { \ - if (!indirect) { \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ - (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ - mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ - offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ - } else { \ - *adev->vcn.dpg_sram_curr_addr++ = offset; \ - 
*adev->vcn.dpg_sram_curr_addr++ = value; \ - } \ +#define WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \ + } \ } while (0) enum engine_status_constants { @@ -173,6 +178,10 @@ struct amdgpu_vcn_inst { struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; struct amdgpu_irq_src irq; struct amdgpu_vcn_reg external; + struct amdgpu_bo *dpg_sram_bo; + void *dpg_sram_cpu_addr; + uint64_t dpg_sram_gpu_addr; + uint32_t *dpg_sram_curr_addr; }; struct amdgpu_vcn { @@ -184,10 +193,6 @@ struct amdgpu_vcn { struct dpg_pause_state pause_state; bool indirect_sram; - struct amdgpu_bo *dpg_sram_bo; - void *dpg_sram_cpu_addr; - uint64_t dpg_sram_gpu_addr; - uint32_t *dpg_sram_curr_addr; uint8_t num_vcn_inst; struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; @@ -199,7 +204,7 @@ struct amdgpu_vcn { unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 4dc75eda1d91..d16231d6a790 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -82,6 +82,32 @@ struct amdgpu_prt_cb { struct dma_fence_cb cb; }; +/** + * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS + * happens while holding this lock anywhere to prevent deadlocks when + * an MMU notifier runs in reclaim-FS context. 
+ */ +static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm) +{ + mutex_lock(&vm->eviction_lock); + vm->saved_flags = memalloc_nofs_save(); +} + +static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm) +{ + if (mutex_trylock(&vm->eviction_lock)) { + vm->saved_flags = memalloc_nofs_save(); + return 1; + } + return 0; +} + +static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) +{ + memalloc_nofs_restore(vm->saved_flags); + mutex_unlock(&vm->eviction_lock); +} + /** * amdgpu_vm_level_shift - return the addr shift for each level * @@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } } - mutex_lock(&vm->eviction_lock); + amdgpu_vm_eviction_lock(vm); vm->evicting = false; - mutex_unlock(&vm->eviction_lock); + amdgpu_vm_eviction_unlock(vm); return 0; } @@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_KFD; - mutex_lock(&vm->eviction_lock); + amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; goto error_unlock; @@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->commit(&params, fence); error_unlock: - mutex_unlock(&vm->eviction_lock); + amdgpu_vm_eviction_unlock(vm); return r; } @@ -2533,18 +2559,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return false; /* Try to block ongoing updates */ - if (!mutex_trylock(&bo_base->vm->eviction_lock)) + if (!amdgpu_vm_eviction_trylock(bo_base->vm)) return false; /* Don't evict VM page tables while they are updated */ if (!dma_fence_is_signaled(bo_base->vm->last_direct) || !dma_fence_is_signaled(bo_base->vm->last_delayed)) { - mutex_unlock(&bo_base->vm->eviction_lock); + amdgpu_vm_eviction_unlock(bo_base->vm); return false; } bo_base->vm->evicting = true; - mutex_unlock(&bo_base->vm->eviction_lock); + amdgpu_vm_eviction_unlock(bo_base->vm); return true; } diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fade4f45320c..b4640ab38c95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -30,6 +30,7 @@ #include #include #include +#include #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -239,9 +240,12 @@ struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; - /* Lock to prevent eviction while we are updating page tables */ + /* Lock to prevent eviction while we are updating page tables + * use amdgpu_vm_eviction_lock/unlock(vm) + */ struct mutex eviction_lock; bool evicting; + unsigned int saved_flags; /* BOs who needs a validation */ struct list_head evicted; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 5cf920d9358b..a97af422575a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -146,16 +146,16 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev, ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200); ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208); - fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in); + fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in); if (fica_out != 0x1f) pr_err("xGMI error counters not enabled!\n"); - fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in); + fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in); if ((fica_out & 0xffff) == 2) error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63); - adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); + adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); return snprintf(buf, PAGE_SIZE, "%d\n", error_count); } @@ -291,13 +291,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); - if (is_support_sw_smu_xgmi(adev)) - ret = smu_set_xgmi_pstate(&adev->smu, pstate); - else if (adev->powerplay.pp_funcs && - 
adev->powerplay.pp_funcs->set_xgmi_pstate) - ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, - pstate); - + ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate); if (ret) { dev_err(adev->dev, "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index e9822ea8bb19..006f21ef7ddf 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1312,19 +1312,13 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev) static bool cik_asic_supports_baco(struct amdgpu_device *adev) { - bool baco_support; - switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: - smu7_asic_get_baco_capability(adev, &baco_support); - break; + return amdgpu_dpm_is_baco_supported(adev); default: - baco_support = false; - break; + return false; } - - return baco_support; } static enum amd_reset_method @@ -1366,7 +1360,7 @@ static int cik_asic_reset(struct amdgpu_device *adev) if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); - r = smu7_asic_baco_reset(adev); + r = amdgpu_dpm_baco_reset(adev); } else { r = cik_asic_pci_config_reset(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h index 9870bf27870e..f91ab4c246b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.h +++ b/drivers/gpu/drm/amd/amdgpu/cik.h @@ -31,7 +31,5 @@ void cik_srbm_select(struct amdgpu_device *adev, int cik_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap); -int smu7_asic_baco_reset(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index d6221298b477..d6aca1c08068 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -31,6 +31,9 @@ static 
u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; static void df_v1_7_sw_init(struct amdgpu_device *adev) { + adev->df.hash_status.hash_64k = false; + adev->df.hash_status.hash_2m = false; + adev->df.hash_status.hash_1g = false; } static void df_v1_7_sw_fini(struct amdgpu_device *adev) @@ -66,7 +69,7 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev) { int fb_channel_number; - fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + fb_channel_number = adev->df.funcs->get_fb_channel_number(adev); return df_v1_7_channel_number[fb_channel_number]; } @@ -77,7 +80,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, u32 tmp; /* Put DF on broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, true); + adev->df.funcs->enable_broadcast_mode(adev, true); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); @@ -92,7 +95,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, } /* Exit boradcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, false); + adev->df.funcs->enable_broadcast_mode(adev, false); } static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 2f884d941e8d..f51326598a8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -262,6 +262,32 @@ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, /* device attr for available perfmon counters */ static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL); +static void df_v3_6_query_hashes(struct amdgpu_device *adev) +{ + u32 tmp; + + adev->df.hash_status.hash_64k = false; + adev->df.hash_status.hash_2m = false; + adev->df.hash_status.hash_1g = false; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + /* encoding for hash-enabled on Arcturus */ + if 
(adev->df.funcs->get_fb_channel_number(adev) == 0xe) { + tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl); + adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl64K); + adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl2M); + adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp, + DF_CS_UMC_AON0_DfGlobalCtrl, + GlbHashIntlvCtl1G); + } +} + /* init perfmons */ static void df_v3_6_sw_init(struct amdgpu_device *adev) { @@ -273,6 +299,8 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++) adev->df_perfmon_config_assign_mask[i] = 0; + + df_v3_6_query_hashes(adev); } static void df_v3_6_sw_fini(struct amdgpu_device *adev) @@ -311,7 +339,7 @@ static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev) { int fb_channel_number; - fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + fb_channel_number = adev->df.funcs->get_fb_channel_number(adev); if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number)) fb_channel_number = 0; @@ -325,7 +353,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) { /* Put DF on broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, true); + adev->df.funcs->enable_broadcast_mode(adev, true); if (enable) { tmp = RREG32_SOC15(DF, 0, @@ -344,7 +372,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, } /* Exit broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, false); + adev->df.funcs->enable_broadcast_mode(adev, false); } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 6bc3b937fba2..874f641de281 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -40,6 +40,7 @@ #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" #include "soc15.h" +#include "soc15d.h" 
#include "soc15_common.h" #include "clearstate_gfx10.h" #include "v10_structs.h" @@ -120,7 +121,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), @@ -168,7 +169,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070105), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), @@ -345,15 +346,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); } +static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + 
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { .kiq_set_resources = gfx10_kiq_set_resources, .kiq_map_queues = gfx10_kiq_map_queues, .kiq_unmap_queues = gfx10_kiq_unmap_queues, .kiq_query_status = gfx10_kiq_query_status, + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, .query_status_size = 7, + .invalidate_tlbs_size = 12, }; static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) @@ -807,10 +822,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; info->ucode_id = AMDGPU_UCODE_ID_RLC_G; info->fw = adev->gfx.rlc_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - + if (info->fw) { + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } if (adev->gfx.rlc.is_rlc_v2_1 && adev->gfx.rlc.save_restore_list_cntl_size_bytes && adev->gfx.rlc.save_restore_list_gpm_size_bytes && @@ -3321,8 +3337,11 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. 
+ */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index cfc1403fc855..46f0533ba43f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4558,8 +4558,11 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a5492e375f29..46ab46757b25 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -48,15 +48,6 @@ #include "amdgpu_ras.h" -#include "sdma0/sdma0_4_2_offset.h" -#include "sdma1/sdma1_4_2_offset.h" -#include "sdma2/sdma2_4_2_2_offset.h" -#include "sdma3/sdma3_4_2_2_offset.h" -#include "sdma4/sdma4_4_2_2_offset.h" -#include "sdma5/sdma5_4_2_2_offset.h" -#include "sdma6/sdma6_4_2_2_offset.h" -#include "sdma7/sdma7_4_2_2_offset.h" - #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -748,6 +739,134 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if); +static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, + PACKET3_SET_RESOURCES_VMID_MASK(0) | + /* vmid_mask:0* queue_type:0 (KIQ) */ + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); + amdgpu_ring_write(kiq_ring, + lower_32_bits(queue_mask)); /* queue mask lo */ + 
amdgpu_ring_write(kiq_ring, + upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ +} + +static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | + /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | + /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + /* num_queues: must be 1 */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + +static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { + 
.kiq_set_resources = gfx_v9_0_kiq_set_resources, + .kiq_map_queues = gfx_v9_0_kiq_map_queues, + .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, + .kiq_query_status = gfx_v9_0_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, + .invalidate_tlbs_size = 12, +}; + +static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs; +} + static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { @@ -3115,74 +3234,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } -static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) -{ - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - uint64_t queue_mask = 0; - int r, i; - - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { - if (!test_bit(i, adev->gfx.mec.queue_bitmap)) - continue; - - /* This situation may be hit in the future if a new HW - * generation exposes more than 64 queues. 
If so, the - * definition of queue_mask needs updating */ - if (WARN_ON(i >= (sizeof(queue_mask)*8))) { - DRM_ERROR("Invalid KCQ enabled: %d\n", i); - break; - } - - queue_mask |= (1ull << i); - } - - r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - return r; - } - - /* set resources */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | - PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ - amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* oac mask */ - amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ - PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ - PACKET3_MAP_QUEUES_QUEUE(ring->queue) | - PACKET3_MAP_QUEUES_PIPE(ring->pipe) | - PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 
0 : 1)) | - PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ - PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ - PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ - amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); - } - - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ enable failed\n"); - - return r; -} - static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -3319,8 +3370,11 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. 
+ */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } @@ -3589,7 +3643,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; } - r = gfx_v9_0_kiq_kcq_enable(adev); + r = amdgpu_gfx_enable_kcq(adev); done: return r; } @@ -3646,6 +3700,23 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) return 0; } +static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) +{ + u32 tmp; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, + adev->df.hash_status.hash_64k); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, + adev->df.hash_status.hash_2m); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, + adev->df.hash_status.hash_1g); + WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); +} + static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) { if (adev->asic_type != CHIP_ARCTURUS) @@ -3663,6 +3734,8 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_constants_init(adev); + gfx_v9_0_init_tcp_config(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -3674,36 +3747,6 @@ static int gfx_v9_0_hw_init(void *handle) return r; } -static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) -{ - int r, i; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - - r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); - if (r) - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ - PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | - PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | - PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); - amdgpu_ring_write(kiq_ring, 
PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - } - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ disable failed\n"); - - return r; -} - static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3715,7 +3758,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) /* disable KCQ to avoid CPC touch memory not valid anymore */ - gfx_v9_0_kcq_disable(adev); + amdgpu_gfx_disable_kcq(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); @@ -4021,14 +4064,6 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA2, 0, mmSDMA2_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA3, 0, mmSDMA3_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA4, 0, mmSDMA4_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA5, 0, mmSDMA5_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA6, 0, mmSDMA6_EDC_COUNTER), 0, 1, 1}, - { SOC15_REG_ENTRY(SDMA7, 0, mmSDMA7_EDC_COUNTER), 0, 1, 1}, }; static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) @@ -4092,7 +4127,6 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) adev->gfx.config.max_sh_per_se; int sgpr_work_group_size = 5; int gpr_reg_size = compute_dim_x / 16 + 6; - int sec_ded_counter_reg_size = adev->sdma.num_instances + 34; /* only support when RAS is enabled */ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) @@ -4232,7 +4266,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* read back 
registers to clear the counters */ mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < sec_ded_counter_reg_size; i++) { + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { gfx_v9_0_select_se_sh(adev, j, 0x0, k); @@ -4259,6 +4293,7 @@ static int gfx_v9_0_early_init(void *handle) else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + gfx_v9_0_set_kiq_pm4_funcs(adev); gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); gfx_v9_0_set_gds_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index da9765ff45d6..bbede09983e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -30,6 +30,8 @@ #include "hdp/hdp_5_0_0_sh_mask.h" #include "gc/gc_10_1_0_sh_mask.h" #include "mmhub/mmhub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_offset.h" #include "dcn/dcn_2_0_0_offset.h" #include "dcn/dcn_2_0_0_sh_mask.h" #include "oss/osssys_5_0_0_offset.h" @@ -37,6 +39,7 @@ #include "navi10_enum.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "nbio_v2_3.h" @@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, (!amdgpu_sriov_vf(adev))); } +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( + struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. 
@@ -380,6 +396,63 @@ error_alloc: DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); } +/** + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid, i; + signed long r; + uint32_t seq; + uint16_t queried_pasid; + bool ret; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + if (amdgpu_emu_mode == 0 && ring->sched.ready) { + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, flush_type, all_hub); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; + } + + for (vmid = 1; vmid < 16; vmid++) { + + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + if (all_hub) { + for (i = 0; i < adev->num_vmhubs; i++) + gmc_v10_0_flush_gpu_tlb(adev, vmid, + i, 0); + } else { + gmc_v10_0_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); + } + break; + } + } + + return 0; +} + static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { @@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, .map_mtype = gmc_v10_0_map_mtype, @@ -566,6 +640,13 @@ static int 
gmc_v10_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; + /* + * Can't free the stolen VGA memory when it might be used for memory + * training again. + */ + if (!adev->fw_vram_usage.mem_train_support) + amdgpu_bo_late_init(adev); + r = amdgpu_gmc_allocate_vm_inv_eng(adev); if (r) return r; @@ -720,6 +801,10 @@ static int gmc_v10_0_sw_init(void *handle) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT, &adev->gmc.vm_fault); + + if (r) + return r; + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, &adev->gmc.vm_fault); @@ -732,15 +817,6 @@ static int gmc_v10_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - /* - * Reserve 8M stolen memory for navi10 like vega10 - * TODO: will check if it's really needed on asic. - */ - if (amdgpu_emu_mode == 1) - adev->gmc.stolen_size = 0; - else - adev->gmc.stolen_size = 9 * 1024 *1024; - r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); @@ -753,6 +829,19 @@ static int gmc_v10_0_sw_init(void *handle) adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev); + /* + * In dual GPUs scenario, stolen_size is assigned to zero on the + * secondary GPU, since there is no pre-OS console using that memory. + * Then the bottom region of VRAM was allocated as GTT, unfortunately a + * small region of bottom VRAM was encroached by UMC firmware during + * GDDR6 BIST training, this cause page fault. + * The page fault can be fixed by forcing stolen_size to 3MB, then the + * bottom region of VRAM was allocated as stolen memory, GTT corruption + * avoid. 
+ */ + adev->gmc.stolen_size = max(adev->gmc.stolen_size, + AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE); + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) @@ -792,6 +881,13 @@ static void gmc_v10_0_gart_fini(struct amdgpu_device *adev) static int gmc_v10_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + void *stolen_vga_buf; + + /* + * Free the stolen memory if it wasn't already freed in late_init + * because of memory training. + */ + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); amdgpu_vm_manager_fini(adev); gmc_v10_0_gart_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index f08e5330642d..19d5b133e1d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) return 0; } +/** + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid; + unsigned int tmp; + + if (adev->in_gpu_reset) + return -EIO; + + for (vmid = 1; vmid < 16; vmid++) { + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. 
@@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, .set_prt = gmc_v7_0_set_prt, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 6d96d40fbcb8..27d83204fa2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) return 0; } +/** + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */ +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid; + unsigned int tmp; + + if (adev->in_gpu_reset) + return -EIO; + + for (vmid = 1; vmid < 16; vmid++) { + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; + +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. 
@@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, .set_prt = gmc_v8_0_set_prt, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 26194ac9af98..40a496804356 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -38,10 +38,12 @@ #include "dce/dce_12_0_sh_mask.h" #include "vega10_enum.h" #include "mmhub/mmhub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" #include "athub/athub_1_0_offset.h" #include "oss/osssys_4_0_offset.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "umc/umc_6_0_sh_mask.h" @@ -441,6 +443,18 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, adev->pdev->device == 0x15d8))); } +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -539,6 +553,67 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } +/** + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. 
+ */ +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + int vmid, i; + signed long r; + uint32_t seq; + uint16_t queried_pasid; + bool ret; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + if (adev->in_gpu_reset) + return -EIO; + + if (ring->sched.ready) { + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size); + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, flush_type, all_hub); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; + } + + for (vmid = 1; vmid < 16; vmid++) { + + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + if (all_hub) { + for (i = 0; i < adev->num_vmhubs; i++) + gmc_v9_0_flush_gpu_tlb(adev, vmid, + i, 0); + } else { + gmc_v9_0_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); + } + break; + } + } + + return 0; + +} + static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { @@ -700,6 +775,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, .map_mtype = gmc_v9_0_map_mtype, @@ -817,8 +893,8 @@ static int gmc_v9_0_late_init(void *handle) r = amdgpu_atomfirmware_mem_ecc_supported(adev); if (!r) { DRM_INFO("ECC is not present.\n"); - if (adev->df_funcs->enable_ecc_force_par_wr_rmw) - adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); + if 
(adev->df.funcs->enable_ecc_force_par_wr_rmw) + adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); } else { DRM_INFO("ECC is active.\n"); } @@ -1023,7 +1099,7 @@ static int gmc_v9_0_sw_init(void *handle) else chansize = 128; - numchan = adev->df_funcs->get_hbm_channel_number(adev); + numchan = adev->df.funcs->get_hbm_channel_number(adev); adev->gmc.vram_width = numchan * chansize; } diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index b0229543e887..2e0f8933410e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -478,7 +478,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -489,7 +489,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); @@ -502,7 +502,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) 
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -513,7 +513,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); @@ -726,6 +726,12 @@ static int nv_common_early_init(void *handle) AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_ATHUB; + /* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0, + * as a consequence, the rev_id and external_rev_id are wrong. + * workaround it by hardcoding rev_id to 0 (default value). + */ + if (amdgpu_sriov_vf(adev)) + adev->rev_id = 0; adev->external_rev_id = adev->rev_id + 0xa; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index a57f3d737677..685dd9754c67 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -43,10 +43,13 @@ MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); MODULE_FIRMWARE("amdgpu/navi10_sos.bin"); MODULE_FIRMWARE("amdgpu/navi10_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi10_ta.bin"); MODULE_FIRMWARE("amdgpu/navi14_sos.bin"); MODULE_FIRMWARE("amdgpu/navi14_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ta.bin"); MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi12_ta.bin"); MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ce0753a9d241..27c7001be1ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -82,6 +82,7 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); +static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev); static const struct soc15_reg_golden golden_settings_sdma_4[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), @@ -254,7 +255,106 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000) + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe) +}; + +static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = { + { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED), + 0, 0, + }, + { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED), + 0, 0, + }, + { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED), + 0, 0, + }, + { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED), + 0, 0, + }, + { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED), + 0, 0, + }, + { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, 
SDMA_DATA_LUT_FIFO_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, 
mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED), + 0, 0, + }, + { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED), + 0, 0, + }, + { "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED), + 0, 0, + }, + { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED), + 0, 0, + }, }; static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, @@ -1686,6 +1786,7 @@ static int sdma_v4_0_early_init(void *handle) sdma_v4_0_set_buffer_funcs(adev); sdma_v4_0_set_vm_pte_funcs(adev); sdma_v4_0_set_irq_funcs(adev); + sdma_v4_0_set_ras_funcs(adev); return 0; } @@ -1700,8 +1801,18 @@ static int sdma_v4_0_late_init(void *handle) struct ras_ih_if ih_info = { .cb = sdma_v4_0_process_ras_data_cb, }; + int i; - return amdgpu_sdma_ras_late_init(adev, &ih_info); + /* read back edc counter registers to clear the counters */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + for (i = 0; i < adev->sdma.num_instances; i++) + RREG32_SDMA(i, mmSDMA0_EDC_COUNTER); + } + + if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) + return adev->sdma.funcs->ras_late_init(adev, &ih_info); + else + return 0; } static int sdma_v4_0_sw_init(void *handle) @@ -1773,7 +1884,8 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - amdgpu_sdma_ras_fini(adev); + if (adev->sdma.funcs && adev->sdma.funcs->ras_fini) + 
adev->sdma.funcs->ras_fini(adev); for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); @@ -2414,6 +2526,70 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } +static void sdma_v4_0_get_ras_error_count(uint32_t value, + uint32_t instance, + uint32_t *sec_count) +{ + uint32_t i; + uint32_t sec_cnt; + + /* double bits error (multiple bits) error detection is not supported */ + for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) { + /* the SDMA_EDC_COUNTER register in each sdma instance + * shares the same sed shift_mask + * */ + sec_cnt = (value & + sdma_v4_0_ras_fields[i].sec_count_mask) >> + sdma_v4_0_ras_fields[i].sec_count_shift; + if (sec_cnt) { + DRM_INFO("Detected %s in SDMA%d, SED %d\n", + sdma_v4_0_ras_fields[i].name, + instance, sec_cnt); + *sec_count += sec_cnt; + } + } +} + +static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, + uint32_t instance, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count = 0; + uint32_t reg_value = 0; + + reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER); + /* double bit error is not supported */ + if (reg_value) + sdma_v4_0_get_ras_error_count(reg_value, + instance, &sec_count); + /* err_data->ce_count should be initialized to 0 + * before calling into this function */ + err_data->ce_count += sec_count; + /* double bit error is not supported + * set ue count to 0 */ + err_data->ue_count = 0; + + return 0; +}; + +static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = { + .ras_late_init = amdgpu_sdma_ras_late_init, + .ras_fini = amdgpu_sdma_ras_fini, + .query_ras_error_count = sdma_v4_0_query_ras_error_count, +}; + +static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA20: + case CHIP_ARCTURUS: + adev->sdma.funcs = &sdma_v4_0_ras_funcs; + 
break; + default: + break; + } +} + const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 4, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 714cf4dfd0a7..317803f6a561 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -479,62 +479,18 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) return ret; } -static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) -{ - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - - *cap = smu_baco_is_support(smu); - return 0; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { - *cap = false; - return -ENOENT; - } - - return pp_funcs->get_asic_baco_capability(pp_handle, cap); - } -} - static int soc15_asic_baco_reset(struct amdgpu_device *adev) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int ret = 0; /* avoid NBIF got stuck when do RAS recovery in BACO reset */ if (ras && ras->supported) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); - dev_info(adev->dev, "GPU BACO reset\n"); - - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - int ret; - - ret = smu_baco_enter(smu); - if (ret) - return ret; - - ret = smu_baco_exit(smu); - if (ret) - return ret; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; - - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; - } + ret = amdgpu_dpm_baco_reset(adev); + if (ret) + return ret; /* re-enable doorbell interrupt after BACO exit */ if 
(ras && ras->supported) @@ -543,17 +499,6 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) return 0; } -static int soc15_mode2_reset(struct amdgpu_device *adev) -{ - if (is_support_sw_smu(adev)) - return smu_mode2_reset(&adev->smu); - if (!adev->powerplay.pp_funcs || - !adev->powerplay.pp_funcs->asic_reset_mode_2) - return -ENOENT; - - return adev->powerplay.pp_funcs->asic_reset_mode_2(adev->powerplay.pp_handle); -} - static enum amd_reset_method soc15_asic_reset_method(struct amdgpu_device *adev) { @@ -567,11 +512,11 @@ soc15_asic_reset_method(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_ARCTURUS: - soc15_asic_get_baco_capability(adev, &baco_reset); + baco_reset = amdgpu_dpm_is_baco_supported(adev); break; case CHIP_VEGA20: if (adev->psp.sos_fw_version >= 0x80067) - soc15_asic_get_baco_capability(adev, &baco_reset); + baco_reset = amdgpu_dpm_is_baco_supported(adev); /* * 1. PMFW version > 0x284300: all cases use baco @@ -598,7 +543,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev) amdgpu_inc_vram_lost(adev); return soc15_asic_baco_reset(adev); case AMD_RESET_METHOD_MODE2: - return soc15_mode2_reset(adev); + return amdgpu_dpm_mode2_reset(adev); default: if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); @@ -608,25 +553,18 @@ static int soc15_asic_reset(struct amdgpu_device *adev) static bool soc15_supports_baco(struct amdgpu_device *adev) { - bool baco_support; - switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_ARCTURUS: - soc15_asic_get_baco_capability(adev, &baco_support); - break; + return amdgpu_dpm_is_baco_supported(adev); case CHIP_VEGA20: if (adev->psp.sos_fw_version >= 0x80067) - soc15_asic_get_baco_capability(adev, &baco_support); - else - baco_support = false; - break; + return amdgpu_dpm_is_baco_supported(adev); + return false; default: return false; } - - return baco_support; } /*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock, @@ -739,9 +677,9 @@ int 
soc15_set_ip_blocks(struct amdgpu_device *adev) } if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) - adev->df_funcs = &df_v3_6_funcs; + adev->df.funcs = &df_v3_6_funcs; else - adev->df_funcs = &df_v1_7_funcs; + adev->df.funcs = &df_v1_7_funcs; adev->rev_id = soc15_get_rev_id(adev); adev->nbio.funcs->detect_hw_virt(adev); @@ -834,8 +772,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); } else { - if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) - amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); } if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block); @@ -846,8 +783,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block); - if (is_support_sw_smu(adev)) - amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) @@ -1311,7 +1247,7 @@ static int soc15_common_sw_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_add_irq_id(adev); - adev->df_funcs->sw_init(adev); + adev->df.funcs->sw_init(adev); return 0; } @@ -1321,7 +1257,7 @@ static int soc15_common_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_nbio_ras_fini(adev); - adev->df_funcs->sw_fini(adev); + adev->df.funcs->sw_fini(adev); return 0; } @@ -1542,7 +1478,7 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE ? 
true : false); soc15_update_rom_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - adev->df_funcs->update_medium_grain_clock_gating(adev, + adev->df.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; case CHIP_RAVEN: @@ -1600,7 +1536,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) *flags |= AMD_CG_SUPPORT_ROM_MGCG; - adev->df_funcs->get_clockgating_state(adev, flags); + adev->df.funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 11e924dd88ff..793bf70e64b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -30,8 +30,6 @@ #include "umc/umc_6_1_1_sh_mask.h" #include "umc/umc_6_1_2_offset.h" -#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10 - #define UMC_6_INST_DIST 0x40000 /* @@ -56,12 +54,30 @@ const uint32_t {9, 25, 0, 16}, {15, 31, 6, 22} }; +static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev) +{ + WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 1); +} + static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) { WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, RSMU_UMC_INDEX_MODE_EN, 0); } +static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev) +{ + uint32_t rsmu_umc_index; + + rsmu_umc_index = RREG32_SOC15(RSMU, 0, + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + + return REG_GET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN); +} + static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev, uint32_t umc_inst, uint32_t ch_inst) @@ -165,6 +181,11 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, uint32_t ch_inst = 0; uint32_t umc_reg_offset 
= 0; + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { umc_reg_offset = get_umc_6_reg_offset(adev, umc_inst, @@ -177,6 +198,9 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, umc_reg_offset, &(err_data->ue_count)); } + + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); } static void umc_v6_1_query_error_address(struct amdgpu_device *adev, @@ -186,7 +210,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, uint32_t umc_inst) { uint32_t lsb, mc_umc_status_addr; - uint64_t mc_umc_status, err_addr, retired_page; + uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; struct eeprom_table_record *err_rec; uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; @@ -194,10 +218,14 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, /* UMC 6_1_2 registers */ mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT); } else { /* UMC 6_1_1 registers */ mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0); } /* skip error address process if -ENOMEM */ @@ -214,8 +242,8 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { - err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4); + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); /* the lowest lsb bits should be ignored */ lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); err_addr = 
REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); @@ -255,6 +283,11 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, uint32_t ch_inst = 0; uint32_t umc_reg_offset = 0; + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { umc_reg_offset = get_umc_6_reg_offset(adev, umc_inst, @@ -267,6 +300,8 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, umc_inst); } + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); } static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, @@ -313,7 +348,10 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) uint32_t ch_inst = 0; uint32_t umc_reg_offset = 0; - umc_v6_1_disable_umc_index_mode(adev); + uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { umc_reg_offset = get_umc_6_reg_offset(adev, @@ -322,6 +360,9 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset); } + + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); } const struct amdgpu_umc_funcs umc_v6_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 3b025a3f8c7d..e654938f6cca 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -39,10 +39,10 @@ #include "ivsrcid/vcn/irqsrcs_vcn_1_0.h" #include "jpeg_v1_0.h" -#define mmUVD_RBC_XX_IB_REG_CHECK 0x05ab -#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 -#define mmUVD_REG_XX_MASK 0x05ac -#define mmUVD_REG_XX_MASK_BASE_IDX 1 +#define mmUVD_RBC_XX_IB_REG_CHECK_1_0 0x05ab +#define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX 1 +#define mmUVD_REG_XX_MASK_1_0 0x05ac +#define 
mmUVD_REG_XX_MASK_1_0_BASE_IDX 1 static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); @@ -50,7 +50,7 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); static void vcn_v1_0_idle_work_handler(struct work_struct *work); @@ -835,9 +835,9 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) vcn_v1_0_mc_resume_spg_mode(adev); - WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK, 0x10); - WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, - RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK) | 0x3); + WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10); + WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0, + RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0) | 0x3); /* enable VCPU clock */ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK); @@ -1199,7 +1199,7 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev) } static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) + int inst_idx, struct dpg_pause_state *new_state) { int ret_code; uint32_t reg_data = 0; @@ -1786,7 +1786,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work) else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, 0, &new_state); } fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec); @@ -1840,7 +1840,7 @@ void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring) else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) new_state.jpeg = VCN_DPG_STATE__PAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, 0, &new_state); } } diff --git 
a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index d76ece38c97b..f4db8af6536b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -47,18 +47,13 @@ #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 -#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b -#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 -#define mmUVD_REG_XX_MASK 0x026c -#define mmUVD_REG_XX_MASK_BASE_IDX 1 - static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v2_0_set_powergating_state(void *handle, enum amd_powergating_state state); static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); + int inst_idx, struct dpg_pause_state *new_state); /** * vcn_v2_0_early_init - set function pointers @@ -356,88 +351,88 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, 
indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); } offset = 0; } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); offset = size; - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); } if (!indirect) - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); else - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); /* cache window 1: stack */ if (!indirect) { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, 
mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } else { - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); /* cache window 2: context */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); /* non-cache window */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, 
indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } @@ -583,19 +578,19 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, UVD_CGC_CTRL__WCB_MODE_MASK | UVD_CGC_CTRL__VCPU_MODE_MASK | UVD_CGC_CTRL__SCPU_MODE_MASK); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); /* turn off clock gating */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); /* turn on SUVD clock gating */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); /* turn on sw mode in UVD_SUVD_CGC_CTRL */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); } @@ -759,7 +754,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp); if (indirect) - adev->vcn.dpg_sram_curr_addr = (uint32_t*)adev->vcn.dpg_sram_cpu_addr; + adev->vcn.inst->dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst->dpg_sram_cpu_addr; /* enable clock gating */ vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect); @@ -768,11 +763,11 @@ static int vcn_v2_0_start_dpg_mode(struct 
amdgpu_device *adev, bool indirect) tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK; - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); /* disable master interupt */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect); /* setup mmUVD_LMI_CTRL */ @@ -784,28 +779,28 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | 0x00100000L); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_CNTL), 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUXA0), ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUXB0), ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MPC_SET_MUX), ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | (0x1 << 
UVD_MPC_SET_MUX__SET_1__SHIFT) | @@ -813,29 +808,29 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) vcn_v2_0_mc_resume_dpg_mode(adev, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); /* release VCPU reset to boot */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect); /* enable LMI MC and UMC channels */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_CTRL2), 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect); /* enable master interrupt */ - WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) - psp_update_vcn_sram(adev, 0, adev->vcn.dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.dpg_sram_curr_addr - - (uintptr_t)adev->vcn.dpg_sram_cpu_addr)); + psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr)); /* force RBC into idle state */ rb_bufsz = order_base_2(ring->ring_size); @@ -1135,7 +1130,7 @@ power_off: } static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) + int inst_idx, struct dpg_pause_state *new_state) { struct amdgpu_ring *ring; uint32_t reg_data = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 4ea8e20ed15d..c8b63d57a541 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -55,6 +55,8 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v2_5_set_powergating_state(void *handle, enum amd_powergating_state state); +static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); static int amdgpu_ih_clientid_vcns[] = { @@ -212,6 +214,9 @@ static int vcn_v2_5_sw_init(void *handle) return r; } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode; + return 0; } @@ -286,7 +291,8 @@ static int vcn_v2_5_hw_init(void *handle) done: if (!r) - DRM_INFO("VCN decode and encode initialized successfully.\n"); + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); return r; } @@ -309,7 +315,9 @@ static int vcn_v2_5_hw_fini(void *handle) continue; ring = &adev->vcn.inst[i].ring_dec; - if (RREG32_SOC15(VCN, i, mmUVD_STATUS)) + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, mmUVD_STATUS))) vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); ring->sched.ready = false; @@ -384,9 +392,9 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo)); WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi)); WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); offset = 0; } else { @@ -418,6 +426,99 @@ static void 
vcn_v2_5_mc_resume(struct amdgpu_device *adev) } } +static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 
0, indirect); + else + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE2), 
AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + /** * vcn_v2_5_disable_clock_gating - disable VCN clock gating * @@ -536,6 +637,54 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) } } +static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev, + uint8_t sram_sel, int inst_idx, uint8_t indirect) +{ + uint32_t reg_data = 0; + + /* enable sw clock gating control */ + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + 
UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); + + /* turn off clock gating */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect); + + /* turn on SUVD clock gating */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); + + /* turn on sw mode in UVD_SUVD_CGC_CTRL */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); +} + /** * vcn_v2_5_enable_clock_gating - enable VCN clock gating * @@ -598,6 +747,138 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) } } +static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + struct amdgpu_ring *ring; + uint32_t rb_bufsz, tmp; + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp); + + if (indirect) + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + + /* enable clock gating */ + vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + tmp |= UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interupt */ + 
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect); + + /* setup mmUVD_LMI_CTRL */ + tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MPC_CNTL), + 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MPC_SET_MUXA0), + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MPC_SET_MUXB0), + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MPC_SET_MUX), + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); + + vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect); + + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); + + /* enable LMI MC and UMC channels */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, 
SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect); + + /* unblock VCPU register access */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0( + UVD, inst_idx, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); + + ring = &adev->vcn.inst[inst_idx].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_CNTL, tmp); + + /* set the write pointer delay */ + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR_ADDR, + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0); + + WREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2, 0); + + ring->wptr = 
RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + return 0; +} + static int vcn_v2_5_start(struct amdgpu_device *adev) { struct amdgpu_ring *ring; @@ -610,6 +891,9 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); @@ -973,6 +1257,35 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table); } +static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + int ret_code = 0; + uint32_t tmp; + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2); + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + return 0; +} + static int vcn_v2_5_stop(struct amdgpu_device *adev) { uint32_t tmp; @@ -981,6 +1294,12 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) for (i = 0; i < 
adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v2_5_stop_dpg_mode(adev, i); + goto power_off; + } + /* wait for vcn idle */ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); if (r) @@ -1030,12 +1349,74 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } +power_off: if (adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, false); return 0; } +static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state) +{ + struct amdgpu_ring *ring; + uint32_t reg_data = 0; + int ret_code; + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d -> %d", + adev->vcn.pause_state.fw_based, new_state->fw_based); + reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.inst[inst_idx].ring_enc[1]; + WREG32_SOC15(UVD, 
inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); + + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, + 0x0, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + /** * vcn_v2_5_dec_ring_get_rptr - get read pointer * @@ -1078,6 +1459,10 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + WREG32_SOC15(UVD, ring->me, mmUVD_SCRATCH2, + lower_32_bits(ring->wptr) | 0x80000000); + if (ring->use_doorbell) { adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index e4f4201b3c34..78b35901643b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -689,40 +689,6 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev) return -EINVAL; } -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) -{ - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { - *cap = false; - return -ENOENT; - } - - return pp_funcs->get_asic_baco_capability(pp_handle, cap); -} - -int smu7_asic_baco_reset(struct amdgpu_device *adev) -{ - 
void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; - - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; - - dev_info(adev->dev, "GPU BACO reset\n"); - - return 0; -} - /** * vi_asic_pci_config_reset - soft reset GPU * @@ -747,8 +713,6 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev) static bool vi_asic_supports_baco(struct amdgpu_device *adev) { - bool baco_support; - switch (adev->asic_type) { case CHIP_FIJI: case CHIP_TONGA: @@ -756,14 +720,10 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev) case CHIP_POLARIS11: case CHIP_POLARIS12: case CHIP_TOPAZ: - smu7_asic_get_baco_capability(adev, &baco_support); - break; + return amdgpu_dpm_is_baco_supported(adev); default: - baco_support = false; - break; + return false; } - - return baco_support; } static enum amd_reset_method @@ -778,7 +738,7 @@ vi_asic_reset_method(struct amdgpu_device *adev) case CHIP_POLARIS11: case CHIP_POLARIS12: case CHIP_TOPAZ: - smu7_asic_get_baco_capability(adev, &baco_reset); + baco_reset = amdgpu_dpm_is_baco_supported(adev); break; default: baco_reset = false; @@ -807,7 +767,7 @@ static int vi_asic_reset(struct amdgpu_device *adev) if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { if (!adev->in_suspend) amdgpu_inc_vram_lost(adev); - r = smu7_asic_baco_reset(adev); + r = amdgpu_dpm_baco_reset(adev); } else { r = vi_asic_pci_config_reset(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h index 40d4174913a4..defb4aaf929a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.h +++ b/drivers/gpu/drm/amd/amdgpu/vi.h @@ -31,7 +31,5 @@ void vi_srbm_select(struct amdgpu_device *adev, int vi_set_ip_blocks(struct amdgpu_device *adev); void 
legacy_doorbell_index_init(struct amdgpu_device *adev); -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap); -int smu7_asic_baco_reset(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d7eb6ac37f62..2870553a2ce0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -934,6 +934,7 @@ static void uninitialize(struct device_queue_manager *dqm) static int start_nocpsch(struct device_queue_manager *dqm) { + pr_info("SW scheduler is used"); init_interrupts(dqm); if (dqm->dev->device_info->asic_family == CHIP_HAWAII) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 7832ec6e480b..d1d68a51bfb8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -153,6 +153,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, return r; } +static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, + queue_id, p->doorbell_off); +} + static void update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { @@ -409,7 +417,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, mqd->allocate_mqd = allocate_hiq_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd_hiq_sdma; - mqd->load_mqd = load_mqd; + mqd->load_mqd = hiq_load_mqd_kiq; mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index aa9010995eaf..436b7f518979 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -191,6 +191,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, wptr_shift, 0, mms); } +static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, + queue_id, p->doorbell_off); +} + static void update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { @@ -449,7 +457,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->allocate_mqd = allocate_hiq_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd_hiq_sdma; - mqd->load_mqd = load_mqd; + mqd->load_mqd = hiq_load_mqd_kiq; mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 536a153ac9a4..25b90f70aecd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -32,6 +32,7 @@ #include #include #include "amdgpu_amdkfd.h" +#include "amdgpu.h" struct mm_struct; @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, void kfd_flush_tlb(struct kfd_process_device *pdd) { struct kfd_dev *dev = pdd->dev; - const struct kfd2kgd_calls *f2g = dev->kfd2kgd; if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { /* Nothing to flush until a VMID is assigned, which * only happens when the first queue is created. 
*/ if (pdd->qpd.vmid) - f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid); + amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd, + pdd->qpd.vmid); } else { - f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid); + amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, + pdd->process->pasid); } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 76673c7234ed..9402374d2466 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -906,13 +906,16 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.dce_environment = DCE_ENV_PRODUCTION_DRV; - /* - * TODO debug why this doesn't work on Raven - */ - if (adev->flags & AMD_IS_APU && - adev->asic_type >= CHIP_CARRIZO && - adev->asic_type < CHIP_RAVEN) + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: + case CHIP_RAVEN: + case CHIP_RENOIR: init_data.flags.gpu_vm_support = true; + break; + default: + break; + } if (amdgpu_dc_feature_mask & DC_FBC_MASK) init_data.flags.fbc_support = true; @@ -8390,17 +8393,37 @@ static bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) { struct dc_link *link = stream->link; - struct dc_static_screen_events triggers = {0}; + unsigned int vsync_rate_hz = 0; + struct dc_static_screen_params params = {0}; + /* Calculate number of static frames before generating interrupt to + * enter PSR. 
+ */ + // Init fail safe of 2 frames static + unsigned int num_frames_static = 2; DRM_DEBUG_DRIVER("Enabling psr...\n"); - triggers.cursor_update = true; - triggers.overlay_update = true; - triggers.surface_update = true; + vsync_rate_hz = div64_u64(div64_u64(( + stream->timing.pix_clk_100hz * 100), + stream->timing.v_total), + stream->timing.h_total); - dc_stream_set_static_screen_events(link->ctx->dc, + /* Round up: number of frames such that at least 30 ms has passed. + * The frame time is derived here, once vsync_rate_hz is known. + */ + if (vsync_rate_hz != 0) { + unsigned int frame_time_microsec = 1000000 / vsync_rate_hz; + num_frames_static = (30000 / frame_time_microsec) + 1; + } + + params.triggers.cursor_update = true; + params.triggers.overlay_update = true; + params.triggers.surface_update = true; + params.num_frames = num_frames_static; + + dc_stream_set_static_screen_params(link->ctx->dc, &stream, 1, - &triggers); + &params); return dc_link_set_psr_allow_active(link, true, false); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 52fb207393ef..96b391e4b3e7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -534,7 +534,7 @@ static int kbps_to_peak_pbn(int kbps) peak_kbps *= 1006; peak_kbps = div_u64(peak_kbps, 1000); - return (int) DIV_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); + return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *params, diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index e6c22345f0ea..a27d84ca15a5 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -705,8 +705,8 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v, unsigned int 
get_highest_allowed_voltage_level(uint32_t hw_internal_rev) { - /* for dali, the highest voltage level we want is 0 */ - if (ASICREV_IS_DALI(hw_internal_rev)) + /* for dali & pollock, the highest voltage level we want is 0 */ + if (ASICREV_IS_POLLOCK(hw_internal_rev) || ASICREV_IS_DALI(hw_internal_rev)) return 0; /* we are ok with all levels */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 6d60ef822619..a78e5c74c79c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -134,13 +134,13 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p #if defined(CONFIG_DRM_AMD_DC_DCN) case FAMILY_RV: - if (ASICREV_IS_DALI(asic_id.hw_internal_rev)) { + if (ASICREV_IS_DALI(asic_id.hw_internal_rev) || + ASICREV_IS_POLLOCK(asic_id.hw_internal_rev)) { /* TEMP: this check has to come before ASICREV_IS_RENOIR */ - /* which also incorrectly returns true for Dali */ + /* which also incorrectly returns true for Dali/Pollock*/ rv2_clk_mgr_construct(ctx, clk_mgr, pp_smu); break; } - if (ASICREV_IS_RENOIR(asic_id.hw_internal_rev)) { rn_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); break; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index ffed7207c099..7ae4c06232dd 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -59,14 +59,16 @@ int rn_get_active_display_cnt_wa( struct dc_state *context) { int i, display_count; - bool hdmi_present = false; + bool tmds_present = false; display_count = 0; for (i = 0; i < context->stream_count; i++) { const struct dc_stream_state *stream = context->streams[i]; - if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) - hdmi_present = true; + if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A || + stream->signal == 
SIGNAL_TYPE_DVI_SINGLE_LINK || + stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) + tmds_present = true; } for (i = 0; i < dc->link_count; i++) { @@ -85,7 +87,7 @@ int rn_get_active_display_cnt_wa( } /* WA for hang on HDMI after display off back back on*/ - if (display_count == 0 && hdmi_present) + if (display_count == 0 && tmds_present) display_count = 1; return display_count; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 3d89904003f0..6c797fac189d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -287,7 +287,6 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->stream == stream && pipe->stream_res.tg) { - pipe->stream->adjust = *adjust; dc->hwss.set_drr(&pipe, 1, adjust->v_total_min, @@ -511,10 +510,10 @@ bool dc_stream_program_csc_matrix(struct dc *dc, struct dc_stream_state *stream) return ret; } -void dc_stream_set_static_screen_events(struct dc *dc, +void dc_stream_set_static_screen_params(struct dc *dc, struct dc_stream_state **streams, int num_streams, - const struct dc_static_screen_events *events) + const struct dc_static_screen_params *params) { int i = 0; int j = 0; @@ -533,7 +532,7 @@ void dc_stream_set_static_screen_events(struct dc *dc, } } - dc->hwss.set_static_screen_control(pipes_affected, num_pipes_affected, events); + dc->hwss.set_static_screen_control(pipes_affected, num_pipes_affected, params); } static void dc_destruct(struct dc *dc) @@ -1319,6 +1318,12 @@ bool dc_commit_state(struct dc *dc, struct dc_state *context) return (result == DC_OK); } +bool dc_is_hw_initialized(struct dc *dc) +{ + struct dc_bios *dcb = dc->ctx->dc_bios; + return dcb->funcs->is_accelerated_mode(dcb); +} + bool dc_post_update_surfaces_to_stream(struct dc *dc) { int i; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 
cef8c1ba9797..260c0b62d37d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -45,6 +45,7 @@ #include "dpcd_defs.h" #include "dmcu.h" #include "hw/clk_mgr.h" +#include "../dce/dmub_psr.h" #define DC_LOGGER_INIT(logger) @@ -817,8 +818,8 @@ static bool dc_link_detect_helper(struct dc_link *link, } case SIGNAL_TYPE_EDP: { - read_current_link_settings_on_detect(link); detect_edp_sink_caps(link); + read_current_link_settings_on_detect(link); sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C_OVER_AUX; sink_caps.signal = SIGNAL_TYPE_EDP; break; @@ -2404,10 +2405,11 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, bool allow_active, bool { struct dc *dc = link->ctx->dc; struct dmcu *dmcu = dc->res_pool->dmcu; + struct dmub_psr *psr = dc->res_pool->psr; - - - if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_feature_enabled) + if ((psr != NULL) && link->psr_feature_enabled) + psr->funcs->set_psr_enable(psr, allow_active); + else if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_feature_enabled) dmcu->funcs->set_psr_enable(dmcu, allow_active, wait); link->psr_allow_active = allow_active; @@ -2419,8 +2421,11 @@ bool dc_link_get_psr_state(const struct dc_link *link, uint32_t *psr_state) { struct dc *dc = link->ctx->dc; struct dmcu *dmcu = dc->res_pool->dmcu; + struct dmub_psr *psr = dc->res_pool->psr; - if (dmcu != NULL && link->psr_feature_enabled) + if (psr != NULL && link->psr_feature_enabled) + psr->funcs->get_psr_state(psr_state); + else if (dmcu != NULL && link->psr_feature_enabled) dmcu->funcs->get_psr_state(dmcu, psr_state); return true; @@ -2467,6 +2472,7 @@ bool dc_link_setup_psr(struct dc_link *link, { struct dc *dc; struct dmcu *dmcu; + struct dmub_psr *psr; int i; /* updateSinkPsrDpcdConfig*/ union dpcd_psr_configuration psr_configuration; @@ -2478,8 +2484,9 @@ bool dc_link_setup_psr(struct dc_link *link, dc = link->ctx->dc; dmcu = 
dc->res_pool->dmcu; + psr = dc->res_pool->psr; - if (!dmcu) + if (!dmcu && !psr) return false; @@ -2535,7 +2542,7 @@ bool dc_link_setup_psr(struct dc_link *link, transmitter_to_phy_id(link->link_enc->transmitter); psr_context->crtcTimingVerticalTotal = stream->timing.v_total; - psr_context->vsyncRateHz = div64_u64(div64_u64((stream-> + psr_context->vsync_rate_hz = div64_u64(div64_u64((stream-> timing.pix_clk_100hz * 100), stream->timing.v_total), stream->timing.h_total); @@ -2588,7 +2595,10 @@ bool dc_link_setup_psr(struct dc_link *link, */ psr_context->frame_delay = 0; - link->psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context); + if (psr) + link->psr_feature_enabled = psr->funcs->setup_psr(psr, link, psr_context); + else + link->psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context); /* psr_enabled == 0 indicates setup_psr did not succeed, but this * should not happen since firmware should be running at this point @@ -2863,6 +2873,39 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) return DC_OK; } + +enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link) +{ + int i; + struct pipe_ctx *pipe_ctx; + + // Clear all of MST payload then reallocate + for (i = 0; i < MAX_PIPES; i++) { + pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link && + pipe_ctx->stream->dpms_off == false && + pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + deallocate_mst_payload(pipe_ctx); + } + } + + for (i = 0; i < MAX_PIPES; i++) { + pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link && + pipe_ctx->stream->dpms_off == false && + pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + /* enable/disable PHY will clear connection between BE and FE + * need to restore it. 
+ */ + link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc, + pipe_ctx->stream_res.stream_enc->id, true); + dc_link_allocate_mst_payload(pipe_ctx); + } + } + + return DC_OK; +} + #if defined(CONFIG_DRM_AMD_DC_HDCP) static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off) { @@ -3361,3 +3404,10 @@ const struct dc_link_settings *dc_link_get_link_cap( return &link->preferred_link_setting; return &link->verified_link_cap; } + +void dc_link_overwrite_extended_receiver_cap( + struct dc_link *link) +{ + dp_overwrite_extended_receiver_cap(link); +} + diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 38b0f4347383..6ab298c65247 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1217,24 +1217,33 @@ static void configure_lttpr_mode(struct dc_link *link) uint8_t repeater_cnt; uint32_t aux_interval_address; uint8_t repeater_id; + enum dc_status result = DC_ERROR_UNEXPECTED; uint8_t repeater_mode = DP_PHY_REPEATER_MODE_TRANSPARENT; DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Transparent Mode\n", __func__); - core_link_write_dpcd(link, + result = core_link_write_dpcd(link, DP_PHY_REPEATER_MODE, (uint8_t *)&repeater_mode, sizeof(repeater_mode)); + if (result == DC_OK) { + link->dpcd_caps.lttpr_caps.mode = repeater_mode; + } + if (!link->is_lttpr_mode_transparent) { DC_LOG_HW_LINK_TRAINING("%s\n Set LTTPR to Non Transparent Mode\n", __func__); repeater_mode = DP_PHY_REPEATER_MODE_NON_TRANSPARENT; - core_link_write_dpcd(link, + result = core_link_write_dpcd(link, DP_PHY_REPEATER_MODE, (uint8_t *)&repeater_mode, sizeof(repeater_mode)); + if (result == DC_OK) { + link->dpcd_caps.lttpr_caps.mode = repeater_mode; + } + repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); for (repeater_id = repeater_cnt; repeater_id > 0; repeater_id--) { aux_interval_address = DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 
+ @@ -1883,6 +1892,16 @@ bool dp_verify_link_cap( /* disable PHY done possible by BIOS, will be done by driver itself */ dp_disable_link_phy(link, link->connector_signal); + /* Temporary Renoir-specific workaround for SWDEV-215184; + * PHY will sometimes be in bad state on hotplugging display from certain USB-C dongle, + * so add extra cycle of enabling and disabling the PHY before first link training. + */ + if (link->link_enc->features.flags.bits.DP_IS_USB_C && + link->dc->debug.usbc_combo_phy_reset_wa) { + dp_enable_link_phy(link, link->connector_signal, dp_cs_id, cur); + dp_disable_link_phy(link, link->connector_signal); + } + dp_cs_id = get_clock_source_id(link); /* link training starts with the maximum common settings @@ -2876,18 +2895,14 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd return false; previous_link_settings = link->cur_link_settings; - dp_disable_link_phy(link, pipe_ctx->stream->signal); perform_link_training_with_retries(&previous_link_settings, true, LINK_TRAINING_ATTEMPTS, pipe_ctx, pipe_ctx->stream->signal); - if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link && - pipe_ctx->stream->dpms_off == false && - pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { - dc_link_allocate_mst_payload(pipe_ctx); - } + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) + dc_link_reallocate_mst_payload(link); status = false; if (out_link_loss) @@ -3269,7 +3284,7 @@ static bool retrieve_link_cap(struct dc_link *link) dpcd_data[DP_TRAINING_AUX_RD_INTERVAL]; link->dpcd_caps.ext_receiver_cap_field_present = - aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1 ? 
true:false; + aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1; if (aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1) { uint8_t ext_cap_data[16]; @@ -3428,6 +3443,68 @@ static bool retrieve_link_cap(struct dc_link *link) return true; } +bool dp_overwrite_extended_receiver_cap(struct dc_link *link) +{ + uint8_t dpcd_data[16]; + uint32_t read_dpcd_retry_cnt = 3; + enum dc_status status = DC_ERROR_UNEXPECTED; + union dp_downstream_port_present ds_port = { 0 }; + union down_stream_port_count down_strm_port_count; + union edp_configuration_cap edp_config_cap; + + int i; + + for (i = 0; i < read_dpcd_retry_cnt; i++) { + status = core_link_read_dpcd( + link, + DP_DPCD_REV, + dpcd_data, + sizeof(dpcd_data)); + if (status == DC_OK) + break; + } + + link->dpcd_caps.dpcd_rev.raw = + dpcd_data[DP_DPCD_REV - DP_DPCD_REV]; + + if (dpcd_data[DP_MAX_LANE_COUNT - DP_DPCD_REV] == 0) + return false; + + ds_port.byte = dpcd_data[DP_DOWNSTREAMPORT_PRESENT - + DP_DPCD_REV]; + + get_active_converter_info(ds_port.byte, link); + + down_strm_port_count.raw = dpcd_data[DP_DOWN_STREAM_PORT_COUNT - + DP_DPCD_REV]; + + link->dpcd_caps.allow_invalid_MSA_timing_param = + down_strm_port_count.bits.IGNORE_MSA_TIMING_PARAM; + + link->dpcd_caps.max_ln_count.raw = dpcd_data[ + DP_MAX_LANE_COUNT - DP_DPCD_REV]; + + link->dpcd_caps.max_down_spread.raw = dpcd_data[ + DP_MAX_DOWNSPREAD - DP_DPCD_REV]; + + link->reported_link_cap.lane_count = + link->dpcd_caps.max_ln_count.bits.MAX_LANE_COUNT; + link->reported_link_cap.link_rate = dpcd_data[ + DP_MAX_LINK_RATE - DP_DPCD_REV]; + link->reported_link_cap.link_spread = + link->dpcd_caps.max_down_spread.bits.MAX_DOWN_SPREAD ? 
+ LINK_SPREAD_05_DOWNSPREAD_30KHZ : LINK_SPREAD_DISABLED; + + edp_config_cap.raw = dpcd_data[ + DP_EDP_CONFIGURATION_CAP - DP_DPCD_REV]; + link->dpcd_caps.panel_mode_edp = + edp_config_cap.bits.ALT_SCRAMBLER_RESET; + link->dpcd_caps.dpcd_display_control_capable = + edp_config_cap.bits.DPCD_DISPLAY_CONTROL_CAPABLE; + + return true; +} + bool detect_dp_sink_caps(struct dc_link *link) { return retrieve_link_cap(link); @@ -3603,6 +3680,7 @@ static void set_crtc_test_pattern(struct dc_link *link, struct pipe_ctx *odm_pipe; enum controller_dp_color_space controller_color_space; int opp_cnt = 1; + unsigned int count = 0; switch (test_pattern_color_space) { case DP_TEST_PATTERN_COLOR_SPACE_RGB: @@ -3646,6 +3724,12 @@ static void set_crtc_test_pattern(struct dc_link *link, NULL, width, height); + /* wait for dpg to blank pixel data with test pattern */ + for (count = 0; count < 1000; count++) + if (opp->funcs->dpg_is_blanked(opp)) + break; + else + udelay(100); } } break; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 64a0e08fd019..a0eb9e533a61 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2066,6 +2066,13 @@ void dc_resource_state_construct( dst_ctx->clk_mgr = dc->clk_mgr; } + +bool dc_resource_is_dsc_encoding_supported(const struct dc *dc) +{ + return dc->res_pool->res_cap->num_dsc > 0; +} + + /** * dc_validate_global_state() - Determine if HW can support a given state * Checks HW resource availability and bandwidth requirement. 
@@ -2897,6 +2904,3 @@ void get_audio_check(struct audio_info *aud_modes, } } - - - diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 039004344dc6..3fa85a54360f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.64" +#define DC_VER "3.2.68" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -157,11 +157,14 @@ struct dc_surface_dcc_cap { bool const_color_support; }; -struct dc_static_screen_events { - bool force_trigger; - bool cursor_update; - bool surface_update; - bool overlay_update; +struct dc_static_screen_params { + struct { + bool force_trigger; + bool cursor_update; + bool surface_update; + bool overlay_update; + } triggers; + unsigned int num_frames; }; @@ -420,6 +423,8 @@ struct dc_debug_options { bool nv12_iflip_vm_wa; bool disable_dram_clock_change_vactive_support; bool validate_dml_output; + bool enable_dmcub_surface_flip; + bool usbc_combo_phy_reset_wa; }; struct dc_debug_data { @@ -910,6 +915,8 @@ void dc_resource_state_copy_construct_current( void dc_resource_state_destruct(struct dc_state *context); +bool dc_resource_is_dsc_encoding_supported(const struct dc *dc); + /* * TODO update to make it about validation sets * Set up streams and links associated to drive sinks @@ -1067,6 +1074,7 @@ unsigned int dc_get_current_backlight_pwm(struct dc *dc); unsigned int dc_get_target_backlight_pwm(struct dc *dc); bool dc_is_dmcu_initialized(struct dc *dc); +bool dc_is_hw_initialized(struct dc *dc); enum dc_status dc_set_clock(struct dc *dc, enum dc_clock_type clock_type, uint32_t clk_khz, uint32_t stepping); void dc_get_clock(struct dc *dc, enum dc_clock_type clock_type, struct dc_clock_config *clock_cfg); diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 02a63e9cb62f..737048d8a96c 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -552,6 +552,36 @@ uint32_t generic_read_indirect_reg(const struct dc_context *ctx, return value; } +uint32_t generic_indirect_reg_get(const struct dc_context *ctx, + uint32_t addr_index, uint32_t addr_data, + uint32_t index, int n, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + ...) +{ + uint32_t shift, mask, *field_value; + uint32_t value = 0; + int i = 1; + + va_list ap; + + va_start(ap, field_value1); + + value = generic_read_indirect_reg(ctx, addr_index, addr_data, index); + *field_value1 = get_reg_field_value_ex(value, mask1, shift1); + + while (i < n) { + shift = va_arg(ap, uint32_t); + mask = va_arg(ap, uint32_t); + field_value = va_arg(ap, uint32_t *); + + *field_value = get_reg_field_value_ex(value, mask, shift); + i++; + } + + va_end(ap); + + return value; +} uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx, uint32_t addr_index, uint32_t addr_data, diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index f420aeac7fbd..d25603128394 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -205,6 +205,7 @@ enum dc_detect_reason { bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason); bool dc_link_get_hpd_state(struct dc_link *dc_link); enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx); +enum dc_status dc_link_reallocate_mst_payload(struct dc_link *link); /* Notify DC about DP RX Interrupt (aka Short Pulse Interrupt). 
* Return: @@ -301,6 +302,9 @@ uint32_t dc_link_bandwidth_kbps( const struct dc_link_settings *dc_link_get_link_cap( const struct dc_link *link); +void dc_link_overwrite_extended_receiver_cap( + struct dc_link *link); + bool dc_submit_i2c( struct dc *dc, uint32_t link_index, diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 37c10dbf269e..92096de79dec 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -439,10 +439,10 @@ bool dc_stream_get_crc(struct dc *dc, uint32_t *g_y, uint32_t *b_cb); -void dc_stream_set_static_screen_events(struct dc *dc, +void dc_stream_set_static_screen_params(struct dc *dc, struct dc_stream_state **stream, int num_streams, - const struct dc_static_screen_events *events); + const struct dc_static_screen_params *params); void dc_stream_set_dyn_expansion(struct dc *dc, struct dc_stream_state *stream, enum dc_dynamic_expansion option); diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index b1a372c8df83..e59532d98cb4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -229,6 +229,7 @@ struct dc_panel_patch { unsigned int extra_t12_ms; unsigned int extra_delay_backlight_off; unsigned int extra_t7_ms; + unsigned int manage_secondary_link; }; struct dc_edid_caps { @@ -728,7 +729,7 @@ struct psr_context { /* The VSync rate in Hz used to calculate the * step size for smooth brightness feature */ - unsigned int vsyncRateHz; + unsigned int vsync_rate_hz; unsigned int skipPsrWaitForPllLock; unsigned int numberOfControllers; /* Unused, for future use. 
To indicate that first changed frame from diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c index e619e67e6b51..30d953acd016 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c @@ -537,9 +537,6 @@ static void dcn10_dmcu_set_psr_enable(struct dmcu *dmcu, bool enable, bool wait) if (dmcu->dmcu_state != DMCU_RUNNING) return; - dcn10_get_dmcu_psr_state(dmcu, &psr_state); - if (psr_state == 0 && !enable) - return; /* waitDMCUReadyForCmd */ REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, dmcu_wait_reg_ready_interval, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c new file mode 100644 index 000000000000..225955ec6d39 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -0,0 +1,220 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dmub_psr.h" +#include "dc.h" +#include "dc_dmub_srv.h" +#include "../../dmub/inc/dmub_srv.h" +#include "dmub_fw_state.h" +#include "core_types.h" +#include "ipp.h" + +#define MAX_PIPES 6 + +/** + * Get PSR state from firmware. + */ +static void dmub_get_psr_state(uint32_t *psr_state) +{ + // Not yet implemented + // Trigger GPINT interrupt from firmware +} + +/** + * Enable/Disable PSR. + */ +static void dmub_set_psr_enable(struct dmub_psr *dmub, bool enable) +{ + union dmub_rb_cmd cmd; + struct dc_context *dc = dmub->ctx; + + cmd.psr_enable.header.type = DMUB_CMD__PSR; + + if (enable) + cmd.psr_enable.header.sub_type = DMUB_CMD__PSR_ENABLE; + else + cmd.psr_enable.header.sub_type = DMUB_CMD__PSR_DISABLE; + + cmd.psr_enable.header.payload_bytes = 0; // Send header only + + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_enable.header); + dc_dmub_srv_cmd_execute(dc->dmub_srv); + dc_dmub_srv_wait_idle(dc->dmub_srv); +} + +/** + * Set PSR level. 
+ */ +static void dmub_set_psr_level(struct dmub_psr *dmub, uint16_t psr_level) +{ + union dmub_rb_cmd cmd; + uint32_t psr_state = 0; + struct dc_context *dc = dmub->ctx; + + dmub_get_psr_state(&psr_state); + + if (psr_state == 0) + return; + + cmd.psr_set_level.header.type = DMUB_CMD__PSR; + cmd.psr_set_level.header.sub_type = DMUB_CMD__PSR_SET_LEVEL; + cmd.psr_set_level.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_level_data); + cmd.psr_set_level.psr_set_level_data.psr_level = psr_level; + + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_set_level.header); + dc_dmub_srv_cmd_execute(dc->dmub_srv); + dc_dmub_srv_wait_idle(dc->dmub_srv); +} + +/** + * Setup PSR by programming phy registers and sending psr hw context values to firmware. + */ +static bool dmub_setup_psr(struct dmub_psr *dmub, + struct dc_link *link, + struct psr_context *psr_context) +{ + union dmub_rb_cmd cmd; + struct dc_context *dc = dmub->ctx; + struct dmub_cmd_psr_copy_settings_data *copy_settings_data + = &cmd.psr_copy_settings.psr_copy_settings_data; + struct pipe_ctx *pipe_ctx = NULL; + struct resource_context *res_ctx = &link->ctx->dc->current_state->res_ctx; + + for (int i = 0; i < MAX_PIPES; i++) { + if (res_ctx && + res_ctx->pipe_ctx[i].stream && + res_ctx->pipe_ctx[i].stream->link && + res_ctx->pipe_ctx[i].stream->link == link && + res_ctx->pipe_ctx[i].stream->link->connector_signal == SIGNAL_TYPE_EDP) { + pipe_ctx = &res_ctx->pipe_ctx[i]; + break; + } + } + + if (!pipe_ctx || + !&pipe_ctx->plane_res || + !&pipe_ctx->stream_res) + return false; + + // Program DP DPHY fast training registers + link->link_enc->funcs->psr_program_dp_dphy_fast_training(link->link_enc, + psr_context->psrExitLinkTrainingRequired); + + // Program DP_SEC_CNTL1 register to set transmission GPS0 line num and priority to high + link->link_enc->funcs->psr_program_secondary_packet(link->link_enc, + psr_context->sdpTransmitLineNumDeadline); + + cmd.psr_copy_settings.header.type = DMUB_CMD__PSR; + 
cmd.psr_copy_settings.header.sub_type = DMUB_CMD__PSR_COPY_SETTINGS; + cmd.psr_copy_settings.header.payload_bytes = sizeof(struct dmub_cmd_psr_copy_settings_data); + + // Hw insts + copy_settings_data->dpphy_inst = psr_context->phyType; + copy_settings_data->aux_inst = psr_context->channel; + copy_settings_data->digfe_inst = psr_context->engineId; + copy_settings_data->digbe_inst = psr_context->transmitterId; + + copy_settings_data->mpcc_inst = pipe_ctx->plane_res.mpcc_inst; + + if (pipe_ctx->plane_res.hubp) + copy_settings_data->hubp_inst = pipe_ctx->plane_res.hubp->inst; + else + copy_settings_data->hubp_inst = 0; + if (pipe_ctx->plane_res.dpp) + copy_settings_data->dpp_inst = pipe_ctx->plane_res.dpp->inst; + else + copy_settings_data->dpp_inst = 0; + if (pipe_ctx->stream_res.opp) + copy_settings_data->opp_inst = pipe_ctx->stream_res.opp->inst; + else + copy_settings_data->opp_inst = 0; + if (pipe_ctx->stream_res.tg) + copy_settings_data->otg_inst = pipe_ctx->stream_res.tg->inst; + else + copy_settings_data->otg_inst = 0; + + // Misc + copy_settings_data->psr_level = psr_context->psr_level.u32all; + copy_settings_data->hyst_frames = psr_context->timehyst_frames; + copy_settings_data->hyst_lines = psr_context->hyst_lines; + copy_settings_data->phy_type = psr_context->phyType; + copy_settings_data->aux_repeat = psr_context->aux_repeats; + copy_settings_data->smu_optimizations_en = psr_context->allow_smu_optimizations; + copy_settings_data->skip_wait_for_pll_lock = psr_context->skipPsrWaitForPllLock; + copy_settings_data->frame_delay = psr_context->frame_delay; + copy_settings_data->smu_phy_id = psr_context->smuPhyId; + copy_settings_data->num_of_controllers = psr_context->numberOfControllers; + copy_settings_data->frame_cap_ind = psr_context->psrFrameCaptureIndicationReq; + copy_settings_data->phy_num = psr_context->frame_delay & 0x7; + copy_settings_data->link_rate = psr_context->frame_delay & 0xF; + + dc_dmub_srv_cmd_queue(dc->dmub_srv, 
&cmd.psr_copy_settings.header); + dc_dmub_srv_cmd_execute(dc->dmub_srv); + dc_dmub_srv_wait_idle(dc->dmub_srv); + + return true; +} + +static const struct dmub_psr_funcs psr_funcs = { + .set_psr_enable = dmub_set_psr_enable, + .setup_psr = dmub_setup_psr, + .get_psr_state = dmub_get_psr_state, + .set_psr_level = dmub_set_psr_level, +}; + +/** + * Construct PSR object. + */ +static void dmub_psr_construct(struct dmub_psr *psr, struct dc_context *ctx) +{ + psr->ctx = ctx; + psr->funcs = &psr_funcs; +} + +/** + * Allocate and initialize PSR object. + */ +struct dmub_psr *dmub_psr_create(struct dc_context *ctx) +{ + struct dmub_psr *psr = kzalloc(sizeof(struct dmub_psr), GFP_KERNEL); + + if (psr == NULL) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dmub_psr_construct(psr, ctx); + + return psr; +} + +/** + * Deallocate PSR object and clear the caller's pointer to it. + */ +void dmub_psr_destroy(struct dmub_psr **dmub) +{ + kfree(*dmub); /* free the object itself, not the caller's pointer slot, which is written on the next line */ + *dmub = NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h new file mode 100644 index 000000000000..229958de3035 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h @@ -0,0 +1,47 @@ +/* + * Copyright 2012-16 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DMUB_PSR_H_ +#define _DMUB_PSR_H_ + +#include "os_types.h" + +struct dmub_psr { + struct dc_context *ctx; + const struct dmub_psr_funcs *funcs; +}; + +struct dmub_psr_funcs { + void (*set_psr_enable)(struct dmub_psr *dmub, bool enable); + bool (*setup_psr)(struct dmub_psr *dmub, struct dc_link *link, struct psr_context *psr_context); + void (*get_psr_state)(uint32_t *psr_state); + void (*set_psr_level)(struct dmub_psr *dmub, uint16_t psr_level); +}; + +struct dmub_psr *dmub_psr_create(struct dc_context *ctx); +void dmub_psr_destroy(struct dmub_psr **dmub); + + +#endif /* _DMUB_PSR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 4939cf3b316f..5b689273ff44 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1373,9 +1373,13 @@ static enum dc_status apply_single_controller_ctx_to_hw( // DRR should set trigger event to monitor surface update event if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) event_triggers = 0x80; + /* Event triggers and num frames initialized for DRR, but can be + * later updated for PSR use. Note DRR trigger events are generated + * regardless of whether num frames met. 
+ */ if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control) pipe_ctx->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx->stream_res.tg, event_triggers); + pipe_ctx->stream_res.tg, event_triggers, 2); if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg( @@ -1706,6 +1710,8 @@ static void set_drr(struct pipe_ctx **pipe_ctx, struct drr_params params = {0}; // DRR should set trigger event to monitor surface update event unsigned int event_triggers = 0x80; + // Note DRR trigger events are generated regardless of whether num frames met. + unsigned int num_frames = 2; params.vertical_total_max = vmax; params.vertical_total_min = vmin; @@ -1721,7 +1727,7 @@ static void set_drr(struct pipe_ctx **pipe_ctx, if (vmax != 0 && vmin != 0) pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( pipe_ctx[i]->stream_res.tg, - event_triggers); + event_triggers, num_frames); } } @@ -1738,30 +1744,31 @@ static void get_position(struct pipe_ctx **pipe_ctx, } static void set_static_screen_control(struct pipe_ctx **pipe_ctx, - int num_pipes, const struct dc_static_screen_events *events) + int num_pipes, const struct dc_static_screen_params *params) { unsigned int i; - unsigned int value = 0; + unsigned int triggers = 0; - if (events->overlay_update) - value |= 0x100; - if (events->surface_update) - value |= 0x80; - if (events->cursor_update) - value |= 0x2; - if (events->force_trigger) - value |= 0x1; + if (params->triggers.overlay_update) + triggers |= 0x100; + if (params->triggers.surface_update) + triggers |= 0x80; + if (params->triggers.cursor_update) + triggers |= 0x2; + if (params->triggers.force_trigger) + triggers |= 0x1; if (num_pipes) { struct dc *dc = pipe_ctx[0]->stream->ctx->dc; if (dc->fbc_compressor) - value |= 0x84; + triggers |= 0x84; } for (i = 0; i < num_pipes; i++) pipe_ctx[i]->stream_res.tg->funcs-> - set_static_screen_control(pipe_ctx[i]->stream_res.tg, value); + 
set_static_screen_control(pipe_ctx[i]->stream_res.tg, + triggers, params->num_frames); } /* diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 5f7c2c5641c4..1ea7db8eeb98 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -469,22 +469,27 @@ void dce110_timing_generator_set_drr( void dce110_timing_generator_set_static_screen_control( struct timing_generator *tg, - uint32_t value) + uint32_t event_triggers, + uint32_t num_frames) { struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); uint32_t static_screen_cntl = 0; uint32_t addr = 0; + // By register spec, it only takes 8 bit value + if (num_frames > 0xFF) + num_frames = 0xFF; + addr = CRTC_REG(mmCRTC_STATIC_SCREEN_CONTROL); static_screen_cntl = dm_read_reg(tg->ctx, addr); set_reg_field_value(static_screen_cntl, - value, + event_triggers, CRTC_STATIC_SCREEN_CONTROL, CRTC_STATIC_SCREEN_EVENT_MASK); set_reg_field_value(static_screen_cntl, - 2, + num_frames, CRTC_STATIC_SCREEN_CONTROL, CRTC_STATIC_SCREEN_FRAME_COUNT); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index 768ccf27ada9..d8a5ed7b485d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -231,7 +231,8 @@ void dce110_timing_generator_set_drr( void dce110_timing_generator_set_static_screen_control( struct timing_generator *tg, - uint32_t value); + uint32_t event_triggers, + uint32_t num_frames); void dce110_timing_generator_get_crtc_scanoutpos( struct timing_generator *tg, diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index 098e56962f2a..82bc4e192bbf 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -819,13 +819,18 @@ void dce120_tg_set_colors(struct timing_generator *tg, static void dce120_timing_generator_set_static_screen_control( struct timing_generator *tg, - uint32_t value) + uint32_t event_triggers, + uint32_t num_frames) { struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + // By register spec, it only takes 8 bit value + if (num_frames > 0xFF) + num_frames = 0xFF; + CRTC_REG_UPDATE_2(CRTC0_CRTC_STATIC_SCREEN_CONTROL, - CRTC_STATIC_SCREEN_EVENT_MASK, value, - CRTC_STATIC_SCREEN_FRAME_COUNT, 2); + CRTC_STATIC_SCREEN_EVENT_MASK, event_triggers, + CRTC_STATIC_SCREEN_FRAME_COUNT, num_frames); } void dce120_timing_generator_set_test_pattern( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c index 935c892622a0..4d3f7d5e1473 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c @@ -88,26 +88,6 @@ enum dscl_mode_sel { DSCL_MODE_DSCL_BYPASS = 6 }; -static const struct dpp_input_csc_matrix dpp_input_csc_matrix[] = { - {COLOR_SPACE_SRGB, - {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, - {COLOR_SPACE_SRGB_LIMITED, - {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, - {COLOR_SPACE_YCBCR601, - {0x2cdd, 0x2000, 0, 0xe991, 0xe926, 0x2000, 0xf4fd, 0x10ef, - 0, 0x2000, 0x38b4, 0xe3a6} }, - {COLOR_SPACE_YCBCR601_LIMITED, - {0x3353, 0x2568, 0, 0xe400, 0xe5dc, 0x2568, 0xf367, 0x1108, - 0, 0x2568, 0x40de, 0xdd3a} }, - {COLOR_SPACE_YCBCR709, - {0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0, - 0x2000, 0x3b61, 0xe24f} }, - - {COLOR_SPACE_YCBCR709_LIMITED, - {0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0, - 0x2568, 0x43ee, 0xdbb2} } -}; - static void program_gamut_remap( struct dcn10_dpp *dpp, const uint16_t *regval, diff --git 
a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index a02c10e23e0d..f36a0d8cedfe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -930,6 +930,9 @@ static bool hubbub1_get_dcc_compression_cap(struct hubbub *hubbub, output->grph.rgb.max_compressed_blk_size = 64; output->grph.rgb.independent_64b_blks = true; break; + default: + ASSERT(false); + break; } output->capable = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 2baff3cd0ae5..f2127afb37b2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1188,8 +1188,14 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) if (can_apply_seamless_boot && pipe_ctx->stream != NULL && pipe_ctx->stream_res.tg->funcs->is_tg_enabled( - pipe_ctx->stream_res.tg)) + pipe_ctx->stream_res.tg)) { + // Enable double buffering for OTG_BLANK no matter if + // seamless boot is enabled or not to suppress global sync + // signals when OTG blanked. This is to prevent pipe from + // requesting data while in PSR. + tg->funcs->tg_init(tg); continue; + } /* Disable on the current state so the new one isn't cleared. */ pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; @@ -2704,6 +2710,8 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, struct drr_params params = {0}; // DRR set trigger event mapped to OTG_TRIG_A (bit 11) for manual control flow unsigned int event_triggers = 0x800; + // Note DRR trigger events are generated regardless of whether num frames met. 
+ unsigned int num_frames = 2; params.vertical_total_max = vmax; params.vertical_total_min = vmin; @@ -2720,7 +2728,7 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, if (vmax != 0 && vmin != 0) pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( pipe_ctx[i]->stream_res.tg, - event_triggers); + event_triggers, num_frames); } } @@ -2737,21 +2745,22 @@ void dcn10_get_position(struct pipe_ctx **pipe_ctx, } void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx, - int num_pipes, const struct dc_static_screen_events *events) + int num_pipes, const struct dc_static_screen_params *params) { unsigned int i; - unsigned int value = 0; + unsigned int triggers = 0; - if (events->surface_update) - value |= 0x80; - if (events->cursor_update) - value |= 0x2; - if (events->force_trigger) - value |= 0x1; + if (params->triggers.surface_update) + triggers |= 0x80; + if (params->triggers.cursor_update) + triggers |= 0x2; + if (params->triggers.force_trigger) + triggers |= 0x1; for (i = 0; i < num_pipes; i++) pipe_ctx[i]->stream_res.tg->funcs-> - set_static_screen_control(pipe_ctx[i]->stream_res.tg, value); + set_static_screen_control(pipe_ctx[i]->stream_res.tg, + triggers, params->num_frames); } static void dcn10_config_stereo_parameters( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index 55b8f3b2fc4e..4d20f6586bb5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -132,7 +132,7 @@ void dcn10_get_position(struct pipe_ctx **pipe_ctx, int num_pipes, struct crtc_position *position); void dcn10_set_static_screen_control(struct pipe_ctx **pipe_ctx, - int num_pipes, const struct dc_static_screen_events *events); + int num_pipes, const struct dc_static_screen_params *params); void dcn10_setup_stereo(struct pipe_ctx *pipe_ctx, struct dc *dc); void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool 
enable); void dcn10_log_hw_state(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index cd7412dc42d1..a9a43b397db9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -789,21 +789,26 @@ void optc1_set_early_control( void optc1_set_static_screen_control( struct timing_generator *optc, - uint32_t value) + uint32_t event_triggers, + uint32_t num_frames) { struct optc *optc1 = DCN10TG_FROM_TG(optc); + // By register spec, it only takes 8 bit value + if (num_frames > 0xFF) + num_frames = 0xFF; + /* Bit 8 is no longer applicable in RV for PSR case, * set bit 8 to 0 if given */ - if ((value & STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN) + if ((event_triggers & STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN) != 0) - value = value & + event_triggers = event_triggers & ~STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN; REG_SET_2(OTG_STATIC_SCREEN_CONTROL, 0, - OTG_STATIC_SCREEN_EVENT_MASK, value, - OTG_STATIC_SCREEN_FRAME_COUNT, 2); + OTG_STATIC_SCREEN_EVENT_MASK, event_triggers, + OTG_STATIC_SCREEN_FRAME_COUNT, num_frames); } void optc1_setup_manual_trigger(struct timing_generator *optc) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index 3afeb1a30f21..f277656d5464 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -625,7 +625,8 @@ void optc1_set_drr( void optc1_set_static_screen_control( struct timing_generator *optc, - uint32_t value); + uint32_t event_triggers, + uint32_t num_frames); void optc1_program_stereo(struct timing_generator *optc, const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c index 
4d7e45892f08..13e057d7ee93 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c @@ -104,7 +104,7 @@ static void dpp2_cnv_setup ( uint32_t pixel_format = 0; uint32_t alpha_en = 1; enum dc_color_space color_space = COLOR_SPACE_SRGB; - enum dcn10_input_csc_select select = INPUT_CSC_SELECT_BYPASS; + enum dcn20_input_csc_select select = DCN2_ICSC_SELECT_BYPASS; bool force_disable_cursor = false; struct out_csc_color_matrix tbl_entry; uint32_t is_2bit = 0; @@ -145,25 +145,25 @@ static void dpp2_cnv_setup ( force_disable_cursor = false; pixel_format = 65; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: force_disable_cursor = true; pixel_format = 64; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: force_disable_cursor = true; pixel_format = 67; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: force_disable_cursor = true; pixel_format = 66; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: pixel_format = 22; @@ -177,7 +177,7 @@ static void dpp2_cnv_setup ( case SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888: pixel_format = 12; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX: pixel_format = 112; @@ -188,13 +188,13 @@ static void dpp2_cnv_setup ( case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010: pixel_format = 114; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; is_2bit = 1; break; case 
SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102: pixel_format = 115; color_space = COLOR_SPACE_YCBCR709; - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; is_2bit = 1; break; case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT: @@ -227,13 +227,13 @@ static void dpp2_cnv_setup ( tbl_entry.color_space = input_color_space; if (color_space >= COLOR_SPACE_YCBCR601) - select = INPUT_CSC_SELECT_ICSC; + select = DCN2_ICSC_SELECT_ICSC_A; else - select = INPUT_CSC_SELECT_BYPASS; + select = DCN2_ICSC_SELECT_BYPASS; - dpp1_program_input_csc(dpp_base, color_space, select, &tbl_entry); + dpp2_program_input_csc(dpp_base, color_space, select, &tbl_entry); } else - dpp1_program_input_csc(dpp_base, color_space, select, NULL); + dpp2_program_input_csc(dpp_base, color_space, select, NULL); if (force_disable_cursor) { REG_UPDATE(CURSOR_CONTROL, @@ -458,7 +458,7 @@ static struct dpp_funcs dcn20_dpp_funcs = { .dpp_reset = dpp_reset, .dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale, .dpp_get_optimal_number_of_taps = dpp1_get_optimal_number_of_taps, - .dpp_set_gamut_remap = dpp1_cm_set_gamut_remap, + .dpp_set_gamut_remap = dpp2_cm_set_gamut_remap, .dpp_set_csc_adjustment = NULL, .dpp_set_csc_default = NULL, .dpp_program_regamma_pwl = oppn20_dummy_program_regamma_pwl, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h index 5b03b737b1d6..27610251c57f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h @@ -150,6 +150,16 @@ SRI(CM_SHAPER_RAMA_REGION_32_33, CM, id), \ SRI(CM_SHAPER_LUT_INDEX, CM, id) +#define TF_REG_LIST_DCN20_COMMON_APPEND(id) \ + SRI(CM_GAMUT_REMAP_B_C11_C12, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C13_C14, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C21_C22, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C23_C24, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C31_C32, CM, id),\ + SRI(CM_GAMUT_REMAP_B_C33_C34, CM, id),\ + SRI(CM_ICSC_B_C11_C12, CM, id), \ + SRI(CM_ICSC_B_C33_C34, CM, 
id) + #define TF_REG_LIST_DCN20(id) \ TF_REG_LIST_DCN(id), \ TF_REG_LIST_DCN20_COMMON(id), \ @@ -572,10 +582,29 @@ TF_SF(DSCL0_OBUF_MEM_PWR_CTRL, OBUF_MEM_PWR_FORCE, mask_sh),\ TF_SF(DSCL0_DSCL_MEM_PWR_CTRL, LUT_MEM_PWR_FORCE, mask_sh) +/* DPP CM debug status register: + * + * Status index including current ICSC, Gamut Remap Mode is 9 + * ICSC Mode: [4..3] + * Gamut Remap Mode: [10..9] + */ +#define CM_TEST_DEBUG_DATA_STATUS_IDX 9 + +#define TF_DEBUG_REG_LIST_SH_DCN20 \ + TF_DEBUG_REG_LIST_SH_DCN10, \ + .CM_TEST_DEBUG_DATA_ICSC_MODE = 3, \ + .CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE = 9 + +#define TF_DEBUG_REG_LIST_MASK_DCN20 \ + TF_DEBUG_REG_LIST_MASK_DCN10, \ + .CM_TEST_DEBUG_DATA_ICSC_MODE = 0x18, \ + .CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE = 0x600 #define TF_REG_FIELD_LIST_DCN2_0(type) \ TF_REG_FIELD_LIST(type) \ type CM_BLNDGAM_LUT_DATA; \ + type CM_TEST_DEBUG_DATA_ICSC_MODE; \ + type CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE; \ type FORMAT_CNV16; \ type CNVC_BYPASS_MSB_ALIGN; \ type CLAMP_POSITIVE; \ @@ -630,11 +659,22 @@ struct dcn2_dpp_mask { uint32_t COLOR_KEYER_RED; \ uint32_t COLOR_KEYER_GREEN; \ uint32_t COLOR_KEYER_BLUE; \ - uint32_t OBUF_MEM_PWR_CTRL;\ + uint32_t OBUF_MEM_PWR_CTRL; \ uint32_t DSCL_MEM_PWR_CTRL +#define DPP_DCN2_REG_VARIABLE_LIST_CM_APPEND \ + uint32_t CM_GAMUT_REMAP_B_C11_C12; \ + uint32_t CM_GAMUT_REMAP_B_C13_C14; \ + uint32_t CM_GAMUT_REMAP_B_C21_C22; \ + uint32_t CM_GAMUT_REMAP_B_C23_C24; \ + uint32_t CM_GAMUT_REMAP_B_C31_C32; \ + uint32_t CM_GAMUT_REMAP_B_C33_C34; \ + uint32_t CM_ICSC_B_C11_C12; \ + uint32_t CM_ICSC_B_C33_C34 + struct dcn2_dpp_registers { DPP_DCN2_REG_VARIABLE_LIST; + DPP_DCN2_REG_VARIABLE_LIST_CM_APPEND; }; struct dcn20_dpp { @@ -656,6 +696,18 @@ struct dcn20_dpp { struct pwl_params pwl_data; }; +enum dcn20_input_csc_select { + DCN2_ICSC_SELECT_BYPASS = 0, + DCN2_ICSC_SELECT_ICSC_A = 1, + DCN2_ICSC_SELECT_ICSC_B = 2 +}; + +enum dcn20_gamut_remap_select { + DCN2_GAMUT_REMAP_BYPASS = 0, + DCN2_GAMUT_REMAP_COEF_A = 1, + 
DCN2_GAMUT_REMAP_COEF_B = 2 +}; + void dpp20_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s); @@ -667,6 +719,16 @@ void dpp2_set_degamma( struct dpp *dpp_base, enum ipp_degamma_mode mode); +void dpp2_cm_set_gamut_remap( + struct dpp *dpp_base, + const struct dpp_grph_csc_adjustment *adjust); + +void dpp2_program_input_csc( + struct dpp *dpp_base, + enum dc_color_space color_space, + enum dcn20_input_csc_select input_select, + const struct out_csc_color_matrix *tbl_entry); + bool dpp20_program_blnd_lut( struct dpp *dpp_base, const struct pwl_params *params); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c index 05a3e7f97ef0..8dc3d1f73984 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c @@ -36,6 +36,9 @@ #define REG(reg)\ dpp->tf_regs->reg +#define IND_REG(index) \ + (index) + #define CTX \ dpp->base.ctx @@ -44,9 +47,6 @@ dpp->tf_shift->field_name, dpp->tf_mask->field_name - - - static void dpp2_enable_cm_block( struct dpp *dpp_base) { @@ -158,6 +158,155 @@ void dpp2_set_degamma( } } +static void program_gamut_remap( + struct dcn20_dpp *dpp, + const uint16_t *regval, + enum dcn20_gamut_remap_select select) +{ + uint32_t cur_select = 0; + struct color_matrices_reg gam_regs; + + if (regval == NULL || select == DCN2_GAMUT_REMAP_BYPASS) { + REG_SET(CM_GAMUT_REMAP_CONTROL, 0, + CM_GAMUT_REMAP_MODE, 0); + return; + } + + /* determine which gamut_remap coefficients (A or B) we are using + * currently. 
select the alternate set to double buffer + * the update so gamut_remap is updated on frame boundary + */ + IX_REG_GET(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA, + CM_TEST_DEBUG_DATA_STATUS_IDX, + CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE, &cur_select); + + /* value stored in dbg reg will be 1 greater than mode we want */ + if (cur_select != DCN2_GAMUT_REMAP_COEF_A) + select = DCN2_GAMUT_REMAP_COEF_A; + else + select = DCN2_GAMUT_REMAP_COEF_B; + + gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_GAMUT_REMAP_C11; + gam_regs.masks.csc_c11 = dpp->tf_mask->CM_GAMUT_REMAP_C11; + gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_GAMUT_REMAP_C12; + gam_regs.masks.csc_c12 = dpp->tf_mask->CM_GAMUT_REMAP_C12; + + if (select == DCN2_GAMUT_REMAP_COEF_A) { + gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_C11_C12); + gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_C33_C34); + } else { + gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_B_C11_C12); + gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_B_C33_C34); + } + + cm_helper_program_color_matrices( + dpp->base.ctx, + regval, + &gam_regs); + + REG_SET( + CM_GAMUT_REMAP_CONTROL, 0, + CM_GAMUT_REMAP_MODE, select); + +} + +void dpp2_cm_set_gamut_remap( + struct dpp *dpp_base, + const struct dpp_grph_csc_adjustment *adjust) +{ + struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); + int i = 0; + + if (adjust->gamut_adjust_type != GRAPHICS_GAMUT_ADJUST_TYPE_SW) + /* Bypass if type is bypass or hw */ + program_gamut_remap(dpp, NULL, DCN2_GAMUT_REMAP_BYPASS); + else { + struct fixed31_32 arr_matrix[12]; + uint16_t arr_reg_val[12]; + + for (i = 0; i < 12; i++) + arr_matrix[i] = adjust->temperature_matrix[i]; + + convert_float_matrix( + arr_reg_val, arr_matrix, 12); + + program_gamut_remap(dpp, arr_reg_val, DCN2_GAMUT_REMAP_COEF_A); + } +} + +void dpp2_program_input_csc( + struct dpp *dpp_base, + enum dc_color_space color_space, + enum dcn20_input_csc_select input_select, + const struct out_csc_color_matrix *tbl_entry) +{ + struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); + int i; + int 
arr_size = sizeof(dpp_input_csc_matrix)/sizeof(struct dpp_input_csc_matrix); + const uint16_t *regval = NULL; + uint32_t cur_select = 0; + enum dcn20_input_csc_select select; + struct color_matrices_reg icsc_regs; + + if (input_select == DCN2_ICSC_SELECT_BYPASS) { + REG_SET(CM_ICSC_CONTROL, 0, CM_ICSC_MODE, 0); + return; + } + + if (tbl_entry == NULL) { + for (i = 0; i < arr_size; i++) + if (dpp_input_csc_matrix[i].color_space == color_space) { + regval = dpp_input_csc_matrix[i].regval; + break; + } + + if (regval == NULL) { + BREAK_TO_DEBUGGER(); + return; + } + } else { + regval = tbl_entry->regval; + } + + /* determine which CSC coefficients (A or B) we are using + * currently. select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + IX_REG_GET(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA, + CM_TEST_DEBUG_DATA_STATUS_IDX, + CM_TEST_DEBUG_DATA_ICSC_MODE, &cur_select); + + if (cur_select != DCN2_ICSC_SELECT_ICSC_A) + select = DCN2_ICSC_SELECT_ICSC_A; + else + select = DCN2_ICSC_SELECT_ICSC_B; + + icsc_regs.shifts.csc_c11 = dpp->tf_shift->CM_ICSC_C11; + icsc_regs.masks.csc_c11 = dpp->tf_mask->CM_ICSC_C11; + icsc_regs.shifts.csc_c12 = dpp->tf_shift->CM_ICSC_C12; + icsc_regs.masks.csc_c12 = dpp->tf_mask->CM_ICSC_C12; + + if (select == DCN2_ICSC_SELECT_ICSC_A) { + + icsc_regs.csc_c11_c12 = REG(CM_ICSC_C11_C12); + icsc_regs.csc_c33_c34 = REG(CM_ICSC_C33_C34); + + } else { + + icsc_regs.csc_c11_c12 = REG(CM_ICSC_B_C11_C12); + icsc_regs.csc_c33_c34 = REG(CM_ICSC_B_C33_C34); + + } + + cm_helper_program_color_matrices( + dpp->base.ctx, + regval, + &icsc_regs); + + REG_SET(CM_ICSC_CONTROL, 0, + CM_ICSC_MODE, select); +} + static void dpp20_power_on_blnd_lut( struct dpp *dpp_base, bool power_on) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c index 8b8438566101..9235f7d29454 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c +++ 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c @@ -293,6 +293,9 @@ bool hubbub2_get_dcc_compression_cap(struct hubbub *hubbub, output->grph.rgb.max_compressed_blk_size = 64; output->grph.rgb.independent_64b_blks = true; break; + default: + ASSERT(false); + break; } output->capable = true; output->const_color_support = true; @@ -601,7 +604,8 @@ static const struct hubbub_funcs hubbub2_funcs = { .wm_read_state = hubbub2_wm_read_state, .get_dchub_ref_freq = hubbub2_get_dchub_ref_freq, .program_watermarks = hubbub2_program_watermarks, - .allow_self_refresh_control = hubbub1_allow_self_refresh_control + .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled, + .allow_self_refresh_control = hubbub1_allow_self_refresh_control, }; void hubbub2_construct(struct dcn20_hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 5b9cbedaa0de..cfbbaffa8654 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -686,9 +686,13 @@ enum dc_status dcn20_enable_stream_timing( // DRR should set trigger event to monitor surface update event if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) event_triggers = 0x80; + /* Event triggers and num frames initialized for DRR, but can be + * later updated for PSR use. Note DRR trigger events are generated + * regardless of whether num frames met. 
+ */ if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control) pipe_ctx->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx->stream_res.tg, event_triggers); + pipe_ctx->stream_res.tg, event_triggers, 2); /* TODO program crtc source select for non-virtual signal*/ /* TODO program FMT */ @@ -941,6 +945,9 @@ void dcn20_blank_pixel_data( int width = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; int height = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; + if (stream->link->test_pattern_enabled) + return; + /* get opp dpg blank color */ color_space_to_black_color(dc, color_space, &black_color); @@ -1638,9 +1645,9 @@ void dcn20_program_front_end_for_ctx( struct hubp *hubp = pipe->plane_res.hubp; int j = 0; - for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS + for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS*1000 && hubp->funcs->hubp_is_flip_pending(hubp); j++) - msleep(1); + mdelay(1); } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index f90031ed58a6..de9c857ab3e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -33,6 +33,9 @@ #define REG(reg)\ mpc20->mpc_regs->reg +#define IND_REG(index) \ + (index) + #define CTX \ mpc20->base.ctx @@ -132,19 +135,33 @@ void mpc2_set_output_csc( const uint16_t *regval, enum mpc_output_csc_mode ocsc_mode) { + uint32_t cur_mode; struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc); struct color_matrices_reg ocsc_regs; - REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); - - if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) + if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) { + REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); return; + } if (regval == NULL) { BREAK_TO_DEBUGGER(); return; } + /* determine which CSC coefficients (A or B) we are using + * currently. 
select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + IX_REG_GET(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA, + MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX, + MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE, &cur_mode); + + if (cur_mode != MPC_OUTPUT_CSC_COEF_A) + ocsc_mode = MPC_OUTPUT_CSC_COEF_A; + else + ocsc_mode = MPC_OUTPUT_CSC_COEF_B; + ocsc_regs.shifts.csc_c11 = mpc20->mpc_shift->MPC_OCSC_C11_A; ocsc_regs.masks.csc_c11 = mpc20->mpc_mask->MPC_OCSC_C11_A; ocsc_regs.shifts.csc_c12 = mpc20->mpc_shift->MPC_OCSC_C12_A; @@ -157,10 +174,13 @@ void mpc2_set_output_csc( ocsc_regs.csc_c11_c12 = REG(CSC_C11_C12_B[opp_id]); ocsc_regs.csc_c33_c34 = REG(CSC_C33_C34_B[opp_id]); } + cm_helper_program_color_matrices( mpc20->base.ctx, regval, &ocsc_regs); + + REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); } void mpc2_set_ocsc_default( @@ -169,14 +189,16 @@ void mpc2_set_ocsc_default( enum dc_color_space color_space, enum mpc_output_csc_mode ocsc_mode) { + uint32_t cur_mode; struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc); uint32_t arr_size; struct color_matrices_reg ocsc_regs; const uint16_t *regval = NULL; - REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); - if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) + if (ocsc_mode == MPC_OUTPUT_CSC_DISABLE) { + REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); return; + } regval = find_color_matrix(color_space, &arr_size); @@ -185,6 +207,19 @@ void mpc2_set_ocsc_default( return; } + /* determine which CSC coefficients (A or B) we are using + * currently. 
select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + IX_REG_GET(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_DATA, + MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX, + MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE, &cur_mode); + + if (cur_mode != MPC_OUTPUT_CSC_COEF_A) + ocsc_mode = MPC_OUTPUT_CSC_COEF_A; + else + ocsc_mode = MPC_OUTPUT_CSC_COEF_B; + ocsc_regs.shifts.csc_c11 = mpc20->mpc_shift->MPC_OCSC_C11_A; ocsc_regs.masks.csc_c11 = mpc20->mpc_mask->MPC_OCSC_C11_A; ocsc_regs.shifts.csc_c12 = mpc20->mpc_shift->MPC_OCSC_C12_A; @@ -203,6 +238,8 @@ void mpc2_set_ocsc_default( mpc20->base.ctx, regval, &ocsc_regs); + + REG_SET(CSC_MODE[opp_id], 0, MPC_OCSC_MODE, ocsc_mode); } static void mpc2_ogam_get_reg_field( diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h index 9f53192da2dc..c78fd5123497 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h @@ -80,6 +80,10 @@ SRII(DENORM_CLAMP_G_Y, MPC_OUT, inst),\ SRII(DENORM_CLAMP_B_CB, MPC_OUT, inst) +#define MPC_DBG_REG_LIST_DCN2_0() \ + SR(MPC_OCSC_TEST_DEBUG_DATA),\ + SR(MPC_OCSC_TEST_DEBUG_INDEX) + #define MPC_REG_VARIABLE_LIST_DCN2_0 \ MPC_COMMON_REG_VARIABLE_LIST \ uint32_t MPCC_TOP_GAIN[MAX_MPCC]; \ @@ -118,6 +122,8 @@ uint32_t MPCC_OGAM_LUT_RAM_CONTROL[MAX_MPCC];\ uint32_t MPCC_OGAM_LUT_DATA[MAX_MPCC];\ uint32_t MPCC_OGAM_MODE[MAX_MPCC];\ + uint32_t MPC_OCSC_TEST_DEBUG_DATA;\ + uint32_t MPC_OCSC_TEST_DEBUG_INDEX;\ uint32_t CSC_MODE[MAX_OPP]; \ uint32_t CSC_C11_C12_A[MAX_OPP]; \ uint32_t CSC_C33_C34_A[MAX_OPP]; \ @@ -134,6 +140,7 @@ SF(MPCC0_MPCC_TOP_GAIN, MPCC_TOP_GAIN, mask_sh),\ SF(MPCC0_MPCC_BOT_GAIN_INSIDE, MPCC_BOT_GAIN_INSIDE, mask_sh),\ SF(MPCC0_MPCC_BOT_GAIN_OUTSIDE, MPCC_BOT_GAIN_OUTSIDE, mask_sh),\ + SF(MPC_OCSC_TEST_DEBUG_INDEX, MPC_OCSC_TEST_DEBUG_INDEX, mask_sh),\ SF(MPC_OUT0_CSC_MODE, MPC_OCSC_MODE, mask_sh),\ SF(MPC_OUT0_CSC_C11_C12_A, 
MPC_OCSC_C11_A, mask_sh),\ SF(MPC_OUT0_CSC_C11_C12_A, MPC_OCSC_C12_A, mask_sh),\ @@ -174,6 +181,19 @@ SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MAX_B_CB, mask_sh),\ SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MIN_B_CB, mask_sh) +/* + * DCN2 MPC_OCSC debug status register: + * + * Status index including current OCSC Mode is 1 + * OCSC Mode: [1..0] + */ +#define MPC_OCSC_TEST_DEBUG_DATA_STATUS_IDX 1 + +#define MPC_DEBUG_REG_LIST_SH_DCN20 \ + .MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE = 0 + +#define MPC_DEBUG_REG_LIST_MASK_DCN20 \ + .MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE = 0x3 #define MPC_REG_FIELD_LIST_DCN2_0(type) \ MPC_REG_FIELD_LIST(type)\ @@ -182,6 +202,8 @@ type MPCC_TOP_GAIN;\ type MPCC_BOT_GAIN_INSIDE;\ type MPCC_BOT_GAIN_OUTSIDE;\ + type MPC_OCSC_TEST_DEBUG_DATA_OCSC_MODE;\ + type MPC_OCSC_TEST_DEBUG_INDEX;\ type MPC_OCSC_MODE;\ type MPC_OCSC_C11_A;\ type MPC_OCSC_C12_A;\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 2dafa20d769d..85f90f3e24cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -638,6 +638,7 @@ static const struct dce110_aux_registers aux_engine_regs[] = { #define tf_regs(id)\ [id] = {\ TF_REG_LIST_DCN20(id),\ + TF_REG_LIST_DCN20_COMMON_APPEND(id),\ } static const struct dcn2_dpp_registers tf_regs[] = { @@ -651,12 +652,12 @@ static const struct dcn2_dpp_registers tf_regs[] = { static const struct dcn2_dpp_shift tf_shift = { TF_REG_LIST_SH_MASK_DCN20(__SHIFT), - TF_DEBUG_REG_LIST_SH_DCN10 + TF_DEBUG_REG_LIST_SH_DCN20 }; static const struct dcn2_dpp_mask tf_mask = { TF_REG_LIST_SH_MASK_DCN20(_MASK), - TF_DEBUG_REG_LIST_MASK_DCN10 + TF_DEBUG_REG_LIST_MASK_DCN20 }; #define dwbc_regs_dcn2(id)\ @@ -706,14 +707,17 @@ static const struct dcn20_mpc_registers mpc_regs = { MPC_OUT_MUX_REG_LIST_DCN2_0(3), MPC_OUT_MUX_REG_LIST_DCN2_0(4), MPC_OUT_MUX_REG_LIST_DCN2_0(5), + 
MPC_DBG_REG_LIST_DCN2_0() }; static const struct dcn20_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) + MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT), + MPC_DEBUG_REG_LIST_SH_DCN20 }; static const struct dcn20_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK) + MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK), + MPC_DEBUG_REG_LIST_MASK_DCN20 }; #define tg_regs(id)\ @@ -1857,6 +1861,22 @@ void dcn20_populate_dml_writeback_from_context( } +static int get_num_odm_heads(struct pipe_ctx *pipe) +{ + int odm_head_count = 0; + struct pipe_ctx *next_pipe = pipe->next_odm_pipe; + while (next_pipe) { + odm_head_count++; + next_pipe = next_pipe->next_odm_pipe; + } + pipe = pipe->prev_odm_pipe; + while (pipe) { + odm_head_count++; + pipe = pipe->prev_odm_pipe; + } + return odm_head_count ? odm_head_count + 1 : 0; +} + int dcn20_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes) { @@ -1883,17 +1903,21 @@ int dcn20_populate_dml_pipes_from_context( for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing; unsigned int v_total; + unsigned int front_porch; int output_bpc; if (!res_ctx->pipe_ctx[i].stream) continue; v_total = timing->v_total; + front_porch = timing->v_front_porch; /* todo: pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0; pipes[pipe_cnt].pipe.src.dcc = 0; pipes[pipe_cnt].pipe.src.vm = 0;*/ + pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; + pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC; /* todo: rotation?*/ pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h; @@ -1915,7 +1939,7 @@ int dcn20_populate_dml_pipes_from_context( - timing->h_addressable - timing->h_border_left - timing->h_border_right; - pipes[pipe_cnt].pipe.dest.vblank_start = v_total - timing->v_front_porch; + 
pipes[pipe_cnt].pipe.dest.vblank_start = v_total - front_porch; pipes[pipe_cnt].pipe.dest.vblank_end = pipes[pipe_cnt].pipe.dest.vblank_start - timing->v_addressable - timing->v_border_top @@ -1932,8 +1956,13 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].dout.dp_lanes = 4; pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min; pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max; - pipes[pipe_cnt].pipe.dest.odm_combine = res_ctx->pipe_ctx[i].prev_odm_pipe - || res_ctx->pipe_ctx[i].next_odm_pipe; + switch (get_num_odm_heads(&res_ctx->pipe_ctx[i])) { + case 2: + pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_2to1; + break; + default: + pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_disabled; + } pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx; if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state == res_ctx->pipe_ctx[i].plane_state) @@ -2043,6 +2072,9 @@ int dcn20_populate_dml_pipes_from_context( if (pipes[pipe_cnt].pipe.src.viewport_height > 1080) pipes[pipe_cnt].pipe.src.viewport_height = 1080; pipes[pipe_cnt].pipe.src.surface_height_y = pipes[pipe_cnt].pipe.src.viewport_height; + pipes[pipe_cnt].pipe.src.surface_width_y = pipes[pipe_cnt].pipe.src.viewport_width; + pipes[pipe_cnt].pipe.src.surface_height_c = pipes[pipe_cnt].pipe.src.viewport_height; + pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width; pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 63) / 64) * 64; /* linear sw only */ pipes[pipe_cnt].pipe.src.source_format = dm_444_32; pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/ @@ -2076,7 +2108,10 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c.width; pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport.height; 
pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c.height; + pipes[pipe_cnt].pipe.src.surface_width_y = pln->plane_size.surface_size.width; pipes[pipe_cnt].pipe.src.surface_height_y = pln->plane_size.surface_size.height; + pipes[pipe_cnt].pipe.src.surface_width_c = pln->plane_size.chroma_size.width; + pipes[pipe_cnt].pipe.src.surface_height_c = pln->plane_size.chroma_size.height; if (pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { pipes[pipe_cnt].pipe.src.data_pitch = pln->plane_size.surface_pitch; pipes[pipe_cnt].pipe.src.data_pitch_c = pln->plane_size.chroma_pitch; @@ -2490,7 +2525,7 @@ int dcn20_validate_apply_pipe_split_flags( split[i] = true; if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { split[i] = true; - context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx] = true; + context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx] = dm_odm_combine_mode_2to1; } context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx] = context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx]; @@ -2915,7 +2950,7 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 || (voltage_supported && full_pstate_supported)) { - context->bw_ctx.bw.dcn.clk.p_state_change_support = true; + context->bw_ctx.bw.dcn.clk.p_state_change_support = full_pstate_supported; goto restore_dml_state; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index 216ae170bc50..da63fc53cc4a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -29,6 +29,8 @@ #include "dm_services.h" #include "reg_helper.h" +#include "dc_dmub_srv.h" + #define DC_LOGGER_INIT(logger) #define REG(reg)\ @@ -251,10 +253,20 @@ static void hubp21_apply_PLAT_54186_wa( ROTATION_ANGLE, &rotation_angle, H_MIRROR_EN, &h_mirror_en); - /* apply wa only for NV12 surface with 
scatter gather enabled with view port > 512 */ + /* reset persistent cached data */ + hubp21->PLAT_54186_wa_chroma_addr_offset = 0; + /* apply wa only for NV12 surface with scatter gather enabled with viewport > 512 along + * the vertical direction*/ if (address->type != PLN_ADDR_TYPE_VIDEO_PROGRESSIVE || - address->video_progressive.luma_addr.high_part == 0xf4 - || viewport_c_height <= 512) + address->video_progressive.luma_addr.high_part == 0xf4) + return; + + if ((rotation_angle == 0 || rotation_angle == 180) + && viewport_c_height <= 512) + return; + + if ((rotation_angle == 90 || rotation_angle == 270) + && viewport_c_width <= 512) return; switch (rotation_angle) { @@ -678,123 +690,167 @@ void hubp21_validate_dml_output(struct hubp *hubp, dml_dlg_attr->refcyc_per_meta_chunk_flip_l, dlg_attr.refcyc_per_meta_chunk_flip_l); } -bool hubp21_program_surface_flip_and_addr( - struct hubp *hubp, - const struct dc_plane_address *address, - bool flip_immediate) +static void program_surface_flip_and_addr(struct hubp *hubp, struct surface_flip_registers *flip_regs) { struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); - struct dc_debug_options *debug = &hubp->ctx->dc->debug; - //program flip type - REG_UPDATE(DCSURF_FLIP_CONTROL, - SURFACE_FLIP_TYPE, flip_immediate); + REG_UPDATE_3(DCSURF_FLIP_CONTROL, + SURFACE_FLIP_TYPE, flip_regs->immediate, + SURFACE_FLIP_MODE_FOR_STEREOSYNC, flip_regs->grph_stereo, + SURFACE_FLIP_IN_STEREOSYNC, flip_regs->grph_stereo); - // Program VMID reg REG_UPDATE(VMID_SETTINGS_0, - VMID, address->vmid); + VMID, flip_regs->vmid); - if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) { - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1); - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1); + REG_UPDATE_8(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_TMZ, flip_regs->tmz_surface, + PRIMARY_SURFACE_TMZ_C, flip_regs->tmz_surface, + PRIMARY_META_SURFACE_TMZ, flip_regs->tmz_surface, + PRIMARY_META_SURFACE_TMZ_C, 
flip_regs->tmz_surface, + SECONDARY_SURFACE_TMZ, flip_regs->tmz_surface, + SECONDARY_SURFACE_TMZ_C, flip_regs->tmz_surface, + SECONDARY_META_SURFACE_TMZ, flip_regs->tmz_surface, + SECONDARY_META_SURFACE_TMZ_C, flip_regs->tmz_surface); - } else { - // turn off stereo if not in stereo - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x0); - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x0); - } + REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0, + PRIMARY_META_SURFACE_ADDRESS_HIGH_C, + flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C); + + REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0, + PRIMARY_META_SURFACE_ADDRESS_C, + flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_C); + + REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, + PRIMARY_META_SURFACE_ADDRESS_HIGH, + flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH); + + REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, + PRIMARY_META_SURFACE_ADDRESS, + flip_regs->DCSURF_PRIMARY_META_SURFACE_ADDRESS); + + REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, 0, + SECONDARY_META_SURFACE_ADDRESS_HIGH, + flip_regs->DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH); + + REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS, 0, + SECONDARY_META_SURFACE_ADDRESS, + flip_regs->DCSURF_SECONDARY_META_SURFACE_ADDRESS); + REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0, + SECONDARY_SURFACE_ADDRESS_HIGH, + flip_regs->DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH); + + REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0, + SECONDARY_SURFACE_ADDRESS, + flip_regs->DCSURF_SECONDARY_SURFACE_ADDRESS); + + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, + PRIMARY_SURFACE_ADDRESS_HIGH_C, + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, + PRIMARY_SURFACE_ADDRESS_C, + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, + PRIMARY_SURFACE_ADDRESS_HIGH, + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH); + + 
REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, + PRIMARY_SURFACE_ADDRESS, + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS); +} + +void dmcub_PLAT_54186_wa(struct hubp *hubp, struct surface_flip_registers *flip_regs) +{ + struct dc_dmub_srv *dmcub = hubp->ctx->dmub_srv; + struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); + struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa = { 0 }; + + PLAT_54186_wa.header.type = DMUB_CMD__PLAT_54186_WA; + PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS; + PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C; + PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; + PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; + PLAT_54186_wa.flip.flip_params.grph_stereo = flip_regs->grph_stereo; + PLAT_54186_wa.flip.flip_params.hubp_inst = hubp->inst; + PLAT_54186_wa.flip.flip_params.immediate = flip_regs->immediate; + PLAT_54186_wa.flip.flip_params.tmz_surface = flip_regs->tmz_surface; + PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid; + + PERF_TRACE(); // TODO: remove after performance is stable. + dc_dmub_srv_cmd_queue(dmcub, &PLAT_54186_wa.header); + PERF_TRACE(); // TODO: remove after performance is stable. + dc_dmub_srv_cmd_execute(dmcub); + PERF_TRACE(); // TODO: remove after performance is stable. + dc_dmub_srv_wait_idle(dmcub); + PERF_TRACE(); // TODO: remove after performance is stable. 
+} + +bool hubp21_program_surface_flip_and_addr( + struct hubp *hubp, + const struct dc_plane_address *address, + bool flip_immediate) +{ + struct dc_debug_options *debug = &hubp->ctx->dc->debug; + struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); + struct surface_flip_registers flip_regs = { 0 }; + + flip_regs.vmid = address->vmid; - /* HW automatically latch rest of address register on write to - * DCSURF_PRIMARY_SURFACE_ADDRESS if SURFACE_UPDATE_LOCK is not used - * - * program high first and then the low addr, order matters! - */ switch (address->type) { case PLN_ADDR_TYPE_GRAPHICS: - /* DCN1.0 does not support const color - * TODO: program DCHUBBUB_RET_PATH_DCC_CFGx_0/1 - * base on address->grph.dcc_const_color - * x = 0, 2, 4, 6 for pipe 0, 1, 2, 3 for rgb and luma - * x = 1, 3, 5, 7 for pipe 0, 1, 2, 3 for chroma - */ - - if (address->grph.addr.quad_part == 0) + if (address->grph.addr.quad_part == 0) { + BREAK_TO_DEBUGGER(); break; - - REG_UPDATE_2(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ, address->tmz_surface); - - if (address->grph.meta_addr.quad_part != 0) { - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_META_SURFACE_ADDRESS_HIGH, - address->grph.meta_addr.high_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, - PRIMARY_META_SURFACE_ADDRESS, - address->grph.meta_addr.low_part); } - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_SURFACE_ADDRESS_HIGH, - address->grph.addr.high_part); + if (address->grph.meta_addr.quad_part != 0) { + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS = + address->grph.meta_addr.low_part; + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = + address->grph.meta_addr.high_part; + } - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, - PRIMARY_SURFACE_ADDRESS, - address->grph.addr.low_part); + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS = + address->grph.addr.low_part; + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + address->grph.addr.high_part; 
break; case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE: if (address->video_progressive.luma_addr.quad_part == 0 || address->video_progressive.chroma_addr.quad_part == 0) break; - REG_UPDATE_4(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, address->tmz_surface, - PRIMARY_SURFACE_TMZ_C, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface); - if (address->video_progressive.luma_meta_addr.quad_part != 0) { - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, 0, - PRIMARY_META_SURFACE_ADDRESS_HIGH_C, - address->video_progressive.chroma_meta_addr.high_part); + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS = + address->video_progressive.luma_meta_addr.low_part; + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = + address->video_progressive.luma_meta_addr.high_part; - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, 0, - PRIMARY_META_SURFACE_ADDRESS_C, - address->video_progressive.chroma_meta_addr.low_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_META_SURFACE_ADDRESS_HIGH, - address->video_progressive.luma_meta_addr.high_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, - PRIMARY_META_SURFACE_ADDRESS, - address->video_progressive.luma_meta_addr.low_part); + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_C = + address->video_progressive.chroma_meta_addr.low_part; + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C = + address->video_progressive.chroma_meta_addr.high_part; } - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, - PRIMARY_SURFACE_ADDRESS_HIGH_C, - address->video_progressive.chroma_addr.high_part); + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS = + address->video_progressive.luma_addr.low_part; + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + address->video_progressive.luma_addr.high_part; if (debug->nv12_iflip_vm_wa) { - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, - PRIMARY_SURFACE_ADDRESS_C, - address->video_progressive.chroma_addr.low_part + 
hubp21->PLAT_54186_wa_chroma_addr_offset); - } else { - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, - PRIMARY_SURFACE_ADDRESS_C, - address->video_progressive.chroma_addr.low_part); - } + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_C = + address->video_progressive.chroma_addr.low_part + hubp21->PLAT_54186_wa_chroma_addr_offset; + } else + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_C = + address->video_progressive.chroma_addr.low_part; - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_SURFACE_ADDRESS_HIGH, - address->video_progressive.luma_addr.high_part); + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = + address->video_progressive.chroma_addr.high_part; - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, - PRIMARY_SURFACE_ADDRESS, - address->video_progressive.luma_addr.low_part); break; case PLN_ADDR_TYPE_GRPH_STEREO: if (address->grph_stereo.left_addr.quad_part == 0) @@ -802,58 +858,46 @@ bool hubp21_program_surface_flip_and_addr( if (address->grph_stereo.right_addr.quad_part == 0) break; - REG_UPDATE_8(DCSURF_SURFACE_CONTROL, - PRIMARY_SURFACE_TMZ, address->tmz_surface, - PRIMARY_SURFACE_TMZ_C, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface, - SECONDARY_SURFACE_TMZ, address->tmz_surface, - SECONDARY_SURFACE_TMZ_C, address->tmz_surface, - SECONDARY_META_SURFACE_TMZ, address->tmz_surface, - SECONDARY_META_SURFACE_TMZ_C, address->tmz_surface); + flip_regs.grph_stereo = true; if (address->grph_stereo.right_meta_addr.quad_part != 0) { - - REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, 0, - SECONDARY_META_SURFACE_ADDRESS_HIGH, - address->grph_stereo.right_meta_addr.high_part); - - REG_SET(DCSURF_SECONDARY_META_SURFACE_ADDRESS, 0, - SECONDARY_META_SURFACE_ADDRESS, - address->grph_stereo.right_meta_addr.low_part); + flip_regs.DCSURF_SECONDARY_META_SURFACE_ADDRESS = + address->grph_stereo.right_meta_addr.low_part; + flip_regs.DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH = + 
address->grph_stereo.right_meta_addr.high_part; } + if (address->grph_stereo.left_meta_addr.quad_part != 0) { - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_META_SURFACE_ADDRESS_HIGH, - address->grph_stereo.left_meta_addr.high_part); - - REG_SET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, 0, - PRIMARY_META_SURFACE_ADDRESS, - address->grph_stereo.left_meta_addr.low_part); + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS = + address->grph_stereo.left_meta_addr.low_part; + flip_regs.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH = + address->grph_stereo.left_meta_addr.high_part; } - REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0, - SECONDARY_SURFACE_ADDRESS_HIGH, - address->grph_stereo.right_addr.high_part); + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS = + address->grph_stereo.left_addr.low_part; + flip_regs.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + address->grph_stereo.left_addr.high_part; - REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0, - SECONDARY_SURFACE_ADDRESS, - address->grph_stereo.right_addr.low_part); + flip_regs.DCSURF_SECONDARY_SURFACE_ADDRESS = + address->grph_stereo.right_addr.low_part; + flip_regs.DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH = + address->grph_stereo.right_addr.high_part; - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, - PRIMARY_SURFACE_ADDRESS_HIGH, - address->grph_stereo.left_addr.high_part); - - REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, - PRIMARY_SURFACE_ADDRESS, - address->grph_stereo.left_addr.low_part); break; default: BREAK_TO_DEBUGGER(); break; } + flip_regs.tmz_surface = address->tmz_surface; + flip_regs.immediate = flip_immediate; + + if (hubp->ctx->dc->debug.enable_dmcub_surface_flip && address->type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) + dmcub_PLAT_54186_wa(hubp, &flip_regs); + else + program_surface_flip_and_addr(hubp, &flip_regs); + hubp->request_address = *address; return true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 
c76449f58064..1d741bca2211 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -83,6 +83,7 @@ #include "dcn21_resource.h" #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" +#include "../dce/dmub_psr.h" #define SOC_BOUNDING_BOX_VALID false #define DC_LOGGER_INIT(logger) @@ -465,15 +466,18 @@ static const struct dcn20_mpc_registers mpc_regs = { MPC_OUT_MUX_REG_LIST_DCN2_0(0), MPC_OUT_MUX_REG_LIST_DCN2_0(1), MPC_OUT_MUX_REG_LIST_DCN2_0(2), - MPC_OUT_MUX_REG_LIST_DCN2_0(3) + MPC_OUT_MUX_REG_LIST_DCN2_0(3), + MPC_DBG_REG_LIST_DCN2_0() }; static const struct dcn20_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) + MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT), + MPC_DEBUG_REG_LIST_SH_DCN20 }; static const struct dcn20_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK) + MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK), + MPC_DEBUG_REG_LIST_MASK_DCN20 }; #define hubp_regs(id)\ @@ -608,6 +612,7 @@ static const struct dce110_aux_registers aux_engine_regs[] = { #define tf_regs(id)\ [id] = {\ TF_REG_LIST_DCN20(id),\ + TF_REG_LIST_DCN20_COMMON_APPEND(id),\ } static const struct dcn2_dpp_registers tf_regs[] = { @@ -618,11 +623,13 @@ static const struct dcn2_dpp_registers tf_regs[] = { }; static const struct dcn2_dpp_shift tf_shift = { - TF_REG_LIST_SH_MASK_DCN20(__SHIFT) + TF_REG_LIST_SH_MASK_DCN20(__SHIFT), + TF_DEBUG_REG_LIST_SH_DCN20 }; static const struct dcn2_dpp_mask tf_mask = { - TF_REG_LIST_SH_MASK_DCN20(_MASK) + TF_REG_LIST_SH_MASK_DCN20(_MASK), + TF_DEBUG_REG_LIST_MASK_DCN20 }; #define stream_enc_regs(id)\ @@ -828,7 +835,8 @@ static const struct dc_debug_options debug_defaults_drv = { .scl_reset_length10 = true, .sanity_checks = true, .disable_48mhz_pwrdwn = false, - .nv12_iflip_vm_wa = true + .nv12_iflip_vm_wa = true, + .usbc_combo_phy_reset_wa = true }; static const struct dc_debug_options debug_defaults_diags = { @@ -1341,6 +1349,10 @@ static void 
update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param } dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i - 1]; dcn2_1_soc.num_states = i; + + // diags does not retrieve proper values from SMU, do not update DML instance for diags + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) && !IS_DIAG_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21); } /* Temporary Place holder until we can get them from fuse */ @@ -1740,6 +1752,10 @@ static bool dcn21_resource_construct( goto create_fail; } + // Leave as NULL to not affect current dmcu psr programming sequence + // Will be uncommented when functionality is confirmed to be working + pool->base.psr = NULL; + pool->base.abm = dce_abm_create(ctx, &abm_regs, &abm_shift, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c index 3b224b155e8c..e7a8ac7a1f22 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c @@ -937,7 +937,7 @@ static unsigned int CalculateVMAndRowBytes( *MetaRowByte = 0; } - if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) { MacroTileSizeBytes = 256; MacroTileHeight = BlockHeight256Bytes; } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x @@ -1335,11 +1335,11 @@ static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPer else mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; - if (mode_lib->vba.ODMCombineEnabled[k] == true) + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) if 
(mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; if (MainPlaneDoesODMCombine == true) @@ -2848,12 +2848,12 @@ static void dml20_DisplayPipeConfiguration(struct display_mode_lib *mode_lib) SwathWidth = mode_lib->vba.ViewportHeight[k]; } - if (mode_lib->vba.ODMCombineEnabled[k] == true) { + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) { + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } } @@ -3348,7 +3348,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l == dm_420_10)) || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl || mode_lib->vba.SurfaceTiling[k] - == dm_sw_gfx7_2d_thin_lvp) + == dm_sw_gfx7_2d_thin_l_vp) && !((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 || mode_lib->vba.SourcePixelFormat[k] @@ -3446,10 +3446,10 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->FabricAndDRAMBandwidthPerState[i] * 1000) * locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; - locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState; + locals->ReturnBWPerState[i][0] = locals->ReturnBWToDCNPerState; if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 / (locals->ReturnBWToDCNPerState - 
locals->DCFCLKPerState[i] @@ -3460,7 +3460,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], 4 * locals->ReturnBWToDCNPerState * (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / @@ -3472,7 +3472,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000); if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] @@ -3483,7 +3483,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], 4 * locals->ReturnBWToDCNPerState * (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / @@ -3521,12 +3521,12 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (i = 0; 
i <= mode_lib->vba.soc.num_states; i++) { locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] - + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; - if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] + + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0] > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { - locals->ROBSupport[i] = true; + locals->ROBSupport[i][0] = true; } else { - locals->ROBSupport[i] = false; + locals->ROBSupport[i][0] = false; } } /*Writeback Mode Support Check*/ @@ -3903,7 +3903,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] - && locals->ODMCombineEnablePerState[i][k] == false) { + && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->NoOfDPP[i][j][k] = 1; locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); @@ -3992,16 +3992,16 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*Viewport Size Check*/ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - locals->ViewportSizeSupport[i] = true; + locals->ViewportSizeSupport[i][0] = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if 
(dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } else { if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } } @@ -4183,8 +4183,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DSCFormatFactor = 1; } if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] - == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { locals->DSCCLKRequiredMoreThanSupported[i] = @@ -4207,7 +4206,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.TotalDSCUnitsRequired = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { mode_lib->vba.TotalDSCUnitsRequired = mode_lib->vba.TotalDSCUnitsRequired + 2.0; } else { @@ -4249,7 +4248,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; } if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { - if (locals->ODMCombineEnablePerState[i][k] == false) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->DSCDelayPerState[i][k] = dscceComputeDelay( mode_lib->vba.DSCInputBitPerComponent[k], @@ -4292,7 +4291,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (i = 0; i <= 
mode_lib->vba.soc.num_states; i++) { for (j = 0; j < 2; j++) { for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k])); else locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; @@ -4345,28 +4344,28 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma + dml_min( locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] * - locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i], + locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesLuma), locals->SwathHeightYPerState[i][j][k]); locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * - locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i], + locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesChroma), locals->SwathHeightCPerState[i][j][k]); if (locals->BytePerPixelInDETC[k] == 0) { locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * - dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]); + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]); } else { locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min( locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / 
locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * - dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]), + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]), locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) - locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 * - dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k])); + dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k])); } } } @@ -4406,14 +4405,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k]; locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k]; locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k]; - mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], mode_lib->vba.PixelClock[k] / 16.0); if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4423,9 +4422,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( 
mode_lib->vba.BytePerPixelInDETY[k], @@ -4436,9 +4435,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } else { if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4448,9 +4447,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4460,9 +4459,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l * mode_lib->vba.RequiredDPPCLK[i][j][k]); } if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETC[k], @@ -4473,9 +4472,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETC[k], @@ -4511,7 +4510,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k], &mode_lib->vba.dpte_row_height[k], &mode_lib->vba.meta_row_height[k]); - mode_lib->vba.PrefetchLinesY[k] = 
CalculatePrefetchSourceLines( + mode_lib->vba.PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k], mode_lib->vba.vtaps[k], @@ -4550,7 +4549,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k], &mode_lib->vba.dpte_row_height_chroma[k], &mode_lib->vba.meta_row_height_chroma[k]); - mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines( + mode_lib->vba.PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k] / 2.0, mode_lib->vba.VTAPsChroma[k], @@ -4564,14 +4563,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; mode_lib->vba.MetaRowBytesC = 0.0; mode_lib->vba.DPTEBytesPerRowC = 0.0; - locals->PrefetchLinesC[k] = 0.0; + locals->PrefetchLinesC[0][0][k] = 0.0; locals->PTEBufferSizeNotExceededC[i][j][k] = true; locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; } - locals->PDEAndMetaPTEBytesPerFrame[k] = + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] = mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; - locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; - locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; CalculateActiveRowBandwidth( mode_lib->vba.GPUVMEnable, @@ -4598,14 +4597,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] * mode_lib->vba.MetaChunkSize) * 1024.0 - / mode_lib->vba.ReturnBWPerState[i]; + / mode_lib->vba.ReturnBWPerState[i][0]; if 
(mode_lib->vba.GPUVMEnable == true) { mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency + mode_lib->vba.TotalNumberOfActiveDPP[i][j] * mode_lib->vba.PTEGroupSize - / mode_lib->vba.ReturnBWPerState[i]; + / mode_lib->vba.ReturnBWPerState[i][0]; } - mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { @@ -4655,7 +4654,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); } @@ -4700,7 +4699,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], @@ -4718,7 +4717,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l - mode_lib->vba.VActive[k], mode_lib->vba.HTotal[k], mode_lib->vba.MaxInterDCNTileRepeaters, - mode_lib->vba.MaximumVStartup[k], + mode_lib->vba.MaximumVStartup[0][0][k], mode_lib->vba.GPUVMMaxPageTableLevels, mode_lib->vba.GPUVMEnable, mode_lib->vba.DynamicMetadataEnable[k], @@ -4728,15 +4727,15 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.ExtraLatency, mode_lib->vba.TimeCalc, - 
mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], - mode_lib->vba.PrefetchLinesY[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], + mode_lib->vba.PrefetchLinesY[0][0][k], mode_lib->vba.SwathWidthYPerState[i][j][k], mode_lib->vba.BytePerPixelInDETY[k], mode_lib->vba.PrefillY[k], mode_lib->vba.MaxNumSwY[k], - mode_lib->vba.PrefetchLinesC[k], + mode_lib->vba.PrefetchLinesC[0][0][k], mode_lib->vba.BytePerPixelInDETC[k], mode_lib->vba.PrefillC[k], mode_lib->vba.MaxNumSwC[k], @@ -4767,19 +4766,19 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->prefetch_vm_bw_valid = true; locals->prefetch_row_bw_valid = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0) + if (locals->PDEAndMetaPTEBytesPerFrame[0][0][k] == 0) locals->prefetch_vm_bw[k] = 0; else if (locals->LinesForMetaPTE[k] > 0) - locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k] + locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[0][0][k] / (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]); else { locals->prefetch_vm_bw[k] = 0; locals->prefetch_vm_bw_valid = false; } - if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0) + if (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k] == 0) locals->prefetch_row_bw[k] = 0; else if (locals->LinesForMetaAndDPTERow[k] > 0) - locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]) + locals->prefetch_row_bw[k] = (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k]) / (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]); else { locals->prefetch_row_bw[k] = 0; @@ -4798,13 +4797,13 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l 
mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k]) + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]); } - locals->BandwidthWithoutPrefetchSupported[i] = true; - if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) { - locals->BandwidthWithoutPrefetchSupported[i] = false; + locals->BandwidthWithoutPrefetchSupported[i][0] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]) { + locals->BandwidthWithoutPrefetchSupported[i][0] = false; } locals->PrefetchSupported[i][j] = true; - if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) { + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]) { locals->PrefetchSupported[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4829,7 +4828,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.PrefetchSupported[i][j] == true && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) { mode_lib->vba.BandwidthAvailableForImmediateFlip = - mode_lib->vba.ReturnBWPerState[i]; + mode_lib->vba.ReturnBWPerState[i][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip @@ -4843,9 +4842,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { mode_lib->vba.ImmediateFlipBytes[k] = - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] - + mode_lib->vba.MetaRowBytes[k] - + mode_lib->vba.DPTEBytesPerRow[k]; + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k] + + mode_lib->vba.MetaRowBytes[0][0][k] + + mode_lib->vba.DPTEBytesPerRow[0][0][k]; } } mode_lib->vba.TotImmediateFlipBytes = 0.0; @@ -4873,9 +4872,9 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l / mode_lib->vba.PixelClock[k], mode_lib->vba.VRatio[k], mode_lib->vba.Tno_bw[k], - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], mode_lib->vba.DCCEnable[k], mode_lib->vba.dpte_row_height[k], mode_lib->vba.meta_row_height[k], @@ -4900,7 +4899,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true; if (mode_lib->vba.total_dcn_read_bw_with_flip - > mode_lib->vba.ReturnBWPerState[i]) { + > mode_lib->vba.ReturnBWPerState[i][0]) { mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4919,13 +4918,13 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) mode_lib->vba.MaxTotalVActiveRDBandwidth = mode_lib->vba.MaxTotalVActiveRDBandwidth + mode_lib->vba.ReadBandwidth[k]; for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth * + mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) * mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100; - if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i]) - mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true; + if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0]) + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = true; else - 
mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false; + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = false; } /*PTE Buffer Size Check*/ @@ -5013,7 +5012,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_SCALE_RATIO_TAP; } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { status = DML_FAIL_SOURCE_PIXEL_FORMAT; - } else if (locals->ViewportSizeSupport[i] != true) { + } else if (locals->ViewportSizeSupport[i][0] != true) { status = DML_FAIL_VIEWPORT_SIZE; } else if (locals->DIOSupport[i] != true) { status = DML_FAIL_DIO_SUPPORT; @@ -5023,7 +5022,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_DSC_CLK_REQUIRED; } else if (locals->UrgentLatencySupport[i][j] != true) { status = DML_FAIL_URGENT_LATENCY; - } else if (locals->ROBSupport[i] != true) { + } else if (locals->ROBSupport[i][0] != true) { status = DML_FAIL_REORDERING_BUFFER; } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { status = DML_FAIL_DISPCLK_DPPCLK; @@ -5043,7 +5042,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_PITCH_SUPPORT; } else if (locals->PrefetchSupported[i][j] != true) { status = DML_FAIL_PREFETCH_SUPPORT; - } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { + } else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) { status = DML_FAIL_TOTAL_V_ACTIVE_BW; } else if (locals->VRatioInPrefetchSupported[i][j] != true) { status = DML_FAIL_V_RATIO_PREFETCH; @@ -5089,7 +5088,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.ReturnBW = 
locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index 6482d7b99bae..22f3b5a4b3b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -997,7 +997,7 @@ static unsigned int CalculateVMAndRowBytes( *MetaRowByte = 0; } - if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) { MacroTileSizeBytes = 256; MacroTileHeight = BlockHeight256Bytes; } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x @@ -1395,11 +1395,11 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP else mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; - if (mode_lib->vba.ODMCombineEnabled[k] == true) + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; if (MainPlaneDoesODMCombine == true) @@ -2885,12 +2885,12 @@ static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib) SwathWidth = mode_lib->vba.ViewportHeight[k]; } - if (mode_lib->vba.ODMCombineEnabled[k] == true) { + if 
(mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) { + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } } @@ -3385,7 +3385,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode == dm_420_10)) || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl || mode_lib->vba.SurfaceTiling[k] - == dm_sw_gfx7_2d_thin_lvp) + == dm_sw_gfx7_2d_thin_l_vp) && !((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 || mode_lib->vba.SourcePixelFormat[k] @@ -3483,10 +3483,10 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->FabricAndDRAMBandwidthPerState[i] * 1000) * locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; - locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState; + locals->ReturnBWPerState[i][0] = locals->ReturnBWToDCNPerState; if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] @@ -3497,7 +3497,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], 4 * 
locals->ReturnBWToDCNPerState * (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / @@ -3509,7 +3509,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000); if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] @@ -3520,7 +3520,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { - locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0] = dml_min(locals->ReturnBWPerState[i][0], 4 * locals->ReturnBWToDCNPerState * (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / @@ -3558,12 +3558,12 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] - + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; - if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] + + 
locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0] > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { - locals->ROBSupport[i] = true; + locals->ROBSupport[i][0] = true; } else { - locals->ROBSupport[i] = false; + locals->ROBSupport[i][0] = false; } } /*Writeback Mode Support Check*/ @@ -3946,7 +3946,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode } if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] - && locals->ODMCombineEnablePerState[i][k] == false) { + && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->NoOfDPP[i][j][k] = 1; locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); @@ -4035,16 +4035,16 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode /*Viewport Size Check*/ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - locals->ViewportSizeSupport[i] = true; + locals->ViewportSizeSupport[i][0] = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } else { if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } } @@ -4226,8 
+4226,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.DSCFormatFactor = 1; } if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] - == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { locals->DSCCLKRequiredMoreThanSupported[i] = @@ -4250,7 +4249,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.TotalDSCUnitsRequired = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { mode_lib->vba.TotalDSCUnitsRequired = mode_lib->vba.TotalDSCUnitsRequired + 2.0; } else { @@ -4292,7 +4291,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; } if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { - if (locals->ODMCombineEnablePerState[i][k] == false) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->DSCDelayPerState[i][k] = dscceComputeDelay( mode_lib->vba.DSCInputBitPerComponent[k], @@ -4335,7 +4334,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { for (j = 0; j < 2; j++) { for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k])); else 
locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; @@ -4388,28 +4387,28 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma + dml_min( locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] * - locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i], + locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesLuma), locals->SwathHeightYPerState[i][j][k]); locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * - locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i], + locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0], locals->EffectiveLBLatencyHidingSourceLinesChroma), locals->SwathHeightCPerState[i][j][k]); if (locals->BytePerPixelInDETC[k] == 0) { locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * - dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]); + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]); } else { locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min( locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * - dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]), + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]), locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / 
(locals->VRatio[k] / 2) - locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 * - dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k])); + dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k])); } } } @@ -4454,14 +4453,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k]; locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k]; locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k]; - mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], mode_lib->vba.PixelClock[k] / 16.0); if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4471,9 +4470,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4484,9 +4483,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode } } else { if (mode_lib->vba.VRatio[k] <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( 
mode_lib->vba.BytePerPixelInDETY[k], @@ -4496,9 +4495,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETY[k], @@ -4508,9 +4507,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode * mode_lib->vba.RequiredDPPCLK[i][j][k]); } if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETC[k], @@ -4521,9 +4520,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode * mode_lib->vba.PixelClock[k] / mode_lib->vba.NoOfDPP[i][j][k]); } else { - mode_lib->vba.ProjectedDCFCLKDeepSleep = + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0] = dml_max( - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], 1.1 * dml_ceil( mode_lib->vba.BytePerPixelInDETC[k], @@ -4559,7 +4558,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode &mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k], &mode_lib->vba.dpte_row_height[k], &mode_lib->vba.meta_row_height[k]); - mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines( + mode_lib->vba.PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k], mode_lib->vba.vtaps[k], @@ -4598,7 +4597,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode &mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k], &mode_lib->vba.dpte_row_height_chroma[k], &mode_lib->vba.meta_row_height_chroma[k]); - 
mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines( + mode_lib->vba.PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k] / 2.0, mode_lib->vba.VTAPsChroma[k], @@ -4612,14 +4611,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; mode_lib->vba.MetaRowBytesC = 0.0; mode_lib->vba.DPTEBytesPerRowC = 0.0; - locals->PrefetchLinesC[k] = 0.0; + locals->PrefetchLinesC[0][0][k] = 0.0; locals->PTEBufferSizeNotExceededC[i][j][k] = true; locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; } - locals->PDEAndMetaPTEBytesPerFrame[k] = + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] = mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; - locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; - locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; CalculateActiveRowBandwidth( mode_lib->vba.GPUVMEnable, @@ -4646,14 +4645,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] * mode_lib->vba.MetaChunkSize) * 1024.0 - / mode_lib->vba.ReturnBWPerState[i]; + / mode_lib->vba.ReturnBWPerState[i][0]; if (mode_lib->vba.GPUVMEnable == true) { mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency + mode_lib->vba.TotalNumberOfActiveDPP[i][j] * mode_lib->vba.PTEGroupSize - / mode_lib->vba.ReturnBWPerState[i]; + / mode_lib->vba.ReturnBWPerState[i][0]; } - mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; for (k = 0; k 
<= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { @@ -4703,7 +4702,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); } @@ -4743,7 +4742,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0; } - CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth, + CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i][0], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth, mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k], mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k], mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal, @@ -4757,14 +4756,14 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], mode_lib->vba.NoOfDPP[i][j][k], 
mode_lib->vba.NumberOfCursors[k], mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k], mode_lib->vba.HTotal[k], mode_lib->vba.MaxInterDCNTileRepeaters, - mode_lib->vba.MaximumVStartup[k], + mode_lib->vba.MaximumVStartup[0][0][k], mode_lib->vba.GPUVMMaxPageTableLevels, mode_lib->vba.GPUVMEnable, mode_lib->vba.DynamicMetadataEnable[k], @@ -4774,15 +4773,15 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.ExtraLatency, mode_lib->vba.TimeCalc, - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], - mode_lib->vba.PrefetchLinesY[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], + mode_lib->vba.PrefetchLinesY[0][0][k], mode_lib->vba.SwathWidthYPerState[i][j][k], mode_lib->vba.BytePerPixelInDETY[k], mode_lib->vba.PrefillY[k], mode_lib->vba.MaxNumSwY[k], - mode_lib->vba.PrefetchLinesC[k], + mode_lib->vba.PrefetchLinesC[0][0][k], mode_lib->vba.BytePerPixelInDETC[k], mode_lib->vba.PrefillC[k], mode_lib->vba.MaxNumSwC[k], @@ -4812,19 +4811,19 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode locals->prefetch_vm_bw_valid = true; locals->prefetch_row_bw_valid = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0) + if (locals->PDEAndMetaPTEBytesPerFrame[0][0][k] == 0) locals->prefetch_vm_bw[k] = 0; else if (locals->LinesForMetaPTE[k] > 0) - locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k] + locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[0][0][k] / (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]); else { locals->prefetch_vm_bw[k] = 0; locals->prefetch_vm_bw_valid = false; } - if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0) + if (locals->MetaRowBytes[0][0][k] + 
locals->DPTEBytesPerRow[0][0][k] == 0) locals->prefetch_row_bw[k] = 0; else if (locals->LinesForMetaAndDPTERow[k] > 0) - locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]) + locals->prefetch_row_bw[k] = (locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k]) / (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]); else { locals->prefetch_row_bw[k] = 0; @@ -4843,13 +4842,13 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k]) + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]); } - locals->BandwidthWithoutPrefetchSupported[i] = true; - if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) { - locals->BandwidthWithoutPrefetchSupported[i] = false; + locals->BandwidthWithoutPrefetchSupported[i][0] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]) { + locals->BandwidthWithoutPrefetchSupported[i][0] = false; } locals->PrefetchSupported[i][j] = true; - if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) { + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]) { locals->PrefetchSupported[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4874,7 +4873,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode if (mode_lib->vba.PrefetchSupported[i][j] == true && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) { mode_lib->vba.BandwidthAvailableForImmediateFlip = - mode_lib->vba.ReturnBWPerState[i]; + mode_lib->vba.ReturnBWPerState[i][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip @@ -4888,9 +4887,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode if 
((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { mode_lib->vba.ImmediateFlipBytes[k] = - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] - + mode_lib->vba.MetaRowBytes[k] - + mode_lib->vba.DPTEBytesPerRow[k]; + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k] + + mode_lib->vba.MetaRowBytes[0][0][k] + + mode_lib->vba.DPTEBytesPerRow[0][0][k]; } } mode_lib->vba.TotImmediateFlipBytes = 0.0; @@ -4918,9 +4917,9 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode / mode_lib->vba.PixelClock[k], mode_lib->vba.VRatio[k], mode_lib->vba.Tno_bw[k], - mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], - mode_lib->vba.MetaRowBytes[k], - mode_lib->vba.DPTEBytesPerRow[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[0][0][k], + mode_lib->vba.MetaRowBytes[0][0][k], + mode_lib->vba.DPTEBytesPerRow[0][0][k], mode_lib->vba.DCCEnable[k], mode_lib->vba.dpte_row_height[k], mode_lib->vba.meta_row_height[k], @@ -4945,7 +4944,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode } mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true; if (mode_lib->vba.total_dcn_read_bw_with_flip - > mode_lib->vba.ReturnBWPerState[i]) { + > mode_lib->vba.ReturnBWPerState[i][0]) { mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4961,13 +4960,13 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode /*Vertical Active BW support*/ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth * + mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) * mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100; - if 
(mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i]) - mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true; + if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i][0]) + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = true; else - mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false; + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][0] = false; } /*PTE Buffer Size Check*/ @@ -5055,7 +5054,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode status = DML_FAIL_SCALE_RATIO_TAP; } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { status = DML_FAIL_SOURCE_PIXEL_FORMAT; - } else if (locals->ViewportSizeSupport[i] != true) { + } else if (locals->ViewportSizeSupport[i][0] != true) { status = DML_FAIL_VIEWPORT_SIZE; } else if (locals->DIOSupport[i] != true) { status = DML_FAIL_DIO_SUPPORT; @@ -5065,7 +5064,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode status = DML_FAIL_DSC_CLK_REQUIRED; } else if (locals->UrgentLatencySupport[i][j] != true) { status = DML_FAIL_URGENT_LATENCY; - } else if (locals->ROBSupport[i] != true) { + } else if (locals->ROBSupport[i][0] != true) { status = DML_FAIL_REORDERING_BUFFER; } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { status = DML_FAIL_DISPCLK_DPPCLK; @@ -5085,7 +5084,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode status = DML_FAIL_PITCH_SUPPORT; } else if (locals->PrefetchSupported[i][j] != true) { status = DML_FAIL_PREFETCH_SUPPORT; - } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { + } else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) { status = DML_FAIL_TOTAL_V_ACTIVE_BW; } else if (locals->VRatioInPrefetchSupported[i][j] != true) { status = DML_FAIL_V_RATIO_PREFETCH; @@ -5131,7 +5130,7 @@ void 
dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index b6d34669cddf..af35b3bea909 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -197,7 +197,7 @@ static unsigned int CalculateVMAndRowBytes( unsigned int *meta_row_width, unsigned int *meta_row_height, unsigned int *vm_group_bytes, - long *dpte_group_bytes, + unsigned int *dpte_group_bytes, unsigned int *PixelPTEReqWidth, unsigned int *PixelPTEReqHeight, unsigned int *PTERequestSize, @@ -295,7 +295,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double UrgentOutOfOrderReturn, double ReturnBW, bool GPUVMEnable, - long dpte_group_bytes[], + int dpte_group_bytes[], unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, @@ -309,13 +309,13 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( int DPPPerPlane[], bool DCCEnable[], double DPPCLK[], - unsigned int SwathWidthSingleDPPY[], + double SwathWidthSingleDPPY[], unsigned int SwathHeightY[], double ReadBandwidthPlaneLuma[], unsigned int SwathHeightC[], double ReadBandwidthPlaneChroma[], unsigned int LBBitPerPixel[], - unsigned int 
SwathWidthY[], + double SwathWidthY[], double HRatio[], unsigned int vtaps[], unsigned int VTAPsChroma[], @@ -344,7 +344,7 @@ static void CalculateDCFCLKDeepSleep( double BytePerPixelDETY[], double BytePerPixelDETC[], double VRatio[], - unsigned int SwathWidthY[], + double SwathWidthY[], int DPPPerPlane[], double HRatio[], double PixelClock[], @@ -435,7 +435,7 @@ static void CalculateMetaAndPTETimes( unsigned int meta_row_height[], unsigned int meta_req_width[], unsigned int meta_req_height[], - long dpte_group_bytes[], + int dpte_group_bytes[], unsigned int PTERequestSizeY[], unsigned int PTERequestSizeC[], unsigned int PixelPTEReqWidthY[], @@ -477,7 +477,7 @@ static double CalculateExtraLatency( bool HostVMEnable, int NumberOfActivePlanes, int NumberOfDPP[], - long dpte_group_bytes[], + int dpte_group_bytes[], double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, int HostVMMaxPageTableLevels, @@ -1280,7 +1280,7 @@ static unsigned int CalculateVMAndRowBytes( unsigned int *meta_row_width, unsigned int *meta_row_height, unsigned int *vm_group_bytes, - long *dpte_group_bytes, + unsigned int *dpte_group_bytes, unsigned int *PixelPTEReqWidth, unsigned int *PixelPTEReqHeight, unsigned int *PTERequestSize, @@ -1338,7 +1338,7 @@ static unsigned int CalculateVMAndRowBytes( *MetaRowByte = 0; } - if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) { MacroTileSizeBytes = 256; MacroTileHeight = BlockHeight256Bytes; } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x @@ -1683,11 +1683,11 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman else locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; - if 
(mode_lib->vba.ODMCombineEnabled[k] == true) + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) MainPlaneDoesODMCombine = true; if (MainPlaneDoesODMCombine == true) @@ -2940,12 +2940,12 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) SwathWidth = mode_lib->vba.ViewportHeight[k]; } - if (mode_lib->vba.ODMCombineEnabled[k] == true) { + if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { if (mode_lib->vba.BlendingAndTiming[k] == j - && mode_lib->vba.ODMCombineEnabled[j] == true) { + && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { MainPlaneDoesODMCombine = true; } } @@ -3453,7 +3453,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l == dm_420_10)) || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl || mode_lib->vba.SurfaceTiling[k] - == dm_sw_gfx7_2d_thin_lvp) + == dm_sw_gfx7_2d_thin_l_vp) && !((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 || mode_lib->vba.SourcePixelFormat[k] @@ -3542,17 +3542,17 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - locals->IdealSDPPortBandwidthPerState[i] = dml_min3( + locals->IdealSDPPortBandwidthPerState[i][0] = dml_min3( mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels * mode_lib->vba.DRAMChannelWidth, mode_lib->vba.FabricClockPerState[i] * mode_lib->vba.FabricDatapathToDCNDataReturn); if (mode_lib->vba.HostVMEnable == false) { - locals->ReturnBWPerState[i] = 
locals->IdealSDPPortBandwidthPerState[i] + locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0] * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0; } else { - locals->ReturnBWPerState[i] = locals->IdealSDPPortBandwidthPerState[i] + locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0] * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0; } } @@ -3589,12 +3589,12 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly) - * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; - if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] + * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0] > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { - locals->ROBSupport[i] = true; + locals->ROBSupport[i][0] = true; } else { - locals->ROBSupport[i] = false; + locals->ROBSupport[i][0] = false; } } /*Writeback Mode Support Check*/ @@ -3982,7 +3982,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] - && locals->ODMCombineEnablePerState[i][k] == false) { + && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->NoOfDPP[i][j][k] = 1; locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + 
mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); @@ -4071,16 +4071,16 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*Viewport Size Check*/ for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - locals->ViewportSizeSupport[i] = true; + locals->ViewportSizeSupport[i][0] = true; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } else { if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { - locals->ViewportSizeSupport[i] = false; + locals->ViewportSizeSupport[i][0] = false; } } } @@ -4269,8 +4269,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DSCFormatFactor = 1; } if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] - == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { locals->DSCCLKRequiredMoreThanSupported[i] = @@ -4293,7 +4292,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.TotalDSCUnitsRequired = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (locals->RequiresDSC[i][k] == true) { - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { mode_lib->vba.TotalDSCUnitsRequired = mode_lib->vba.TotalDSCUnitsRequired + 2.0; } else { @@ -4335,7 +4334,7 @@ void 
dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; } if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { - if (locals->ODMCombineEnablePerState[i][k] == false) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { locals->DSCDelayPerState[i][k] = dscceComputeDelay( mode_lib->vba.DSCInputBitPerComponent[k], @@ -4399,7 +4398,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k]; locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k]; - if (locals->ODMCombineEnablePerState[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { locals->SwathWidthYThisState[k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])); } else { @@ -4451,7 +4450,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->PSCL_FACTOR, locals->PSCL_FACTOR_CHROMA, locals->RequiredDPPCLKThisState, - &mode_lib->vba.ProjectedDCFCLKDeepSleep); + &mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]); for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 @@ -4496,7 +4495,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->PTERequestSizeC, locals->dpde0_bytes_per_frame_ub_c, locals->meta_pte_bytes_per_frame_ub_c); - locals->PrefetchLinesC[k] = CalculatePrefetchSourceLines( + locals->PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k]/2, mode_lib->vba.VTAPsChroma[k], @@ -4511,7 +4510,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; mode_lib->vba.MetaRowBytesC = 0.0; 
mode_lib->vba.DPTEBytesPerRowC = 0.0; - locals->PrefetchLinesC[k] = 0.0; + locals->PrefetchLinesC[0][0][k] = 0.0; locals->PTEBufferSizeNotExceededC[i][j][k] = true; locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; } @@ -4552,7 +4551,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->PTERequestSizeY, locals->dpde0_bytes_per_frame_ub_l, locals->meta_pte_bytes_per_frame_ub_l); - locals->PrefetchLinesY[k] = CalculatePrefetchSourceLines( + locals->PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines( mode_lib, mode_lib->vba.VRatio[k], mode_lib->vba.vtaps[k], @@ -4562,10 +4561,10 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.ViewportYStartY[k], &locals->PrefillY[k], &locals->MaxNumSwY[k]); - locals->PDEAndMetaPTEBytesPerFrame[k] = + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] = mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; - locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; - locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; CalculateActiveRowBandwidth( mode_lib->vba.GPUVMEnable, @@ -4591,7 +4590,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.PixelChunkSizeInKByte, locals->TotalNumberOfDCCActiveDPP[i][j], mode_lib->vba.MetaChunkSize, - locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0], mode_lib->vba.GPUVMEnable, mode_lib->vba.HostVMEnable, mode_lib->vba.NumberOfActivePlanes, @@ -4602,7 +4601,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.HostVMMaxPageTableLevels, 
mode_lib->vba.HostVMCachedPageTableLevels); - mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { if (mode_lib->vba.WritebackEnable[k] == true) { @@ -4644,15 +4643,15 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } } - mode_lib->vba.MaxMaxVStartup = 0; + mode_lib->vba.MaxMaxVStartup[0][0] = 0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); - mode_lib->vba.MaxMaxVStartup = dml_max(mode_lib->vba.MaxMaxVStartup, locals->MaximumVStartup[k]); + mode_lib->vba.MaxMaxVStartup[0][0] = dml_max(mode_lib->vba.MaxMaxVStartup[0][0], locals->MaximumVStartup[0][0][k]); } mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode; - mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup; + mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0]; do { mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode; mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup; @@ -4693,7 +4692,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k]; myPipe.DISPCLK = locals->RequiredDISPCLK[i][j]; myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep; + myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]; myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k]; myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; myPipe.SourceScan = mode_lib->vba.SourceScan[k]; @@ -4727,8 
+4726,8 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.MaxInterDCNTileRepeaters, - dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[k]), - locals->MaximumVStartup[k], + dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]), + locals->MaximumVStartup[0][0][k], mode_lib->vba.GPUVMMaxPageTableLevels, mode_lib->vba.GPUVMEnable, &myHostVM, @@ -4739,15 +4738,15 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentLatency, mode_lib->vba.ExtraLatency, mode_lib->vba.TimeCalc, - locals->PDEAndMetaPTEBytesPerFrame[k], - locals->MetaRowBytes[k], - locals->DPTEBytesPerRow[k], - locals->PrefetchLinesY[k], + locals->PDEAndMetaPTEBytesPerFrame[0][0][k], + locals->MetaRowBytes[0][0][k], + locals->DPTEBytesPerRow[0][0][k], + locals->PrefetchLinesY[0][0][k], locals->SwathWidthYThisState[k], locals->BytePerPixelInDETY[k], locals->PrefillY[k], locals->MaxNumSwY[k], - locals->PrefetchLinesC[k], + locals->PrefetchLinesC[0][0][k], locals->BytePerPixelInDETC[k], locals->PrefillC[k], locals->MaxNumSwC[k], @@ -4836,14 +4835,14 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); } - locals->BandwidthWithoutPrefetchSupported[i] = true; - if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i] + locals->BandwidthWithoutPrefetchSupported[i][0] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0] || locals->NotEnoughUrgentLatencyHiding == 1) { - locals->BandwidthWithoutPrefetchSupported[i] = false; + locals->BandwidthWithoutPrefetchSupported[i][0] = false; } locals->PrefetchSupported[i][j] = true; - if 
(mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i] + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0] || locals->NotEnoughUrgentLatencyHiding == 1 || locals->NotEnoughUrgentLatencyHidingPre == 1) { locals->PrefetchSupported[i][j] = false; @@ -4872,17 +4871,17 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) { - mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup; + mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0]; mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1; } else { mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1; } } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) - && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup + && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0] || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode)); if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) { - mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i]; + mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip - dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] @@ -4895,7 +4894,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.TotImmediateFlipBytes = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes - + locals->PDEAndMetaPTEBytesPerFrame[k] + locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]; + + 
locals->PDEAndMetaPTEBytesPerFrame[0][0][k] + locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k]; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4910,9 +4909,9 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.HostVMMaxPageTableLevels, mode_lib->vba.HostVMCachedPageTableLevels, mode_lib->vba.GPUVMEnable, - locals->PDEAndMetaPTEBytesPerFrame[k], - locals->MetaRowBytes[k], - locals->DPTEBytesPerRow[k], + locals->PDEAndMetaPTEBytesPerFrame[0][0][k], + locals->MetaRowBytes[0][0][k], + locals->DPTEBytesPerRow[0][0][k], mode_lib->vba.BandwidthAvailableForImmediateFlip, mode_lib->vba.TotImmediateFlipBytes, mode_lib->vba.SourcePixelFormat[k], @@ -4943,7 +4942,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } locals->ImmediateFlipSupportedForState[i][j] = true; if (mode_lib->vba.total_dcn_read_bw_with_flip - > locals->ReturnBWPerState[i]) { + > locals->ReturnBWPerState[i][0]) { locals->ImmediateFlipSupportedForState[i][j] = false; } for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { @@ -4970,7 +4969,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.WritebackInterfaceChromaBufferSize, mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels, - locals->ReturnBWPerState[i], + locals->ReturnBWPerState[i][0], mode_lib->vba.GPUVMEnable, locals->dpte_group_bytes, mode_lib->vba.MetaChunkSize, @@ -4982,7 +4981,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DRAMClockChangeLatency, mode_lib->vba.SRExitTime, mode_lib->vba.SREnterPlusExitTime, - mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0], locals->NoOfDPPThisState, mode_lib->vba.DCCEnable, locals->RequiredDPPCLKThisState, @@ -5025,8 +5024,8 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k]; } for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) { - locals->MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min( - locals->IdealSDPPortBandwidthPerState[i] * + locals->MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min( + locals->IdealSDPPortBandwidthPerState[i][0] * mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100.0, mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels * @@ -5034,10 +5033,10 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100.0); - if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i]) { - locals->TotalVerticalActiveBandwidthSupport[i] = true; + if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i][0]) { + locals->TotalVerticalActiveBandwidthSupport[i][0] = true; } else { - locals->TotalVerticalActiveBandwidthSupport[i] = false; + locals->TotalVerticalActiveBandwidthSupport[i][0] = false; } } } @@ -5116,7 +5115,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_SCALE_RATIO_TAP; } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { status = DML_FAIL_SOURCE_PIXEL_FORMAT; - } else if (locals->ViewportSizeSupport[i] != true) { + } else if (locals->ViewportSizeSupport[i][0] != true) { status = DML_FAIL_VIEWPORT_SIZE; } else if (locals->DIOSupport[i] != true) { status = DML_FAIL_DIO_SUPPORT; @@ -5124,7 +5123,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_NOT_ENOUGH_DSC; } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) { status = DML_FAIL_DSC_CLK_REQUIRED; - } else if (locals->ROBSupport[i] != true) { + } else if (locals->ROBSupport[i][0] != true) { status = 
DML_FAIL_REORDERING_BUFFER; } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { status = DML_FAIL_DISPCLK_DPPCLK; @@ -5142,7 +5141,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l status = DML_FAIL_CURSOR_SUPPORT; } else if (mode_lib->vba.PitchSupport != true) { status = DML_FAIL_PITCH_SUPPORT; - } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { + } else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) { status = DML_FAIL_TOTAL_V_ACTIVE_BW; } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) { status = DML_FAIL_PTE_BUFFER_SIZE; @@ -5198,7 +5197,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; - mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { if (mode_lib->vba.BlendingAndTiming[k] == k) { mode_lib->vba.ODMCombineEnabled[k] = @@ -5227,7 +5226,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double UrgentOutOfOrderReturn, double ReturnBW, bool GPUVMEnable, - long dpte_group_bytes[], + int dpte_group_bytes[], unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, @@ -5241,13 +5240,13 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( int DPPPerPlane[], bool DCCEnable[], double DPPCLK[], - unsigned int SwathWidthSingleDPPY[], + double SwathWidthSingleDPPY[], unsigned int SwathHeightY[], double ReadBandwidthPlaneLuma[], unsigned int SwathHeightC[], double ReadBandwidthPlaneChroma[], unsigned int LBBitPerPixel[], - unsigned int SwathWidthY[], + double SwathWidthY[], double HRatio[], 
unsigned int vtaps[], unsigned int VTAPsChroma[], @@ -5503,7 +5502,7 @@ static void CalculateDCFCLKDeepSleep( double BytePerPixelDETY[], double BytePerPixelDETC[], double VRatio[], - unsigned int SwathWidthY[], + double SwathWidthY[], int DPPPerPlane[], double HRatio[], double PixelClock[], @@ -5831,7 +5830,7 @@ static void CalculateMetaAndPTETimes( unsigned int meta_row_height[], unsigned int meta_req_width[], unsigned int meta_req_height[], - long dpte_group_bytes[], + int dpte_group_bytes[], unsigned int PTERequestSizeY[], unsigned int PTERequestSizeC[], unsigned int PixelPTEReqWidthY[], @@ -6087,7 +6086,7 @@ static double CalculateExtraLatency( bool HostVMEnable, int NumberOfActivePlanes, int NumberOfDPP[], - long dpte_group_bytes[], + int dpte_group_bytes[], double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, int HostVMMaxPageTableLevels, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h index 55d4cb23a073..bfc2f39bd1ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h @@ -85,7 +85,7 @@ enum dm_swizzle_mode { dm_sw_var_s_x = 29, dm_sw_var_d_x = 30, dm_sw_64kb_r_x, - dm_sw_gfx7_2d_thin_lvp, + dm_sw_gfx7_2d_thin_l_vp, dm_sw_gfx7_2d_thin_gl, }; enum lb_depth { @@ -119,6 +119,10 @@ enum mpc_combine_affinity { dm_mpc_never }; +enum RequestType { + REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA +}; + enum self_refresh_affinity { dm_try_to_allow_self_refresh_and_mclk_switch, dm_allow_self_refresh_and_mclk_switch, @@ -165,4 +169,16 @@ enum odm_combine_mode { dm_odm_combine_mode_4to1, }; +enum odm_combine_policy { + dm_odm_combine_policy_dal, + dm_odm_combine_policy_none, + dm_odm_combine_policy_2to1, + dm_odm_combine_policy_4to1, +}; + +enum immediate_flip_requirement { + 
dm_immediate_flip_not_required, + dm_immediate_flip_required, +}; + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index dbf6a021d0d8..658f81e757e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -63,6 +63,7 @@ struct _vcs_dpi_voltage_scaling_st { double dispclk_mhz; double phyclk_mhz; double dppclk_mhz; + double dtbclk_mhz; }; struct _vcs_dpi_soc_bounding_box_st { @@ -214,6 +215,7 @@ struct _vcs_dpi_display_pipe_source_params_st { int source_format; unsigned char dcc; unsigned int dcc_rate; + unsigned int dcc_rate_chroma; unsigned char dcc_use_global; unsigned char vm; bool gpuvm; // gpuvm enabled @@ -225,7 +227,10 @@ struct _vcs_dpi_display_pipe_source_params_st { int source_scan; int sw_mode; int macro_tile_size; + unsigned int surface_width_y; unsigned int surface_height_y; + unsigned int surface_width_c; + unsigned int surface_height_c; unsigned int viewport_width; unsigned int viewport_height; unsigned int viewport_y_y; @@ -324,7 +329,7 @@ struct _vcs_dpi_display_pipe_dest_params_st { double pixel_rate_mhz; unsigned char synchronized_vblank_all_planes; unsigned char otg_inst; - unsigned char odm_combine; + unsigned int odm_combine; unsigned char use_maximum_vstartup; unsigned int vtotal_max; unsigned int vtotal_min; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 66ca014a6b92..b3c96d9b472f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -264,7 +264,10 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib) mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mts; //mode_lib->vba.DRAMSpeedPerState[i] = soc->clock_limits[i].dram_speed_mhz; mode_lib->vba.MaxDispclk[i] = 
soc->clock_limits[i].dispclk_mhz; + mode_lib->vba.DTBCLKPerState[i] = soc->clock_limits[i].dtbclk_mhz; } + mode_lib->vba.MinVoltageLevel = 0; + mode_lib->vba.MaxVoltageLevel = mode_lib->vba.soc.num_states; mode_lib->vba.DoUrgentLatencyAdjustment = soc->do_urgent_latency_adjustment; @@ -306,8 +309,6 @@ static void fetch_ip_params(struct display_mode_lib *mode_lib) mode_lib->vba.WritebackInterfaceBufferSize = ip->writeback_interface_buffer_size_kbytes; mode_lib->vba.WritebackLineBufferSize = ip->writeback_line_buffer_buffer_size; - mode_lib->vba.MinVoltageLevel = 0; - mode_lib->vba.MaxVoltageLevel = 5; mode_lib->vba.WritebackChromaLineBufferWidth = ip->writeback_chroma_line_buffer_width_pixels; @@ -423,8 +424,8 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) ip->dcc_supported : src->dcc && ip->dcc_supported; mode_lib->vba.DCCRate[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate; /* TODO: Needs to be set based on src->dcc_rate_luma/chroma */ - mode_lib->vba.DCCRateLuma[mode_lib->vba.NumberOfActivePlanes] = 0; - mode_lib->vba.DCCRateChroma[mode_lib->vba.NumberOfActivePlanes] = 0; + mode_lib->vba.DCCRateLuma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate; + mode_lib->vba.DCCRateChroma[mode_lib->vba.NumberOfActivePlanes] = src->dcc_rate_chroma; mode_lib->vba.SourcePixelFormat[mode_lib->vba.NumberOfActivePlanes] = (enum source_format_class) (src->source_format); @@ -436,8 +437,6 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) dst->recout_width; // TODO: or should this be full_recout_width???...maybe only when in hsplit mode? 
mode_lib->vba.ODMCombineEnabled[mode_lib->vba.NumberOfActivePlanes] = dst->odm_combine; - mode_lib->vba.ODMCombineTypeEnabled[mode_lib->vba.NumberOfActivePlanes] = - dst->odm_combine; mode_lib->vba.OutputFormat[mode_lib->vba.NumberOfActivePlanes] = (enum output_format_class) (dout->output_format); mode_lib->vba.OutputBpp[mode_lib->vba.NumberOfActivePlanes] = @@ -590,6 +589,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) for (k = j + 1; k < mode_lib->vba.cache_num_pipes; ++k) { display_pipe_source_params_st *src_k = &pipes[k].pipe.src; display_pipe_dest_params_st *dst_k = &pipes[k].pipe.dest; + display_output_params_st *dout_k = &pipes[j].dout; if (src_k->is_hsplit && !visited[k] && src->hsplit_grp == src_k->hsplit_grp) { @@ -600,12 +600,18 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) == dm_horz) { mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] += src_k->viewport_width; + mode_lib->vba.ViewportWidthChroma[mode_lib->vba.NumberOfActivePlanes] += + src_k->viewport_width; mode_lib->vba.ScalerRecoutWidth[mode_lib->vba.NumberOfActivePlanes] += dst_k->recout_width; } else { mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] += src_k->viewport_height; + mode_lib->vba.ViewportHeightChroma[mode_lib->vba.NumberOfActivePlanes] += + src_k->viewport_height; } + mode_lib->vba.NumberOfDSCSlices[mode_lib->vba.NumberOfActivePlanes] += + dout_k->dsc_slices; visited[k] = true; } @@ -811,7 +817,9 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib) unsigned int total_pipes = 0; mode_lib->vba.VoltageLevel = mode_lib->vba.cache_pipes[0].clks_cfg.voltage; - mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]; + if (mode_lib->vba.ReturnBW == 0) + mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBWPerState[mode_lib->vba.VoltageLevel][0]; 
mode_lib->vba.FabricAndDRAMBandwidth = mode_lib->vba.FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; fetch_socbb_params(mode_lib); diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 3eb657ed5714..e7a44df676ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -157,6 +157,7 @@ struct vba_vars_st { bool DummyPStateCheck; bool DRAMClockChangeSupportsVActive; bool PrefetchModeSupported; + bool PrefetchAndImmediateFlipSupported; enum self_refresh_affinity AllowDRAMSelfRefreshOrDRAMClockChangeInVblank; // Mode Support only double XFCRemoteSurfaceFlipDelay; double TInitXFill; @@ -318,8 +319,7 @@ struct vba_vars_st { unsigned int DynamicMetadataTransmittedBytes[DC__NUM_DPP__MAX]; double DCCRate[DC__NUM_DPP__MAX]; double AverageDCCCompressionRate; - bool ODMCombineEnabled[DC__NUM_DPP__MAX]; - enum odm_combine_mode ODMCombineTypeEnabled[DC__NUM_DPP__MAX]; + enum odm_combine_mode ODMCombineEnabled[DC__NUM_DPP__MAX]; double OutputBpp[DC__NUM_DPP__MAX]; bool DSCEnabled[DC__NUM_DPP__MAX]; unsigned int DSCInputBitPerComponent[DC__NUM_DPP__MAX]; @@ -347,6 +347,7 @@ struct vba_vars_st { unsigned int EffectiveLBLatencyHidingSourceLinesChroma; double BandwidthAvailableForImmediateFlip; unsigned int PrefetchMode[DC__VOLTAGE_STATES + 1][2]; + unsigned int PrefetchModePerState[DC__VOLTAGE_STATES + 1][2]; unsigned int MinPrefetchMode; unsigned int MaxPrefetchMode; bool AnyLinesForVMOrRowTooLarge; @@ -396,6 +397,7 @@ struct vba_vars_st { bool WritebackLumaAndChromaScalingSupported; bool Cursor64BppSupport; double DCFCLKPerState[DC__VOLTAGE_STATES + 1]; + double DCFCLKState[DC__VOLTAGE_STATES + 1][2]; double FabricClockPerState[DC__VOLTAGE_STATES + 1]; double SOCCLKPerState[DC__VOLTAGE_STATES + 1]; double PHYCLKPerState[DC__VOLTAGE_STATES + 1]; @@ -444,7 +446,7 @@ struct vba_vars_st { double 
OutputLinkDPLanes[DC__NUM_DPP__MAX]; double ForcedOutputLinkBPP[DC__NUM_DPP__MAX]; // Mode Support only double ImmediateFlipBW[DC__NUM_DPP__MAX]; - double MaxMaxVStartup; + double MaxMaxVStartup[DC__VOLTAGE_STATES + 1][2]; double WritebackLumaVExtra; double WritebackChromaVExtra; @@ -471,7 +473,7 @@ struct vba_vars_st { double RoundedUpMaxSwathSizeBytesC; double EffectiveDETLBLinesLuma; double EffectiveDETLBLinesChroma; - double ProjectedDCFCLKDeepSleep; + double ProjectedDCFCLKDeepSleep[DC__VOLTAGE_STATES + 1][2]; double PDEAndMetaPTEBytesPerFrameY; double PDEAndMetaPTEBytesPerFrameC; unsigned int MetaRowBytesY; @@ -489,12 +491,11 @@ struct vba_vars_st { double FractionOfUrgentBandwidthImmediateFlip; // Mode Support debugging output /* ms locals */ - double IdealSDPPortBandwidthPerState[DC__VOLTAGE_STATES + 1]; + double IdealSDPPortBandwidthPerState[DC__VOLTAGE_STATES + 1][2]; unsigned int NoOfDPP[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; int NoOfDPPThisState[DC__NUM_DPP__MAX]; - bool ODMCombineEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - enum odm_combine_mode ODMCombineTypeEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - unsigned int SwathWidthYThisState[DC__NUM_DPP__MAX]; + enum odm_combine_mode ODMCombineEnablePerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; + double SwathWidthYThisState[DC__NUM_DPP__MAX]; unsigned int SwathHeightCPerState[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; unsigned int SwathHeightYThisState[DC__NUM_DPP__MAX]; unsigned int SwathHeightCThisState[DC__NUM_DPP__MAX]; @@ -506,7 +507,7 @@ struct vba_vars_st { double RequiredDPPCLKThisState[DC__NUM_DPP__MAX]; bool PTEBufferSizeNotExceededY[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; bool PTEBufferSizeNotExceededC[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; - bool BandwidthWithoutPrefetchSupported[DC__VOLTAGE_STATES + 1]; + bool BandwidthWithoutPrefetchSupported[DC__VOLTAGE_STATES + 1][2]; bool PrefetchSupported[DC__VOLTAGE_STATES + 1][2]; bool 
VRatioInPrefetchSupported[DC__VOLTAGE_STATES + 1][2]; double RequiredDISPCLK[DC__VOLTAGE_STATES + 1][2]; @@ -515,22 +516,22 @@ struct vba_vars_st { unsigned int TotalNumberOfActiveDPP[DC__VOLTAGE_STATES + 1][2]; unsigned int TotalNumberOfDCCActiveDPP[DC__VOLTAGE_STATES + 1][2]; bool ModeSupport[DC__VOLTAGE_STATES + 1][2]; - double ReturnBWPerState[DC__VOLTAGE_STATES + 1]; + double ReturnBWPerState[DC__VOLTAGE_STATES + 1][2]; bool DIOSupport[DC__VOLTAGE_STATES + 1]; bool NotEnoughDSCUnits[DC__VOLTAGE_STATES + 1]; bool DSCCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1]; bool DTBCLKRequiredMoreThanSupported[DC__VOLTAGE_STATES + 1]; double UrgentRoundTripAndOutOfOrderLatencyPerState[DC__VOLTAGE_STATES + 1]; - bool ROBSupport[DC__VOLTAGE_STATES + 1]; + bool ROBSupport[DC__VOLTAGE_STATES + 1][2]; bool PTEBufferSizeNotExceeded[DC__VOLTAGE_STATES + 1][2]; - bool TotalVerticalActiveBandwidthSupport[DC__VOLTAGE_STATES + 1]; - double MaxTotalVerticalActiveAvailableBandwidth[DC__VOLTAGE_STATES + 1]; + bool TotalVerticalActiveBandwidthSupport[DC__VOLTAGE_STATES + 1][2]; + double MaxTotalVerticalActiveAvailableBandwidth[DC__VOLTAGE_STATES + 1][2]; double PrefetchBW[DC__NUM_DPP__MAX]; - double PDEAndMetaPTEBytesPerFrame[DC__NUM_DPP__MAX]; - double MetaRowBytes[DC__NUM_DPP__MAX]; - double DPTEBytesPerRow[DC__NUM_DPP__MAX]; - double PrefetchLinesY[DC__NUM_DPP__MAX]; - double PrefetchLinesC[DC__NUM_DPP__MAX]; + double PDEAndMetaPTEBytesPerFrame[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double MetaRowBytes[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double DPTEBytesPerRow[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double PrefetchLinesY[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double PrefetchLinesC[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; unsigned int MaxNumSwY[DC__NUM_DPP__MAX]; unsigned int MaxNumSwC[DC__NUM_DPP__MAX]; double PrefillY[DC__NUM_DPP__MAX]; @@ -539,7 +540,7 @@ struct vba_vars_st { double LinesForMetaPTE[DC__NUM_DPP__MAX]; double 
LinesForMetaAndDPTERow[DC__NUM_DPP__MAX]; double MinDPPCLKUsingSingleDPP[DC__NUM_DPP__MAX]; - unsigned int SwathWidthYSingleDPP[DC__NUM_DPP__MAX]; + double SwathWidthYSingleDPP[DC__NUM_DPP__MAX]; double BytePerPixelInDETY[DC__NUM_DPP__MAX]; double BytePerPixelInDETC[DC__NUM_DPP__MAX]; bool RequiresDSC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; @@ -547,7 +548,7 @@ struct vba_vars_st { double RequiresFEC[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; double OutputBppPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; double DSCDelayPerState[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; - bool ViewportSizeSupport[DC__VOLTAGE_STATES + 1]; + bool ViewportSizeSupport[DC__VOLTAGE_STATES + 1][2]; unsigned int Read256BlockHeightY[DC__NUM_DPP__MAX]; unsigned int Read256BlockWidthY[DC__NUM_DPP__MAX]; unsigned int Read256BlockHeightC[DC__NUM_DPP__MAX]; @@ -562,7 +563,7 @@ struct vba_vars_st { double WriteBandwidth[DC__NUM_DPP__MAX]; double PSCL_FACTOR[DC__NUM_DPP__MAX]; double PSCL_FACTOR_CHROMA[DC__NUM_DPP__MAX]; - double MaximumVStartup[DC__NUM_DPP__MAX]; + double MaximumVStartup[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; unsigned int MacroTileWidthY[DC__NUM_DPP__MAX]; unsigned int MacroTileWidthC[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitch[DC__NUM_DPP__MAX]; @@ -579,7 +580,7 @@ struct vba_vars_st { bool ImmediateFlipSupportedForState[DC__VOLTAGE_STATES + 1][2]; double WritebackDelay[DC__VOLTAGE_STATES + 1][DC__NUM_DPP__MAX]; unsigned int vm_group_bytes[DC__NUM_DPP__MAX]; - long dpte_group_bytes[DC__NUM_DPP__MAX]; + unsigned int dpte_group_bytes[DC__NUM_DPP__MAX]; unsigned int dpte_row_height[DC__NUM_DPP__MAX]; unsigned int meta_req_height[DC__NUM_DPP__MAX]; unsigned int meta_req_width[DC__NUM_DPP__MAX]; @@ -605,14 +606,14 @@ struct vba_vars_st { double UrgentBurstFactorChroma[DC__NUM_DPP__MAX]; double UrgentBurstFactorChromaPre[DC__NUM_DPP__MAX]; + bool MPCCombine[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; double SwathWidthCSingleDPP[DC__NUM_DPP__MAX]; double 
MaximumSwathWidthInLineBufferLuma; double MaximumSwathWidthInLineBufferChroma; double MaximumSwathWidthLuma[DC__NUM_DPP__MAX]; double MaximumSwathWidthChroma[DC__NUM_DPP__MAX]; - bool odm_combine_dummy[DC__NUM_DPP__MAX]; - enum odm_combine_mode odm_combine_mode_dummy[DC__NUM_DPP__MAX]; + enum odm_combine_mode odm_combine_dummy[DC__NUM_DPP__MAX]; double dummy1[DC__NUM_DPP__MAX]; double dummy2[DC__NUM_DPP__MAX]; double dummy3[DC__NUM_DPP__MAX]; @@ -622,9 +623,9 @@ struct vba_vars_st { double dummy7[DC__NUM_DPP__MAX]; double dummy8[DC__NUM_DPP__MAX]; unsigned int dummyinteger1ms[DC__NUM_DPP__MAX]; - unsigned int dummyinteger2ms[DC__NUM_DPP__MAX]; + double dummyinteger2ms[DC__NUM_DPP__MAX]; unsigned int dummyinteger3[DC__NUM_DPP__MAX]; - unsigned int dummyinteger4; + unsigned int dummyinteger4[DC__NUM_DPP__MAX]; unsigned int dummyinteger5; unsigned int dummyinteger6; unsigned int dummyinteger7; @@ -637,7 +638,6 @@ struct vba_vars_st { unsigned int dummyintegerarr2[DC__NUM_DPP__MAX]; unsigned int dummyintegerarr3[DC__NUM_DPP__MAX]; unsigned int dummyintegerarr4[DC__NUM_DPP__MAX]; - long dummylongarr1[DC__NUM_DPP__MAX]; bool dummysinglestring; bool SingleDPPViewportSizeSupportPerPlane[DC__NUM_DPP__MAX]; double PlaneRequiredDISPCLKWithODMCombine2To1; @@ -645,20 +645,19 @@ struct vba_vars_st { unsigned int TotalNumberOfSingleDPPPlanes[DC__VOLTAGE_STATES + 1][2]; bool LinkDSCEnable; bool ODMCombine4To1SupportCheckOK[DC__VOLTAGE_STATES + 1]; - bool ODMCombineEnableThisState[DC__NUM_DPP__MAX]; - enum odm_combine_mode ODMCombineEnableTypeThisState[DC__NUM_DPP__MAX]; - unsigned int SwathWidthCThisState[DC__NUM_DPP__MAX]; + enum odm_combine_mode ODMCombineEnableThisState[DC__NUM_DPP__MAX]; + double SwathWidthCThisState[DC__NUM_DPP__MAX]; bool ViewportSizeSupportPerPlane[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitchY[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitchC[DC__NUM_DPP__MAX]; unsigned int NotEnoughUrgentLatencyHiding; unsigned int NotEnoughUrgentLatencyHidingPre; - long 
PTEBufferSizeInRequestsForLuma; - long PTEBufferSizeInRequestsForChroma; + int PTEBufferSizeInRequestsForLuma; + int PTEBufferSizeInRequestsForChroma; // Missing from VBA - long dpte_group_bytes_chroma; + int dpte_group_bytes_chroma; unsigned int vm_group_bytes_chroma; double dst_x_after_scaler; double dst_y_after_scaler; @@ -683,8 +682,8 @@ struct vba_vars_st { double MinTTUVBlank[DC__NUM_DPP__MAX]; double BytePerPixelDETY[DC__NUM_DPP__MAX]; double BytePerPixelDETC[DC__NUM_DPP__MAX]; - unsigned int SwathWidthY[DC__NUM_DPP__MAX]; - unsigned int SwathWidthSingleDPPY[DC__NUM_DPP__MAX]; + double SwathWidthY[DC__NUM_DPP__MAX]; + double SwathWidthSingleDPPY[DC__NUM_DPP__MAX]; double CursorRequestDeliveryTime[DC__NUM_DPP__MAX]; double CursorRequestDeliveryTimePrefetch[DC__NUM_DPP__MAX]; double ReadBandwidthPlaneLuma[DC__NUM_DPP__MAX]; @@ -760,8 +759,8 @@ struct vba_vars_st { double LinesInDETY[DC__NUM_DPP__MAX]; double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; - unsigned int SwathWidthSingleDPPC[DC__NUM_DPP__MAX]; - unsigned int SwathWidthC[DC__NUM_DPP__MAX]; + double SwathWidthSingleDPPC[DC__NUM_DPP__MAX]; + double SwathWidthC[DC__NUM_DPP__MAX]; unsigned int BytePerPixelY[DC__NUM_DPP__MAX]; unsigned int BytePerPixelC[DC__NUM_DPP__MAX]; long dummyinteger1; @@ -779,6 +778,7 @@ struct vba_vars_st { unsigned int DCCCMaxCompressedBlock[DC__NUM_DPP__MAX]; unsigned int DCCCIndependent64ByteBlock[DC__NUM_DPP__MAX]; double VStartupMargin; + bool NotEnoughTimeForDynamicMetadata; /* Missing from VBA */ unsigned int MaximumMaxVStartupLines; @@ -814,7 +814,7 @@ struct vba_vars_st { unsigned int ViewportHeightChroma[DC__NUM_DPP__MAX]; double HRatioChroma[DC__NUM_DPP__MAX]; double VRatioChroma[DC__NUM_DPP__MAX]; - long WritebackSourceWidth[DC__NUM_DPP__MAX]; + int WritebackSourceWidth[DC__NUM_DPP__MAX]; bool ModeIsSupported; bool ODMCombine4To1Supported; @@ -850,6 +850,58 @@ struct vba_vars_st { unsigned int MaxNumHDMIFRLOutputs; int AudioSampleRate[DC__NUM_DPP__MAX]; int 
AudioSampleLayout[DC__NUM_DPP__MAX]; + + int PercentMarginOverMinimumRequiredDCFCLK; + bool DynamicMetadataSupported[DC__VOLTAGE_STATES + 1][2]; + enum immediate_flip_requirement ImmediateFlipRequirement; + double DETBufferSizeYThisState[DC__NUM_DPP__MAX]; + double DETBufferSizeCThisState[DC__NUM_DPP__MAX]; + bool NoUrgentLatencyHiding[DC__NUM_DPP__MAX]; + bool NoUrgentLatencyHidingPre[DC__NUM_DPP__MAX]; + int swath_width_luma_ub_this_state[DC__NUM_DPP__MAX]; + int swath_width_chroma_ub_this_state[DC__NUM_DPP__MAX]; + double UrgLatency[DC__VOLTAGE_STATES + 1]; + double VActiveCursorBandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double VActivePixelBandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + bool NoTimeForPrefetch[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + bool NoTimeForDynamicMetadata[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double dpte_row_bandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double meta_row_bandwidth[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double DETBufferSizeYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double DETBufferSizeCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + int swath_width_luma_ub_all_states[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + int swath_width_chroma_ub_all_states[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + bool NotUrgentLatencyHiding[DC__VOLTAGE_STATES + 1][2]; + unsigned int SwathHeightYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + unsigned int SwathHeightCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + unsigned int SwathWidthYAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + unsigned int SwathWidthCAllStates[DC__VOLTAGE_STATES + 1][2][DC__NUM_DPP__MAX]; + double TotalDPTERowBandwidth[DC__VOLTAGE_STATES + 1][2]; + double TotalMetaRowBandwidth[DC__VOLTAGE_STATES + 1][2]; + double TotalVActiveCursorBandwidth[DC__VOLTAGE_STATES + 1][2]; + double TotalVActivePixelBandwidth[DC__VOLTAGE_STATES + 1][2]; + bool 
UseMinimumRequiredDCFCLK; + double WritebackDelayTime[DC__NUM_DPP__MAX]; + unsigned int DCCYIndependentBlock[DC__NUM_DPP__MAX]; + unsigned int DCCCIndependentBlock[DC__NUM_DPP__MAX]; + unsigned int dummyinteger15; + unsigned int dummyinteger16; + unsigned int dummyinteger17; + unsigned int dummyinteger18; + unsigned int dummyinteger19; + unsigned int dummyinteger20; + unsigned int dummyinteger21; + unsigned int dummyinteger22; + unsigned int dummyinteger23; + unsigned int dummyinteger24; + unsigned int dummyinteger25; + unsigned int dummyinteger26; + unsigned int dummyinteger27; + unsigned int dummyinteger28; + unsigned int dummyinteger29; + bool dummystring[DC__NUM_DPP__MAX]; + double BPP; + enum odm_combine_policy ODMCombinePolicy; }; bool CalculateMinAndMaxPrefetchMode( diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c index b953b02a1512..723af0b2dda0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c @@ -24,7 +24,7 @@ */ #include "dml_common_defs.h" -#include "../calcs/dcn_calc_math.h" +#include "dcn_calc_math.h" #include "dml_inline_defs.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h index eca140da13d8..ded71ea82413 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h @@ -27,7 +27,7 @@ #define __DML_INLINE_DEFS_H__ #include "dml_common_defs.h" -#include "../calcs/dcn_calc_math.h" +#include "dcn_calc_math.h" #include "dml_logger.h" static inline double dml_min(double a, double b) diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 16f6ef22367b..f285b76888fb 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -212,6 +212,7 @@ struct 
resource_pool { struct abm *abm; struct dmcu *dmcu; + struct dmub_psr *psr; const struct resource_funcs *funcs; const struct resource_caps *res_cap; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index 6198bccd6199..8b1f0ce6c2a7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -76,6 +76,8 @@ void dp_enable_mst_on_sink(struct dc_link *link, bool enable); enum dp_panel_mode dp_get_panel_mode(struct dc_link *link); void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode); +bool dp_overwrite_extended_receiver_cap(struct dc_link *link); + void dp_set_fec_ready(struct dc_link *link, bool ready); void dp_set_fec_enable(struct dc_link *link, bool enable); bool dp_set_dsc_enable(struct pipe_ctx *pipe_ctx, bool enable); diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.h rename to drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 125e42dbd3c5..45ef390ae052 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -47,6 +47,26 @@ struct dpp_input_csc_matrix { uint16_t regval[12]; }; +static const struct dpp_input_csc_matrix dpp_input_csc_matrix[] = { + {COLOR_SPACE_SRGB, + {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, + {COLOR_SPACE_SRGB_LIMITED, + {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, + {COLOR_SPACE_YCBCR601, + {0x2cdd, 0x2000, 0, 0xe991, 0xe926, 0x2000, 0xf4fd, 0x10ef, + 0, 0x2000, 0x38b4, 0xe3a6} }, + {COLOR_SPACE_YCBCR601_LIMITED, + {0x3353, 0x2568, 0, 0xe400, 0xe5dc, 0x2568, 0xf367, 0x1108, + 0, 0x2568, 0x40de, 0xdd3a} }, + {COLOR_SPACE_YCBCR709, + {0x3265, 0x2000, 0, 0xe6ce, 0xf105, 
0x2000, 0xfa01, 0xa7d, 0, + 0x2000, 0x3b61, 0xe24f} }, + + {COLOR_SPACE_YCBCR709_LIMITED, + {0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0, + 0x2568, 0x43ee, 0xdbb2} } +}; + struct dpp_grph_csc_adjustment { struct fixed31_32 temperature_matrix[CSC_TEMPERATURE_MATRIX_SIZE]; enum graphics_gamut_adjust_type gamut_adjust_type; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 686145933335..2cb8466e657b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -63,6 +63,26 @@ struct hubp { bool power_gated; }; +struct surface_flip_registers { + uint32_t DCSURF_SURFACE_CONTROL; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C; + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; + uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS; + uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS; + bool tmz_surface; + bool immediate; + uint8_t vmid; + bool grph_stereo; +}; + struct hubp_funcs { void (*hubp_setup)( struct hubp *hubp, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 2d3efd71fa51..e5e7d94026fc 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -208,7 +208,8 @@ struct timing_generator_funcs { bool enable, const struct dc_crtc_timing *timing); void (*set_drr)(struct timing_generator *tg, const struct drr_params *params); void (*set_static_screen_control)(struct timing_generator *tg, - uint32_t value); + 
uint32_t event_triggers, + uint32_t num_frames); void (*set_test_pattern)( struct timing_generator *tg, enum controller_dp_test_pattern test_pattern, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index df3204645c6b..209118f9f193 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -42,7 +42,7 @@ struct dc_state; struct dc_stream_status; struct dc_writeback_info; struct dchub_init_data; -struct dc_static_screen_events; +struct dc_static_screen_params; struct resource_pool; struct dc_phy_addr_space_config; struct dc_virtual_addr_space_config; @@ -102,7 +102,7 @@ struct hw_sequencer_funcs { unsigned int vmid, unsigned int vmid_frame_number); void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx, int num_pipes, - const struct dc_static_screen_events *events); + const struct dc_static_screen_params *events); /* Stream Related */ void (*enable_stream)(struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h index 8ba06f015975..ecf566378ccd 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h @@ -51,7 +51,7 @@ struct dc_state; struct dc_stream_status; struct dc_writeback_info; struct dchub_init_data; -struct dc_static_screen_events; +struct dc_static_screen_params; struct resource_pool; struct resource_context; struct stream_resource; diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h index 47e307388581..2470405e996b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h +++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h @@ -458,7 +458,14 @@ uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr, #define IX_REG_READ(index_reg_name, data_reg_name, index) \ 
generic_read_indirect_reg(CTX, REG(index_reg_name), REG(data_reg_name), IND_REG(index)) +#define IX_REG_GET_N(index_reg_name, data_reg_name, index, n, ...) \ + generic_indirect_reg_get(CTX, REG(index_reg_name), REG(data_reg_name), \ + IND_REG(index), \ + n, __VA_ARGS__) +#define IX_REG_GET(index_reg_name, data_reg_name, index, field, val) \ + IX_REG_GET_N(index_reg_name, data_reg_name, index, 1, \ + FN(data_reg_name, field), val) #define IX_REG_UPDATE_N(index_reg_name, data_reg_name, index, n, ...) \ generic_indirect_reg_update_ex(CTX, \ @@ -479,6 +486,12 @@ uint32_t generic_read_indirect_reg(const struct dc_context *ctx, uint32_t addr_index, uint32_t addr_data, uint32_t index); +uint32_t generic_indirect_reg_get(const struct dc_context *ctx, + uint32_t addr_index, uint32_t addr_data, + uint32_t index, int n, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + ...); + uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx, uint32_t addr_index, uint32_t addr_data, uint32_t index, uint32_t reg_val, int n, diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index b10728f33f62..cd9532b4f14d 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -36,6 +36,7 @@ #define DMUB_RB_SIZE (DMUB_RB_CMD_SIZE * DMUB_RB_MAX_ENTRY) #define REG_SET_MASK 0xFFFF + /* * Command IDs should be treated as stable ABI. * Do not reuse or modify IDs. 
@@ -47,6 +48,7 @@ enum dmub_cmd_type { DMUB_CMD__REG_SEQ_FIELD_UPDATE_SEQ = 2, DMUB_CMD__REG_SEQ_BURST_WRITE = 3, DMUB_CMD__REG_REG_WAIT = 4, + DMUB_CMD__PLAT_54186_WA = 5, DMUB_CMD__PSR = 64, DMUB_CMD__VBIOS = 128, }; @@ -145,6 +147,32 @@ struct dmub_rb_cmd_reg_wait { struct dmub_cmd_reg_wait_data reg_wait; }; +#ifndef PHYSICAL_ADDRESS_LOC +#define PHYSICAL_ADDRESS_LOC union large_integer +#endif + +struct dmub_cmd_PLAT_54186_wa { + uint32_t DCSURF_SURFACE_CONTROL; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; + struct { + uint8_t hubp_inst : 4; + uint8_t tmz_surface : 1; + uint8_t immediate :1; + uint8_t vmid : 4; + uint8_t grph_stereo : 1; + uint32_t reserved : 21; + } flip_params; + uint32_t reserved[9]; +}; + +struct dmub_rb_cmd_PLAT_54186_wa { + struct dmub_cmd_header header; + struct dmub_cmd_PLAT_54186_wa flip; +}; + struct dmub_cmd_digx_encoder_control_data { union dig_encoder_control_parameters_v1_5 dig; }; @@ -187,9 +215,28 @@ struct dmub_rb_cmd_dpphy_init { }; struct dmub_cmd_psr_copy_settings_data { - uint32_t reg1; - uint32_t reg2; - uint32_t reg3; + uint16_t psr_level; + uint8_t hubp_inst; + uint8_t dpp_inst; + uint8_t mpcc_inst; + uint8_t opp_inst; + uint8_t otg_inst; + uint8_t digfe_inst; + uint8_t digbe_inst; + uint8_t dpphy_inst; + uint8_t aux_inst; + uint8_t hyst_frames; + uint8_t hyst_lines; + uint8_t phy_num; + uint8_t phy_type; + uint8_t aux_repeat; + uint8_t smu_optimizations_en; + uint8_t skip_wait_for_pll_lock; + uint8_t frame_delay; + uint8_t smu_phy_id; + uint8_t num_of_controllers; + uint8_t link_rate; + uint8_t frame_cap_ind; }; struct dmub_rb_cmd_psr_copy_settings { @@ -206,31 +253,17 @@ struct dmub_rb_cmd_psr_set_level { struct dmub_cmd_psr_set_level_data psr_set_level_data; }; -struct dmub_rb_cmd_psr_disable { - struct dmub_cmd_header header; -}; - struct dmub_rb_cmd_psr_enable { struct 
dmub_cmd_header header; }; -struct dmub_cmd_psr_notify_vblank_data { - uint32_t vblank_int; // Which vblank interrupt was triggered +struct dmub_cmd_psr_setup_data { + enum psr_version version; // PSR version 1 or 2 }; -struct dmub_rb_cmd_notify_vblank { +struct dmub_rb_cmd_psr_setup { struct dmub_cmd_header header; - struct dmub_cmd_psr_notify_vblank_data psr_notify_vblank_data; -}; - -struct dmub_cmd_psr_notify_static_state_data { - uint32_t ss_int; // Which static screen interrupt was triggered - uint32_t ss_enter; // Enter (1) or exit (0) static screen -}; - -struct dmub_rb_cmd_psr_notify_static_state { - struct dmub_cmd_header header; - struct dmub_cmd_psr_notify_static_state_data psr_notify_static_state_data; + struct dmub_cmd_psr_setup_data psr_setup_data; }; union dmub_rb_cmd { @@ -245,9 +278,10 @@ union dmub_rb_cmd { struct dmub_rb_cmd_dpphy_init dpphy_init; struct dmub_rb_cmd_dig1_transmitter_control dig1_transmitter_control; struct dmub_rb_cmd_psr_enable psr_enable; - struct dmub_rb_cmd_psr_disable psr_disable; struct dmub_rb_cmd_psr_copy_settings psr_copy_settings; struct dmub_rb_cmd_psr_set_level psr_set_level; + struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa; + struct dmub_rb_cmd_psr_setup psr_setup; }; #pragma pack(pop) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h index 14f13e8a6f3b..7b69eb37f762 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h @@ -32,10 +32,17 @@ */ enum dmub_cmd_psr_type { - DMUB_CMD__PSR_ENABLE = 0, - DMUB_CMD__PSR_DISABLE = 1, - DMUB_CMD__PSR_COPY_SETTINGS = 2, - DMUB_CMD__PSR_SET_LEVEL = 3, + DMUB_CMD__PSR_SETUP = 0, + DMUB_CMD__PSR_COPY_SETTINGS = 1, + DMUB_CMD__PSR_ENABLE = 2, + DMUB_CMD__PSR_DISABLE = 3, + DMUB_CMD__PSR_SET_LEVEL = 4, +}; + +enum psr_version { + PSR_VERSION_1 = 0x10, // PSR Version 1 + PSR_VERSION_2 = 0x20, // PSR Version 2, includes selective update + 
PSR_VERSION_2_Y_COORD = 0x21, // PSR Version 2, includes Y-coordinate support for SU }; #endif /* _DMUB_CMD_DAL_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h index ac22744eaa94..df875fdd2ab0 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h @@ -73,12 +73,17 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) static inline bool dmub_rb_push_front(struct dmub_rb *rb, const struct dmub_cmd_header *cmd) { - uint8_t *wt_ptr = (uint8_t *)(rb->base_address) + rb->wrpt; + uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t); + const uint64_t *src = (const uint64_t *)cmd; + int i; if (dmub_rb_full(rb)) return false; - dmub_memcpy(wt_ptr, cmd, DMUB_RB_CMD_SIZE); + // copying data + for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) + *dst++ = *src++; + rb->wrpt += DMUB_RB_CMD_SIZE; if (rb->wrpt >= rb->capacity) @@ -113,6 +118,26 @@ static inline bool dmub_rb_pop_front(struct dmub_rb *rb) return true; } +static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) +{ + uint32_t rptr = rb->rptr; + uint32_t wptr = rb->wrpt; + + while (rptr != wptr) { + uint64_t volatile *data = (uint64_t volatile *)rb->base_address + rptr / sizeof(uint64_t); + //uint64_t volatile *p = (uint64_t volatile *)data; + uint64_t temp; + int i; + + for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) + temp = *data++; + + rptr += DMUB_RB_CMD_SIZE; + if (rptr >= rb->capacity) + rptr %= rb->capacity; + } +} + static inline void dmub_rb_init(struct dmub_rb *rb, struct dmub_rb_init_params *init_params) { diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h index f34a50dd36ea..8e23a7017588 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h @@ -444,25 +444,6 @@ enum dmub_status 
dmub_srv_cmd_queue(struct dmub_srv *dmub, */ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub); -/** - * dmub_srv_cmd_submit() - submits a command to the DMUB immediately - * @dmub: the dmub service - * @cmd: the command to submit - * @timeout_us: the maximum number of microseconds to wait - * - * Submits a command to the DMUB with an optional timeout. - * If timeout_us is given then the service will attempt to - * resubmit for the given number of microseconds. - * - * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_TIMEOUT - wait for submit timed out - * DMUB_STATUS_INVALID - unspecified error - */ -enum dmub_status dmub_srv_cmd_submit(struct dmub_srv *dmub, - const struct dmub_cmd_header *cmd, - uint32_t timeout_us); - /** * dmub_srv_wait_for_auto_load() - Waits for firmware auto load to complete * @dmub: the dmub service diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c index f45e14ada685..cd51c6138894 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c @@ -66,10 +66,12 @@ void dmub_dcn20_reset(struct dmub_srv *dmub) { REG_UPDATE(DMCUB_CNTL, DMCUB_SOFT_RESET, 1); REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); + REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); } void dmub_dcn20_reset_release(struct dmub_srv *dmub) { + REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 0); REG_WRITE(DMCUB_SCRATCH15, dmub->psp_version & 0x001100FF); REG_UPDATE_2(DMCUB_CNTL, DMCUB_ENABLE, 1, DMCUB_TRACEPORT_EN, 1); REG_UPDATE(DMCUB_CNTL, DMCUB_SOFT_RESET, 0); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h index 68af9b190288..53bfd4da69ad 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h @@ -91,7 +91,8 @@ struct dmub_srv; DMUB_SR(DMCUB_SCRATCH13) \ DMUB_SR(DMCUB_SCRATCH14) \ DMUB_SR(DMCUB_SCRATCH15) \ - 
DMUB_SR(CC_DC_PIPE_DIS) + DMUB_SR(CC_DC_PIPE_DIS) \ + DMUB_SR(MMHUBBUB_SOFT_RESET) #define DMUB_COMMON_FIELDS() \ DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \ @@ -119,7 +120,8 @@ struct dmub_srv; DMUB_SF(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_ENABLE) \ DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_TOP_ADDRESS) \ DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \ - DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) + DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \ + DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) struct dmub_srv_common_reg_offset { #define DMUB_SR(reg) uint32_t reg; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 9a959f871f11..dee676335d73 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -67,6 +67,26 @@ static inline uint32_t dmub_align(uint32_t val, uint32_t factor) return (val + factor - 1) / factor * factor; } +static void dmub_flush_buffer_mem(const struct dmub_fb *fb) +{ + const uint8_t *base = (const uint8_t *)fb->cpu_addr; + uint8_t buf[64]; + uint32_t pos, end; + + /** + * Read 64-byte chunks since we don't want to store a + * large temporary buffer for this purpose. + */ + end = fb->size / sizeof(buf) * sizeof(buf); + + for (pos = 0; pos < end; pos += sizeof(buf)) + dmub_memcpy(buf, base + pos, sizeof(buf)); + + /* Read anything leftover into the buffer. */ + if (end < fb->size) + dmub_memcpy(buf, base + pos, fb->size - end); +} + static const struct dmub_fw_meta_info * dmub_get_fw_meta_info(const uint8_t *fw_bss_data, uint32_t fw_bss_data_size) { @@ -329,6 +349,13 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, cw1.region.base = DMUB_CW1_BASE; cw1.region.top = cw1.region.base + stack_fb->size - 1; + /** + * Read back all the instruction memory so we don't hang the + * DMCUB when backdoor loading if the write from x86 hasn't been + * flushed yet. This only occurs in backdoor loading. 
+ */ + dmub_flush_buffer_mem(inst_fb); + if (params->load_inst_const && dmub->hw_funcs.backdoor_load) dmub->hw_funcs.backdoor_load(dmub, &cw0, &cw1); } @@ -405,33 +432,17 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) if (!dmub->hw_init) return DMUB_STATUS_INVALID; + /** + * Read back all the queued commands to ensure that they've + * been flushed to framebuffer memory. Otherwise DMCUB might + * read back stale, fully invalid or partially invalid data. + */ + dmub_rb_flush_pending(&dmub->inbox1_rb); + dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_cmd_submit(struct dmub_srv *dmub, - const struct dmub_cmd_header *cmd, - uint32_t timeout_us) -{ - uint32_t i = 0; - - if (!dmub->hw_init) - return DMUB_STATUS_INVALID; - - for (i = 0; i <= timeout_us; ++i) { - dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - if (dmub_rb_push_front(&dmub->inbox1_rb, cmd)) { - dmub->hw_funcs.set_inbox1_wptr(dmub, - dmub->inbox1_rb.wrpt); - return DMUB_STATUS_OK; - } - - udelay(1); - } - - return DMUB_STATUS_TIMEOUT; -} - enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, uint32_t timeout_us) { diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 11d7daf6f076..a2903985b9e8 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -134,8 +134,13 @@ #define PICASSO_A0 0x41 /* DCN1_01 */ #define RAVEN2_A0 0x81 +#define RAVEN2_15D8_REV_94 0x94 +#define RAVEN2_15D8_REV_95 0x95 #define RAVEN2_15D8_REV_E3 0xE3 #define RAVEN2_15D8_REV_E4 0xE4 +#define RAVEN2_15D8_REV_E9 0xE9 +#define RAVEN2_15D8_REV_EA 0xEA +#define RAVEN2_15D8_REV_EB 0xEB #define RAVEN1_F0 0xF0 #define RAVEN_UNKNOWN 0xFF #ifndef ASICREV_IS_RAVEN @@ -149,6 +154,11 @@ #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN)) #define ASICREV_IS_DALI(eChipRev) 
((eChipRev == RAVEN2_15D8_REV_E3) \ || (eChipRev == RAVEN2_15D8_REV_E4)) +#define ASICREV_IS_POLLOCK(eChipRev) (eChipRev == RAVEN2_15D8_REV_94 \ + || eChipRev == RAVEN2_15D8_REV_95 \ + || eChipRev == RAVEN2_15D8_REV_E9 \ + || eChipRev == RAVEN2_15D8_REV_EA \ + || eChipRev == RAVEN2_15D8_REV_EB) #define FAMILY_RV 142 /* DCN 1*/ diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index fa57885503d4..6e5ecefe7d9d 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -816,6 +816,8 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, in_out_vrr->btr.inserted_duration_in_us = 0; in_out_vrr->btr.frames_to_insert = 0; in_out_vrr->btr.frame_counter = 0; + in_out_vrr->fixed.fixed_active = false; + in_out_vrr->fixed.target_refresh_in_uhz = 0; in_out_vrr->btr.mid_point_in_us = (in_out_vrr->min_duration_in_us + @@ -832,6 +834,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, in_out_vrr->adjust.v_total_max = stream->timing.v_total; } else if (in_out_vrr->state == VRR_STATE_ACTIVE_VARIABLE && refresh_range >= MIN_REFRESH_RANGE_IN_US) { + in_out_vrr->adjust.v_total_min = calc_v_total_from_refresh(stream, in_out_vrr->max_refresh_in_uhz); diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 4e2f615c3566..e75a4bb94488 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -662,7 +662,11 @@ bool dmcu_load_iram(struct dmcu *dmcu, memset(&ram_table, 0, sizeof(ram_table)); - if (dmcu->dmcu_version.abm_version == 0x23) { + if (dmcu->dmcu_version.abm_version == 0x24) { + fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params); + result = dmcu->funcs->load_iram( + dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START_V2_2); 
+ } else if (dmcu->dmcu_version.abm_version == 0x23) { fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params); result = dmcu->funcs->load_iram( @@ -687,3 +691,4 @@ bool dmcu_load_iram(struct dmcu *dmcu, return result; } + diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h index cff8f91555d3..e9b2bd84cfed 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h @@ -8134,6 +8134,10 @@ #define mmMPC_OUT5_CSC_C33_C34_B 0x1604 #define mmMPC_OUT5_CSC_C33_C34_B_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_INDEX 0x163b +#define mmMPC_OCSC_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_DATA_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_DATA 0x163c // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec // base address: 0x5964 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h index 10c83fecd147..dc8ce7aaa0cf 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h @@ -28263,7 +28263,14 @@ #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C34_B__SHIFT 0x10 #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C33_B_MASK 0x0000FFFFL #define MPC_OUT5_CSC_C33_C34_B__MPC_OCSC_C34_B_MASK 0xFFFF0000L - +//MPC_OCSC_TEST_DEBUG_INDEX +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX__SHIFT 0x0 +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN__SHIFT 0x8 +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX_MASK 0x000000FFL +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN_MASK 0x00000100L +//MPC_OCSC_TEST_DEBUG_DATA +#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA__SHIFT 0x0 +#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL // addressBlock: 
dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec //DC_PERFMON17_PERFCOUNTER_CNTL diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h index eddf83ec1c39..7cd0ee61c030 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h @@ -7103,7 +7103,10 @@ #define mmMPC_OUT3_CSC_C31_C32_B_BASE_IDX 2 #define mmMPC_OUT3_CSC_C33_C34_B 0x15ea #define mmMPC_OUT3_CSC_C33_C34_B_BASE_IDX 2 - +#define mmMPC_OCSC_TEST_DEBUG_INDEX 0x163b +#define mmMPC_OCSC_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_DATA_BASE_IDX 2 +#define mmMPC_OCSC_TEST_DEBUG_DATA 0x163c // addressBlock: dce_dc_mpc_mpc_dcperfmon_dc_perfmon_dispdec // base address: 0x5964 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h index faa0e76e32b4..2f780aefc722 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h @@ -56634,5 +56634,13 @@ #define AZF0INPUTENDPOINT7_AZALIA_F0_CODEC_INPUT_PIN_CONTROL_INFOFRAME__INFOFRAME_BYTE_5_MASK 0x00FF0000L #define AZF0INPUTENDPOINT7_AZALIA_F0_CODEC_INPUT_PIN_CONTROL_INFOFRAME__INFOFRAME_VALID_MASK 0x80000000L +//MPC_OCSC_TEST_DEBUG_INDEX +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX__SHIFT 0x0 +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN__SHIFT 0x8 +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_INDEX_MASK 0x000000FFL +#define MPC_OCSC_TEST_DEBUG_INDEX__MPC_OCSC_TEST_DEBUG_WRITE_EN_MASK 0x00000100L +//MPC_OCSC_TEST_DEBUG_DATA +#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA__SHIFT 0x0 +#define MPC_OCSC_TEST_DEBUG_DATA__MPC_OCSC_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h 
index f301e5fe2109..87c84691b5be 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h @@ -27,6 +27,9 @@ #define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc #define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0 +#define mmDF_CS_UMC_AON0_DfGlobalCtrl 0x00fe +#define mmDF_CS_UMC_AON0_DfGlobalCtrl_BASE_IDX 0 + #define mmDF_CS_UMC_AON0_DramBaseAddress0 0x0044 #define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX 0 diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h index 06fac509e987..65e9f756e86e 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h @@ -33,6 +33,14 @@ #define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0 #define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL +/* DF_CS_UMC_AON0_DfGlobalCtrl */ +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl64K__SHIFT 0x14 +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl2M__SHIFT 0x15 +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl1G__SHIFT 0x16 +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl64K_MASK 0x00100000L +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl2M_MASK 0x00200000L +#define DF_CS_UMC_AON0_DfGlobalCtrl__GlbHashIntlvCtl1G_MASK 0x00400000L + /* DF_CS_AON0_DramBaseAddress0 */ #define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0 #define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h index d4c613a85352..c9e3f6d849a8 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h @@ -8739,10 +8739,16 @@ #define TCP_ADDR_CONFIG__NUM_BANKS__SHIFT 0x4 #define TCP_ADDR_CONFIG__COLHI_WIDTH__SHIFT 0x6 #define TCP_ADDR_CONFIG__RB_SPLIT_COLHI__SHIFT 
0x9 +#define TCP_ADDR_CONFIG__ENABLE64KHASH__SHIFT 0xb +#define TCP_ADDR_CONFIG__ENABLE2MHASH__SHIFT 0xc +#define TCP_ADDR_CONFIG__ENABLE1GHASH__SHIFT 0xd #define TCP_ADDR_CONFIG__NUM_TCC_BANKS_MASK 0x0000000FL #define TCP_ADDR_CONFIG__NUM_BANKS_MASK 0x00000030L #define TCP_ADDR_CONFIG__COLHI_WIDTH_MASK 0x000001C0L #define TCP_ADDR_CONFIG__RB_SPLIT_COLHI_MASK 0x00000200L +#define TCP_ADDR_CONFIG__ENABLE64KHASH_MASK 0x00000800L +#define TCP_ADDR_CONFIG__ENABLE2MHASH_MASK 0x00001000L +#define TCP_ADDR_CONFIG__ENABLE1GHASH_MASK 0x00002000L //TCP_CREDIT #define TCP_CREDIT__LFIFO_CREDIT__SHIFT 0x0 #define TCP_CREDIT__REQ_FIFO_CREDIT__SHIFT 0x10 diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h index 043aa695d63f..0d6b594be775 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h @@ -27,5 +27,7 @@ #define mmUMCCH0_0_EccErrCnt_BASE_IDX 0 #define mmMCA_UMC_UMC0_MCUMC_STATUST0 0x03c2 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_BASE_IDX 0 +#define mmMCA_UMC_UMC0_MCUMC_ADDRT0 0x03c4 +#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_BASE_IDX 0 #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h index 03be415e9555..ce005c674a18 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h @@ -27,5 +27,7 @@ #define mmUMCCH0_0_EccErrCnt_ARCT_BASE_IDX 1 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT 0x03c2 #define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT_BASE_IDX 1 +#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT 0x03c4 +#define mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT_BASE_IDX 1 #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h new file mode 100644 index 000000000000..a5a8c993ec3a --- /dev/null +++ 
b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _umc_6_1_2_SH_MASK_HEADER +#define _umc_6_1_2_SH_MASK_HEADER + +//UMCCH0_0_EccErrCntSel_ARCT +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntCsSel__SHIFT 0x0 +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrInt__SHIFT 0xc +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntEn__SHIFT 0xf +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntCsSel_MASK 0x0000000FL +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrInt_MASK 0x00003000L +#define UMCCH0_0_EccErrCntSel_ARCT__EccErrCntEn_MASK 0x00008000L +//UMCCH0_0_EccErrCnt_ARCT +#define UMCCH0_0_EccErrCnt_ARCT__EccErrCnt__SHIFT 0x0 +#define UMCCH0_0_EccErrCnt_ARCT__EccErrCnt_MASK 0x0000FFFFL +//MCA_UMC_UMC0_MCUMC_STATUST0_ARCT +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCode__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCodeExt__SHIFT 0x10 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV0__SHIFT 0x16 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreId__SHIFT 0x20 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV1__SHIFT 0x26 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Scrub__SHIFT 0x28 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV2__SHIFT 0x29 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Poison__SHIFT 0x2b +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Deferred__SHIFT 0x2c +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UECC__SHIFT 0x2d +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__CECC__SHIFT 0x2e +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV3__SHIFT 0x2f +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Transparent__SHIFT 0x34 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__SyndV__SHIFT 0x35 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV4__SHIFT 0x36 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__TCC__SHIFT 0x37 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreIdVal__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__PCC__SHIFT 0x39 +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__AddrV__SHIFT 0x3a +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__MiscV__SHIFT 0x3b +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__En__SHIFT 0x3c +#define 
MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UC__SHIFT 0x3d +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Overflow__SHIFT 0x3e +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Val__SHIFT 0x3f +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCode_MASK 0x000000000000FFFFL +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrorCodeExt_MASK 0x00000000003F0000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV0_MASK 0x00000000FFC00000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreId_MASK 0x0000003F00000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV1_MASK 0x000000C000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Scrub_MASK 0x0000010000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV2_MASK 0x0000060000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Poison_MASK 0x0000080000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Deferred_MASK 0x0000100000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UECC_MASK 0x0000200000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__CECC_MASK 0x0000400000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV3_MASK 0x000F800000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Transparent_MASK 0x0010000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__SyndV_MASK 0x0020000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__RESERV4_MASK 0x0040000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__TCC_MASK 0x0080000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__ErrCoreIdVal_MASK 0x0100000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__PCC_MASK 0x0200000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__AddrV_MASK 0x0400000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__MiscV_MASK 0x0800000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__En_MASK 0x1000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__UC_MASK 0x2000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Overflow_MASK 0x4000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0_ARCT__Val_MASK 0x8000000000000000L 
+//MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__ErrorAddr__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__LSB__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__Reserved__SHIFT 0x3e +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__ErrorAddr_MASK 0x00FFFFFFFFFFFFFFL +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__LSB_MASK 0x3F00000000000000L +#define MCA_UMC_UMC0_MCUMC_ADDRT0_ARCT__Reserved_MASK 0xC000000000000000L + +#endif diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 2cd217e60125..a607b1034962 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -256,6 +256,10 @@ struct kfd2kgd_calls { uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); + int (*hiq_mqd_load)(struct kgd_dev *kgd, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t doorbell_off); + int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd, uint32_t __user *wptr, struct mm_struct *mm); @@ -307,8 +311,6 @@ struct kfd2kgd_calls { void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); - int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); - int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid); uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); uint64_t (*get_hive_id)(struct kgd_dev *kgd); diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 1c15c6fbe3b9..14ba6aa876e2 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1318,6 +1318,7 @@ static int arcturus_get_power_limit(struct smu_context *smu, static int arcturus_get_power_profile_mode(struct smu_context *smu, char *buf) { + struct amdgpu_device *adev = smu->adev; DpmActivityMonitorCoeffInt_t activity_monitor; static const char *profile_name[] = { "BOOTUP_DEFAULT", @@ -1351,7 +1352,7 @@ static 
int arcturus_get_power_profile_mode(struct smu_context *smu, if (result) return result; - if (smu_version >= 0x360d00) + if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n", title[0], title[1], title[2], title[3], title[4], title[5], title[6], title[7], title[8], title[9], title[10]); @@ -1368,7 +1369,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, if (workload_type < 0) continue; - if (smu_version >= 0x360d00) { + if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) { result = smu_update_table(smu, SMU_TABLE_ACTIVITY_MONITOR_COEFF, workload_type, @@ -1383,7 +1384,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, size += sprintf(buf + size, "%2d %14s%s\n", i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); - if (smu_version >= 0x360d00) { + if (smu_version >= 0x360d00 && !amdgpu_sriov_vf(adev)) { size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 0, diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h index c27c82851468..2f85a34c0591 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu12_driver_if.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define SMU12_DRIVER_IF_VERSION 10 +#define SMU12_DRIVER_IF_VERSION 11 typedef struct { int32_t value; @@ -192,6 +192,11 @@ typedef struct { uint16_t SocTemperature; //[centi-Celsius] uint16_t ThrottlerStatus; uint16_t spare; + + uint16_t StapmOriginalLimit; //[mW] + uint16_t StapmCurrentLimit; //[mW] + uint16_t ApuPower; //[mW] + uint16_t dGpuPower; //[mW] } SmuMetrics_t; diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index e804f9854027..02f8c9cb89d9 100644 --- 
a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1154,11 +1154,12 @@ static int smu_v11_0_set_thermal_range(struct smu_context *smu, int low = SMU_THERMAL_MINIMUM_ALERT_TEMP; int high = SMU_THERMAL_MAXIMUM_ALERT_TEMP; uint32_t val; + struct smu_table_context *table_context = &smu->smu_table; + struct smu_11_0_powerplay_table *powerplay_table = table_context->power_play_table; low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP, range.min / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES); - high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, - range.max / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES); + high = min((uint16_t)SMU_THERMAL_MAXIMUM_ALERT_TEMP, powerplay_table->software_shutdown_temp); if (low > high) return -EINVAL; diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 5d3c1d379277..021c5a98db09 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -5001,6 +5001,9 @@ int drm_dp_mst_atomic_check(struct drm_atomic_state *state) int i, ret = 0; for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) { + if (!mgr->mst_state) + continue; + ret = drm_dp_mst_atomic_check_vcpi_alloc_limit(mgr, mst_state); if (ret) break; diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 2e3a058fc239..ec79e8e5ad3c 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -45,7 +45,7 @@ * @guilty: atomic_t set to 1 when a job on this queue * is found to be guilty causing a timeout * - * Note: the rq_list should have atleast one element to schedule + * Note: the sched_list should have at least one element to schedule * the entity * * Returns 0 on success or a negative error code on failure. 
@@ -130,7 +130,7 @@ static struct drm_sched_rq * drm_sched_entity_get_free_sched(struct drm_sched_entity *entity) { struct drm_sched_rq *rq = NULL; - unsigned int min_jobs = UINT_MAX, num_jobs; + unsigned int min_score = UINT_MAX, num_score; int i; for (i = 0; i < entity->num_sched_list; ++i) { @@ -141,9 +141,9 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity) continue; } - num_jobs = atomic_read(&sched->num_jobs); - if (num_jobs < min_jobs) { - min_jobs = num_jobs; + num_score = atomic_read(&sched->score); + if (num_score < min_score) { + min_score = num_score; rq = &entity->sched_list[i]->sched_rq[entity->priority]; } } @@ -498,7 +498,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job, bool first; trace_drm_sched_job(sched_job, entity); - atomic_inc(&entity->rq->sched->num_jobs); + atomic_inc(&entity->rq->sched->score); WRITE_ONCE(entity->last_user, current->group_leader); first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 3fad5876a13f..71ce6215956f 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -92,6 +92,7 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq, if (!list_empty(&entity->list)) return; spin_lock(&rq->lock); + atomic_inc(&rq->sched->score); list_add_tail(&entity->list, &rq->entities); spin_unlock(&rq->lock); } @@ -110,6 +111,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, if (list_empty(&entity->list)) return; spin_lock(&rq->lock); + atomic_dec(&rq->sched->score); list_del_init(&entity->list); if (rq->current_entity == entity) rq->current_entity = NULL; @@ -655,7 +657,7 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) struct drm_gpu_scheduler *sched = s_fence->sched; atomic_dec(&sched->hw_rq_count); - atomic_dec(&sched->num_jobs); + atomic_dec(&sched->score); trace_drm_sched_process_job(s_fence); @@ 
-830,7 +832,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, spin_lock_init(&sched->job_list_lock); atomic_set(&sched->hw_rq_count, 0); INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); - atomic_set(&sched->num_jobs, 0); + atomic_set(&sched->score, 0); atomic64_set(&sched->job_id_count, 0); /* Each scheduler will run on a seperate kernel thread */ diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 96a1a1b7526e..9e71be129c30 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -52,9 +52,9 @@ enum drm_sched_priority { * @list: used to append this struct to the list of entities in the * runqueue. * @rq: runqueue on which this entity is currently scheduled. - * @rq_list: a list of run queues on which jobs from this entity can - * be scheduled - * @num_rq_list: number of run queues in the rq_list + * @sched_list: a list of drm_gpu_schedulers on which jobs from this entity can + * be scheduled + * @num_sched_list: number of drm_gpu_schedulers in the sched_list. * @rq_lock: lock to modify the runqueue to which this entity belongs. * @job_queue: the list of jobs of this entity. * @fence_seq: a linearly increasing seqno incremented with each @@ -81,8 +81,8 @@ enum drm_sched_priority { struct drm_sched_entity { struct list_head list; struct drm_sched_rq *rq; - unsigned int num_sched_list; struct drm_gpu_scheduler **sched_list; + unsigned int num_sched_list; enum drm_sched_priority priority; spinlock_t rq_lock; @@ -261,7 +261,7 @@ struct drm_sched_backend_ops { * @job_list_lock: lock to protect the ring_mirror_list. * @hang_limit: once the hangs by a job crosses this limit then it is marked * guilty and it will be considered for scheduling further. - * @num_jobs: the number of jobs in queue in the scheduler + * @score: score to help loadbalancer pick an idle sched * @ready: marks if the underlying HW is ready to work * @free_guilty: A hit to time out handler to free the guilty job. 
* @@ -282,8 +282,8 @@ struct drm_gpu_scheduler { struct list_head ring_mirror_list; spinlock_t job_list_lock; int hang_limit; - atomic_t num_jobs; - bool ready; + atomic_t score; + bool ready; bool free_guilty; }; @@ -315,7 +315,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, int drm_sched_entity_init(struct drm_sched_entity *entity, enum drm_sched_priority priority, struct drm_gpu_scheduler **sched_list, - unsigned int num_rq_list, + unsigned int num_sched_list, atomic_t *guilty); long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout); void drm_sched_entity_fini(struct drm_sched_entity *entity);