drm/amdgpu: Perform gpu reset after gfx finishes processing ras poison consumption on gfx_v11_0_3
Perform gpu reset after gfx finishes processing ras poison consumption on gfx_v11_0_3. V2: Move gfx poison consumption handler from hw_ops to ip function level. V3: Adjust the calling position of amdgpu_gfx_poison_consumption_handler. V4: Since gfx v11_0_3 does not have .hw_ops instance, the .hw_ops null pointer check in amdgpu_ras_interrupt_poison_consumption_handler needs to be adjusted. Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
790bef488b
commit
ac7b25d92c
@ -731,6 +731,15 @@ int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
|
||||
return adev->gfx.ras->poison_consumption_handler(adev, entry);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
|
||||
void *err_data,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
|
@ -213,6 +213,8 @@ struct amdgpu_gfx_ras {
|
||||
int (*rlc_gc_fed_irq)(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
int (*poison_consumption_handler)(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
};
|
||||
|
||||
struct amdgpu_gfx_funcs {
|
||||
@ -437,4 +439,6 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
|
||||
|
||||
int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev);
|
||||
int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
#endif
|
||||
|
@ -1620,14 +1620,14 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
|
||||
struct amdgpu_ras_block_object *block_obj =
|
||||
amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
|
||||
|
||||
if (!block_obj || !block_obj->hw_ops)
|
||||
if (!block_obj)
|
||||
return;
|
||||
|
||||
/* both query_poison_status and handle_poison_consumption are optional,
|
||||
* but at least one of them should be implemented if we need poison
|
||||
* consumption handler
|
||||
*/
|
||||
if (block_obj->hw_ops->query_poison_status) {
|
||||
if (block_obj->hw_ops && block_obj->hw_ops->query_poison_status) {
|
||||
poison_stat = block_obj->hw_ops->query_poison_status(adev);
|
||||
if (!poison_stat) {
|
||||
/* Not poison consumption interrupt, no need to handle it */
|
||||
@ -1641,7 +1641,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
|
||||
if (!adev->gmc.xgmi.connected_to_cpu)
|
||||
amdgpu_umc_poison_handler(adev, false);
|
||||
|
||||
if (block_obj->hw_ops->handle_poison_consumption)
|
||||
if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption)
|
||||
poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
|
||||
|
||||
/* gpu reset is fallback for failed and default cases */
|
||||
@ -1649,6 +1649,8 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
|
||||
dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n",
|
||||
block_obj->ras_comm.name);
|
||||
amdgpu_ras_reset_gpu(adev);
|
||||
} else {
|
||||
amdgpu_gfx_poison_consumption_handler(adev, entry);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -70,6 +70,19 @@ static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
/* Workaround: when vmid and pasid are both zero, trigger gpu reset in KGD. */
|
||||
if (entry && (entry->client_id == SOC21_IH_CLIENTID_GFX) &&
|
||||
(entry->src_id == GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT) &&
|
||||
!entry->vmid && !entry->pasid)
|
||||
amdgpu_ras_reset_gpu(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * GFX RAS callback table for gfx v11_0_3: installs the RLC GC FED interrupt
 * handler and the poison consumption handler defined in this file.
 */
struct amdgpu_gfx_ras gfx_v11_0_3_ras = {
|
||||
.rlc_gc_fed_irq = gfx_v11_0_3_rlc_gc_fed_irq,
|
||||
.poison_consumption_handler = gfx_v11_0_3_poison_consumption_handler,
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user