drm/amdgpu: retire bad pages for umc v12_0
Retire bad pages for umc v12_0. Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
e74313be5a
commit
314c38cde6
@ -28,6 +28,8 @@
|
||||
#include "umc/umc_12_0_0_sh_mask.h"
|
||||
#include "mp/mp_13_0_6_sh_mask.h"
|
||||
|
||||
#define MAX_ECC_NUM_PER_RETIREMENT 32
|
||||
|
||||
static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev,
|
||||
uint32_t node_inst,
|
||||
uint32_t umc_inst,
|
||||
@ -374,6 +376,7 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef TO_BE_REMOVED
|
||||
static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
|
||||
void *ras_error_status)
|
||||
{
|
||||
@ -442,6 +445,7 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev,
|
||||
enum amdgpu_mca_error_type type, void *ras_error_status)
|
||||
@ -633,6 +637,58 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int umc_v12_0_fill_error_record(struct amdgpu_device *adev,
|
||||
struct ras_ecc_err *ecc_err, void *ras_error_status)
|
||||
{
|
||||
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
|
||||
uint32_t i = 0;
|
||||
int ret = 0;
|
||||
|
||||
if (!err_data || !ecc_err)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < ecc_err->err_pages.count; i++) {
|
||||
ret = amdgpu_umc_fill_error_record(err_data,
|
||||
ecc_err->addr,
|
||||
ecc_err->err_pages.pfn[i] << AMDGPU_GPU_PAGE_SHIFT,
|
||||
MCA_IPID_2_UMC_CH(ecc_err->ipid),
|
||||
MCA_IPID_2_UMC_INST(ecc_err->ipid));
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
err_data->de_count++;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev,
|
||||
void *ras_error_status)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
struct ras_ecc_err *entries[MAX_ECC_NUM_PER_RETIREMENT];
|
||||
struct radix_tree_root *ecc_tree;
|
||||
int new_detected, ret, i;
|
||||
|
||||
ecc_tree = &con->umc_ecc_log.de_page_tree;
|
||||
|
||||
mutex_lock(&con->umc_ecc_log.lock);
|
||||
new_detected = radix_tree_gang_lookup_tag(ecc_tree, (void **)entries,
|
||||
0, ARRAY_SIZE(entries), UMC_ECC_NEW_DETECTED_TAG);
|
||||
for (i = 0; i < new_detected; i++) {
|
||||
if (!entries[i])
|
||||
continue;
|
||||
|
||||
ret = umc_v12_0_fill_error_record(adev, entries[i], ras_error_status);
|
||||
if (ret) {
|
||||
dev_err(adev->dev, "Fail to fill umc error record, ret:%d\n", ret);
|
||||
break;
|
||||
}
|
||||
radix_tree_tag_clear(ecc_tree, entries[i]->hash_index, UMC_ECC_NEW_DETECTED_TAG);
|
||||
}
|
||||
mutex_unlock(&con->umc_ecc_log.lock);
|
||||
}
|
||||
|
||||
struct amdgpu_umc_ras umc_v12_0_ras = {
|
||||
.ras_block = {
|
||||
.hw_ops = &umc_v12_0_ras_hw_ops,
|
||||
@ -640,8 +696,7 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
|
||||
},
|
||||
.err_cnt_init = umc_v12_0_err_cnt_init,
|
||||
.query_ras_poison_mode = umc_v12_0_query_ras_poison_mode,
|
||||
.ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count,
|
||||
.ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address,
|
||||
.ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr,
|
||||
.check_ecc_err_status = umc_v12_0_check_ecc_err_status,
|
||||
.update_ecc_status = umc_v12_0_update_ecc_status,
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user