drm/amdgpu/pm: support mca_ceumc_addr in ecctable
SMU add a new variable mca_ceumc_addr to record umc correctable error address in EccInfo table, driver side add EccInfo_V2_t to support this feature Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com> Reviewed-by: Lijo Lazar <lijo.lazar@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
faf26f2b12
commit
2f6247dad2
@ -328,6 +328,7 @@ struct ecc_info_per_ch {
|
||||
uint16_t ce_count_hi_chip;
|
||||
uint64_t mca_umc_status;
|
||||
uint64_t mca_umc_addr;
|
||||
uint64_t mca_ceumc_addr;
|
||||
};
|
||||
|
||||
struct umc_ecc_info {
|
||||
|
@ -519,7 +519,21 @@ typedef struct {
|
||||
} EccInfo_t;
|
||||
|
||||
typedef struct {
|
||||
EccInfo_t EccInfo[ALDEBARAN_UMC_CHANNEL_NUM];
|
||||
uint64_t mca_umc_status;
|
||||
uint64_t mca_umc_addr;
|
||||
uint64_t mca_ceumc_addr;
|
||||
|
||||
uint16_t ce_count_lo_chip;
|
||||
uint16_t ce_count_hi_chip;
|
||||
|
||||
uint32_t eccPadding;
|
||||
} EccInfo_V2_t;
|
||||
|
||||
typedef struct {
|
||||
union {
|
||||
EccInfo_t EccInfo[ALDEBARAN_UMC_CHANNEL_NUM];
|
||||
EccInfo_V2_t EccInfo_V2[ALDEBARAN_UMC_CHANNEL_NUM];
|
||||
};
|
||||
} EccInfoTable_t;
|
||||
|
||||
// These defines are used with the following messages:
|
||||
|
@ -82,6 +82,12 @@
|
||||
*/
|
||||
#define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00
|
||||
|
||||
/*
|
||||
* SMU support mca_ceumc_addr in ECCTABLE since version 68.55.0,
|
||||
* use this to check mca_ceumc_addr record whether support
|
||||
*/
|
||||
#define SUPPORT_ECCTABLE_V2_SMU_VERSION 0x00443700
|
||||
|
||||
/*
|
||||
* SMU support BAD CHENNEL info MSG since version 68.51.00,
|
||||
* use this to check ECCTALE feature whether support
|
||||
@ -1803,7 +1809,8 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
|
||||
return sizeof(struct gpu_metrics_v1_3);
|
||||
}
|
||||
|
||||
static int aldebaran_check_ecc_table_support(struct smu_context *smu)
|
||||
static int aldebaran_check_ecc_table_support(struct smu_context *smu,
|
||||
int *ecctable_version)
|
||||
{
|
||||
uint32_t if_version = 0xff, smu_version = 0xff;
|
||||
int ret = 0;
|
||||
@ -1816,6 +1823,11 @@ static int aldebaran_check_ecc_table_support(struct smu_context *smu)
|
||||
|
||||
if (smu_version < SUPPORT_ECCTABLE_SMU_VERSION)
|
||||
ret = -EOPNOTSUPP;
|
||||
else if (smu_version >= SUPPORT_ECCTABLE_SMU_VERSION &&
|
||||
smu_version < SUPPORT_ECCTABLE_V2_SMU_VERSION)
|
||||
*ecctable_version = 1;
|
||||
else
|
||||
*ecctable_version = 2;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1827,9 +1839,10 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,
|
||||
EccInfoTable_t *ecc_table = NULL;
|
||||
struct ecc_info_per_ch *ecc_info_per_channel = NULL;
|
||||
int i, ret = 0;
|
||||
int table_version = 0;
|
||||
struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table;
|
||||
|
||||
ret = aldebaran_check_ecc_table_support(smu);
|
||||
ret = aldebaran_check_ecc_table_support(smu, &table_version);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -1845,16 +1858,32 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,
|
||||
|
||||
ecc_table = (EccInfoTable_t *)smu_table->ecc_table;
|
||||
|
||||
for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {
|
||||
ecc_info_per_channel = &(eccinfo->ecc[i]);
|
||||
ecc_info_per_channel->ce_count_lo_chip =
|
||||
ecc_table->EccInfo[i].ce_count_lo_chip;
|
||||
ecc_info_per_channel->ce_count_hi_chip =
|
||||
ecc_table->EccInfo[i].ce_count_hi_chip;
|
||||
ecc_info_per_channel->mca_umc_status =
|
||||
ecc_table->EccInfo[i].mca_umc_status;
|
||||
ecc_info_per_channel->mca_umc_addr =
|
||||
ecc_table->EccInfo[i].mca_umc_addr;
|
||||
if (table_version == 1) {
|
||||
for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {
|
||||
ecc_info_per_channel = &(eccinfo->ecc[i]);
|
||||
ecc_info_per_channel->ce_count_lo_chip =
|
||||
ecc_table->EccInfo[i].ce_count_lo_chip;
|
||||
ecc_info_per_channel->ce_count_hi_chip =
|
||||
ecc_table->EccInfo[i].ce_count_hi_chip;
|
||||
ecc_info_per_channel->mca_umc_status =
|
||||
ecc_table->EccInfo[i].mca_umc_status;
|
||||
ecc_info_per_channel->mca_umc_addr =
|
||||
ecc_table->EccInfo[i].mca_umc_addr;
|
||||
}
|
||||
} else if (table_version == 2) {
|
||||
for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {
|
||||
ecc_info_per_channel = &(eccinfo->ecc[i]);
|
||||
ecc_info_per_channel->ce_count_lo_chip =
|
||||
ecc_table->EccInfo_V2[i].ce_count_lo_chip;
|
||||
ecc_info_per_channel->ce_count_hi_chip =
|
||||
ecc_table->EccInfo_V2[i].ce_count_hi_chip;
|
||||
ecc_info_per_channel->mca_umc_status =
|
||||
ecc_table->EccInfo_V2[i].mca_umc_status;
|
||||
ecc_info_per_channel->mca_umc_addr =
|
||||
ecc_table->EccInfo_V2[i].mca_umc_addr;
|
||||
ecc_info_per_channel->mca_ceumc_addr =
|
||||
ecc_table->EccInfo_V2[i].mca_ceumc_addr;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
Reference in New Issue
Block a user