drm/amdgpu: move xgmi ras functions to xgmi_ras_funcs
XGMI RAS is not managed by the GPU driver when the GPU is connected to the CPU through XGMI. Move all XGMI RAS functions into xgmi_ras_funcs so that the GPU driver only initializes them when it actually manages XGMI RAS.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: John Clements <John.Clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit 52137ca852 (parent 6e36f23193)
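In effect, the patch replaces direct calls such as amdgpu_xgmi_ras_late_init() with guarded calls through a per-device function-pointer table that is wired up only when the GPU, not the CPU, owns XGMI RAS. Below is a minimal, self-contained sketch of that dispatch pattern; struct adev, struct xgmi_ops and xgmi_ras_ops are simplified stand-ins for illustration, not the real amdgpu structures.

/*
 * Sketch only: a conditionally-assigned ops table with NULL-checked dispatch,
 * mirroring the shape of the amdgpu_gmc_ras_late_init() change below.
 */
#include <stdio.h>
#include <stdbool.h>

struct adev;	/* forward declaration of the simplified device struct */

/* Plays the role of struct amdgpu_xgmi_ras_funcs: a table of RAS hooks. */
struct xgmi_ops {
	int  (*ras_late_init)(struct adev *dev);
	void (*ras_fini)(struct adev *dev);
};

struct adev {
	bool connected_to_cpu;		/* XGMI link to a CPU: RAS not driver-managed */
	const struct xgmi_ops *ops;	/* NULL unless the driver manages XGMI RAS */
};

static int my_ras_late_init(struct adev *dev)
{
	(void)dev;
	printf("xgmi ras late init\n");
	return 0;
}

static void my_ras_fini(struct adev *dev)
{
	(void)dev;
	printf("xgmi ras fini\n");
}

static const struct xgmi_ops xgmi_ras_ops = {
	.ras_late_init = my_ras_late_init,
	.ras_fini = my_ras_fini,
};

/* Analogue of the new amdgpu_gmc_ras_late_init() flow: hook up the table only
 * when the GPU is not connected to a CPU over XGMI, then call through it. */
static int ras_late_init(struct adev *dev)
{
	int r;

	if (!dev->connected_to_cpu)
		dev->ops = &xgmi_ras_ops;

	if (dev->ops && dev->ops->ras_late_init) {
		r = dev->ops->ras_late_init(dev);
		if (r)
			return r;
	}
	return 0;
}

int main(void)
{
	struct adev gpu = { .connected_to_cpu = false, .ops = NULL };
	struct adev apu = { .connected_to_cpu = true,  .ops = NULL };

	ras_late_init(&gpu);	/* dispatches into xgmi_ras_ops */
	ras_late_init(&apu);	/* skips XGMI RAS entirely */

	if (gpu.ops && gpu.ops->ras_fini)
		gpu.ops->ras_fini(&gpu);
	return 0;
}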
@@ -403,14 +403,26 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 			return r;
 	}
 
-	return amdgpu_xgmi_ras_late_init(adev);
+	if (!adev->gmc.xgmi.connected_to_cpu)
+		adev->gmc.xgmi.ras_funcs = &xgmi_ras_funcs;
+
+	if (adev->gmc.xgmi.ras_funcs &&
+	    adev->gmc.xgmi.ras_funcs->ras_late_init) {
+		r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev);
+		if (r)
+			return r;
+	}
+
+	return 0;
 }
 
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 {
 	amdgpu_umc_ras_fini(adev);
 	amdgpu_mmhub_ras_fini(adev);
-	amdgpu_xgmi_ras_fini(adev);
+	if (adev->gmc.xgmi.ras_funcs &&
+	    adev->gmc.xgmi.ras_funcs->ras_fini)
+		adev->gmc.xgmi.ras_funcs->ras_fini(adev);
 }
 
 /*
@@ -135,6 +135,14 @@ struct amdgpu_gmc_funcs {
 	unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
 };
 
+struct amdgpu_xgmi_ras_funcs {
+	int (*ras_late_init)(struct amdgpu_device *adev);
+	void (*ras_fini)(struct amdgpu_device *adev);
+	int (*query_ras_error_count)(struct amdgpu_device *adev,
+				     void *ras_error_status);
+	void (*reset_ras_error_count)(struct amdgpu_device *adev);
+};
+
 struct amdgpu_xgmi {
 	/* from psp */
 	u64 node_id;
@@ -151,6 +159,7 @@ struct amdgpu_xgmi {
 	struct ras_common_if *ras_if;
 	bool connected_to_cpu;
 	bool pending_reset;
+	const struct amdgpu_xgmi_ras_funcs *ras_funcs;
 };
 
 struct amdgpu_gmc {
@@ -809,7 +809,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 			adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data);
 		break;
 	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
-		amdgpu_xgmi_query_ras_error_count(adev, &err_data);
+		if (adev->gmc.xgmi.ras_funcs &&
+		    adev->gmc.xgmi.ras_funcs->query_ras_error_count)
+			adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
 		break;
 	default:
 		break;
@@ -628,7 +628,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
 	return psp_xgmi_terminate(&adev->psp);
 }
 
-int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
 {
 	int r;
 	struct ras_ih_if ih_info = {
@@ -642,7 +642,7 @@ int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
 	    adev->gmc.xgmi.num_physical_nodes == 0)
 		return 0;
 
-	amdgpu_xgmi_reset_ras_error_count(adev);
+	adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
 
 	if (!adev->gmc.xgmi.ras_if) {
 		adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
@@ -664,7 +664,7 @@ int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
 	return r;
 }
 
-void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
+static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
 {
 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
 	    adev->gmc.xgmi.ras_if) {
@@ -691,7 +691,7 @@ static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg)
 	WREG32_PCIE(pcs_status_reg, 0);
 }
 
-void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
+static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
 {
 	uint32_t i;
 
@@ -751,8 +751,8 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
 	return 0;
 }
 
-int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
-				      void *ras_error_status)
+static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
+					     void *ras_error_status)
 {
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
 	int i;
@@ -801,10 +801,17 @@ int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 		break;
 	}
 
-	amdgpu_xgmi_reset_ras_error_count(adev);
+	adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
 
 	err_data->ue_count += ue_cnt;
 	err_data->ce_count += ce_cnt;
 
 	return 0;
 }
+
+const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs = {
+	.ras_late_init = amdgpu_xgmi_ras_late_init,
+	.ras_fini = amdgpu_xgmi_ras_fini,
+	.query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
+	.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
+};
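The hunk above also shows the encapsulation this buys: the worker functions become static, and the only symbol the rest of the driver sees is the const xgmi_ras_funcs table, declared extern in the header change that follows. Here is a small stand-alone sketch of that layout, assuming illustrative names (ras_ops, err_counts) rather than the real amdgpu types; the .h/.c split is marked with comments so the sketch stays compilable as one file.

/*
 * Sketch only: static implementations, one exported const ops table,
 * and a caller that dispatches through it.
 */
#include <stdio.h>

struct err_counts {
	unsigned int ue;	/* uncorrectable errors */
	unsigned int ce;	/* correctable errors */
};

struct ras_ops {
	int  (*query)(struct err_counts *out);
	void (*reset)(void);
};

/* --- what would live in the header: ---
 * extern const struct ras_ops xgmi_ras_ops;
 * (the individual function prototypes are no longer exported)
 */

/* --- what would live in the .c file: --- */
static unsigned int hw_ue_counter = 2;	/* pretend hardware counters */
static unsigned int hw_ce_counter = 5;

static void reset_error_count(void)
{
	hw_ue_counter = 0;
	hw_ce_counter = 0;
}

static int query_error_count(struct err_counts *out)
{
	out->ue += hw_ue_counter;
	out->ce += hw_ce_counter;
	reset_error_count();	/* read-and-clear, as in the hunk above */
	return 0;
}

const struct ras_ops xgmi_ras_ops = {
	.query = query_error_count,
	.reset = reset_error_count,
};

/* --- a caller elsewhere in the driver: --- */
int main(void)
{
	struct err_counts data = { 0, 0 };

	if (xgmi_ras_ops.query)
		xgmi_ras_ops.query(&data);
	printf("ue=%u ce=%u\n", data.ue, data.ce);
	return 0;
}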
@@ -50,6 +50,7 @@ struct amdgpu_pcs_ras_field {
 	uint32_t pcs_err_shift;
 };
 
+extern const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs;
 struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
 void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
 int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
@@ -58,14 +59,8 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
 int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
 int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
 		struct amdgpu_device *peer_adev);
-int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev);
-void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev);
 uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
 					   uint64_t addr);
-int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
-				      void *ras_error_status);
-void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev);
-
 static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
 		struct amdgpu_device *bo_adev)
 {
@@ -1208,7 +1208,7 @@ static int gmc_v9_0_early_init(void *handle)
 			adev->gmc.xgmi.supported = true;
 			adev->gmc.xgmi.connected_to_cpu =
 				adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
 		}
 	}
 
 	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
 	adev->gmc.shared_aperture_end =