drm/amdgpu: Introduce reset domain
Define a reset_domain struct so that all entities that go through a reset together are serialized against one another. Do this for both the single-device and the XGMI hive cases. Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Suggested-by: Daniel Vetter <daniel.vetter@ffwll.ch> Suggested-by: Christian König <ckoenig.leichtzumerken@gmail.com> Reviewed-by: Christian König <christian.koenig@amd.com> Link: https://www.spinics.net/lists/amd-gfx/msg74111.html
This commit is contained in:
parent
b21a142fd2
commit
a4c63cafa5
@ -813,6 +813,10 @@ struct amd_powerplay {
|
||||
#define AMDGPU_RESET_MAGIC_NUM 64
|
||||
#define AMDGPU_MAX_DF_PERFMONS 4
|
||||
#define AMDGPU_PRODUCT_NAME_LEN 64
|
||||
/*
 * Reset domain: the set of entities that go through a GPU reset together.
 *
 * @wq: work queue belonging to the domain. In this change it is created
 *      with alloc_ordered_workqueue(), either once per stand-alone device
 *      or once per XGMI hive; a device that is part of a hive stores the
 *      hive's wq pointer here instead of allocating its own.
 *
 * NOTE(review): the intent stated by the commit is that reset work of all
 * members of one domain is serialized through this queue; the code that
 * actually queues work is not part of this view.
 */
struct amdgpu_reset_domain {
|
||||
struct workqueue_struct *wq;
|
||||
};
|
||||
|
||||
struct amdgpu_device {
|
||||
struct device *dev;
|
||||
struct pci_dev *pdev;
|
||||
@ -1100,6 +1104,7 @@ struct amdgpu_device {
|
||||
uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];
|
||||
|
||||
bool ram_is_direct_mapped;
|
||||
struct amdgpu_reset_domain reset_domain;
|
||||
};
|
||||
|
||||
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
|
||||
|
@ -2398,9 +2398,27 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
|
||||
if (r)
|
||||
goto init_failed;
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1)
|
||||
/*
 * Select the reset domain work queue for this device.
 *
 * A device that belongs to a multi-node XGMI hive reuses the hive's work
 * queue; a stand-alone device allocates its own ordered work queue.
 * On failure r is set and control jumps to init_failed.
 */
if (adev->gmc.xgmi.num_physical_nodes > 1) {
	struct amdgpu_hive_info *hive;

	amdgpu_xgmi_add_device(adev);

	/*
	 * NOTE(review): if amdgpu_get_xgmi_hive() returns a counted
	 * reference, nothing here drops it again - confirm against its
	 * definition and add the matching put if required.
	 */
	hive = amdgpu_get_xgmi_hive(adev);
	if (!hive || !hive->reset_domain.wq) {
		/*
		 * hive may be NULL on this path, so it must not be
		 * dereferenced for the message; report the hive id stored
		 * on the device, which is where hive->hive_id is copied from.
		 */
		DRM_ERROR("Failed to obtain reset domain info for XGMI hive:%llx\n",
			  adev->gmc.xgmi.hive_id);
		r = -EINVAL;
		goto init_failed;
	}

	/* Share the hive's queue rather than creating one per device. */
	adev->reset_domain.wq = hive->reset_domain.wq;
} else {
	adev->reset_domain.wq = alloc_ordered_workqueue("amdgpu-reset-dev", 0);
	if (!adev->reset_domain.wq) {
		r = -ENOMEM;
		goto init_failed;
	}
}
|
||||
|
||||
/* Don't init kfd if whole hive need to be reset during init */
|
||||
if (!adev->gmc.xgmi.pending_reset)
|
||||
amdgpu_amdkfd_device_init(adev);
|
||||
|
@ -398,6 +398,14 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
|
||||
goto pro_end;
|
||||
}
|
||||
|
||||
hive->reset_domain.wq = alloc_ordered_workqueue("amdgpu-reset-hive", 0);
|
||||
if (!hive->reset_domain.wq) {
|
||||
dev_err(adev->dev, "XGMI: failed allocating wq for reset domain!\n");
|
||||
kfree(hive);
|
||||
hive = NULL;
|
||||
goto pro_end;
|
||||
}
|
||||
|
||||
hive->hive_id = adev->gmc.xgmi.hive_id;
|
||||
INIT_LIST_HEAD(&hive->device_list);
|
||||
INIT_LIST_HEAD(&hive->node);
|
||||
@ -407,6 +415,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
|
||||
task_barrier_init(&hive->tb);
|
||||
hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
|
||||
hive->hi_req_gpu = NULL;
|
||||
|
||||
/*
|
||||
* hive pstate on boot is high in vega20 so we have to go to low
|
||||
* pstate after boot.
|
||||
|
@ -42,6 +42,8 @@ struct amdgpu_hive_info {
|
||||
AMDGPU_XGMI_PSTATE_MAX_VEGA20,
|
||||
AMDGPU_XGMI_PSTATE_UNKNOWN
|
||||
} pstate;
|
||||
|
||||
struct amdgpu_reset_domain reset_domain;
|
||||
};
|
||||
|
||||
struct amdgpu_pcs_ras_field {
|
||||
|
Loading…
x
Reference in New Issue
Block a user