drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources
We can directly calculate the SDMA doorbell indexes in the process doorbell pages from the doorbell_index structure in amdgpu_device, so there is no need to cache them in kgd2kfd_shared_resources any more. This reduces the adaptation needed when new SDMA configurations are introduced. Signed-off-by: Yong Zhao <Yong.Zhao@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
		| @@ -131,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, | ||||
| 
 | ||||
| void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) | ||||
| { | ||||
| 	int i, n; | ||||
| 	int i; | ||||
| 	int last_valid_bit; | ||||
| 
 | ||||
| 	if (adev->kfd.dev) { | ||||
| @@ -142,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) | ||||
| 			.gpuvm_size = min(adev->vm_manager.max_pfn | ||||
| 					  << AMDGPU_GPU_PAGE_SHIFT, | ||||
| 					  AMDGPU_GMC_HOLE_START), | ||||
| 			.drm_render_minor = adev->ddev->render->index | ||||
| 			.drm_render_minor = adev->ddev->render->index, | ||||
| 			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine, | ||||
| 
 | ||||
| 		}; | ||||
| 
 | ||||
| 		/* this is going to have a few of the MSBs set that we need to
 | ||||
| @@ -172,31 +174,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) | ||||
| 				&gpu_resources.doorbell_aperture_size, | ||||
| 				&gpu_resources.doorbell_start_offset); | ||||
| 
 | ||||
| 		if (adev->asic_type < CHIP_VEGA10) { | ||||
| 			kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); | ||||
| 			return; | ||||
| 		} | ||||
| 
 | ||||
| 		n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8; | ||||
| 
 | ||||
| 		for (i = 0; i < n; i += 2) { | ||||
| 			/* On SOC15 the BIF is involved in routing
 | ||||
| 			 * doorbells using the low 12 bits of the | ||||
| 			 * address. Communicate the assignments to | ||||
| 			 * KFD. KFD uses two doorbell pages per | ||||
| 			 * process in case of 64-bit doorbells so we | ||||
| 			 * can use each doorbell assignment twice. | ||||
| 			 */ | ||||
| 			gpu_resources.sdma_doorbell[0][i] = | ||||
| 				adev->doorbell_index.sdma_engine[0] + (i >> 1); | ||||
| 			gpu_resources.sdma_doorbell[0][i+1] = | ||||
| 				adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1); | ||||
| 			gpu_resources.sdma_doorbell[1][i] = | ||||
| 				adev->doorbell_index.sdma_engine[1] + (i >> 1); | ||||
| 			gpu_resources.sdma_doorbell[1][i+1] = | ||||
| 				adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1); | ||||
| 		} | ||||
| 
 | ||||
| 		/* Since SOC15, BIF starts to statically use the
 | ||||
| 		 * lower 12 bits of doorbell addresses for routing | ||||
| 		 * based on settings in registers like | ||||
| @@ -205,10 +182,12 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) | ||||
| 		 * 12 bits of its address has to be outside the range | ||||
| 		 * set for SDMA, VCN, and IH blocks. | ||||
| 		 */ | ||||
| 		gpu_resources.non_cp_doorbells_start = | ||||
| 				adev->doorbell_index.first_non_cp; | ||||
| 		gpu_resources.non_cp_doorbells_end = | ||||
| 				adev->doorbell_index.last_non_cp; | ||||
| 		if (adev->asic_type >= CHIP_VEGA10) { | ||||
| 			gpu_resources.non_cp_doorbells_start = | ||||
| 					adev->doorbell_index.first_non_cp; | ||||
| 			gpu_resources.non_cp_doorbells_end = | ||||
| 					adev->doorbell_index.last_non_cp; | ||||
| 		} | ||||
| 
 | ||||
| 		kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); | ||||
| 	} | ||||
|   | ||||
| @@ -134,12 +134,18 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) | ||||
| 		 */ | ||||
| 		q->doorbell_id = q->properties.queue_id; | ||||
| 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { | ||||
| 		/* For SDMA queues on SOC15, use static doorbell
 | ||||
| 		 * assignments based on the engine and queue. | ||||
| 		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
 | ||||
| 		 * doorbell assignments based on the engine and queue id. | ||||
| 		 * The doorbell index distance between RLC (2*i) and (2*i+1) | ||||
| 		 * for an SDMA engine is 512. | ||||
| 		 */ | ||||
| 		q->doorbell_id = dev->shared_resources.sdma_doorbell | ||||
| 			[q->properties.sdma_engine_id] | ||||
| 			[q->properties.sdma_queue_id]; | ||||
| 		uint32_t *idx_offset = | ||||
| 				dev->shared_resources.sdma_doorbell_idx; | ||||
| 
 | ||||
| 		q->doorbell_id = idx_offset[q->properties.sdma_engine_id] | ||||
| 			+ (q->properties.sdma_queue_id & 1) | ||||
| 			* KFD_QUEUE_DOORBELL_MIRROR_OFFSET | ||||
| 			+ (q->properties.sdma_queue_id >> 1); | ||||
| 	} else { | ||||
| 		/* For CP queues on SOC15 reserve a free doorbell ID */ | ||||
| 		unsigned int found; | ||||
|   | ||||
| @@ -137,11 +137,11 @@ struct kgd2kfd_shared_resources { | ||||
| 	/* Bit n == 1 means Queue n is available for KFD */ | ||||
| 	DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES); | ||||
| 
 | ||||
| 	/* Doorbell assignments (SOC15 and later chips only). Only
 | ||||
| 	/* SDMA doorbell assignments (SOC15 and later chips only). Only
 | ||||
| 	 * specific doorbells are routed to each SDMA engine. Others | ||||
| 	 * are routed to IH and VCN. They are not usable by the CP. | ||||
| 	 */ | ||||
| 	unsigned int sdma_doorbell[2][8]; | ||||
| 	uint32_t *sdma_doorbell_idx; | ||||
| 
 | ||||
| 	/* From SOC15 onward, the doorbell index range not usable for CP
 | ||||
| 	 * queues. | ||||
|   | ||||
		Reference in New Issue
	
	Block a user