2015-06-12 21:35:14 +03:00
/*
* Copyright 2014 Advanced Micro Devices , Inc .
*
* Permission is hereby granted , free of charge , to any person obtaining a
* copy of this software and associated documentation files ( the " Software " ) ,
* to deal in the Software without restriction , including without limitation
* the rights to use , copy , modify , merge , publish , distribute , sublicense ,
* and / or sell copies of the Software , and to permit persons to whom the
* Software is furnished to do so , subject to the following conditions :
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software .
*
* THE SOFTWARE IS PROVIDED " AS IS " , WITHOUT WARRANTY OF ANY KIND , EXPRESS OR
* IMPLIED , INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY ,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT . IN NO EVENT SHALL
* THE COPYRIGHT HOLDER ( S ) OR AUTHOR ( S ) BE LIABLE FOR ANY CLAIM , DAMAGES OR
* OTHER LIABILITY , WHETHER IN AN ACTION OF CONTRACT , TORT OR OTHERWISE ,
* ARISING FROM , OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE .
*/
# include "amdgpu_amdkfd.h"
2015-07-22 11:29:01 +08:00
# include "amd_shared.h"
2015-06-12 21:35:14 +03:00
# include <drm/drmP.h>
# include "amdgpu.h"
2017-06-07 12:59:29 -04:00
# include "amdgpu_gfx.h"
2015-06-12 21:35:14 +03:00
# include <linux/module.h>
const struct kgd2kfd_calls * kgd2kfd ;
2017-08-15 23:00:04 -04:00
bool ( * kgd2kfd_init_p ) ( unsigned int , const struct kgd2kfd_calls * * ) ;
2015-06-12 21:35:14 +03:00
2018-02-06 20:32:36 -05:00
static const unsigned int compute_vmid_bitmap = 0xFF00 ;
2016-02-09 13:30:12 +02:00
int amdgpu_amdkfd_init ( void )
2015-06-12 21:35:14 +03:00
{
2016-02-09 13:30:12 +02:00
int ret ;
2015-06-12 21:35:14 +03:00
# if defined(CONFIG_HSA_AMD_MODULE)
2017-08-15 23:00:04 -04:00
int ( * kgd2kfd_init_p ) ( unsigned int , const struct kgd2kfd_calls * * ) ;
2015-06-12 21:35:14 +03:00
kgd2kfd_init_p = symbol_request ( kgd2kfd_init ) ;
if ( kgd2kfd_init_p = = NULL )
2016-02-09 13:30:12 +02:00
return - ENOENT ;
ret = kgd2kfd_init_p ( KFD_INTERFACE_VERSION , & kgd2kfd ) ;
if ( ret ) {
symbol_put ( kgd2kfd_init ) ;
kgd2kfd = NULL ;
}
# elif defined(CONFIG_HSA_AMD)
ret = kgd2kfd_init ( KFD_INTERFACE_VERSION , & kgd2kfd ) ;
if ( ret )
kgd2kfd = NULL ;
# else
ret = - ENOENT ;
2015-06-12 21:35:14 +03:00
# endif
2018-02-06 20:32:38 -05:00
amdgpu_amdkfd_gpuvm_init_mem_limits ( ) ;
2016-02-09 13:30:12 +02:00
return ret ;
2015-06-12 21:35:14 +03:00
}
2017-07-28 16:54:54 -04:00
/* Tear down the amdkfd binding established by amdgpu_amdkfd_init():
 * let amdkfd clean up, then release the module reference.
 */
void amdgpu_amdkfd_fini(void)
{
	if (!kgd2kfd)
		return;

	kgd2kfd->exit();
	symbol_put(kgd2kfd_init);
}
void amdgpu_amdkfd_device_probe ( struct amdgpu_device * adev )
2015-06-12 21:35:14 +03:00
{
2017-07-28 16:54:54 -04:00
const struct kfd2kgd_calls * kfd2kgd ;
if ( ! kgd2kfd )
return ;
2017-02-01 17:02:13 -05:00
switch ( adev - > asic_type ) {
2015-07-31 17:20:14 -04:00
# ifdef CONFIG_DRM_AMDGPU_CIK
2015-06-12 21:35:14 +03:00
case CHIP_KAVERI :
2018-01-04 17:17:48 -05:00
case CHIP_HAWAII :
2015-06-12 21:38:22 +03:00
kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions ( ) ;
break ;
2015-07-31 17:20:14 -04:00
# endif
2014-10-07 14:43:07 +03:00
case CHIP_CARRIZO :
2018-01-04 17:17:48 -05:00
case CHIP_TONGA :
case CHIP_FIJI :
case CHIP_POLARIS10 :
case CHIP_POLARIS11 :
2014-10-07 14:43:07 +03:00
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions ( ) ;
break ;
2015-06-12 21:35:14 +03:00
default :
2017-10-26 09:30:38 +08:00
dev_dbg ( adev - > dev , " kfd not supported on this ASIC \n " ) ;
2017-07-28 16:54:54 -04:00
return ;
2015-06-12 21:35:14 +03:00
}
2017-07-28 16:54:54 -04:00
adev - > kfd = kgd2kfd - > probe ( ( struct kgd_dev * ) adev ,
adev - > pdev , kfd2kgd ) ;
2015-06-12 21:35:14 +03:00
}
2017-12-14 16:27:11 -05:00
/**
 * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
 * setup amdkfd
 *
 * @adev: amdgpu_device pointer
 * @aperture_base: output returning doorbell aperture base physical address
 * @aperture_size: output returning doorbell aperture size in bytes
 * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
 *
 * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
 * takes doorbells required for its own rings and reports the setup to amdkfd.
 * amdgpu reserved doorbells are at the start of the doorbell aperture.
 */
static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
					 phys_addr_t *aperture_base,
					 size_t *aperture_size,
					 size_t *start_offset)
{
	/* Bytes at the start of the aperture reserved for amdgpu's rings */
	size_t amdgpu_reserved = adev->doorbell.num_doorbells * sizeof(u32);

	if (adev->doorbell.size > amdgpu_reserved) {
		/* amdkfd gets whatever's left past the amdgpu doorbells */
		*aperture_base = adev->doorbell.base;
		*aperture_size = adev->doorbell.size;
		*start_offset = amdgpu_reserved;
	} else {
		/* Nothing left over -- report an empty aperture */
		*aperture_base = 0;
		*aperture_size = 0;
		*start_offset = 0;
	}
}
2017-02-01 17:02:13 -05:00
void amdgpu_amdkfd_device_init ( struct amdgpu_device * adev )
2015-06-12 21:35:14 +03:00
{
2017-02-03 16:28:48 -05:00
int i ;
int last_valid_bit ;
2017-02-01 17:02:13 -05:00
if ( adev - > kfd ) {
2015-06-12 21:35:14 +03:00
struct kgd2kfd_shared_resources gpu_resources = {
2018-02-06 20:32:36 -05:00
. compute_vmid_bitmap = compute_vmid_bitmap ,
2017-02-03 16:28:48 -05:00
. num_pipe_per_mec = adev - > gfx . mec . num_pipe_per_mec ,
2018-02-06 20:32:36 -05:00
. num_queue_per_pipe = adev - > gfx . mec . num_queue_per_pipe ,
. gpuvm_size = min ( adev - > vm_manager . max_pfn
< < AMDGPU_GPU_PAGE_SHIFT ,
AMDGPU_VA_HOLE_START ) ,
. drm_render_minor = adev - > ddev - > render - > index
2015-06-12 21:35:14 +03:00
} ;
2017-02-03 16:28:48 -05:00
/* this is going to have a few of the MSBs set that we need to
* clear */
bitmap_complement ( gpu_resources . queue_bitmap ,
adev - > gfx . mec . queue_bitmap ,
KGD_MAX_QUEUES ) ;
2017-04-06 00:10:53 -04:00
/* remove the KIQ bit as well */
if ( adev - > gfx . kiq . ring . ready )
2017-06-07 12:59:29 -04:00
clear_bit ( amdgpu_gfx_queue_to_bit ( adev ,
adev - > gfx . kiq . ring . me - 1 ,
adev - > gfx . kiq . ring . pipe ,
adev - > gfx . kiq . ring . queue ) ,
2017-04-06 00:10:53 -04:00
gpu_resources . queue_bitmap ) ;
2017-02-03 16:28:48 -05:00
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
* nbits is not compile time constant */
2017-07-13 20:21:53 -05:00
last_valid_bit = 1 /* only first MEC can have compute queues */
2017-02-03 16:28:48 -05:00
* adev - > gfx . mec . num_pipe_per_mec
* adev - > gfx . mec . num_queue_per_pipe ;
for ( i = last_valid_bit ; i < KGD_MAX_QUEUES ; + + i )
clear_bit ( i , gpu_resources . queue_bitmap ) ;
2017-02-01 17:02:13 -05:00
amdgpu_doorbell_get_kfd_info ( adev ,
2015-06-12 21:35:14 +03:00
& gpu_resources . doorbell_physical_address ,
& gpu_resources . doorbell_aperture_size ,
& gpu_resources . doorbell_start_offset ) ;
2017-02-01 17:02:13 -05:00
kgd2kfd - > device_init ( adev - > kfd , & gpu_resources ) ;
2015-06-12 21:35:14 +03:00
}
}
2017-02-01 17:02:13 -05:00
void amdgpu_amdkfd_device_fini ( struct amdgpu_device * adev )
2015-06-12 21:35:14 +03:00
{
2017-02-01 17:02:13 -05:00
if ( adev - > kfd ) {
kgd2kfd - > device_exit ( adev - > kfd ) ;
adev - > kfd = NULL ;
2015-06-12 21:35:14 +03:00
}
}
2017-02-01 17:02:13 -05:00
void amdgpu_amdkfd_interrupt ( struct amdgpu_device * adev ,
2015-06-12 21:35:14 +03:00
const void * ih_ring_entry )
{
2017-02-01 17:02:13 -05:00
if ( adev - > kfd )
kgd2kfd - > interrupt ( adev - > kfd , ih_ring_entry ) ;
2015-06-12 21:35:14 +03:00
}
2017-02-01 17:02:13 -05:00
void amdgpu_amdkfd_suspend ( struct amdgpu_device * adev )
2015-06-12 21:35:14 +03:00
{
2017-02-01 17:02:13 -05:00
if ( adev - > kfd )
kgd2kfd - > suspend ( adev - > kfd ) ;
2015-06-12 21:35:14 +03:00
}
2017-02-01 17:02:13 -05:00
int amdgpu_amdkfd_resume ( struct amdgpu_device * adev )
2015-06-12 21:35:14 +03:00
{
int r = 0 ;
2017-02-01 17:02:13 -05:00
if ( adev - > kfd )
r = kgd2kfd - > resume ( adev - > kfd ) ;
2015-06-12 21:35:14 +03:00
return r ;
}
/* Allocate a pinned, kernel-mapped GTT buffer object for amdkfd.
 *
 * On success returns 0 and fills @mem_obj (opaque struct amdgpu_bo *),
 * @gpu_addr (pinned GPU VA) and @cpu_ptr (kernel CPU mapping).  On
 * failure returns a negative errno and releases everything acquired so
 * far via the goto-cleanup chain.  Ownership of the BO passes to the
 * caller; free with free_gtt_mem().
 */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
		  void **mem_obj, uint64_t *gpu_addr,
		  void **cpu_ptr)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_bo *bo = NULL;
	uint64_t pinned_gpu_addr = 0;
	void *kernel_map = NULL;
	int r;

	r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
			     AMDGPU_GEM_CREATE_CPU_GTT_USWC,
			     ttm_bo_type_kernel, NULL, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, &pinned_gpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}

	r = amdgpu_bo_kmap(bo, &kernel_map);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}

	/* Success -- hand the pinned, mapped BO back to the caller */
	*mem_obj = bo;
	*gpu_addr = pinned_gpu_addr;
	*cpu_ptr = kernel_map;

	amdgpu_bo_unreserve(bo);

	return 0;

allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&bo);

	return r;
}
/* Release a buffer allocated with alloc_gtt_mem(): undo the kmap and
 * pin in reverse order, then drop the reference.
 */
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;

	amdgpu_bo_reserve(bo, true);
	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
}
2017-12-08 23:08:42 -05:00
void get_local_mem_info ( struct kgd_dev * kgd ,
struct kfd_local_mem_info * mem_info )
{
struct amdgpu_device * adev = ( struct amdgpu_device * ) kgd ;
uint64_t address_mask = adev - > dev - > dma_mask ? ~ * adev - > dev - > dma_mask :
~ ( ( 1ULL < < 32 ) - 1 ) ;
2018-01-12 14:52:22 +01:00
resource_size_t aper_limit = adev - > gmc . aper_base + adev - > gmc . aper_size ;
2017-12-08 23:08:42 -05:00
memset ( mem_info , 0 , sizeof ( * mem_info ) ) ;
2018-01-12 14:52:22 +01:00
if ( ! ( adev - > gmc . aper_base & address_mask | | aper_limit & address_mask ) ) {
mem_info - > local_mem_size_public = adev - > gmc . visible_vram_size ;
mem_info - > local_mem_size_private = adev - > gmc . real_vram_size -
adev - > gmc . visible_vram_size ;
2017-12-08 23:08:42 -05:00
} else {
mem_info - > local_mem_size_public = 0 ;
2018-01-12 14:52:22 +01:00
mem_info - > local_mem_size_private = adev - > gmc . real_vram_size ;
2017-12-08 23:08:42 -05:00
}
2018-01-12 14:52:22 +01:00
mem_info - > vram_width = adev - > gmc . vram_width ;
2017-12-08 23:08:42 -05:00
2018-01-08 13:53:56 +01:00
pr_debug ( " Address base: %pap limit %pap public 0x%llx private 0x%llx \n " ,
2018-01-12 14:52:22 +01:00
& adev - > gmc . aper_base , & aper_limit ,
2017-12-08 23:08:42 -05:00
mem_info - > local_mem_size_public ,
mem_info - > local_mem_size_private ) ;
2018-02-05 16:41:33 -05:00
if ( amdgpu_emu_mode = = 1 ) {
mem_info - > mem_clk_max = 100 ;
return ;
}
2017-12-08 23:08:42 -05:00
if ( amdgpu_sriov_vf ( adev ) )
mem_info - > mem_clk_max = adev - > clock . default_mclk / 100 ;
else
mem_info - > mem_clk_max = amdgpu_dpm_get_mclk ( adev , false ) / 100 ;
}
2015-06-12 21:35:14 +03:00
uint64_t get_gpu_clock_counter ( struct kgd_dev * kgd )
{
2017-02-01 17:02:13 -05:00
struct amdgpu_device * adev = ( struct amdgpu_device * ) kgd ;
2015-06-12 21:35:14 +03:00
2017-02-01 17:02:13 -05:00
if ( adev - > gfx . funcs - > get_gpu_clock_counter )
return adev - > gfx . funcs - > get_gpu_clock_counter ( adev ) ;
2015-06-12 21:35:14 +03:00
return 0 ;
}
uint32_t get_max_engine_clock_in_mhz ( struct kgd_dev * kgd )
{
2017-02-01 17:02:13 -05:00
struct amdgpu_device * adev = ( struct amdgpu_device * ) kgd ;
2015-06-12 21:35:14 +03:00
2017-11-27 18:29:43 -05:00
/* the sclk is in quantas of 10kHz */
2018-02-05 16:41:33 -05:00
if ( amdgpu_emu_mode = = 1 )
return 100 ;
2017-11-27 18:29:43 -05:00
if ( amdgpu_sriov_vf ( adev ) )
return adev - > clock . default_sclk / 100 ;
return amdgpu_dpm_get_sclk ( adev , false ) / 100 ;
2015-06-12 21:35:14 +03:00
}
2017-12-08 23:08:40 -05:00
void get_cu_info ( struct kgd_dev * kgd , struct kfd_cu_info * cu_info )
{
struct amdgpu_device * adev = ( struct amdgpu_device * ) kgd ;
struct amdgpu_cu_info acu_info = adev - > gfx . cu_info ;
memset ( cu_info , 0 , sizeof ( * cu_info ) ) ;
if ( sizeof ( cu_info - > cu_bitmap ) ! = sizeof ( acu_info . bitmap ) )
return ;
cu_info - > cu_active_number = acu_info . number ;
cu_info - > cu_ao_mask = acu_info . ao_cu_mask ;
memcpy ( & cu_info - > cu_bitmap [ 0 ] , & acu_info . bitmap [ 0 ] ,
sizeof ( acu_info . bitmap ) ) ;
cu_info - > num_shader_engines = adev - > gfx . config . max_shader_engines ;
cu_info - > num_shader_arrays_per_engine = adev - > gfx . config . max_sh_per_se ;
cu_info - > num_cu_per_sh = adev - > gfx . config . max_cu_per_sh ;
cu_info - > simd_per_cu = acu_info . simd_per_cu ;
cu_info - > max_waves_per_simd = acu_info . max_waves_per_simd ;
cu_info - > wave_front_size = acu_info . wave_front_size ;
cu_info - > max_scratch_slots_per_cu = acu_info . max_scratch_slots_per_cu ;
cu_info - > lds_size = acu_info . lds_size ;
}
2017-12-08 23:09:05 -05:00
uint64_t amdgpu_amdkfd_get_vram_usage ( struct kgd_dev * kgd )
{
struct amdgpu_device * adev = ( struct amdgpu_device * ) kgd ;
return amdgpu_vram_mgr_usage ( & adev - > mman . bdev . man [ TTM_PL_VRAM ] ) ;
}
2018-02-06 20:32:36 -05:00
2018-02-06 20:32:39 -05:00
/* Submit one indirect buffer on behalf of amdkfd and wait for it to
 * retire (synchronous submission).
 *
 * @engine selects the HW ring (MEC1 compute ring 0, or SDMA instance
 * 0/1); @vmid is used directly because this path runs without the HW
 * scheduler.  Returns 0 on success or a negative errno.
 */
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
			    uint32_t vmid, uint64_t gpu_addr,
			    uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ring *ring;
	struct dma_fence *fence = NULL;
	int ret;

	/* Map the requested engine onto a HW ring */
	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		ret = -EINVAL;
		goto err;
	}

	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
	if (ret)
		goto err;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &fence);
	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
		goto err_ib_sched;
	}

	/* Block until the IB has executed */
	ret = dma_fence_wait(fence, false);

	/* Success path intentionally falls through the cleanup below */
err_ib_sched:
	dma_fence_put(fence);
	amdgpu_job_free(job);
err:
	return ret;
}
2018-02-06 20:32:36 -05:00
/* True iff KFD is attached to this GPU and @vmid falls inside the
 * compute VMID range reserved for it (bits set in compute_vmid_bitmap).
 */
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
	if (!adev->kfd)
		return false;

	return ((1 << vmid) & compute_vmid_bitmap) != 0;
}