// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include "kfd_device_queue_manager.h"
#include "kfd_priv.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"

static inline struct process_queue_node *get_queue_by_qid(
			struct process_queue_manager *pqm, unsigned int qid)
{
	struct process_queue_node *pqn;

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if ((pqn->q && pqn->q->properties.queue_id == qid) ||
		    (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
			return pqn;
	}

	return NULL;
}

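/*
 * Reserve a specific queue slot. Used on restore paths (e.g. CRIU), where
 * a queue must keep its original qid instead of taking the first free one.
 */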
static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
				    unsigned int qid)
{
	if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		return -EINVAL;

	if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
		pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
		return -ENOSPC;
	}

	return 0;
}

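/* Find and reserve the lowest unused queue slot in the process's bitmap. */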
static int find_available_queue_slot(struct process_queue_manager *pqm,
					unsigned int *qid)
{
	unsigned long found;

	found = find_first_zero_bit(pqm->queue_slot_bitmap,
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

	pr_debug("The new slot id %lu\n", found);

	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
		pr_info("Cannot open more queues for process with pasid 0x%x\n",
				pqm->process->pasid);
		return -ENOMEM;
	}

	set_bit(found, pqm->queue_slot_bitmap);
	*qid = found;

	return 0;
}

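/*
 * Remove all of this process's queues from one device. Safe to call more
 * than once; pdd->already_dequeued makes repeated calls a no-op.
 */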
void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	if (pdd->already_dequeued)
		return;

	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
	pdd->already_dequeued = true;
}

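/*
 * Attach GWS (global wave sync) resources to a user queue, or detach them
 * when gws is NULL. At most one queue per process may hold GWS at a time.
 */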
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
			void *gws)
{
	struct kfd_dev *dev = NULL;
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct kgd_mem *mem = NULL;
	int ret;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -EINVAL;
	}

	/* Only allow one queue per process to have GWS assigned */
	if (gws && pdd->qpd.num_gws)
		return -EBUSY;

	if (!gws && pdd->qpd.num_gws == 0)
		return -EINVAL;

	if (gws)
		ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
			gws, &mem);
	else
		ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
			pqn->q->gws);
	if (unlikely(ret))
		return ret;

	pqn->q->gws = mem;
	pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;

	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
							pqn->q, NULL);
}

void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
{
	int i;

	for (i = 0; i < p->n_pdds; i++)
		kfd_process_dequeue_from_device(p->pdds[i]);
}

int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
{
	INIT_LIST_HEAD(&pqm->queues);
	pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
					       GFP_KERNEL);
	if (!pqm->queue_slot_bitmap)
		return -ENOMEM;
	pqm->process = p;

	return 0;
}

void pqm_uninit(struct process_queue_manager *pqm)
{
	struct process_queue_node *pqn, *next;

	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
		if (pqn->q && pqn->q->gws)
			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
				pqn->q->gws);
		kfd_procfs_del_queue(pqn->q);
		uninit_queue(pqn->q);
		list_del(&pqn->process_queue_list);
		kfree(pqn);
	}

	bitmap_free(pqm->queue_slot_bitmap);
	pqm->queue_slot_bitmap = NULL;
}

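/*
 * Common initialization for user-mode queues: the doorbell is mapped by
 * user space and the VMID is assigned by the DQM. When MES is enabled,
 * a gang context buffer is also allocated in GTT memory.
 */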
static int init_user_queue(struct process_queue_manager *pqm,
				struct kfd_dev *dev, struct queue **q,
				struct queue_properties *q_properties,
				struct file *f, unsigned int qid)
{
	int retval;

	/* Doorbell initialized in user space */
	q_properties->doorbell_ptr = NULL;

	/* let DQM handle it */
	q_properties->vmid = 0;
	q_properties->queue_id = qid;

	retval = init_queue(q, q_properties);
	if (retval != 0)
		return retval;

	(*q)->device = dev;
	(*q)->process = pqm->process;

	if (dev->shared_resources.enable_mes) {
		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
						AMDGPU_MES_GANG_CTX_SIZE,
						&(*q)->gang_ctx_bo,
						&(*q)->gang_ctx_gpu_addr,
						&(*q)->gang_ctx_cpu_ptr,
						false);
		if (retval) {
			pr_err("failed to allocate gang context bo\n");
			goto cleanup;
		}
		memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
	}

	pr_debug("PQM After init queue");
	return 0;

cleanup:
	if (dev->shared_resources.enable_mes)
		uninit_queue(*q);
	return retval;
}

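/*
 * Create a user compute/SDMA queue or a kernel DIQ for this process.
 * q_data, restore_mqd and restore_ctl_stack are only non-NULL on the
 * CRIU restore path, where the saved qid and MQD contents are reused.
 * On success, *p_doorbell_offset_in_process (if provided) receives the
 * byte offset of the queue's doorbell within the process doorbell page.
 */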
int pqm_create_queue(struct process_queue_manager *pqm,
			    struct kfd_dev *dev,
			    struct file *f,
			    struct queue_properties *properties,
			    unsigned int *qid,
			    const struct kfd_criu_queue_priv_data *q_data,
			    const void *restore_mqd,
			    const void *restore_ctl_stack,
			    uint32_t *p_doorbell_offset_in_process)
{
	int retval;
	struct kfd_process_device *pdd;
	struct queue *q;
	struct process_queue_node *pqn;
	struct kernel_queue *kq;
	enum kfd_queue_type type = properties->type;
	unsigned int max_queues = 127; /* HWS limit */

	q = NULL;
	kq = NULL;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	/*
	 * For a debug process, verify that it is within the static queues
	 * limit. The limit is currently set to half of the total available
	 * HQD slots. If we are just about to create the DIQ, the is_debug
	 * flag is not set yet, hence we also check the queue type.
	 */
	if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
		max_queues = dev->device_info.max_no_of_hqd / 2;

	if (pdd->qpd.queue_count >= max_queues)
		return -ENOSPC;

	if (q_data) {
		retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
		*qid = q_data->q_id;
	} else
		retval = find_available_queue_slot(pqm, qid);

	if (retval != 0)
		return retval;

	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);

	pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
	if (!pqn) {
		retval = -ENOMEM;
		goto err_allocate_pqn;
	}

	switch (type) {
	case KFD_QUEUE_TYPE_SDMA:
	case KFD_QUEUE_TYPE_SDMA_XGMI:
		/* SDMA queues are always allocated statically no matter
		 * which scheduler mode is used. We also do not need to
		 * check whether a SDMA queue can be allocated here, because
		 * allocate_sdma_queue() in create_queue() has the
		 * corresponding check logic.
		 */
		retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;

	case KFD_QUEUE_TYPE_COMPUTE:
		/* check if there is over subscription */
		if ((dev->dqm->sched_policy ==
		     KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
		((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
		(dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
			pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
			retval = -EPERM;
			goto err_create_queue;
		}

		retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;
	case KFD_QUEUE_TYPE_DIQ:
		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
		if (!kq) {
			retval = -ENOMEM;
			goto err_create_queue;
		}
		kq->queue->properties.queue_id = *qid;
		pqn->kq = kq;
		pqn->q = NULL;
		retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
							   kq, &pdd->qpd);
		break;
	default:
		WARN(1, "Invalid queue type %d", type);
		retval = -EINVAL;
	}

	if (retval != 0) {
		pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
		       pqm->process->pasid, type, retval);
		goto err_create_queue;
	}

	if (q && p_doorbell_offset_in_process)
		/* Return the doorbell offset within the doorbell page
		 * to the caller so it can be passed up to user mode
		 * (in bytes).
		 * There are always 1024 doorbells per process, so in case
		 * of 8-byte doorbells, there are two doorbell pages per
		 * process.
		 */
		*p_doorbell_offset_in_process =
			(q->properties.doorbell_off * sizeof(uint32_t)) &
			(kfd_doorbell_process_slice(dev) - 1);

	pr_debug("PQM After DQM create queue\n");

	list_add(&pqn->process_queue_list, &pqm->queues);

	if (q) {
		pr_debug("PQM done creating queue\n");
		kfd_procfs_add_queue(q);
		print_queue_properties(&q->properties);
	}

	return retval;

err_create_queue:
	uninit_queue(q);
	if (kq)
		kernel_queue_uninit(kq, false);
	kfree(pqn);
err_allocate_pqn:
	/* if the queues list is empty, unregister the process from the device */
	clear_bit(*qid, pqm->queue_slot_bitmap);
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
	return retval;
}

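/*
 * Destroy the queue identified by qid and release its slot. GWS, the MES
 * gang context and the procfs entry tied to a user queue go with it.
 */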
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
{
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct device_queue_manager *dqm;
	struct kfd_dev *dev;
	int retval;

	dqm = NULL;

	retval = 0;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	dev = NULL;
	if (pqn->kq)
		dev = pqn->kq->dev;
	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	if (pqn->kq) {
		/* destroy kernel queue (DIQ) */
		dqm = pqn->kq->dev->dqm;
		dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
		kernel_queue_uninit(pqn->kq, false);
	}

	if (pqn->q) {
		dqm = pqn->q->device->dqm;
		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
		if (retval) {
			pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
			       pqm->process->pasid,
			       pqn->q->properties.queue_id, retval);
			if (retval != -ETIME)
				goto err_destroy_queue;
		}

		if (pqn->q->gws) {
			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
				pqn->q->gws);
			pdd->qpd.num_gws = 0;
		}

		if (dev->shared_resources.enable_mes)
			amdgpu_amdkfd_free_gtt_mem(dev->adev,
						   pqn->q->gang_ctx_bo);

		kfd_procfs_del_queue(pqn->q);
		uninit_queue(pqn->q);
	}

	list_del(&pqn->process_queue_list);
	kfree(pqn);
	clear_bit(qid, pqm->queue_slot_bitmap);

	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dqm->ops.unregister_process(dqm, &pdd->qpd);

err_destroy_queue:
	return retval;
}

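/* Update ring address, size, percentage and priority, then have DQM apply them. */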
int pqm_update_queue_properties(struct process_queue_manager *pqm,
				unsigned int qid, struct queue_properties *p)
{
	int retval;
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("No queue %d exists for update operation\n", qid);
		return -EFAULT;
	}

	pqn->q->properties.queue_address = p->queue_address;
	pqn->q->properties.queue_size = p->queue_size;
	pqn->q->properties.queue_percent = p->queue_percent;
	pqn->q->properties.priority = p->priority;

	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
						       pqn->q, NULL);
	if (retval != 0)
		return retval;

	return 0;
}

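/* Push an MQD update (e.g. a new CU mask carried in minfo) down to the DQM. */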
int pqm_update_mqd(struct process_queue_manager *pqm,
				unsigned int qid, struct mqd_update_info *minfo)
{
	int retval;
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("No queue %d exists for update operation\n", qid);
		return -EFAULT;
	}

	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
						       pqn->q, minfo);
	if (retval != 0)
		return retval;

	return 0;
}

struct kernel_queue *pqm_get_kernel_queue(
					struct process_queue_manager *pqm,
					unsigned int qid)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (pqn && pqn->kq)
		return pqn->kq;

	return NULL;
}

struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
					unsigned int qid)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	return pqn ? pqn->q : NULL;
}

int pqm_get_wave_state(struct process_queue_manager *pqm,
		       unsigned int qid,
		       void __user *ctl_stack,
		       u32 *ctl_stack_used_size,
		       u32 *save_area_used_size)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n",
			 qid);
		return -EFAULT;
	}

	return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
						       pqn->q,
						       ctl_stack,
						       ctl_stack_used_size,
						       save_area_used_size);
}

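/* Query the MQD and control-stack sizes needed to checkpoint one queue. */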
static int get_queue_data_sizes(struct kfd_process_device *pdd,
				struct queue *q,
				uint32_t *mqd_size,
				uint32_t *ctl_stack_size)
{
	int ret;

	ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
					    q->properties.queue_id,
					    mqd_size,
					    ctl_stack_size);
	if (ret)
		pr_err("Failed to get queue dump info (%d)\n", ret);

	return ret;
}

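/*
 * Count the process's checkpointable queues and compute the total private
 * data size CRIU must reserve for them: one kfd_criu_queue_priv_data
 * header per queue plus each queue's MQD and control stack.
 */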
int kfd_process_get_queue_info(struct kfd_process *p,
			       uint32_t *num_queues,
			       uint64_t *priv_data_sizes)
{
	uint32_t extra_data_sizes = 0;
	struct queue *q;
	int i;
	int ret;

	*num_queues = 0;

	/* Run over all PDDs of the process */
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		list_for_each_entry(q, &pdd->qpd.queues_list, list) {
			if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
				q->properties.type == KFD_QUEUE_TYPE_SDMA ||
				q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
				uint32_t mqd_size, ctl_stack_size;

				*num_queues = *num_queues + 1;

				ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
				if (ret)
					return ret;

				extra_data_sizes += mqd_size + ctl_stack_size;
			} else {
				pr_err("Unsupported queue type (%d)\n", q->properties.type);
				return -EOPNOTSUPP;
			}
		}
	}
	*priv_data_sizes = extra_data_sizes +
				(*num_queues * sizeof(struct kfd_criu_queue_priv_data));

	return 0;
}

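/* Snapshot a queue's MQD and control stack into the caller's buffers. */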
static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
			      unsigned int qid,
			      void *mqd,
			      void *ctl_stack)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
		return -EFAULT;
	}

	if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
		pr_err("amdkfd: queue dumping not supported on this device\n");
		return -EOPNOTSUPP;
	}

	return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
						       pqn->q, mqd, ctl_stack);
}

static int criu_checkpoint_queue(struct kfd_process_device *pdd,
				 struct queue *q,
				 struct kfd_criu_queue_priv_data *q_data)
{
	uint8_t *mqd, *ctl_stack;
	int ret;

	mqd = (void *)(q_data + 1);
	ctl_stack = mqd + q_data->mqd_size;

	q_data->gpu_id = pdd->user_gpu_id;
	q_data->type = q->properties.type;
	q_data->format = q->properties.format;
	q_data->q_id = q->properties.queue_id;
	q_data->q_address = q->properties.queue_address;
	q_data->q_size = q->properties.queue_size;
	q_data->priority = q->properties.priority;
	q_data->q_percent = q->properties.queue_percent;
	q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
	q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
	q_data->doorbell_id = q->doorbell_id;
	q_data->sdma_id = q->sdma_id;

	q_data->eop_ring_buffer_address =
		q->properties.eop_ring_buffer_address;

	q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;

	q_data->ctx_save_restore_area_address =
		q->properties.ctx_save_restore_area_address;

	q_data->ctx_save_restore_area_size =
		q->properties.ctx_save_restore_area_size;

	q_data->gws = !!q->gws;

	ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
	if (ret) {
		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
		return ret;
	}

	pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
	return ret;
}

static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
					 uint8_t __user *user_priv,
					 unsigned int *q_index,
					 uint64_t *queues_priv_data_offset)
{
	unsigned int q_private_data_size = 0;
	uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
	struct queue *q;
	int ret = 0;

	list_for_each_entry(q, &pdd->qpd.queues_list, list) {
		struct kfd_criu_queue_priv_data *q_data;
		uint64_t q_data_size;
		uint32_t mqd_size;
		uint32_t ctl_stack_size;

		if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
			q->properties.type != KFD_QUEUE_TYPE_SDMA &&
			q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {

			pr_err("Unsupported queue type (%d)\n", q->properties.type);
			ret = -EOPNOTSUPP;
			break;
		}

		ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
		if (ret)
			break;

		q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;

		/* Increase local buffer space if needed */
		if (q_private_data_size < q_data_size) {
			kfree(q_private_data);

			q_private_data = kzalloc(q_data_size, GFP_KERNEL);
			if (!q_private_data) {
				ret = -ENOMEM;
				break;
			}
			q_private_data_size = q_data_size;
		}

		q_data = (struct kfd_criu_queue_priv_data *)q_private_data;

		/* data stored in this order: priv_data, mqd, ctl_stack */
		q_data->mqd_size = mqd_size;
		q_data->ctl_stack_size = ctl_stack_size;

		ret = criu_checkpoint_queue(pdd, q, q_data);
		if (ret)
			break;

		q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;

		ret = copy_to_user(user_priv + *queues_priv_data_offset,
				   q_data, q_data_size);
		if (ret) {
			ret = -EFAULT;
			break;
		}
		*queues_priv_data_offset += q_data_size;
		*q_index = *q_index + 1;
	}

	kfree(q_private_data);

	return ret;
}

int kfd_criu_checkpoint_queues(struct kfd_process *p,
			       uint8_t __user *user_priv_data,
			       uint64_t *priv_data_offset)
{
	int ret = 0, pdd_index, q_index = 0;

	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
		struct kfd_process_device *pdd = p->pdds[pdd_index];

		/*
		 * criu_checkpoint_queues_device will copy data to user and update q_index and
		 * queues_priv_data_offset
		 */
		ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
						    priv_data_offset);

		if (ret)
			break;
	}

	return ret;
}

static void set_queue_properties_from_criu(struct queue_properties *qp,
					   struct kfd_criu_queue_priv_data *q_data)
{
	qp->is_interop = false;
	qp->queue_percent = q_data->q_percent;
	qp->priority = q_data->priority;
	qp->queue_address = q_data->q_address;
	qp->queue_size = q_data->q_size;
	qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
	qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
	qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
	qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
	qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
	qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
	qp->ctl_stack_size = q_data->ctl_stack_size;
	qp->type = q_data->type;
	qp->format = q_data->format;
}

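/*
 * Recreate one queue from CRIU private data: read the header, MQD and
 * control stack from user memory, rebuild the queue properties, then
 * create the queue with its original qid and reattach GWS if it had any.
 */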
int kfd_criu_restore_queue(struct kfd_process *p,
			   uint8_t __user *user_priv_ptr,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size)
{
	uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
	struct kfd_criu_queue_priv_data *q_data;
	struct kfd_process_device *pdd;
	uint64_t q_extra_data_size;
	struct queue_properties qp;
	unsigned int queue_id;
	int ret = 0;

	if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
		return -EINVAL;

	q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
	if (!q_data)
		return -ENOMEM;

	ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += sizeof(*q_data);
	q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;

	if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
		ret = -EINVAL;
		goto exit;
	}

	q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
	if (!q_extra_data) {
		ret = -ENOMEM;
		goto exit;
	}

	ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += q_extra_data_size;

	pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
	if (!pdd) {
		pr_err("Failed to get pdd\n");
		ret = -EINVAL;
		goto exit;
	}

	/* data stored in this order: mqd, ctl_stack */
	mqd = q_extra_data;
	ctl_stack = mqd + q_data->mqd_size;

	memset(&qp, 0, sizeof(qp));
	set_queue_properties_from_criu(&qp, q_data);

	print_queue_properties(&qp);

	ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack,
				NULL);
	if (ret) {
		pr_err("Failed to create new queue err:%d\n", ret);
		goto exit;
	}

	if (q_data->gws)
		ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);

exit:
	if (ret)
		pr_err("Failed to restore queue (%d)\n", ret);
	else
		pr_debug("Queue id %d was restored successfully\n", queue_id);

	kfree(q_data);

	return ret;
}

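/* Report the checkpoint buffer sizes (MQD and control stack) for one queue. */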
int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
				  unsigned int qid,
				  uint32_t *mqd_size,
				  uint32_t *ctl_stack_size)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
		return -EFAULT;
	}

	if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
		pr_err("amdkfd: queue dumping not supported on this device\n");
		return -EOPNOTSUPP;
	}

	pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
							   pqn->q, mqd_size,
							   ctl_stack_size);
	return 0;
}

#if defined(CONFIG_DEBUG_FS)

int pqm_debugfs_mqds(struct seq_file *m, void *data)
{
	struct process_queue_manager *pqm = data;
	struct process_queue_node *pqn;
	struct queue *q;
	enum KFD_MQD_TYPE mqd_type;
	struct mqd_manager *mqd_mgr;
	int r = 0;

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if (pqn->q) {
			q = pqn->q;
			switch (q->properties.type) {
			case KFD_QUEUE_TYPE_SDMA:
			case KFD_QUEUE_TYPE_SDMA_XGMI:
				seq_printf(m, "  SDMA queue on device %x\n",
					   q->device->id);
				mqd_type = KFD_MQD_TYPE_SDMA;
				break;
			case KFD_QUEUE_TYPE_COMPUTE:
				seq_printf(m, "  Compute queue on device %x\n",
					   q->device->id);
				mqd_type = KFD_MQD_TYPE_CP;
				break;
			default:
				seq_printf(m,
				"  Bad user queue type %d on device %x\n",
					   q->properties.type, q->device->id);
				continue;
			}
			mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
		} else if (pqn->kq) {
			q = pqn->kq->queue;
			mqd_mgr = pqn->kq->mqd_mgr;
			switch (q->properties.type) {
			case KFD_QUEUE_TYPE_DIQ:
				seq_printf(m, "  DIQ on device %x\n",
					   pqn->kq->dev->id);
				break;
			default:
				seq_printf(m,
				"  Bad kernel queue type %d on device %x\n",
					   q->properties.type,
					   pqn->kq->dev->id);
				continue;
			}
		} else {
			seq_printf(m,
				"  Weird: Queue node with neither kernel nor user queue\n");
			continue;
		}

		r = mqd_mgr->debugfs_show_mqd(m, q->mqd);
		if (r != 0)
			break;
	}

	return r;
}

#endif