2017-12-06 17:49:39 +01:00
/*
* Copyright 2015 Advanced Micro Devices , Inc .
*
* Permission is hereby granted , free of charge , to any person obtaining a
* copy of this software and associated documentation files ( the " Software " ) ,
* to deal in the Software without restriction , including without limitation
* the rights to use , copy , modify , merge , publish , distribute , sublicense ,
* and / or sell copies of the Software , and to permit persons to whom the
* Software is furnished to do so , subject to the following conditions :
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software .
*
* THE SOFTWARE IS PROVIDED " AS IS " , WITHOUT WARRANTY OF ANY KIND , EXPRESS OR
* IMPLIED , INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY ,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT . IN NO EVENT SHALL
* THE COPYRIGHT HOLDER ( S ) OR AUTHOR ( S ) BE LIABLE FOR ANY CLAIM , DAMAGES OR
* OTHER LIABILITY , WHETHER IN AN ACTION OF CONTRACT , TORT OR OTHERWISE ,
* ARISING FROM , OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE .
*
*/
# ifndef _DRM_GPU_SCHEDULER_H_
# define _DRM_GPU_SCHEDULER_H_
# include <drm/spsc_queue.h>
# include <linux/dma-fence.h>
2019-11-08 16:31:10 +11:00
# include <linux/completion.h>
2017-12-06 17:49:39 +01:00
2018-05-30 15:11:01 -04:00
/*
 * Timeout expressed in jiffies, converted from 1000 ms.
 * NOTE(review): judging by the name this bounds the wait for an entity's job
 * queue to become empty - confirm against the users of this macro.
 */
#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)

/*
 * Forward declarations: struct drm_sched_entity refers to both types by
 * pointer before their full definitions appear further down in this header.
 */
struct drm_gpu_scheduler;
struct drm_sched_rq;
2020-08-11 19:59:58 -04:00
/**
 * enum drm_sched_priority - priority levels known to the scheduler
 *
 * @DRM_SCHED_PRIORITY_MIN: first level, value 0.
 * @DRM_SCHED_PRIORITY_NORMAL: value 1.
 * @DRM_SCHED_PRIORITY_HIGH: value 2.
 * @DRM_SCHED_PRIORITY_KERNEL: value 3, the last real level.
 * @DRM_SCHED_PRIORITY_COUNT: number of real levels; sizes the
 *	&drm_gpu_scheduler.sched_rq array, so it must directly follow the
 *	last real level.
 * @DRM_SCHED_PRIORITY_UNSET: negative marker, deliberately not usable as an
 *	array index.
 *
 * These are often used as an (initial) index to an array, and as such
 * should start at 0.
 */
enum drm_sched_priority {
	DRM_SCHED_PRIORITY_MIN,
	DRM_SCHED_PRIORITY_NORMAL,
	DRM_SCHED_PRIORITY_HIGH,
	DRM_SCHED_PRIORITY_KERNEL,

	DRM_SCHED_PRIORITY_COUNT,
	DRM_SCHED_PRIORITY_UNSET = -2
};
/**
2018-05-29 11:23:07 +05:30
* struct drm_sched_entity - A wrapper around a job queue ( typically
* attached to the DRM file_priv ) .
*
* @ list : used to append this struct to the list of entities in the
* runqueue .
2018-08-01 13:49:59 +05:30
* @ rq : runqueue on which this entity is currently scheduled .
2020-01-22 10:37:56 +01:00
* @ sched_list : A list of schedulers ( drm_gpu_schedulers ) .
* Jobs from this entity can be scheduled on any scheduler
* on this list .
2020-01-14 10:38:42 +01:00
* @ num_sched_list : number of drm_gpu_schedulers in the sched_list .
2020-03-28 14:20:22 +01:00
* @ priority : priority of the entity
2018-05-29 11:23:07 +05:30
* @ rq_lock : lock to modify the runqueue to which this entity belongs .
* @ job_queue : the list of jobs of this entity .
* @ fence_seq : a linearly increasing seqno incremented with each
* new & drm_sched_fence which is part of the entity .
* @ fence_context : a unique context for all the fences which belong
* to this entity .
* The & drm_sched_fence . scheduled uses the
* fence_context but & drm_sched_fence . finished uses
* fence_context + 1.
* @ dependency : the dependency fence of the job which is on the top
* of the job queue .
* @ cb : callback for the dependency fence above .
* @ guilty : points to ctx ' s guilty .
* @ fini_status : contains the exit status in case the process was signalled .
* @ last_scheduled : points to the finished fence of the last scheduled job .
2018-07-26 13:43:49 +02:00
* @ last_user : last group leader pushing a job into the entity .
2018-08-17 10:32:50 -04:00
* @ stopped : Marks the enity as removed from rq and destined for termination .
2019-11-04 16:30:05 -05:00
* @ entity_idle : Signals when enityt is not in use
2018-04-04 15:32:51 -07:00
*
* Entities will emit jobs in order to their corresponding hardware
* ring , and the scheduler will alternate between entities based on
* scheduling policy .
2018-05-29 11:23:07 +05:30
*/
2017-12-06 17:49:39 +01:00
struct drm_sched_entity {
struct list_head list ;
struct drm_sched_rq * rq ;
2019-12-05 11:38:00 +01:00
struct drm_gpu_scheduler * * sched_list ;
2020-01-14 10:38:42 +01:00
unsigned int num_sched_list ;
2019-12-05 11:38:00 +01:00
enum drm_sched_priority priority ;
2017-12-06 17:49:39 +01:00
spinlock_t rq_lock ;
struct spsc_queue job_queue ;
atomic_t fence_seq ;
uint64_t fence_context ;
struct dma_fence * dependency ;
struct dma_fence_cb cb ;
2018-05-29 11:23:07 +05:30
atomic_t * guilty ;
struct dma_fence * last_scheduled ;
2018-07-26 13:43:49 +02:00
struct task_struct * last_user ;
2018-08-17 10:32:50 -04:00
bool stopped ;
2019-11-04 16:30:05 -05:00
struct completion entity_idle ;
2017-12-06 17:49:39 +01:00
} ;
/**
2018-05-29 11:23:07 +05:30
* struct drm_sched_rq - queue of entities to be scheduled .
*
* @ lock : to modify the entities list .
2018-07-13 15:21:13 +05:30
* @ sched : the scheduler to which this rq belongs to .
2018-05-29 11:23:07 +05:30
* @ entities : list of the entities to be scheduled .
* @ current_entity : the entity which is to be scheduled .
*
2017-12-06 17:49:39 +01:00
* Run queue is a set of entities scheduling command submissions for
* one specific ring . It implements the scheduling policy that selects
* the next entity to emit commands from .
2018-05-29 11:23:07 +05:30
*/
2017-12-06 17:49:39 +01:00
struct drm_sched_rq {
spinlock_t lock ;
2018-07-13 15:21:13 +05:30
struct drm_gpu_scheduler * sched ;
2017-12-06 17:49:39 +01:00
struct list_head entities ;
struct drm_sched_entity * current_entity ;
} ;
2018-05-29 11:23:07 +05:30
/**
* struct drm_sched_fence - fences corresponding to the scheduling of a job .
*/
2017-12-06 17:49:39 +01:00
struct drm_sched_fence {
2018-05-29 11:23:07 +05:30
/**
* @ scheduled : this fence is what will be signaled by the scheduler
* when the job is scheduled .
*/
2017-12-06 17:49:39 +01:00
struct dma_fence scheduled ;
2018-04-04 15:32:51 -07:00
2018-05-29 11:23:07 +05:30
/**
* @ finished : this fence is what will be signaled by the scheduler
* when the job is completed .
*
* When setting up an out fence for the job , you should use
* this , since it ' s available immediately upon
* drm_sched_job_init ( ) , and the fence returned by the driver
* from run_job ( ) won ' t be created until the dependencies have
* resolved .
*/
2017-12-06 17:49:39 +01:00
struct dma_fence finished ;
2018-04-04 15:32:51 -07:00
2018-05-29 11:23:07 +05:30
/**
* @ parent : the fence returned by & drm_sched_backend_ops . run_job
* when scheduling the job on hardware . We signal the
* & drm_sched_fence . finished fence once parent is signalled .
*/
2017-12-06 17:49:39 +01:00
struct dma_fence * parent ;
2018-05-29 11:23:07 +05:30
/**
* @ sched : the scheduler instance to which the job having this struct
* belongs to .
*/
2017-12-06 17:49:39 +01:00
struct drm_gpu_scheduler * sched ;
2018-05-29 11:23:07 +05:30
/**
* @ lock : the lock used by the scheduled and the finished fences .
*/
2017-12-06 17:49:39 +01:00
spinlock_t lock ;
2018-05-29 11:23:07 +05:30
/**
* @ owner : job owner for debugging
*/
2017-12-06 17:49:39 +01:00
void * owner ;
} ;
/*
 * Maps a generic dma_fence pointer to a drm_sched_fence pointer.
 * NOTE(review): the implementation is not part of this header; presumably @f
 * must be one of the fences embedded in a drm_sched_fence - confirm the
 * behaviour for foreign fences against the definition.
 */
struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
2018-04-04 15:32:51 -07:00
/**
2018-05-29 11:23:07 +05:30
* struct drm_sched_job - A job to be run by an entity .
*
* @ queue_node : used to append this struct to the queue of jobs in an entity .
* @ sched : the scheduler instance on which this job is scheduled .
* @ s_fence : contains the fences for the scheduling of job .
* @ finish_cb : the callback for the finished fence .
* @ node : used to append this struct to the @ drm_gpu_scheduler . ring_mirror_list .
* @ id : a unique id assigned to each job scheduled on the scheduler .
* @ karma : increment on every hang caused by this job . If this exceeds the hang
* limit of the scheduler then the job is marked guilty and will not
* be scheduled further .
* @ s_priority : the priority of the job .
* @ entity : the entity to which this job belongs .
2018-12-05 14:21:28 -05:00
* @ cb : the callback for the parent fence in s_fence .
2018-04-04 15:32:51 -07:00
*
* A job is created by the driver using drm_sched_job_init ( ) , and
* should call drm_sched_entity_push_job ( ) once it wants the scheduler
* to schedule the job .
*/
2017-12-06 17:49:39 +01:00
struct drm_sched_job {
struct spsc_node queue_node ;
struct drm_gpu_scheduler * sched ;
struct drm_sched_fence * s_fence ;
struct dma_fence_cb finish_cb ;
struct list_head node ;
uint64_t id ;
atomic_t karma ;
enum drm_sched_priority s_priority ;
2018-04-16 10:07:02 +08:00
struct drm_sched_entity * entity ;
2018-12-05 14:21:28 -05:00
struct dma_fence_cb cb ;
2017-12-06 17:49:39 +01:00
} ;
static inline bool drm_sched_invalidate_job ( struct drm_sched_job * s_job ,
int threshold )
{
return ( s_job & & atomic_inc_return ( & s_job - > karma ) > threshold ) ;
}
/**
 * struct drm_sched_backend_ops
 *
 * Define the backend operations called by the scheduler,
 * these functions should be implemented in driver side.
 */
struct drm_sched_backend_ops {
	/**
	 * @dependency: Called when the scheduler is considering scheduling
	 * this job next, to get another struct dma_fence for this job to
	 * block on.  Once it returns NULL, run_job() may be called.
	 */
	struct dma_fence *(*dependency)(struct drm_sched_job *sched_job,
					struct drm_sched_entity *s_entity);

	/**
	 * @run_job: Called to execute the job once all of the dependencies
	 * have been resolved.  This may be called multiple times, if
	 * timedout_job() has happened and drm_sched_job_recovery()
	 * decides to try it again.
	 */
	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);

	/**
	 * @timedout_job: Called when a job has taken too long to execute,
	 * to trigger GPU recovery.
	 */
	void (*timedout_job)(struct drm_sched_job *sched_job);

	/**
	 * @free_job: Called once the job's finished fence has been signaled
	 * and it's time to clean it up.
	 */
	void (*free_job)(struct drm_sched_job *sched_job);
};
/**
2018-05-29 11:23:07 +05:30
* struct drm_gpu_scheduler
*
* @ ops : backend operations provided by the driver .
* @ hw_submission_limit : the max size of the hardware queue .
* @ timeout : the time after which a job is removed from the scheduler .
* @ name : name of the ring for which this scheduler is being used .
* @ sched_rq : priority wise array of run queues .
* @ wake_up_worker : the wait queue on which the scheduler sleeps until a job
* is ready to be scheduled .
* @ job_scheduled : once @ drm_sched_entity_do_release is called the scheduler
* waits on this wait queue until all the scheduled jobs are
* finished .
* @ hw_rq_count : the number of jobs currently in the hardware queue .
* @ job_id_count : used to assign unique id to the each job .
2018-09-26 02:09:02 +09:00
* @ work_tdr : schedules a delayed call to @ drm_sched_job_timedout after the
* timeout interval is over .
2018-05-29 11:23:07 +05:30
* @ thread : the kthread on which the scheduler which run .
* @ ring_mirror_list : the list of jobs which are currently in the job queue .
* @ job_list_lock : lock to protect the ring_mirror_list .
* @ hang_limit : once the hangs by a job crosses this limit then it is marked
* guilty and it will be considered for scheduling further .
2020-06-25 14:07:23 +02:00
* @ score : score to help loadbalancer pick a idle sched
2018-10-18 12:32:46 -04:00
* @ ready : marks if the underlying HW is ready to work
2019-04-18 11:00:23 -04:00
* @ free_guilty : A hit to time out handler to free the guilty job .
2018-05-29 11:23:07 +05:30
*
* One scheduler is implemented for each hardware ring .
*/
2017-12-06 17:49:39 +01:00
struct drm_gpu_scheduler {
const struct drm_sched_backend_ops * ops ;
uint32_t hw_submission_limit ;
long timeout ;
const char * name ;
2020-08-11 19:59:58 -04:00
struct drm_sched_rq sched_rq [ DRM_SCHED_PRIORITY_COUNT ] ;
2017-12-06 17:49:39 +01:00
wait_queue_head_t wake_up_worker ;
wait_queue_head_t job_scheduled ;
atomic_t hw_rq_count ;
atomic64_t job_id_count ;
2018-09-26 02:09:02 +09:00
struct delayed_work work_tdr ;
2017-12-06 17:49:39 +01:00
struct task_struct * thread ;
struct list_head ring_mirror_list ;
spinlock_t job_list_lock ;
int hang_limit ;
2020-06-25 14:07:23 +02:00
atomic_t score ;
bool ready ;
2019-04-18 11:00:23 -04:00
bool free_guilty ;
2017-12-06 17:49:39 +01:00
} ;
/*
 * Entry points that take a struct drm_gpu_scheduler, a struct drm_sched_job
 * or a struct drm_sched_rq. The implementations are not part of this header.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
		   const struct drm_sched_backend_ops *ops,
		   uint32_t hw_submission, unsigned int hang_limit, long timeout,
		   const char *name);

void drm_sched_fini(struct drm_gpu_scheduler *sched);
int drm_sched_job_init(struct drm_sched_job *job,
		       struct drm_sched_entity *entity,
		       void *owner);
void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
				   struct drm_gpu_scheduler **sched_list,
				   unsigned int num_sched_list);

void drm_sched_job_cleanup(struct drm_sched_job *job);
void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery);
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
void drm_sched_increase_karma(struct drm_sched_job *bad);
bool drm_sched_dependency_optimized(struct dma_fence *fence,
				    struct drm_sched_entity *entity);
void drm_sched_fault(struct drm_gpu_scheduler *sched);
void drm_sched_job_kickout(struct drm_sched_job *s_job);

void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
			     struct drm_sched_entity *entity);
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
				struct drm_sched_entity *entity);
2017-12-06 17:49:39 +01:00
2018-07-13 15:21:14 +05:30
int drm_sched_entity_init ( struct drm_sched_entity * entity ,
2019-12-05 11:38:00 +01:00
enum drm_sched_priority priority ,
struct drm_gpu_scheduler * * sched_list ,
2020-01-14 10:38:42 +01:00
unsigned int num_sched_list ,
2018-03-29 22:36:32 +05:30
atomic_t * guilty ) ;
2018-07-20 17:51:05 +05:30
long drm_sched_entity_flush ( struct drm_sched_entity * entity , long timeout ) ;
void drm_sched_entity_fini ( struct drm_sched_entity * entity ) ;
void drm_sched_entity_destroy ( struct drm_sched_entity * entity ) ;
2018-08-06 14:25:32 +02:00
void drm_sched_entity_select_rq ( struct drm_sched_entity * entity ) ;
struct drm_sched_job * drm_sched_entity_pop_job ( struct drm_sched_entity * entity ) ;
2017-12-06 17:49:39 +01:00
void drm_sched_entity_push_job ( struct drm_sched_job * sched_job ,
struct drm_sched_entity * entity ) ;
2018-08-01 16:22:39 +02:00
void drm_sched_entity_set_priority ( struct drm_sched_entity * entity ,
enum drm_sched_priority priority ) ;
2018-08-06 14:25:32 +02:00
bool drm_sched_entity_is_ready ( struct drm_sched_entity * entity ) ;
2017-12-06 17:49:39 +01:00
/*
 * Scheduler fence helpers, timeout suspend/resume and scheduler selection.
 * The implementations are not part of this header.
 */
struct drm_sched_fence *drm_sched_fence_create(
	struct drm_sched_entity *s_entity, void *owner);
void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
void drm_sched_fence_finished(struct drm_sched_fence *fence);

unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
			      unsigned long remaining);
struct drm_gpu_scheduler *
drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
		    unsigned int num_sched_list);
2018-11-29 15:35:20 +05:30
2017-12-06 17:49:39 +01:00
# endif