2019-10-21 18:43:39 +01:00
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */
2020-06-18 18:04:02 +03:00
# include "i915_drv.h"
2019-10-21 18:43:39 +01:00
# include "i915_request.h"
# include "intel_context.h"
# include "intel_engine_heartbeat.h"
# include "intel_engine_pm.h"
# include "intel_engine.h"
# include "intel_gt.h"
2019-10-23 14:31:08 +01:00
# include "intel_reset.h"
/*
 * While the engine is active, we send a periodic pulse along the engine
 * to check on its health and to flush any idle-barriers. If that request
 * is stuck, and we fail to preempt it, we declare the engine hung and
 * issue a reset -- in the hope that restores progress.
 */
static bool next_heartbeat ( struct intel_engine_cs * engine )
{
long delay ;
delay = READ_ONCE ( engine - > props . heartbeat_interval_ms ) ;
if ( ! delay )
return false ;
delay = msecs_to_jiffies_timeout ( delay ) ;
if ( delay > = HZ )
delay = round_jiffies_up_relative ( delay ) ;
2020-07-02 10:52:19 +01:00
mod_delayed_work ( system_highpri_wq , & engine - > heartbeat . work , delay ) ;
2019-10-23 14:31:08 +01:00
return true ;
}
2019-10-21 18:43:39 +01:00
/*
 * Mark @rq as an idle pulse: bump the wakeref serial past the current
 * engine serial and attach any pending active-barriers to the request
 * so they are flushed when it completes.
 */
static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
{
	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
	i915_request_add_active_barriers(rq);
}
2019-10-23 14:31:08 +01:00
/* Dump engine state to the debug log when a heartbeat stops ticking. */
static void show_heartbeat(const struct i915_request *rq,
			   struct intel_engine_cs *engine)
{
	struct drm_printer p = drm_debug_printer("heartbeat");

	intel_engine_dump(engine, &p,
			  "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n",
			  engine->name,
			  rq->fence.context,
			  rq->fence.seqno,
			  rq->sched.attr.priority);
}
/*
 * Periodic heartbeat worker.
 *
 * Each tick either submits a low-priority pulse along the engine's
 * kernel context, or, if the previous pulse has still not completed,
 * escalates its priority; once escalation is exhausted the engine is
 * declared hung and an error/reset is raised via intel_gt_handle_error().
 */
static void heartbeat(struct work_struct *wrk)
{
	struct i915_sched_attr attr = {
		.priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
	};
	struct intel_engine_cs *engine =
		container_of(wrk, typeof(*engine), heartbeat.work.work);
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	unsigned long serial;

	/* Just in case everything has gone horribly wrong, give it a kick */
	intel_engine_flush_submission(engine);

	/* Retire the previous pulse if it completed; drop our reference. */
	rq = engine->heartbeat.systole;
	if (rq && i915_request_completed(rq)) {
		i915_request_put(rq);
		engine->heartbeat.systole = NULL;
	}

	/* If the engine is parked, there is nothing to monitor. */
	if (!intel_engine_pm_get_if_awake(engine))
		return;

	if (intel_gt_is_wedged(engine->gt))
		goto out;

	if (engine->heartbeat.systole) {
		/* Previous pulse still outstanding (rq == systole here). */
		if (!i915_sw_fence_signaled(&rq->submit)) {
			/*
			 * Not yet submitted, system is stalled.
			 *
			 * This more often happens for ring submission,
			 * where all contexts are funnelled into a common
			 * ringbuffer. If one context is blocked on an
			 * external fence, not only is it not submitted,
			 * but all other contexts, including the kernel
			 * context are stuck waiting for the signal.
			 */
		} else if (engine->schedule &&
			   rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
			/*
			 * Gradually raise the priority of the heartbeat to
			 * give high priority work [which presumably desires
			 * low latency and no jitter] the chance to naturally
			 * complete before being preempted.
			 */
			attr.priority = I915_PRIORITY_MASK;
			if (rq->sched.attr.priority >= attr.priority)
				attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
			if (rq->sched.attr.priority >= attr.priority)
				attr.priority = I915_PRIORITY_BARRIER;

			/* Match the submission context of the tasklet. */
			local_bh_disable();
			engine->schedule(rq, &attr);
			local_bh_enable();
		} else {
			/* Already at maximum priority: the engine is hung. */
			if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
				show_heartbeat(rq, engine);

			intel_gt_handle_error(engine->gt, engine->mask,
					      I915_ERROR_CAPTURE,
					      "stopped heartbeat on %s",
					      engine->name);
		}
		goto out;
	}

	/* Skip the pulse if the engine has been idle since the last tick. */
	serial = READ_ONCE(engine->serial);
	if (engine->wakeref_serial == serial)
		goto out;

	if (!mutex_trylock(&ce->timeline->mutex)) {
		/* Unable to lock the kernel timeline, is the engine stuck? */
		if (xchg(&engine->heartbeat.blocked, serial) == serial)
			intel_gt_handle_error(engine->gt, engine->mask,
					      I915_ERROR_CAPTURE,
					      "no heartbeat on %s",
					      engine->name);
		goto out;
	}

	intel_context_enter(ce);
	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
	intel_context_exit(ce);
	if (IS_ERR(rq))
		goto unlock;

	idle_pulse(engine, rq);
	/* Only track the pulse (for hang detection) if hangcheck is on. */
	if (engine->i915->params.enable_hangcheck)
		engine->heartbeat.systole = i915_request_get(rq);

	__i915_request_commit(rq);
	__i915_request_queue(rq, &attr);

unlock:
	mutex_unlock(&ce->timeline->mutex);
out:
	/* If heartbeats were disabled meanwhile, release the tracked pulse. */
	if (!next_heartbeat(engine))
		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
	intel_engine_pm_put(engine);
}
void intel_engine_unpark_heartbeat ( struct intel_engine_cs * engine )
{
2019-10-25 14:59:42 +01:00
if ( ! IS_ACTIVE ( CONFIG_DRM_I915_HEARTBEAT_INTERVAL ) )
2019-10-23 14:31:08 +01:00
return ;
next_heartbeat ( engine ) ;
}
/*
 * Stop the heartbeat when the engine is parked. Only drop the systole
 * reference if we actually cancelled pending work; if the worker is
 * already running, it retains ownership of the reference.
 */
void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
{
	if (cancel_delayed_work(&engine->heartbeat.work))
		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
}
/* One-time setup of the per-engine heartbeat delayed worker. */
void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
{
	INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
}
2020-09-28 23:15:09 +01:00
/*
 * Send a single maximum-priority (barrier) pulse along @engine's kernel
 * context. Caller must hold the kernel timeline mutex and an engine-pm
 * wakeref, and the engine must support preemption.
 *
 * Returns 0 on success or a negative errno if the request could not be
 * created.
 */
static int __intel_engine_pulse(struct intel_engine_cs *engine)
{
	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;

	lockdep_assert_held(&ce->timeline->mutex);
	GEM_BUG_ON(!intel_engine_has_preemption(engine));
	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));

	intel_context_enter(ce);
	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
	intel_context_exit(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* Mark as a sentinel so nothing may be submitted after the pulse. */
	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
	idle_pulse(engine, rq);

	__i915_request_commit(rq);
	__i915_request_queue(rq, &attr);
	GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);

	return 0;
}
static unsigned long set_heartbeat ( struct intel_engine_cs * engine ,
unsigned long delay )
{
unsigned long old ;
old = xchg ( & engine - > props . heartbeat_interval_ms , delay ) ;
if ( delay )
intel_engine_unpark_heartbeat ( engine ) ;
else
intel_engine_park_heartbeat ( engine ) ;
return old ;
}
2019-10-23 14:31:08 +01:00
/*
 * Update the heartbeat interval for @engine (in milliseconds; 0 disables
 * it). On change, a pulse is sent to recheck the currently executing
 * request; if that fails the previous interval is restored.
 *
 * Returns 0 on success, -ENODEV if disabling is unsupported without
 * preempt-then-reset, or a negative errno from locking/pulsing.
 */
int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
			       unsigned long delay)
{
	struct intel_context *ce = engine->kernel_context;
	int err = 0;

	/* Without a heartbeat we rely on preempt-reset to detect hangs. */
	if (!delay && !intel_engine_has_preempt_reset(engine))
		return -ENODEV;

	intel_engine_pm_get(engine);

	err = mutex_lock_interruptible(&ce->timeline->mutex);
	if (err)
		goto out_rpm;

	if (delay != engine->props.heartbeat_interval_ms) {
		unsigned long saved = set_heartbeat(engine, delay);

		/* recheck current execution */
		if (intel_engine_has_preemption(engine)) {
			err = __intel_engine_pulse(engine);
			if (err)
				set_heartbeat(engine, saved);
		}
	}

	mutex_unlock(&ce->timeline->mutex);

out_rpm:
	intel_engine_pm_put(engine);
	return err;
}
2019-10-21 18:43:39 +01:00
/*
 * Send an on-demand barrier pulse along @engine, if it is awake.
 *
 * Returns 0 on success or if the engine is asleep (nothing to do),
 * -ENODEV if the engine lacks preemption, -EINTR if interrupted while
 * acquiring the kernel timeline, or an errno from __intel_engine_pulse().
 */
int intel_engine_pulse(struct intel_engine_cs *engine)
{
	struct intel_context *ce = engine->kernel_context;
	int err;

	if (!intel_engine_has_preemption(engine))
		return -ENODEV;

	if (!intel_engine_pm_get_if_awake(engine))
		return 0;

	/* Any lock failure is deliberately reported as -EINTR. */
	err = -EINTR;
	if (!mutex_lock_interruptible(&ce->timeline->mutex)) {
		err = __intel_engine_pulse(engine);
		mutex_unlock(&ce->timeline->mutex);
	}

	intel_engine_pm_put(engine);
	return err;
}
/*
 * Flush any pending idle-barrier tasks on @engine by submitting an idle
 * pulse that carries them. A no-op (returning 0) if there are no
 * barrier tasks or the engine is asleep.
 *
 * Returns 0 on success or a negative errno if the request could not be
 * created.
 */
int intel_engine_flush_barriers(struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int err = 0;

	if (llist_empty(&engine->barrier_tasks))
		return 0;

	if (!intel_engine_pm_get_if_awake(engine))
		return 0;

	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_rpm;
	}

	idle_pulse(engine, rq);
	i915_request_add(rq);

out_rpm:
	intel_engine_pm_put(engine);
	return err;
}
# if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
# include "selftest_engine_heartbeat.c"
# endif