// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/crc32.h>

#include "gem/i915_gem_stolen.h"

#include "i915_memcpy.h"
#include "i915_selftest.h"
#include "intel_gpu_commands.h"

#include "selftests/igt_reset.h"
#include "selftests/igt_atomic.h"
#include "selftests/igt_spinner.h"

static int
__igt_reset_stolen(struct intel_gt *gt,
		   intel_engine_mask_t mask,
		   const char *msg)
{
	struct i915_ggtt *ggtt = gt->ggtt;
	const struct resource *dsm = &gt->i915->dsm.stolen;
	resource_size_t num_pages, page;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	enum intel_engine_id id;
	struct igt_spinner spin;
	long max, count;
	void *tmp;
	u32 *crc;
	int err;

	if (!drm_mm_node_allocated(&ggtt->error_capture))
		return 0;

	num_pages = resource_size(dsm) >> PAGE_SHIFT;
	if (!num_pages)
		return 0;

	crc = kmalloc_array(num_pages, sizeof(u32), GFP_KERNEL);
	if (!crc)
		return -ENOMEM;

	tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!tmp) {
		err = -ENOMEM;
		goto err_crc;
	}

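	/*
	 * Hold the global reset lock and keep the device awake while we put
	 * a spinning request on each engine under test.
	 */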
	igt_global_reset_lock(gt);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	err = igt_spinner_init(&spin, gt);
	if (err)
		goto err_lock;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		if (!(mask & engine->mask))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err_spin;
		}
		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_spin;
		}
		i915_request_add(rq);
	}

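	/*
	 * Record a CRC for every stolen page before the reset, writing a
	 * known pattern into the pages not allocated by the driver so that
	 * any later corruption of the unused range is detectable.
	 */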
	for (page = 0; page < num_pages; page++) {
		dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
		void __iomem *s;
		void *in;

		ggtt->vm.insert_page(&ggtt->vm, dma,
				     ggtt->error_capture.start,
				     I915_CACHE_NONE, 0);
		mb();

		s = io_mapping_map_wc(&ggtt->iomap,
				      ggtt->error_capture.start,
				      PAGE_SIZE);

		if (!__drm_mm_interval_first(&gt->i915->mm.stolen,
					     page << PAGE_SHIFT,
					     ((page + 1) << PAGE_SHIFT) - 1))
			memset_io(s, STACK_MAGIC, PAGE_SIZE);

		in = (void __force *)s;
		if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
			in = tmp;
		crc[page] = crc32_le(0, in, PAGE_SIZE);

		io_mapping_unmap(s);
	}
	mb();
	ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);

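	/* Now trigger the reset: either the whole GT or each selected engine. */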
	if (mask == ALL_ENGINES) {
		intel_gt_reset(gt, mask, NULL);
	} else {
		for_each_engine(engine, gt, id) {
			if (mask & engine->mask)
				intel_engine_reset(engine, NULL);
		}
	}

	max = -1;
	count = 0;

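	/*
	 * Re-read and re-CRC every stolen page, flagging any unused page
	 * whose contents changed across the reset.
	 */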
	for (page = 0; page < num_pages; page++) {
		dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
		void __iomem *s;
		void *in;
		u32 x;

		ggtt->vm.insert_page(&ggtt->vm, dma,
				     ggtt->error_capture.start,
				     I915_CACHE_NONE, 0);
		mb();

		s = io_mapping_map_wc(&ggtt->iomap,
				      ggtt->error_capture.start,
				      PAGE_SIZE);

		in = (void __force *)s;
		if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
			in = tmp;
		x = crc32_le(0, in, PAGE_SIZE);

		if (x != crc[page] &&
		    !__drm_mm_interval_first(&gt->i915->mm.stolen,
					     page << PAGE_SHIFT,
					     ((page + 1) << PAGE_SHIFT) - 1)) {
			pr_debug("unused stolen page %pa modified by GPU reset\n",
				 &page);
			if (count++ == 0)
				igt_hexdump(in, PAGE_SIZE);
			max = page;
		}

		io_mapping_unmap(s);
	}
	mb();
	ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);

	if (count > 0) {
		pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n",
			msg, count, max);
	}
	if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) {
		pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n",
		       msg, I915_GEM_STOLEN_BIAS);
		err = -EINVAL;
	}

err_spin:
	igt_spinner_fini(&spin);
err_lock:
	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	igt_global_reset_unlock(gt);
	kfree(tmp);
err_crc:
	kfree(crc);
	return err;
}

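/*
 * Run the stolen-memory clobber check across a full device reset, and then
 * once per engine for individual engine resets.
 */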
static int igt_reset_device_stolen(void *arg)
{
	return __igt_reset_stolen(arg, ALL_ENGINES, "device");
}

static int igt_reset_engines_stolen(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	if (!intel_has_reset_engine(gt))
		return 0;

	for_each_engine(engine, gt, id) {
		err = __igt_reset_stolen(gt, engine->mask, engine->name);
		if (err)
			return err;
	}

	return 0;
}

static int igt_global_reset(void *arg)
{
	struct intel_gt *gt = arg;
	unsigned int reset_count;
	intel_wakeref_t wakeref;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	igt_global_reset_lock(gt);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	reset_count = i915_reset_count(&gt->i915->gpu_error);

	intel_gt_reset(gt, ALL_ENGINES, NULL);

	if (i915_reset_count(&gt->i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}

	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	igt_global_reset_unlock(gt);

	if (intel_gt_is_wedged(gt))
		err = -EIO;

	return err;
}

static int igt_wedged_reset(void *arg)
{
	struct intel_gt *gt = arg;
	intel_wakeref_t wakeref;

	/* Check that we can recover a wedged device with a GPU reset */

	igt_global_reset_lock(gt);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	intel_gt_set_wedged(gt);

	GEM_BUG_ON(!intel_gt_is_wedged(gt));
	intel_gt_reset(gt, ALL_ENGINES, NULL);

	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	igt_global_reset_unlock(gt);

	return intel_gt_is_wedged(gt) ? -EIO : 0;
}

static int igt_atomic_reset(void *arg)
{
	struct intel_gt *gt = arg;
	const typeof(*igt_atomic_phases) *p;
	int err = 0;

	/* Check that the resets are usable from atomic context */

	intel_gt_pm_get(gt);
	igt_global_reset_lock(gt);

	/* Flush any requests before we get started and check basics */
	if (!igt_force_reset(gt))
		goto unlock;

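	/*
	 * Each phase wraps the reset in a different flavour of atomic
	 * context (e.g. with softirqs or interrupts disabled) to check that
	 * the reset never sleeps.
	 */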
	for (p = igt_atomic_phases; p->name; p++) {
		intel_engine_mask_t awake;

		GEM_TRACE("__intel_gt_reset under %s\n", p->name);

		awake = reset_prepare(gt);
		p->critical_section_begin();

		err = __intel_gt_reset(gt, ALL_ENGINES);

		p->critical_section_end();
		reset_finish(gt, awake);

		if (err) {
			pr_err("__intel_gt_reset failed under %s\n", p->name);
			break;
		}
	}

	/* As we poke around the guts, do a full reset before continuing. */
	igt_force_reset(gt);

unlock:
	igt_global_reset_unlock(gt);
	intel_gt_pm_put(gt);

	return err;
}

static int igt_atomic_engine_reset(void *arg)
{
	struct intel_gt *gt = arg;
	const typeof(*igt_atomic_phases) *p;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that the resets are usable from atomic context */

	if (!intel_has_reset_engine(gt))
		return 0;

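	/* With GuC submission, engine resets are owned by the GuC firmware. */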
	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	intel_gt_pm_get(gt);
	igt_global_reset_lock(gt);

	/* Flush any requests before we get started and check basics */
	if (!igt_force_reset(gt))
		goto out_unlock;

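	/*
	 * For each engine, quiesce its submission tasklet while we exercise
	 * an engine reset from every atomic phase, re-kicking the tasklet
	 * once we are done.
	 */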
	for_each_engine(engine, gt, id) {
		struct tasklet_struct *t = &engine->sched_engine->tasklet;

		if (t->func)
			tasklet_disable(t);
		intel_engine_pm_get(engine);

		for (p = igt_atomic_phases; p->name; p++) {
			GEM_TRACE("intel_engine_reset(%s) under %s\n",
				  engine->name, p->name);
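			/*
			 * __intel_engine_reset_bh() expects bottom halves to
			 * be disabled; the softirq phase already disables
			 * them itself, every other phase needs our help.
			 */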
			if (strcmp(p->name, "softirq"))
				local_bh_disable();

			p->critical_section_begin();
			err = __intel_engine_reset_bh(engine, NULL);
			p->critical_section_end();

			if (strcmp(p->name, "softirq"))
				local_bh_enable();

			if (err) {
				pr_err("intel_engine_reset(%s) failed under %s\n",
				       engine->name, p->name);
				break;
			}
		}

		intel_engine_pm_put(engine);
		if (t->func) {
			tasklet_enable(t);
			tasklet_hi_schedule(t);
		}
		if (err)
			break;
	}

	/* As we poke around the guts, do a full reset before continuing. */
	igt_force_reset(gt);

out_unlock:
	igt_global_reset_unlock(gt);
	intel_gt_pm_put(gt);

	return err;
}

int intel_reset_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
		SUBTEST(igt_reset_device_stolen),
		SUBTEST(igt_reset_engines_stolen),
		SUBTEST(igt_wedged_reset),
		SUBTEST(igt_atomic_reset),
		SUBTEST(igt_atomic_engine_reset),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!intel_has_gpu_reset(gt))
		return 0;

	if (intel_gt_is_wedged(gt))
		return -EIO; /* we're long past hope of a successful reset */

	return intel_gt_live_subtests(tests, gt);
}