// SPDX-License-Identifier: MIT
/*
* Copyright © 2020 Intel Corporation
*/
# include "gen6_engine_cs.h"
# include "intel_engine.h"
2022-01-10 21:15:56 -08:00
# include "intel_engine_regs.h"
2020-06-01 08:24:12 +01:00
# include "intel_gpu_commands.h"
# include "intel_gt.h"
# include "intel_gt_irq.h"
# include "intel_gt_pm_irq.h"
# include "intel_ring.h"
# define HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32))
/*
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6. From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * = 1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it. Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either. Notify enable is IRQs, which aren't
 * really our business. That leaves only stall at scoreboard.
 */
static int
gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
{
	u32 scratch_addr =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
	u32 *cs;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0; /* low dword */
	*cs++ = 0; /* high dword */
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_QW_WRITE;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

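/*
 * Render-engine flush for gen6: applies the post-sync-nonzero workaround
 * above, then emits a single PIPE_CONTROL whose flush/invalidate bits are
 * selected from the EMIT_FLUSH/EMIT_INVALIDATE mode flags.
 */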
int gen6_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 scratch_addr =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
	u32 *cs, flags = 0;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = gen6_emit_post_sync_nonzero_flush(rq);
	if (ret)
		return ret;

	/*
	 * Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact. And when rearranging requests, the order of flushes is
	 * unknown.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = flags;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	return 0;
}

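/*
 * Request breadcrumb for the gen6 render ring: first the post-sync-nonzero
 * workaround pair, then a PIPE_CONTROL that flushes the render caches and
 * writes the request seqno, followed by a user interrupt.
 */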
u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */
	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = 0;
	*cs++ = 0;

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_QW_WRITE;
	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT) |
		PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;

	/* Finally we can flush and with it emit the breadcrumb */
	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		 PIPE_CONTROL_DC_FLUSH_ENABLE |
		 PIPE_CONTROL_QW_WRITE |
		 PIPE_CONTROL_CS_STALL);
	*cs++ = i915_request_active_seqno(rq) |
		PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = rq->fence.seqno;

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

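/*
 * MI_FLUSH_DW helper shared by the non-render rings: flushes the engine and
 * performs a post-sync dword store into the HWS scratch slot so that later
 * commands are ordered against the completed flush.
 */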
static int mi_flush_dw(struct i915_request *rq, u32 flags)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	cmd |= flags;

	*cs++ = cmd;
	*cs++ = HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

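/*
 * Flush entry points for the blitter (xcs) and video (vcs) rings: TLB (and,
 * for vcs, BSD) invalidation is only requested when EMIT_INVALIDATE is set.
 */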
static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
{
	return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0);
}

int gen6_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
}

int gen6_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
}

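/*
 * Start a batch buffer on gen6: batches are dispatched non-secure
 * (MI_BATCH_NON_SECURE_I965) unless the caller passes I915_DISPATCH_SECURE.
 */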
int gen6_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

	security = MI_BATCH_NON_SECURE_I965;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cs = __gen6_emit_bb_start(cs, offset, security);
	intel_ring_advance(rq, cs);

	return 0;
}

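/*
 * Haswell variant: in addition to the non-secure bit, batches are dispatched
 * via the PPGTT (MI_BATCH_PPGTT_HSW) unless I915_DISPATCH_SECURE is requested.
 */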
int
hsw_emit_bb_start(struct i915_request *rq,
		  u64 offset, u32 len,
		  unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

	security = MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cs = __gen6_emit_bb_start(cs, offset, security);
	intel_ring_advance(rq, cs);

	return 0;
}

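/*
 * Emit a bare CS-stall + scoreboard-stall PIPE_CONTROL; used below as the
 * workaround required before a PIPE_CONTROL that invalidates the state cache.
 */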
static int gen7_stall_cs(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = 0;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	return 0;
}

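/*
 * Render-engine flush for gen7 (IVB/HSW): always sets CS_STALL with a
 * post-sync write, and precedes any state-cache invalidation with the
 * gen7_stall_cs() workaround above.
 */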
int gen7_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 scratch_addr =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
	u32 *cs, flags = 0;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/*
	 * CS_STALL suggests at least a post-sync write.
	 */
	flags |= PIPE_CONTROL_QW_WRITE;
	flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

	/*
	 * Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;

		/*
		 * Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set.
		 */
		gen7_stall_cs(rq);
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = flags;
	*cs++ = scratch_addr;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	return 0;
}

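/*
 * Request breadcrumb for the gen7 render ring: a single PIPE_CONTROL that
 * flushes the render caches and writes the request seqno, then a user
 * interrupt.
 */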
u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		 PIPE_CONTROL_DC_FLUSH_ENABLE |
		 PIPE_CONTROL_FLUSH_ENABLE |
		 PIPE_CONTROL_QW_WRITE |
		 PIPE_CONTROL_GLOBAL_GTT_IVB |
		 PIPE_CONTROL_CS_STALL);
	*cs++ = i915_request_active_seqno(rq);
	*cs++ = rq->fence.seqno;

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

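/*
 * Breadcrumb for the gen6 non-render rings: MI_FLUSH_DW with a post-sync
 * store of the seqno into the status page, then a user interrupt.
 */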
u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = rq->fence.seqno;

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

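/*
 * Breadcrumb for the gen7 non-render rings. The GEN7_XCS_WA loop below
 * rewrites the seqno 32 times with MI_STORE_DWORD_INDEX; this is presumably
 * a workaround to make sure the seqno store has landed in the status page
 * before the interrupt is raised (the exact rationale is not documented
 * here).
 */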
#define GEN7_XCS_WA 32
u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	int i;

	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB |
		MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = rq->fence.seqno;

	for (i = 0; i < GEN7_XCS_WA; i++) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_FLUSH_DW;
	*cs++ = 0;
	*cs++ = 0;

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}
#undef GEN7_XCS_WA

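/*
 * Engine interrupt enable/disable for gen6+: unmask the engine's RING_IMR
 * first (with a posting read to flush the write), then unmask the
 * corresponding bits at the GT level.
 */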
void gen6_irq_enable(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR,
		     ~(engine->irq_enable_mask | engine->irq_keep_mask));

	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
	ENGINE_POSTING_READ(engine, RING_IMR);

	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen6_irq_disable(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}

void hsw_irq_enable_vecs(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask);

	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
	ENGINE_POSTING_READ(engine, RING_IMR);

	gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask);
}

void hsw_irq_disable_vecs(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~0);
	gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask);
}