// SPDX-License-Identifier: MIT
/*
* Copyright © 2020 Intel Corporation
*/
# include "gen2_engine_cs.h"
# include "i915_drv.h"
# include "intel_engine.h"
# include "intel_gpu_commands.h"
# include "intel_gt.h"
# include "intel_gt_irq.h"
# include "intel_ring.h"
int gen2_emit_flush(struct i915_request *rq, u32 mode)
{
	unsigned int num_store_dw = 12;
	u32 cmd, *cs;

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;

	cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	while (num_store_dw--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = 0;
		*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
	}
	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}
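
/*
 * Render-engine flush for gen4/gen5. The cache and TLB semantics of the
 * individual MI_FLUSH bits are summarised in the comment below; when an
 * invalidate is requested, the flush is additionally surrounded by
 * PIPE_CONTROL scratch writes and a burst of MI_FLUSH commands, which
 * appears to serve as a settling delay (see the second comment in the
 * body).
 */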
int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;
	int i;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write, and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5)
			cmd |= MI_INVALIDATE_ISP;
	}

	i = 2;
	if (mode & EMIT_INVALIDATE)
		i += 20;

	cs = intel_ring_begin(rq, i);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;

	/*
	 * A random delay to let the CS invalidate take effect? Without this
	 * delay, the GPU relocation path fails as the CS does not see
	 * the updated contents. Just as important, if we apply the flushes
	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
	 * write and before the invalidate on the next batch), the relocations
	 * still fail. This implies that it is a delay following invalidation
	 * that is required to reset the caches, as opposed to a delay to
	 * ensure the memory is written.
	 */
	if (mode & EMIT_INVALIDATE) {
		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;

		for (i = 0; i < 12; i++)
			*cs++ = MI_FLUSH;

		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;
	}

	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}
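
/*
 * Flush for the gen4/gen5 video engine: a bare MI_FLUSH, with MI_NOOP
 * presumably added only to keep the emission an even number of dwords.
 */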
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_FLUSH;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}
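
/*
 * Common breadcrumb emission for the legacy ring backends: flush, write
 * the request's seqno 'flush' times into the scratch slot (apparently as
 * a further delay), write it 'post' times into the HWSP seqno slot, then
 * raise MI_USER_INTERRUPT. gen3_emit_breadcrumb() and
 * gen5_emit_breadcrumb() below differ only in the number of scratch
 * writes.
 */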
static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
				   int flush, int post)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH;

	while (flush--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = rq->fence.seqno;
	}

	while (post--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}

u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
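
/*
 * Batch buffer start for i830. Before jumping into the batch, a small
 * blit walks I830_TLB_ENTRIES pages of the scratch area to evict stale
 * PTE TLBs, and unpinned batches are first copied into that scratch area
 * so the CS never stumbles over its tlb invalidation bug (see the
 * in-line comments below).
 */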
int i830_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs, cs_offset =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Evict the invalid PTE TLBs */
	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
	*cs++ = cs_offset;
	*cs++ = 0xdeadbeef;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		cs = intel_ring_begin(rq, 6 + 2);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Blit the batch (which now has all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
		*cs++ = cs_offset;
		*cs++ = 4096;
		*cs++ = offset;

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* ... and execute it. */
		offset = cs_offset;
	}

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}
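
/*
 * Batch buffer start for gen3: a GTT-relative MI_BATCH_BUFFER_START, with
 * MI_BATCH_NON_SECURE set unless a secure dispatch was requested.
 */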
int gen3_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs;

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}
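
/*
 * Batch buffer start for gen4/gen5: as gen3, but using the 965+ encoding
 * of the non-secure bit (MI_BATCH_NON_SECURE_I965).
 */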
int gen4_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 length,
		       unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

	security = MI_BATCH_NON_SECURE_I965;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}
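
/*
 * Engine interrupt helpers: gen2 and gen3 unmask/mask the engine's bit in
 * the cached irq_mask and write it to GEN2_IMR (gen2 via 16-bit register
 * accesses), while gen5 routes through the GT interrupt helpers.
 */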
void gen2_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
	ENGINE_POSTING_READ16(engine, RING_IMR);
}

void gen2_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
}

void gen3_irq_enable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

void gen3_irq_disable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
}

void gen5_irq_enable(struct intel_engine_cs *engine)
{
	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen5_irq_disable(struct intel_engine_cs *engine)
{
	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}
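
/*
 * Usage sketch (illustrative only, not part of this file): these helpers
 * are expected to be installed as engine vfuncs by the legacy ring
 * submission setup, roughly along the lines of
 *
 *	engine->emit_flush = gen2_emit_flush;
 *	engine->emit_bb_start = gen3_emit_bb_start;
 *	engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
 *	engine->irq_enable = gen3_irq_enable;
 *	engine->irq_disable = gen3_irq_disable;
 *
 * with the gen4/gen5 variants substituted as appropriate for the
 * platform; the exact wiring lives in the ring submission backend, not
 * here.
 */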