/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Mika Kuoppala <mika.kuoppala@intel.com>
 *
 */
#include "i915_drv.h"
#include "intel_renderstate.h"
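
/*
 * Per-engine container for the golden render state: the generated
 * null-state tables for this gen (rodata), the GGTT vma the state is
 * bound into, and the offsets/sizes of the main and auxiliary batches
 * within that vma once they have been written.
 */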
struct intel_render_state {
        const struct intel_renderstate_rodata *rodata;
        struct i915_vma *vma;
        u32 batch_offset;
        u32 batch_size;
        u32 aux_offset;
        u32 aux_size;
};
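
/* Pick the generated null-state tables matching this engine's gen. */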
static const struct intel_renderstate_rodata *
render_state_get_rodata(const struct intel_engine_cs *engine)
{
        switch (INTEL_GEN(engine->i915)) {
        case 6:
                return &gen6_null_state;
        case 7:
                return &gen7_null_state;
        case 8:
                return &gen8_null_state;
        case 9:
                return &gen9_null_state;
        }

        return NULL;
}

/*
 * Macro to add commands to the auxiliary batch.
 * This macro only checks for page overflow before inserting the commands.
 * That is sufficient, as the null state generator builds the final batch
 * in two passes, constructing the commands and the state separately. At
 * that point the sizes of both are known, and it compacts them by placing
 * the state right after the commands, taking care of alignment, so we
 * should have sufficient space below them for adding new commands.
 */
#define OUT_BATCH(batch, i, val)                                \
        do {                                                    \
                if ((i) >= PAGE_SIZE / sizeof(u32))             \
                        goto err;                               \
                (batch)[(i)++] = (val);                         \
        } while (0)
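
/*
 * Copy the golden render state into the backing page of so->vma, resolving
 * the relocations against the vma's current GGTT address, then append the
 * cacheline-aligned auxiliary batch after it.
 */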
static int render_state_setup(struct intel_render_state *so,
                              struct drm_i915_private *i915)
{
        const struct intel_renderstate_rodata *rodata = so->rodata;
        struct drm_i915_gem_object *obj = so->vma->obj;
        unsigned int i = 0, reloc_index = 0;
        unsigned int needs_clflush;
        u32 *d;
        int ret;

        ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
        if (ret)
                return ret;

        d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0));
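
        /*
         * Copy the batch dword by dword; whenever we hit the next reloc
         * offset, splice in the vma's address (low dword first, then the
         * high dword on platforms with 64-bit relocations).
         */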
        while (i < rodata->batch_items) {
                u32 s = rodata->batch[i];

                if (i * 4 == rodata->reloc[reloc_index]) {
                        u64 r = s + so->vma->node.start;

                        s = lower_32_bits(r);
                        if (HAS_64BIT_RELOC(i915)) {
                                if (i + 1 >= rodata->batch_items ||
                                    rodata->batch[i + 1] != 0)
                                        goto err;

                                d[i++] = s;
                                s = upper_32_bits(r);
                        }

                        reloc_index++;
                }

                d[i++] = s;
        }

        if (rodata->reloc[reloc_index] != -1) {
                DRM_ERROR("only %d relocs resolved\n", reloc_index);
                goto err;
        }

        so->batch_offset = so->vma->node.start;
        so->batch_size = rodata->batch_items * sizeof(u32);

        while (i % CACHELINE_DWORDS)
                OUT_BATCH(d, i, MI_NOOP);

        so->aux_offset = i * sizeof(u32);

        if (HAS_POOLED_EU(i915)) {
                /*
                 * We always program the 3x6 pool config, but depending on
                 * which subslice is disabled, the HW drops down to the
                 * appropriate config shown below.
                 *
                 * In the table below, the 2x6 config always refers to the
                 * fused-down version; native 2x6 is not available and can
                 * be ignored.
                 *
                 * SNo  subslices config             eu pool configuration
                 * ---------------------------------------------------------
                 *  1   3 subslices enabled (3x6) -  0x00777000  (9+9)
                 *  2   ss0 disabled (2x6)        -  0x00777000  (3+9)
                 *  3   ss1 disabled (2x6)        -  0x00770000  (6+6)
                 *  4   ss2 disabled (2x6)        -  0x00007000  (9+3)
                 */
                u32 eu_pool_config = 0x00777000;

                OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE);
                OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE);
                OUT_BATCH(d, i, eu_pool_config);
                OUT_BATCH(d, i, 0);
                OUT_BATCH(d, i, 0);
                OUT_BATCH(d, i, 0);
        }

        OUT_BATCH(d, i, MI_BATCH_BUFFER_END);

        so->aux_size = i * sizeof(u32) - so->aux_offset;
        so->aux_offset += so->batch_offset;
        /*
         * Since we are sending a length, we need to strictly conform to
         * all requirements. For Gen2 this must be a multiple of 8.
         */
        so->aux_size = ALIGN(so->aux_size, 8);

        if (needs_clflush)
                drm_clflush_virt_range(d, i * sizeof(u32));
        kunmap_atomic(d);

        ret = i915_gem_object_set_to_gtt_domain(obj, false);
out:
        i915_gem_obj_finish_shmem_access(obj);
        return ret;

err:
        kunmap_atomic(d);
        ret = -EINVAL;
        goto out;
}

#undef OUT_BATCH
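
/*
 * Allocate the render-state container for an engine: only the render
 * ring (RCS) carries golden state, and only on gens for which null-state
 * tables exist.
 */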
int i915_gem_render_state_init(struct intel_engine_cs *engine)
{
        struct intel_render_state *so;
        const struct intel_renderstate_rodata *rodata;
        struct drm_i915_gem_object *obj;
        int ret;

        if (engine->id != RCS)
                return 0;

        rodata = render_state_get_rodata(engine);
        if (!rodata)
                return 0;

        if (rodata->batch_items * 4 > 4096)
                return -EINVAL;

        so = kmalloc(sizeof(*so), GFP_KERNEL);
        if (!so)
                return -ENOMEM;

        obj = i915_gem_object_create_internal(engine->i915, 4096);
        if (IS_ERR(obj)) {
                ret = PTR_ERR(obj);
                goto err_free;
        }

        so->vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
        if (IS_ERR(so->vma)) {
                ret = PTR_ERR(so->vma);
                goto err_obj;
        }

        so->rodata = rodata;
        engine->render_state = so;
        return 0;

err_obj:
        i915_gem_object_put(obj);
err_free:
        kfree(so);
        return ret;
}
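
/*
 * Emit the golden render state into a request: pin the vma in the global
 * GTT, (re)write the batch contents if needed, then dispatch the main
 * batch and, if present, the auxiliary batch.
 */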
int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
{
        struct intel_render_state *so;
        int ret;

        lockdep_assert_held(&req->i915->drm.struct_mutex);

        so = req->engine->render_state;
        if (!so)
                return 0;

        /* Recreate the page after shrinking */
        if (!so->vma->obj->mm.pages)
                so->batch_offset = -1;

        ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
        if (ret)
                return ret;
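
        /*
         * The batch embeds absolute GGTT addresses, so it must be
         * rewritten whenever the vma is bound at a different offset
         * (batch_offset was also poisoned above if the shrinker had
         * discarded the backing pages).
         */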
        if (so->vma->node.start != so->batch_offset) {
                ret = render_state_setup(so, req->i915);
                if (ret)
                        goto err_unpin;
        }

        ret = req->engine->emit_bb_start(req,
                                         so->batch_offset, so->batch_size,
                                         I915_DISPATCH_SECURE);
        if (ret)
                goto err_unpin;

        if (so->aux_size > 8) {
                ret = req->engine->emit_bb_start(req,
                                                 so->aux_offset, so->aux_size,
                                                 I915_DISPATCH_SECURE);
                if (ret)
                        goto err_unpin;
        }

        i915_vma_move_to_active(so->vma, req, 0);
err_unpin:
        i915_vma_unpin(so->vma);
        return ret;
}
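
/*
 * Tear down an engine's render state: detach it from the engine, close
 * the vma and drop the object reference, then free the container.
 */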
void i915_gem_render_state_fini(struct intel_engine_cs *engine)
{
        struct intel_render_state *so;
        struct drm_i915_gem_object *obj;

        so = fetch_and_zero(&engine->render_state);
        if (!so)
                return;

        obj = so->vma->obj;

        i915_vma_close(so->vma);
        __i915_gem_object_release_unless_active(obj);

        kfree(so);
}