/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */

#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

#define AMDGPU_CS_MAX_PRIORITY		32u
#define AMDGPU_CS_NUM_BUCKETS		(AMDGPU_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct amdgpu_cs_buckets {
	struct list_head bucket[AMDGPU_CS_NUM_BUCKETS];
};

static void amdgpu_cs_buckets_init(struct amdgpu_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void amdgpu_cs_buckets_add(struct amdgpu_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, AMDGPU_CS_MAX_PRIORITY)]);
}

static void amdgpu_cs_buckets_get_list(struct amdgpu_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}

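/* Map the (ip_type, ip_instance, ring) triple supplied by userspace to the
 * corresponding kernel ring structure. Only instance 0 exists for all IP
 * blocks at the moment, and the ring index is range-checked per IP block.
 */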
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
		       u32 ip_instance, u32 ring,
		       struct amdgpu_ring **out_ring)
{
	/* Right now all IPs have only one instance - multiple rings. */
	if (ip_instance != 0) {
		DRM_ERROR("invalid ip instance: %d\n", ip_instance);
		return -EINVAL;
	}

	switch (ip_type) {
	default:
		DRM_ERROR("unknown ip type: %d\n", ip_type);
		return -EINVAL;
	case AMDGPU_HW_IP_GFX:
		if (ring < adev->gfx.num_gfx_rings) {
			*out_ring = &adev->gfx.gfx_ring[ring];
		} else {
			DRM_ERROR("only %d gfx rings are supported now\n",
				  adev->gfx.num_gfx_rings);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_COMPUTE:
		if (ring < adev->gfx.num_compute_rings) {
			*out_ring = &adev->gfx.compute_ring[ring];
		} else {
			DRM_ERROR("only %d compute rings are supported now\n",
				  adev->gfx.num_compute_rings);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_DMA:
		if (ring < adev->sdma.num_instances) {
			*out_ring = &adev->sdma.instance[ring].ring;
		} else {
			DRM_ERROR("only %d SDMA rings are supported\n",
				  adev->sdma.num_instances);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_UVD:
		*out_ring = &adev->uvd.ring;
		break;
	case AMDGPU_HW_IP_VCE:
		if (ring < 2) {
			*out_ring = &adev->vce.ring[ring];
		} else {
			DRM_ERROR("only two VCE rings are supported\n");
			return -EINVAL;
		}
		break;
	}
	return 0;
}

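/* Copy the CS chunk array and every chunk's data from userspace into the
 * parser, look up the submission context and BO list, and count the IB
 * chunks. The user fence chunk, if present, is resolved to its GEM object
 * here as well. On failure everything allocated so far is torn down.
 */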
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
	union drm_amdgpu_cs *cs = data;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned size;
	int i;
	int ret;

	if (cs->in.num_chunks == 0)
		return 0;

	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx) {
		ret = -EINVAL;
		goto free_chunk;
	}

	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);

	/* get chunks */
	INIT_LIST_HEAD(&p->validated);
	chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t) * cs->in.num_chunks)) {
		ret = -EFAULT;
		goto put_bo_list;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
				  GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto put_bo_list;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;

		p->chunks[i].user_ptr = cdata;

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			p->num_ibs++;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
			if (p->chunks[i].length_dw * sizeof(uint32_t) >= size) {
				uint32_t handle;
				struct drm_gem_object *gobj;
				struct drm_amdgpu_cs_chunk_fence *fence_data;

				fence_data = (void *)p->chunks[i].kdata;
				handle = fence_data->handle;
				gobj = drm_gem_object_lookup(p->adev->ddev,
							     p->filp, handle);
				if (gobj == NULL) {
					ret = -EINVAL;
					goto free_partial_kdata;
				}

				p->uf.bo = gem_to_amdgpu_bo(gobj);
				p->uf.offset = fence_data->offset;
			} else {
				ret = -EINVAL;
				goto free_partial_kdata;
			}
			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
			break;

		default:
			ret = -EINVAL;
			goto free_partial_kdata;
		}
	}

	p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
	if (!p->ibs) {
		ret = -ENOMEM;
		goto free_all_kdata;
	}

	kfree(chunk_array);
	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		drm_free_large(p->chunks[i].kdata);
	kfree(p->chunks);
put_bo_list:
	if (p->bo_list)
		amdgpu_bo_list_put(p->bo_list);
	amdgpu_ctx_put(p->ctx);
free_chunk:
	kfree(chunk_array);

	return ret;
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
{
	u64 real_vram_size = adev->mc.real_vram_size;
	u64 vram_usage = atomic64_read(&adev->vram_usage);

	/* This function is based on the current VRAM usage.
	 *
	 * - If all of VRAM is free, allow relocating the number of bytes that
	 *   is equal to 1/4 of the size of VRAM for this IB.
	 *
	 * - If more than one half of VRAM is occupied, only allow relocating
	 *   1 MB of data for this IB.
	 *
	 * - From 0 to one half of used VRAM, the threshold decreases
	 *   linearly.
	 *         __________________
	 * 1/4 of -|\               |
	 * VRAM    | \              |
	 *         |  \             |
	 *         |   \            |
	 *         |    \           |
	 *         |     \          |
	 *         |      \         |
	 *         |       \________|1 MB
	 *         |----------------|
	 *    VRAM 0 %             100 %
	 *         used            used
	 *
	 * Note: It's a threshold, not a limit. The threshold must be crossed
	 * for buffer relocations to stop, so any buffer of an arbitrary size
	 * can be moved as long as the threshold isn't crossed before
	 * the relocation takes place. We don't want to disable buffer
	 * relocations completely.
	 *
	 * The idea is that buffers should be placed in VRAM at creation time
	 * and TTM should only do a minimum number of relocations during
	 * command submission. In practice, you need to submit at least
	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
	 *
	 * Also, things can get pretty crazy under memory pressure and actual
	 * VRAM usage can change a lot, so playing safe even at 50% does
	 * consistently increase performance.
	 */
	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024 * 1024ull);
}

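/* Validate (that is, place into an allowed domain) every BO on the given
 * list, keeping the total number of bytes moved by TTM in this submission
 * below the threshold computed above. Buffers whose move would exceed the
 * threshold are left in their current domain instead.
 */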
int amdgpu_cs_list_validate(struct amdgpu_device *adev,
			    struct amdgpu_vm *vm,
			    struct list_head *validated)
{
	struct amdgpu_bo_list_entry *lobj;
	struct amdgpu_bo *bo;
	u64 bytes_moved = 0, initial_bytes_moved;
	u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev);
	int r;

	list_for_each_entry(lobj, validated, tv.head) {
		bo = lobj->robj;
		if (!bo->pin_count) {
			u32 domain = lobj->prefered_domains;
			u32 current_domain =
				amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);

			/* Check if this buffer will be moved and don't move it
			 * if we have moved too many buffers for this IB already.
			 *
			 * Note that this allows moving at least one buffer of
			 * any size, because it doesn't take the current "bo"
			 * into account. We don't want to disallow buffer moves
			 * completely.
			 */
			if ((lobj->allowed_domains & current_domain) != 0 &&
			    (domain & current_domain) == 0 && /* will be moved */
			    bytes_moved > bytes_moved_threshold) {
				/* don't move it */
				domain = current_domain;
			}

		retry:
			amdgpu_ttm_placement_from_domain(bo, domain);
			initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
			bytes_moved += atomic64_read(&adev->num_bytes_moved) -
				       initial_bytes_moved;

			if (unlikely(r)) {
				if (r != -ERESTARTSYS && domain != lobj->allowed_domains) {
					domain = lobj->allowed_domains;
					goto retry;
				}
				return r;
			}
		}
		lobj->bo_va = amdgpu_vm_bo_find(vm, bo);
	}
	return 0;
}

static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_cs_buckets buckets;
	struct list_head duplicates;
	bool need_mmap_lock = false;
	int i, r;

	if (p->bo_list) {
		need_mmap_lock = p->bo_list->has_userptr;
		amdgpu_cs_buckets_init(&buckets);
		for (i = 0; i < p->bo_list->num_entries; i++)
			amdgpu_cs_buckets_add(&buckets, &p->bo_list->array[i].tv.head,
					      p->bo_list->array[i].priority);

		amdgpu_cs_buckets_get_list(&buckets, &p->validated);
	}

	p->vm_bos = amdgpu_vm_get_bos(p->adev, &fpriv->vm,
				      &p->validated);

	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);

	INIT_LIST_HEAD(&duplicates);
	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
	if (unlikely(r != 0))
		goto error_reserve;

	r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated);
	if (r)
		goto error_validate;

	r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates);

error_validate:
	if (r)
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

error_reserve:
	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);

	return r;
}

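/* Collect the fences attached to the reservation objects of all validated
 * buffers into the sync object of the first IB, so the submission waits for
 * any work still using those buffers.
 */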
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_bo_list_entry *e;
	int r;

	list_for_each_entry(e, &p->validated, tv.head) {
		struct reservation_object *resv = e->robj->tbo.resv;
		r = amdgpu_sync_resv(p->adev, &p->ibs[0].sync, resv, p->filp);

		if (r)
			return r;
	}
	return 0;
}

static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
	struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * amdgpu_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	whether to back off the buffer reservations
 *
 * If error is set, then unvalidate the buffers, otherwise just free the
 * memory used by the parsing context.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    parser->fence);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}
	fence_put(parser->fence);

	if (parser->ctx)
		amdgpu_ctx_put(parser->ctx);
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	drm_free_large(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	if (parser->ibs)
		for (i = 0; i < parser->num_ibs; i++)
			amdgpu_ib_free(parser->adev, &parser->ibs[i]);
	kfree(parser->ibs);
	if (parser->uf.bo)
		drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
}

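/* Update the page directory and the page table entries for every BO in the
 * submission, sync the first IB to those updates, and clear freed and
 * invalidated mappings. With amdgpu_vm_debug set, all BOs are additionally
 * invalidated to catch userspace relying on stale mappings.
 */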
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
				   struct amdgpu_vm *vm)
{
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int i, r;

	r = amdgpu_vm_update_page_directory(adev, vm);
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence);
	if (r)
		return r;

	r = amdgpu_vm_clear_freed(adev, vm);
	if (r)
		return r;

	if (p->bo_list) {
		for (i = 0; i < p->bo_list->num_entries; i++) {
			struct fence *f;

			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			bo_va = p->bo_list->array[i].bo_va;
			if (bo_va == NULL)
				continue;

			r = amdgpu_vm_bo_update(adev, bo_va, &bo->tbo.mem);
			if (r)
				return r;

			f = bo_va->last_pt_update;
			r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f);
			if (r)
				return r;
		}
	}

	r = amdgpu_vm_clear_invalids(adev, vm, &p->ibs[0].sync);

	if (amdgpu_vm_debug && p->bo_list) {
		/* Invalidate all BOs to test for userspace bugs */
		for (i = 0; i < p->bo_list->num_entries; i++) {
			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(adev, bo);
		}
	}

	return r;
}

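/* Run the per-ring command stream parsers (used for UVD/VCE VM emulation),
 * then update the page tables and sync the rings against the validated
 * buffers.
 */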
static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
				 struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring;
	int i, r;

	if (parser->num_ibs == 0)
		return 0;

	/* Only for UVD/VCE VM emulation */
	for (i = 0; i < parser->num_ibs; i++) {
		ring = parser->ibs[i].ring;
		if (ring->funcs->parse_cs) {
			r = amdgpu_ring_parse_cs(ring, parser, i);
			if (r)
				return r;
		}
	}

	r = amdgpu_bo_vm_update_pte(parser, vm);
	if (!r)
		amdgpu_cs_sync_rings(parser);

	return r;
}

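/* -EDEADLK means the GPU is locked up: try a reset and, if it succeeds,
 * report -EAGAIN so userspace resubmits the command stream.
 */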
static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
{
	if (r == -EDEADLK) {
		r = amdgpu_gpu_reset(adev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

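/* Translate the IB chunks into amdgpu_ib structures. Rings that provide a
 * parse_cs callback get a kernel copy of the IB contents so they can be
 * patched; all others reference the IB directly by its GPU virtual address.
 * GDS/GWS/OA resources are attached to the first IB and the optional user
 * fence to the last one.
 */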
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int i, j;
	int r;

	for (i = 0, j = 0; i < parser->nchunks && j < parser->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct amdgpu_ring *ring;

		chunk = &parser->chunks[i];
		ib = &parser->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
				       chunk_ib->ip_instance, chunk_ib->ring,
				       &ring);
		if (r)
			return r;

		if (ring->funcs->parse_cs) {
			struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			uint64_t offset;
			uint8_t *kptr;

			m = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
						   &aobj);
			if (!aobj) {
				DRM_ERROR("IB va_start is invalid\n");
				return -EINVAL;
			}

			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
			    (m->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
				return -EINVAL;
			}

			/* the IB should be reserved at this point */
			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
			if (r) {
				return r;
			}

			offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
			kptr += chunk_ib->va_start - offset;

			r = amdgpu_ib_get(ring, NULL, chunk_ib->ib_bytes, ib);
			if (r) {
				DRM_ERROR("Failed to get ib !\n");
				return r;
			}

			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
			amdgpu_bo_kunmap(aobj);
		} else {
			r = amdgpu_ib_get(ring, vm, 0, ib);
			if (r) {
				DRM_ERROR("Failed to get ib !\n");
				return r;
			}

			ib->gpu_addr = chunk_ib->va_start;
		}

		ib->length_dw = chunk_ib->ib_bytes / 4;
		ib->flags = chunk_ib->flags;
		ib->ctx = parser->ctx;
		j++;
	}

	if (!parser->num_ibs)
		return 0;

	/* add GDS resources to first IB */
	if (parser->bo_list) {
		struct amdgpu_bo *gds = parser->bo_list->gds_obj;
		struct amdgpu_bo *gws = parser->bo_list->gws_obj;
		struct amdgpu_bo *oa = parser->bo_list->oa_obj;
		struct amdgpu_ib *ib = &parser->ibs[0];

		if (gds) {
			ib->gds_base = amdgpu_bo_gpu_offset(gds);
			ib->gds_size = amdgpu_bo_size(gds);
		}
		if (gws) {
			ib->gws_base = amdgpu_bo_gpu_offset(gws);
			ib->gws_size = amdgpu_bo_size(gws);
		}
		if (oa) {
			ib->oa_base = amdgpu_bo_gpu_offset(oa);
			ib->oa_size = amdgpu_bo_size(oa);
		}
	}
	/* wrap the last IB with user fence */
	if (parser->uf.bo) {
		struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1];

		/* UVD & VCE fw doesn't support user fences */
		if (ib->ring->type == AMDGPU_RING_TYPE_UVD ||
		    ib->ring->type == AMDGPU_RING_TYPE_VCE)
			return -EINVAL;

		ib->user = &parser->uf;
	}

	return 0;
}

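/* Resolve every DEPENDENCIES chunk to the fence it names and add those
 * fences to the sync object of the first IB, so this submission waits for
 * the submissions it depends on.
 */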
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
				  struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_ib *ib;
	int i, j, r;

	if (!p->num_ibs)
		return 0;

	/* Add dependencies to first IB */
	ib = &p->ibs[0];
	for (i = 0; i < p->nchunks; ++i) {
		struct drm_amdgpu_cs_chunk_dep *deps;
		struct amdgpu_cs_chunk *chunk;
		unsigned num_deps;

		chunk = &p->chunks[i];

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
			continue;

		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
		num_deps = chunk->length_dw * 4 /
			sizeof(struct drm_amdgpu_cs_chunk_dep);

		for (j = 0; j < num_deps; ++j) {
			struct amdgpu_ring *ring;
			struct amdgpu_ctx *ctx;
			struct fence *fence;

			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
					       deps[j].ip_instance,
					       deps[j].ring, &ring);
			if (r)
				return r;

			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
			if (ctx == NULL)
				return -EINVAL;

			fence = amdgpu_ctx_get_fence(ctx, ring,
						     deps[j].handle);
			if (IS_ERR(fence)) {
				r = PTR_ERR(fence);
				amdgpu_ctx_put(ctx);
				return r;

			} else if (fence) {
				r = amdgpu_sync_fence(adev, &ib->sync, fence);
				fence_put(fence);
				amdgpu_ctx_put(ctx);
				if (r)
					return r;
			}
		}
	}

	return 0;
}

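/* Scheduler callback that releases the IBs and the user fence BO once a
 * job has run.
 */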
static int amdgpu_cs_free_job(struct amdgpu_job *job)
{
	int i;
	if (job->ibs)
		for (i = 0; i < job->num_ibs; i++)
			amdgpu_ib_free(job->adev, &job->ibs[i]);
	kfree(job->ibs);
	if (job->uf.bo)
		drm_gem_object_unreference_unlocked(&job->uf.bo->gem_base);
	return 0;
}

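/**
 * amdgpu_cs_ioctl - submit a command stream
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Parse the chunks, validate and map the buffers, fill in the IBs and either
 * push them to the GPU scheduler or submit them directly to the ring.
 */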
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = dev->dev_private;
	union drm_amdgpu_cs *cs = data;
	struct amdgpu_cs_parser parser = {};
	bool reserved_buffers = false;
	int i, r;

	if (!adev->accel_working)
		return -EBUSY;

	parser.adev = adev;
	parser.filp = filp;

	r = amdgpu_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		amdgpu_cs_parser_fini(&parser, r, false);
		r = amdgpu_cs_handle_lockup(adev, r);
		return r;
	}
	r = amdgpu_cs_parser_relocs(&parser);
	if (r == -ENOMEM)
		DRM_ERROR("Not enough memory for command submission!\n");
	else if (r && r != -ERESTARTSYS)
		DRM_ERROR("Failed to process the buffer list %d!\n", r);
	else if (!r) {
		reserved_buffers = true;
		r = amdgpu_cs_ib_fill(adev, &parser);
	}

	if (!r) {
		r = amdgpu_cs_dependencies(adev, &parser);
		if (r)
			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
	}

	if (r)
		goto out;

	for (i = 0; i < parser.num_ibs; i++)
		trace_amdgpu_cs(&parser, i);

	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
	if (r)
		goto out;

	if (amdgpu_enable_scheduler && parser.num_ibs) {
		struct amdgpu_ring *ring = parser.ibs->ring;
		struct amd_sched_fence *fence;
		struct amdgpu_job *job;

		job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
		if (!job) {
			r = -ENOMEM;
			goto out;
		}

		job->base.sched = &ring->sched;
		job->base.s_entity = &parser.ctx->rings[ring->idx].entity;
		job->adev = parser.adev;
		job->owner = parser.filp;
		job->free_job = amdgpu_cs_free_job;

		job->ibs = parser.ibs;
		job->num_ibs = parser.num_ibs;
		parser.ibs = NULL;
		parser.num_ibs = 0;

		if (job->ibs[job->num_ibs - 1].user) {
			job->uf = parser.uf;
			job->ibs[job->num_ibs - 1].user = &job->uf;
			parser.uf.bo = NULL;
		}

		fence = amd_sched_fence_create(job->base.s_entity,
					       parser.filp);
		if (!fence) {
			r = -ENOMEM;
			amdgpu_cs_free_job(job);
			kfree(job);
			goto out;
		}
		job->base.s_fence = fence;
		parser.fence = fence_get(&fence->base);

		cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring,
						      &fence->base);
		job->ibs[job->num_ibs - 1].sequence = cs->out.handle;

		trace_amdgpu_cs_ioctl(job);
		amd_sched_entity_push_job(&job->base);

	} else {
		struct amdgpu_fence *fence;

		r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs,
				       parser.filp);
		fence = parser.ibs[parser.num_ibs - 1].fence;
		parser.fence = fence_get(&fence->base);
		cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence;
	}

out:
	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
	r = amdgpu_cs_handle_lockup(adev, r);

	return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
{
	union drm_amdgpu_wait_cs *wait = data;
	struct amdgpu_device *adev = dev->dev_private;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
	struct amdgpu_ring *ring = NULL;
	struct amdgpu_ctx *ctx;
	struct fence *fence;
	long r;

	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
			       wait->in.ring, &ring);
	if (r)
		return r;

	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
	if (ctx == NULL)
		return -EINVAL;

	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
		r = fence_wait_timeout(fence, true, timeout);
		fence_put(fence);
	} else
		r = 1;

	amdgpu_ctx_put(ctx);
	if (r < 0)
		return r;

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r == 0);

	return 0;
}

/**
 * amdgpu_cs_find_mapping - find bo_va mapping for a VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns the mapping structure when found, NULL
 * otherwise.
 */
struct amdgpu_bo_va_mapping *
amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
		       uint64_t addr, struct amdgpu_bo **bo)
{
	struct amdgpu_bo_list_entry *reloc;
	struct amdgpu_bo_va_mapping *mapping;

	addr /= AMDGPU_GPU_PAGE_SIZE;

	list_for_each_entry(reloc, &parser->validated, tv.head) {
		if (!reloc->bo_va)
			continue;

		list_for_each_entry(mapping, &reloc->bo_va->valids, list) {
			if (mapping->it.start > addr ||
			    addr > mapping->it.last)
				continue;

			*bo = reloc->bo_va->bo;
			return mapping;
		}

		list_for_each_entry(mapping, &reloc->bo_va->invalids, list) {
			if (mapping->it.start > addr ||
			    addr > mapping->it.last)
				continue;

			*bo = reloc->bo_va->bo;
			return mapping;
		}
	}

	return NULL;
}