drm/amdgpu: add timeline support in amdgpu CS v3
Syncobj wait/signal operations are appended to the command submission. v2: separate into two kinds of in/out_deps functions. v3: fix checking for timeline syncobj. Signed-off-by: Chunming Zhou <david1.zhou@amd.com> Cc: Tobias Hector <Tobias.Hector@amd.com> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
ecc4946f11
commit
2624dd154b
@ -436,6 +436,12 @@ struct amdgpu_cs_chunk {
|
||||
void *kdata;
|
||||
};
|
||||
|
||||
/*
 * One syncobj that has to be updated with the submission's fence once the
 * command submission has been pushed (see amdgpu_cs_post_dependencies()).
 * Entries are filled in by the SYNCOBJ_OUT and SYNCOBJ_TIMELINE_SIGNAL chunk
 * handlers and released in amdgpu_cs_parser_fini().
 */
struct amdgpu_cs_post_dep {
|
||||
/* Looked up with drm_syncobj_find(); dropped with drm_syncobj_put(). */
struct drm_syncobj *syncobj;
|
||||
/*
 * kmalloc'ed only for timeline-signal entries whose point is non-zero,
 * NULL otherwise. It is passed to drm_syncobj_add_point() and the pointer
 * is then reset to NULL; whatever is still set at teardown is kfree'd.
 * NOTE(review): presumably drm_syncobj_add_point() takes ownership of the
 * chain node - confirm against the DRM core.
 */
struct dma_fence_chain *chain;
|
||||
/*
 * Timeline value to signal. 0 for plain SYNCOBJ_OUT entries, in which
 * case drm_syncobj_replace_fence() is used instead of adding a point.
 */
u64 point;
|
||||
};
|
||||
|
||||
struct amdgpu_cs_parser {
|
||||
struct amdgpu_device *adev;
|
||||
struct drm_file *filp;
|
||||
@ -465,8 +471,8 @@ struct amdgpu_cs_parser {
|
||||
/* user fence */
|
||||
struct amdgpu_bo_list_entry uf_entry;
|
||||
|
||||
unsigned num_post_dep_syncobjs;
|
||||
struct drm_syncobj **post_dep_syncobjs;
|
||||
unsigned num_post_deps;
|
||||
struct amdgpu_cs_post_dep *post_deps;
|
||||
};
|
||||
|
||||
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
|
||||
|
@ -215,6 +215,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
|
||||
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -804,9 +806,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
|
||||
ttm_eu_backoff_reservation(&parser->ticket,
|
||||
&parser->validated);
|
||||
|
||||
for (i = 0; i < parser->num_post_dep_syncobjs; i++)
|
||||
drm_syncobj_put(parser->post_dep_syncobjs[i]);
|
||||
kfree(parser->post_dep_syncobjs);
|
||||
for (i = 0; i < parser->num_post_deps; i++) {
|
||||
drm_syncobj_put(parser->post_deps[i].syncobj);
|
||||
kfree(parser->post_deps[i].chain);
|
||||
}
|
||||
kfree(parser->post_deps);
|
||||
|
||||
dma_fence_put(parser->fence);
|
||||
|
||||
@ -1117,13 +1121,18 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
|
||||
}
|
||||
|
||||
static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
|
||||
uint32_t handle)
|
||||
uint32_t handle, u64 point,
|
||||
u64 flags)
|
||||
{
|
||||
int r;
|
||||
struct dma_fence *fence;
|
||||
r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence);
|
||||
if (r)
|
||||
int r;
|
||||
|
||||
r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
|
||||
handle, point, r);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
|
||||
dma_fence_put(fence);
|
||||
@ -1134,46 +1143,118 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
|
||||
static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
|
||||
struct amdgpu_cs_chunk *chunk)
|
||||
{
|
||||
struct drm_amdgpu_cs_chunk_sem *deps;
|
||||
unsigned num_deps;
|
||||
int i, r;
|
||||
struct drm_amdgpu_cs_chunk_sem *deps;
|
||||
|
||||
deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
sizeof(struct drm_amdgpu_cs_chunk_sem);
|
||||
|
||||
for (i = 0; i < num_deps; ++i) {
|
||||
r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
|
||||
r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
|
||||
0, 0);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
|
||||
struct amdgpu_cs_chunk *chunk)
|
||||
{
|
||||
struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
|
||||
unsigned num_deps;
|
||||
int i, r;
|
||||
|
||||
syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
sizeof(struct drm_amdgpu_cs_chunk_syncobj);
|
||||
for (i = 0; i < num_deps; ++i) {
|
||||
r = amdgpu_syncobj_lookup_and_add_to_sync(p,
|
||||
syncobj_deps[i].handle,
|
||||
syncobj_deps[i].point,
|
||||
syncobj_deps[i].flags);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
|
||||
struct amdgpu_cs_chunk *chunk)
|
||||
{
|
||||
struct drm_amdgpu_cs_chunk_sem *deps;
|
||||
unsigned num_deps;
|
||||
int i;
|
||||
struct drm_amdgpu_cs_chunk_sem *deps;
|
||||
|
||||
deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
sizeof(struct drm_amdgpu_cs_chunk_sem);
|
||||
|
||||
p->post_dep_syncobjs = kmalloc_array(num_deps,
|
||||
sizeof(struct drm_syncobj *),
|
||||
p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
|
||||
GFP_KERNEL);
|
||||
p->num_post_dep_syncobjs = 0;
|
||||
p->num_post_deps = 0;
|
||||
|
||||
if (!p->post_dep_syncobjs)
|
||||
if (!p->post_deps)
|
||||
return -ENOMEM;
|
||||
|
||||
|
||||
for (i = 0; i < num_deps; ++i) {
|
||||
p->post_deps[i].syncobj =
|
||||
drm_syncobj_find(p->filp, deps[i].handle);
|
||||
if (!p->post_deps[i].syncobj)
|
||||
return -EINVAL;
|
||||
p->post_deps[i].chain = NULL;
|
||||
p->post_deps[i].point = 0;
|
||||
p->num_post_deps++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
|
||||
struct amdgpu_cs_chunk
|
||||
*chunk)
|
||||
{
|
||||
struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
|
||||
unsigned num_deps;
|
||||
int i;
|
||||
|
||||
syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
sizeof(struct drm_amdgpu_cs_chunk_syncobj);
|
||||
|
||||
p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
|
||||
GFP_KERNEL);
|
||||
p->num_post_deps = 0;
|
||||
|
||||
if (!p->post_deps)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < num_deps; ++i) {
|
||||
p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
|
||||
if (!p->post_dep_syncobjs[i])
|
||||
return -EINVAL;
|
||||
p->num_post_dep_syncobjs++;
|
||||
struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
|
||||
|
||||
dep->chain = NULL;
|
||||
if (syncobj_deps[i].point) {
|
||||
dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
|
||||
if (!dep->chain)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dep->syncobj = drm_syncobj_find(p->filp,
|
||||
syncobj_deps[i].handle);
|
||||
if (!dep->syncobj) {
|
||||
kfree(dep->chain);
|
||||
return -EINVAL;
|
||||
}
|
||||
dep->point = syncobj_deps[i].point;
|
||||
p->num_post_deps++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1187,19 +1268,33 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
|
||||
|
||||
chunk = &p->chunks[i];
|
||||
|
||||
if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES ||
|
||||
chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
|
||||
switch (chunk->chunk_id) {
|
||||
case AMDGPU_CHUNK_ID_DEPENDENCIES:
|
||||
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
|
||||
r = amdgpu_cs_process_fence_dep(p, chunk);
|
||||
if (r)
|
||||
return r;
|
||||
} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
|
||||
break;
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
|
||||
r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
|
||||
if (r)
|
||||
return r;
|
||||
} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
|
||||
break;
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
|
||||
r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
|
||||
if (r)
|
||||
return r;
|
||||
break;
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
|
||||
r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
|
||||
if (r)
|
||||
return r;
|
||||
break;
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
|
||||
r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
|
||||
if (r)
|
||||
return r;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1210,8 +1305,17 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < p->num_post_dep_syncobjs; ++i)
|
||||
drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
|
||||
for (i = 0; i < p->num_post_deps; ++i) {
|
||||
if (p->post_deps[i].chain && p->post_deps[i].point) {
|
||||
drm_syncobj_add_point(p->post_deps[i].syncobj,
|
||||
p->post_deps[i].chain,
|
||||
p->fence, p->post_deps[i].point);
|
||||
p->post_deps[i].chain = NULL;
|
||||
} else {
|
||||
drm_syncobj_replace_fence(p->post_deps[i].syncobj,
|
||||
p->fence);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
||||
|
@ -528,6 +528,8 @@ struct drm_amdgpu_gem_va {
|
||||
#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05
|
||||
#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06
|
||||
#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07
|
||||
#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08
|
||||
#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09
|
||||
|
||||
struct drm_amdgpu_cs_chunk {
|
||||
__u32 chunk_id;
|
||||
@ -608,6 +610,12 @@ struct drm_amdgpu_cs_chunk_sem {
|
||||
__u32 handle;
|
||||
};
|
||||
|
||||
/*
 * Payload element of the AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT and
 * AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL chunks. A chunk carries an array
 * of these; the kernel derives the element count from the chunk length.
 */
struct drm_amdgpu_cs_chunk_syncobj {
|
||||
/* Syncobj handle, resolved against the submitting DRM file. */
__u32 handle;
|
||||
/*
 * Wait chunks: forwarded unchanged as the flags argument of
 * drm_syncobj_find_fence(). The signal-chunk handler does not read it.
 */
__u32 flags;
|
||||
/*
 * Wait chunks: timeline point whose fence is looked up.
 * Signal chunks: timeline point to add for this submission's fence;
 * 0 makes the kernel replace the syncobj's fence instead.
 */
__u64 point;
|
||||
};
|
||||
|
||||
#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0
|
||||
#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1
|
||||
#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2
|
||||
|
Loading…
x
Reference in New Issue
Block a user