drm/i915/guc: Insert submit fences between requests in parent-child relationship
The GuC must receive requests in the order submitted for contexts in a parent-child relationship to function correctly. To ensure this, insert a submit fence between the current request and the last request submitted for requests / contexts in a parent-child relationship. This is conceptually similar to a single timeline. Signed-off-by: Matthew Brost <matthew.brost@intel.com> Cc: John Harrison <John.C.Harrison@Intel.com> Reviewed-by: John Harrison <John.C.Harrison@Intel.com> Signed-off-by: John Harrison <John.C.Harrison@Intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20211014172005.27155-14-matthew.brost@intel.com
This commit is contained in:
parent
6b540bf6f1
commit
bc95520491
@ -77,6 +77,11 @@ intel_context_to_parent(struct intel_context *ce)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool intel_context_is_parallel(struct intel_context *ce)
|
||||||
|
{
|
||||||
|
return intel_context_is_child(ce) || intel_context_is_parent(ce);
|
||||||
|
}
|
||||||
|
|
||||||
void intel_context_bind_parent_child(struct intel_context *parent,
|
void intel_context_bind_parent_child(struct intel_context *parent,
|
||||||
struct intel_context *child);
|
struct intel_context *child);
|
||||||
|
|
||||||
|
@ -237,6 +237,12 @@ struct intel_context {
|
|||||||
};
|
};
|
||||||
/** @parent: pointer to parent if child */
|
/** @parent: pointer to parent if child */
|
||||||
struct intel_context *parent;
|
struct intel_context *parent;
|
||||||
|
/**
|
||||||
|
* @last_rq: last request submitted on a parallel context, used
|
||||||
|
* to insert submit fences between requests in the parallel
|
||||||
|
* context
|
||||||
|
*/
|
||||||
|
struct i915_request *last_rq;
|
||||||
/** @number_children: number of children if parent */
|
/** @number_children: number of children if parent */
|
||||||
u8 number_children;
|
u8 number_children;
|
||||||
/** @guc: GuC specific members for parallel submission */
|
/** @guc: GuC specific members for parallel submission */
|
||||||
|
@ -684,8 +684,7 @@ static inline int rq_prio(const struct i915_request *rq)
|
|||||||
|
|
||||||
static bool is_multi_lrc_rq(struct i915_request *rq)
|
static bool is_multi_lrc_rq(struct i915_request *rq)
|
||||||
{
|
{
|
||||||
return intel_context_is_child(rq->context) ||
|
return intel_context_is_parallel(rq->context);
|
||||||
intel_context_is_parent(rq->context);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool can_merge_rq(struct i915_request *rq,
|
static bool can_merge_rq(struct i915_request *rq,
|
||||||
@ -2873,6 +2872,8 @@ static void guc_parent_context_unpin(struct intel_context *ce)
|
|||||||
GEM_BUG_ON(!intel_context_is_parent(ce));
|
GEM_BUG_ON(!intel_context_is_parent(ce));
|
||||||
GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
|
GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
|
||||||
|
|
||||||
|
if (ce->parallel.last_rq)
|
||||||
|
i915_request_put(ce->parallel.last_rq);
|
||||||
unpin_guc_id(guc, ce);
|
unpin_guc_id(guc, ce);
|
||||||
lrc_unpin(ce);
|
lrc_unpin(ce);
|
||||||
}
|
}
|
||||||
|
@ -1549,6 +1549,91 @@ i915_request_await_object(struct i915_request *to,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool is_parallel_rq(struct i915_request *rq)
|
||||||
|
{
|
||||||
|
return intel_context_is_parallel(rq->context);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct intel_context *request_to_parent(struct i915_request *rq)
|
||||||
|
{
|
||||||
|
return intel_context_to_parent(rq->context);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct i915_request *
|
||||||
|
__i915_request_ensure_parallel_ordering(struct i915_request *rq,
|
||||||
|
struct intel_timeline *timeline)
|
||||||
|
{
|
||||||
|
struct i915_request *prev;
|
||||||
|
|
||||||
|
GEM_BUG_ON(!is_parallel_rq(rq));
|
||||||
|
|
||||||
|
prev = request_to_parent(rq)->parallel.last_rq;
|
||||||
|
if (prev) {
|
||||||
|
if (!__i915_request_is_complete(prev)) {
|
||||||
|
i915_sw_fence_await_sw_fence(&rq->submit,
|
||||||
|
&prev->submit,
|
||||||
|
&rq->submitq);
|
||||||
|
|
||||||
|
if (rq->engine->sched_engine->schedule)
|
||||||
|
__i915_sched_node_add_dependency(&rq->sched,
|
||||||
|
&prev->sched,
|
||||||
|
&rq->dep,
|
||||||
|
0);
|
||||||
|
}
|
||||||
|
i915_request_put(prev);
|
||||||
|
}
|
||||||
|
|
||||||
|
request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
|
||||||
|
|
||||||
|
return to_request(__i915_active_fence_set(&timeline->last_request,
|
||||||
|
&rq->fence));
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct i915_request *
|
||||||
|
__i915_request_ensure_ordering(struct i915_request *rq,
|
||||||
|
struct intel_timeline *timeline)
|
||||||
|
{
|
||||||
|
struct i915_request *prev;
|
||||||
|
|
||||||
|
GEM_BUG_ON(is_parallel_rq(rq));
|
||||||
|
|
||||||
|
prev = to_request(__i915_active_fence_set(&timeline->last_request,
|
||||||
|
&rq->fence));
|
||||||
|
|
||||||
|
if (prev && !__i915_request_is_complete(prev)) {
|
||||||
|
bool uses_guc = intel_engine_uses_guc(rq->engine);
|
||||||
|
bool pow2 = is_power_of_2(READ_ONCE(prev->engine)->mask |
|
||||||
|
rq->engine->mask);
|
||||||
|
bool same_context = prev->context == rq->context;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The requests are supposed to be kept in order. However,
|
||||||
|
* we need to be wary in case the timeline->last_request
|
||||||
|
* is used as a barrier for external modification to this
|
||||||
|
* context.
|
||||||
|
*/
|
||||||
|
GEM_BUG_ON(same_context &&
|
||||||
|
i915_seqno_passed(prev->fence.seqno,
|
||||||
|
rq->fence.seqno));
|
||||||
|
|
||||||
|
if ((same_context && uses_guc) || (!uses_guc && pow2))
|
||||||
|
i915_sw_fence_await_sw_fence(&rq->submit,
|
||||||
|
&prev->submit,
|
||||||
|
&rq->submitq);
|
||||||
|
else
|
||||||
|
__i915_sw_fence_await_dma_fence(&rq->submit,
|
||||||
|
&prev->fence,
|
||||||
|
&rq->dmaq);
|
||||||
|
if (rq->engine->sched_engine->schedule)
|
||||||
|
__i915_sched_node_add_dependency(&rq->sched,
|
||||||
|
&prev->sched,
|
||||||
|
&rq->dep,
|
||||||
|
0);
|
||||||
|
}
|
||||||
|
|
||||||
|
return prev;
|
||||||
|
}
|
||||||
|
|
||||||
static struct i915_request *
|
static struct i915_request *
|
||||||
__i915_request_add_to_timeline(struct i915_request *rq)
|
__i915_request_add_to_timeline(struct i915_request *rq)
|
||||||
{
|
{
|
||||||
@ -1574,38 +1659,21 @@ __i915_request_add_to_timeline(struct i915_request *rq)
|
|||||||
* complete (to maximise our greedy late load balancing) and this
|
* complete (to maximise our greedy late load balancing) and this
|
||||||
* precludes optimising to use semaphores serialisation of a single
|
* precludes optimising to use semaphores serialisation of a single
|
||||||
* timeline across engines.
|
* timeline across engines.
|
||||||
|
*
|
||||||
|
* We do not order parallel submission requests on the timeline as each
|
||||||
|
* parallel submission context has its own timeline and the ordering
|
||||||
|
* rules for parallel requests are that they must be submitted in the
|
||||||
|
* order received from the execbuf IOCTL. So rather than using the
|
||||||
|
* timeline we store a pointer to the last request submitted in the
|
||||||
|
* relationship in the parent context and insert a submission fence
|
||||||
|
* between that request and the request passed into this function or
|
||||||
|
* alternatively we use a completion fence if the gem context has a single
|
||||||
|
* timeline and this is the first submission of an execbuf IOCTL.
|
||||||
*/
|
*/
|
||||||
prev = to_request(__i915_active_fence_set(&timeline->last_request,
|
if (likely(!is_parallel_rq(rq)))
|
||||||
&rq->fence));
|
prev = __i915_request_ensure_ordering(rq, timeline);
|
||||||
if (prev && !__i915_request_is_complete(prev)) {
|
else
|
||||||
bool uses_guc = intel_engine_uses_guc(rq->engine);
|
prev = __i915_request_ensure_parallel_ordering(rq, timeline);
|
||||||
|
|
||||||
/*
|
|
||||||
* The requests are supposed to be kept in order. However,
|
|
||||||
* we need to be wary in case the timeline->last_request
|
|
||||||
* is used as a barrier for external modification to this
|
|
||||||
* context.
|
|
||||||
*/
|
|
||||||
GEM_BUG_ON(prev->context == rq->context &&
|
|
||||||
i915_seqno_passed(prev->fence.seqno,
|
|
||||||
rq->fence.seqno));
|
|
||||||
|
|
||||||
if ((!uses_guc &&
|
|
||||||
is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) ||
|
|
||||||
(uses_guc && prev->context == rq->context))
|
|
||||||
i915_sw_fence_await_sw_fence(&rq->submit,
|
|
||||||
&prev->submit,
|
|
||||||
&rq->submitq);
|
|
||||||
else
|
|
||||||
__i915_sw_fence_await_dma_fence(&rq->submit,
|
|
||||||
&prev->fence,
|
|
||||||
&rq->dmaq);
|
|
||||||
if (rq->engine->sched_engine->schedule)
|
|
||||||
__i915_sched_node_add_dependency(&rq->sched,
|
|
||||||
&prev->sched,
|
|
||||||
&rq->dep,
|
|
||||||
0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Make sure that no request gazumped us - if it was allocated after
|
* Make sure that no request gazumped us - if it was allocated after
|
||||||
|
Loading…
x
Reference in New Issue
Block a user