drm/doc/rfc: i915 new parallel submission uAPI plan
Add entry for i915 new parallel submission uAPI plan. v2: (Daniel Vetter): - Expand logical order explaination - Add dummy header - Only allow N BBs in execbuf IOCTL - Configure parallel submission per slot not per gem context v3: (Marcin Ślusarz): - Lot's of typos / bad english fixed (Tvrtko Ursulin): - Consistent pseudo code, clean up wording in descriptions v4: (Daniel Vetter) - Drop flags - Add kernel doc - Reword a few things / fix typos (Tvrtko) - Reword a few things / fix typos v5: (Checkpatch) - Fix typos (Docs) - Fix warning Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Tony Ye <tony.ye@intel.com> CC: Carl Zhang <carl.zhang@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Jason Ekstrand <jason@jlekstrand.net> Signed-off-by: Matthew Brost <matthew.brost@intel.com> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Acked-by: Tony Ye <tony.ye@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20210629193511.124099-3-matthew.brost@intel.com
This commit is contained in:
parent
f587623b78
commit
0454a490bd
122
Documentation/gpu/rfc/i915_parallel_execbuf.h
Normal file
122
Documentation/gpu/rfc/i915_parallel_execbuf.h
Normal file
@ -0,0 +1,122 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/*
|
||||
* Copyright © 2021 Intel Corporation
|
||||
*/
|
||||
|
||||
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
|
||||
|
||||
/**
|
||||
* struct drm_i915_context_engines_parallel_submit - Configure engine for
|
||||
* parallel submission.
|
||||
*
|
||||
* Setup a slot in the context engine map to allow multiple BBs to be submitted
|
||||
* in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
|
||||
* in parallel. Multiple hardware contexts are created internally in the i915
|
||||
* run these BBs. Once a slot is configured for N BBs only N BBs can be
|
||||
* submitted in each execbuf IOCTL and this is implicit behavior e.g. The user
|
||||
* doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
|
||||
* many BBs there are based on the slot's configuration. The N BBs are the last
|
||||
* N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
|
||||
*
|
||||
* The default placement behavior is to create implicit bonds between each
|
||||
* context if each context maps to more than 1 physical engine (e.g. context is
|
||||
* a virtual engine). Also we only allow contexts of same engine class and these
|
||||
* contexts must be in logically contiguous order. Examples of the placement
|
||||
* behavior described below. Lastly, the default is to not allow BBs to
|
||||
* preempted mid BB rather insert coordinated preemption on all hardware
|
||||
* contexts between each set of BBs. Flags may be added in the future to change
|
||||
* both of these default behaviors.
|
||||
*
|
||||
* Returns -EINVAL if hardware context placement configuration is invalid or if
|
||||
* the placement configuration isn't supported on the platform / submission
|
||||
* interface.
|
||||
* Returns -ENODEV if extension isn't supported on the platform / submission
|
||||
* interface.
|
||||
*
|
||||
* .. code-block:: none
|
||||
*
|
||||
* Example 1 pseudo code:
|
||||
* CS[X] = generic engine of same class, logical instance X
|
||||
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
|
||||
* set_engines(INVALID)
|
||||
* set_parallel(engine_index=0, width=2, num_siblings=1,
|
||||
* engines=CS[0],CS[1])
|
||||
*
|
||||
* Results in the following valid placement:
|
||||
* CS[0], CS[1]
|
||||
*
|
||||
* Example 2 pseudo code:
|
||||
* CS[X] = generic engine of same class, logical instance X
|
||||
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
|
||||
* set_engines(INVALID)
|
||||
* set_parallel(engine_index=0, width=2, num_siblings=2,
|
||||
* engines=CS[0],CS[2],CS[1],CS[3])
|
||||
*
|
||||
* Results in the following valid placements:
|
||||
* CS[0], CS[1]
|
||||
* CS[2], CS[3]
|
||||
*
|
||||
* This can also be thought of as 2 virtual engines described by 2-D array
|
||||
* in the engines the field with bonds placed between each index of the
|
||||
* virtual engines. e.g. CS[0] is bonded to CS[1], CS[2] is bonded to
|
||||
* CS[3].
|
||||
* VE[0] = CS[0], CS[2]
|
||||
* VE[1] = CS[1], CS[3]
|
||||
*
|
||||
* Example 3 pseudo code:
|
||||
* CS[X] = generic engine of same class, logical instance X
|
||||
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
|
||||
* set_engines(INVALID)
|
||||
* set_parallel(engine_index=0, width=2, num_siblings=2,
|
||||
* engines=CS[0],CS[1],CS[1],CS[3])
|
||||
*
|
||||
* Results in the following valid and invalid placements:
|
||||
* CS[0], CS[1]
|
||||
* CS[1], CS[3] - Not logical contiguous, return -EINVAL
|
||||
*/
|
||||
struct drm_i915_context_engines_parallel_submit {
|
||||
/**
|
||||
* @base: base user extension.
|
||||
*/
|
||||
struct i915_user_extension base;
|
||||
|
||||
/**
|
||||
* @engine_index: slot for parallel engine
|
||||
*/
|
||||
__u16 engine_index;
|
||||
|
||||
/**
|
||||
* @width: number of contexts per parallel engine
|
||||
*/
|
||||
__u16 width;
|
||||
|
||||
/**
|
||||
* @num_siblings: number of siblings per context
|
||||
*/
|
||||
__u16 num_siblings;
|
||||
|
||||
/**
|
||||
* @mbz16: reserved for future use; must be zero
|
||||
*/
|
||||
__u16 mbz16;
|
||||
|
||||
/**
|
||||
* @flags: all undefined flags must be zero, currently not defined flags
|
||||
*/
|
||||
__u64 flags;
|
||||
|
||||
/**
|
||||
* @mbz64: reserved for future use; must be zero
|
||||
*/
|
||||
__u64 mbz64[3];
|
||||
|
||||
/**
|
||||
* @engines: 2-d array of engine instances to configure parallel engine
|
||||
*
|
||||
* length = width (i) * num_siblings (j)
|
||||
* index = j + i * num_siblings
|
||||
*/
|
||||
struct i915_engine_class_instance engines[0];
|
||||
|
||||
} __packed;
|
||||
|
@ -88,4 +88,61 @@ Spec references:
|
||||
|
||||
New parallel submission uAPI
|
||||
============================
|
||||
Details to come in a following patch.
|
||||
The existing bonding uAPI is completely broken with GuC submission because
|
||||
whether a submission is a single context submit or parallel submit isn't known
|
||||
until execbuf time activated via the I915_SUBMIT_FENCE. To submit multiple
|
||||
contexts in parallel with the GuC the context must be explicitly registered with
|
||||
N contexts and all N contexts must be submitted in a single command to the GuC.
|
||||
The GuC interfaces do not support dynamically changing between N contexts as the
|
||||
bonding uAPI does. Hence the need for a new parallel submission interface. Also
|
||||
the legacy bonding uAPI is quite confusing and not intuitive at all. Furthermore
|
||||
I915_SUBMIT_FENCE is by design a future fence, so not really something we should
|
||||
continue to support.
|
||||
|
||||
The new parallel submission uAPI consists of 3 parts:
|
||||
|
||||
* Export engines logical mapping
|
||||
* A 'set_parallel' extension to configure contexts for parallel
|
||||
submission
|
||||
* Extend execbuf2 IOCTL to support submitting N BBs in a single IOCTL
|
||||
|
||||
Export engines logical mapping
|
||||
------------------------------
|
||||
Certain use cases require BBs to be placed on engine instances in logical order
|
||||
(e.g. split-frame on gen11+). The logical mapping of engine instances can change
|
||||
based on fusing. Rather than making UMDs be aware of fusing, simply expose the
|
||||
logical mapping with the existing query engine info IOCTL. Also the GuC
|
||||
submission interface currently only supports submitting multiple contexts to
|
||||
engines in logical order which is a new requirement compared to execlists.
|
||||
Lastly, all current platforms have at most 2 engine instances and the logical
|
||||
order is the same as uAPI order. This will change on platforms with more than 2
|
||||
engine instances.
|
||||
|
||||
A single bit will be added to drm_i915_engine_info.flags indicating that the
|
||||
logical instance has been returned and a new field,
|
||||
drm_i915_engine_info.logical_instance, returns the logical instance.
|
||||
|
||||
A 'set_parallel' extension to configure contexts for parallel submission
|
||||
------------------------------------------------------------------------
|
||||
The 'set_parallel' extension configures a slot for parallel submission of N BBs.
|
||||
It is a setup step that must be called before using any of the contexts. See
|
||||
I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE or I915_CONTEXT_ENGINES_EXT_BOND for
|
||||
similar existing examples. Once a slot is configured for parallel submission the
|
||||
execbuf2 IOCTL can be called submitting N BBs in a single IOCTL. Initially only
|
||||
supports GuC submission. Execlists supports can be added later if needed.
|
||||
|
||||
Add I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT and
|
||||
drm_i915_context_engines_parallel_submit to the uAPI to implement this
|
||||
extension.
|
||||
|
||||
.. kernel-doc:: Documentation/gpu/rfc/i915_parallel_execbuf.h
|
||||
:functions: drm_i915_context_engines_parallel_submit
|
||||
|
||||
Extend execbuf2 IOCTL to support submitting N BBs in a single IOCTL
|
||||
-------------------------------------------------------------------
|
||||
Contexts that have been configured with the 'set_parallel' extension can only
|
||||
submit N BBs in a single execbuf2 IOCTL. The BBs are either the last N objects
|
||||
in the drm_i915_gem_exec_object2 list or the first N if I915_EXEC_BATCH_FIRST is
|
||||
set. The number of BBs is implicit based on the slot submitted and how it has
|
||||
been configured by 'set_parallel' or other extensions. No uAPI changes are
|
||||
required to the execbuf2 IOCTL.
|
||||
|
Loading…
x
Reference in New Issue
Block a user