drm/i915: Only wait on a pending flip if we intend to write to the buffer
... as if we are only reading from it, we can do that concurrently with the queued flip.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
parent
3d3dc149ed
commit
c59a333f73
@ -37,6 +37,7 @@ struct change_domains {
|
|||||||
uint32_t invalidate_domains;
|
uint32_t invalidate_domains;
|
||||||
uint32_t flush_domains;
|
uint32_t flush_domains;
|
||||||
uint32_t flush_rings;
|
uint32_t flush_rings;
|
||||||
|
uint32_t flips;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -190,6 +191,9 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
|
|||||||
if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
|
if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
|
||||||
i915_gem_release_mmap(obj);
|
i915_gem_release_mmap(obj);
|
||||||
|
|
||||||
|
if (obj->base.pending_write_domain)
|
||||||
|
cd->flips |= atomic_read(&obj->pending_flip);
|
||||||
|
|
||||||
/* The actual obj->write_domain will be updated with
|
/* The actual obj->write_domain will be updated with
|
||||||
* pending_write_domain after we emit the accumulated flush for all
|
* pending_write_domain after we emit the accumulated flush for all
|
||||||
* of our domain changes in execbuffers (which clears objects'
|
* of our domain changes in execbuffers (which clears objects'
|
||||||
@ -773,6 +777,39 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
|
|||||||
return intel_ring_sync(to, from, seqno - 1);
|
return intel_ring_sync(to, from, seqno - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
|
||||||
|
{
|
||||||
|
u32 plane, flip_mask;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* Check for any pending flips. As we only maintain a flip queue depth
|
||||||
|
* of 1, we can simply insert a WAIT for the next display flip prior
|
||||||
|
* to executing the batch and avoid stalling the CPU.
|
||||||
|
*/
|
||||||
|
|
||||||
|
for (plane = 0; flips >> plane; plane++) {
|
||||||
|
if (((flips >> plane) & 1) == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (plane)
|
||||||
|
flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
|
||||||
|
else
|
||||||
|
flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
|
||||||
|
|
||||||
|
ret = intel_ring_begin(ring, 2);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
|
||||||
|
intel_ring_emit(ring, MI_NOOP);
|
||||||
|
intel_ring_advance(ring);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
|
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
|
||||||
struct list_head *objects)
|
struct list_head *objects)
|
||||||
@ -781,9 +818,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
|
|||||||
struct change_domains cd;
|
struct change_domains cd;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
cd.invalidate_domains = 0;
|
memset(&cd, 0, sizeof(cd));
|
||||||
cd.flush_domains = 0;
|
|
||||||
cd.flush_rings = 0;
|
|
||||||
list_for_each_entry(obj, objects, exec_list)
|
list_for_each_entry(obj, objects, exec_list)
|
||||||
i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
|
i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
|
||||||
|
|
||||||
@ -796,6 +831,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cd.flips) {
|
||||||
|
ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
list_for_each_entry(obj, objects, exec_list) {
|
list_for_each_entry(obj, objects, exec_list) {
|
||||||
ret = i915_gem_execbuffer_sync_rings(obj, ring);
|
ret = i915_gem_execbuffer_sync_rings(obj, ring);
|
||||||
if (ret)
|
if (ret)
|
||||||
@ -842,47 +883,6 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring,
|
|
||||||
struct list_head *objects)
|
|
||||||
{
|
|
||||||
struct drm_i915_gem_object *obj;
|
|
||||||
int flips;
|
|
||||||
|
|
||||||
/* Check for any pending flips. As we only maintain a flip queue depth
|
|
||||||
* of 1, we can simply insert a WAIT for the next display flip prior
|
|
||||||
* to executing the batch and avoid stalling the CPU.
|
|
||||||
*/
|
|
||||||
flips = 0;
|
|
||||||
list_for_each_entry(obj, objects, exec_list) {
|
|
||||||
if (obj->base.write_domain)
|
|
||||||
flips |= atomic_read(&obj->pending_flip);
|
|
||||||
}
|
|
||||||
if (flips) {
|
|
||||||
int plane, flip_mask, ret;
|
|
||||||
|
|
||||||
for (plane = 0; flips >> plane; plane++) {
|
|
||||||
if (((flips >> plane) & 1) == 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (plane)
|
|
||||||
flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
|
|
||||||
else
|
|
||||||
flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
|
|
||||||
|
|
||||||
ret = intel_ring_begin(ring, 2);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
|
|
||||||
intel_ring_emit(ring, MI_NOOP);
|
|
||||||
intel_ring_advance(ring);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
i915_gem_execbuffer_move_to_active(struct list_head *objects,
|
i915_gem_execbuffer_move_to_active(struct list_head *objects,
|
||||||
struct intel_ring_buffer *ring,
|
struct intel_ring_buffer *ring,
|
||||||
@ -1133,10 +1133,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
ret = i915_gem_execbuffer_wait_for_flips(ring, &objects);
|
|
||||||
if (ret)
|
|
||||||
goto err;
|
|
||||||
|
|
||||||
seqno = i915_gem_next_request_seqno(ring);
|
seqno = i915_gem_next_request_seqno(ring);
|
||||||
for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
|
for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
|
||||||
if (seqno < ring->sync_seqno[i]) {
|
if (seqno < ring->sync_seqno[i]) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user