linux/drivers/gpu/drm/i915/gt/intel_timeline.c
commit c3b147604f
Author: Matthew Auld
Subject: drm/i915: drop the __i915_active_call pointer packing
We use some of the lower bits of the retire function pointer for
potential flags, which is quite thorny: the caller has to remember to
give the function the correct alignment with __i915_active_call,
otherwise we might incorrectly unpack the pointer and jump to some
garbage address later. Instead of all this, let's just pass the flags
along as a separate parameter.

Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Suggested-by: Daniel Vetter <daniel@ffwll.ch>
References: ca419f407b43 ("drm/i915: Fix crash in auto_retire")
References: d8e44e4dd221 ("drm/i915/overlay: Fix active retire callback alignment")
References: fd5f262db118 ("drm/i915/selftests: Fix active retire callback alignment")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210504164136.96456-1-matthew.auld@intel.com
Committed: 2021-05-05 11:36:23 +01:00
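
As a rough sketch of the change described above (hypothetical names, not the
real i915_active API), the old approach stashed flags in the low bits of a
specially aligned retire callback, while the new approach stores the flags
alongside a plain function pointer. In the file below, i915_active_init()
correspondingly takes the flags as an explicit fourth argument (see the call
in intel_timeline_init()).

/* Illustration only: my_active, old_init() and new_init() are made up. */
#include <stdint.h>

#define MY_FLAG_RETIRE_SLEEPS 0x1ul     /* hypothetical flag in the low bits */

typedef void (*retire_fn)(void *data);

struct my_active {
        retire_fn retire;       /* new scheme: plain, unmodified pointer */
        unsigned long flags;    /* new scheme: flags stored separately */
};

/*
 * Old scheme: pack the flag into bit 0 of the callback pointer. Every
 * callback must then be over-aligned (e.g. __aligned(2)), or masking the
 * flag back out yields a garbage address.
 */
static inline void old_init(struct my_active *ref, retire_fn fn,
                            unsigned long flags)
{
        ref->retire = (retire_fn)((uintptr_t)fn | flags);
}

/* New scheme: no alignment requirement, no unpacking to get wrong. */
static inline void new_init(struct my_active *ref, retire_fn fn,
                            unsigned long flags)
{
        ref->retire = fn;
        ref->flags = flags;
}

The file at this commit follows.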


// SPDX-License-Identifier: MIT
/*
 * Copyright © 2016-2018 Intel Corporation
 */

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"

#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define TIMELINE_SEQNO_BYTES 8
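
/* Back the HWSP with a page of internal memory, wrapped in a GGTT vma. */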
static struct i915_vma *hwsp_alloc(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;

        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
        if (IS_ERR(vma))
                i915_gem_object_put(obj);

        return vma;
}
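
/*
 * i915_active callbacks for the timeline: while the timeline is active we
 * keep an extra pin on the HWSP vma and a reference on the timeline itself;
 * both are dropped again when the last active request is retired.
 */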
static void __timeline_retire(struct i915_active *active)
{
        struct intel_timeline *tl =
                container_of(active, typeof(*tl), active);

        i915_vma_unpin(tl->hwsp_ggtt);
        intel_timeline_put(tl);
}

static int __timeline_active(struct i915_active *active)
{
        struct intel_timeline *tl =
                container_of(active, typeof(*tl), active);

        __i915_vma_pin(tl->hwsp_ggtt);
        intel_timeline_get(tl);
        return 0;
}
I915_SELFTEST_EXPORT int
intel_timeline_pin_map(struct intel_timeline *timeline)
{
        struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj;
        u32 ofs = offset_in_page(timeline->hwsp_offset);
        void *vaddr;

        vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
        if (IS_ERR(vaddr))
                return PTR_ERR(vaddr);

        timeline->hwsp_map = vaddr;
        timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
        clflush(vaddr + ofs);

        return 0;
}
static int intel_timeline_init(struct intel_timeline *timeline,
                               struct intel_gt *gt,
                               struct i915_vma *hwsp,
                               unsigned int offset)
{
        kref_init(&timeline->kref);
        atomic_set(&timeline->pin_count, 0);

        timeline->gt = gt;

        if (hwsp) {
                timeline->hwsp_offset = offset;
                timeline->hwsp_ggtt = i915_vma_get(hwsp);
        } else {
                timeline->has_initial_breadcrumb = true;
                hwsp = hwsp_alloc(gt);
                if (IS_ERR(hwsp))
                        return PTR_ERR(hwsp);
                timeline->hwsp_ggtt = hwsp;
        }

        timeline->hwsp_map = NULL;
        timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset;

        GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

        timeline->fence_context = dma_fence_context_alloc(1);

        mutex_init(&timeline->mutex);

        INIT_ACTIVE_FENCE(&timeline->last_request);
        INIT_LIST_HEAD(&timeline->requests);

        i915_syncmap_init(&timeline->sync);

        i915_active_init(&timeline->active, __timeline_active,
                         __timeline_retire, 0);

        return 0;
}
void intel_gt_init_timelines(struct intel_gt *gt)
{
        struct intel_gt_timelines *timelines = &gt->timelines;

        spin_lock_init(&timelines->lock);
        INIT_LIST_HEAD(&timelines->active_list);
}

static void intel_timeline_fini(struct rcu_head *rcu)
{
        struct intel_timeline *timeline =
                container_of(rcu, struct intel_timeline, rcu);

        if (timeline->hwsp_map)
                i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

        i915_vma_put(timeline->hwsp_ggtt);
        i915_active_fini(&timeline->active);
        kfree(timeline);
}
struct intel_timeline *
__intel_timeline_create(struct intel_gt *gt,
                        struct i915_vma *global_hwsp,
                        unsigned int offset)
{
        struct intel_timeline *timeline;
        int err;

        timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
        if (!timeline)
                return ERR_PTR(-ENOMEM);

        err = intel_timeline_init(timeline, gt, global_hwsp, offset);
        if (err) {
                kfree(timeline);
                return ERR_PTR(err);
        }

        return timeline;
}

struct intel_timeline *
intel_timeline_create_from_engine(struct intel_engine_cs *engine,
                                  unsigned int offset)
{
        struct i915_vma *hwsp = engine->status_page.vma;
        struct intel_timeline *tl;

        tl = __intel_timeline_create(engine->gt, hwsp, offset);
        if (IS_ERR(tl))
                return tl;

        /* Borrow a nearby lock; we only create these timelines during init */
        mutex_lock(&hwsp->vm->mutex);
        list_add_tail(&tl->engine_link, &engine->status_page.timelines);
        mutex_unlock(&hwsp->vm->mutex);

        return tl;
}
void __intel_timeline_pin(struct intel_timeline *tl)
{
        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        atomic_inc(&tl->pin_count);
}
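
/*
 * The first pin maps the HWSP, pins it into the GGTT and acquires the active
 * tracker; if a concurrent caller completes the first pin before our
 * atomic_fetch_inc(), the extra acquire/pin taken here is dropped again.
 */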
int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
        int err;

        if (atomic_add_unless(&tl->pin_count, 1, 0))
                return 0;

        if (!tl->hwsp_map) {
                err = intel_timeline_pin_map(tl);
                if (err)
                        return err;
        }

        err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
        if (err)
                return err;

        tl->hwsp_offset =
                i915_ggtt_offset(tl->hwsp_ggtt) +
                offset_in_page(tl->hwsp_offset);
        GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
                 tl->fence_context, tl->hwsp_offset);

        i915_active_acquire(&tl->active);
        if (atomic_fetch_inc(&tl->pin_count)) {
                i915_active_release(&tl->active);
                __i915_vma_unpin(tl->hwsp_ggtt);
        }

        return 0;
}
void intel_timeline_reset_seqno(const struct intel_timeline *tl)
{
        u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno;

        /* Must be pinned to be writable, and no requests in flight. */
        GEM_BUG_ON(!atomic_read(&tl->pin_count));

        memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
        WRITE_ONCE(*hwsp_seqno, tl->seqno);
        clflush(hwsp_seqno);
}
void intel_timeline_enter(struct intel_timeline *tl)
{
        struct intel_gt_timelines *timelines = &tl->gt->timelines;

        /*
         * Pretend we are serialised by the timeline->mutex.
         *
         * While generally true, there are a few exceptions to the rule
         * for the engine->kernel_context being used to manage power
         * transitions. As the engine_park may be called from under any
         * timeline, it uses the power mutex as a global serialisation
         * lock to prevent any other request entering its timeline.
         *
         * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
         *
         * However, intel_gt_retire_request() does not know which engine
         * it is retiring along and so cannot partake in the engine-pm
         * barrier, and there we use the tl->active_count as a means to
         * pin the timeline in the active_list while the locks are dropped.
         * Ergo, as that is outside of the engine-pm barrier, we need to
         * use atomic to manipulate tl->active_count.
         */
        lockdep_assert_held(&tl->mutex);

        if (atomic_add_unless(&tl->active_count, 1, 0))
                return;

        spin_lock(&timelines->lock);
        if (!atomic_fetch_inc(&tl->active_count)) {
                /*
                 * The HWSP is volatile, and may have been lost while inactive,
                 * e.g. across suspend/resume. Be paranoid, and ensure that
                 * the HWSP value matches our seqno so we don't proclaim
                 * the next request as already complete.
                 */
                intel_timeline_reset_seqno(tl);
                list_add_tail(&tl->link, &timelines->active_list);
        }
        spin_unlock(&timelines->lock);
}
void intel_timeline_exit(struct intel_timeline *tl)
{
        struct intel_gt_timelines *timelines = &tl->gt->timelines;

        /* See intel_timeline_enter() */
        lockdep_assert_held(&tl->mutex);

        GEM_BUG_ON(!atomic_read(&tl->active_count));
        if (atomic_add_unless(&tl->active_count, -1, 1))
                return;

        spin_lock(&timelines->lock);
        if (atomic_dec_and_test(&tl->active_count))
                list_del(&tl->link);
        spin_unlock(&timelines->lock);

        /*
         * Since this timeline is idle, all barriers upon which we were waiting
         * must also be complete and so we can discard the last used barriers
         * without loss of information.
         */
        i915_syncmap_free(&tl->sync);
}
static u32 timeline_advance(struct intel_timeline *tl)
{
        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

        return tl->seqno += 1 + tl->has_initial_breadcrumb;
}
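
/*
 * Slow path on seqno wraparound: move hwsp_seqno to the next
 * TIMELINE_SEQNO_BYTES slot within the HWSP page (keeping bit 5 of the
 * address clear for the MI_FLUSH_DW workaround) and reset the new slot.
 */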
static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
                           u32 *seqno)
{
        u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES);

        /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
        if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5)))
                next_ofs = offset_in_page(next_ofs + BIT(5));

        tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs;
        tl->hwsp_seqno = tl->hwsp_map + next_ofs;
        intel_timeline_reset_seqno(tl);

        *seqno = timeline_advance(tl);
        GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
        return 0;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
                             struct i915_request *rq,
                             u32 *seqno)
{
        *seqno = timeline_advance(tl);

        /* Replace the HWSP on wraparound for HW semaphores */
        if (unlikely(!*seqno && tl->has_initial_breadcrumb))
                return __intel_timeline_get_seqno(tl, seqno);

        return 0;
}
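
/*
 * Look up the GGTT address of @from's seqno slot for a semaphore wait and,
 * on success, add @to to the timeline's active tracker so the slot stays
 * alive. Returns 1 if @from has already completed, or -EINVAL if the
 * timeline cannot be used for semaphore waits (no initial breadcrumb).
 */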
int intel_timeline_read_hwsp(struct i915_request *from,
                             struct i915_request *to,
                             u32 *hwsp)
{
        struct intel_timeline *tl;
        int err;

        rcu_read_lock();
        tl = rcu_dereference(from->timeline);
        if (i915_request_signaled(from) ||
            !i915_active_acquire_if_busy(&tl->active))
                tl = NULL;

        if (tl) {
                /* hwsp_offset may wraparound, so use from->hwsp_seqno */
                *hwsp = i915_ggtt_offset(tl->hwsp_ggtt) +
                        offset_in_page(from->hwsp_seqno);
        }

        /* ensure we wait on the right request, if not, we completed */
        if (tl && __i915_request_is_complete(from)) {
                i915_active_release(&tl->active);
                tl = NULL;
        }
        rcu_read_unlock();

        if (!tl)
                return 1;

        /* Can't do semaphore waits on kernel context */
        if (!tl->has_initial_breadcrumb) {
                err = -EINVAL;
                goto out;
        }

        err = i915_active_add_request(&tl->active, to);

out:
        i915_active_release(&tl->active);
        return err;
}
void intel_timeline_unpin(struct intel_timeline *tl)
{
        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        if (!atomic_dec_and_test(&tl->pin_count))
                return;

        i915_active_release(&tl->active);
        __i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
        struct intel_timeline *timeline =
                container_of(kref, typeof(*timeline), kref);

        GEM_BUG_ON(atomic_read(&timeline->pin_count));
        GEM_BUG_ON(!list_empty(&timeline->requests));
        GEM_BUG_ON(timeline->retire);

        call_rcu(&timeline->rcu, intel_timeline_fini);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
        struct intel_gt_timelines *timelines = &gt->timelines;

        GEM_BUG_ON(!list_empty(&timelines->active_list));
}
void intel_gt_show_timelines(struct intel_gt *gt,
                             struct drm_printer *m,
                             void (*show_request)(struct drm_printer *m,
                                                  const struct i915_request *rq,
                                                  const char *prefix,
                                                  int indent))
{
        struct intel_gt_timelines *timelines = &gt->timelines;
        struct intel_timeline *tl, *tn;
        LIST_HEAD(free);

        spin_lock(&timelines->lock);
        list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
                unsigned long count, ready, inflight;
                struct i915_request *rq, *rn;
                struct dma_fence *fence;

                if (!mutex_trylock(&tl->mutex)) {
                        drm_printf(m, "Timeline %llx: busy; skipping\n",
                                   tl->fence_context);
                        continue;
                }

                intel_timeline_get(tl);
                GEM_BUG_ON(!atomic_read(&tl->active_count));
                atomic_inc(&tl->active_count); /* pin the list element */
                spin_unlock(&timelines->lock);

                count = 0;
                ready = 0;
                inflight = 0;
                list_for_each_entry_safe(rq, rn, &tl->requests, link) {
                        if (i915_request_completed(rq))
                                continue;

                        count++;
                        if (i915_request_is_ready(rq))
                                ready++;
                        if (i915_request_is_active(rq))
                                inflight++;
                }

                drm_printf(m, "Timeline %llx: { ", tl->fence_context);
                drm_printf(m, "count: %lu, ready: %lu, inflight: %lu",
                           count, ready, inflight);
                drm_printf(m, ", seqno: { current: %d, last: %d }",
                           *tl->hwsp_seqno, tl->seqno);
                fence = i915_active_fence_get(&tl->last_request);
                if (fence) {
                        drm_printf(m, ", engine: %s",
                                   to_request(fence)->engine->name);
                        dma_fence_put(fence);
                }
                drm_printf(m, " }\n");

                if (show_request) {
                        list_for_each_entry_safe(rq, rn, &tl->requests, link)
                                show_request(m, rq, "", 2);
                }

                mutex_unlock(&tl->mutex);
                spin_lock(&timelines->lock);

                /* Resume list iteration after reacquiring spinlock */
                list_safe_reset_next(tl, tn, link);
                if (atomic_dec_and_test(&tl->active_count))
                        list_del(&tl->link);

                /* Defer the final release to after the spinlock */
                if (refcount_dec_and_test(&tl->kref.refcount)) {
                        GEM_BUG_ON(atomic_read(&tl->active_count));
                        list_add(&tl->link, &free);
                }
        }
        spin_unlock(&timelines->lock);

        list_for_each_entry_safe(tl, tn, &free, link)
                __intel_timeline_free(&tl->kref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif