The major drawback of commit 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA") is that it disables RC6 while Skylake (and friends) is active, and we do not consider the GPU idle until all outstanding requests have been retired and the engine switched over to the kernel context. If userspace is idle, this task falls onto our background idle worker, which only runs roughly once a second, meaning that userspace has to have been idle for a couple of seconds before we enable RC6 again. Naturally, this causes us to consume considerably more energy than before as powersaving is effectively disabled while a display server (here's looking at you, Xorg) is running.

As execlists will get a completion event as each context is completed, we can use this interrupt to queue a retire worker bound to this engine to clean up idle timelines. We will then immediately notice the idle engine (without userspace intervention or the aid of the background retire worker) and start parking the GPU. Thus during light workloads, we will do much more work to idle the GPU faster... Hopefully with commensurate power saving!

v2: Watch context completions and only look at those local to the engine when retiring to reduce the amount of excess work we perform.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112315
References: 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA")
References: 2248a28384fe ("drm/i915/gen8+: Add RC6 CTX corruption WA")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191125105858.1718307-3-chris@chris-wilson.co.uk
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/workqueue.h>

#include "i915_drv.h" /* for_each_engine() */
#include "i915_request.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_timeline.h"

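/*
 * Retire completed requests along a timeline, oldest first, stopping at
 * the first request that has not yet completed.
 */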
static void retire_requests(struct intel_timeline *tl)
{
        struct i915_request *rq, *rn;

        list_for_each_entry_safe(rq, rn, &tl->requests, link)
                if (!i915_request_retire(rq))
                        break;
}

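/*
 * Kick the submission tasklet on every engine so that outstanding
 * completion events are processed before we inspect the timelines.
 */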
static void flush_submission(struct intel_gt *gt)
{
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        for_each_engine(engine, gt, id)
                intel_engine_flush_submission(engine);
}

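/*
 * Worker callback for the per-engine retirement list: claim the chain of
 * timelines queued by intel_engine_add_retire(), retire what has completed
 * on each (skipping any timeline already locked elsewhere) and drop the
 * reference taken when the timeline was queued.
 */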
static void engine_retire(struct work_struct *work)
{
        struct intel_engine_cs *engine =
                container_of(work, typeof(*engine), retire_work);
        struct intel_timeline *tl = xchg(&engine->retire, NULL);

        do {
                struct intel_timeline *next = xchg(&tl->retire, NULL);

                /*
                 * Our goal here is to retire _idle_ timelines as soon as
                 * possible (as they are idle, we do not expect userspace
                 * to be cleaning up anytime soon).
                 *
                 * If the timeline is currently locked, either it is being
                 * retired elsewhere or about to be!
                 */
                if (mutex_trylock(&tl->mutex)) {
                        retire_requests(tl);
                        mutex_unlock(&tl->mutex);
                }
                intel_timeline_put(tl);

                GEM_BUG_ON(!next);
                tl = ptr_mask_bits(next, 1);
        } while (tl);
}

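/*
 * Push @tl onto @engine's retirement list (a tagged, lockless single-linked
 * chain), returning true if the list was previously empty and the retire
 * worker therefore needs to be scheduled.
 */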
static bool add_retire(struct intel_engine_cs *engine,
                       struct intel_timeline *tl)
{
        struct intel_timeline *first;

        /*
         * We open-code a llist here to include the additional tag [BIT(0)]
         * so that we know when the timeline is already on a
         * retirement queue: either this engine or another.
         *
         * However, we rely on the fact that a timeline can only be active
         * on a single engine at any one time, and that add_retire() is
         * called before the engine releases the timeline, i.e. before it
         * is transferred to another engine to retire.
         */

        if (READ_ONCE(tl->retire)) /* already queued */
                return false;

        intel_timeline_get(tl);
        first = READ_ONCE(engine->retire);
        do
                tl->retire = ptr_pack_bits(first, 1, 1);
        while (!try_cmpxchg(&engine->retire, &first, tl));

        return !first;
}

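/*
 * Queue a timeline for retirement on @engine. As described in the commit
 * message above, this is intended to be called from the engine's
 * context-completion path so that idle timelines are retired (and the GPU
 * parked) without waiting for the background retire worker.
 */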
void intel_engine_add_retire(struct intel_engine_cs *engine,
                             struct intel_timeline *tl)
{
        if (add_retire(engine, tl))
                schedule_work(&engine->retire_work);
}

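/* One-time setup of the per-engine retirement worker. */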
void intel_engine_init_retire(struct intel_engine_cs *engine)
{
        INIT_WORK(&engine->retire_work, engine_retire);
}

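/*
 * Flush any pending retirement work and check that nothing remains queued
 * before the engine is torn down.
 */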
void intel_engine_fini_retire(struct intel_engine_cs *engine)
{
        flush_work(&engine->retire_work);
        GEM_BUG_ON(engine->retire);
}

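/*
 * Walk all active timelines on the GT, optionally waiting up to @timeout
 * (in jiffies) for the last request on each, and retire whatever has
 * completed. A negative @timeout requests an uninterruptible wait. Returns
 * the remaining timeout if any timeline is still busy, or 0 once idle.
 */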
long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
{
        struct intel_gt_timelines *timelines = &gt->timelines;
        struct intel_timeline *tl, *tn;
        unsigned long active_count = 0;
        bool interruptible;
        LIST_HEAD(free);

        interruptible = true;
        if (unlikely(timeout < 0))
                timeout = -timeout, interruptible = false;

        flush_submission(gt); /* kick the ksoftirqd tasklets */

        spin_lock(&timelines->lock);
        list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
                if (!mutex_trylock(&tl->mutex)) {
                        active_count++; /* report busy to caller, try again? */
                        continue;
                }

                intel_timeline_get(tl);
                GEM_BUG_ON(!atomic_read(&tl->active_count));
                atomic_inc(&tl->active_count); /* pin the list element */
                spin_unlock(&timelines->lock);

                if (timeout > 0) {
                        struct dma_fence *fence;

                        fence = i915_active_fence_get(&tl->last_request);
                        if (fence) {
                                timeout = dma_fence_wait_timeout(fence,
                                                                 interruptible,
                                                                 timeout);
                                dma_fence_put(fence);
                        }
                }

                retire_requests(tl);

                spin_lock(&timelines->lock);

                /* Resume iteration after dropping lock */
                list_safe_reset_next(tl, tn, link);
                if (atomic_dec_and_test(&tl->active_count))
                        list_del(&tl->link);
                else
                        active_count += !!rcu_access_pointer(tl->last_request.fence);

                mutex_unlock(&tl->mutex);

                /* Defer the final release to after the spinlock */
                if (refcount_dec_and_test(&tl->kref.refcount)) {
                        GEM_BUG_ON(atomic_read(&tl->active_count));
                        list_add(&tl->link, &free);
                }
        }
        spin_unlock(&timelines->lock);

        list_for_each_entry_safe(tl, tn, &free, link)
                __intel_timeline_free(&tl->kref);

        return active_count ? timeout : 0;
}

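/*
 * Retire repeatedly until the GT is idle, the timeout expires, or a signal
 * is pending (in which case -EINTR is returned).
 */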
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
{
        /* If the device is asleep, we have no requests outstanding */
        if (!intel_gt_pm_is_awake(gt))
                return 0;

        while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
                cond_resched();
                if (signal_pending(current))
                        return -EINTR;
        }

        return timeout;
}

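/*
 * The background retire worker referred to in the commit message: it
 * re-arms itself roughly once a second and retires whatever has completed
 * across the whole GT.
 */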
static void retire_work_handler(struct work_struct *work)
{
        struct intel_gt *gt =
                container_of(work, typeof(*gt), requests.retire_work.work);

        schedule_delayed_work(&gt->requests.retire_work,
                              round_jiffies_up_relative(HZ));
        intel_gt_retire_requests(gt);
}

void intel_gt_init_requests(struct intel_gt *gt)
{
        INIT_DELAYED_WORK(&gt->requests.retire_work, retire_work_handler);
}

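/*
 * The delayed retire worker only needs to run while the GT is awake, so it
 * is cancelled when the GT parks and re-armed when the GT unparks.
 */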
void intel_gt_park_requests(struct intel_gt *gt)
{
        cancel_delayed_work(&gt->requests.retire_work);
}

void intel_gt_unpark_requests(struct intel_gt *gt)
{
        schedule_delayed_work(&gt->requests.retire_work,
                              round_jiffies_up_relative(HZ));
}

void intel_gt_fini_requests(struct intel_gt *gt)
{
        /* Wait until the work is marked as finished before unloading! */
        cancel_delayed_work_sync(&gt->requests.retire_work);
}