d1051db85f
TLB cache invalidation can happen on two different situations: 1. synchronously, at __vma_put_pages(); 2. asynchronously. On the first case, TLB cache invalidation happens inside __vma_put_pages(). So, no need to do it later on. However, on the second case, the pages will keep in memory until __i915_vma_evict() is called. So, we need to store the TLB data at struct i915_vma_resource, in order to do a TLB cache invalidation before allowing userspace to re-use the same memory. So, i915_vma_resource_unbind() has gained a new parameter in order to store the TLB data at the second case. Document it. Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org> Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/aa55eef7e63b8f3d0f69b525db2dd2eb87e9db6b.1658924372.git.mchehab@kernel.org
426 lines
12 KiB
C
426 lines
12 KiB
C
// SPDX-License-Identifier: MIT
|
|
/*
|
|
* Copyright © 2021 Intel Corporation
|
|
*/
|
|
|
|
#include <linux/interval_tree_generic.h>
|
|
#include <linux/sched/mm.h>
|
|
|
|
#include "i915_sw_fence.h"
|
|
#include "i915_vma_resource.h"
|
|
#include "i915_drv.h"
|
|
#include "intel_memory_region.h"
|
|
|
|
#include "gt/intel_gtt.h"
|
|
|
|
/* Slab cache from which every struct i915_vma_resource in this file is allocated. */
static struct kmem_cache *slab_vma_resources;
|
|
|
|
/**
|
|
* DOC:
|
|
* We use a per-vm interval tree to keep track of vma_resources
|
|
* scheduled for unbind but not yet unbound. The tree is protected by
|
|
* the vm mutex, and nodes are removed just after the unbind fence signals.
|
|
* The removal takes the vm mutex from a kernel thread which we need to
|
|
* keep in mind so that we don't grab the mutex and try to wait for all
|
|
* pending unbinds to complete, because that will temporarily block many
|
|
* of the workqueue threads, and people will get angry.
|
|
*
|
|
* We should consider using a single ordered fence per VM instead but that
|
|
* requires ordering the unbinds and might introduce unnecessary waiting
|
|
* for unrelated unbinds. Amount of code will probably be roughly the same
|
|
* due to the simplicity of using the interval tree interface.
|
|
*
|
|
* Another drawback of this interval tree is that the complexity of insertion
|
|
* and removal of fences increases as O(ln(pending_unbinds)) instead of
|
|
* O(1) for a single fence without interval tree.
|
|
*/
|
|
#define VMA_RES_START(_node) ((_node)->start)
|
|
#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size - 1)
|
|
INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
|
|
u64, __subtree_last,
|
|
VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
|
|
|
|
/* Callbacks for the unbind dma-fence. */
|
|
|
|
/**
|
|
* i915_vma_resource_alloc - Allocate a vma resource
|
|
*
|
|
* Return: A pointer to a cleared struct i915_vma_resource or
|
|
* a -ENOMEM error pointer if allocation fails.
|
|
*/
|
|
struct i915_vma_resource *i915_vma_resource_alloc(void)
|
|
{
|
|
struct i915_vma_resource *vma_res =
|
|
kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL);
|
|
|
|
return vma_res ? vma_res : ERR_PTR(-ENOMEM);
|
|
}
|
|
|
|
/**
|
|
* i915_vma_resource_free - Free a vma resource
|
|
* @vma_res: The vma resource to free.
|
|
*/
|
|
void i915_vma_resource_free(struct i915_vma_resource *vma_res)
|
|
{
|
|
if (vma_res)
|
|
kmem_cache_free(slab_vma_resources, vma_res);
|
|
}
|
|
|
|
/* dma_fence_ops.get_driver_name callback: constant name, @fence is not read. */
static const char *get_driver_name(struct dma_fence *fence)
{
	return "vma unbind fence";
}
|
|
|
|
/* dma_fence_ops.get_timeline_name callback: constant name, @fence is not read. */
static const char *get_timeline_name(struct dma_fence *fence)
{
	return "unbound";
}
|
|
|
|
static void unbind_fence_free_rcu(struct rcu_head *head)
|
|
{
|
|
struct i915_vma_resource *vma_res =
|
|
container_of(head, typeof(*vma_res), unbind_fence.rcu);
|
|
|
|
i915_vma_resource_free(vma_res);
|
|
}
|
|
|
|
static void unbind_fence_release(struct dma_fence *fence)
|
|
{
|
|
struct i915_vma_resource *vma_res =
|
|
container_of(fence, typeof(*vma_res), unbind_fence);
|
|
|
|
i915_sw_fence_fini(&vma_res->chain);
|
|
|
|
call_rcu(&fence->rcu, unbind_fence_free_rcu);
|
|
}
|
|
|
|
static struct dma_fence_ops unbind_fence_ops = {
|
|
.get_driver_name = get_driver_name,
|
|
.get_timeline_name = get_timeline_name,
|
|
.release = unbind_fence_release,
|
|
};
|
|
|
|
static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res)
|
|
{
|
|
struct i915_address_space *vm;
|
|
|
|
if (!refcount_dec_and_test(&vma_res->hold_count))
|
|
return;
|
|
|
|
dma_fence_signal(&vma_res->unbind_fence);
|
|
|
|
vm = vma_res->vm;
|
|
if (vma_res->wakeref)
|
|
intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref);
|
|
|
|
vma_res->vm = NULL;
|
|
if (!RB_EMPTY_NODE(&vma_res->rb)) {
|
|
mutex_lock(&vm->mutex);
|
|
vma_res_itree_remove(vma_res, &vm->pending_unbind);
|
|
mutex_unlock(&vm->mutex);
|
|
}
|
|
|
|
if (vma_res->bi.pages_rsgt)
|
|
i915_refct_sgt_put(vma_res->bi.pages_rsgt);
|
|
}
|
|
|
|
/**
|
|
* i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind
|
|
* fence.
|
|
* @vma_res: The vma resource.
|
|
* @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold.
|
|
*
|
|
* The function may leave a dma_fence critical section.
|
|
*/
|
|
void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
|
|
bool lockdep_cookie)
|
|
{
|
|
dma_fence_end_signalling(lockdep_cookie);
|
|
|
|
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
|
|
unsigned long irq_flags;
|
|
|
|
/* Inefficient open-coded might_lock_irqsave() */
|
|
spin_lock_irqsave(&vma_res->lock, irq_flags);
|
|
spin_unlock_irqrestore(&vma_res->lock, irq_flags);
|
|
}
|
|
|
|
__i915_vma_resource_unhold(vma_res);
|
|
}
|
|
|
|
/**
|
|
* i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence.
|
|
* @vma_res: The vma resource.
|
|
* @lockdep_cookie: Pointer to a bool serving as a lockdep cooke that should
|
|
* be given as an argument to the pairing i915_vma_resource_unhold.
|
|
*
|
|
* If returning true, the function enters a dma_fence signalling critical
|
|
* section if not in one already.
|
|
*
|
|
* Return: true if holding successful, false if not.
|
|
*/
|
|
bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
|
|
bool *lockdep_cookie)
|
|
{
|
|
bool held = refcount_inc_not_zero(&vma_res->hold_count);
|
|
|
|
if (held)
|
|
*lockdep_cookie = dma_fence_begin_signalling();
|
|
|
|
return held;
|
|
}
|
|
|
|
static void i915_vma_resource_unbind_work(struct work_struct *work)
|
|
{
|
|
struct i915_vma_resource *vma_res =
|
|
container_of(work, typeof(*vma_res), work);
|
|
struct i915_address_space *vm = vma_res->vm;
|
|
bool lockdep_cookie;
|
|
|
|
lockdep_cookie = dma_fence_begin_signalling();
|
|
if (likely(!vma_res->skip_pte_rewrite))
|
|
vma_res->ops->unbind_vma(vm, vma_res);
|
|
|
|
dma_fence_end_signalling(lockdep_cookie);
|
|
__i915_vma_resource_unhold(vma_res);
|
|
i915_vma_resource_put(vma_res);
|
|
}
|
|
|
|
static int
|
|
i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
|
|
enum i915_sw_fence_notify state)
|
|
{
|
|
struct i915_vma_resource *vma_res =
|
|
container_of(fence, typeof(*vma_res), chain);
|
|
struct dma_fence *unbind_fence =
|
|
&vma_res->unbind_fence;
|
|
|
|
switch (state) {
|
|
case FENCE_COMPLETE:
|
|
dma_fence_get(unbind_fence);
|
|
if (vma_res->immediate_unbind) {
|
|
i915_vma_resource_unbind_work(&vma_res->work);
|
|
} else {
|
|
INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work);
|
|
queue_work(system_unbound_wq, &vma_res->work);
|
|
}
|
|
break;
|
|
case FENCE_FREE:
|
|
i915_vma_resource_put(vma_res);
|
|
break;
|
|
}
|
|
|
|
return NOTIFY_DONE;
|
|
}
|
|
|
|
/**
|
|
* i915_vma_resource_unbind - Unbind a vma resource
|
|
* @vma_res: The vma resource to unbind.
|
|
* @tlb: pointer to vma->obj->mm.tlb associated with the resource
|
|
* to be stored at vma_res->tlb. When not-NULL, it will be used
|
|
* to do TLB cache invalidation before freeing a VMA resource.
|
|
* Used only for async unbind.
|
|
*
|
|
* At this point this function does little more than publish a fence that
|
|
* signals immediately unless signaling is held back.
|
|
*
|
|
* Return: A refcounted pointer to a dma-fence that signals when unbinding is
|
|
* complete.
|
|
*/
|
|
struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
|
|
u32 *tlb)
|
|
{
|
|
struct i915_address_space *vm = vma_res->vm;
|
|
|
|
vma_res->tlb = tlb;
|
|
|
|
/* Reference for the sw fence */
|
|
i915_vma_resource_get(vma_res);
|
|
|
|
/* Caller must already have a wakeref in this case. */
|
|
if (vma_res->needs_wakeref)
|
|
vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm);
|
|
|
|
if (atomic_read(&vma_res->chain.pending) <= 1) {
|
|
RB_CLEAR_NODE(&vma_res->rb);
|
|
vma_res->immediate_unbind = 1;
|
|
} else {
|
|
vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind);
|
|
}
|
|
|
|
i915_sw_fence_commit(&vma_res->chain);
|
|
|
|
return &vma_res->unbind_fence;
|
|
}
|
|
|
|
/**
|
|
* __i915_vma_resource_init - Initialize a vma resource.
|
|
* @vma_res: The vma resource to initialize
|
|
*
|
|
* Initializes the private members of a vma resource.
|
|
*/
|
|
void __i915_vma_resource_init(struct i915_vma_resource *vma_res)
|
|
{
|
|
spin_lock_init(&vma_res->lock);
|
|
dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops,
|
|
&vma_res->lock, 0, 0);
|
|
refcount_set(&vma_res->hold_count, 1);
|
|
i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify);
|
|
}
|
|
|
|
static void
|
|
i915_vma_resource_color_adjust_range(struct i915_address_space *vm,
|
|
u64 *start,
|
|
u64 *end)
|
|
{
|
|
if (i915_vm_has_cache_coloring(vm)) {
|
|
if (*start)
|
|
*start -= I915_GTT_PAGE_SIZE;
|
|
*end += I915_GTT_PAGE_SIZE;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a
|
|
* certain vm range.
|
|
* @vm: The vm to look at.
|
|
* @offset: The range start.
|
|
* @size: The range size.
|
|
* @intr: Whether to wait interrubtible.
|
|
*
|
|
* The function needs to be called with the vm lock held.
|
|
*
|
|
* Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
|
|
*/
|
|
int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
|
|
u64 offset,
|
|
u64 size,
|
|
bool intr)
|
|
{
|
|
struct i915_vma_resource *node;
|
|
u64 last = offset + size - 1;
|
|
|
|
lockdep_assert_held(&vm->mutex);
|
|
might_sleep();
|
|
|
|
i915_vma_resource_color_adjust_range(vm, &offset, &last);
|
|
node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
|
|
while (node) {
|
|
int ret = dma_fence_wait(&node->unbind_fence, intr);
|
|
|
|
if (ret)
|
|
return ret;
|
|
|
|
node = vma_res_itree_iter_next(node, offset, last);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm,
|
|
* releasing the vm lock while waiting.
|
|
* @vm: The vm to look at.
|
|
*
|
|
* The function may not be called with the vm lock held.
|
|
* Typically this is called at vm destruction to finish any pending
|
|
* unbind operations. The vm mutex is released while waiting to avoid
|
|
* stalling kernel workqueues trying to grab the mutex.
|
|
*/
|
|
void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm)
|
|
{
|
|
struct i915_vma_resource *node;
|
|
struct dma_fence *fence;
|
|
|
|
do {
|
|
fence = NULL;
|
|
mutex_lock(&vm->mutex);
|
|
node = vma_res_itree_iter_first(&vm->pending_unbind, 0,
|
|
U64_MAX);
|
|
if (node)
|
|
fence = dma_fence_get_rcu(&node->unbind_fence);
|
|
mutex_unlock(&vm->mutex);
|
|
|
|
if (fence) {
|
|
/*
|
|
* The wait makes sure the node eventually removes
|
|
* itself from the tree.
|
|
*/
|
|
dma_fence_wait(fence, false);
|
|
dma_fence_put(fence);
|
|
}
|
|
} while (node);
|
|
}
|
|
|
|
/**
|
|
* i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all
|
|
* pending unbinds in a certain range of a vm.
|
|
* @vm: The vm to look at.
|
|
* @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds.
|
|
* @offset: The range start.
|
|
* @size: The range size.
|
|
* @intr: Whether to wait interrubtible.
|
|
* @gfp: Allocation mode for memory allocations.
|
|
*
|
|
* The function makes @sw_fence await all pending unbinds in a certain
|
|
* vm range before calling the complete notifier. To be able to await
|
|
* each individual unbind, the function needs to allocate memory using
|
|
* the @gpf allocation mode. If that fails, the function will instead
|
|
* wait for the unbind fence to signal, using @intr to judge whether to
|
|
* wait interruptible or not. Note that @gfp should ideally be selected so
|
|
* as to avoid any expensive memory allocation stalls and rather fail and
|
|
* synchronize itself. For now the vm mutex is required when calling this
|
|
* function with means that @gfp can't call into direct reclaim. In reality
|
|
* this means that during heavy memory pressure, we will sync in this
|
|
* function.
|
|
*
|
|
* Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
|
|
*/
|
|
int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
|
|
struct i915_sw_fence *sw_fence,
|
|
u64 offset,
|
|
u64 size,
|
|
bool intr,
|
|
gfp_t gfp)
|
|
{
|
|
struct i915_vma_resource *node;
|
|
u64 last = offset + size - 1;
|
|
|
|
lockdep_assert_held(&vm->mutex);
|
|
might_alloc(gfp);
|
|
might_sleep();
|
|
|
|
i915_vma_resource_color_adjust_range(vm, &offset, &last);
|
|
node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
|
|
while (node) {
|
|
int ret;
|
|
|
|
ret = i915_sw_fence_await_dma_fence(sw_fence,
|
|
&node->unbind_fence,
|
|
0, gfp);
|
|
if (ret < 0) {
|
|
ret = dma_fence_wait(&node->unbind_fence, intr);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
node = vma_res_itree_iter_next(node, offset, last);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void i915_vma_resource_module_exit(void)
|
|
{
|
|
kmem_cache_destroy(slab_vma_resources);
|
|
}
|
|
|
|
/*
 * Module setup: create the slab cache used for struct i915_vma_resource
 * allocations.
 *
 * Return: 0 on success, -ENOMEM if the cache could not be created.
 */
int __init i915_vma_resource_module_init(void)
{
	slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN);

	return slab_vma_resources ? 0 : -ENOMEM;
}
|