From d6ea88865d3e5b0c62040531310c1f2c6a994f46 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 22 Nov 2010 13:24:40 +1000 Subject: [PATCH 01/10] drm/ttm: Add a bo list reserve fastpath (v2) Makes it possible to reserve a list of buffer objects with a single spin lock / unlock if there is no contention. Should improve cpu usage on SMP kernels. v2: Initialize private list members on reserve and don't call ttm_bo_list_ref_sub() with zero put_count. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 32 +++---- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 124 ++++++++++++++++++++++--- include/drm/ttm/ttm_bo_api.h | 38 ++++++++ include/drm/ttm/ttm_bo_driver.h | 14 +++ include/drm/ttm/ttm_execbuf_util.h | 6 +- 5 files changed, 186 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 148a322d8f5d..a586378b1b2b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -169,7 +169,7 @@ int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo, bool interruptible) } EXPORT_SYMBOL(ttm_bo_wait_unreserved); -static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo) +void ttm_bo_add_to_lru(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; struct ttm_mem_type_manager *man; @@ -191,11 +191,7 @@ static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo) } } -/** - * Call with the lru_lock held. - */ - -static int ttm_bo_del_from_lru(struct ttm_buffer_object *bo) +int ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { int put_count = 0; @@ -267,6 +263,15 @@ static void ttm_bo_ref_bug(struct kref *list_kref) BUG(); } +void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, + bool never_free) +{ + while (count--) + kref_put(&bo->list_kref, + (never_free || (count >= 0)) ? ttm_bo_ref_bug : + ttm_bo_release_list); +} + int ttm_bo_reserve(struct ttm_buffer_object *bo, bool interruptible, bool no_wait, bool use_sequence, uint32_t sequence) @@ -282,8 +287,7 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo, put_count = ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); - while (put_count--) - kref_put(&bo->list_kref, ttm_bo_ref_bug); + ttm_bo_list_ref_sub(bo, put_count, true); return ret; } @@ -496,8 +500,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) spin_unlock(&glob->lru_lock); ttm_bo_cleanup_memtype_use(bo); - while (put_count--) - kref_put(&bo->list_kref, ttm_bo_ref_bug); + ttm_bo_list_ref_sub(bo, put_count, true); return; } else { @@ -580,8 +583,7 @@ retry: spin_unlock(&glob->lru_lock); ttm_bo_cleanup_memtype_use(bo); - while (put_count--) - kref_put(&bo->list_kref, ttm_bo_ref_bug); + ttm_bo_list_ref_sub(bo, put_count, true); return 0; } @@ -802,8 +804,7 @@ retry: BUG_ON(ret != 0); - while (put_count--) - kref_put(&bo->list_kref, ttm_bo_ref_bug); + ttm_bo_list_ref_sub(bo, put_count, true); ret = ttm_bo_evict(bo, interruptible, no_wait_reserve, no_wait_gpu); ttm_bo_unreserve(bo); @@ -1783,8 +1784,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) put_count = ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); - while (put_count--) - kref_put(&bo->list_kref, ttm_bo_ref_bug); + ttm_bo_list_ref_sub(bo, put_count, true); /** * Wait for GPU, then move to system cached. 
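The ttm_execbuf_util.c hunks that follow implement the fastpath itself: take the global lru_lock once for the whole list, trylock-reserve each buffer, and back everything off if one buffer turns out to be contended. As a rough illustration of that pattern, the userspace C sketch below uses a pthread mutex and C11 atomics in place of the kernel's spinlock and reservation machinery; all names are illustrative and not part of the TTM API.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct buf {
	atomic_int reserved;		/* 0 = free, 1 = reserved */
	struct buf *next;		/* singly linked validation list */
};

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;

/* Fastpath: one lock/unlock pair for the whole list when nothing is
 * contended; on contention, undo and let the caller take a slow path
 * (sleep, then retry), loosely mirroring ttm_eu_wait_unreserved_locked(). */
static bool reserve_list_fastpath(struct buf *head)
{
	struct buf *b, *undo;
	int expected;

	pthread_mutex_lock(&lru_lock);
	for (b = head; b; b = b->next) {
		expected = 0;
		if (!atomic_compare_exchange_strong(&b->reserved,
						    &expected, 1)) {
			for (undo = head; undo != b; undo = undo->next)
				atomic_store(&undo->reserved, 0);
			pthread_mutex_unlock(&lru_lock);
			return false;	/* caller sleeps and retries */
		}
	}
	pthread_mutex_unlock(&lru_lock);
	return true;
}

int main(void)
{
	struct buf b2 = { 0, NULL };
	struct buf b1 = { 0, &b2 };

	return reserve_list_fastpath(&b1) ? 0 : 1;
}

The uncontended case thus costs a single lock/unlock pair regardless of list length, which is where the SMP cpu-usage win comes from.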
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index c285c2902d15..201a71d111ec 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -32,6 +32,72 @@ #include #include +static void ttm_eu_backoff_reservation_locked(struct list_head *list) +{ + struct ttm_validate_buffer *entry; + + list_for_each_entry(entry, list, head) { + struct ttm_buffer_object *bo = entry->bo; + if (!entry->reserved) + continue; + + if (entry->removed) { + ttm_bo_add_to_lru(bo); + entry->removed = false; + + } + entry->reserved = false; + atomic_set(&bo->reserved, 0); + wake_up_all(&bo->event_queue); + } +} + +static void ttm_eu_del_from_lru_locked(struct list_head *list) +{ + struct ttm_validate_buffer *entry; + + list_for_each_entry(entry, list, head) { + struct ttm_buffer_object *bo = entry->bo; + if (!entry->reserved) + continue; + + if (!entry->removed) { + entry->put_count = ttm_bo_del_from_lru(bo); + entry->removed = true; + } + } +} + +static void ttm_eu_list_ref_sub(struct list_head *list) +{ + struct ttm_validate_buffer *entry; + + list_for_each_entry(entry, list, head) { + struct ttm_buffer_object *bo = entry->bo; + + if (entry->put_count) { + ttm_bo_list_ref_sub(bo, entry->put_count, true); + entry->put_count = 0; + } + } +} + +static int ttm_eu_wait_unreserved_locked(struct list_head *list, + struct ttm_buffer_object *bo) +{ + struct ttm_bo_global *glob = bo->glob; + int ret; + + ttm_eu_del_from_lru_locked(list); + spin_unlock(&glob->lru_lock); + ret = ttm_bo_wait_unreserved(bo, true); + spin_lock(&glob->lru_lock); + if (unlikely(ret != 0)) + ttm_eu_backoff_reservation_locked(list); + return ret; +} + + void ttm_eu_backoff_reservation(struct list_head *list) { struct ttm_validate_buffer *entry; @@ -61,35 +127,71 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation); int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq) { + struct ttm_bo_global *glob; struct ttm_validate_buffer *entry; int ret; + if (list_empty(list)) + return 0; + + list_for_each_entry(entry, list, head) { + entry->reserved = false; + entry->put_count = 0; + entry->removed = false; + } + + entry = list_first_entry(list, struct ttm_validate_buffer, head); + glob = entry->bo->glob; + retry: + spin_lock(&glob->lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - entry->reserved = false; - ret = ttm_bo_reserve(bo, true, false, true, val_seq); - if (ret != 0) { - ttm_eu_backoff_reservation(list); - if (ret == -EAGAIN) { - ret = ttm_bo_wait_unreserved(bo, true); - if (unlikely(ret != 0)) - return ret; - goto retry; - } else +retry_this_bo: + ret = ttm_bo_reserve_locked(bo, true, true, true, val_seq); + switch (ret) { + case 0: + break; + case -EBUSY: + ret = ttm_eu_wait_unreserved_locked(list, bo); + if (unlikely(ret != 0)) { + spin_unlock(&glob->lru_lock); + ttm_eu_list_ref_sub(list); return ret; + } + goto retry_this_bo; + case -EAGAIN: + ttm_eu_backoff_reservation_locked(list); + spin_unlock(&glob->lru_lock); + ttm_eu_list_ref_sub(list); + ret = ttm_bo_wait_unreserved(bo, true); + if (unlikely(ret != 0)) + return ret; + goto retry; + default: + ttm_eu_backoff_reservation_locked(list); + spin_unlock(&glob->lru_lock); + ttm_eu_list_ref_sub(list); + return ret; } entry->reserved = true; if (unlikely(atomic_read(&bo->cpu_writers) > 0)) { - ttm_eu_backoff_reservation(list); + ttm_eu_backoff_reservation_locked(list); + spin_unlock(&glob->lru_lock); + ttm_eu_list_ref_sub(list); ret = ttm_bo_wait_cpu(bo, 
false); if (ret) return ret; goto retry; } } + + ttm_eu_del_from_lru_locked(list); + spin_unlock(&glob->lru_lock); + ttm_eu_list_ref_sub(list); + return 0; } EXPORT_SYMBOL(ttm_eu_reserve_buffers); diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index beafc156a535..b0fc9c12554b 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -364,6 +364,44 @@ extern int ttm_bo_validate(struct ttm_buffer_object *bo, */ extern void ttm_bo_unref(struct ttm_buffer_object **bo); + +/** + * ttm_bo_list_ref_sub + * + * @bo: The buffer object. + * @count: The number of references with which to decrease @bo::list_kref; + * @never_free: The refcount should not reach zero with this operation. + * + * Release @count lru list references to this buffer object. + */ +extern void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, + bool never_free); + +/** + * ttm_bo_add_to_lru + * + * @bo: The buffer object. + * + * Add this bo to the relevant mem type lru and, if it's backed by + * system pages (ttms) to the swap list. + * This function must be called with struct ttm_bo_global::lru_lock held, and + * is typically called immediately prior to unreserving a bo. + */ +extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo); + +/** + * ttm_bo_del_from_lru + * + * @bo: The buffer object. + * + * Remove this bo from all lru lists used to lookup and reserve an object. + * This function must be called with struct ttm_bo_global::lru_lock held, + * and is usually called just immediately after the bo has been reserved to + * avoid recursive reservation from lru lists. + */ +extern int ttm_bo_del_from_lru(struct ttm_buffer_object *bo); + + /** * ttm_bo_lock_delayed_workqueue * diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 8e0c848326b6..95068e6024db 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -864,6 +864,20 @@ extern int ttm_bo_reserve(struct ttm_buffer_object *bo, bool interruptible, bool no_wait, bool use_sequence, uint32_t sequence); + +/** + * ttm_bo_reserve_locked: + * + * Similar to ttm_bo_reserve, but must be called with the glob::lru_lock + * spinlock held, and will not remove reserved buffers from the lru lists. + * The function may release the LRU spinlock if it needs to sleep. + */ + +extern int ttm_bo_reserve_locked(struct ttm_buffer_object *bo, + bool interruptible, + bool no_wait, bool use_sequence, + uint32_t sequence); + /** * ttm_bo_unreserve * diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h index cd2c475da9ea..fd09b8438977 100644 --- a/include/drm/ttm/ttm_execbuf_util.h +++ b/include/drm/ttm/ttm_execbuf_util.h @@ -41,7 +41,9 @@ * @bo: refcounted buffer object pointer. * @new_sync_obj_arg: New sync_obj_arg for @bo, to be used once * adding a new sync object. - * @reservied: Indicates whether @bo has been reserved for validation. + * @reserved: Indicates whether @bo has been reserved for validation. + * @removed: Indicates whether @bo has been removed from lru lists. + * @put_count: Number of outstanding references on bo::list_kref. 
*/ struct ttm_validate_buffer { @@ -49,6 +51,8 @@ struct ttm_validate_buffer { struct ttm_buffer_object *bo; void *new_sync_obj_arg; bool reserved; + bool removed; + int put_count; }; /** From ecf7ace9a8450303a987aa8364e53860cd50e554 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 16 Nov 2010 15:21:07 +0100 Subject: [PATCH 02/10] kref: Add a kref_sub function Makes it possible to optimize batched multiple unrefs. Initial user will be drivers/gpu/ttm which accumulates unrefs to be processed outside of atomic code. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- include/linux/kref.h | 2 ++ lib/kref.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/include/linux/kref.h b/include/linux/kref.h index 6cc38fc07ab7..d4a62ab2ee5e 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -24,5 +24,7 @@ struct kref { void kref_init(struct kref *kref); void kref_get(struct kref *kref); int kref_put(struct kref *kref, void (*release) (struct kref *kref)); +int kref_sub(struct kref *kref, unsigned int count, + void (*release) (struct kref *kref)); #endif /* _KREF_H_ */ diff --git a/lib/kref.c b/lib/kref.c index d3d227a08a4b..3efb882b11db 100644 --- a/lib/kref.c +++ b/lib/kref.c @@ -62,6 +62,36 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref)) return 0; } + +/** + * kref_sub - subtract a number of refcounts for an object. + * @kref: object. + * @count: Number of refcounts to subtract. + * @release: pointer to the function that will clean up the object when the + * last reference to the object is released. + * This pointer is required, and it is not acceptable to pass kfree + * in as this function. + * + * Subtract @count from the refcount, and if 0, call release(). + * Return 1 if the object was removed, otherwise return 0. Beware, if this + * function returns 0, you still can not count on the kref remaining in + * memory. Only use the return value to determine whether the kref is now + * gone; a return of 0 does not guarantee that the object is still present. + */ +int kref_sub(struct kref *kref, unsigned int count, + void (*release)(struct kref *kref)) +{ + WARN_ON(release == NULL); + WARN_ON(release == (void (*)(struct kref *))kfree); + + if (atomic_sub_and_test((int) count, &kref->refcount)) { + release(kref); + return 1; + } + return 0; +} + EXPORT_SYMBOL(kref_init); EXPORT_SYMBOL(kref_get); EXPORT_SYMBOL(kref_put); +EXPORT_SYMBOL(kref_sub); From 2357cbe5f4ca8a52329c2c2a26b68839870d5d43 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 16 Nov 2010 15:21:08 +0100 Subject: [PATCH 03/10] drm/ttm: Use kref_sub instead of repeatedly calling kref_put Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a586378b1b2b..9ef893d5da88 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -266,10 +266,8 @@ static void ttm_bo_ref_bug(struct kref *list_kref) void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, bool never_free) { - while (count--) - kref_put(&bo->list_kref, - (never_free || (count >= 0)) ? ttm_bo_ref_bug : - ttm_bo_release_list); + kref_sub(&bo->list_kref, count, + (never_free) ?
ttm_bo_ref_bug : ttm_bo_release_list); } int ttm_bo_reserve(struct ttm_buffer_object *bo, From 68c4fa31aa52765314b4285a7835368ea35b509c Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 17 Nov 2010 12:28:27 +0000 Subject: [PATCH 04/10] drm/ttm: Optimize ttm_eu_backoff_reservation Avoid the ttm_bo_unreserve() spinlocks by calling ttm_eu_backoff_reservation_locked under the lru spinlock. Signed-off-by: Thomas Hellstrom Reviewed-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 201a71d111ec..7dcc6470e2f5 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -101,15 +101,16 @@ static int ttm_eu_wait_unreserved_locked(struct list_head *list, void ttm_eu_backoff_reservation(struct list_head *list) { struct ttm_validate_buffer *entry; + struct ttm_bo_global *glob; - list_for_each_entry(entry, list, head) { - struct ttm_buffer_object *bo = entry->bo; - if (!entry->reserved) - continue; + if (list_empty(list)) + return; - entry->reserved = false; - ttm_bo_unreserve(bo); - } + entry = list_first_entry(list, struct ttm_validate_buffer, head); + glob = entry->bo->glob; + spin_lock(&glob->lru_lock); + ttm_eu_backoff_reservation_locked(list); + spin_unlock(&glob->lru_lock); } EXPORT_SYMBOL(ttm_eu_backoff_reservation); From 96726fe50feae74812a2ccf5d5da23cb01c0a413 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 17 Nov 2010 12:28:28 +0000 Subject: [PATCH 05/10] drm/ttm: Don't deadlock on recursive multi-bo reservations Add an aid for the driver to detect deadlocks on multi-bo reservations. Update documentation. Signed-off-by: Thomas Hellstrom Reviewed-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 15 ++++++++++++--- include/drm/ttm/ttm_bo_driver.h | 25 ++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 9ef893d5da88..5d8750830dc3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -223,9 +223,18 @@ int ttm_bo_reserve_locked(struct ttm_buffer_object *bo, /** * Deadlock avoidance for multi-bo reserving. */ - if (use_sequence && bo->seq_valid && - (sequence - bo->val_seq < (1 << 31))) { - return -EAGAIN; + if (use_sequence && bo->seq_valid) { + /** + * We've already reserved this one. + */ + if (unlikely(sequence == bo->val_seq)) + return -EDEADLK; + /** + * Already reserved by a thread that will not back + * off for us. We need to back off. + */ + if (unlikely(sequence - bo->val_seq < (1 << 31))) + return -EAGAIN; } if (no_wait) diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 95068e6024db..1e25a40c688e 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -859,6 +859,9 @@ extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo); * try again. (only if use_sequence == 1). * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by * a signal. Release all buffer reservations and return to user-space. + * -EBUSY: The function needed to sleep, but @no_wait was true + * -EDEADLK: Bo already reserved using @sequence. This error code will only + * be returned if @use_sequence is set to true.
*/ extern int ttm_bo_reserve(struct ttm_buffer_object *bo, bool interruptible, @@ -868,11 +871,27 @@ extern int ttm_bo_reserve(struct ttm_buffer_object *bo, /** * ttm_bo_reserve_locked: * - * Similar to ttm_bo_reserve, but must be called with the glob::lru_lock - * spinlock held, and will not remove reserved buffers from the lru lists. + * @bo: A pointer to a struct ttm_buffer_object. + * @interruptible: Sleep interruptible if waiting. + * @no_wait: Don't sleep while trying to reserve, rather return -EBUSY. + * @use_sequence: If @bo is already reserved, only sleep waiting for + * it to become unreserved if @sequence < (@bo)->sequence. + * + * Must be called with struct ttm_bo_global::lru_lock held, + * and will not remove reserved buffers from the lru lists. * The function may release the LRU spinlock if it needs to sleep. + * Otherwise identical to ttm_bo_reserve. + * + * Returns: + * -EAGAIN: The reservation may cause a deadlock. + * Release all buffer reservations, wait for @bo to become unreserved and + * try again. (only if use_sequence == 1). + * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by + * a signal. Release all buffer reservations and return to user-space. + * -EBUSY: The function needed to sleep, but @no_wait was true + * -EDEADLK: Bo already reserved using @sequence. This error code will only + * be returned if @use_sequence is set to true. */ - extern int ttm_bo_reserve_locked(struct ttm_buffer_object *bo, bool interruptible, bool no_wait, bool use_sequence, From 702adba22433c175e8429a47760f35ca16caf1cd Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 17 Nov 2010 12:28:29 +0000 Subject: [PATCH 06/10] drm/ttm/radeon/nouveau: Kill the bo lock in favour of a bo device fence_lock The bo lock was used only to protect the bo sync object members, and since it is a per-bo lock, fencing a buffer list will see a lot of locks and unlocks. Replace it with a per-device lock that protects the sync object members on *all* bos. Reading and setting these members will always be very quick, so the risk of heavy lock contention is microscopic. Note that waiting for sync objects will always take place outside of this lock. The bo device fence lock will eventually be replaced with a seqlock / rcu mechanism so we can determine that a bo is idle under an rcu / read seqlock. However this change will allow us to batch fencing and unreserving of buffers with a minimal amount of locking.
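The cost argument can be made concrete with a hedged userspace model of the change; pthread mutexes stand in for the kernel spinlocks and all names are illustrative rather than TTM API.

#include <pthread.h>
#include <stddef.h>

struct device {
	pthread_mutex_t fence_lock;	/* replaces the per-bo bo->lock */
};

struct bo {
	struct device *dev;
	void *sync_obj;			/* protected by dev->fence_lock */
};

/* Setting a fence only swaps a pointer, so each critical section is
 * tiny; and because every bo now shares one lock, the lock can later
 * be hoisted out of the loop entirely (see PATCH 07 below). */
static void fence_buffers(struct bo **bos, size_t n, void *fence)
{
	size_t i;

	for (i = 0; i < n; i++) {
		pthread_mutex_lock(&bos[i]->dev->fence_lock);
		bos[i]->sync_obj = fence;
		pthread_mutex_unlock(&bos[i]->dev->fence_lock);
	}
}

int main(void)
{
	struct device dev = { PTHREAD_MUTEX_INITIALIZER };
	struct bo a = { &dev, NULL }, b = { &dev, NULL };
	struct bo *list[] = { &a, &b };
	int fence;

	fence_buffers(list, 2, &fence);
	return 0;
}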
Signed-off-by: Thomas Hellstrom Reviewed-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_gem.c | 12 +++--- drivers/gpu/drm/radeon/radeon_object.c | 4 +- drivers/gpu/drm/radeon/radeon_object.h | 4 +- drivers/gpu/drm/ttm/ttm_bo.c | 55 +++++++++++++------------- drivers/gpu/drm/ttm/ttm_bo_util.c | 7 ++-- drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 +-- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 7 ++-- include/drm/ttm/ttm_bo_api.h | 6 +-- include/drm/ttm/ttm_bo_driver.h | 3 ++ 9 files changed, 53 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 9a1fdcf400c2..1f2301d26c0a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -234,10 +234,10 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence) if (likely(fence)) { struct nouveau_fence *prev_fence; - spin_lock(&nvbo->bo.lock); + spin_lock(&nvbo->bo.bdev->fence_lock); prev_fence = nvbo->bo.sync_obj; nvbo->bo.sync_obj = nouveau_fence_ref(fence); - spin_unlock(&nvbo->bo.lock); + spin_unlock(&nvbo->bo.bdev->fence_lock); nouveau_fence_unref((void *)&prev_fence); } @@ -557,9 +557,9 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev, data |= r->vor; } - spin_lock(&nvbo->bo.lock); + spin_lock(&nvbo->bo.bdev->fence_lock); ret = ttm_bo_wait(&nvbo->bo, false, false, false); - spin_unlock(&nvbo->bo.lock); + spin_unlock(&nvbo->bo.bdev->fence_lock); if (ret) { NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret); break; @@ -791,9 +791,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, } if (req->flags & NOUVEAU_GEM_CPU_PREP_NOBLOCK) { - spin_lock(&nvbo->bo.lock); + spin_lock(&nvbo->bo.bdev->fence_lock); ret = ttm_bo_wait(&nvbo->bo, false, false, no_wait); - spin_unlock(&nvbo->bo.lock); + spin_unlock(&nvbo->bo.bdev->fence_lock); } else { ret = ttm_bo_synccpu_write_grab(&nvbo->bo, no_wait); if (ret == 0) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 1d067743fee0..e939cb6a91cc 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -369,11 +369,11 @@ void radeon_bo_list_fence(struct list_head *head, void *fence) list_for_each_entry(lobj, head, list) { bo = lobj->bo; - spin_lock(&bo->tbo.lock); + spin_lock(&bo->tbo.bdev->fence_lock); old_fence = (struct radeon_fence *)bo->tbo.sync_obj; bo->tbo.sync_obj = radeon_fence_ref(fence); bo->tbo.sync_obj_arg = NULL; - spin_unlock(&bo->tbo.lock); + spin_unlock(&bo->tbo.bdev->fence_lock); if (old_fence) { radeon_fence_unref(&old_fence); } diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index d143702b244a..fd536751f865 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -126,12 +126,12 @@ static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0); if (unlikely(r != 0)) return r; - spin_lock(&bo->tbo.lock); + spin_lock(&bo->tbo.bdev->fence_lock); if (mem_type) *mem_type = bo->tbo.mem.mem_type; if (bo->tbo.sync_obj) r = ttm_bo_wait(&bo->tbo, true, true, no_wait); - spin_unlock(&bo->tbo.lock); + spin_unlock(&bo->tbo.bdev->fence_lock); ttm_bo_unreserve(&bo->tbo); return r; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 5d8750830dc3..d93c73b1c471 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -427,11 +427,9 @@ moved: } if 
(bo->mem.mm_node) { - spin_lock(&bo->lock); bo->offset = (bo->mem.start << PAGE_SHIFT) + bdev->man[bo->mem.mem_type].gpu_offset; bo->cur_placement = bo->mem.placement; - spin_unlock(&bo->lock); } else bo->offset = 0; @@ -485,14 +483,14 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) int put_count; int ret; - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); (void) ttm_bo_wait(bo, false, false, true); if (!bo->sync_obj) { spin_lock(&glob->lru_lock); /** - * Lock inversion between bo::reserve and bo::lock here, + * Lock inversion between bo:reserve and bdev::fence_lock here, * but that's OK, since we're only trylocking. */ @@ -501,7 +499,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) if (unlikely(ret == -EBUSY)) goto queue; - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); put_count = ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); @@ -522,7 +520,7 @@ queue: kref_get(&bo->list_kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy); spin_unlock(&glob->lru_lock); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (sync_obj) { driver->sync_obj_flush(sync_obj, sync_obj_arg); @@ -547,14 +545,15 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool no_wait_reserve, bool no_wait_gpu) { + struct ttm_bo_device *bdev = bo->bdev; struct ttm_bo_global *glob = bo->glob; int put_count; int ret = 0; retry: - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) return ret; @@ -707,9 +706,9 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, struct ttm_placement placement; int ret = 0; - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) { if (ret != -ERESTARTSYS) { @@ -1044,6 +1043,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, { int ret = 0; struct ttm_mem_reg mem; + struct ttm_bo_device *bdev = bo->bdev; BUG_ON(!atomic_read(&bo->reserved)); @@ -1052,9 +1052,9 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, * Have the driver move function wait for idle when necessary, * instead of doing it here. 
*/ - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (ret) return ret; mem.num_pages = bo->num_pages; @@ -1171,7 +1171,6 @@ int ttm_bo_init(struct ttm_bo_device *bdev, } bo->destroy = destroy; - spin_lock_init(&bo->lock); kref_init(&bo->kref); kref_init(&bo->list_kref); atomic_set(&bo->cpu_writers, 0); @@ -1535,7 +1534,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, bdev->dev_mapping = NULL; bdev->glob = glob; bdev->need_dma32 = need_dma32; - + spin_lock_init(&bdev->fence_lock); mutex_lock(&glob->device_list_mutex); list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&glob->device_list_mutex); @@ -1659,6 +1658,7 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, bool lazy, bool interruptible, bool no_wait) { struct ttm_bo_driver *driver = bo->bdev->driver; + struct ttm_bo_device *bdev = bo->bdev; void *sync_obj; void *sync_obj_arg; int ret = 0; @@ -1672,9 +1672,9 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, void *tmp_obj = bo->sync_obj; bo->sync_obj = NULL; clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); driver->sync_obj_unref(&tmp_obj); - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); continue; } @@ -1683,29 +1683,29 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, sync_obj = driver->sync_obj_ref(bo->sync_obj); sync_obj_arg = bo->sync_obj_arg; - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); ret = driver->sync_obj_wait(sync_obj, sync_obj_arg, lazy, interruptible); if (unlikely(ret != 0)) { driver->sync_obj_unref(&sync_obj); - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); return ret; } - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); if (likely(bo->sync_obj == sync_obj && bo->sync_obj_arg == sync_obj_arg)) { void *tmp_obj = bo->sync_obj; bo->sync_obj = NULL; clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); driver->sync_obj_unref(&sync_obj); driver->sync_obj_unref(&tmp_obj); - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); } else { - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); driver->sync_obj_unref(&sync_obj); - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); } } return 0; @@ -1714,6 +1714,7 @@ EXPORT_SYMBOL(ttm_bo_wait); int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait) { + struct ttm_bo_device *bdev = bo->bdev; int ret = 0; /* @@ -1723,9 +1724,9 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait) ret = ttm_bo_reserve(bo, true, no_wait, false, 0); if (unlikely(ret != 0)) return ret; - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, true, no_wait); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (likely(ret == 0)) atomic_inc(&bo->cpu_writers); ttm_bo_unreserve(bo); @@ -1797,9 +1798,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) * Wait for GPU, then move to system cached. 
*/ - spin_lock(&bo->lock); + spin_lock(&bo->bdev->fence_lock); ret = ttm_bo_wait(bo, false, false, false); - spin_unlock(&bo->lock); + spin_unlock(&bo->bdev->fence_lock); if (unlikely(ret != 0)) goto out; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 3106d5bcce32..4b75133d6606 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -337,7 +337,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, * TODO: Explicit member copy would probably be better here. */ - spin_lock_init(&fbo->lock); init_waitqueue_head(&fbo->event_queue); INIT_LIST_HEAD(&fbo->ddestroy); INIT_LIST_HEAD(&fbo->lru); @@ -520,7 +519,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, struct ttm_buffer_object *ghost_obj; void *tmp_obj = NULL; - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); if (bo->sync_obj) { tmp_obj = bo->sync_obj; bo->sync_obj = NULL; @@ -529,7 +528,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, bo->sync_obj_arg = sync_obj_arg; if (evict) { ret = ttm_bo_wait(bo, false, false, false); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (tmp_obj) driver->sync_obj_unref(&tmp_obj); if (ret) @@ -552,7 +551,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, */ set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (tmp_obj) driver->sync_obj_unref(&tmp_obj); diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index fe6cb77899f4..8dd446cb778e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -118,17 +118,17 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) * move. */ - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) { ret = ttm_bo_wait(bo, false, true, false); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) { retval = (ret != -ERESTARTSYS) ? VM_FAULT_SIGBUS : VM_FAULT_NOPAGE; goto out_unlock; } } else - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); ret = ttm_mem_io_reserve(bdev, &bo->mem); diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 7dcc6470e2f5..c3a2100bace6 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -203,14 +203,15 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj) list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - struct ttm_bo_driver *driver = bo->bdev->driver; + struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_driver *driver = bdev->driver; void *old_sync_obj; - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); old_sync_obj = bo->sync_obj; bo->sync_obj = driver->sync_obj_ref(sync_obj); bo->sync_obj_arg = entry->new_sync_obj_arg; - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); ttm_bo_unreserve(bo); entry->reserved = false; if (old_sync_obj) diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index b0fc9c12554b..edacd483c59c 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -154,7 +154,6 @@ struct ttm_tt; * keeps one refcount. When this refcount reaches zero, * the object is destroyed. * @event_queue: Queue for processes waiting on buffer object status change. - * @lock: spinlock protecting mostly synchronization members. 
* @mem: structure describing current placement. * @persistant_swap_storage: Usually the swap storage is deleted for buffers * pinned in physical memory. If this behaviour is not desired, this member @@ -213,7 +212,6 @@ struct ttm_buffer_object { struct kref kref; struct kref list_kref; wait_queue_head_t event_queue; - spinlock_t lock; /** * Members protected by the bo::reserved lock. @@ -248,10 +246,10 @@ struct ttm_buffer_object { atomic_t reserved; /** - * Members protected by the bo::lock + * Members protected by struct buffer_object_device::fence_lock * In addition, setting sync_obj to anything else * than NULL requires bo::reserved to be held. This allows for - * checking NULL while reserved but not holding bo::lock. + * checking NULL while reserved but not holding the mentioned lock. */ void *sync_obj_arg; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 1e25a40c688e..ca8131e98300 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -510,6 +510,8 @@ struct ttm_bo_global { * * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. + * @fence_lock: Protects the synchronizing members on *all* bos belonging + * to this device. * @addr_space_mm: Range manager for the device address space. * lru_lock: Spinlock that protects the buffer+device lru lists and * ddestroy lists. @@ -531,6 +533,7 @@ struct ttm_bo_device { struct ttm_bo_driver *driver; rwlock_t vm_lock; struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; + spinlock_t fence_lock; /* * Protected by the vm lock. */ From 95762c2b34069bf4adb7929969f1f5f5fc8a38df Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 17 Nov 2010 12:28:30 +0000 Subject: [PATCH 07/10] drm/ttm: Improved fencing of buffer object lists Drastically reduce the number of spin lock / unlock operations by performing unreserving and fencing under global locks. 
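The key pattern in this patch shows up in the reworked ttm_eu_fence_buffer_objects(): publish the new fence for every buffer while holding the fence and lru locks once, but stash each old sync object and unreference it only after the locks are dropped, since that release work need not sit inside a spinlock. A self-contained userspace sketch of the pattern follows; the names and the use of free() as the release function are illustrative only, not the TTM implementation.

#include <pthread.h>
#include <stddef.h>
#include <stdlib.h>

struct bo {
	void *sync_obj;
	void *old_sync_obj;	/* mirrors ttm_validate_buffer::old_sync_obj */
};

static pthread_mutex_t fence_lock = PTHREAD_MUTEX_INITIALIZER;

static void fence_and_unreserve(struct bo **bos, size_t n, void *fence)
{
	size_t i;

	/* One lock/unlock pair for the whole batch. */
	pthread_mutex_lock(&fence_lock);
	for (i = 0; i < n; i++) {
		bos[i]->old_sync_obj = bos[i]->sync_obj;
		bos[i]->sync_obj = fence;
	}
	pthread_mutex_unlock(&fence_lock);

	/* Potentially expensive release work happens outside the lock. */
	for (i = 0; i < n; i++)
		if (bos[i]->old_sync_obj)
			free(bos[i]->old_sync_obj);
}

int main(void)
{
	struct bo a = { malloc(1), NULL }, b = { NULL, NULL };
	struct bo *list[] = { &a, &b };
	static int fence;

	fence_and_unreserve(list, 2, &fence);
	return 0;
}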
Signed-off-by: Thomas Hellstrom Reviewed-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 11 +++++--- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 36 ++++++++++++++++++-------- include/drm/ttm/ttm_bo_driver.h | 10 +++++++ include/drm/ttm/ttm_execbuf_util.h | 2 ++ 4 files changed, 45 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d93c73b1c471..551a5d31cadf 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -299,14 +299,19 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo, return ret; } +void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo) +{ + ttm_bo_add_to_lru(bo); + atomic_set(&bo->reserved, 0); + wake_up_all(&bo->event_queue); +} + void ttm_bo_unreserve(struct ttm_buffer_object *bo) { struct ttm_bo_global *glob = bo->glob; spin_lock(&glob->lru_lock); - ttm_bo_add_to_lru(bo); - atomic_set(&bo->reserved, 0); - wake_up_all(&bo->event_queue); + ttm_bo_unreserve_locked(bo); spin_unlock(&glob->lru_lock); } EXPORT_SYMBOL(ttm_bo_unreserve); diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index c3a2100bace6..b6da65cc502a 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -200,22 +200,36 @@ EXPORT_SYMBOL(ttm_eu_reserve_buffers); void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj) { struct ttm_validate_buffer *entry; + struct ttm_buffer_object *bo; + struct ttm_bo_global *glob; + struct ttm_bo_device *bdev; + struct ttm_bo_driver *driver; + + if (list_empty(list)) + return; + + bo = list_first_entry(list, struct ttm_validate_buffer, head)->bo; + bdev = bo->bdev; + driver = bdev->driver; + glob = bo->glob; + + spin_lock(&bdev->fence_lock); + spin_lock(&glob->lru_lock); list_for_each_entry(entry, list, head) { - struct ttm_buffer_object *bo = entry->bo; - struct ttm_bo_device *bdev = bo->bdev; - struct ttm_bo_driver *driver = bdev->driver; - void *old_sync_obj; - - spin_lock(&bdev->fence_lock); - old_sync_obj = bo->sync_obj; + bo = entry->bo; + entry->old_sync_obj = bo->sync_obj; bo->sync_obj = driver->sync_obj_ref(sync_obj); bo->sync_obj_arg = entry->new_sync_obj_arg; - spin_unlock(&bdev->fence_lock); - ttm_bo_unreserve(bo); + ttm_bo_unreserve_locked(bo); entry->reserved = false; - if (old_sync_obj) - driver->sync_obj_unref(&old_sync_obj); + } + spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->fence_lock); + + list_for_each_entry(entry, list, head) { + if (entry->old_sync_obj) + driver->sync_obj_unref(&entry->old_sync_obj); } } EXPORT_SYMBOL(ttm_eu_fence_buffer_objects); diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index ca8131e98300..cfb9ca4ec1c4 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -909,6 +909,16 @@ extern int ttm_bo_reserve_locked(struct ttm_buffer_object *bo, */ extern void ttm_bo_unreserve(struct ttm_buffer_object *bo); +/** + * ttm_bo_unreserve_locked + * + * @bo: A pointer to a struct ttm_buffer_object. + * + * Unreserve a previous reservation of @bo. + * Needs to be called with struct ttm_bo_global::lru_lock held. 
+ */ +extern void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo); + /** * ttm_bo_wait_unreserved * diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h index fd09b8438977..535ab00407e0 100644 --- a/include/drm/ttm/ttm_execbuf_util.h +++ b/include/drm/ttm/ttm_execbuf_util.h @@ -44,6 +44,7 @@ * @reserved: Indicates whether @bo has been reserved for validation. * @removed: Indicates whether @bo has been removed from lru lists. * @put_count: Number of outstanding references on bo::list_kref. + * @old_sync_obj: Pointer to a sync object about to be unreferenced */ struct ttm_validate_buffer { @@ -53,6 +54,7 @@ struct ttm_validate_buffer { bool reserved; bool removed; int put_count; + void *old_sync_obj; }; /** From 65705962025df490d13df59ec57c5329d1bd0a16 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 17 Nov 2010 12:28:31 +0000 Subject: [PATCH 08/10] drm/ttm/vmwgfx: Have TTM manage the validation sequence. Rather than having the driver supply the validation sequence, leave that responsibility to TTM. This saves some confusion and a function argument. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 1 + drivers/gpu/drm/ttm/ttm_execbuf_util.c | 5 ++++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 1 - drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 3 +-- include/drm/ttm/ttm_bo_driver.h | 2 ++ include/drm/ttm/ttm_execbuf_util.h | 3 +-- 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 551a5d31cadf..25e4c2a1d1d8 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1539,6 +1539,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, bdev->dev_mapping = NULL; bdev->glob = glob; bdev->need_dma32 = need_dma32; + bdev->val_seq = 0; spin_lock_init(&bdev->fence_lock); mutex_lock(&glob->device_list_mutex); list_add_tail(&bdev->device_list, &glob->device_list); diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index b6da65cc502a..3832fe10b4df 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -126,11 +126,12 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation); * buffers in different orders. 
*/ -int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq) +int ttm_eu_reserve_buffers(struct list_head *list) { struct ttm_bo_global *glob; struct ttm_validate_buffer *entry; int ret; + uint32_t val_seq; if (list_empty(list)) return 0; @@ -146,6 +147,8 @@ int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq) retry: spin_lock(&glob->lru_lock); + val_seq = entry->bo->bdev->val_seq++; + list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index e7a58d055041..10fc01f69c40 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -264,7 +264,6 @@ struct vmw_private { */ struct vmw_sw_context ctx; - uint32_t val_seq; struct mutex cmdbuf_mutex; /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 76954e3528c1..41b95ed6dbcd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -653,8 +653,7 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data, ret = vmw_cmd_check_all(dev_priv, sw_context, cmd, arg->command_size); if (unlikely(ret != 0)) goto out_err; - ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes, - dev_priv->val_seq++); + ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes); if (unlikely(ret != 0)) goto out_err; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index cfb9ca4ec1c4..e3b2e245db1b 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -515,6 +515,7 @@ struct ttm_bo_global { * @addr_space_mm: Range manager for the device address space. * lru_lock: Spinlock that protects the buffer+device lru lists and * ddestroy lists. + * @val_seq: Current validation sequence. * @nice_mode: Try nicely to wait for buffer idle when cleaning a manager. * If a GPU lockup has been detected, this is forced to 0. * @dev_mapping: A pointer to the struct address_space representing the @@ -544,6 +545,7 @@ struct ttm_bo_device { * Protected by the global:lru lock. */ struct list_head ddestroy; + uint32_t val_seq; /* * Protected by load / firstopen / lastclose /unload sync. diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h index 535ab00407e0..26cc7f9ffa41 100644 --- a/include/drm/ttm/ttm_execbuf_util.h +++ b/include/drm/ttm/ttm_execbuf_util.h @@ -72,7 +72,6 @@ extern void ttm_eu_backoff_reservation(struct list_head *list); * function ttm_eu_reserve_buffers * * @list: thread private list of ttm_validate_buffer structs. - * @val_seq: A unique sequence number. * * Tries to reserve bos pointed to by the list entries for validation. * If the function returns 0, all buffers are marked as "unfenced", @@ -94,7 +93,7 @@ extern void ttm_eu_backoff_reservation(struct list_head *list); * has failed. */ -extern int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq); +extern int ttm_eu_reserve_buffers(struct list_head *list); /** * function ttm_eu_fence_buffer_objects. From eba67093f535322cb4f1c4b737319c0907a0c81d Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 11 Nov 2010 09:41:57 +0100 Subject: [PATCH 09/10] drm/ttm: Fix up io_mem_reserve / io_mem_free calling This patch attempts to fix up shortcomings with the current calling sequences. 1) There's a fastpath where no locking occurs and only io_mem_reserve is called to obtain needed info for mapping. The fastpath is set per memory type manager.
2) If the fastpath is disabled, io_mem_reserve and io_mem_free will be exactly balanced and not called recursively for the same struct ttm_mem_reg. 3) Optionally the driver can choose to enable a per memory type manager LRU eviction mechanism that, when io_mem_reserve returns -EAGAIN will attempt to kill user-space mappings of memory in that manager to free up needed resources Signed-off-by: Thomas Hellstrom Reviewed-by: Ben Skeggs Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 44 ++++++++-- drivers/gpu/drm/ttm/ttm_bo_util.c | 129 ++++++++++++++++++++++++++---- drivers/gpu/drm/ttm/ttm_bo_vm.c | 23 ++++-- include/drm/ttm/ttm_bo_api.h | 6 +- include/drm/ttm/ttm_bo_driver.h | 104 ++++++++++++------------ 5 files changed, 226 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 25e4c2a1d1d8..cf2ec562550e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -378,8 +378,13 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, int ret = 0; if (old_is_pci || new_is_pci || - ((mem->placement & bo->mem.placement & TTM_PL_MASK_CACHING) == 0)) - ttm_bo_unmap_virtual(bo); + ((mem->placement & bo->mem.placement & TTM_PL_MASK_CACHING) == 0)) { + ret = ttm_mem_io_lock(old_man, true); + if (unlikely(ret != 0)) + goto out_err; + ttm_bo_unmap_virtual_locked(bo); + ttm_mem_io_unlock(old_man); + } /* * Create and bind a ttm if required. @@ -466,7 +471,6 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo) ttm_tt_destroy(bo->ttm); bo->ttm = NULL; } - ttm_bo_mem_put(bo, &bo->mem); atomic_set(&bo->reserved, 0); @@ -665,6 +669,7 @@ static void ttm_bo_release(struct kref *kref) struct ttm_buffer_object *bo = container_of(kref, struct ttm_buffer_object, kref); struct ttm_bo_device *bdev = bo->bdev; + struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type]; if (likely(bo->vm_node != NULL)) { rb_erase(&bo->vm_rb, &bdev->addr_space_rb); @@ -672,6 +677,9 @@ static void ttm_bo_release(struct kref *kref) bo->vm_node = NULL; } write_unlock(&bdev->vm_lock); + ttm_mem_io_lock(man, false); + ttm_mem_io_free_vm(bo); + ttm_mem_io_unlock(man); ttm_bo_cleanup_refs_or_queue(bo); kref_put(&bo->list_kref, ttm_bo_release_list); write_lock(&bdev->vm_lock); @@ -728,7 +736,8 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, evict_mem = bo->mem; evict_mem.mm_node = NULL; - evict_mem.bus.io_reserved = false; + evict_mem.bus.io_reserved_vm = false; + evict_mem.bus.io_reserved_count = 0; placement.fpfn = 0; placement.lpfn = 0; @@ -1065,7 +1074,8 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, mem.num_pages = bo->num_pages; mem.size = mem.num_pages << PAGE_SHIFT; mem.page_alignment = bo->mem.page_alignment; - mem.bus.io_reserved = false; + mem.bus.io_reserved_vm = false; + mem.bus.io_reserved_count = 0; /* * Determine where to move the buffer. 
*/ @@ -1184,6 +1194,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev, INIT_LIST_HEAD(&bo->lru); INIT_LIST_HEAD(&bo->ddestroy); INIT_LIST_HEAD(&bo->swap); + INIT_LIST_HEAD(&bo->io_reserve_lru); bo->bdev = bdev; bo->glob = bdev->glob; bo->type = type; @@ -1193,7 +1204,8 @@ int ttm_bo_init(struct ttm_bo_device *bdev, bo->mem.num_pages = bo->num_pages; bo->mem.mm_node = NULL; bo->mem.page_alignment = page_alignment; - bo->mem.bus.io_reserved = false; + bo->mem.bus.io_reserved_vm = false; + bo->mem.bus.io_reserved_count = 0; bo->buffer_start = buffer_start & PAGE_MASK; bo->priv_flags = 0; bo->mem.placement = (TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED); @@ -1367,6 +1379,10 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, BUG_ON(type >= TTM_NUM_MEM_TYPES); man = &bdev->man[type]; BUG_ON(man->has_type); + man->io_reserve_fastpath = true; + man->use_io_reserve_lru = false; + mutex_init(&man->io_reserve_mutex); + INIT_LIST_HEAD(&man->io_reserve_lru); ret = bdev->driver->init_mem_type(bdev, type, man); if (ret) @@ -1574,7 +1590,7 @@ bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) return true; } -void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo) +void ttm_bo_unmap_virtual_locked(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; loff_t offset = (loff_t) bo->addr_space_offset; @@ -1583,8 +1599,20 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo) if (!bdev->dev_mapping) return; unmap_mapping_range(bdev->dev_mapping, offset, holelen, 1); - ttm_mem_io_free(bdev, &bo->mem); + ttm_mem_io_free_vm(bo); } + +void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo) +{ + struct ttm_bo_device *bdev = bo->bdev; + struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type]; + + ttm_mem_io_lock(man, false); + ttm_bo_unmap_virtual_locked(bo); + ttm_mem_io_unlock(man); +} + + EXPORT_SYMBOL(ttm_bo_unmap_virtual); static void ttm_bo_vm_insert_rb(struct ttm_buffer_object *bo) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 4b75133d6606..a89839f83f6c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -75,37 +75,123 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_move_ttm); -int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) +int ttm_mem_io_lock(struct ttm_mem_type_manager *man, bool interruptible) { + if (likely(man->io_reserve_fastpath)) + return 0; + + if (interruptible) + return mutex_lock_interruptible(&man->io_reserve_mutex); + + mutex_lock(&man->io_reserve_mutex); + return 0; +} + +void ttm_mem_io_unlock(struct ttm_mem_type_manager *man) +{ + if (likely(man->io_reserve_fastpath)) + return; + + mutex_unlock(&man->io_reserve_mutex); +} + +static int ttm_mem_io_evict(struct ttm_mem_type_manager *man) +{ + struct ttm_buffer_object *bo; + + if (!man->use_io_reserve_lru || list_empty(&man->io_reserve_lru)) + return -EAGAIN; + + bo = list_first_entry(&man->io_reserve_lru, + struct ttm_buffer_object, + io_reserve_lru); + list_del_init(&bo->io_reserve_lru); + ttm_bo_unmap_virtual_locked(bo); + + return 0; +} + +static int ttm_mem_io_reserve(struct ttm_bo_device *bdev, + struct ttm_mem_reg *mem) +{ + struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; + int ret = 0; + + if (!bdev->driver->io_mem_reserve) + return 0; + if (likely(man->io_reserve_fastpath)) + return bdev->driver->io_mem_reserve(bdev, mem); + + if (bdev->driver->io_mem_reserve && + mem->bus.io_reserved_count++ == 0) { +retry: + ret = 
bdev->driver->io_mem_reserve(bdev, mem); + if (ret == -EAGAIN) { + ret = ttm_mem_io_evict(man); + if (ret == 0) + goto retry; + } + } + return ret; +} + +static void ttm_mem_io_free(struct ttm_bo_device *bdev, + struct ttm_mem_reg *mem) +{ + struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; + + if (likely(man->io_reserve_fastpath)) + return; + + if (bdev->driver->io_mem_reserve && + --mem->bus.io_reserved_count == 0 && + bdev->driver->io_mem_free) + bdev->driver->io_mem_free(bdev, mem); + +} + +int ttm_mem_io_reserve_vm(struct ttm_buffer_object *bo) +{ + struct ttm_mem_reg *mem = &bo->mem; int ret; - if (!mem->bus.io_reserved) { - mem->bus.io_reserved = true; - ret = bdev->driver->io_mem_reserve(bdev, mem); + if (!mem->bus.io_reserved_vm) { + struct ttm_mem_type_manager *man = + &bo->bdev->man[mem->mem_type]; + + ret = ttm_mem_io_reserve(bo->bdev, mem); if (unlikely(ret != 0)) return ret; + mem->bus.io_reserved_vm = true; + if (man->use_io_reserve_lru) + list_add_tail(&bo->io_reserve_lru, + &man->io_reserve_lru); } return 0; } -void ttm_mem_io_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) +void ttm_mem_io_free_vm(struct ttm_buffer_object *bo) { - if (bdev->driver->io_mem_reserve) { - if (mem->bus.io_reserved) { - mem->bus.io_reserved = false; - bdev->driver->io_mem_free(bdev, mem); - } + struct ttm_mem_reg *mem = &bo->mem; + + if (mem->bus.io_reserved_vm) { + mem->bus.io_reserved_vm = false; + list_del_init(&bo->io_reserve_lru); + ttm_mem_io_free(bo->bdev, mem); } } int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem, void **virtual) { + struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; int ret; void *addr; *virtual = NULL; + (void) ttm_mem_io_lock(man, false); ret = ttm_mem_io_reserve(bdev, mem); + ttm_mem_io_unlock(man); if (ret || !mem->bus.is_iomem) return ret; @@ -117,7 +203,9 @@ int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem, else addr = ioremap_nocache(mem->bus.base + mem->bus.offset, mem->bus.size); if (!addr) { + (void) ttm_mem_io_lock(man, false); ttm_mem_io_free(bdev, mem); + ttm_mem_io_unlock(man); return -ENOMEM; } } @@ -134,7 +222,9 @@ void ttm_mem_reg_iounmap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem, if (virtual && mem->bus.addr == NULL) iounmap(virtual); + (void) ttm_mem_io_lock(man, false); ttm_mem_io_free(bdev, mem); + ttm_mem_io_unlock(man); } static int ttm_copy_io_page(void *dst, void *src, unsigned long page) @@ -231,7 +321,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, struct ttm_mem_type_manager *man = &bdev->man[new_mem->mem_type]; struct ttm_tt *ttm = bo->ttm; struct ttm_mem_reg *old_mem = &bo->mem; - struct ttm_mem_reg old_copy = *old_mem; + struct ttm_mem_reg old_copy; void *old_iomap; void *new_iomap; int ret; @@ -281,7 +371,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, mb(); out2: ttm_bo_free_old_node(bo); - + old_copy = *old_mem; *old_mem = *new_mem; new_mem->mm_node = NULL; @@ -292,7 +382,7 @@ out2: } out1: - ttm_mem_reg_iounmap(bdev, new_mem, new_iomap); + ttm_mem_reg_iounmap(bdev, old_mem, new_iomap); out: ttm_mem_reg_iounmap(bdev, &old_copy, old_iomap); return ret; @@ -341,6 +431,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, INIT_LIST_HEAD(&fbo->ddestroy); INIT_LIST_HEAD(&fbo->lru); INIT_LIST_HEAD(&fbo->swap); + INIT_LIST_HEAD(&fbo->io_reserve_lru); fbo->vm_node = NULL; atomic_set(&fbo->cpu_writers, 0); @@ -452,6 +543,8 @@ int ttm_bo_kmap(struct ttm_buffer_object *bo, unsigned long start_page, unsigned long 
num_pages, struct ttm_bo_kmap_obj *map) { + struct ttm_mem_type_manager *man = + &bo->bdev->man[bo->mem.mem_type]; unsigned long offset, size; int ret; @@ -466,7 +559,9 @@ int ttm_bo_kmap(struct ttm_buffer_object *bo, if (num_pages > 1 && !DRM_SUSER(DRM_CURPROC)) return -EPERM; #endif + (void) ttm_mem_io_lock(man, false); ret = ttm_mem_io_reserve(bo->bdev, &bo->mem); + ttm_mem_io_unlock(man); if (ret) return ret; if (!bo->mem.bus.is_iomem) { @@ -481,12 +576,15 @@ EXPORT_SYMBOL(ttm_bo_kmap); void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map) { + struct ttm_buffer_object *bo = map->bo; + struct ttm_mem_type_manager *man = + &bo->bdev->man[bo->mem.mem_type]; + if (!map->virtual) return; switch (map->bo_kmap_type) { case ttm_bo_map_iomap: iounmap(map->virtual); - ttm_mem_io_free(map->bo->bdev, &map->bo->mem); break; case ttm_bo_map_vmap: vunmap(map->virtual); @@ -499,6 +597,9 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map) default: BUG(); } + (void) ttm_mem_io_lock(man, false); + ttm_mem_io_free(map->bo->bdev, &map->bo->mem); + ttm_mem_io_unlock(man); map->virtual = NULL; map->page = NULL; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 8dd446cb778e..221b924acebe 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -83,6 +83,8 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int i; unsigned long address = (unsigned long)vmf->virtual_address; int retval = VM_FAULT_NOPAGE; + struct ttm_mem_type_manager *man = + &bdev->man[bo->mem.mem_type]; /* * Work around locking order reversal in fault / nopfn @@ -130,12 +132,16 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } else spin_unlock(&bdev->fence_lock); - - ret = ttm_mem_io_reserve(bdev, &bo->mem); - if (ret) { - retval = VM_FAULT_SIGBUS; + ret = ttm_mem_io_lock(man, true); + if (unlikely(ret != 0)) { + retval = VM_FAULT_NOPAGE; goto out_unlock; } + ret = ttm_mem_io_reserve_vm(bo); + if (unlikely(ret != 0)) { + retval = VM_FAULT_SIGBUS; + goto out_io_unlock; + } page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) + bo->vm_node->start - vma->vm_pgoff; @@ -144,7 +150,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (unlikely(page_offset >= bo->num_pages)) { retval = VM_FAULT_SIGBUS; - goto out_unlock; + goto out_io_unlock; } /* @@ -182,7 +188,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) page = ttm_tt_get_page(ttm, page_offset); if (unlikely(!page && i == 0)) { retval = VM_FAULT_OOM; - goto out_unlock; + goto out_io_unlock; } else if (unlikely(!page)) { break; } @@ -200,14 +206,15 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) else if (unlikely(ret != 0)) { retval = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; - goto out_unlock; + goto out_io_unlock; } address += PAGE_SIZE; if (unlikely(++page_offset >= page_last)) break; } - +out_io_unlock: + ttm_mem_io_unlock(man); out_unlock: ttm_bo_unreserve(bo); return retval; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index edacd483c59c..50852aad260a 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -74,6 +74,8 @@ struct ttm_placement { * @is_iomem: is this io memory ? 
* @size: size in byte * @offset: offset from the base address + * @io_reserved_vm: The VM system has a refcount in @io_reserved_count + * @io_reserved_count: Refcounting the numbers of callers to ttm_mem_io_reserve * * Structure indicating the bus placement of an object. */ @@ -83,7 +85,8 @@ struct ttm_bus_placement { unsigned long size; unsigned long offset; bool is_iomem; - bool io_reserved; + bool io_reserved_vm; + uint64_t io_reserved_count; }; @@ -235,6 +238,7 @@ struct ttm_buffer_object { struct list_head lru; struct list_head ddestroy; struct list_head swap; + struct list_head io_reserve_lru; uint32_t val_seq; bool seq_valid; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index e3b2e245db1b..1da8af6ac884 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -179,30 +179,6 @@ struct ttm_tt { #define TTM_MEMTYPE_FLAG_MAPPABLE (1 << 1) /* Memory mappable */ #define TTM_MEMTYPE_FLAG_CMA (1 << 3) /* Can't map aperture */ -/** - * struct ttm_mem_type_manager - * - * @has_type: The memory type has been initialized. - * @use_type: The memory type is enabled. - * @flags: TTM_MEMTYPE_XX flags identifying the traits of the memory - * managed by this memory type. - * @gpu_offset: If used, the GPU offset of the first managed page of - * fixed memory or the first managed location in an aperture. - * @size: Size of the managed region. - * @available_caching: A mask of available caching types, TTM_PL_FLAG_XX, - * as defined in ttm_placement_common.h - * @default_caching: The default caching policy used for a buffer object - * placed in this memory type if the user doesn't provide one. - * @manager: The range manager used for this memory type. FIXME: If the aperture - * has a page size different from the underlying system, the granularity - * of this manager should take care of this. But the range allocating code - * in ttm_bo.c needs to be modified for this. - * @lru: The lru list for this memory type. - * - * This structure is used to identify and manage memory types for a device. - * It's set up by the ttm_bo_driver::init_mem_type method. - */ - struct ttm_mem_type_manager; struct ttm_mem_type_manager_func { @@ -287,6 +263,36 @@ struct ttm_mem_type_manager_func { void (*debug)(struct ttm_mem_type_manager *man, const char *prefix); }; +/** + * struct ttm_mem_type_manager + * + * @has_type: The memory type has been initialized. + * @use_type: The memory type is enabled. + * @flags: TTM_MEMTYPE_XX flags identifying the traits of the memory + * managed by this memory type. + * @gpu_offset: If used, the GPU offset of the first managed page of + * fixed memory or the first managed location in an aperture. + * @size: Size of the managed region. + * @available_caching: A mask of available caching types, TTM_PL_FLAG_XX, + * as defined in ttm_placement_common.h + * @default_caching: The default caching policy used for a buffer object + * placed in this memory type if the user doesn't provide one. + * @func: structure pointer implementing the range manager. See above + * @priv: Driver private closure for @func. + * @io_reserve_mutex: Mutex optionally protecting shared io_reserve structures + * @use_io_reserve_lru: Use an lru list to try to unreserve io_mem_regions + * reserved by the TTM vm system. + * @io_reserve_lru: Optional lru list for unreserving io mem regions. + * @io_reserve_fastpath: Only use bdev::driver::io_mem_reserve to obtain + * static information. bdev::driver::io_mem_free is never used. 
+ * @lru: The lru list for this memory type.
+ *
+ * This structure is used to identify and manage memory types for a device.
+ * It's set up by the ttm_bo_driver::init_mem_type method.
+ */
+
+
+
 struct ttm_mem_type_manager {
 	struct ttm_bo_device *bdev;
 
@@ -303,6 +309,15 @@ struct ttm_mem_type_manager {
 	uint32_t default_caching;
 	const struct ttm_mem_type_manager_func *func;
 	void *priv;
+	struct mutex io_reserve_mutex;
+	bool use_io_reserve_lru;
+	bool io_reserve_fastpath;
+
+	/*
+	 * Protected by @io_reserve_mutex:
+	 */
+
+	struct list_head io_reserve_lru;
 
 	/*
 	 * Protected by the global->lru_lock.
@@ -758,31 +773,6 @@ extern void ttm_bo_mem_put_locked(struct ttm_buffer_object *bo,
 
 extern int ttm_bo_wait_cpu(struct ttm_buffer_object *bo, bool no_wait);
 
-/**
- * ttm_bo_pci_offset - Get the PCI offset for the buffer object memory.
- *
- * @bo Pointer to a struct ttm_buffer_object.
- * @bus_base On return the base of the PCI region
- * @bus_offset On return the byte offset into the PCI region
- * @bus_size On return the byte size of the buffer object or zero if
- * the buffer object memory is not accessible through a PCI region.
- *
- * Returns:
- * -EINVAL if the buffer object is currently not mappable.
- * 0 otherwise.
- */
-
-extern int ttm_bo_pci_offset(struct ttm_bo_device *bdev,
-			     struct ttm_mem_reg *mem,
-			     unsigned long *bus_base,
-			     unsigned long *bus_offset,
-			     unsigned long *bus_size);
-
-extern int ttm_mem_io_reserve(struct ttm_bo_device *bdev,
-			      struct ttm_mem_reg *mem);
-extern void ttm_mem_io_free(struct ttm_bo_device *bdev,
-			    struct ttm_mem_reg *mem);
-
 extern void ttm_bo_global_release(struct drm_global_reference *ref);
 extern int ttm_bo_global_init(struct drm_global_reference *ref);
 
@@ -814,6 +804,22 @@ extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
  */
 extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo);
 
+/**
+ * ttm_bo_unmap_virtual
+ *
+ * @bo: tear down the virtual mappings for this BO
+ *
+ * The caller must take ttm_mem_io_lock before calling this function.
+ */
+extern void ttm_bo_unmap_virtual_locked(struct ttm_buffer_object *bo);
+
+extern int ttm_mem_io_reserve_vm(struct ttm_buffer_object *bo);
+extern void ttm_mem_io_free_vm(struct ttm_buffer_object *bo);
+extern int ttm_mem_io_lock(struct ttm_mem_type_manager *man,
+			   bool interruptible);
+extern void ttm_mem_io_unlock(struct ttm_mem_type_manager *man);
+
+
 /**
  * ttm_bo_reserve:
  *
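
A note on the fastpath documented above: a memory type whose io_mem_reserve()
only returns static information can set @io_reserve_fastpath and skip the
mutex entirely. The following is a minimal sketch of lock helpers with these
semantics, assuming only the fields added to struct ttm_mem_type_manager in
this patch; it is illustrative, not a copy of the helpers the series adds to
ttm_bo_util.c (the example_* names are placeholders):

#include <linux/mutex.h>
#include "ttm/ttm_bo_driver.h"

/* Illustrative lock helper honoring io_reserve_fastpath. */
static int example_mem_io_lock(struct ttm_mem_type_manager *man,
			       bool interruptible)
{
	if (likely(man->io_reserve_fastpath))
		return 0;	/* static io info only; nothing to protect */

	if (interruptible)
		/* Fault path: a signal here propagates -ERESTARTSYS and
		 * the caller returns VM_FAULT_NOPAGE to retry the fault. */
		return mutex_lock_interruptible(&man->io_reserve_mutex);

	mutex_lock(&man->io_reserve_mutex);
	return 0;
}

static void example_mem_io_unlock(struct ttm_mem_type_manager *man)
{
	if (likely(man->io_reserve_fastpath))
		return;

	mutex_unlock(&man->io_reserve_mutex);
}

This matches how the fault handler in ttm_bo_vm.c above calls
ttm_mem_io_lock(man, true) and maps a signal-interrupted lock attempt to
VM_FAULT_NOPAGE so the fault is simply retried.
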
From 147666fb3b93b8c484f562da33a37f886ddff768 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom
Date: Wed, 17 Nov 2010 12:38:32 +0000
Subject: [PATCH 10/10] drm/radeon: Use the ttm execbuf utilities

Rather than re-implementing it in the Radeon driver, use the
execbuf / cs / pushbuf utilities that come with TTM. This comes with
an even greater benefit now that many spinlocks have been optimized
away...

Signed-off-by: Thomas Hellstrom
Signed-off-by: Dave Airlie
---
 drivers/gpu/drm/radeon/radeon.h        |  4 +-
 drivers/gpu/drm/radeon/radeon_cs.c     | 17 ++++----
 drivers/gpu/drm/radeon/radeon_object.c | 55 ++------------------------
 drivers/gpu/drm/radeon/radeon_object.h |  3 --
 4 files changed, 16 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 3a7095743d44..b1e073b7381f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -69,6 +69,7 @@
 #include <ttm/ttm_bo_driver.h>
 #include <ttm/ttm_placement.h>
 #include <ttm/ttm_module.h>
+#include <ttm/ttm_execbuf_util.h>
 
 #include "radeon_family.h"
 #include "radeon_mode.h"
@@ -259,13 +260,12 @@ struct radeon_bo {
 };
 
 struct radeon_bo_list {
-	struct list_head	list;
+	struct ttm_validate_buffer tv;
 	struct radeon_bo	*bo;
 	uint64_t		gpu_offset;
 	unsigned		rdomain;
 	unsigned		wdomain;
 	u32			tiling_flags;
-	bool			reserved;
 };
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 6d64a2705f12..35b5eb8fbe2a 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -77,13 +77,13 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			p->relocs_ptr[i] = &p->relocs[i];
 			p->relocs[i].robj = p->relocs[i].gobj->driver_private;
 			p->relocs[i].lobj.bo = p->relocs[i].robj;
-			p->relocs[i].lobj.rdomain = r->read_domains;
 			p->relocs[i].lobj.wdomain = r->write_domain;
+			p->relocs[i].lobj.rdomain = r->read_domains;
+			p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
 			p->relocs[i].handle = r->handle;
 			p->relocs[i].flags = r->flags;
-			INIT_LIST_HEAD(&p->relocs[i].lobj.list);
 			radeon_bo_list_add_object(&p->relocs[i].lobj,
-						&p->validated);
+						  &p->validated);
 		}
 	}
 	return radeon_bo_list_validate(&p->validated);
@@ -189,10 +189,13 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
 {
 	unsigned i;
 
-	if (!error && parser->ib) {
-		radeon_bo_list_fence(&parser->validated, parser->ib->fence);
-	}
-	radeon_bo_list_unreserve(&parser->validated);
+
+	if (!error && parser->ib)
+		ttm_eu_fence_buffer_objects(&parser->validated,
+					    parser->ib->fence);
+	else
+		ttm_eu_backoff_reservation(&parser->validated);
+
 	if (parser->relocs != NULL) {
 		for (i = 0; i < parser->nrelocs; i++) {
 			if (parser->relocs[i].gobj)
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index e939cb6a91cc..a8594d289bcf 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -293,34 +293,9 @@ void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
 				struct list_head *head)
 {
 	if (lobj->wdomain) {
-		list_add(&lobj->list, head);
+		list_add(&lobj->tv.head, head);
 	} else {
-		list_add_tail(&lobj->list, head);
-	}
-}
-
-int radeon_bo_list_reserve(struct list_head *head)
-{
-	struct radeon_bo_list *lobj;
-	int r;
-
-	list_for_each_entry(lobj, head, list){
-		r = radeon_bo_reserve(lobj->bo, false);
-		if (unlikely(r != 0))
-			return r;
-		lobj->reserved = true;
-	}
-	return 0;
-}
-
-void radeon_bo_list_unreserve(struct list_head *head)
-{
-	struct radeon_bo_list *lobj;
-
-	list_for_each_entry(lobj, head, list) {
-		/* only unreserve object we successfully reserved */
-		if (lobj->reserved && radeon_bo_is_reserved(lobj->bo))
-			radeon_bo_unreserve(lobj->bo);
+		list_add_tail(&lobj->tv.head, head);
 	}
 }
 
@@ -331,14 +306,11 @@ int radeon_bo_list_validate(struct list_head *head)
 	u32 domain;
 	int r;
 
-	list_for_each_entry(lobj, head, list) {
-		lobj->reserved = false;
-	}
-	r = radeon_bo_list_reserve(head);
+	r = ttm_eu_reserve_buffers(head);
 	if (unlikely(r != 0)) {
 		return r;
 	}
-	list_for_each_entry(lobj, head, list) {
+	list_for_each_entry(lobj, head, tv.head) {
 		bo = lobj->bo;
 		if (!bo->pin_count) {
 			domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain;
 
@@ -361,25 +333,6 @@ int radeon_bo_list_validate(struct list_head *head)
 	return 0;
 }
 
-void radeon_bo_list_fence(struct list_head *head, void *fence)
-{
-	struct radeon_bo_list *lobj;
-	struct radeon_bo *bo;
-	struct radeon_fence *old_fence = NULL;
-
-	list_for_each_entry(lobj, head, list) {
-		bo = lobj->bo;
-		spin_lock(&bo->tbo.bdev->fence_lock);
-		old_fence = (struct radeon_fence *)bo->tbo.sync_obj;
-		bo->tbo.sync_obj = radeon_fence_ref(fence);
-		bo->tbo.sync_obj_arg = NULL;
-		spin_unlock(&bo->tbo.bdev->fence_lock);
-		if (old_fence) {
-			radeon_fence_unref(&old_fence);
-		}
-	}
-}
-
 int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
 			 struct vm_area_struct *vma)
 {
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index fd536751f865..22d4c237dea5 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -152,10 +152,7 @@ extern int radeon_bo_init(struct radeon_device *rdev);
 extern void radeon_bo_fini(struct radeon_device *rdev);
 extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
 				      struct list_head *head);
-extern int radeon_bo_list_reserve(struct list_head *head);
-extern void radeon_bo_list_unreserve(struct list_head *head);
 extern int radeon_bo_list_validate(struct list_head *head);
-extern void radeon_bo_list_fence(struct list_head *head, void *fence);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
 				struct vm_area_struct *vma);
 extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
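
With the conversion above, a driver's command submission path reduces to the
three TTM execbuf entry points: reserve the list, validate each buffer, then
either fence or back off. Below is a minimal sketch of that flow, modelled on
radeon_bo_list_validate() and radeon_cs_parser_fini(); driver_validate_bo()
and driver_cs_sketch() are hypothetical placeholders, not radeon or TTM
functions, and the fence is passed as the opaque sync_obj pointer TTM expects:

#include <linux/list.h>
#include "ttm/ttm_execbuf_util.h"

/* Placeholder for driver-specific placement validation. */
static int driver_validate_bo(struct ttm_buffer_object *bo)
{
	return 0;	/* a real driver would (re)validate placement here */
}

static int driver_cs_sketch(struct list_head *validated, void *fence)
{
	struct ttm_validate_buffer *entry;
	int r;

	/*
	 * Reserve every buffer on the list. With the fastpath added
	 * earlier in this series this is a single lru_lock round trip
	 * when there is no contention.
	 */
	r = ttm_eu_reserve_buffers(validated);
	if (unlikely(r != 0))
		return r;

	list_for_each_entry(entry, validated, head) {
		r = driver_validate_bo(entry->bo);
		if (unlikely(r != 0)) {
			/* Unreserve everything and put the buffers
			 * back on their lru lists. */
			ttm_eu_backoff_reservation(validated);
			return r;
		}
	}

	/* Success: fence all buffers and unreserve them in one pass. */
	ttm_eu_fence_buffer_objects(validated, fence);
	return 0;
}

Note that the backoff call both unreserves the buffers and returns them to
their LRU lists, which is what the radeon_cs_parser_fini() error path above
relies on.
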