linux/drivers/gpu/drm/msm/msm_ringbuffer.c

133 lines
3.0 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2013 Red Hat
* Author: Rob Clark <robdclark@gmail.com>
*/
#include "msm_ringbuffer.h"
#include "msm_gpu.h"
static uint num_hw_submissions = 8;
MODULE_PARM_DESC(num_hw_submissions, "The max # of jobs to write into ringbuffer (default 8)");
module_param(num_hw_submissions, uint, 0600);
static struct dma_fence *msm_job_run(struct drm_sched_job *job)
{
struct msm_gem_submit *submit = to_msm_submit(job);
struct msm_fence_context *fctx = submit->ring->fctx;
struct msm_gpu *gpu = submit->gpu;
struct msm_drm_private *priv = gpu->dev->dev_private;
int i;
msm_fence_init(submit->hw_fence, fctx);
drm/msm/gpu: Push gpu lock down past runpm Avoid holding gpu lock when calling runpm, to avoid this lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 6.4.3-debug+ #14 Not tainted ------------------------------------------------------ ring0/373 is trying to acquire lock: ffffffead86efb98 (prepare_lock){+.+.}-{3:3}, at: clk_prepare_lock+0x70/0x98 but task is already holding lock: ffffff809cd19170 (&gpu->lock){+.+.}-{3:3}, at: msm_job_run+0x7c/0x128 [msm] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (&gpu->lock){+.+.}-{3:3}: __mutex_lock+0xc8/0x388 mutex_lock_nested+0x2c/0x38 msm_job_run+0x7c/0x128 [msm] drm_sched_main+0x264/0x354 [gpu_sched] kthread+0xf0/0x100 ret_from_fork+0x10/0x20 -> #3 (dma_fence_map){++++}-{0:0}: __dma_fence_might_wait+0x74/0xc0 dma_resv_lockdep+0x1f0/0x2e8 do_one_initcall+0xb4/0x214 kernel_init_freeable+0x338/0x33c kernel_init+0x30/0x134 ret_from_fork+0x10/0x20 -> #2 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}: fs_reclaim_acquire+0x7c/0x9c slab_pre_alloc_hook.constprop.0+0x40/0x250 __kmem_cache_alloc_node+0x60/0x18c kmalloc_node_trace+0x40/0x84 alloc_worker+0x2c/0x64 init_rescuer+0x34/0xe0 workqueue_init+0x168/0x1fc kernel_init_freeable+0x15c/0x33c kernel_init+0x30/0x134 ret_from_fork+0x10/0x20 -> #1 (fs_reclaim){+.+.}-{0:0}: __fs_reclaim_acquire+0x3c/0x48 fs_reclaim_acquire+0x50/0x9c slab_pre_alloc_hook.constprop.0+0x40/0x250 __kmem_cache_alloc_node+0x60/0x18c kmalloc_trace+0x44/0x88 clk_rcg2_dfs_determine_rate+0x60/0x214 clk_core_determine_round_nolock+0xb8/0xf0 clk_core_round_rate_nolock+0x84/0x118 clk_core_round_rate_nolock+0xd8/0x118 clk_round_rate+0x6c/0xd0 geni_se_clk_tbl_get+0x78/0xc0 geni_se_clk_freq_match+0x44/0xe4 get_spi_clk_cfg+0x50/0xf4 geni_spi_set_clock_and_bw+0x54/0x104 spi_geni_prepare_message+0x130/0x174 __spi_pump_transfer_message+0x200/0x4d8 __spi_sync+0x13c/0x23c spi_sync_locked+0x18/0x24 do_cros_ec_pkt_xfer_spi+0x124/0x3f0 cros_ec_xfer_high_pri_work+0x28/0x3c kthread_worker_fn+0x14c/0x27c kthread+0xf0/0x100 ret_from_fork+0x10/0x20 -> #0 (prepare_lock){+.+.}-{3:3}: __lock_acquire+0xdf8/0x109c lock_acquire+0x234/0x284 __mutex_lock+0xc8/0x388 mutex_lock_nested+0x2c/0x38 clk_prepare_lock+0x70/0x98 clk_prepare+0x24/0x50 clk_bulk_prepare+0x50/0x9c a6xx_gmu_resume+0x94/0x800 [msm] a6xx_gmu_pm_resume+0x38/0x158 [msm] adreno_runtime_resume+0x2c/0x38 [msm] pm_generic_runtime_resume+0x30/0x44 __rpm_callback+0x4c/0x134 rpm_callback+0x78/0x7c rpm_resume+0x3a4/0x46c __pm_runtime_resume+0x78/0xbc pm_runtime_get_sync.isra.0+0x14/0x20 [msm] msm_gpu_submit+0x4c/0x12c [msm] msm_job_run+0x88/0x128 [msm] drm_sched_main+0x264/0x354 [gpu_sched] kthread+0xf0/0x100 ret_from_fork+0x10/0x20 other info that might help us debug this: Chain exists of: prepare_lock --> dma_fence_map --> &gpu->lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&gpu->lock); lock(dma_fence_map); lock(&gpu->lock); lock(prepare_lock); *** DEADLOCK *** 2 locks held by ring0/373: #0: ffffffead875ae50 (dma_fence_map){++++}-{0:0}, at: drm_sched_main+0x54/0x354 [gpu_sched] #1: ffffff809cd19170 (&gpu->lock){+.+.}-{3:3}, at: msm_job_run+0x7c/0x128 [msm] stack backtrace: CPU: 2 PID: 373 Comm: ring0 Not tainted 6.4.3-debug+ #14 Hardware name: Google Villager (rev1+) with LTE (DT) Call trace: dump_backtrace+0xb4/0xf0 show_stack+0x20/0x30 dump_stack_lvl+0x60/0x84 dump_stack+0x18/0x24 print_circular_bug+0x1cc/0x234 check_noncircular+0x78/0xac __lock_acquire+0xdf8/0x109c lock_acquire+0x234/0x284 __mutex_lock+0xc8/0x388 mutex_lock_nested+0x2c/0x38 clk_prepare_lock+0x70/0x98 clk_prepare+0x24/0x50 clk_bulk_prepare+0x50/0x9c a6xx_gmu_resume+0x94/0x800 [msm] a6xx_gmu_pm_resume+0x38/0x158 [msm] adreno_runtime_resume+0x2c/0x38 [msm] pm_generic_runtime_resume+0x30/0x44 __rpm_callback+0x4c/0x134 rpm_callback+0x78/0x7c rpm_resume+0x3a4/0x46c __pm_runtime_resume+0x78/0xbc pm_runtime_get_sync.isra.0+0x14/0x20 [msm] msm_gpu_submit+0x4c/0x12c [msm] msm_job_run+0x88/0x128 [msm] drm_sched_main+0x264/0x354 [gpu_sched] kthread+0xf0/0x100 ret_from_fork+0x10/0x20 Signed-off-by: Rob Clark <robdclark@chromium.org> Patchwork: https://patchwork.freedesktop.org/patch/552298/
2023-08-10 14:31:41 -07:00
submit->seqno = submit->hw_fence->seqno;
mutex_lock(&priv->lru.lock);
for (i = 0; i < submit->nr_bos; i++) {
struct drm_gem_object *obj = submit->bos[i].obj;
msm_gem_unpin_active(obj);
submit->bos[i].flags &= ~BO_PINNED;
}
mutex_unlock(&priv->lru.lock);
msm_gpu_submit(gpu, submit);
return dma_fence_get(submit->hw_fence);
}
static void msm_job_free(struct drm_sched_job *job)
{
struct msm_gem_submit *submit = to_msm_submit(job);
drm_sched_job_cleanup(job);
msm_gem_submit_put(submit);
}
static const struct drm_sched_backend_ops msm_sched_ops = {
.run_job = msm_job_run,
.free_job = msm_job_free
};
struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
void *memptrs, uint64_t memptrs_iova)
{
struct msm_ringbuffer *ring;
long sched_timeout;
char name[32];
int ret;
/* We assume everwhere that MSM_GPU_RINGBUFFER_SZ is a power of 2 */
BUILD_BUG_ON(!is_power_of_2(MSM_GPU_RINGBUFFER_SZ));
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
if (!ring) {
ret = -ENOMEM;
goto fail;
}
ring->gpu = gpu;
ring->id = id;
ring->start = msm_gem_kernel_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ,
check_apriv(gpu, MSM_BO_WC | MSM_BO_GPU_READONLY),
gpu->aspace, &ring->bo, &ring->iova);
if (IS_ERR(ring->start)) {
ret = PTR_ERR(ring->start);
ring->start = NULL;
goto fail;
}
msm_gem_object_set_name(ring->bo, "ring%d", id);
ring->end = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2);
ring->next = ring->start;
ring->cur = ring->start;
ring->memptrs = memptrs;
ring->memptrs_iova = memptrs_iova;
/* currently managing hangcheck ourselves: */
sched_timeout = MAX_SCHEDULE_TIMEOUT;
ret = drm_sched_init(&ring->sched, &msm_sched_ops,
drm/sched: Convert the GPU scheduler to variable number of run-queues The GPU scheduler has now a variable number of run-queues, which are set up at drm_sched_init() time. This way, each driver announces how many run-queues it requires (supports) per each GPU scheduler it creates. Note, that run-queues correspond to scheduler "priorities", thus if the number of run-queues is set to 1 at drm_sched_init(), then that scheduler supports a single run-queue, i.e. single "priority". If a driver further sets a single entity per run-queue, then this creates a 1-to-1 correspondence between a scheduler and a scheduled entity. Cc: Lucas Stach <l.stach@pengutronix.de> Cc: Russell King <linux+etnaviv@armlinux.org.uk> Cc: Qiang Yu <yuq825@gmail.com> Cc: Rob Clark <robdclark@gmail.com> Cc: Abhinav Kumar <quic_abhinavk@quicinc.com> Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> Cc: Danilo Krummrich <dakr@redhat.com> Cc: Matthew Brost <matthew.brost@intel.com> Cc: Boris Brezillon <boris.brezillon@collabora.com> Cc: Alex Deucher <alexander.deucher@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Emma Anholt <emma@anholt.net> Cc: etnaviv@lists.freedesktop.org Cc: lima@lists.freedesktop.org Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Luben Tuikov <luben.tuikov@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Link: https://lore.kernel.org/r/20231023032251.164775-1-luben.tuikov@amd.com
2023-10-14 21:15:35 -04:00
DRM_SCHED_PRIORITY_COUNT,
num_hw_submissions, 0, sched_timeout,
NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
if (ret) {
goto fail;
}
INIT_LIST_HEAD(&ring->submits);
spin_lock_init(&ring->submit_lock);
spin_lock_init(&ring->preempt_lock);
snprintf(name, sizeof(name), "gpu-ring-%d", ring->id);
ring->fctx = msm_fence_context_alloc(gpu->dev, &ring->memptrs->fence, name);
return ring;
fail:
msm_ringbuffer_destroy(ring);
return ERR_PTR(ret);
}
void msm_ringbuffer_destroy(struct msm_ringbuffer *ring)
{
if (IS_ERR_OR_NULL(ring))
return;
drm_sched_fini(&ring->sched);
msm_fence_context_free(ring->fctx);
msm_gem_kernel_put(ring->bo, ring->gpu->aspace);
kfree(ring);
}