From 4f1cb5875ca0e9386ff2f6545cd386d47ab8441b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Apr 2019 08:56:27 +0100 Subject: [PATCH 0001/1815] drm/i915: Verify workarounds immediately after application Immediately after writing the workaround, verify that it stuck in the register. References: https://bugs.freedesktop.org/show_bug.cgi?id=108954 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190417075657.19456-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_workarounds.c | 32 +++++++++++++----------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index ccaf63679435..ea9292ee755a 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -913,6 +913,20 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) return fw; } +static bool +wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) +{ + if ((cur ^ wa->val) & wa->mask) { + DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", + name, from, i915_mmio_reg_offset(wa->reg), cur, + cur & wa->mask, wa->val, wa->mask); + + return false; + } + + return true; +} + static void wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) { @@ -931,6 +945,10 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + wa_verify(wa, + intel_uncore_read_fw(uncore, wa->reg), + wal->name, "application"); } intel_uncore_forcewake_put__locked(uncore, fw); @@ -942,20 +960,6 @@ void intel_gt_apply_workarounds(struct drm_i915_private *i915) wa_list_apply(&i915->uncore, &i915->gt_wa_list); } -static bool -wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) -{ - if ((cur ^ wa->val) & wa->mask) { - DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", - name, from, i915_mmio_reg_offset(wa->reg), cur, - cur & wa->mask, wa->val, wa->mask); - - return false; - } - - return true; -} - static bool wa_list_verify(struct intel_uncore *uncore, const struct i915_wa_list *wal, const char *from) From 254e11864a36a1d3b362bf5727e89382f1540015 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Apr 2019 08:56:28 +0100 Subject: [PATCH 0002/1815] drm/i915: Verify the engine workarounds stick on application Read the engine workarounds back using the GPU after loading the initial context state to verify that we are setting them correctly, and bail if it fails. 
v2: Break out the verification into its own loop Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190417075657.19456-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 21 +++ drivers/gpu/drm/i915/intel_workarounds.c | 120 ++++++++++++++++++ drivers/gpu/drm/i915/intel_workarounds.h | 2 + .../drm/i915/selftests/intel_workarounds.c | 53 +------- 4 files changed, 149 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0a818a60ad31..a5412323fee1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4842,6 +4842,23 @@ static void i915_gem_fini_scratch(struct drm_i915_private *i915) i915_vma_unpin_and_release(&i915->gt.scratch, 0); } +static int intel_engines_verify_workarounds(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return 0; + + for_each_engine(engine, i915, id) { + if (intel_engine_verify_workarounds(engine, "load")) + err = -EIO; + } + + return err; +} + int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; @@ -4927,6 +4944,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_init_clock_gating(dev_priv); + ret = intel_engines_verify_workarounds(dev_priv); + if (ret) + goto err_init_hw; + ret = __intel_engines_record_defaults(dev_priv); if (ret) goto err_init_hw; diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index ea9292ee755a..89e2c603e34b 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -1259,6 +1259,126 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine) wa_list_apply(engine->uncore, &engine->wa_list); } +static struct i915_vma * +create_scratch(struct i915_address_space *vm, int count) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int size; + int err; + + size = round_up(count * sizeof(u32), PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, + i915_vma_is_ggtt(vma) ? 
PIN_GLOBAL : PIN_USER); + if (err) + goto err_obj; + + return vma; + +err_obj: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int +wa_list_srm(struct i915_request *rq, + const struct i915_wa_list *wal, + struct i915_vma *vma) +{ + const struct i915_wa *wa; + unsigned int i; + u32 srm, *cs; + + srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + if (INTEL_GEN(rq->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * wal->count); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(wa->reg); + *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; + *cs++ = 0; + } + intel_ring_advance(rq, cs); + + return 0; +} + +static int engine_wa_list_verify(struct intel_engine_cs *engine, + const struct i915_wa_list * const wal, + const char *from) +{ + const struct i915_wa *wa; + struct i915_request *rq; + struct i915_vma *vma; + unsigned int i; + u32 *results; + int err; + + if (!wal->count) + return 0; + + vma = create_scratch(&engine->i915->ggtt.vm, wal->count); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + rq = i915_request_alloc(engine, engine->kernel_context->gem_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma; + } + + err = wa_list_srm(rq, wal, vma); + if (err) + goto err_vma; + + i915_request_add(rq); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { + err = -ETIME; + goto err_vma; + } + + results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(results)) { + err = PTR_ERR(results); + goto err_vma; + } + + err = 0; + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + if (!wa_verify(wa, results[i], wal->name, from)) + err = -ENXIO; + + i915_gem_object_unpin_map(vma->obj); + +err_vma: + i915_vma_unpin(vma); + i915_vma_put(vma); + return err; +} + +int intel_engine_verify_workarounds(struct intel_engine_cs *engine, + const char *from) +{ + return engine_wa_list_verify(engine, &engine->wa_list, from); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/intel_workarounds.c" #endif diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h index 34eee5ec511e..fdf7ebb90f28 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ b/drivers/gpu/drm/i915/intel_workarounds.h @@ -30,5 +30,7 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine); void intel_engine_init_workarounds(struct intel_engine_cs *engine); void intel_engine_apply_workarounds(struct intel_engine_cs *engine); +int intel_engine_verify_workarounds(struct intel_engine_cs *engine, + const char *from); #endif diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 567b6f8dae86..a363748a7a4f 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -340,49 +340,6 @@ out: return err; } -static struct i915_vma *create_scratch(struct i915_gem_context *ctx) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - void *ptr; - int err; - - obj = i915_gem_object_create_internal(ctx->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - - ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(ptr)) { - err = PTR_ERR(ptr); - goto err_obj; - } - memset(ptr, 0xc5, PAGE_SIZE); - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); - if (IS_ERR(vma)) 
{ - err = PTR_ERR(vma); - goto err_obj; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err_obj; - - err = i915_gem_object_set_to_cpu_domain(obj, false); - if (err) - goto err_obj; - - return vma; - -err_obj: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - static struct i915_vma *create_batch(struct i915_gem_context *ctx) { struct drm_i915_gem_object *obj; @@ -475,7 +432,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, int err = 0, i, v; u32 *cs, *results; - scratch = create_scratch(ctx); + scratch = create_scratch(&ctx->ppgtt->vm, 2 * ARRAY_SIZE(values) + 1); if (IS_ERR(scratch)) return PTR_ERR(scratch); @@ -752,9 +709,11 @@ static bool verify_gt_engine_wa(struct drm_i915_private *i915, ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str); - for_each_engine(engine, i915, id) - ok &= wa_list_verify(engine->uncore, - &lists->engine[id].wa_list, str); + for_each_engine(engine, i915, id) { + ok &= engine_wa_list_verify(engine, + &lists->engine[id].wa_list, + str) == 0; + } return ok; } From 769f0dab622c58e3158fc55d761b62a61e7fa2e5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Apr 2019 08:56:29 +0100 Subject: [PATCH 0003/1815] drm/i915: Make workaround verification *optional* Sometimes the HW doesn't even play fair, and completely forgets about register writes. Skip verifying known troublemakers. References: https://bugs.freedesktop.org/show_bug.cgi?id=108954 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190417075657.19456-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_workarounds.c | 40 ++++++++++++++----- .../gpu/drm/i915/intel_workarounds_types.h | 7 ++-- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 89e2c603e34b..b3cbed1ee1c9 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -122,6 +122,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) wal->wa_count++; wa_->val |= wa->val; wa_->mask |= wa->mask; + wa_->read |= wa->read; return; } } @@ -146,9 +147,10 @@ wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) { struct i915_wa wa = { - .reg = reg, + .reg = reg, .mask = mask, - .val = val + .val = val, + .read = mask, }; _wa_add(wal, &wa); @@ -172,6 +174,19 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) wa_write_masked_or(wal, reg, val, val); } +static void +ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) +{ + struct i915_wa wa = { + .reg = reg, + .mask = mask, + .val = val, + /* Bonkers HW, skip verifying */ + }; + + _wa_add(wal, &wa); +} + #define WA_SET_BIT_MASKED(addr, mask) \ wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) @@ -916,10 +931,11 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) static bool wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) { - if ((cur ^ wa->val) & wa->mask) { + if ((cur ^ wa->val) & wa->read) { DRM_ERROR("%s workaround lost on %s! 
(%x=%x/%x, expected %x, mask=%x)\n", - name, from, i915_mmio_reg_offset(wa->reg), cur, - cur & wa->mask, wa->val, wa->mask); + name, from, i915_mmio_reg_offset(wa->reg), + cur, cur & wa->read, + wa->val, wa->mask); return false; } @@ -1122,9 +1138,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); /* WaPipelineFlushCoherentLines:icl */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES); + ignore_wa_write_or(wal, + GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES, + GEN8_LQSC_FLUSH_COHERENT_LINES); /* * Wa_1405543622:icl @@ -1151,9 +1168,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1405733216:icl * Formerly known as WaDisableCleanEvicts */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN11_LQSC_CLEAN_EVICT_DISABLE); + ignore_wa_write_or(wal, + GEN8_L3SQCREG4, + GEN11_LQSC_CLEAN_EVICT_DISABLE, + GEN11_LQSC_CLEAN_EVICT_DISABLE); /* WaForwardProgressSoftReset:icl */ wa_write_or(wal, diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/intel_workarounds_types.h index 30918da180ff..42ac1fb99572 100644 --- a/drivers/gpu/drm/i915/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/intel_workarounds_types.h @@ -12,9 +12,10 @@ #include "i915_reg.h" struct i915_wa { - i915_reg_t reg; - u32 mask; - u32 val; + i915_reg_t reg; + u32 mask; + u32 val; + u32 read; }; struct i915_wa_list { From 99534023490686ce4453c45e5cb813535b9bff95 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 Apr 2019 14:25:07 +0100 Subject: [PATCH 0004/1815] drm/i915: Avoid use-after-free in reporting create.size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have to avoid chasing after a userspace race! <3>[ 473.114328] BUG: KASAN: use-after-free in i915_gem_create+0x1d2/0x1f0 [i915] <3>[ 473.114389] Read of size 8 at addr ffff88815bf1d840 by task gem_flink_race/1541 <4>[ 473.114464] CPU: 1 PID: 1541 Comm: gem_flink_race Tainted: G U 5.1.0-rc4-g7d07e025e786-kasan_88+ #1 <4>[ 473.114469] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016 <4>[ 473.114474] Call Trace: <4>[ 473.114488] dump_stack+0x7c/0xbb <4>[ 473.114612] ? i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.114621] print_address_description+0x65/0x270 <4>[ 473.114728] ? i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.114839] ? i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.114848] kasan_report+0x149/0x18d <4>[ 473.114962] ? i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.115069] i915_gem_create+0x1d2/0x1f0 [i915] <4>[ 473.115176] ? i915_gem_object_create.part.28+0x4b0/0x4b0 [i915] <4>[ 473.115289] ? i915_gem_dumb_create+0x1a0/0x1a0 [i915] <4>[ 473.115297] drm_ioctl_kernel+0x192/0x260 <4>[ 473.115306] ? drm_ioctl_permit+0x280/0x280 <4>[ 473.115326] drm_ioctl+0x67c/0x960 <4>[ 473.115438] ? i915_gem_dumb_create+0x1a0/0x1a0 [i915] <4>[ 473.115448] ? drm_getstats+0x20/0x20 <4>[ 473.115459] ? __lock_acquire+0xa66/0x3fe0 <4>[ 473.115474] ? _raw_spin_unlock_irqrestore+0x39/0x60 <4>[ 473.115485] ? debug_object_active_state+0x2ea/0x4e0 <4>[ 473.115496] ? debug_show_all_locks+0x2d0/0x2d0 <4>[ 473.115513] do_vfs_ioctl+0x18d/0xfa0 <4>[ 473.115522] ? check_flags.part.27+0x440/0x440 <4>[ 473.115532] ? ioctl_preallocate+0x1a0/0x1a0 <4>[ 473.115547] ? __fget+0x2ac/0x410 <4>[ 473.115561] ? __ia32_sys_dup3+0xb0/0xb0 <4>[ 473.115569] ? rwlock_bug.part.0+0x90/0x90 <4>[ 473.115590] ksys_ioctl+0x35/0x70 <4>[ 473.115597] ? 
lockdep_hardirqs_off+0x1cb/0x2b0 <4>[ 473.115608] __x64_sys_ioctl+0x6a/0xb0 <4>[ 473.115614] ? lockdep_hardirqs_on+0x342/0x590 <4>[ 473.115623] do_syscall_64+0x97/0x400 <4>[ 473.115633] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 473.115641] RIP: 0033:0x7fce590d55d7 <4>[ 473.115649] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48 <4>[ 473.115655] RSP: 002b:00007fce4d525ba8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 <4>[ 473.115662] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fce590d55d7 <4>[ 473.115667] RDX: 00007fce4d525c10 RSI: 00000000c010645b RDI: 0000000000000007 <4>[ 473.115672] RBP: 00007fce4d525c10 R08: 00007fce4d526700 R09: 00007fce4d526700 <4>[ 473.115677] R10: 0000000000000054 R11: 0000000000000246 R12: 00000000c010645b <4>[ 473.115682] R13: 0000000000000007 R14: 0000000000000000 R15: 00007ffe0e4a7450 <3>[ 473.115731] Allocated by task 1541: <4>[ 473.115766] kmem_cache_alloc+0xce/0x290 <4>[ 473.115895] i915_gem_object_create.part.28+0x1c/0x4b0 [i915] <4>[ 473.116000] i915_gem_create+0xe3/0x1f0 [i915] <4>[ 473.116008] drm_ioctl_kernel+0x192/0x260 <4>[ 473.116013] drm_ioctl+0x67c/0x960 <4>[ 473.116020] do_vfs_ioctl+0x18d/0xfa0 <4>[ 473.116026] ksys_ioctl+0x35/0x70 <4>[ 473.116032] __x64_sys_ioctl+0x6a/0xb0 <4>[ 473.116038] do_syscall_64+0x97/0x400 <4>[ 473.116044] entry_SYSCALL_64_after_hwframe+0x49/0xbe <3>[ 473.116071] Freed by task 1542: <4>[ 473.116101] kmem_cache_free+0xb7/0x2f0 <4>[ 473.116205] __i915_gem_free_objects+0x7d4/0xe10 [i915] <4>[ 473.116311] i915_gem_create_ioctl+0xaa/0xd0 [i915] <4>[ 473.116318] drm_ioctl_kernel+0x192/0x260 <4>[ 473.116323] drm_ioctl+0x67c/0x960 <4>[ 473.116330] do_vfs_ioctl+0x18d/0xfa0 <4>[ 473.116335] ksys_ioctl+0x35/0x70 <4>[ 473.116341] __x64_sys_ioctl+0x6a/0xb0 <4>[ 473.116347] do_syscall_64+0x97/0x400 <4>[ 473.116354] entry_SYSCALL_64_after_hwframe+0x49/0xbe Testcase: igt/gem_flink_race/flink_close Fixes: e163484afa8d ("drm/i915: Update size upon return from GEM_CREATE") Signed-off-by: Chris Wilson Cc: MichaƂ Winiarski Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190417132507.27133-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a5412323fee1..e5462639de0b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -647,7 +647,7 @@ i915_gem_create(struct drm_file *file, return ret; *handle_p = handle; - *size_p = obj->base.size; + *size_p = size; return 0; } From ad408c766cef7ebaa9b74f828db50e7642388581 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 18 Apr 2019 15:05:09 +0200 Subject: [PATCH 0005/1815] drm/sun4i: Use DRM_GEM_CMA_VMAP_DRIVER_OPS for GEM operations Our driver makes a typical use of CMA, with GEM object allocated as GEM CMA objects. Use DRM_GEM_CMA_VMAP_DRIVER_OPS to describe the ops instead of duplicating them. Because DRM_GEM_CMA_VMAP_DRIVER_OPS implements a gem_create_object op which sets per-object funcs (drm_cma_gem_default_funcs), we can also get rid of free_object_unlocked and gem_vm_ops, which are superseded by the object funcs. 
Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190418130509.3569-1-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/sun4i/sun4i_drv.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 3ebd9f5e2719..c8c0ab3da972 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -52,22 +52,8 @@ static struct drm_driver sun4i_drv_driver = { .minor = 0, /* GEM Operations */ + DRM_GEM_CMA_VMAP_DRIVER_OPS, .dumb_create = drm_sun4i_gem_dumb_create, - .gem_free_object_unlocked = drm_gem_cma_free_object, - .gem_vm_ops = &drm_gem_cma_vm_ops, - - /* PRIME Operations */ - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, - .gem_prime_import = drm_gem_prime_import, - .gem_prime_export = drm_gem_prime_export, - .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, - .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, - .gem_prime_vmap = drm_gem_cma_prime_vmap, - .gem_prime_vunmap = drm_gem_cma_prime_vunmap, - .gem_prime_mmap = drm_gem_cma_prime_mmap, - - /* Frame Buffer Operations */ }; static int sun4i_drv_bind(struct device *dev) From dfe2c8ed23d7524dd363e1941039da63e3982e98 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 Apr 2019 14:27:19 +0100 Subject: [PATCH 0006/1815] drm/i915: Stop overwriting RING_IMR in rcs resume We store the engine->imr mask and set up the RING_IMR register on restarting the engine. We do not then want to overwrite it with an incomplete mask later as we may then lose interrupts! Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190418132720.3716-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 029fd8ec1857..00bd9eeb053d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -873,9 +873,6 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN_RANGE(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - if (INTEL_GEN(dev_priv) >= 6) - ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); - return 0; } From 26ddc068de47e2a7cbbd06c915dca7a0dc22c499 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 Apr 2019 14:27:20 +0100 Subject: [PATCH 0007/1815] drm/i915: Setup the RCS ring prior to execution We need to set the various ring registers prior to restarting the engine, or else we may restart it after reset/resume in an ill-defined state. 
Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190418132720.3716-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 00bd9eeb053d..3844581f622c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -831,9 +831,6 @@ static int intel_rcs_ctx_init(struct i915_request *rq) static int init_render_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - int ret = init_ring_common(engine); - if (ret) - return ret; /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ if (IS_GEN_RANGE(dev_priv, 4, 6)) @@ -873,7 +870,7 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN_RANGE(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - return 0; + return init_ring_common(engine); } static void cancel_requests(struct intel_engine_cs *engine) From d4c3022a23d2f56057b67e9711199e68a1615567 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 16 Apr 2019 15:58:52 -0700 Subject: [PATCH 0008/1815] drm/v3d: Switch the type of job-> to reduce casting. All consumers wanted drm_gem_object * now. Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20190416225856.20264-2-eric@anholt.net Acked-by: Rob Clark --- drivers/gpu/drm/v3d/v3d_drv.h | 4 ++-- drivers/gpu/drm/v3d/v3d_gem.c | 42 +++++++++++++---------------------- 2 files changed, 17 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index e9d4a2fdcf44..67c323e781f9 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -189,7 +189,7 @@ struct v3d_exec_info { struct kref refcount; /* This is the array of BOs that were looked up at the start of exec. */ - struct v3d_bo **bo; + struct drm_gem_object **bo; u32 bo_count; /* List of overflow BOs used in the job that need to be @@ -217,7 +217,7 @@ struct v3d_tfu_job { struct kref refcount; /* This is the array of BOs that were looked up at the start of exec. */ - struct v3d_bo *bo[4]; + struct drm_gem_object *bo[4]; }; /** diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 93ff8fcbe475..aa0397d12847 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -194,27 +194,17 @@ v3d_invalidate_caches(struct v3d_dev *v3d) } static void -v3d_attach_object_fences(struct v3d_bo **bos, int bo_count, +v3d_attach_object_fences(struct drm_gem_object **bos, int bo_count, struct dma_fence *fence) { int i; for (i = 0; i < bo_count; i++) { /* XXX: Use shared fences for read-only objects. */ - reservation_object_add_excl_fence(bos[i]->base.base.resv, - fence); + reservation_object_add_excl_fence(bos[i]->resv, fence); } } -static void -v3d_unlock_bo_reservations(struct v3d_bo **bos, - int bo_count, - struct ww_acquire_ctx *acquire_ctx) -{ - drm_gem_unlock_reservations((struct drm_gem_object **)bos, bo_count, - acquire_ctx); -} - /* Takes the reservation lock on all the BOs being referenced, so that * at queue submit time we can update the reservations. * @@ -223,14 +213,13 @@ v3d_unlock_bo_reservations(struct v3d_bo **bos, * to v3d, so we don't attach dma-buf fences to them. 
*/ static int -v3d_lock_bo_reservations(struct v3d_bo **bos, +v3d_lock_bo_reservations(struct drm_gem_object **bos, int bo_count, struct ww_acquire_ctx *acquire_ctx) { int i, ret; - ret = drm_gem_lock_reservations((struct drm_gem_object **)bos, - bo_count, acquire_ctx); + ret = drm_gem_lock_reservations(bos, bo_count, acquire_ctx); if (ret) return ret; @@ -238,11 +227,10 @@ v3d_lock_bo_reservations(struct v3d_bo **bos, * before we commit the CL to the hardware. */ for (i = 0; i < bo_count; i++) { - ret = reservation_object_reserve_shared(bos[i]->base.base.resv, - 1); + ret = reservation_object_reserve_shared(bos[i]->resv, 1); if (ret) { - v3d_unlock_bo_reservations(bos, bo_count, - acquire_ctx); + drm_gem_unlock_reservations(bos, bo_count, + acquire_ctx); return ret; } } @@ -319,7 +307,7 @@ v3d_cl_lookup_bos(struct drm_device *dev, goto fail; } drm_gem_object_get(bo); - exec->bo[i] = to_v3d_bo(bo); + exec->bo[i] = bo; } spin_unlock(&file_priv->table_lock); @@ -347,7 +335,7 @@ v3d_exec_cleanup(struct kref *ref) dma_fence_put(exec->render_done_fence); for (i = 0; i < exec->bo_count; i++) - drm_gem_object_put_unlocked(&exec->bo[i]->base.base); + drm_gem_object_put_unlocked(exec->bo[i]); kvfree(exec->bo); list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) { @@ -378,7 +366,7 @@ v3d_tfu_job_cleanup(struct kref *ref) for (i = 0; i < ARRAY_SIZE(job->bo); i++) { if (job->bo[i]) - drm_gem_object_put_unlocked(&job->bo[i]->base.base); + drm_gem_object_put_unlocked(job->bo[i]); } pm_runtime_mark_last_busy(v3d->dev); @@ -532,7 +520,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, v3d_attach_object_fences(exec->bo, exec->bo_count, exec->render_done_fence); - v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx); + drm_gem_unlock_reservations(exec->bo, exec->bo_count, &acquire_ctx); /* Update the return sync object for the */ sync_out = drm_syncobj_find(file_priv, args->out_sync); @@ -547,7 +535,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, fail_unreserve: mutex_unlock(&v3d->sched_lock); - v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx); + drm_gem_unlock_reservations(exec->bo, exec->bo_count, &acquire_ctx); fail: v3d_exec_put(exec); @@ -616,7 +604,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, goto fail; } drm_gem_object_get(bo); - job->bo[bo_count] = to_v3d_bo(bo); + job->bo[bo_count] = bo; } spin_unlock(&file_priv->table_lock); @@ -639,7 +627,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, v3d_attach_object_fences(job->bo, bo_count, sched_done_fence); - v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx); + drm_gem_unlock_reservations(job->bo, bo_count, &acquire_ctx); /* Update the return sync object */ sync_out = drm_syncobj_find(file_priv, args->out_sync); @@ -655,7 +643,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, fail_unreserve: mutex_unlock(&v3d->sched_lock); - v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx); + drm_gem_unlock_reservations(job->bo, bo_count, &acquire_ctx); fail: v3d_tfu_job_put(job); From a783a09ee76d6259296dc6aeea2b6884fa526980 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 16 Apr 2019 15:58:53 -0700 Subject: [PATCH 0009/1815] drm/v3d: Refactor job management. The CL submission had two jobs embedded in an exec struct. When I added TFU support, I had to replicate some of the exec stuff and some of the job stuff. 
As I went to add CSD, it became clear that actually what was in exec should just be in the two CL jobs, and it would let us share a lot more code between the 4 queues. v2: Fix missing error path in TFU ioctl's bo[] allocation. Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20190416225856.20264-3-eric@anholt.net Acked-by: Rob Clark --- drivers/gpu/drm/v3d/v3d_drv.h | 93 ++++---- drivers/gpu/drm/v3d/v3d_gem.c | 375 ++++++++++++++++---------------- drivers/gpu/drm/v3d/v3d_irq.c | 8 +- drivers/gpu/drm/v3d/v3d_sched.c | 285 +++++++++++++----------- 4 files changed, 406 insertions(+), 355 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 67c323e781f9..f82f8be04bd8 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -67,8 +67,8 @@ struct v3d_dev { struct work_struct overflow_mem_work; - struct v3d_exec_info *bin_job; - struct v3d_exec_info *render_job; + struct v3d_bin_job *bin_job; + struct v3d_render_job *render_job; struct v3d_tfu_job *tfu_job; struct v3d_queue_state queue[V3D_MAX_QUEUES]; @@ -117,7 +117,7 @@ struct v3d_bo { struct drm_mm_node node; /* List entry for the BO's position in - * v3d_exec_info->unref_list + * v3d_render_job->unref_list */ struct list_head unref_head; }; @@ -157,7 +157,15 @@ to_v3d_fence(struct dma_fence *fence) struct v3d_job { struct drm_sched_job base; - struct v3d_exec_info *exec; + struct kref refcount; + + struct v3d_dev *v3d; + + /* This is the array of BOs that were looked up at the start + * of submission. + */ + struct drm_gem_object **bo; + u32 bo_count; /* An optional fence userspace can pass in for the job to depend on. */ struct dma_fence *in_fence; @@ -165,59 +173,53 @@ struct v3d_job { /* v3d fence to be signaled by IRQ handler when the job is complete. */ struct dma_fence *irq_fence; + /* scheduler fence for when the job is considered complete and + * the BO reservations can be released. + */ + struct dma_fence *done_fence; + + /* Callback for the freeing of the job on refcount going to 0. */ + void (*free)(struct kref *ref); +}; + +struct v3d_bin_job { + struct v3d_job base; + /* GPU virtual addresses of the start/end of the CL job. */ u32 start, end; u32 timedout_ctca, timedout_ctra; -}; -struct v3d_exec_info { - struct v3d_dev *v3d; - - struct v3d_job bin, render; - - /* Fence for when the scheduler considers the binner to be - * done, for render to depend on. - */ - struct dma_fence *bin_done_fence; - - /* Fence for when the scheduler considers the render to be - * done, for when the BOs reservations should be complete. - */ - struct dma_fence *render_done_fence; - - struct kref refcount; - - /* This is the array of BOs that were looked up at the start of exec. */ - struct drm_gem_object **bo; - u32 bo_count; - - /* List of overflow BOs used in the job that need to be - * released once the job is complete. - */ - struct list_head unref_list; + /* Corresponding render job, for attaching our overflow memory. */ + struct v3d_render_job *render; /* Submitted tile memory allocation start/size, tile state. */ u32 qma, qms, qts; }; +struct v3d_render_job { + struct v3d_job base; + + /* Optional fence for the binner, to depend on before starting + * our job. + */ + struct dma_fence *bin_done_fence; + + /* GPU virtual addresses of the start/end of the CL job. */ + u32 start, end; + + u32 timedout_ctca, timedout_ctra; + + /* List of overflow BOs used in the job that need to be + * released once the job is complete. 
+ */ + struct list_head unref_list; +}; + struct v3d_tfu_job { - struct drm_sched_job base; + struct v3d_job base; struct drm_v3d_submit_tfu args; - - /* An optional fence userspace can pass in for the job to depend on. */ - struct dma_fence *in_fence; - - /* v3d fence to be signaled by IRQ handler when the job is complete. */ - struct dma_fence *irq_fence; - - struct v3d_dev *v3d; - - struct kref refcount; - - /* This is the array of BOs that were looked up at the start of exec. */ - struct drm_gem_object *bo[4]; }; /** @@ -283,8 +285,7 @@ int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -void v3d_exec_put(struct v3d_exec_info *exec); -void v3d_tfu_job_put(struct v3d_tfu_job *exec); +void v3d_job_put(struct v3d_job *job); void v3d_reset(struct v3d_dev *v3d); void v3d_invalidate_caches(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index aa0397d12847..350a269a7b58 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -193,18 +193,6 @@ v3d_invalidate_caches(struct v3d_dev *v3d) v3d_invalidate_slices(v3d, 0); } -static void -v3d_attach_object_fences(struct drm_gem_object **bos, int bo_count, - struct dma_fence *fence) -{ - int i; - - for (i = 0; i < bo_count; i++) { - /* XXX: Use shared fences for read-only objects. */ - reservation_object_add_excl_fence(bos[i]->resv, fence); - } -} - /* Takes the reservation lock on all the BOs being referenced, so that * at queue submit time we can update the reservations. * @@ -239,11 +227,11 @@ v3d_lock_bo_reservations(struct drm_gem_object **bos, } /** - * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects + * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects * referenced by the job. * @dev: DRM device * @file_priv: DRM file for this fd - * @exec: V3D job being set up + * @job: V3D job being set up * * The command validator needs to reference BOs by their index within * the submitted job's BO list. This does the validation of the job's @@ -253,18 +241,19 @@ v3d_lock_bo_reservations(struct drm_gem_object **bos, * failure, because that will happen at v3d_exec_cleanup() time. */ static int -v3d_cl_lookup_bos(struct drm_device *dev, - struct drm_file *file_priv, - struct drm_v3d_submit_cl *args, - struct v3d_exec_info *exec) +v3d_lookup_bos(struct drm_device *dev, + struct drm_file *file_priv, + struct v3d_job *job, + u64 bo_handles, + u32 bo_count) { u32 *handles; int ret = 0; int i; - exec->bo_count = args->bo_handle_count; + job->bo_count = bo_count; - if (!exec->bo_count) { + if (!job->bo_count) { /* See comment on bo_index for why we have to check * this. 
*/ @@ -272,15 +261,15 @@ v3d_cl_lookup_bos(struct drm_device *dev, return -EINVAL; } - exec->bo = kvmalloc_array(exec->bo_count, - sizeof(struct drm_gem_cma_object *), - GFP_KERNEL | __GFP_ZERO); - if (!exec->bo) { + job->bo = kvmalloc_array(job->bo_count, + sizeof(struct drm_gem_cma_object *), + GFP_KERNEL | __GFP_ZERO); + if (!job->bo) { DRM_DEBUG("Failed to allocate validated BO pointers\n"); return -ENOMEM; } - handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL); + handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL); if (!handles) { ret = -ENOMEM; DRM_DEBUG("Failed to allocate incoming GEM handles\n"); @@ -288,15 +277,15 @@ v3d_cl_lookup_bos(struct drm_device *dev, } if (copy_from_user(handles, - (void __user *)(uintptr_t)args->bo_handles, - exec->bo_count * sizeof(u32))) { + (void __user *)(uintptr_t)bo_handles, + job->bo_count * sizeof(u32))) { ret = -EFAULT; DRM_DEBUG("Failed to copy in GEM handles\n"); goto fail; } spin_lock(&file_priv->table_lock); - for (i = 0; i < exec->bo_count; i++) { + for (i = 0; i < job->bo_count; i++) { struct drm_gem_object *bo = idr_find(&file_priv->object_idr, handles[i]); if (!bo) { @@ -307,7 +296,7 @@ v3d_cl_lookup_bos(struct drm_device *dev, goto fail; } drm_gem_object_get(bo); - exec->bo[i] = bo; + job->bo[i] = bo; } spin_unlock(&file_priv->table_lock); @@ -317,67 +306,44 @@ fail: } static void -v3d_exec_cleanup(struct kref *ref) +v3d_job_free(struct kref *ref) { - struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info, - refcount); - struct v3d_dev *v3d = exec->v3d; - unsigned int i; - struct v3d_bo *bo, *save; + struct v3d_job *job = container_of(ref, struct v3d_job, refcount); + int i; - dma_fence_put(exec->bin.in_fence); - dma_fence_put(exec->render.in_fence); - - dma_fence_put(exec->bin.irq_fence); - dma_fence_put(exec->render.irq_fence); - - dma_fence_put(exec->bin_done_fence); - dma_fence_put(exec->render_done_fence); - - for (i = 0; i < exec->bo_count; i++) - drm_gem_object_put_unlocked(exec->bo[i]); - kvfree(exec->bo); - - list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) { - drm_gem_object_put_unlocked(&bo->base.base); - } - - pm_runtime_mark_last_busy(v3d->dev); - pm_runtime_put_autosuspend(v3d->dev); - - kfree(exec); -} - -void v3d_exec_put(struct v3d_exec_info *exec) -{ - kref_put(&exec->refcount, v3d_exec_cleanup); -} - -static void -v3d_tfu_job_cleanup(struct kref *ref) -{ - struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job, - refcount); - struct v3d_dev *v3d = job->v3d; - unsigned int i; - - dma_fence_put(job->in_fence); - dma_fence_put(job->irq_fence); - - for (i = 0; i < ARRAY_SIZE(job->bo); i++) { + for (i = 0; i < job->bo_count; i++) { if (job->bo[i]) drm_gem_object_put_unlocked(job->bo[i]); } + kvfree(job->bo); - pm_runtime_mark_last_busy(v3d->dev); - pm_runtime_put_autosuspend(v3d->dev); + dma_fence_put(job->in_fence); + dma_fence_put(job->irq_fence); + dma_fence_put(job->done_fence); + + pm_runtime_mark_last_busy(job->v3d->dev); + pm_runtime_put_autosuspend(job->v3d->dev); kfree(job); } -void v3d_tfu_job_put(struct v3d_tfu_job *job) +static void +v3d_render_job_free(struct kref *ref) { - kref_put(&job->refcount, v3d_tfu_job_cleanup); + struct v3d_render_job *job = container_of(ref, struct v3d_render_job, + base.refcount); + struct v3d_bo *bo, *save; + + list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) { + drm_gem_object_put_unlocked(&bo->base.base); + } + + v3d_job_free(ref); +} + +void v3d_job_put(struct v3d_job *job) +{ + 
kref_put(&job->refcount, job->free); } int @@ -413,6 +379,77 @@ v3d_wait_bo_ioctl(struct drm_device *dev, void *data, return ret; } +static int +v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, + struct v3d_job *job, void (*free)(struct kref *ref), + u32 in_sync) +{ + int ret; + + job->v3d = v3d; + job->free = free; + + ret = pm_runtime_get_sync(v3d->dev); + if (ret < 0) + return ret; + + ret = drm_syncobj_find_fence(file_priv, in_sync, 0, 0, &job->in_fence); + if (ret == -EINVAL) { + pm_runtime_put_autosuspend(v3d->dev); + return ret; + } + + kref_init(&job->refcount); + + return 0; +} + +static int +v3d_push_job(struct v3d_file_priv *v3d_priv, + struct v3d_job *job, enum v3d_queue queue) +{ + int ret; + + ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], + v3d_priv); + if (ret) + return ret; + + job->done_fence = dma_fence_get(&job->base.s_fence->finished); + + /* put by scheduler job completion */ + kref_get(&job->refcount); + + drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[queue]); + + return 0; +} + +static void +v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, + struct v3d_job *job, + struct ww_acquire_ctx *acquire_ctx, + u32 out_sync) +{ + struct drm_syncobj *sync_out; + int i; + + for (i = 0; i < job->bo_count; i++) { + /* XXX: Use shared fences for read-only objects. */ + reservation_object_add_excl_fence(job->bo[i]->resv, + job->done_fence); + } + + drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); + + /* Update the return sync object for the job */ + sync_out = drm_syncobj_find(file_priv, out_sync); + if (sync_out) { + drm_syncobj_replace_fence(sync_out, job->done_fence); + drm_syncobj_put(sync_out); + } +} + /** * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D. 
* @dev: DRM device @@ -432,9 +469,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, struct v3d_dev *v3d = to_v3d_dev(dev); struct v3d_file_priv *v3d_priv = file_priv->driver_priv; struct drm_v3d_submit_cl *args = data; - struct v3d_exec_info *exec; + struct v3d_bin_job *bin = NULL; + struct v3d_render_job *render; struct ww_acquire_ctx acquire_ctx; - struct drm_syncobj *sync_out; int ret = 0; trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end); @@ -444,100 +481,83 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); - if (!exec) + render = kcalloc(1, sizeof(*render), GFP_KERNEL); + if (!render) return -ENOMEM; - ret = pm_runtime_get_sync(v3d->dev); - if (ret < 0) { - kfree(exec); + render->start = args->rcl_start; + render->end = args->rcl_end; + INIT_LIST_HEAD(&render->unref_list); + + ret = v3d_job_init(v3d, file_priv, &render->base, + v3d_render_job_free, args->in_sync_rcl); + if (ret) { + kfree(render); return ret; } - kref_init(&exec->refcount); + if (args->bcl_start != args->bcl_end) { + bin = kcalloc(1, sizeof(*bin), GFP_KERNEL); + if (!bin) + return -ENOMEM; - ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl, - 0, 0, &exec->bin.in_fence); - if (ret == -EINVAL) - goto fail; + ret = v3d_job_init(v3d, file_priv, &bin->base, + v3d_job_free, args->in_sync_bcl); + if (ret) { + v3d_job_put(&render->base); + return ret; + } - ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl, - 0, 0, &exec->render.in_fence); - if (ret == -EINVAL) - goto fail; + bin->start = args->bcl_start; + bin->end = args->bcl_end; + bin->qma = args->qma; + bin->qms = args->qms; + bin->qts = args->qts; + bin->render = render; + } - exec->qma = args->qma; - exec->qms = args->qms; - exec->qts = args->qts; - exec->bin.exec = exec; - exec->bin.start = args->bcl_start; - exec->bin.end = args->bcl_end; - exec->render.exec = exec; - exec->render.start = args->rcl_start; - exec->render.end = args->rcl_end; - exec->v3d = v3d; - INIT_LIST_HEAD(&exec->unref_list); - - ret = v3d_cl_lookup_bos(dev, file_priv, args, exec); + ret = v3d_lookup_bos(dev, file_priv, &render->base, + args->bo_handles, args->bo_handle_count); if (ret) goto fail; - ret = v3d_lock_bo_reservations(exec->bo, exec->bo_count, + ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count, &acquire_ctx); if (ret) goto fail; mutex_lock(&v3d->sched_lock); - if (exec->bin.start != exec->bin.end) { - ret = drm_sched_job_init(&exec->bin.base, - &v3d_priv->sched_entity[V3D_BIN], - v3d_priv); + if (bin) { + ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN); if (ret) goto fail_unreserve; - exec->bin_done_fence = - dma_fence_get(&exec->bin.base.s_fence->finished); - - kref_get(&exec->refcount); /* put by scheduler job completion */ - drm_sched_entity_push_job(&exec->bin.base, - &v3d_priv->sched_entity[V3D_BIN]); + render->bin_done_fence = dma_fence_get(bin->base.done_fence); } - ret = drm_sched_job_init(&exec->render.base, - &v3d_priv->sched_entity[V3D_RENDER], - v3d_priv); + ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER); if (ret) goto fail_unreserve; - - exec->render_done_fence = - dma_fence_get(&exec->render.base.s_fence->finished); - - kref_get(&exec->refcount); /* put by scheduler job completion */ - drm_sched_entity_push_job(&exec->render.base, - &v3d_priv->sched_entity[V3D_RENDER]); mutex_unlock(&v3d->sched_lock); - v3d_attach_object_fences(exec->bo, exec->bo_count, - exec->render_done_fence); + 
v3d_attach_fences_and_unlock_reservation(file_priv, + &render->base, &acquire_ctx, + args->out_sync); - drm_gem_unlock_reservations(exec->bo, exec->bo_count, &acquire_ctx); - - /* Update the return sync object for the */ - sync_out = drm_syncobj_find(file_priv, args->out_sync); - if (sync_out) { - drm_syncobj_replace_fence(sync_out, exec->render_done_fence); - drm_syncobj_put(sync_out); - } - - v3d_exec_put(exec); + if (bin) + v3d_job_put(&bin->base); + v3d_job_put(&render->base); return 0; fail_unreserve: mutex_unlock(&v3d->sched_lock); - drm_gem_unlock_reservations(exec->bo, exec->bo_count, &acquire_ctx); + drm_gem_unlock_reservations(render->base.bo, + render->base.bo_count, &acquire_ctx); fail: - v3d_exec_put(exec); + if (bin) + v3d_job_put(&bin->base); + v3d_job_put(&render->base); return ret; } @@ -560,10 +580,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, struct drm_v3d_submit_tfu *args = data; struct v3d_tfu_job *job; struct ww_acquire_ctx acquire_ctx; - struct drm_syncobj *sync_out; - struct dma_fence *sched_done_fence; int ret = 0; - int bo_count; trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia); @@ -571,81 +588,71 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, if (!job) return -ENOMEM; - ret = pm_runtime_get_sync(v3d->dev); - if (ret < 0) { + ret = v3d_job_init(v3d, file_priv, &job->base, + v3d_job_free, args->in_sync); + if (ret) { kfree(job); return ret; } - kref_init(&job->refcount); - - ret = drm_syncobj_find_fence(file_priv, args->in_sync, - 0, 0, &job->in_fence); - if (ret == -EINVAL) - goto fail; + job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles), + sizeof(*job->base.bo), GFP_KERNEL); + if (!job->base.bo) { + v3d_job_put(&job->base); + return -ENOMEM; + } job->args = *args; - job->v3d = v3d; spin_lock(&file_priv->table_lock); - for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) { + for (job->base.bo_count = 0; + job->base.bo_count < ARRAY_SIZE(args->bo_handles); + job->base.bo_count++) { struct drm_gem_object *bo; - if (!args->bo_handles[bo_count]) + if (!args->bo_handles[job->base.bo_count]) break; bo = idr_find(&file_priv->object_idr, - args->bo_handles[bo_count]); + args->bo_handles[job->base.bo_count]); if (!bo) { DRM_DEBUG("Failed to look up GEM BO %d: %d\n", - bo_count, args->bo_handles[bo_count]); + job->base.bo_count, + args->bo_handles[job->base.bo_count]); ret = -ENOENT; spin_unlock(&file_priv->table_lock); goto fail; } drm_gem_object_get(bo); - job->bo[bo_count] = bo; + job->base.bo[job->base.bo_count] = bo; } spin_unlock(&file_priv->table_lock); - ret = v3d_lock_bo_reservations(job->bo, bo_count, &acquire_ctx); + ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count, + &acquire_ctx); if (ret) goto fail; mutex_lock(&v3d->sched_lock); - ret = drm_sched_job_init(&job->base, - &v3d_priv->sched_entity[V3D_TFU], - v3d_priv); + ret = v3d_push_job(v3d_priv, &job->base, V3D_TFU); if (ret) goto fail_unreserve; - - sched_done_fence = dma_fence_get(&job->base.s_fence->finished); - - kref_get(&job->refcount); /* put by scheduler job completion */ - drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]); mutex_unlock(&v3d->sched_lock); - v3d_attach_object_fences(job->bo, bo_count, sched_done_fence); + v3d_attach_fences_and_unlock_reservation(file_priv, + &job->base, &acquire_ctx, + args->out_sync); - drm_gem_unlock_reservations(job->bo, bo_count, &acquire_ctx); - - /* Update the return sync object */ - sync_out = drm_syncobj_find(file_priv, args->out_sync); - if (sync_out) { - 
drm_syncobj_replace_fence(sync_out, sched_done_fence); - drm_syncobj_put(sync_out); - } - dma_fence_put(sched_done_fence); - - v3d_tfu_job_put(job); + v3d_job_put(&job->base); return 0; fail_unreserve: mutex_unlock(&v3d->sched_lock); - drm_gem_unlock_reservations(job->bo, bo_count, &acquire_ctx); + drm_gem_unlock_reservations(job->base.bo, job->base.bo_count, + &acquire_ctx); fail: - v3d_tfu_job_put(job); + v3d_job_put(&job->base); return ret; } @@ -703,7 +710,7 @@ v3d_gem_destroy(struct drm_device *dev) v3d_sched_fini(v3d); - /* Waiting for exec to finish would need to be done before + /* Waiting for jobs to finish would need to be done before * unregistering V3D. */ WARN_ON(v3d->bin_job); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index aa0a180ae700..ce373ffd6380 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -62,7 +62,7 @@ v3d_overflow_mem_work(struct work_struct *work) } drm_gem_object_get(obj); - list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list); + list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list); spin_unlock_irqrestore(&v3d->job_lock, irqflags); V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT); @@ -96,7 +96,7 @@ v3d_irq(int irq, void *arg) if (intsts & V3D_INT_FLDONE) { struct v3d_fence *fence = - to_v3d_fence(v3d->bin_job->bin.irq_fence); + to_v3d_fence(v3d->bin_job->base.irq_fence); trace_v3d_bcl_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); @@ -105,7 +105,7 @@ v3d_irq(int irq, void *arg) if (intsts & V3D_INT_FRDONE) { struct v3d_fence *fence = - to_v3d_fence(v3d->render_job->render.irq_fence); + to_v3d_fence(v3d->render_job->base.irq_fence); trace_v3d_rcl_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); @@ -141,7 +141,7 @@ v3d_hub_irq(int irq, void *arg) if (intsts & V3D_HUB_INT_TFUC) { struct v3d_fence *fence = - to_v3d_fence(v3d->tfu_job->irq_fence); + to_v3d_fence(v3d->tfu_job->base.irq_fence); trace_v3d_tfu_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index e740f3b99aa5..739f399308ce 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -30,10 +30,22 @@ to_v3d_job(struct drm_sched_job *sched_job) return container_of(sched_job, struct v3d_job, base); } +static struct v3d_bin_job * +to_bin_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct v3d_bin_job, base.base); +} + +static struct v3d_render_job * +to_render_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct v3d_render_job, base.base); +} + static struct v3d_tfu_job * to_tfu_job(struct drm_sched_job *sched_job) { - return container_of(sched_job, struct v3d_tfu_job, base); + return container_of(sched_job, struct v3d_tfu_job, base.base); } static void @@ -42,31 +54,20 @@ v3d_job_free(struct drm_sched_job *sched_job) struct v3d_job *job = to_v3d_job(sched_job); drm_sched_job_cleanup(sched_job); - - v3d_exec_put(job->exec); -} - -static void -v3d_tfu_job_free(struct drm_sched_job *sched_job) -{ - struct v3d_tfu_job *job = to_tfu_job(sched_job); - - drm_sched_job_cleanup(sched_job); - - v3d_tfu_job_put(job); + v3d_job_put(job); } /** - * Returns the fences that the bin or render job depends on, one by one. - * v3d_job_run() won't be called until all of them have been signaled. + * Returns the fences that the job depends on, one by one. 
+ * + * If placed in the scheduler's .dependency method, the corresponding + * .run_job won't be called until all of them have been signaled. */ static struct dma_fence * v3d_job_dependency(struct drm_sched_job *sched_job, struct drm_sched_entity *s_entity) { struct v3d_job *job = to_v3d_job(sched_job); - struct v3d_exec_info *exec = job->exec; - enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; struct dma_fence *fence; fence = job->in_fence; @@ -75,113 +76,132 @@ v3d_job_dependency(struct drm_sched_job *sched_job, return fence; } - if (q == V3D_RENDER) { - /* If we had a bin job, the render job definitely depends on - * it. We first have to wait for bin to be scheduled, so that - * its done_fence is created. - */ - fence = exec->bin_done_fence; - if (fence) { - exec->bin_done_fence = NULL; - return fence; - } - } - /* XXX: Wait on a fence for switching the GMP if necessary, * and then do so. */ - return fence; -} - -/** - * Returns the fences that the TFU job depends on, one by one. - * v3d_tfu_job_run() won't be called until all of them have been - * signaled. - */ -static struct dma_fence * -v3d_tfu_job_dependency(struct drm_sched_job *sched_job, - struct drm_sched_entity *s_entity) -{ - struct v3d_tfu_job *job = to_tfu_job(sched_job); - struct dma_fence *fence; - - fence = job->in_fence; - if (fence) { - job->in_fence = NULL; - return fence; - } - return NULL; } -static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job) +/** + * Returns the fences that the render job depends on, one by one. + * v3d_job_run() won't be called until all of them have been signaled. + */ +static struct dma_fence * +v3d_render_job_dependency(struct drm_sched_job *sched_job, + struct drm_sched_entity *s_entity) { - struct v3d_job *job = to_v3d_job(sched_job); - struct v3d_exec_info *exec = job->exec; - enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; - struct v3d_dev *v3d = exec->v3d; + struct v3d_render_job *job = to_render_job(sched_job); + struct dma_fence *fence; + + fence = v3d_job_dependency(sched_job, s_entity); + if (fence) + return fence; + + /* If we had a bin job, the render job definitely depends on + * it. We first have to wait for bin to be scheduled, so that + * its done_fence is created. + */ + fence = job->bin_done_fence; + if (fence) { + job->bin_done_fence = NULL; + return fence; + } + + return fence; +} + +static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) +{ + struct v3d_bin_job *job = to_bin_job(sched_job); + struct v3d_dev *v3d = job->base.v3d; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; unsigned long irqflags; - if (unlikely(job->base.s_fence->finished.error)) + if (unlikely(job->base.base.s_fence->finished.error)) return NULL; /* Lock required around bin_job update vs * v3d_overflow_mem_work(). */ spin_lock_irqsave(&v3d->job_lock, irqflags); - if (q == V3D_BIN) { - v3d->bin_job = job->exec; - - /* Clear out the overflow allocation, so we don't - * reuse the overflow attached to a previous job. - */ - V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); - } else { - v3d->render_job = job->exec; - } + v3d->bin_job = job; + /* Clear out the overflow allocation, so we don't + * reuse the overflow attached to a previous job. + */ + V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); spin_unlock_irqrestore(&v3d->job_lock, irqflags); - /* Can we avoid this flush when q==RENDER? 
We need to be - * careful of scheduling, though -- imagine job0 rendering to - * texture and job1 reading, and them being executed as bin0, - * bin1, render0, render1, so that render1's flush at bin time - * wasn't enough. - */ v3d_invalidate_caches(v3d); - fence = v3d_fence_create(v3d, q); + fence = v3d_fence_create(v3d, V3D_BIN); if (IS_ERR(fence)) return NULL; - if (job->irq_fence) - dma_fence_put(job->irq_fence); - job->irq_fence = dma_fence_get(fence); + if (job->base.irq_fence) + dma_fence_put(job->base.irq_fence); + job->base.irq_fence = dma_fence_get(fence); - trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno, + trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno, job->start, job->end); - if (q == V3D_BIN) { - if (exec->qma) { - V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma); - V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms); - } - if (exec->qts) { - V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, - V3D_CLE_CT0QTS_ENABLE | - exec->qts); - } - } else { - /* XXX: Set the QCFG */ - } - /* Set the current and end address of the control list. * Writing the end register is what starts the job. */ - V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start); - V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end); + if (job->qma) { + V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma); + V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms); + } + if (job->qts) { + V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, + V3D_CLE_CT0QTS_ENABLE | + job->qts); + } + V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start); + V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end); + + return fence; +} + +static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) +{ + struct v3d_render_job *job = to_render_job(sched_job); + struct v3d_dev *v3d = job->base.v3d; + struct drm_device *dev = &v3d->drm; + struct dma_fence *fence; + + if (unlikely(job->base.base.s_fence->finished.error)) + return NULL; + + v3d->render_job = job; + + /* Can we avoid this flush? We need to be careful of + * scheduling, though -- imagine job0 rendering to texture and + * job1 reading, and them being executed as bin0, bin1, + * render0, render1, so that render1's flush at bin time + * wasn't enough. + */ + v3d_invalidate_caches(v3d); + + fence = v3d_fence_create(v3d, V3D_RENDER); + if (IS_ERR(fence)) + return NULL; + + if (job->base.irq_fence) + dma_fence_put(job->base.irq_fence); + job->base.irq_fence = dma_fence_get(fence); + + trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno, + job->start, job->end); + + /* XXX: Set the QCFG */ + + /* Set the current and end address of the control list. + * Writing the end register is what starts the job. 
+ */ + V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start); + V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end); return fence; } @@ -190,7 +210,7 @@ static struct dma_fence * v3d_tfu_job_run(struct drm_sched_job *sched_job) { struct v3d_tfu_job *job = to_tfu_job(sched_job); - struct v3d_dev *v3d = job->v3d; + struct v3d_dev *v3d = job->base.v3d; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; @@ -199,9 +219,9 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) return NULL; v3d->tfu_job = job; - if (job->irq_fence) - dma_fence_put(job->irq_fence); - job->irq_fence = dma_fence_get(fence); + if (job->base.irq_fence) + dma_fence_put(job->base.irq_fence); + job->base.irq_fence = dma_fence_get(fence); trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno); @@ -251,51 +271,74 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) mutex_unlock(&v3d->reset_lock); } +/* If the current address or return address have changed, then the GPU + * has probably made progress and we should delay the reset. This + * could fail if the GPU got in an infinite loop in the CL, but that + * is pretty unlikely outside of an i-g-t testcase. + */ static void -v3d_job_timedout(struct drm_sched_job *sched_job) +v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, + u32 *timedout_ctca, u32 *timedout_ctra) { struct v3d_job *job = to_v3d_job(sched_job); - struct v3d_exec_info *exec = job->exec; - struct v3d_dev *v3d = exec->v3d; - enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER; - u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q)); - u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q)); + struct v3d_dev *v3d = job->v3d; + u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q)); + u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q)); - /* If the current address or return address have changed, then - * the GPU has probably made progress and we should delay the - * reset. This could fail if the GPU got in an infinite loop - * in the CL, but that is pretty unlikely outside of an i-g-t - * testcase. 
- */ - if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) { - job->timedout_ctca = ctca; - job->timedout_ctra = ctra; + if (*timedout_ctca != ctca || *timedout_ctra != ctra) { + *timedout_ctca = ctca; + *timedout_ctra = ctra; return; } v3d_gpu_reset_for_timeout(v3d, sched_job); } +static void +v3d_bin_job_timedout(struct drm_sched_job *sched_job) +{ + struct v3d_bin_job *job = to_bin_job(sched_job); + + v3d_cl_job_timedout(sched_job, V3D_BIN, + &job->timedout_ctca, &job->timedout_ctra); +} + +static void +v3d_render_job_timedout(struct drm_sched_job *sched_job) +{ + struct v3d_render_job *job = to_render_job(sched_job); + + v3d_cl_job_timedout(sched_job, V3D_RENDER, + &job->timedout_ctca, &job->timedout_ctra); +} + static void v3d_tfu_job_timedout(struct drm_sched_job *sched_job) { - struct v3d_tfu_job *job = to_tfu_job(sched_job); + struct v3d_job *job = to_v3d_job(sched_job); v3d_gpu_reset_for_timeout(job->v3d, sched_job); } -static const struct drm_sched_backend_ops v3d_sched_ops = { +static const struct drm_sched_backend_ops v3d_bin_sched_ops = { .dependency = v3d_job_dependency, - .run_job = v3d_job_run, - .timedout_job = v3d_job_timedout, - .free_job = v3d_job_free + .run_job = v3d_bin_job_run, + .timedout_job = v3d_bin_job_timedout, + .free_job = v3d_job_free, +}; + +static const struct drm_sched_backend_ops v3d_render_sched_ops = { + .dependency = v3d_render_job_dependency, + .run_job = v3d_render_job_run, + .timedout_job = v3d_render_job_timedout, + .free_job = v3d_job_free, }; static const struct drm_sched_backend_ops v3d_tfu_sched_ops = { - .dependency = v3d_tfu_job_dependency, + .dependency = v3d_job_dependency, .run_job = v3d_tfu_job_run, .timedout_job = v3d_tfu_job_timedout, - .free_job = v3d_tfu_job_free + .free_job = v3d_job_free, }; int @@ -307,7 +350,7 @@ v3d_sched_init(struct v3d_dev *v3d) int ret; ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, - &v3d_sched_ops, + &v3d_bin_sched_ops, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), "v3d_bin"); @@ -317,7 +360,7 @@ v3d_sched_init(struct v3d_dev *v3d) } ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, - &v3d_sched_ops, + &v3d_render_sched_ops, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), "v3d_render"); From d223f98f02099b002903b9b22b56febae16ef80d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 16 Apr 2019 15:58:54 -0700 Subject: [PATCH 0010/1815] drm/v3d: Add support for compute shader dispatch. The compute shader dispatch interface is pretty simple -- just pass in the regs that userspace has passed us, with no CLs to run. However, with no CL to run it means that we need to do manual cache flushing of the L2 after the HW execution completes (for SSBO, atomic, and image_load_store writes that are the output of compute shaders). This doesn't yet expose the L2 cache's ability to have a region of the address space not write back to memory (which could be used for shared_var storage). So far, the Mesa side has been tested on V3D v4.2 simpenrose (passing the ES31 tests), and on the kernel side on 7278 (failing atomic compswap tests in a way that doesn't reproduce on simpenrose). v2: Fix excessive allocation for the clean_job (reported by Dan Carpenter). Keep refs on jobs until clean_job is finished, to avoid spurious MMU errors if the output BOs are freed by userspace before L2 cleaning is finished. 
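The ordering that makes this safe condenses to a few lines; this is an illustrative sketch rather than a verbatim hunk (the full ioctl, with BO lookup, reservation locking and error handling, is in the diff below), but the helper names are the driver's own. The compute job goes to the CSD queue, the cache-clean job is gated on its done fence, and userspace is only ever handed the clean job's fence, so buffers written by the shader cannot be freed or reused before the dirty L2 lines are written back:

	/* Compute job runs first, on the CSD queue. */
	ret = v3d_push_job(v3d_priv, &job->base, V3D_CSD);

	/* The follow-up clean job may not start until CSD has signalled. */
	clean_job->in_fence = dma_fence_get(job->base.done_fence);
	ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);

	/* out_sync is pointed at the clean job, not the compute job, so a
	 * userspace wait only completes once the L2 clean has finished.
	 */
	drm_syncobj_replace_fence(sync_out, clean_job->done_fence);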
Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20190416225856.20264-4-eric@anholt.net Acked-by: Rob Clark --- drivers/gpu/drm/v3d/v3d_debugfs.c | 22 +++++ drivers/gpu/drm/v3d/v3d_drv.c | 10 +- drivers/gpu/drm/v3d/v3d_drv.h | 28 +++++- drivers/gpu/drm/v3d/v3d_fence.c | 2 + drivers/gpu/drm/v3d/v3d_gem.c | 156 +++++++++++++++++++++++++++++- drivers/gpu/drm/v3d/v3d_irq.c | 16 ++- drivers/gpu/drm/v3d/v3d_regs.h | 73 ++++++++++++++ drivers/gpu/drm/v3d/v3d_sched.c | 121 +++++++++++++++++++++-- drivers/gpu/drm/v3d/v3d_trace.h | 94 ++++++++++++++++++ include/uapi/drm/v3d_drm.h | 28 ++++++ 10 files changed, 531 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c index a24af2d2f574..a2dc4262955e 100644 --- a/drivers/gpu/drm/v3d/v3d_debugfs.c +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -58,6 +58,17 @@ static const struct v3d_reg_def v3d_core_reg_defs[] = { REGDEF(V3D_GMP_VIO_ADDR), }; +static const struct v3d_reg_def v3d_csd_reg_defs[] = { + REGDEF(V3D_CSD_STATUS), + REGDEF(V3D_CSD_CURRENT_CFG0), + REGDEF(V3D_CSD_CURRENT_CFG1), + REGDEF(V3D_CSD_CURRENT_CFG2), + REGDEF(V3D_CSD_CURRENT_CFG3), + REGDEF(V3D_CSD_CURRENT_CFG4), + REGDEF(V3D_CSD_CURRENT_CFG5), + REGDEF(V3D_CSD_CURRENT_CFG6), +}; + static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused) { struct drm_info_node *node = (struct drm_info_node *)m->private; @@ -89,6 +100,17 @@ static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused) V3D_CORE_READ(core, v3d_core_reg_defs[i].reg)); } + + if (v3d_has_csd(v3d)) { + for (i = 0; i < ARRAY_SIZE(v3d_csd_reg_defs); i++) { + seq_printf(m, "core %d %s (0x%04x): 0x%08x\n", + core, + v3d_csd_reg_defs[i].name, + v3d_csd_reg_defs[i].reg, + V3D_CORE_READ(core, + v3d_csd_reg_defs[i].reg)); + } + } } return 0; diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index a06b05f714a5..df66c90a0102 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -7,9 +7,9 @@ * This driver supports the Broadcom V3D 3.3 and 4.1 OpenGL ES GPUs. * For V3D 2.x support, see the VC4 driver. * - * Currently only single-core rendering using the binner and renderer, - * along with TFU (texture formatting unit) rendering is supported. - * V3D 4.x's CSD (compute shader dispatch) is not yet supported. + * The V3D GPU includes a tiled render (composed of a bin and render + * pipelines), the TFU (texture formatting unit), and the CSD (compute + * shader dispatch). 
*/ #include @@ -120,6 +120,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, case DRM_V3D_PARAM_SUPPORTS_TFU: args->value = 1; return 0; + case DRM_V3D_PARAM_SUPPORTS_CSD: + args->value = v3d_has_csd(v3d); + return 0; default: DRM_DEBUG("Unknown parameter %d\n", args->param); return -EINVAL; @@ -179,6 +182,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(V3D_SUBMIT_TFU, v3d_submit_tfu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), + DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CSD, v3d_submit_csd_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), }; static struct drm_driver v3d_drm_driver = { diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index f82f8be04bd8..3d816e1674a0 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -16,9 +16,11 @@ enum v3d_queue { V3D_BIN, V3D_RENDER, V3D_TFU, + V3D_CSD, + V3D_CACHE_CLEAN, }; -#define V3D_MAX_QUEUES (V3D_TFU + 1) +#define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1) struct v3d_queue_state { struct drm_gpu_scheduler sched; @@ -70,6 +72,7 @@ struct v3d_dev { struct v3d_bin_job *bin_job; struct v3d_render_job *render_job; struct v3d_tfu_job *tfu_job; + struct v3d_csd_job *csd_job; struct v3d_queue_state queue[V3D_MAX_QUEUES]; @@ -92,6 +95,12 @@ struct v3d_dev { */ struct mutex sched_lock; + /* Lock taken during a cache clean and when initiating an L2 + * flush, to keep L2 flushes from interfering with the + * synchronous L2 cleans. + */ + struct mutex cache_clean_lock; + struct { u32 num_allocated; u32 pages_allocated; @@ -104,6 +113,12 @@ to_v3d_dev(struct drm_device *dev) return (struct v3d_dev *)dev->dev_private; } +static inline bool +v3d_has_csd(struct v3d_dev *v3d) +{ + return v3d->ver >= 41; +} + /* The per-fd struct, which tracks the MMU mappings. 
*/ struct v3d_file_priv { struct v3d_dev *v3d; @@ -222,6 +237,14 @@ struct v3d_tfu_job { struct drm_v3d_submit_tfu args; }; +struct v3d_csd_job { + struct v3d_job base; + + u32 timedout_batches; + + struct drm_v3d_submit_csd args; +}; + /** * _wait_for - magic (register) wait macro * @@ -283,11 +306,14 @@ int v3d_submit_cl_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int v3d_submit_csd_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void v3d_job_put(struct v3d_job *job); void v3d_reset(struct v3d_dev *v3d); void v3d_invalidate_caches(struct v3d_dev *v3d); +void v3d_clean_caches(struct v3d_dev *v3d); /* v3d_irq.c */ int v3d_irq_init(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c index b0a2a1ae2eb1..89840ed212c0 100644 --- a/drivers/gpu/drm/v3d/v3d_fence.c +++ b/drivers/gpu/drm/v3d/v3d_fence.c @@ -36,6 +36,8 @@ static const char *v3d_fence_get_timeline_name(struct dma_fence *fence) return "v3d-render"; case V3D_TFU: return "v3d-tfu"; + case V3D_CSD: + return "v3d-csd"; default: return NULL; } diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 350a269a7b58..8bd6fa69f566 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -162,10 +162,52 @@ v3d_flush_l2t(struct v3d_dev *v3d, int core) /* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't * need to wait for completion before dispatching the job -- * L2T accesses will be stalled until the flush has completed. + * However, we do need to make sure we don't try to trigger a + * new flush while the L2_CLEAN queue is trying to + * synchronously clean after a job. */ + mutex_lock(&v3d->cache_clean_lock); V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_L2TFLS | V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM)); + mutex_unlock(&v3d->cache_clean_lock); +} + +/* Cleans texture L1 and L2 cachelines (writing back dirty data). + * + * For cleaning, which happens from the CACHE_CLEAN queue after CSD has + * executed, we need to make sure that the clean is done before + * signaling job completion. So, we synchronously wait before + * returning, and we make sure that L2 invalidates don't happen in the + * meantime to confuse our are-we-done checks. + */ +void +v3d_clean_caches(struct v3d_dev *v3d) +{ + struct drm_device *dev = &v3d->drm; + int core = 0; + + trace_v3d_cache_clean_begin(dev); + + V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF); + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & + V3D_L2TCACTL_L2TFLS), 100)) { + DRM_ERROR("Timeout waiting for L1T write combiner flush\n"); + } + + mutex_lock(&v3d->cache_clean_lock); + V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, + V3D_L2TCACTL_L2TFLS | + V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM)); + + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & + V3D_L2TCACTL_L2TFLS), 100)) { + DRM_ERROR("Timeout waiting for L2T clean\n"); + } + + mutex_unlock(&v3d->cache_clean_lock); + + trace_v3d_cache_clean_end(dev); } /* Invalidates the slice caches. These are read-only caches. 
*/ @@ -429,7 +471,8 @@ static void v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, struct v3d_job *job, struct ww_acquire_ctx *acquire_ctx, - u32 out_sync) + u32 out_sync, + struct dma_fence *done_fence) { struct drm_syncobj *sync_out; int i; @@ -445,7 +488,7 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, /* Update the return sync object for the job */ sync_out = drm_syncobj_find(file_priv, out_sync); if (sync_out) { - drm_syncobj_replace_fence(sync_out, job->done_fence); + drm_syncobj_replace_fence(sync_out, done_fence); drm_syncobj_put(sync_out); } } @@ -541,8 +584,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, mutex_unlock(&v3d->sched_lock); v3d_attach_fences_and_unlock_reservation(file_priv, - &render->base, &acquire_ctx, - args->out_sync); + &render->base, + &acquire_ctx, + args->out_sync, + render->base.done_fence); if (bin) v3d_job_put(&bin->base); @@ -641,7 +686,8 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, v3d_attach_fences_and_unlock_reservation(file_priv, &job->base, &acquire_ctx, - args->out_sync); + args->out_sync, + job->base.done_fence); v3d_job_put(&job->base); @@ -657,6 +703,105 @@ fail: return ret; } +/** + * v3d_submit_csd_ioctl() - Submits a CSD (texture formatting) job to the V3D. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * Userspace provides the register setup for the CSD, which we don't + * need to validate since the CSD is behind the MMU. + */ +int +v3d_submit_csd_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_submit_csd *args = data; + struct v3d_csd_job *job; + struct v3d_job *clean_job; + struct ww_acquire_ctx acquire_ctx; + int ret; + + trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]); + + if (!v3d_has_csd(v3d)) { + DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n"); + return -EINVAL; + } + + job = kcalloc(1, sizeof(*job), GFP_KERNEL); + if (!job) + return -ENOMEM; + + ret = v3d_job_init(v3d, file_priv, &job->base, + v3d_job_free, args->in_sync); + if (ret) { + kfree(job); + return ret; + } + + clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL); + if (!clean_job) { + v3d_job_put(&job->base); + kfree(job); + return -ENOMEM; + } + + ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0); + if (ret) { + v3d_job_put(&job->base); + kfree(clean_job); + return ret; + } + + job->args = *args; + + ret = v3d_lookup_bos(dev, file_priv, clean_job, + args->bo_handles, args->bo_handle_count); + if (ret) + goto fail; + + ret = v3d_lock_bo_reservations(clean_job->base.bo, + clean_job->base.bo_count, + &acquire_ctx); + if (ret) + goto fail; + + mutex_lock(&v3d->sched_lock); + ret = v3d_push_job(v3d_priv, &job->base, V3D_CSD); + if (ret) + goto fail_unreserve; + + clean_job->in_fence = dma_fence_get(job->base.done_fence); + ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN); + if (ret) + goto fail_unreserve; + mutex_unlock(&v3d->sched_lock); + + v3d_attach_fences_and_unlock_reservation(file_priv, + clean_job, + &acquire_ctx, + args->out_sync, + clean_job->done_fence); + + v3d_job_put(&job->base); + v3d_job_put(clean_job); + + return 0; + +fail_unreserve: + mutex_unlock(&v3d->sched_lock); + drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count, + &acquire_ctx); +fail: + v3d_job_put(&job->base); + v3d_job_put(clean_job); + + return ret; +} + int v3d_gem_init(struct 
drm_device *dev) { @@ -672,6 +817,7 @@ v3d_gem_init(struct drm_device *dev) mutex_init(&v3d->bo_lock); mutex_init(&v3d->reset_lock); mutex_init(&v3d->sched_lock); + mutex_init(&v3d->cache_clean_lock); /* Note: We don't allocate address 0. Various bits of HW * treat 0 as special, such as the occlusion query counters diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index ce373ffd6380..fac3c542860b 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -4,9 +4,9 @@ /** * DOC: Interrupt management for the V3D engine * - * When we take a bin, render, or TFU done interrupt, we need to - * signal the fence for that job so that the scheduler can queue up - * the next one and unblock any waiters. + * When we take a bin, render, TFU done, or CSD done interrupt, we + * need to signal the fence for that job so that the scheduler can + * queue up the next one and unblock any waiters. * * When we take the binner out of memory interrupt, we need to * allocate some new memory and pass it to the binner so that the @@ -20,6 +20,7 @@ #define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \ V3D_INT_FLDONE | \ V3D_INT_FRDONE | \ + V3D_INT_CSDDONE | \ V3D_INT_GMPV)) #define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \ @@ -112,6 +113,15 @@ v3d_irq(int irq, void *arg) status = IRQ_HANDLED; } + if (intsts & V3D_INT_CSDDONE) { + struct v3d_fence *fence = + to_v3d_fence(v3d->csd_job->base.irq_fence); + + trace_v3d_csd_irq(&v3d->drm, fence->seqno); + dma_fence_signal(&fence->base); + status = IRQ_HANDLED; + } + /* We shouldn't be triggering these if we have GMP in * always-allowed mode. */ diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h index 8e88af237610..9a8ff0ce648e 100644 --- a/drivers/gpu/drm/v3d/v3d_regs.h +++ b/drivers/gpu/drm/v3d/v3d_regs.h @@ -238,8 +238,11 @@ #define V3D_CTL_L2TCACTL 0x00030 # define V3D_L2TCACTL_TMUWCF BIT(8) # define V3D_L2TCACTL_L2T_NO_WM BIT(4) +/* Invalidates cache lines. */ # define V3D_L2TCACTL_FLM_FLUSH 0 +/* Removes cachelines without writing dirty lines back. */ # define V3D_L2TCACTL_FLM_CLEAR 1 +/* Writes out dirty cachelines and marks them clean, but doesn't invalidate. 
*/ # define V3D_L2TCACTL_FLM_CLEAN 2 # define V3D_L2TCACTL_FLM_MASK V3D_MASK(2, 1) # define V3D_L2TCACTL_FLM_SHIFT 1 @@ -255,6 +258,8 @@ #define V3D_CTL_INT_MSK_CLR 0x00064 # define V3D_INT_QPU_MASK V3D_MASK(27, 16) # define V3D_INT_QPU_SHIFT 16 +# define V3D_INT_CSDDONE BIT(7) +# define V3D_INT_PCTR BIT(6) # define V3D_INT_GMPV BIT(5) # define V3D_INT_TRFB BIT(4) # define V3D_INT_SPILLUSE BIT(3) @@ -374,4 +379,72 @@ #define V3D_GMP_PRESERVE_LOAD 0x00818 #define V3D_GMP_VALID_LINES 0x00820 +#define V3D_CSD_STATUS 0x00900 +# define V3D_CSD_STATUS_NUM_COMPLETED_MASK V3D_MASK(11, 4) +# define V3D_CSD_STATUS_NUM_COMPLETED_SHIFT 4 +# define V3D_CSD_STATUS_NUM_ACTIVE_MASK V3D_MASK(3, 2) +# define V3D_CSD_STATUS_NUM_ACTIVE_SHIFT 2 +# define V3D_CSD_STATUS_HAVE_CURRENT_DISPATCH BIT(1) +# define V3D_CSD_STATUS_HAVE_QUEUED_DISPATCH BIT(0) + +#define V3D_CSD_QUEUED_CFG0 0x00904 +# define V3D_CSD_QUEUED_CFG0_NUM_WGS_X_MASK V3D_MASK(31, 16) +# define V3D_CSD_QUEUED_CFG0_NUM_WGS_X_SHIFT 16 +# define V3D_CSD_QUEUED_CFG0_WG_X_OFFSET_MASK V3D_MASK(15, 0) +# define V3D_CSD_QUEUED_CFG0_WG_X_OFFSET_SHIFT 0 + +#define V3D_CSD_QUEUED_CFG1 0x00908 +# define V3D_CSD_QUEUED_CFG1_NUM_WGS_Y_MASK V3D_MASK(31, 16) +# define V3D_CSD_QUEUED_CFG1_NUM_WGS_Y_SHIFT 16 +# define V3D_CSD_QUEUED_CFG1_WG_Y_OFFSET_MASK V3D_MASK(15, 0) +# define V3D_CSD_QUEUED_CFG1_WG_Y_OFFSET_SHIFT 0 + +#define V3D_CSD_QUEUED_CFG2 0x0090c +# define V3D_CSD_QUEUED_CFG2_NUM_WGS_Z_MASK V3D_MASK(31, 16) +# define V3D_CSD_QUEUED_CFG2_NUM_WGS_Z_SHIFT 16 +# define V3D_CSD_QUEUED_CFG2_WG_Z_OFFSET_MASK V3D_MASK(15, 0) +# define V3D_CSD_QUEUED_CFG2_WG_Z_OFFSET_SHIFT 0 + +#define V3D_CSD_QUEUED_CFG3 0x00910 +# define V3D_CSD_QUEUED_CFG3_OVERLAP_WITH_PREV BIT(26) +# define V3D_CSD_QUEUED_CFG3_MAX_SG_ID_MASK V3D_MASK(25, 20) +# define V3D_CSD_QUEUED_CFG3_MAX_SG_ID_SHIFT 20 +# define V3D_CSD_QUEUED_CFG3_BATCHES_PER_SG_M1_MASK V3D_MASK(19, 12) +# define V3D_CSD_QUEUED_CFG3_BATCHES_PER_SG_M1_SHIFT 12 +# define V3D_CSD_QUEUED_CFG3_WGS_PER_SG_MASK V3D_MASK(11, 8) +# define V3D_CSD_QUEUED_CFG3_WGS_PER_SG_SHIFT 8 +# define V3D_CSD_QUEUED_CFG3_WG_SIZE_MASK V3D_MASK(7, 0) +# define V3D_CSD_QUEUED_CFG3_WG_SIZE_SHIFT 0 + +/* Number of batches, minus 1 */ +#define V3D_CSD_QUEUED_CFG4 0x00914 + +/* Shader address, pnan, singleseg, threading, like a shader record. 
*/ +#define V3D_CSD_QUEUED_CFG5 0x00918 + +/* Uniforms address (4 byte aligned) */ +#define V3D_CSD_QUEUED_CFG6 0x0091c + +#define V3D_CSD_CURRENT_CFG0 0x00920 +#define V3D_CSD_CURRENT_CFG1 0x00924 +#define V3D_CSD_CURRENT_CFG2 0x00928 +#define V3D_CSD_CURRENT_CFG3 0x0092c +#define V3D_CSD_CURRENT_CFG4 0x00930 +#define V3D_CSD_CURRENT_CFG5 0x00934 +#define V3D_CSD_CURRENT_CFG6 0x00938 + +#define V3D_CSD_CURRENT_ID0 0x0093c +# define V3D_CSD_CURRENT_ID0_WG_X_MASK V3D_MASK(31, 16) +# define V3D_CSD_CURRENT_ID0_WG_X_SHIFT 16 +# define V3D_CSD_CURRENT_ID0_WG_IN_SG_MASK V3D_MASK(11, 8) +# define V3D_CSD_CURRENT_ID0_WG_IN_SG_SHIFT 8 +# define V3D_CSD_CURRENT_ID0_L_IDX_MASK V3D_MASK(7, 0) +# define V3D_CSD_CURRENT_ID0_L_IDX_SHIFT 0 + +#define V3D_CSD_CURRENT_ID1 0x00940 +# define V3D_CSD_CURRENT_ID0_WG_Z_MASK V3D_MASK(31, 16) +# define V3D_CSD_CURRENT_ID0_WG_Z_SHIFT 16 +# define V3D_CSD_CURRENT_ID0_WG_Y_MASK V3D_MASK(15, 0) +# define V3D_CSD_CURRENT_ID0_WG_Y_SHIFT 0 + #endif /* V3D_REGS_H */ diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 739f399308ce..ad2245701dda 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -48,6 +48,12 @@ to_tfu_job(struct drm_sched_job *sched_job) return container_of(sched_job, struct v3d_tfu_job, base.base); } +static struct v3d_csd_job * +to_csd_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct v3d_csd_job, base.base); +} + static void v3d_job_free(struct drm_sched_job *sched_job) { @@ -243,6 +249,48 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) return fence; } +static struct dma_fence * +v3d_csd_job_run(struct drm_sched_job *sched_job) +{ + struct v3d_csd_job *job = to_csd_job(sched_job); + struct v3d_dev *v3d = job->base.v3d; + struct drm_device *dev = &v3d->drm; + struct dma_fence *fence; + int i; + + v3d->csd_job = job; + + v3d_invalidate_caches(v3d); + + fence = v3d_fence_create(v3d, V3D_CSD); + if (IS_ERR(fence)) + return NULL; + + if (job->base.irq_fence) + dma_fence_put(job->base.irq_fence); + job->base.irq_fence = dma_fence_get(fence); + + trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno); + + for (i = 1; i <= 6; i++) + V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]); + /* CFG0 write kicks off the job. */ + V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]); + + return fence; +} + +static struct dma_fence * +v3d_cache_clean_job_run(struct drm_sched_job *sched_job) +{ + struct v3d_job *job = to_v3d_job(sched_job); + struct v3d_dev *v3d = job->v3d; + + v3d_clean_caches(v3d); + + return NULL; +} + static void v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) { @@ -313,13 +361,31 @@ v3d_render_job_timedout(struct drm_sched_job *sched_job) } static void -v3d_tfu_job_timedout(struct drm_sched_job *sched_job) +v3d_generic_job_timedout(struct drm_sched_job *sched_job) { struct v3d_job *job = to_v3d_job(sched_job); v3d_gpu_reset_for_timeout(job->v3d, sched_job); } +static void +v3d_csd_job_timedout(struct drm_sched_job *sched_job) +{ + struct v3d_csd_job *job = to_csd_job(sched_job); + struct v3d_dev *v3d = job->base.v3d; + u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4); + + /* If we've made progress, skip reset and let the timer get + * rearmed. 
+ */ + if (job->timedout_batches != batches) { + job->timedout_batches = batches; + return; + } + + v3d_gpu_reset_for_timeout(v3d, sched_job); +} + static const struct drm_sched_backend_ops v3d_bin_sched_ops = { .dependency = v3d_job_dependency, .run_job = v3d_bin_job_run, @@ -337,10 +403,24 @@ static const struct drm_sched_backend_ops v3d_render_sched_ops = { static const struct drm_sched_backend_ops v3d_tfu_sched_ops = { .dependency = v3d_job_dependency, .run_job = v3d_tfu_job_run, - .timedout_job = v3d_tfu_job_timedout, + .timedout_job = v3d_generic_job_timedout, .free_job = v3d_job_free, }; +static const struct drm_sched_backend_ops v3d_csd_sched_ops = { + .dependency = v3d_job_dependency, + .run_job = v3d_csd_job_run, + .timedout_job = v3d_csd_job_timedout, + .free_job = v3d_job_free +}; + +static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = { + .dependency = v3d_job_dependency, + .run_job = v3d_cache_clean_job_run, + .timedout_job = v3d_generic_job_timedout, + .free_job = v3d_job_free +}; + int v3d_sched_init(struct v3d_dev *v3d) { @@ -367,7 +447,7 @@ v3d_sched_init(struct v3d_dev *v3d) if (ret) { dev_err(v3d->dev, "Failed to create render scheduler: %d.", ret); - drm_sched_fini(&v3d->queue[V3D_BIN].sched); + v3d_sched_fini(v3d); return ret; } @@ -379,11 +459,36 @@ v3d_sched_init(struct v3d_dev *v3d) if (ret) { dev_err(v3d->dev, "Failed to create TFU scheduler: %d.", ret); - drm_sched_fini(&v3d->queue[V3D_RENDER].sched); - drm_sched_fini(&v3d->queue[V3D_BIN].sched); + v3d_sched_fini(v3d); return ret; } + if (v3d_has_csd(v3d)) { + ret = drm_sched_init(&v3d->queue[V3D_CSD].sched, + &v3d_csd_sched_ops, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), + "v3d_csd"); + if (ret) { + dev_err(v3d->dev, "Failed to create CSD scheduler: %d.", + ret); + v3d_sched_fini(v3d); + return ret; + } + + ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched, + &v3d_cache_clean_sched_ops, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), + "v3d_cache_clean"); + if (ret) { + dev_err(v3d->dev, "Failed to create CACHE_CLEAN scheduler: %d.", + ret); + v3d_sched_fini(v3d); + return ret; + } + } + return 0; } @@ -392,6 +497,8 @@ v3d_sched_fini(struct v3d_dev *v3d) { enum v3d_queue q; - for (q = 0; q < V3D_MAX_QUEUES; q++) - drm_sched_fini(&v3d->queue[q].sched); + for (q = 0; q < V3D_MAX_QUEUES; q++) { + if (v3d->queue[q].sched.ready) + drm_sched_fini(&v3d->queue[q].sched); + } } diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h index edd984afa33f..7aa8dc356e54 100644 --- a/drivers/gpu/drm/v3d/v3d_trace.h +++ b/drivers/gpu/drm/v3d/v3d_trace.h @@ -124,6 +124,26 @@ TRACE_EVENT(v3d_tfu_irq, __entry->seqno) ); +TRACE_EVENT(v3d_csd_irq, + TP_PROTO(struct drm_device *dev, + uint64_t seqno), + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u64, seqno) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->seqno = seqno; + ), + + TP_printk("dev=%u, seqno=%llu", + __entry->dev, + __entry->seqno) +); + TRACE_EVENT(v3d_submit_tfu_ioctl, TP_PROTO(struct drm_device *dev, u32 iia), TP_ARGS(dev, iia), @@ -163,6 +183,80 @@ TRACE_EVENT(v3d_submit_tfu, __entry->seqno) ); +TRACE_EVENT(v3d_submit_csd_ioctl, + TP_PROTO(struct drm_device *dev, u32 cfg5, u32 cfg6), + TP_ARGS(dev, cfg5, cfg6), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, cfg5) + __field(u32, cfg6) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->cfg5 = cfg5; + __entry->cfg6 = cfg6; + ), + + 
TP_printk("dev=%u, CFG5 0x%08x, CFG6 0x%08x", + __entry->dev, + __entry->cfg5, + __entry->cfg6) +); + +TRACE_EVENT(v3d_submit_csd, + TP_PROTO(struct drm_device *dev, + uint64_t seqno), + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u64, seqno) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->seqno = seqno; + ), + + TP_printk("dev=%u, seqno=%llu", + __entry->dev, + __entry->seqno) +); + +TRACE_EVENT(v3d_cache_clean_begin, + TP_PROTO(struct drm_device *dev), + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(u32, dev) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + ), + + TP_printk("dev=%u", + __entry->dev) +); + +TRACE_EVENT(v3d_cache_clean_end, + TP_PROTO(struct drm_device *dev), + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(u32, dev) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + ), + + TP_printk("dev=%u", + __entry->dev) +); + TRACE_EVENT(v3d_reset_begin, TP_PROTO(struct drm_device *dev), TP_ARGS(dev), diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index ea70669d2138..58fbe48c91e9 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -37,6 +37,7 @@ extern "C" { #define DRM_V3D_GET_PARAM 0x04 #define DRM_V3D_GET_BO_OFFSET 0x05 #define DRM_V3D_SUBMIT_TFU 0x06 +#define DRM_V3D_SUBMIT_CSD 0x07 #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) @@ -45,6 +46,7 @@ extern "C" { #define DRM_IOCTL_V3D_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param) #define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset) #define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu) +#define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd) /** * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D @@ -190,6 +192,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_V3D_CORE0_IDENT1, DRM_V3D_PARAM_V3D_CORE0_IDENT2, DRM_V3D_PARAM_SUPPORTS_TFU, + DRM_V3D_PARAM_SUPPORTS_CSD, }; struct drm_v3d_get_param { @@ -230,6 +233,31 @@ struct drm_v3d_submit_tfu { __u32 out_sync; }; +/* Submits a compute shader for dispatch. This job will block on any + * previous compute shaders submitted on this fd, and any other + * synchronization must be performed with in_sync/out_sync. + */ +struct drm_v3d_submit_csd { + __u32 cfg[7]; + __u32 coef[4]; + + /* Pointer to a u32 array of the BOs that are referenced by the job. + */ + __u64 bo_handles; + + /* Number of BO handles passed in (size is that times 4). */ + __u32 bo_handle_count; + + /* sync object to block on before running the CSD job. Each + * CSD job will execute in the order submitted to its FD. + * Synchronization against rendering/TFU jobs or CSD from + * other fds requires using sync objects. + */ + __u32 in_sync; + /* Sync object to signal when the CSD job is done. */ + __u32 out_sync; +}; + #if defined(__cplusplus) } #endif From 07fbbd66b9735fe309f3bb4ad6dbfaa767e9222e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 16 Apr 2019 15:58:55 -0700 Subject: [PATCH 0011/1815] drm/v3d: Drop reservation of a shared slot in the dma-buf reservations. We only set the excl (possible-writing) fence pointer and never add a shared (read-only) fence. 
Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20190416225856.20264-5-eric@anholt.net Acked-by: Rob Clark --- drivers/gpu/drm/v3d/v3d_gem.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 8bd6fa69f566..6873b14a0d38 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -253,18 +253,6 @@ v3d_lock_bo_reservations(struct drm_gem_object **bos, if (ret) return ret; - /* Reserve space for our shared (read-only) fence references, - * before we commit the CL to the hardware. - */ - for (i = 0; i < bo_count; i++) { - ret = reservation_object_reserve_shared(bos[i]->resv, 1); - if (ret) { - drm_gem_unlock_reservations(bos, bo_count, - acquire_ctx); - return ret; - } - } - return 0; } From dffa9b7a78c4361e55e21b3acb54e0d34ad15ea0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 16 Apr 2019 15:58:56 -0700 Subject: [PATCH 0012/1815] drm/v3d: Add missing implicit synchronization. It is the expectation of existing userspace (X11 + Mesa, in particular) that jobs submitted to the kernel against a shared BO will get implicitly synchronized by their submission order. If we want to allow clever userspace to disable implicit synchronization, we should do that under its own submit flag (as amdgpu and lima do). Note that we currently only implicitly sync for the rendering pass, not binning -- if you texture-from-pixmap in the binning vertex shader (vertex coordinate generation), you'll miss out on synchronization. Fixes flickering when multiple clients are running in parallel, particularly GL apps and compositors. v2: Fix a missing refcount on the CSD done fence for L2 cleaning. Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20190416225856.20264-6-eric@anholt.net Acked-by: Rob Clark --- drivers/gpu/drm/v3d/v3d_drv.h | 11 +++--- drivers/gpu/drm/v3d/v3d_gem.c | 63 +++++++++++++++++++++++---------- drivers/gpu/drm/v3d/v3d_sched.c | 40 +++------------------ 3 files changed, 53 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 3d816e1674a0..47b86a25629e 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -182,8 +182,10 @@ struct v3d_job { struct drm_gem_object **bo; u32 bo_count; - /* An optional fence userspace can pass in for the job to depend on. */ - struct dma_fence *in_fence; + /* Array of struct dma_fence * to block on before submitting this job. + */ + struct xarray deps; + unsigned long last_dep; /* v3d fence to be signaled by IRQ handler when the job is complete. */ struct dma_fence *irq_fence; @@ -215,11 +217,6 @@ struct v3d_bin_job { struct v3d_render_job { struct v3d_job base; - /* Optional fence for the binner, to depend on before starting - * our job. - */ - struct dma_fence *bin_done_fence; - /* GPU virtual addresses of the start/end of the CL job. */ u32 start, end; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 6873b14a0d38..f736e021467a 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -243,16 +243,25 @@ v3d_invalidate_caches(struct v3d_dev *v3d) * to v3d, so we don't attach dma-buf fences to them. 
*/ static int -v3d_lock_bo_reservations(struct drm_gem_object **bos, - int bo_count, +v3d_lock_bo_reservations(struct v3d_job *job, struct ww_acquire_ctx *acquire_ctx) { int i, ret; - ret = drm_gem_lock_reservations(bos, bo_count, acquire_ctx); + ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx); if (ret) return ret; + for (i = 0; i < job->bo_count; i++) { + ret = drm_gem_fence_array_add_implicit(&job->deps, + job->bo[i], true); + if (ret) { + drm_gem_unlock_reservations(job->bo, job->bo_count, + acquire_ctx); + return ret; + } + } + return 0; } @@ -339,6 +348,8 @@ static void v3d_job_free(struct kref *ref) { struct v3d_job *job = container_of(ref, struct v3d_job, refcount); + unsigned long index; + struct dma_fence *fence; int i; for (i = 0; i < job->bo_count; i++) { @@ -347,7 +358,11 @@ v3d_job_free(struct kref *ref) } kvfree(job->bo); - dma_fence_put(job->in_fence); + xa_for_each(&job->deps, index, fence) { + dma_fence_put(fence); + } + xa_destroy(&job->deps); + dma_fence_put(job->irq_fence); dma_fence_put(job->done_fence); @@ -414,6 +429,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, struct v3d_job *job, void (*free)(struct kref *ref), u32 in_sync) { + struct dma_fence *in_fence = NULL; int ret; job->v3d = v3d; @@ -423,15 +439,23 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, if (ret < 0) return ret; - ret = drm_syncobj_find_fence(file_priv, in_sync, 0, 0, &job->in_fence); - if (ret == -EINVAL) { - pm_runtime_put_autosuspend(v3d->dev); - return ret; - } + xa_init_flags(&job->deps, XA_FLAGS_ALLOC); + + ret = drm_syncobj_find_fence(file_priv, in_sync, 0, 0, &in_fence); + if (ret == -EINVAL) + goto fail; + + ret = drm_gem_fence_array_add(&job->deps, in_fence); + if (ret) + goto fail; kref_init(&job->refcount); return 0; +fail: + xa_destroy(&job->deps); + pm_runtime_put_autosuspend(v3d->dev); + return ret; } static int @@ -552,8 +576,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; - ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count, - &acquire_ctx); + ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx); if (ret) goto fail; @@ -563,7 +586,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail_unreserve; - render->bin_done_fence = dma_fence_get(bin->base.done_fence); + ret = drm_gem_fence_array_add(&render->base.deps, + dma_fence_get(bin->base.done_fence)); + if (ret) + goto fail_unreserve; } ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER); @@ -661,8 +687,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, } spin_unlock(&file_priv->table_lock); - ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count, - &acquire_ctx); + ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx); if (ret) goto fail; @@ -751,9 +776,7 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; - ret = v3d_lock_bo_reservations(clean_job->base.bo, - clean_job->base.bo_count, - &acquire_ctx); + ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx); if (ret) goto fail; @@ -762,7 +785,11 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data, if (ret) goto fail_unreserve; - clean_job->in_fence = dma_fence_get(job->base.done_fence); + ret = drm_gem_fence_array_add(&clean_job->deps, + dma_fence_get(job->base.done_fence)); + if (ret) + goto fail_unreserve; + ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN); if (ret) goto fail_unreserve; diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 
ad2245701dda..b4255807b3a7 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -74,49 +74,17 @@ v3d_job_dependency(struct drm_sched_job *sched_job, struct drm_sched_entity *s_entity) { struct v3d_job *job = to_v3d_job(sched_job); - struct dma_fence *fence; - - fence = job->in_fence; - if (fence) { - job->in_fence = NULL; - return fence; - } /* XXX: Wait on a fence for switching the GMP if necessary, * and then do so. */ + if (!xa_empty(&job->deps)) + return xa_erase(&job->deps, job->last_dep++); + return NULL; } -/** - * Returns the fences that the render job depends on, one by one. - * v3d_job_run() won't be called until all of them have been signaled. - */ -static struct dma_fence * -v3d_render_job_dependency(struct drm_sched_job *sched_job, - struct drm_sched_entity *s_entity) -{ - struct v3d_render_job *job = to_render_job(sched_job); - struct dma_fence *fence; - - fence = v3d_job_dependency(sched_job, s_entity); - if (fence) - return fence; - - /* If we had a bin job, the render job definitely depends on - * it. We first have to wait for bin to be scheduled, so that - * its done_fence is created. - */ - fence = job->bin_done_fence; - if (fence) { - job->bin_done_fence = NULL; - return fence; - } - - return fence; -} - static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) { struct v3d_bin_job *job = to_bin_job(sched_job); @@ -394,7 +362,7 @@ static const struct drm_sched_backend_ops v3d_bin_sched_ops = { }; static const struct drm_sched_backend_ops v3d_render_sched_ops = { - .dependency = v3d_render_job_dependency, + .dependency = v3d_job_dependency, .run_job = v3d_render_job_run, .timedout_job = v3d_render_job_timedout, .free_job = v3d_job_free, From 844e33135d3a17686e167af2b6e653a4721c26e5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 Apr 2019 21:53:58 +0100 Subject: [PATCH 0013/1815] drm/i915: Remove unwarranted clamping for hsw/bdw We always start off at an "efficient frequency" and can let the system autotune from there, eliminating the need to clamp the available range. 
Signed-off-by: Chris Wilson Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20190418205358.11450-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 44be676fabd6..87f6fc6d5502 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8528,18 +8528,9 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) gen6_init_rps_frequencies(dev_priv); /* Derive initial user preferences/limits from the hardware limits */ - rps->idle_freq = rps->min_freq; - rps->cur_freq = rps->idle_freq; - rps->max_freq_softlimit = rps->max_freq; rps->min_freq_softlimit = rps->min_freq; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - rps->min_freq_softlimit = - max_t(int, - rps->efficient_freq, - intel_freq_opcode(dev_priv, 450)); - /* After setting max-softlimit, find the overclock max freq */ if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { @@ -8556,6 +8547,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) /* Finally allow us to boost to max by default */ rps->boost_freq = rps->max_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; mutex_unlock(&dev_priv->pcu_lock); } From d69990e0c399e4f7f9b50505d3285e5de991148a Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Fri, 5 Apr 2019 15:02:34 +0200 Subject: [PATCH 0014/1815] drm/i915: Use drm_dev_unplug() The driver does not currently support unbinding from a device which is in use. Since open file descriptors may still be pointing into kernel memory where the device structures used to be, entirely correct kernel panics protect the driver from being unbound as we should not be unbinding it before those dangling pointers have been made safe. According to the documentation found inside drivers/gpu/drm/drm_drv.c, drm_dev_unplug() should be used instead of drm_dev_unregister() in order to make a device inaccessible to users as soon as it is unpluged. Follow that advice to make those possibly dangling pointers safe, protected by DRM layer from a user who is otherwise left pointing into possibly reused kernel memory after the driver has been unbound from the device. Once done, also cancel inflight operations immediately by calling i915_gem_set_wedged(). Signed-off-by: Janusz Krzysztofik Reviewed-by: Chris Wilson Reviewed-by: Daniel Vetter Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190405130235.7707-2-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1ad88e6d7c04..5e2ae2300454 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1760,7 +1760,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); - drm_dev_unregister(&dev_priv->drm); + drm_dev_unplug(&dev_priv->drm); i915_gem_shrinker_unregister(dev_priv); } From 91cbdb83d3aee84b6aaaa3fc3b4a6084a35e19c1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Apr 2019 14:48:36 +0100 Subject: [PATCH 0015/1815] drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info For consistency (and elegance!), add intel_device_info.has_rps. 
The immediate boon is that RPS support is now emitted along the other capabilities in the debug log and after errors. Signed-off-by: Chris Wilson Reviewed-by: Sagar Arun Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20190419134836.5626-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 5 +++++ drivers/gpu/drm/i915/intel_device_info.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 7 +++++-- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 066fd2a12851..71612e7fc8bc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2585,6 +2585,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_RC6p(dev_priv) (INTEL_INFO(dev_priv)->has_rc6p) #define HAS_RC6pp(dev_priv) (false) /* HW was never validated */ +#define HAS_RPS(dev_priv) (INTEL_INFO(dev_priv)->has_rps) + #define HAS_CSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_csr) #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f893c2cbce15..ffa2ee70a03d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -370,6 +370,7 @@ static const struct intel_device_info intel_ironlake_m_info = { .has_llc = 1, \ .has_rc6 = 1, \ .has_rc6p = 1, \ + .has_rps = true, \ .ppgtt_type = INTEL_PPGTT_ALIASING, \ .ppgtt_size = 31, \ I9XX_PIPE_OFFSETS, \ @@ -417,6 +418,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { .has_llc = 1, \ .has_rc6 = 1, \ .has_rc6p = 1, \ + .has_rps = true, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 31, \ IVB_PIPE_OFFSETS, \ @@ -470,6 +472,7 @@ static const struct intel_device_info intel_valleyview_info = { .num_pipes = 2, .has_runtime_pm = 1, .has_rc6 = 1, + .has_rps = true, .display.has_gmch = 1, .display.has_hotplug = 1, .ppgtt_type = INTEL_PPGTT_FULL, @@ -565,6 +568,7 @@ static const struct intel_device_info intel_cherryview_info = { .has_64bit_reloc = 1, .has_runtime_pm = 1, .has_rc6 = 1, + .has_rps = true, .has_logical_ring_contexts = 1, .display.has_gmch = 1, .ppgtt_type = INTEL_PPGTT_FULL, @@ -640,6 +644,7 @@ static const struct intel_device_info intel_skylake_gt4_info = { .has_runtime_pm = 1, \ .display.has_csr = 1, \ .has_rc6 = 1, \ + .has_rps = true, \ .display.has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ .has_logical_ring_preemption = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 0e579f158016..7a2f14eff699 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -118,6 +118,7 @@ enum intel_ppgtt_type { func(has_pooled_eu); \ func(has_rc6); \ func(has_rc6p); \ + func(has_rps); \ func(has_runtime_pm); \ func(has_snoop); \ func(has_coherent_ggtt); \ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 87f6fc6d5502..7aa9a8c12b54 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7013,8 +7013,10 @@ static bool sanitize_rc6(struct drm_i915_private *i915) struct intel_device_info *info = mkwrite_device_info(i915); /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(i915)) + if (intel_vgpu_active(i915)) { info->has_rc6 = 0; + info->has_rps = false; + } if (info->has_rc6 && IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) { @@ -8716,7 +8718,8 @@ void 
intel_enable_gt_powersave(struct drm_i915_private *dev_priv) if (HAS_RC6(dev_priv)) intel_enable_rc6(dev_priv); - intel_enable_rps(dev_priv); + if (HAS_RPS(dev_priv)) + intel_enable_rps(dev_priv); if (HAS_LLC(dev_priv)) intel_enable_llc_pstate(dev_priv); From 96354b5ca4ac74d2e47e75987283963001b517f6 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 17 Apr 2019 22:15:19 +0000 Subject: [PATCH 0016/1815] drm: increase drm mmap_range size to 1TB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After patch "drm: Use the same mmap-range offset and size for GEM and TTM", application failed to create bo of system memory because drm mmap_range size decrease to 64GB from original 1TB. This is not big enough for applications. Increase the drm mmap_range size to 1TB. Reviewed-by: Thomas Zimmermann Reviewed-by: Christian König Signed-off-by: Philip Yang Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20190417221507.933-1-Philip.Yang@amd.com --- include/drm/drm_vma_manager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_vma_manager.h b/include/drm/drm_vma_manager.h index f4f8ff1cdeec..76ac5e97a559 100644 --- a/include/drm/drm_vma_manager.h +++ b/include/drm/drm_vma_manager.h @@ -35,7 +35,7 @@ */ #if BITS_PER_LONG == 64 #define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1) -#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 16) +#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 256) #else #define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFUL >> PAGE_SHIFT) + 1) #define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFUL >> PAGE_SHIFT) * 16) From 7ce99d24ed7265b8f83e0213252aa4f65755f872 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Apr 2019 19:26:25 +0100 Subject: [PATCH 0017/1815] drm/i915: Expose the busyspin durations for i915_wait_request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An interesting discussion regarding "hybrid interrupt polling" for NVMe came to the conclusion that the ideal busyspin before sleeping was half of the expected request latency (and better if it was already halfway through that request). This suggested that we too should look again at our tradeoff between spinning and waiting. Currently, our spin simply tries to hide the cost of enabling the interrupt, which is good to avoid penalising nop requests (i.e. test throughput) and not much else. Studying real world workloads suggests that a spin of upto 500us can dramatically boost performance, but the suggestion is that this is not from avoiding interrupt latency per-se, but from secondary effects of sleeping such as allowing the CPU reduce cstate and context switch away. In a truly hybrid interrupt polling scheme, we would aim to sleep until just before the request completed and then wake up in advance of the interrupt and do a quick poll to handle completion. This is tricky for ourselves at the moment as we are not recording request times, and since we allow preemption, our requests are not on as a nicely ordered timeline as IO. However, the idea is interesting, for it will certainly help us decide when busyspinning is worthwhile. v2: Expose the spin setting via Kconfig options for easier adjustment and testing. v3: Don't get caught sneaking in a change to the busyspin parameters. v4: Explain more about the "hybrid interrupt polling" scheme that we want to migrate towards. 
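The tradeoff being tuned is easiest to see outside the driver. Below is a self-contained userspace sketch of the same idea, with a plain flag and a callback standing in for the request and interrupt machinery the kernel code actually uses: poll cheaply for roughly as long as arming the interrupt would take, and only fall back to the expensive blocking wait if the request has not completed by then. The 5 microsecond Kconfig default added by this patch corresponds to spin_ns = 5000 here.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <time.h>

	static long elapsed_ns(const struct timespec *a, const struct timespec *b)
	{
		return (b->tv_sec - a->tv_sec) * 1000000000L +
		       (b->tv_nsec - a->tv_nsec);
	}

	/* Spin for up to @spin_ns before paying for the blocking wait. */
	static bool wait_done(atomic_bool *done, long spin_ns,
			      void (*blocking_wait)(atomic_bool *done))
	{
		struct timespec start, now;

		clock_gettime(CLOCK_MONOTONIC, &start);
		do {
			if (atomic_load_explicit(done, memory_order_acquire))
				return true;	/* completed inside the spin */
			clock_gettime(CLOCK_MONOTONIC, &now);
		} while (spin_ns && elapsed_ns(&start, &now) < spin_ns);

		blocking_wait(done);	/* e.g. enable the irq and sleep */
		return atomic_load_explicit(done, memory_order_acquire);
	}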
Suggested-by: Sagar Kamble References: http://events.linuxfoundation.org/sites/events/files/slides/lemoal-nvme-polling-vault-2017-final_0.pdf Signed-off-by: Chris Wilson Cc: Sagar Kamble Cc: Eero Tamminen Cc: Tvrtko Ursulin Cc: Ben Widawsky Cc: Joonas Lahtinen Cc: MichaƂ Winiarski Reviewed-by: Sagar Kamble Link: https://patchwork.freedesktop.org/patch/msgid/20190419182625.11186-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig | 6 ++++++ drivers/gpu/drm/i915/Kconfig.profile | 13 +++++++++++++ drivers/gpu/drm/i915/i915_request.c | 27 +++++++++++++++++++++++++-- 3 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/Kconfig.profile diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 148be8e1a090..f0556310b851 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -133,3 +133,9 @@ depends on DRM_I915 depends on EXPERT source "drivers/gpu/drm/i915/Kconfig.debug" endmenu + +menu "drm/i915 Profile Guided Optimisation" + visible if EXPERT + depends on DRM_I915 + source "drivers/gpu/drm/i915/Kconfig.profile" +endmenu diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile new file mode 100644 index 000000000000..0e5db98da8f3 --- /dev/null +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -0,0 +1,13 @@ +config DRM_I915_SPIN_REQUEST + int + default 5 # microseconds + help + Before sleeping waiting for a request (GPU operation) to complete, + we may spend some time polling for its completion. As the IRQ may + take a non-negligible time to setup, we do a short spin first to + check if the request will complete in the time it would have taken + us to enable the interrupt. + + May be 0 to disable the initial spin. In practice, we estimate + the cost of enabling the interrupt (if currently disabled) to be + a few microseconds. diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b836721d3b13..b1f00b59bb95 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1340,8 +1340,31 @@ long i915_request_wait(struct i915_request *rq, trace_i915_request_wait_begin(rq, flags); - /* Optimistic short spin before touching IRQs */ - if (__i915_spin_request(rq, state, 5)) + /* + * Optimistic spin before touching IRQs. + * + * We may use a rather large value here to offset the penalty of + * switching away from the active task. Frequently, the client will + * wait upon an old swapbuffer to throttle itself to remain within a + * frame of the gpu. If the client is running in lockstep with the gpu, + * then it should not be waiting long at all, and a sleep now will incur + * extra scheduler latency in producing the next frame. To try to + * avoid adding the cost of enabling/disabling the interrupt to the + * short wait, we first spin to see if the request would have completed + * in the time taken to setup the interrupt. + * + * We need upto 5us to enable the irq, and upto 20us to hide the + * scheduler latency of a context switch, ignoring the secondary + * impacts from a context switch such as cache eviction. + * + * The scheme used for low-latency IO is called "hybrid interrupt + * polling". The suggestion there is to sleep until just before you + * expect to be woken by the device interrupt and then poll for its + * completion. That requires having a good predictor for the request + * duration, which we currently lack. 
+ */ + if (CONFIG_DRM_I915_SPIN_REQUEST && + __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) goto out; /* From b972fffa114b18a120a7bbde105d69a080d24970 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 17 Apr 2019 13:25:24 +0200 Subject: [PATCH 0018/1815] drm/i915: remove DRM_AUTH from IOCTLs which also have DRM_RENDER_ALLOW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is to work around problems with libva and vainfo. Signed-off-by: Christian König Reviewed-by: Chris Wilson Reviewed-by: Daniel Vetter Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190417112525.16848-1-christian.koenig@amd.com --- drivers/gpu/drm/i915/i915_drv.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5e2ae2300454..6354c68c94b3 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -3098,7 +3098,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_BATCHBUFFER, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_IRQ_EMIT, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_IRQ_WAIT, drm_noop, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_SETPARAM, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_ALLOC, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_FREE, drm_noop, DRM_AUTH), @@ -3111,13 +3111,13 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer_ioctl, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), - DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_SET_CACHING, i915_gem_set_caching_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_GET_CACHING, i915_gem_get_caching_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_RENDER_ALLOW), @@ -3136,7 +3136,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER), - DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE_EXT, 
i915_gem_context_create_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_RENDER_ALLOW), From 267e80ee6a341bc694406ef7c4f30fa2721610b7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Apr 2019 21:12:07 +0100 Subject: [PATCH 0019/1815] drm/i915/gtt: Skip clearing the GGTT under gen6+ full-ppgtt If we know that the user cannot access the GGTT, by virtue of having a segregated memory area, we can skip clearing the unused entries as they cannot be accessed. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190419201207.5477-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8f460cc4cc1f..10558bc8bf90 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3280,7 +3280,9 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) size = gen6_get_total_gtt_size(snb_gmch_ctl); ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; - ggtt->vm.clear_range = gen6_ggtt_clear_range; + ggtt->vm.clear_range = nop_clear_range; + if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) + ggtt->vm.clear_range = gen6_ggtt_clear_range; ggtt->vm.insert_page = gen6_ggtt_insert_page; ggtt->vm.insert_entries = gen6_ggtt_insert_entries; ggtt->vm.cleanup = gen6_gmch_remove; From 95ebcda3ef4fa2c928e2e0dbe0f707ca90852110 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:11 -0700 Subject: [PATCH 0020/1815] drm/i915/uc: Rename uC firmware init/fini functions The uC firmware init function is called during GuC/HuC init early phases. Rename to include "_early" and properly reflect which phase we are at. The uC firmware fini function is cleaning up the state set/created on firmware fetch. Replace "_fini" with "_cleanup_fetch".
v2: also rename uC fw fini function Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-2-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/intel_guc.c | 6 +++--- drivers/gpu/drm/i915/intel_guc_fw.c | 2 +- drivers/gpu/drm/i915/intel_huc.h | 2 +- drivers/gpu/drm/i915/intel_huc_fw.c | 2 +- drivers/gpu/drm/i915/intel_uc_fw.c | 4 ++-- drivers/gpu/drm/i915/intel_uc_fw.h | 5 +++-- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 3aabfa2d9198..d81a02b0f525 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -154,7 +154,7 @@ int intel_guc_init_misc(struct intel_guc *guc) void intel_guc_fini_misc(struct intel_guc *guc) { - intel_uc_fw_fini(&guc->fw); + intel_uc_fw_cleanup_fetch(&guc->fw); guc_fini_wq(guc); } @@ -221,7 +221,7 @@ err_log: err_shared: guc_shared_data_destroy(guc); err_fetch: - intel_uc_fw_fini(&guc->fw); + intel_uc_fw_cleanup_fetch(&guc->fw); return ret; } @@ -237,7 +237,7 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); guc_shared_data_destroy(guc); - intel_uc_fw_fini(&guc->fw); + intel_uc_fw_cleanup_fetch(&guc->fw); } static u32 guc_ctl_debug_flags(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index 792a551450c7..4385d9ef02bb 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -90,7 +90,7 @@ void intel_guc_fw_init_early(struct intel_guc *guc) { struct intel_uc_fw *guc_fw = &guc->fw; - intel_uc_fw_init(guc_fw, INTEL_UC_FW_TYPE_GUC); + intel_uc_fw_init_early(guc_fw, INTEL_UC_FW_TYPE_GUC); guc_fw_select(guc_fw); } diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index 7e41d870b509..ce129e301961 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -42,7 +42,7 @@ int intel_huc_check_status(struct intel_huc *huc); static inline void intel_huc_fini_misc(struct intel_huc *huc) { - intel_uc_fw_fini(&huc->fw); + intel_uc_fw_cleanup_fetch(&huc->fw); } static inline int intel_huc_sanitize(struct intel_huc *huc) diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c index 68d47c105939..80a176d91edc 100644 --- a/drivers/gpu/drm/i915/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/intel_huc_fw.c @@ -89,7 +89,7 @@ void intel_huc_fw_init_early(struct intel_huc *huc) { struct intel_uc_fw *huc_fw = &huc->fw; - intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC); + intel_uc_fw_init_early(huc_fw, INTEL_UC_FW_TYPE_HUC); huc_fw_select(huc_fw); } diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index becf05ebae4d..e3e74207a102 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -274,13 +274,13 @@ fail: } /** - * intel_uc_fw_fini - cleanup uC firmware + * intel_uc_fw_cleanup_fetch - cleanup uC firmware * * @uc_fw: uC firmware * * Cleans up uC firmware by releasing the firmware GEM obj. 
*/ -void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index 0e3bd580e267..e6fa8599757c 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -102,7 +102,8 @@ static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) } static inline -void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) +void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, + enum intel_uc_fw_type type) { uc_fw->path = NULL; uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; @@ -144,10 +145,10 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); +void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, int (*xfer)(struct intel_uc_fw *uc_fw, struct i915_vma *vma)); -void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); #endif From 911800765ef6cdcb9103da7557aa5dd9ebb4cda0 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:12 -0700 Subject: [PATCH 0021/1815] drm/i915/uc: Reserve upper range of GGTT GuC and HuC depend on struct_mutex for device reinitialization. Moving away from this dependency requires perma-pinning the firmware images in GGTT. The upper portion of the GuC address space has a sizeable hole (several MB) that is inaccessible by GuC. Reserve this range within GGTT as it can comfortably hold GuC/HuC firmware images. v2: Reserve node rather than insert (Chris) Simpler determination of node start/size (Daniele) Move reserve/release out to intel_guc.* files v3: Reserve starting at GUC_GGTT_TOP only and bail if this fails (Chris) Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-3-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 25 ++++++++++++------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/intel_guc.c | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc.h | 2 ++ 4 files changed, 42 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 10558bc8bf90..3557233de0f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2752,6 +2752,12 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) if (ret) return ret; + if (USES_GUC(dev_priv)) { + ret = intel_guc_reserve_ggtt_top(&dev_priv->guc); + if (ret) + goto err_reserve; + } + /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", @@ -2766,12 +2772,14 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) if (INTEL_PPGTT(dev_priv) == INTEL_PPGTT_ALIASING) { ret = i915_gem_init_aliasing_ppgtt(dev_priv); if (ret) - goto err; + goto err_appgtt; } return 0; -err: +err_appgtt: + intel_guc_release_ggtt_top(&dev_priv->guc); +err_reserve: drm_mm_remove_node(&ggtt->error_capture); return ret; } @@ -2797,6 +2805,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); + 
intel_guc_release_ggtt_top(&dev_priv->guc); + if (drm_mm_initialized(&ggtt->vm.mm)) { intel_vgt_deballoon(dev_priv); i915_address_space_fini(&ggtt->vm); @@ -3371,17 +3381,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) if (ret) return ret; - /* Trim the GGTT to fit the GuC mappable upper range (when enabled). - * This is easier than doing range restriction on the fly, as we - * currently don't have any bits spare to pass in this upper - * restriction! - */ - if (USES_GUC(dev_priv)) { - ggtt->vm.total = min_t(u64, ggtt->vm.total, GUC_GGTT_TOP); - ggtt->mappable_end = - min_t(u64, ggtt->mappable_end, ggtt->vm.total); - } - if ((ggtt->vm.total - 1) >> 32) { DRM_ERROR("We never expected a Global GTT with more than 32bits" " of address space! Found %lldM!\n", diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f597f35b109b..b51e779732c3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -384,6 +384,7 @@ struct i915_ggtt { u32 pin_bias; struct drm_mm_node error_capture; + struct drm_mm_node uc_fw; }; struct i915_hw_ppgtt { diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index d81a02b0f525..a10a68e0ffce 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -721,3 +721,30 @@ u32 intel_guc_reserved_gtt_size(struct intel_guc *guc) { return guc_to_i915(guc)->wopcm.guc.size; } + +int intel_guc_reserve_ggtt_top(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_i915(guc); + struct i915_ggtt *ggtt = &i915->ggtt; + u64 size; + int ret; + + size = ggtt->vm.total - GUC_GGTT_TOP; + + ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size, + GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, + PIN_NOEVICT); + if (ret) + DRM_DEBUG_DRIVER("GuC: failed to reserve top of ggtt\n"); + + return ret; +} + +void intel_guc_release_ggtt_top(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_i915(guc); + struct i915_ggtt *ggtt = &i915->ggtt; + + if (drm_mm_node_allocated(&ggtt->uc_fw)) + drm_mm_remove_node(&ggtt->uc_fw); +} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 2c59ff8d9f39..2494e84831a2 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -173,6 +173,8 @@ int intel_guc_suspend(struct intel_guc *guc); int intel_guc_resume(struct intel_guc *guc); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); u32 intel_guc_reserved_gtt_size(struct intel_guc *guc); +int intel_guc_reserve_ggtt_top(struct intel_guc *guc); +void intel_guc_release_ggtt_top(struct intel_guc *guc); static inline int intel_guc_sanitize(struct intel_guc *guc) { From fc488b59034aa4519f4971f4b2b842718e56af79 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:13 -0700 Subject: [PATCH 0022/1815] drm/i915/uc: Place uC firmware in upper range of GGTT Currently we pin the GuC or HuC firmware image just before uploading. Perma-pin during uC initialization instead and use the range reserved at the top of the address space. Moving the firmware resulted in needing to: - use an additional pinning for the rsa signature which will be used during HuC auth as addresses above GUC_GGTT_TOP do not map through GTT. 
v2: Remove call to set to gtt domain Do not restore fw gtt mapping unconditionally Separate out pin/unpin functions and drop usage of pin/unpin Use uc_fw init/fini functions to bind/unbind fw object v3: Bind is only needed during xfer (Chris) Remove attempts to bind outside of xfer (Chris) Mark fw bind/unbind static Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-4-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/intel_guc.c | 9 ++- drivers/gpu/drm/i915/intel_guc_fw.c | 18 +++--- drivers/gpu/drm/i915/intel_huc.c | 74 ++++++++++++++++----- drivers/gpu/drm/i915/intel_huc.h | 4 ++ drivers/gpu/drm/i915/intel_huc_fw.c | 47 ++++++++++---- drivers/gpu/drm/i915/intel_uc.c | 23 +++++-- drivers/gpu/drm/i915/intel_uc_fw.c | 99 ++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_uc_fw.h | 7 +- 8 files changed, 208 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index a10a68e0ffce..c4ac29309fcc 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -189,9 +189,13 @@ int intel_guc_init(struct intel_guc *guc) struct drm_i915_private *dev_priv = guc_to_i915(guc); int ret; - ret = guc_shared_data_create(guc); + ret = intel_uc_fw_init(&guc->fw); if (ret) goto err_fetch; + + ret = guc_shared_data_create(guc); + if (ret) + goto err_fw; GEM_BUG_ON(!guc->shared_data); ret = intel_guc_log_create(&guc->log); @@ -220,6 +224,8 @@ err_log: intel_guc_log_destroy(&guc->log); err_shared: guc_shared_data_destroy(guc); +err_fw: + intel_uc_fw_fini(&guc->fw); err_fetch: intel_uc_fw_cleanup_fetch(&guc->fw); return ret; @@ -237,6 +243,7 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); guc_shared_data_destroy(guc); + intel_uc_fw_fini(&guc->fw); intel_uc_fw_cleanup_fetch(&guc->fw); } diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index 4385d9ef02bb..8b2dcc70b956 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -122,14 +122,16 @@ static void guc_prepare_xfer(struct intel_guc *guc) } /* Copy RSA signature from the fw image to HW for verification */ -static void guc_xfer_rsa(struct intel_guc *guc, struct i915_vma *vma) +static void guc_xfer_rsa(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_uc_fw *fw = &guc->fw; + struct sg_table *pages = fw->obj->mm.pages; u32 rsa[UOS_RSA_SCRATCH_COUNT]; int i; - sg_pcopy_to_buffer(vma->pages->sgl, vma->pages->nents, - rsa, sizeof(rsa), guc->fw.rsa_offset); + sg_pcopy_to_buffer(pages->sgl, pages->nents, + rsa, sizeof(rsa), fw->rsa_offset); for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]); @@ -201,7 +203,7 @@ static int guc_wait_ucode(struct intel_guc *guc) * transfer between GTT locations. This functionality is left out of the API * for now as there is no need for it. 
*/ -static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) +static int guc_xfer_ucode(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_uc_fw *guc_fw = &guc->fw; @@ -214,7 +216,7 @@ static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); /* Set the source address for the new blob */ - offset = intel_guc_ggtt_offset(guc, vma) + guc_fw->header_offset; + offset = intel_uc_fw_ggtt_offset(guc_fw) + guc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); @@ -233,7 +235,7 @@ static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) /* * Load the GuC firmware blob into the MinuteIA. */ -static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) +static int guc_fw_xfer(struct intel_uc_fw *guc_fw) { struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -250,9 +252,9 @@ static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) * by the DMA engine in one operation, whereas the RSA signature is * loaded via MMIO. */ - guc_xfer_rsa(guc, vma); + guc_xfer_rsa(guc); - ret = guc_xfer_ucode(guc, vma); + ret = guc_xfer_ucode(guc); intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 94c04f16a2ad..1ff1fb015e58 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -40,6 +40,61 @@ int intel_huc_init_misc(struct intel_huc *huc) return 0; } +static int intel_huc_rsa_data_create(struct intel_huc *huc) +{ + struct drm_i915_private *i915 = huc_to_i915(huc); + struct intel_guc *guc = &i915->guc; + struct i915_vma *vma; + void *vaddr; + + /* + * HuC firmware will sit above GUC_GGTT_TOP and will not map + * through GTT. Unfortunately, this means GuC cannot perform + * the HuC auth. as the rsa offset now falls within the GuC + * inaccessible range. We resort to perma-pinning an additional + * vma within the accessible range that only contains the rsa + * signature. The GuC can use this extra pinning to perform + * the authentication since its GGTT offset will be GuC + * accessible. 
+ */ + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + return PTR_ERR(vaddr); + } + + huc->rsa_data = vma; + huc->rsa_data_vaddr = vaddr; + + return 0; +} + +static void intel_huc_rsa_data_destroy(struct intel_huc *huc) +{ + i915_vma_unpin_and_release(&huc->rsa_data, I915_VMA_RELEASE_MAP); +} + +int intel_huc_init(struct intel_huc *huc) +{ + int err; + + err = intel_huc_rsa_data_create(huc); + if (err) + return err; + + return intel_uc_fw_init(&huc->fw); +} + +void intel_huc_fini(struct intel_huc *huc) +{ + intel_uc_fw_fini(&huc->fw); + intel_huc_rsa_data_destroy(huc); +} + /** * intel_huc_auth() - Authenticate HuC uCode * @huc: intel_huc structure @@ -55,27 +110,17 @@ int intel_huc_auth(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_i915(huc); struct intel_guc *guc = &i915->guc; - struct i915_vma *vma; u32 status; int ret; if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) return -ENOEXEC; - vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0, - PIN_OFFSET_BIAS | i915->ggtt.pin_bias); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret); - goto fail; - } - ret = intel_guc_auth_huc(guc, - intel_guc_ggtt_offset(guc, vma) + - huc->fw.rsa_offset); + intel_guc_ggtt_offset(guc, huc->rsa_data)); if (ret) { DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); - goto fail_unpin; + goto fail; } /* Check authentication status, it should be done by now */ @@ -86,14 +131,11 @@ int intel_huc_auth(struct intel_huc *huc) 2, 50, &status); if (ret) { DRM_ERROR("HuC: Firmware not verified %#x\n", status); - goto fail_unpin; + goto fail; } - i915_vma_unpin(vma); return 0; -fail_unpin: - i915_vma_unpin(vma); fail: huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL; diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index ce129e301961..a0c21ae02a99 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -33,10 +33,14 @@ struct intel_huc { struct intel_uc_fw fw; /* HuC-specific additions */ + struct i915_vma *rsa_data; + void *rsa_data_vaddr; }; void intel_huc_init_early(struct intel_huc *huc); int intel_huc_init_misc(struct intel_huc *huc); +int intel_huc_init(struct intel_huc *huc); +void intel_huc_fini(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc); int intel_huc_check_status(struct intel_huc *huc); diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c index 80a176d91edc..44c559526072 100644 --- a/drivers/gpu/drm/i915/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/intel_huc_fw.c @@ -93,18 +93,24 @@ void intel_huc_fw_init_early(struct intel_huc *huc) huc_fw_select(huc_fw); } -/** - * huc_fw_xfer() - DMA's the firmware - * @huc_fw: the firmware descriptor - * @vma: the firmware image (bound into the GGTT) - * - * Transfer the firmware image to RAM for execution by the microcontroller. - * - * Return: 0 on success, non-zero on failure - */ -static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) +static void huc_xfer_rsa(struct intel_huc *huc) { - struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + struct intel_uc_fw *fw = &huc->fw; + struct sg_table *pages = fw->obj->mm.pages; + + /* + * HuC firmware image is outside GuC accessible range. 
+ * Copy the RSA signature out of the image into + * the perma-pinned region set aside for it + */ + sg_pcopy_to_buffer(pages->sgl, pages->nents, + huc->rsa_data_vaddr, fw->rsa_size, + fw->rsa_offset); +} + +static int huc_xfer_ucode(struct intel_huc *huc) +{ + struct intel_uc_fw *huc_fw = &huc->fw; struct drm_i915_private *dev_priv = huc_to_i915(huc); struct intel_uncore *uncore = &dev_priv->uncore; unsigned long offset = 0; @@ -116,7 +122,7 @@ static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); /* Set the source address for the uCode */ - offset = intel_guc_ggtt_offset(&dev_priv->guc, vma) + + offset = intel_uc_fw_ggtt_offset(huc_fw) + huc_fw->header_offset; intel_uncore_write(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset)); @@ -150,6 +156,23 @@ static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) return ret; } +/** + * huc_fw_xfer() - DMA's the firmware + * @huc_fw: the firmware descriptor + * + * Transfer the firmware image to RAM for execution by the microcontroller. + * + * Return: 0 on success, non-zero on failure + */ +static int huc_fw_xfer(struct intel_uc_fw *huc_fw) +{ + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + + huc_xfer_rsa(huc); + + return huc_xfer_ucode(huc); +} + /** * intel_huc_fw_upload() - load HuC uCode to device * @huc: intel_huc structure diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 25b80ffe71ad..488dffba04d2 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -280,6 +280,7 @@ void intel_uc_fini_misc(struct drm_i915_private *i915) int intel_uc_init(struct drm_i915_private *i915) { struct intel_guc *guc = &i915->guc; + struct intel_huc *huc = &i915->huc; int ret; if (!USES_GUC(i915)) @@ -292,19 +293,30 @@ int intel_uc_init(struct drm_i915_private *i915) if (ret) return ret; + if (USES_HUC(i915)) { + ret = intel_huc_init(huc); + if (ret) + goto err_guc; + } + if (USES_GUC_SUBMISSION(i915)) { /* * This is stuff we need to have available at fw load time * if we are planning to enable submission later */ ret = intel_guc_submission_init(guc); - if (ret) { - intel_guc_fini(guc); - return ret; - } + if (ret) + goto err_huc; } return 0; + +err_huc: + if (USES_HUC(i915)) + intel_huc_fini(huc); +err_guc: + intel_guc_fini(guc); + return ret; } void intel_uc_fini(struct drm_i915_private *i915) @@ -319,6 +331,9 @@ void intel_uc_fini(struct drm_i915_private *i915) if (USES_GUC_SUBMISSION(i915)) intel_guc_submission_fini(guc); + if (USES_HUC(i915)) + intel_huc_fini(&i915->huc); + intel_guc_fini(guc); } diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index e3e74207a102..b9cb6fea9332 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -191,6 +191,35 @@ fail: release_firmware(fw); /* OK even if fw is NULL */ } +static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj = uc_fw->obj; + struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; + struct i915_vma dummy = { + .node.start = intel_uc_fw_ggtt_offset(uc_fw), + .node.size = obj->base.size, + .pages = obj->mm.pages, + .vm = &ggtt->vm, + }; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size); + + /* uc_fw->obj cache domains were not controlled across suspend */ + drm_clflush_sg(dummy.pages); + + ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0); +} + +static void 
intel_uc_fw_ggtt_unbind(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj = uc_fw->obj; + struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; + u64 start = intel_uc_fw_ggtt_offset(uc_fw); + + ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size); +} + /** * intel_uc_fw_upload - load uC firmware using custom loader * @uc_fw: uC firmware @@ -201,11 +230,8 @@ fail: * Return: 0 on success, non-zero on failure. */ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, - int (*xfer)(struct intel_uc_fw *uc_fw, - struct i915_vma *vma)) + int (*xfer)(struct intel_uc_fw *uc_fw)) { - struct i915_vma *vma; - u32 ggtt_pin_bias; int err; DRM_DEBUG_DRIVER("%s fw load %s\n", @@ -219,36 +245,15 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, intel_uc_fw_type_repr(uc_fw->type), intel_uc_fw_status_repr(uc_fw->load_status)); - /* Pin object with firmware */ - err = i915_gem_object_set_to_gtt_domain(uc_fw->obj, false); - if (err) { - DRM_DEBUG_DRIVER("%s fw set-domain err=%d\n", - intel_uc_fw_type_repr(uc_fw->type), err); - goto fail; - } - - ggtt_pin_bias = to_i915(uc_fw->obj->base.dev)->ggtt.pin_bias; - vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0, - PIN_OFFSET_BIAS | ggtt_pin_bias); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n", - intel_uc_fw_type_repr(uc_fw->type), err); - goto fail; - } + intel_uc_fw_ggtt_bind(uc_fw); /* Call custom loader */ - err = xfer(uc_fw, vma); - - /* - * We keep the object pages for reuse during resume. But we can unpin it - * now that DMA has completed, so it doesn't continue to take up space. - */ - i915_vma_unpin(vma); - + err = xfer(uc_fw); if (err) goto fail; + intel_uc_fw_ggtt_unbind(uc_fw); + uc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; DRM_DEBUG_DRIVER("%s fw load %s\n", intel_uc_fw_type_repr(uc_fw->type), @@ -273,6 +278,42 @@ fail: return err; } +int intel_uc_fw_init(struct intel_uc_fw *uc_fw) +{ + int err; + + if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return -ENOEXEC; + + err = i915_gem_object_pin_pages(uc_fw->obj); + if (err) + DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + + return err; +} + +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +{ + if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return; + + i915_gem_object_unpin_pages(uc_fw->obj); +} + +u32 intel_uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_private *i915 = to_i915(uc_fw->obj->base.dev); + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node *node = &ggtt->uc_fw; + + GEM_BUG_ON(!node->allocated); + GEM_BUG_ON(upper_32_bits(node->start)); + GEM_BUG_ON(upper_32_bits(node->start + node->size - 1)); + + return lower_32_bits(node->start); +} + /** * intel_uc_fw_cleanup_fetch - cleanup uC firmware * diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index e6fa8599757c..ff98f8661d72 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -27,7 +27,6 @@ struct drm_printer; struct drm_i915_private; -struct i915_vma; /* Home of GuC, HuC and DMC firmwares */ #define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" @@ -147,8 +146,10 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, - int (*xfer)(struct intel_uc_fw *uc_fw, - struct i915_vma *vma)); + int (*xfer)(struct intel_uc_fw 
*uc_fw)); +int intel_uc_fw_init(struct intel_uc_fw *uc_fw); +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); +u32 intel_uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw); void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); #endif From 40d211ef62de3efdfb0a78894fc0bc3b24061b40 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:14 -0700 Subject: [PATCH 0023/1815] Revert "drm/i915/guc: Disable global reset" We have now prepared the guc reset paths to avoid taking struct_mutex, or any other lock, and so it is now safe to re-enable. References: fe62365f9f80 ("drm/i915/guc: Disable global reset") Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-5-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/i915_reset.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 677d59304e78..1092d16c289c 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -641,9 +641,6 @@ int intel_gpu_reset(struct drm_i915_private *i915, bool intel_has_gpu_reset(struct drm_i915_private *i915) { - if (USES_GUC(i915)) - return false; - if (!i915_modparams.reset) return NULL; From f3c2b76ef25e73e2065614108fe33bf2d790cac3 Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Fri, 19 Apr 2019 16:00:15 -0700 Subject: [PATCH 0024/1815] drm/i915/selftests: Check that gpu reset is usable from atomic context GPU reset is now available with GuC enabled, so re-enable our check that this reset is usable from atomic context. Signed-off-by: Fernando Pacheco Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190419230015.18121-6-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 050bd1e19e02..2fd33aad8683 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -1814,9 +1814,6 @@ static int igt_atomic_reset(void *arg) /* Check that the resets are usable from atomic context */ - if (USES_GUC_SUBMISSION(i915)) - return 0; /* guc is dead; long live the guc */ - igt_global_reset_lock(i915); mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(i915); @@ -1846,6 +1843,9 @@ static int igt_atomic_reset(void *arg) force_reset(i915); } + if (USES_GUC_SUBMISSION(i915)) + goto unlock; + if (intel_has_reset_engine(i915)) { struct intel_engine_cs *engine; enum intel_engine_id id; From 409c53f07a81f8db122c461f3255c6f43558c881 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Fri, 19 Apr 2019 09:33:01 +0100 Subject: [PATCH 0025/1815] drm/lima: handle shared irq case for lima_pp_bcast_irq_handler On Hikey board all lima ip blocks are shared with one irq. This patch avoids a NULL ptr deref crash on this platform on startup. Tested with Weston and kmscube. 
Signed-off-by: Peter Griffin Cc: Rob Herring Cc: Daniel Vetter Cc: Qiang Yu Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/1555662781-22570-7-git-send-email-peter.griffin@linaro.org --- drivers/gpu/drm/lima/lima_pp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c index d29721e177bf..8fef224b93c8 100644 --- a/drivers/gpu/drm/lima/lima_pp.c +++ b/drivers/gpu/drm/lima/lima_pp.c @@ -64,7 +64,13 @@ static irqreturn_t lima_pp_bcast_irq_handler(int irq, void *data) struct lima_ip *pp_bcast = data; struct lima_device *dev = pp_bcast->dev; struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp; - struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame; + struct drm_lima_m450_pp_frame *frame; + + /* for shared irq case */ + if (!pipe->current_task) + return IRQ_NONE; + + frame = pipe->current_task->frame; for (i = 0; i < frame->num_pp; i++) { struct lima_ip *ip = pipe->processor[i]; From 2d6692e642e7ca02883524350038e2a431ef44e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 20 Apr 2019 12:55:39 +0100 Subject: [PATCH 0026/1815] drm/i915: Start writeback from the shrinker When we are called to relieve mempressure via the shrinker, the only way we can make progress is either by discarding unwanted pages (those objects that userspace has marked MADV_DONTNEED) or by reclaiming the dirty objects via swap. As we know that is the only way to make further progress, we can initiate the writeback as we invalidate the objects. This means the objects we put onto the inactive anon lru list are already marked for reclaim+writeback and so will trigger a wait upon the writeback inside direct reclaim, greatly improving the success rate of direct reclaim on i915 objects. The corollary is that we may start a slow swap on opportunistic mempressure from the likes of the compaction + migration kthreads. This is limited by those threads only being allowed to shrink idle pages, but also that if we reactivate the page before it is swapped out by gpu activity, we only pay the cost of repinning the page. The cost is most felt when an object is reused after mempressure, which hopefully excludes the latency sensitive tasks (as we are just extending the impact of swap thrashing to them). Apparently this is not the first time we've had this idea. Back in commit 5537252b6b6d ("drm/i915: Invalidate our pages under memory pressure") we wanted to start writeback but settled on invalidate after Hugh Dickins warned us about a possibility of a deadlock within shmemfs if we started writeback from shrink_slab. Looking at the callchain, using writeback from i915_gem_shrink should be equivalent to the pageout also employed by shrink_slab, i.e. it should not be any riskier afaict. v2: Leave mmapings intact. At this point, the only mmapings of our objects will be via CPU mmaps on the shmemfs filp, which are out-of-scope for our LRU tracking. Instead leave those pages to the inactive anon LRU page list for aging and pageout as normal. v3: Be selective on which paths trigger writeback, in particular excluding paths shrinking just to reclaim vm space (e.g. mmap, vmap reapers) and avoid starting writeback on the entire process space from within the pm freezer.
References: https://bugs.freedesktop.org/show_bug.cgi?id=108686 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Matthew Auld Cc: Daniel Vetter Cc: Michal Hocko Reviewed-by: Joonas Lahtinen #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20190420115539.29081-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 13 ++-- drivers/gpu/drm/i915/i915_gem.c | 27 +-------- drivers/gpu/drm/i915/i915_gem_shrinker.c | 75 ++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 71612e7fc8bc..dc74d33c20aa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3007,7 +3007,7 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, enum i915_mm_subclass subclass); -void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj); +void __i915_gem_object_truncate(struct drm_i915_gem_object *obj); enum i915_map_type { I915_MAP_WB = 0, @@ -3268,11 +3268,12 @@ unsigned long i915_gem_shrink(struct drm_i915_private *i915, unsigned long target, unsigned long *nr_scanned, unsigned flags); -#define I915_SHRINK_PURGEABLE 0x1 -#define I915_SHRINK_UNBOUND 0x2 -#define I915_SHRINK_BOUND 0x4 -#define I915_SHRINK_ACTIVE 0x8 -#define I915_SHRINK_VMAPS 0x10 +#define I915_SHRINK_PURGEABLE BIT(0) +#define I915_SHRINK_UNBOUND BIT(1) +#define I915_SHRINK_BOUND BIT(2) +#define I915_SHRINK_ACTIVE BIT(3) +#define I915_SHRINK_VMAPS BIT(4) +#define I915_SHRINK_WRITEBACK BIT(5) unsigned long i915_gem_shrink_all(struct drm_i915_private *i915); void i915_gem_shrinker_register(struct drm_i915_private *i915); void i915_gem_shrinker_unregister(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e5462639de0b..a2bf94c3cfca 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2143,8 +2143,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, } /* Immediately discard the backing storage */ -static void -i915_gem_object_truncate(struct drm_i915_gem_object *obj) +void __i915_gem_object_truncate(struct drm_i915_gem_object *obj) { i915_gem_object_free_mmap_offset(obj); @@ -2161,28 +2160,6 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) obj->mm.pages = ERR_PTR(-EFAULT); } -/* Try to discard unwanted pages */ -void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) -{ - struct address_space *mapping; - - lockdep_assert_held(&obj->mm.lock); - GEM_BUG_ON(i915_gem_object_has_pages(obj)); - - switch (obj->mm.madv) { - case I915_MADV_DONTNEED: - i915_gem_object_truncate(obj); - case __I915_MADV_PURGED: - return; - } - - if (obj->base.filp == NULL) - return; - - mapping = obj->base.filp->f_mapping, - invalidate_mapping_pages(mapping, 0, (loff_t)-1); -} - /* * Move pages to appropriate lru and release the pagevec, decrementing the * ref count of those pages. 
@@ -4023,7 +4000,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, /* if the object is no longer attached, discard its backing storage */ if (obj->mm.madv == I915_MADV_DONTNEED && !i915_gem_object_has_pages(obj)) - i915_gem_object_truncate(obj); + __i915_gem_object_truncate(obj); args->retained = obj->mm.madv != __I915_MADV_PURGED; mutex_unlock(&obj->mm.lock); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 6da795c7e62e..588e3898b120 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -114,6 +114,67 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) return !i915_gem_object_has_pages(obj); } +static void __start_writeback(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + struct address_space *mapping; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .nr_to_write = SWAP_CLUSTER_MAX, + .range_start = 0, + .range_end = LLONG_MAX, + .for_reclaim = 1, + }; + unsigned long i; + + lockdep_assert_held(&obj->mm.lock); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); + + switch (obj->mm.madv) { + case I915_MADV_DONTNEED: + __i915_gem_object_truncate(obj); + case __I915_MADV_PURGED: + return; + } + + if (!obj->base.filp) + return; + + if (!(flags & I915_SHRINK_WRITEBACK)) + return; + + /* + * Leave mmapings intact (GTT will have been revoked on unbinding, + * leaving only CPU mmapings around) and add those pages to the LRU + * instead of invoking writeback so they are aged and paged out + * as normal. + */ + mapping = obj->base.filp->f_mapping; + + /* Begin writeback on each dirty page */ + for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) { + struct page *page; + + page = find_lock_entry(mapping, i); + if (!page || xa_is_value(page)) + continue; + + if (!page_mapped(page) && clear_page_dirty_for_io(page)) { + int ret; + + SetPageReclaim(page); + ret = mapping->a_ops->writepage(page, &wbc); + if (!PageWriteback(page)) + ClearPageReclaim(page); + if (!ret) + goto put; + } + unlock_page(page); +put: + put_page(page); + } +} + /** * i915_gem_shrink - Shrink buffer object caches * @i915: i915 device @@ -254,7 +315,7 @@ i915_gem_shrink(struct drm_i915_private *i915, mutex_lock_nested(&obj->mm.lock, I915_MM_SHRINKER); if (!i915_gem_object_has_pages(obj)) { - __i915_gem_object_invalidate(obj); + __start_writeback(obj, flags); count += obj->base.size >> PAGE_SHIFT; } mutex_unlock(&obj->mm.lock); @@ -366,13 +427,15 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) &sc->nr_scanned, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE); + I915_SHRINK_PURGEABLE | + I915_SHRINK_WRITEBACK); if (sc->nr_scanned < sc->nr_to_scan) freed += i915_gem_shrink(i915, sc->nr_to_scan - sc->nr_scanned, &sc->nr_scanned, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { intel_wakeref_t wakeref; @@ -382,7 +445,8 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) &sc->nr_scanned, I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); } } @@ -404,7 +468,8 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) with_intel_runtime_pm(i915, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); /* Because we may be 
allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not From ef0db94f94a0458b99bd712c1a40a87c937f236f Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 16 Apr 2019 09:59:10 +0200 Subject: [PATCH 0027/1815] MAINTAINERS: Add Sam as reviewer for drm/panel Sam has been helping out a lot with reviewing DRM panel patches. Add him as reviewer to help him do this important work. Signed-off-by: Thierry Reding Acked-by: Jagan Teki Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20190416075910.12015-1-thierry.reding@gmail.com --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c227d2818c98..3d199592fd2c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5340,6 +5340,7 @@ T: git git://anongit.freedesktop.org/drm/drm-misc DRM PANEL DRIVERS M: Thierry Reding +R: Sam Ravnborg L: dri-devel@lists.freedesktop.org T: git git://anongit.freedesktop.org/drm/drm-misc S: Maintained From 9c11b12184bb01d8ba2c48e655509b184f02c769 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 19 Apr 2019 10:10:26 +0300 Subject: [PATCH 0028/1815] drm/i915/icl: Fix MG_DP_MODE() register programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the order of lane, port parameters passed to the register macro. Note that this was already partly fixed by commit 37fc7845df7b6 ("drm/i915: Call MG_DP_MODE() macro with the right parameters order") While at it simplify things by using the macro directly instead of an unnecessary redirection via an array. v2: - Add a note the commit message about simplifying things. (JosĂ©) Fixes: 58106b7d816e1 ("drm/i915: Make MG PHY macros semantically consistent") Cc: JosĂ© Roberto de Souza Cc: Lucas De Marchi Cc: Aditya Swarup Signed-off-by: Imre Deak Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190419071026.32370-1-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 24f9106efcc6..f181c26f62fd 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2905,21 +2905,20 @@ static void icl_enable_phy_clock_gating(struct intel_digital_port *dig_port) struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum port port = dig_port->base.port; enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - i915_reg_t mg_regs[2] = { MG_DP_MODE(0, port), MG_DP_MODE(1, port) }; u32 val; - int i; + int ln; if (tc_port == PORT_TC_NONE) return; - for (i = 0; i < ARRAY_SIZE(mg_regs); i++) { - val = I915_READ(mg_regs[i]); + for (ln = 0; ln < 2; ln++) { + val = I915_READ(MG_DP_MODE(ln, port)); val |= MG_DP_MODE_CFG_TR2PWR_GATING | MG_DP_MODE_CFG_TRPWR_GATING | MG_DP_MODE_CFG_CLNPWR_GATING | MG_DP_MODE_CFG_DIGPWR_GATING | MG_DP_MODE_CFG_GAONPWR_GATING; - I915_WRITE(mg_regs[i], val); + I915_WRITE(MG_DP_MODE(ln, port), val); } val = I915_READ(MG_MISC_SUS0(tc_port)); @@ -2938,21 +2937,20 @@ static void icl_disable_phy_clock_gating(struct intel_digital_port *dig_port) struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum port port = dig_port->base.port; enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - i915_reg_t mg_regs[2] = { MG_DP_MODE(port, 0), MG_DP_MODE(port, 1) }; u32 val; - int i; + int ln; if (tc_port == PORT_TC_NONE) return; - for (i = 0; i < ARRAY_SIZE(mg_regs); 
i++) { - val = I915_READ(mg_regs[i]); + for (ln = 0; ln < 2; ln++) { + val = I915_READ(MG_DP_MODE(ln, port)); val &= ~(MG_DP_MODE_CFG_TR2PWR_GATING | MG_DP_MODE_CFG_TRPWR_GATING | MG_DP_MODE_CFG_CLNPWR_GATING | MG_DP_MODE_CFG_DIGPWR_GATING | MG_DP_MODE_CFG_GAONPWR_GATING); - I915_WRITE(mg_regs[i], val); + I915_WRITE(MG_DP_MODE(ln, port), val); } val = I915_READ(MG_MISC_SUS0(tc_port)); From 245e736408175742a29acd5b3735ecc6b39f3ca1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Apr 2019 19:19:04 +0100 Subject: [PATCH 0029/1815] dma-buf: Remove unused sync_dump() sync_dump() is an unused, unexported, function that adds 64k to the kernel image and doesn't even provide locking around the global array it uses. add/remove: 0/2 grow/shrink: 0/0 up/down: 0/-65734 (-65734) Function old new delta sync_dump 198 - -198 sync_dump_buf 65536 - -65536 Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Gustavo Padovan Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190419181904.6199-1-chris@chris-wilson.co.uk --- drivers/dma-buf/sync_debug.c | 26 -------------------------- drivers/dma-buf/sync_debug.h | 1 - 2 files changed, 27 deletions(-) diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index c0abf37df88b..434a66518e0d 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -197,29 +197,3 @@ static __init int sync_debugfs_init(void) return 0; } late_initcall(sync_debugfs_init); - -#define DUMP_CHUNK 256 -static char sync_dump_buf[64 * 1024]; -void sync_dump(void) -{ - struct seq_file s = { - .buf = sync_dump_buf, - .size = sizeof(sync_dump_buf) - 1, - }; - int i; - - sync_info_debugfs_show(&s, NULL); - - for (i = 0; i < s.count; i += DUMP_CHUNK) { - if ((s.count - i) > DUMP_CHUNK) { - char c = s.buf[i + DUMP_CHUNK]; - - s.buf[i + DUMP_CHUNK] = 0; - pr_cont("%s", s.buf + i); - s.buf[i + DUMP_CHUNK] = c; - } else { - s.buf[s.count] = 0; - pr_cont("%s", s.buf + i); - } - } -} diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index 05e33f937ad0..6176e52ba2d7 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -68,6 +68,5 @@ void sync_timeline_debug_add(struct sync_timeline *obj); void sync_timeline_debug_remove(struct sync_timeline *obj); void sync_file_debug_add(struct sync_file *fence); void sync_file_debug_remove(struct sync_file *fence); -void sync_dump(void); #endif /* _LINUX_SYNC_H */ From b4a2c0055a4fb7bf34d0490284c966bea17f7117 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 18 Feb 2019 21:27:04 -0300 Subject: [PATCH 0030/1815] dt-bindings: Add vendor prefix for VXT Ltd VXT Ltd is a manufacturer of projected capacitive touch panel and display solutions: http://www.vxt.com.tw/ Reviewed-by: Otavio Salvador Reviewed-by: Rob Herring Signed-off-by: Fabio Estevam Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20190219002706.20077-1-festevam@gmail.com --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 9506140167d6..433e03fbe078 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -442,6 +442,7 @@ vivante Vivante Corporation vocore VoCore Studio voipac Voipac Technologies s.r.o. vot Vision Optical Technology Co., Ltd. +vxt VXT Ltd wd Western Digital Corp. 
wetek WeTek Electronics, limited. wexler Wexler From 68c2edaca65165114e04daeb5f0af0e3e447c4a9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 18 Feb 2019 21:27:05 -0300 Subject: [PATCH 0031/1815] dt-bindings: Add VXT VL050-8048NT-C01 panel bindings The VXT VL050-8048NT-C01 is a TFT LCD panel with an 800x480 resolution connected via a 24-bit-wide parallel interface. Reviewed-by: Otavio Salvador Reviewed-by: Rob Herring Signed-off-by: Fabio Estevam Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20190219002706.20077-2-festevam@gmail.com --- .../bindings/display/panel/vl050_8048nt_c01.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/panel/vl050_8048nt_c01.txt diff --git a/Documentation/devicetree/bindings/display/panel/vl050_8048nt_c01.txt b/Documentation/devicetree/bindings/display/panel/vl050_8048nt_c01.txt new file mode 100644 index 000000000000..b42bf06bbd99 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/vl050_8048nt_c01.txt @@ -0,0 +1,12 @@ +VXT 800x480 color TFT LCD panel + +Required properties: +- compatible: should be "vxt,vl050-8048nt-c01" +- power-supply: as specified in the base binding + +Optional properties: +- backlight: as specified in the base binding +- enable-gpios: as specified in the base binding + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. From 04206185a160071a7a13e3603ff403d19a03dd7e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 18 Feb 2019 21:27:06 -0300 Subject: [PATCH 0032/1815] drm/panel: simple: Add support for VXT VL050-8048NT-C01 panel Add support for the VXT VL050-8048NT-C01 800x480 panel to the panel-simple driver. This panel is used on some boards manufactured by TechNexion, such as imx7d-pico.
Reviewed-by: Otavio Salvador Reviewed-by: Sam Ravnborg Signed-off-by: Fabio Estevam Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20190219002706.20077-3-festevam@gmail.com --- drivers/gpu/drm/panel/panel-simple.c | 29 ++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 569be4efd8d1..25dc1abd8e1d 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2508,6 +2508,32 @@ static const struct panel_desc urt_umsh_8596md_parallel = { .bus_format = MEDIA_BUS_FMT_RGB666_1X18, }; +static const struct drm_display_mode vl050_8048nt_c01_mode = { + .clock = 33333, + .hdisplay = 800, + .hsync_start = 800 + 210, + .hsync_end = 800 + 210 + 20, + .htotal = 800 + 210 + 20 + 46, + .vdisplay = 480, + .vsync_start = 480 + 22, + .vsync_end = 480 + 22 + 10, + .vtotal = 480 + 22 + 10 + 23, + .vrefresh = 60, + .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC, +}; + +static const struct panel_desc vl050_8048nt_c01 = { + .modes = &vl050_8048nt_c01_mode, + .num_modes = 1, + .bpc = 8, + .size = { + .width = 120, + .height = 76, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE, +}; + static const struct drm_display_mode winstar_wf35ltiacd_mode = { .clock = 6410, .hdisplay = 320, @@ -2834,6 +2860,9 @@ static const struct of_device_id platform_of_match[] = { }, { .compatible = "urt,umsh-8596md-20t", .data = &urt_umsh_8596md_parallel, + }, { + .compatible = "vxt,vl050-8048nt-c01", + .data = &vl050_8048nt_c01, }, { .compatible = "winstar,wf35ltiacd", .data = &winstar_wf35ltiacd, From 7a4f4c31c5bcd214707ec217a6cf067185f15cf3 Mon Sep 17 00:00:00 2001 From: Jonathan Bakker Date: Fri, 22 Feb 2019 18:51:52 +0100 Subject: [PATCH 0033/1815] dt-bindings: panel: Add Samsung S6E63M0 panel documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds documentation for Samsung S6E63M0 AMOLED LCD panel driver. Signed-off-by: Jonathan Bakker Signed-off-by: PaweƂ Chmiel Reviewed-by: Rob Herring Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20190222175153.20567-1-pawel.mikolaj.chmiel@gmail.com --- .../display/panel/samsung,s6e63m0.txt | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.txt diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.txt b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.txt new file mode 100644 index 000000000000..9fb9ebeef8e4 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.txt @@ -0,0 +1,33 @@ +Samsung s6e63m0 AMOLED LCD panel + +Required properties: + - compatible: "samsung,s6e63m0" + - reset-gpios: GPIO spec for reset pin + - vdd3-supply: VDD regulator + - vci-supply: VCI regulator + +The panel must obey rules for SPI slave device specified in document [1]. + +The device node can contain one 'port' child node with one child +'endpoint' node, according to the bindings defined in [2]. This +node should describe panel's video bus. 
+ +[1]: Documentation/devicetree/bindings/spi/spi-bus.txt +[2]: Documentation/devicetree/bindings/media/video-interfaces.txt + +Example: + + s6e63m0: display@0 { + compatible = "samsung,s6e63m0"; + reg = <0>; + reset-gpio = <&mp05 5 1>; + vdd3-supply = <&ldo12_reg>; + vci-supply = <&ldo11_reg>; + spi-max-frequency = <1200000>; + + port { + lcd_ep: endpoint { + remote-endpoint = <&fimd_ep>; + }; + }; + }; From 994a08a266e3e6002100ca6e205204512a2f93d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chmiel?= Date: Fri, 22 Feb 2019 18:51:53 +0100 Subject: [PATCH 0034/1815] drm/panel: Add driver for Samsung S6E63M0 panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds Samsung S6E63M0 AMOLED LCD panel driver, connected over spi. It's based on already removed, non-device-tree S6E63M0 driver and panel-samsung-ld9040. It can be found for example in some of Samsung Aries based phones. Signed-off-by: PaweƂ Chmiel Reviewed-by: Sam Ravnborg Reviewed-by: Andrzej Hajda Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20190222175153.20567-2-pawel.mikolaj.chmiel@gmail.com --- drivers/gpu/drm/panel/Kconfig | 9 + drivers/gpu/drm/panel/Makefile | 1 + drivers/gpu/drm/panel/panel-samsung-s6e63m0.c | 514 ++++++++++++++++++ 3 files changed, 524 insertions(+) create mode 100644 drivers/gpu/drm/panel/panel-samsung-s6e63m0.c diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index e36dbb4df867..25a3dae0133c 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -200,6 +200,15 @@ config DRM_PANEL_SAMSUNG_S6E63J0X03 depends on BACKLIGHT_CLASS_DEVICE select VIDEOMODE_HELPERS +config DRM_PANEL_SAMSUNG_S6E63M0 + tristate "Samsung S6E63M0 RGB/SPI panel" + depends on OF + depends on SPI + depends on BACKLIGHT_CLASS_DEVICE + help + Say Y here if you want to enable support for Samsung S6E63M0 + AMOLED LCD panel. + config DRM_PANEL_SAMSUNG_S6E8AA0 tristate "Samsung S6E8AA0 DSI video mode panel" depends on OF diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile index 78e3dc376bdd..7fd498ab8f0f 100644 --- a/drivers/gpu/drm/panel/Makefile +++ b/drivers/gpu/drm/panel/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_DRM_PANEL_SAMSUNG_LD9040) += panel-samsung-ld9040.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D16D0) += panel-samsung-s6d16d0.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2) += panel-samsung-s6e3ha2.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03) += panel-samsung-s6e63j0x03.o +obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63M0) += panel-samsung-s6e63m0.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0) += panel-samsung-s6e8aa0.o obj-$(CONFIG_DRM_PANEL_SEIKO_43WVF1G) += panel-seiko-43wvf1g.o obj-$(CONFIG_DRM_PANEL_SHARP_LQ101R1SX01) += panel-sharp-lq101r1sx01.o diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c new file mode 100644 index 000000000000..142d395ea512 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c @@ -0,0 +1,514 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * S6E63M0 AMOLED LCD drm_panel driver. + * + * Copyright (C) 2019 PaweƂ Chmiel + * Derived from drivers/gpu/drm/panel-samsung-ld9040.c + * + * Andrzej Hajda + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include