linux/drivers/gpu/drm/i915/intel_uc.c
Michał Winiarski 3176ff49bc drm/i915/guc: Move GuC workqueue allocations outside of the mutex
This gets rid of the following lockdep splat:

======================================================
WARNING: possible circular locking dependency detected
4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted
------------------------------------------------------
debugfs_test/1351 is trying to acquire lock:
 (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915]

but task is already holding lock:
 (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #6 (&mm->mmap_sem){++++}:
       __might_fault+0x63/0x90
       _copy_to_user+0x1e/0x70
       filldir+0x8c/0xf0
       dcache_readdir+0xeb/0x160
       iterate_dir+0xe6/0x150
       SyS_getdents+0xa0/0x130
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #5 (&sb->s_type->i_mutex_key#5){++++}:
       lockref_get+0x9/0x20

-> #4 ((completion)&req.done){+.+.}:
       wait_for_common+0x54/0x210
       devtmpfs_create_node+0x130/0x150
       device_add+0x5ad/0x5e0
       device_create_groups_vargs+0xd4/0xe0
       device_create+0x35/0x40
       msr_device_create+0x22/0x40
       cpuhp_invoke_callback+0xc5/0xbf0
       cpuhp_thread_fun+0x167/0x210
       smpboot_thread_fn+0x17f/0x270
       kthread+0x173/0x1b0
       ret_from_fork+0x24/0x30

-> #3 (cpuhp_state-up){+.+.}:
       cpuhp_issue_call+0x132/0x1c0
       __cpuhp_setup_state_cpuslocked+0x12f/0x2a0
       __cpuhp_setup_state+0x3a/0x50
       page_writeback_init+0x3a/0x5c
       start_kernel+0x393/0x3e2
       secondary_startup_64+0xa5/0xb0

-> #2 (cpuhp_state_mutex){+.+.}:
       __mutex_lock+0x81/0x9b0
       __cpuhp_setup_state_cpuslocked+0x4b/0x2a0
       __cpuhp_setup_state+0x3a/0x50
       page_alloc_init+0x1f/0x26
       start_kernel+0x139/0x3e2
       secondary_startup_64+0xa5/0xb0

-> #1 (cpu_hotplug_lock.rw_sem){++++}:
       cpus_read_lock+0x34/0xa0
       apply_workqueue_attrs+0xd/0x40
       __alloc_workqueue_key+0x2c7/0x4e1
       intel_guc_submission_init+0x10c/0x650 [i915]
       intel_uc_init_hw+0x29e/0x460 [i915]
       i915_gem_init_hw+0xca/0x290 [i915]
       i915_gem_init+0x115/0x3a0 [i915]
       i915_driver_load+0x9a8/0x16c0 [i915]
       i915_pci_probe+0x2e/0x90 [i915]
       pci_device_probe+0x9c/0x120
       driver_probe_device+0x2a3/0x480
       __driver_attach+0xd9/0xe0
       bus_for_each_dev+0x57/0x90
       bus_add_driver+0x168/0x260
       driver_register+0x52/0xc0
       do_one_initcall+0x39/0x150
       do_init_module+0x56/0x1ef
       load_module+0x231c/0x2d70
       SyS_finit_module+0xa5/0xe0
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #0 (&dev->struct_mutex){+.+.}:
       lock_acquire+0xaf/0x200
       __mutex_lock+0x81/0x9b0
       i915_mutex_lock_interruptible+0x47/0x130 [i915]
       i915_gem_fault+0x201/0x760 [i915]
       __do_fault+0x15/0x70
       __handle_mm_fault+0x85b/0xe40
       handle_mm_fault+0x14f/0x2f0
       __do_page_fault+0x2d1/0x560
       page_fault+0x22/0x30

other info that might help us debug this:

Chain exists of:
  &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&mm->mmap_sem);
                               lock(&sb->s_type->i_mutex_key#5);
                               lock(&mm->mmap_sem);
  lock(&dev->struct_mutex);

 *** DEADLOCK ***

1 lock held by debugfs_test/1351:
 #0:  (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560

stack backtrace:
CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1
Hardware name:                  /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017
Call Trace:
 dump_stack+0x5f/0x86
 print_circular_bug+0x230/0x3b0
 check_prev_add+0x439/0x7b0
 ? lockdep_init_map_crosslock+0x20/0x20
 ? unwind_get_return_address+0x16/0x30
 ? __lock_acquire+0x1385/0x15a0
 __lock_acquire+0x1385/0x15a0
 lock_acquire+0xaf/0x200
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 __mutex_lock+0x81/0x9b0
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? __pm_runtime_resume+0x4f/0x80
 i915_gem_fault+0x201/0x760 [i915]
 __do_fault+0x15/0x70
 __handle_mm_fault+0x85b/0xe40
 handle_mm_fault+0x14f/0x2f0
 __do_page_fault+0x2d1/0x560
 page_fault+0x22/0x30
RIP: 0033:0x7f98d6f49116
RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283
RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680
RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0
RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0
R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001
R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0

v2: Init preempt_work unconditionally (Chris)
v3: Mention that we need the enable_guc=1 for lockdep splat (Chris)

Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com
2017-12-14 08:06:54 +00:00

363 lines
9.0 KiB
C

/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include "intel_uc.h"
#include "intel_guc_submission.h"
#include "intel_guc.h"
#include "i915_drv.h"
/* Reset GuC providing us with fresh state for both GuC and HuC.
*/
static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv)
{
int ret;
u32 guc_status;
ret = intel_reset_guc(dev_priv);
if (ret) {
DRM_ERROR("Failed to reset GuC, ret = %d\n", ret);
return ret;
}
guc_status = I915_READ(GUC_STATUS);
WARN(!(guc_status & GS_MIA_IN_RESET),
"GuC status: 0x%x, MIA core expected to be in reset\n",
guc_status);
return ret;
}
static int __get_platform_enable_guc(struct drm_i915_private *dev_priv)
{
struct intel_uc_fw *guc_fw = &dev_priv->guc.fw;
struct intel_uc_fw *huc_fw = &dev_priv->huc.fw;
int enable_guc = 0;
/* Default is to enable GuC/HuC if we know their firmwares */
if (intel_uc_fw_is_selected(guc_fw))
enable_guc |= ENABLE_GUC_SUBMISSION;
if (intel_uc_fw_is_selected(huc_fw))
enable_guc |= ENABLE_GUC_LOAD_HUC;
/* Any platform specific fine-tuning can be done here */
return enable_guc;
}
/**
* intel_uc_sanitize_options - sanitize uC related modparam options
* @dev_priv: device private
*
* In case of "enable_guc" option this function will attempt to modify
* it only if it was initially set to "auto(-1)". Default value for this
* modparam varies between platforms and it is hardcoded in driver code.
* Any other modparam value is only monitored against availability of the
* related hardware or firmware definitions.
*/
void intel_uc_sanitize_options(struct drm_i915_private *dev_priv)
{
struct intel_uc_fw *guc_fw = &dev_priv->guc.fw;
struct intel_uc_fw *huc_fw = &dev_priv->huc.fw;
/* A negative value means "use platform default" */
if (i915_modparams.enable_guc < 0)
i915_modparams.enable_guc = __get_platform_enable_guc(dev_priv);
DRM_DEBUG_DRIVER("enable_guc=%d (submission:%s huc:%s)\n",
i915_modparams.enable_guc,
yesno(intel_uc_is_using_guc_submission()),
yesno(intel_uc_is_using_huc()));
/* Verify GuC firmware availability */
if (intel_uc_is_using_guc() && !intel_uc_fw_is_selected(guc_fw)) {
DRM_WARN("Incompatible option detected: enable_guc=%d, %s!\n",
i915_modparams.enable_guc,
!HAS_GUC(dev_priv) ? "no GuC hardware" :
"no GuC firmware");
}
/* Verify HuC firmware availability */
if (intel_uc_is_using_huc() && !intel_uc_fw_is_selected(huc_fw)) {
DRM_WARN("Incompatible option detected: enable_guc=%d, %s!\n",
i915_modparams.enable_guc,
!HAS_HUC(dev_priv) ? "no HuC hardware" :
"no HuC firmware");
}
/* Make sure that sanitization was done */
GEM_BUG_ON(i915_modparams.enable_guc < 0);
}
void intel_uc_init_early(struct drm_i915_private *dev_priv)
{
intel_guc_init_early(&dev_priv->guc);
intel_huc_init_early(&dev_priv->huc);
}
void intel_uc_init_fw(struct drm_i915_private *dev_priv)
{
if (!USES_GUC(dev_priv))
return;
if (USES_HUC(dev_priv))
intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw);
intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw);
}
void intel_uc_fini_fw(struct drm_i915_private *dev_priv)
{
if (!USES_GUC(dev_priv))
return;
intel_uc_fw_fini(&dev_priv->guc.fw);
if (USES_HUC(dev_priv))
intel_uc_fw_fini(&dev_priv->huc.fw);
}
/**
* intel_uc_init_mmio - setup uC MMIO access
*
* @dev_priv: device private
*
* Setup minimal state necessary for MMIO accesses later in the
* initialization sequence.
*/
void intel_uc_init_mmio(struct drm_i915_private *dev_priv)
{
intel_guc_init_send_regs(&dev_priv->guc);
}
static void guc_capture_load_err_log(struct intel_guc *guc)
{
if (!guc->log.vma || i915_modparams.guc_log_level < 0)
return;
if (!guc->load_err_log)
guc->load_err_log = i915_gem_object_get(guc->log.vma->obj);
return;
}
static void guc_free_load_err_log(struct intel_guc *guc)
{
if (guc->load_err_log)
i915_gem_object_put(guc->load_err_log);
}
static int guc_enable_communication(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
if (HAS_GUC_CT(dev_priv))
return intel_guc_enable_ct(guc);
guc->send = intel_guc_send_mmio;
return 0;
}
static void guc_disable_communication(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
if (HAS_GUC_CT(dev_priv))
intel_guc_disable_ct(guc);
guc->send = intel_guc_send_nop;
}
int intel_uc_init_wq(struct drm_i915_private *dev_priv)
{
int ret;
if (!USES_GUC(dev_priv))
return 0;
ret = intel_guc_init_wq(&dev_priv->guc);
if (ret) {
DRM_ERROR("Couldn't allocate workqueues for GuC\n");
return ret;
}
return 0;
}
void intel_uc_fini_wq(struct drm_i915_private *dev_priv)
{
if (!USES_GUC(dev_priv))
return;
GEM_BUG_ON(!HAS_GUC(dev_priv));
intel_guc_fini_wq(&dev_priv->guc);
}
int intel_uc_init_hw(struct drm_i915_private *dev_priv)
{
struct intel_guc *guc = &dev_priv->guc;
struct intel_huc *huc = &dev_priv->huc;
int ret, attempts;
if (!USES_GUC(dev_priv))
return 0;
if (!HAS_GUC(dev_priv)) {
ret = -ENODEV;
goto err_out;
}
guc_disable_communication(guc);
gen9_reset_guc_interrupts(dev_priv);
ret = intel_guc_init(guc);
if (ret)
goto err_out;
if (USES_GUC_SUBMISSION(dev_priv)) {
/*
* This is stuff we need to have available at fw load time
* if we are planning to enable submission later
*/
ret = intel_guc_submission_init(guc);
if (ret)
goto err_guc;
}
/* init WOPCM */
I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
I915_WRITE(DMA_GUC_WOPCM_OFFSET,
GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC);
/* WaEnableuKernelHeaderValidFix:skl */
/* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
if (IS_GEN9(dev_priv))
attempts = 3;
else
attempts = 1;
while (attempts--) {
/*
* Always reset the GuC just before (re)loading, so
* that the state and timing are fairly predictable
*/
ret = __intel_uc_reset_hw(dev_priv);
if (ret)
goto err_submission;
if (USES_HUC(dev_priv)) {
ret = intel_huc_init_hw(huc);
if (ret)
goto err_submission;
}
intel_guc_init_params(guc);
ret = intel_guc_fw_upload(guc);
if (ret == 0 || ret != -EAGAIN)
break;
DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and "
"retry %d more time(s)\n", ret, attempts);
}
/* Did we succeded or run out of retries? */
if (ret)
goto err_log_capture;
ret = guc_enable_communication(guc);
if (ret)
goto err_log_capture;
if (USES_HUC(dev_priv)) {
ret = intel_huc_auth(huc);
if (ret)
goto err_communication;
}
if (USES_GUC_SUBMISSION(dev_priv)) {
if (i915_modparams.guc_log_level >= 0)
gen9_enable_guc_interrupts(dev_priv);
ret = intel_guc_submission_enable(guc);
if (ret)
goto err_interrupts;
}
dev_info(dev_priv->drm.dev, "GuC firmware version %u.%u\n",
guc->fw.major_ver_found, guc->fw.minor_ver_found);
dev_info(dev_priv->drm.dev, "GuC submission %s\n",
enableddisabled(USES_GUC_SUBMISSION(dev_priv)));
dev_info(dev_priv->drm.dev, "HuC %s\n",
enableddisabled(USES_HUC(dev_priv)));
return 0;
/*
* We've failed to load the firmware :(
*/
err_interrupts:
gen9_disable_guc_interrupts(dev_priv);
err_communication:
guc_disable_communication(guc);
err_log_capture:
guc_capture_load_err_log(guc);
err_submission:
if (USES_GUC_SUBMISSION(dev_priv))
intel_guc_submission_fini(guc);
err_guc:
intel_guc_fini(guc);
err_out:
/*
* Note that there is no fallback as either user explicitly asked for
* the GuC or driver default option was to run with the GuC enabled.
*/
if (GEM_WARN_ON(ret == -EIO))
ret = -EINVAL;
dev_err(dev_priv->drm.dev, "GuC initialization failed %d\n", ret);
return ret;
}
void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
{
struct intel_guc *guc = &dev_priv->guc;
guc_free_load_err_log(guc);
if (!USES_GUC(dev_priv))
return;
if (USES_GUC_SUBMISSION(dev_priv))
intel_guc_submission_disable(guc);
guc_disable_communication(guc);
if (USES_GUC_SUBMISSION(dev_priv)) {
gen9_disable_guc_interrupts(dev_priv);
intel_guc_submission_fini(guc);
}
intel_guc_fini(guc);
}