From 1fa8d07ae1a5fa4e87de42c338e8fc27f46d8bb6 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 22 Feb 2024 03:05:16 +0200 Subject: [PATCH 01/32] gpu: host1x: Skip reset assert on Tegra186 On Tegra186, secure world applications may need to access host1x during suspend/resume, and rely on the kernel to keep Host1x out of reset during the suspend cycle. As such, as a quirk, skip asserting Host1x's reset on Tegra186. We don't need to keep the clocks enabled, as BPMP ensures the clock stays on while Host1x is being used. On newer SoC's, the reset line is inaccessible, so there is no need for the quirk. Fixes: b7c00cdf6df5 ("gpu: host1x: Enable system suspend callbacks") Signed-off-by: Mikko Perttunen Reviewed-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20240222010517.1573931-1-cyndis@kapsi.fi --- drivers/gpu/host1x/dev.c | 15 +++++++++------ drivers/gpu/host1x/dev.h | 6 ++++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 42fd504abbcd..89983d7d73ca 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -169,6 +169,7 @@ static const struct host1x_info host1x06_info = { .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), .sid_table = tegra186_sid_table, .reserve_vblank_syncpts = false, + .skip_reset_assert = true, }; static const struct host1x_sid_entry tegra194_sid_table[] = { @@ -680,13 +681,15 @@ static int __maybe_unused host1x_runtime_suspend(struct device *dev) host1x_intr_stop(host); host1x_syncpt_save(host); - err = reset_control_bulk_assert(host->nresets, host->resets); - if (err) { - dev_err(dev, "failed to assert reset: %d\n", err); - goto resume_host1x; - } + if (!host->info->skip_reset_assert) { + err = reset_control_bulk_assert(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + goto resume_host1x; + } - usleep_range(1000, 2000); + usleep_range(1000, 2000); + } clk_disable_unprepare(host->clk); reset_control_bulk_release(host->nresets, host->resets); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index c8e302de7625..925a118db23f 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -116,6 +116,12 @@ struct host1x_info { * the display driver disables VBLANK increments. */ bool reserve_vblank_syncpts; + /* + * On Tegra186, secure world applications may require access to + * host1x during suspend/resume. To allow this, we need to leave + * host1x not in reset. + */ + bool skip_reset_assert; }; struct host1x { From 9ee485bdda68d6d3f5728cbe3150eb9013d7d22b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 17 Feb 2024 16:02:23 +0100 Subject: [PATCH 02/32] drm/bridge: aux-hpd: fix OF node leaks The two device node references taken during allocation need to be dropped when the auxiliary device is freed. Fixes: 6914968a0b52 ("drm/bridge: properly refcount DT nodes in aux bridge drivers") Cc: Dmitry Baryshkov Cc: Neil Armstrong Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240217150228.5788-2-johan+linaro@kernel.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-2-johan+linaro@kernel.org --- drivers/gpu/drm/bridge/aux-hpd-bridge.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index bb55f697a181..9e71daf95bde 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -25,6 +25,7 @@ static void drm_aux_hpd_bridge_release(struct device *dev) ida_free(&drm_aux_hpd_bridge_ida, adev->id); of_node_put(adev->dev.platform_data); + of_node_put(adev->dev.of_node); kfree(adev); } @@ -74,6 +75,8 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent, ret = auxiliary_device_init(adev); if (ret) { + of_node_put(adev->dev.platform_data); + of_node_put(adev->dev.of_node); ida_free(&drm_aux_hpd_bridge_ida, adev->id); kfree(adev); return ERR_PTR(ret); From e5ca263508f7e9d2cf711edf3258d11ca087885c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 17 Feb 2024 16:02:24 +0100 Subject: [PATCH 03/32] drm/bridge: aux-hpd: separate allocation and registration Combining allocation and registration is an anti-pattern that should be avoided. Add two new functions for allocating and registering an dp-hpd bridge with a proper 'devm' prefix so that it is clear that these are device managed interfaces. devm_drm_dp_hpd_bridge_alloc() devm_drm_dp_hpd_bridge_add() The new interface will be used to fix a use-after-free bug in the Qualcomm PMIC GLINK driver and may prevent similar issues from being introduced elsewhere. The existing drm_dp_hpd_bridge_register() is reimplemented using the above and left in place for now. Signed-off-by: Johan Hovold Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-3-johan+linaro@kernel.org --- drivers/gpu/drm/bridge/aux-hpd-bridge.c | 67 +++++++++++++++++++------ include/drm/bridge/aux-bridge.h | 15 ++++++ 2 files changed, 67 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index 9e71daf95bde..6886db2d9e00 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -30,16 +30,13 @@ static void drm_aux_hpd_bridge_release(struct device *dev) kfree(adev); } -static void drm_aux_hpd_bridge_unregister_adev(void *_adev) +static void drm_aux_hpd_bridge_free_adev(void *_adev) { - struct auxiliary_device *adev = _adev; - - auxiliary_device_delete(adev); - auxiliary_device_uninit(adev); + auxiliary_device_uninit(_adev); } /** - * drm_dp_hpd_bridge_register - Create a simple HPD DisplayPort bridge + * devm_drm_dp_hpd_bridge_alloc - allocate a HPD DisplayPort bridge * @parent: device instance providing this bridge * @np: device node pointer corresponding to this bridge instance * @@ -47,11 +44,9 @@ static void drm_aux_hpd_bridge_unregister_adev(void *_adev) * DRM_MODE_CONNECTOR_DisplayPort, which terminates the bridge chain and is * able to send the HPD events. * - * Return: device instance that will handle created bridge or an error code - * encoded into the pointer. + * Return: bridge auxiliary device pointer or an error pointer */ -struct device *drm_dp_hpd_bridge_register(struct device *parent, - struct device_node *np) +struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np) { struct auxiliary_device *adev; int ret; @@ -82,13 +77,55 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent, return ERR_PTR(ret); } - ret = auxiliary_device_add(adev); - if (ret) { - auxiliary_device_uninit(adev); + ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_free_adev, adev); + if (ret) return ERR_PTR(ret); - } - ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_unregister_adev, adev); + return adev; +} +EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_alloc); + +static void drm_aux_hpd_bridge_del_adev(void *_adev) +{ + auxiliary_device_delete(_adev); +} + +/** + * devm_drm_dp_hpd_bridge_add - register a HDP DisplayPort bridge + * @dev: struct device to tie registration lifetime to + * @adev: bridge auxiliary device to be registered + * + * Returns: zero on success or a negative errno + */ +int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev) +{ + int ret; + + ret = auxiliary_device_add(adev); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, drm_aux_hpd_bridge_del_adev, adev); +} +EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_add); + +/** + * drm_dp_hpd_bridge_register - allocate and register a HDP DisplayPort bridge + * @parent: device instance providing this bridge + * @np: device node pointer corresponding to this bridge instance + * + * Return: device instance that will handle created bridge or an error pointer + */ +struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np) +{ + struct auxiliary_device *adev; + int ret; + + adev = devm_drm_dp_hpd_bridge_alloc(parent, np); + if (IS_ERR(adev)) + return ERR_CAST(adev); + + ret = devm_drm_dp_hpd_bridge_add(parent, adev); if (ret) return ERR_PTR(ret); diff --git a/include/drm/bridge/aux-bridge.h b/include/drm/bridge/aux-bridge.h index c4c423e97f06..4453906105ca 100644 --- a/include/drm/bridge/aux-bridge.h +++ b/include/drm/bridge/aux-bridge.h @@ -9,6 +9,8 @@ #include +struct auxiliary_device; + #if IS_ENABLED(CONFIG_DRM_AUX_BRIDGE) int drm_aux_bridge_register(struct device *parent); #else @@ -19,10 +21,23 @@ static inline int drm_aux_bridge_register(struct device *parent) #endif #if IS_ENABLED(CONFIG_DRM_AUX_HPD_BRIDGE) +struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np); +int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev); struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np); void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status); #else +static inline struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, + struct device_node *np) +{ + return NULL; +} + +static inline int devm_drm_dp_hpd_bridge_add(struct auxiliary_device *adev) +{ + return 0; +} + static inline struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np) { From b979f2d50a099f3402418d7ff5f26c3952fb08bb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 17 Feb 2024 16:02:25 +0100 Subject: [PATCH 04/32] soc: qcom: pmic_glink_altmode: fix drm bridge use-after-free A recent DRM series purporting to simplify support for "transparent bridges" and handling of probe deferrals ironically exposed a use-after-free issue on pmic_glink_altmode probe deferral. This has manifested itself as the display subsystem occasionally failing to initialise and NULL-pointer dereferences during boot of machines like the Lenovo ThinkPad X13s. Specifically, the dp-hpd bridge is currently registered before all resources have been acquired which means that it can also be deregistered on probe deferrals. In the meantime there is a race window where the new aux bridge driver (or PHY driver previously) may have looked up the dp-hpd bridge and stored a (non-reference-counted) pointer to the bridge which is about to be deallocated. When the display controller is later initialised, this triggers a use-after-free when attaching the bridges: dp -> aux -> dp-hpd (freed) which may, for example, result in the freed bridge failing to attach: [drm:drm_bridge_attach [drm]] *ERROR* failed to attach bridge /soc@0/phy@88eb000 to encoder TMDS-31: -16 or a NULL-pointer dereference: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... Call trace: drm_bridge_attach+0x70/0x1a8 [drm] drm_aux_bridge_attach+0x24/0x38 [aux_bridge] drm_bridge_attach+0x80/0x1a8 [drm] dp_bridge_init+0xa8/0x15c [msm] msm_dp_modeset_init+0x28/0xc4 [msm] The DRM bridge implementation is clearly fragile and implicitly built on the assumption that bridges may never go away. In this case, the fix is to move the bridge registration in the pmic_glink_altmode driver to after all resources have been looked up. Incidentally, with the new dp-hpd bridge implementation, which registers child devices, this is also a requirement due to a long-standing issue in driver core that can otherwise lead to a probe deferral loop (see commit fbc35b45f9f6 ("Add documentation on meaning of -EPROBE_DEFER")). [DB: slightly fixed commit message by adding the word 'commit'] Fixes: 080b4e24852b ("soc: qcom: pmic_glink: Introduce altmode support") Fixes: 2bcca96abfbf ("soc: qcom: pmic-glink: switch to DRM_AUX_HPD_BRIDGE") Cc: # 6.3 Cc: Bjorn Andersson Cc: Dmitry Baryshkov Signed-off-by: Johan Hovold Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-4-johan+linaro@kernel.org --- drivers/soc/qcom/pmic_glink_altmode.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c index 5fcd0fdd2faa..b3808fc24c69 100644 --- a/drivers/soc/qcom/pmic_glink_altmode.c +++ b/drivers/soc/qcom/pmic_glink_altmode.c @@ -76,7 +76,7 @@ struct pmic_glink_altmode_port { struct work_struct work; - struct device *bridge; + struct auxiliary_device *bridge; enum typec_orientation orientation; u16 svid; @@ -230,7 +230,7 @@ static void pmic_glink_altmode_worker(struct work_struct *work) else pmic_glink_altmode_enable_usb(altmode, alt_port); - drm_aux_hpd_bridge_notify(alt_port->bridge, + drm_aux_hpd_bridge_notify(&alt_port->bridge->dev, alt_port->hpd_state ? connector_status_connected : connector_status_disconnected); @@ -454,7 +454,7 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev, alt_port->index = port; INIT_WORK(&alt_port->work, pmic_glink_altmode_worker); - alt_port->bridge = drm_dp_hpd_bridge_register(dev, to_of_node(fwnode)); + alt_port->bridge = devm_drm_dp_hpd_bridge_alloc(dev, to_of_node(fwnode)); if (IS_ERR(alt_port->bridge)) { fwnode_handle_put(fwnode); return PTR_ERR(alt_port->bridge); @@ -510,6 +510,16 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev, } } + for (port = 0; port < ARRAY_SIZE(altmode->ports); port++) { + alt_port = &altmode->ports[port]; + if (!alt_port->bridge) + continue; + + ret = devm_drm_dp_hpd_bridge_add(dev, alt_port->bridge); + if (ret) + return ret; + } + altmode->client = devm_pmic_glink_register_client(dev, altmode->owner_id, pmic_glink_altmode_callback, From f79ee78767ca60e7a2c89eacd2dbdf237d97e838 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 17 Feb 2024 16:02:26 +0100 Subject: [PATCH 05/32] soc: qcom: pmic_glink: Fix boot when QRTR=m We need to bail out before adding/removing devices if we are going to -EPROBE_DEFER. Otherwise boot can get stuck in a probe deferral loop due to a long-standing issue in driver core (see commit fbc35b45f9f6 ("Add documentation on meaning of -EPROBE_DEFER")). Deregistering the altmode child device can potentially also trigger bugs in the DRM bridge implementation, which does not expect bridges to go away. [DB: slightly fixed commit message by adding the word 'commit'] Suggested-by: Dmitry Baryshkov Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20231213210644.8702-1-robdclark@gmail.com [ johan: rebase on 6.8-rc4, amend commit message and mention DRM ] Fixes: 58ef4ece1e41 ("soc: qcom: pmic_glink: Introduce base PMIC GLINK driver") Cc: # 6.3 Cc: Bjorn Andersson Signed-off-by: Johan Hovold Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-5-johan+linaro@kernel.org --- drivers/soc/qcom/pmic_glink.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c index f4bfd24386f1..f913e9bd57ed 100644 --- a/drivers/soc/qcom/pmic_glink.c +++ b/drivers/soc/qcom/pmic_glink.c @@ -265,10 +265,17 @@ static int pmic_glink_probe(struct platform_device *pdev) pg->client_mask = *match_data; + pg->pdr = pdr_handle_alloc(pmic_glink_pdr_callback, pg); + if (IS_ERR(pg->pdr)) { + ret = dev_err_probe(&pdev->dev, PTR_ERR(pg->pdr), + "failed to initialize pdr\n"); + return ret; + } + if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_UCSI)) { ret = pmic_glink_add_aux_device(pg, &pg->ucsi_aux, "ucsi"); if (ret) - return ret; + goto out_release_pdr_handle; } if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_ALTMODE)) { ret = pmic_glink_add_aux_device(pg, &pg->altmode_aux, "altmode"); @@ -281,17 +288,11 @@ static int pmic_glink_probe(struct platform_device *pdev) goto out_release_altmode_aux; } - pg->pdr = pdr_handle_alloc(pmic_glink_pdr_callback, pg); - if (IS_ERR(pg->pdr)) { - ret = dev_err_probe(&pdev->dev, PTR_ERR(pg->pdr), "failed to initialize pdr\n"); - goto out_release_aux_devices; - } - service = pdr_add_lookup(pg->pdr, "tms/servreg", "msm/adsp/charger_pd"); if (IS_ERR(service)) { ret = dev_err_probe(&pdev->dev, PTR_ERR(service), "failed adding pdr lookup for charger_pd\n"); - goto out_release_pdr_handle; + goto out_release_aux_devices; } mutex_lock(&__pmic_glink_lock); @@ -300,8 +301,6 @@ static int pmic_glink_probe(struct platform_device *pdev) return 0; -out_release_pdr_handle: - pdr_handle_release(pg->pdr); out_release_aux_devices: if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_BATT)) pmic_glink_del_aux_device(pg, &pg->ps_aux); @@ -311,6 +310,8 @@ out_release_altmode_aux: out_release_ucsi_aux: if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_UCSI)) pmic_glink_del_aux_device(pg, &pg->ucsi_aux); +out_release_pdr_handle: + pdr_handle_release(pg->pdr); return ret; } From 86bf8cfda6d2a6720fa2e6e676c98f0882c9d3d7 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 23 Feb 2024 16:03:33 +0100 Subject: [PATCH 06/32] drm/tegra: Remove existing framebuffer only if we support display Tegra DRM doesn't support display on Tegra234 and later, so make sure not to remove any existing framebuffers in that case. v2: - add comments explaining how this situation can come about - clear DRIVER_MODESET and DRIVER_ATOMIC feature bits Fixes: 6848c291a54f ("drm/aperture: Convert drivers to aperture interfaces") Signed-off-by: Thierry Reding Reviewed-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20240223150333.1401582-1-thierry.reding@gmail.com --- drivers/gpu/drm/tegra/drm.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index ff36171c8fb7..373bcd79257e 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1242,9 +1242,26 @@ static int host1x_drm_probe(struct host1x_device *dev) drm_mode_config_reset(drm); - err = drm_aperture_remove_framebuffers(&tegra_drm_driver); - if (err < 0) - goto hub; + /* + * Only take over from a potential firmware framebuffer if any CRTCs + * have been registered. This must not be a fatal error because there + * are other accelerators that are exposed via this driver. + * + * Another case where this happens is on Tegra234 where the display + * hardware is no longer part of the host1x complex, so this driver + * will not expose any modesetting features. + */ + if (drm->mode_config.num_crtc > 0) { + err = drm_aperture_remove_framebuffers(&tegra_drm_driver); + if (err < 0) + goto hub; + } else { + /* + * Indicate to userspace that this doesn't expose any display + * capabilities. + */ + drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); + } err = drm_dev_register(drm, 0); if (err < 0) From 9d3f8a723c7950e56e0b95ab84b572caee29e065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 21 Feb 2024 08:18:59 +0100 Subject: [PATCH 07/32] drm/ttm/tests: depend on UML || COMPILE_TEST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At least the device test requires that no other driver using TTM is loaded. So make those unit tests depend on UML || COMPILE_TEST to prevent people from trying them on bare metal. Signed-off-by: Christian König Acked-by: Alex Deucher Link: https://lore.kernel.org/all/20240219230116.77b8ad68@yea/ --- drivers/gpu/drm/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2520db0b776e..c7edba18a6f0 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -199,7 +199,7 @@ config DRM_TTM config DRM_TTM_KUNIT_TEST tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS default n - depends on DRM && KUNIT && MMU + depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST) select DRM_TTM select DRM_EXPORT_FOR_TESTS if m select DRM_KUNIT_TEST_HELPERS @@ -207,7 +207,8 @@ config DRM_TTM_KUNIT_TEST help Enables unit tests for TTM, a GPU memory manager subsystem used to manage memory buffers. This option is mostly useful for kernel - developers. + developers. It depends on (UML || COMPILE_TEST) since no other driver + which uses TTM can be loaded while running the tests. If in doubt, say "N". From 00d6a284fcf3fad1b7e1b5bc3cd87cbfb60ce03f Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Feb 2024 12:44:11 +0100 Subject: [PATCH 08/32] fbcon: always restore the old font data in fbcon_do_set_font() Commit a5a923038d70 (fbdev: fbcon: Properly revert changes when vc_resize() failed) started restoring old font data upon failure (of vc_resize()). But it performs so only for user fonts. It means that the "system"/internal fonts are not restored at all. So in result, the very first call to fbcon_do_set_font() performs no restore at all upon failing vc_resize(). This can be reproduced by Syzkaller to crash the system on the next invocation of font_get(). It's rather hard to hit the allocation failure in vc_resize() on the first font_set(), but not impossible. Esp. if fault injection is used to aid the execution/failure. It was demonstrated by Sirius: BUG: unable to handle page fault for address: fffffffffffffff8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD cb7b067 P4D cb7b067 PUD cb7d067 PMD 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 8007 Comm: poc Not tainted 6.7.0-g9d1694dc91ce #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:fbcon_get_font+0x229/0x800 drivers/video/fbdev/core/fbcon.c:2286 Call Trace: con_font_get drivers/tty/vt/vt.c:4558 [inline] con_font_op+0x1fc/0xf20 drivers/tty/vt/vt.c:4673 vt_k_ioctl drivers/tty/vt/vt_ioctl.c:474 [inline] vt_ioctl+0x632/0x2ec0 drivers/tty/vt/vt_ioctl.c:752 tty_ioctl+0x6f8/0x1570 drivers/tty/tty_io.c:2803 vfs_ioctl fs/ioctl.c:51 [inline] ... So restore the font data in any case, not only for user fonts. Note the later 'if' is now protected by 'old_userfont' and not 'old_data' as the latter is always set now. (And it is supposed to be non-NULL. Otherwise we would see the bug above again.) Signed-off-by: Jiri Slaby (SUSE) Fixes: a5a923038d70 ("fbdev: fbcon: Properly revert changes when vc_resize() failed") Reported-and-tested-by: Ubisectech Sirius Cc: Ubisectech Sirius Cc: Daniel Vetter Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240208114411.14604-1-jirislaby@kernel.org --- drivers/video/fbdev/core/fbcon.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 1183e7a871f8..46823c2e2ba1 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2399,11 +2399,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, struct fbcon_ops *ops = info->fbcon_par; struct fbcon_display *p = &fb_display[vc->vc_num]; int resize, ret, old_userfont, old_width, old_height, old_charcount; - char *old_data = NULL; + u8 *old_data = vc->vc_font.data; resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); - if (p->userfont) - old_data = vc->vc_font.data; vc->vc_font.data = (void *)(p->fontdata = data); old_userfont = p->userfont; if ((p->userfont = userfont)) @@ -2437,13 +2435,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, update_screen(vc); } - if (old_data && (--REFCOUNT(old_data) == 0)) + if (old_userfont && (--REFCOUNT(old_data) == 0)) kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); return 0; err_out: p->fontdata = old_data; - vc->vc_font.data = (void *)old_data; + vc->vc_font.data = old_data; if (userfont) { p->userfont = old_userfont; From 0f8ca019544a252d1afb468ce840c6dcbac73af4 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 20 Feb 2024 09:14:25 +0530 Subject: [PATCH 09/32] drm/amd/display: Prevent potential buffer overflow in map_hw_resources Adds a check in the map_hw_resources function to prevent a potential buffer overflow. The function was accessing arrays using an index that could potentially be greater than the size of the arrays, leading to a buffer overflow. Adds a check to ensure that the index is within the bounds of the arrays. If the index is out of bounds, an error message is printed and break it will continue execution with just ignoring extra data early to prevent the buffer overflow. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml2_wrapper.c:79 map_hw_resources() error: buffer overflow 'dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id' 6 <= 7 drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml2_wrapper.c:81 map_hw_resources() error: buffer overflow 'dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id' 6 <= 7 Fixes: 7966f319c66d ("drm/amd/display: Introduce DML2") Cc: Rodrigo Siqueira Cc: Roman Li Cc: Qingqing Zhuo Cc: Aurabindo Pillai Cc: Tom Chung Signed-off-by: Srinivasan Shanmugam Suggested-by: Roman Li Reviewed-by: Roman Li Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 26307e599614..2a58a7687bdb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -76,6 +76,11 @@ static void map_hw_resources(struct dml2_context *dml2, in_out_display_cfg->hw.DLGRefClkFreqMHz = 50; } for (j = 0; j < mode_support_info->DPPPerSurface[i]; j++) { + if (i >= __DML2_WRAPPER_MAX_STREAMS_PLANES__) { + dml_print("DML::%s: Index out of bounds: i=%d, __DML2_WRAPPER_MAX_STREAMS_PLANES__=%d\n", + __func__, i, __DML2_WRAPPER_MAX_STREAMS_PLANES__); + break; + } dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i]; dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[num_pipes] = true; dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i]; From 7968e9748fbbd7ae49770d9f8a8231d8bce2aebb Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Thu, 22 Feb 2024 17:08:42 +0800 Subject: [PATCH 10/32] drm/amdgpu/pm: Fix the power1_min_cap value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's unreasonable to use 0 as the power1_min_cap when OD is disabled. So, use the same lower limit as the value used when OD is enabled. Fixes: 1958946858a6 ("drm/amd/pm: Support for getting power1_cap_min value") Signed-off-by: Ma Jun Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 9 ++++----- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 9 ++++----- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 9 ++++----- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 9 ++++----- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 9 ++++----- 5 files changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 4cd43bbec910..bcad42534da4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1303,13 +1303,12 @@ static int arcturus_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 8d1d29ffb0f1..ed189a3878eb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2357,13 +2357,12 @@ static int navi10_get_power_limit(struct smu_context *smu, *default_power_limit = power_limit; if (smu->od_enabled && - navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) { + navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 21fc033528fa..e2ad2b972ab0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -640,13 +640,12 @@ static int sienna_cichlid_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a9954ffc02c5..9b80f18ea6c3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2369,13 +2369,12 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 0ffdb58af74e..3dc7b60cb075 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2333,13 +2333,12 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); From c671ec01311b4744b377f98b0b4c6d033fe569b3 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 22 Feb 2024 20:56:59 +0800 Subject: [PATCH 11/32] drm/amdgpu: Enable gpu reset for S3 abort cases on Raven series Currently, GPU resets can now be performed successfully on the Raven series. While GPU reset is required for the S3 suspend abort case. So now can enable gpu reset for S3 abort cases on the Raven series. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 45 +++++++++++++++++------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c64c01e2944a..1c614451dead 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -574,11 +574,34 @@ soc15_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_asic_reset(struct amdgpu_device *adev) { /* original raven doesn't have full asic reset */ - if ((adev->apu_flags & AMD_APU_IS_RAVEN) || - (adev->apu_flags & AMD_APU_IS_RAVEN2)) + /* On the latest Raven, the GPU reset can be performed + * successfully. So now, temporarily enable it for the + * S3 suspend abort case. + */ + if (((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) && + !soc15_need_reset_on_resume(adev)) return 0; switch (soc15_asic_reset_method(adev)) { @@ -1298,24 +1321,6 @@ static int soc15_common_suspend(void *handle) return soc15_common_hw_fini(adev); } -static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) -{ - u32 sol_reg; - - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - - /* Will reset for the following suspend abort cases. - * 1) Only reset limit on APU side, dGPU hasn't checked yet. - * 2) S3 suspend abort and TOS already launched. - */ - if (adev->flags & AMD_IS_APU && adev->in_s3 && - !adev->suspend_complete && - sol_reg) - return true; - - return false; -} - static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; From 955558030954b9637b41c97b730f9b38c92ac488 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Aug 2023 15:06:00 -0400 Subject: [PATCH 12/32] Revert "drm/amd/pm: resolve reboot exception for si oland" This reverts commit e490d60a2f76bff636c68ce4fe34c1b6c34bbd86. This causes hangs on SI when DC is enabled and errors on driver reboot and power off cycles. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3216 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2755 Reviewed-by: Yang Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index df4f20293c16..eb4da3666e05 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev) return 0; } +static int si_set_temperature_range(struct amdgpu_device *adev) +{ + int ret; + + ret = si_thermal_enable_alert(adev, false); + if (ret) + return ret; + ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) + return ret; + ret = si_thermal_enable_alert(adev, true); + if (ret) + return ret; + + return ret; +} + static void si_dpm_disable(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, static int si_dpm_late_init(void *handle) { + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->pm.dpm_enabled) + return 0; + + ret = si_set_temperature_range(adev); + if (ret) + return ret; +#if 0 //TODO ? + si_dpm_powergate_uvd(adev, true); +#endif return 0; } From f41900e4a6ef019d64a70394b0e0c3bd048d4ec8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Feb 2024 12:18:52 +0000 Subject: [PATCH 13/32] drm/buddy: fix range bias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a corner case here where start/end is after/before the block range we are currently checking. If so we need to be sure that splitting the block will eventually give use the block size we need. To do that we should adjust the block range to account for the start/end, and only continue with the split if the size/alignment will fit the requested size. Not doing so can result in leaving split blocks unmerged when it eventually fails. Fixes: afea229fe102 ("drm: improve drm_buddy_alloc function") Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Cc: # v5.18+ Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240219121851.25774-4-matthew.auld@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_buddy.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index c4222b886db7..f3a6ac908f81 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm, u64 start, u64 end, unsigned int order) { + u64 req_size = mm->chunk_size << order; struct drm_buddy_block *block; struct drm_buddy_block *buddy; LIST_HEAD(dfs); @@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm, if (drm_buddy_block_is_allocated(block)) continue; + if (block_start < start || block_end > end) { + u64 adjusted_start = max(block_start, start); + u64 adjusted_end = min(block_end, end); + + if (round_down(adjusted_end + 1, req_size) <= + round_up(adjusted_start, req_size)) + continue; + } + if (contains(start, end, block_start, block_end) && order == drm_buddy_block_order(block)) { /* From 2986314aa811c8a23aeb292edd30315495d54966 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Feb 2024 12:18:53 +0000 Subject: [PATCH 14/32] drm/buddy: check range allocation matches alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Likely not a big deal for real users, but for consistency we should respect the min_page_size here. Main issue is that bias allocations turns into normal range allocation if the range and size matches exactly, and in the next patch we want to add some unit tests for this part of the api. Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240219121851.25774-5-matthew.auld@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_buddy.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index f3a6ac908f81..5ebdd6f8f36e 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -771,8 +771,12 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, return -EINVAL; /* Actual range allocation */ - if (start + size == end) + if (start + size == end) { + if (!IS_ALIGNED(start | end, min_block_size)) + return -EINVAL; + return __drm_buddy_alloc_range(mm, start, size, NULL, blocks); + } original_size = size; original_min_size = min_block_size; From c70703320e557ff30847915e6a7631a9abdda16b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Feb 2024 12:18:54 +0000 Subject: [PATCH 15/32] drm/tests/drm_buddy: add alloc_range_bias test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sanity check range bias with DRM_BUDDY_RANGE_ALLOCATION. v2: - Be consistent with u32 here. Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240219121851.25774-6-matthew.auld@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/tests/drm_buddy_test.c | 218 +++++++++++++++++++++++++ 1 file changed, 218 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index edacc1adb28f..1008d5b9d61e 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -14,11 +14,216 @@ #include "../lib/drm_random.h" +static unsigned int random_seed; + static inline u64 get_size(int order, u64 chunk_size) { return (1 << order) * chunk_size; } +static void drm_test_buddy_alloc_range_bias(struct kunit *test) +{ + u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem; + DRM_RND_STATE(prng, random_seed); + unsigned int i, count, *order; + struct drm_buddy mm; + LIST_HEAD(allocated); + + bias_size = SZ_1M; + ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size); + ps = max(SZ_4K, ps); + mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */ + + kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps); + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + count = mm_size / bias_size; + order = drm_random_order(count, &prng); + KUNIT_EXPECT_TRUE(test, order); + + /* + * Idea is to split the address space into uniform bias ranges, and then + * in some random order allocate within each bias, using various + * patterns within. This should detect if allocations leak out from a + * given bias, for example. + */ + + for (i = 0; i < count; i++) { + LIST_HEAD(tmp); + u32 size; + + bias_start = order[i] * bias_size; + bias_end = bias_start + bias_size; + bias_rem = bias_size; + + /* internal round_up too big */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size + ps, bias_size, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size, bias_size); + + /* size too big */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size + ps, ps); + + /* bias range too small for size */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start + ps, + bias_end, bias_size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start + ps, bias_end, bias_size, ps); + + /* bias misaligned */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start + ps, + bias_end - ps, + bias_size >> 1, bias_size >> 1, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1); + + /* single big page */ + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size, bias_size, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size, bias_size); + drm_buddy_free_list(&mm, &tmp); + + /* single page with internal round_up */ + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, ps, bias_size, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, ps, bias_size); + drm_buddy_free_list(&mm, &tmp); + + /* random size within */ + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + if (size) + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + + bias_rem -= size; + /* too big for current avail */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_rem + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_rem + ps, ps); + + if (bias_rem) { + /* random fill of the remainder */ + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + size = max(size, ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + /* + * Intentionally allow some space to be left + * unallocated, and ideally not always on the bias + * boundaries. + */ + drm_buddy_free_list(&mm, &tmp); + } else { + list_splice_tail(&tmp, &allocated); + } + } + + kfree(order); + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); + + /* + * Something more free-form. Idea is to pick a random starting bias + * range within the address space and then start filling it up. Also + * randomly grow the bias range in both directions as we go along. This + * should give us bias start/end which is not always uniform like above, + * and in some cases will require the allocator to jump over already + * allocated nodes in the middle of the address space. + */ + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); + bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps); + bias_end = max(bias_end, bias_start + ps); + bias_rem = bias_end - bias_start; + + do { + u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size); + bias_rem -= size; + + /* + * Try to randomly grow the bias range in both directions, or + * only one, or perhaps don't grow at all. + */ + do { + u32 old_bias_start = bias_start; + u32 old_bias_end = bias_end; + + if (bias_start) + bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps); + if (bias_end != mm_size) + bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps); + + bias_rem += old_bias_start - bias_start; + bias_rem += bias_end - old_bias_end; + } while (!bias_rem && (bias_start || bias_end != mm_size)); + } while (bias_rem); + + KUNIT_ASSERT_EQ(test, bias_start, 0); + KUNIT_ASSERT_EQ(test, bias_end, mm_size); + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, bias_end, + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc passed with bias(%x-%x), size=%u\n", + bias_start, bias_end, ps); + + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); +} + static void drm_test_buddy_alloc_contiguous(struct kunit *test) { u32 mm_size, ps = SZ_4K, i, n_pages, total; @@ -363,17 +568,30 @@ static void drm_test_buddy_alloc_limit(struct kunit *test) drm_buddy_fini(&mm); } +static int drm_buddy_suite_init(struct kunit_suite *suite) +{ + while (!random_seed) + random_seed = get_random_u32(); + + kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n", + random_seed); + + return 0; +} + static struct kunit_case drm_buddy_tests[] = { KUNIT_CASE(drm_test_buddy_alloc_limit), KUNIT_CASE(drm_test_buddy_alloc_optimistic), KUNIT_CASE(drm_test_buddy_alloc_pessimistic), KUNIT_CASE(drm_test_buddy_alloc_pathological), KUNIT_CASE(drm_test_buddy_alloc_contiguous), + KUNIT_CASE(drm_test_buddy_alloc_range_bias), {} }; static struct kunit_suite drm_buddy_test_suite = { .name = "drm_buddy", + .suite_init = drm_buddy_suite_init, .test_cases = drm_buddy_tests, }; From 664bad6af3cbe01d6804b7264bee674b3e7dae7e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 28 Feb 2024 00:08:08 +0200 Subject: [PATCH 16/32] Revert "drm/msm/dp: use drm_bridge_hpd_notify() to report HPD status changes" This reverts commit e467e0bde881 ("drm/msm/dp: use drm_bridge_hpd_notify() to report HPD status changes"). The commit changed the way how the MSM DP driver communicates HPD-related events to the userspace. The mentioned commit made some of the HPD events being reported earlier. This way userspace starts poking around. It interacts in a bad way with the dp_bridge_detect and the driver's state machine, ending up either with the very long delays during hotplug detection or even inability of the DP driver to report the display as connected. A proper fix will involve redesigning of the HPD handling in the MSM DP driver. It is underway, but it will be intrusive and can not be thought about as a simple fix for the issue. Thus, revert the offending commit. Fixes: e467e0bde881 ("drm/msm/dp: use drm_bridge_hpd_notify() to report HPD status changes") Link: https://gitlab.freedesktop.org/drm/msm/-/issues/50 Reported-by: Johan Hovold Link: https://lore.kernel.org/r/Zd3YPGmrprxv-N-O@hovoldconsulting.com/ Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Tested-by: Paloma Arellano Tested-by: Johan Hovold Tested-by: Neil Armstrong # on SM8650-HDK Patchwork: https://patchwork.freedesktop.org/patch/580313/ Link: https://lore.kernel.org/r/20240227220808.50146-1-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/dp/dp_display.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index d37d599aec27..4c72124ffb5d 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -329,10 +329,26 @@ static const struct component_ops dp_display_comp_ops = { .unbind = dp_display_unbind, }; +static void dp_display_send_hpd_event(struct msm_dp *dp_display) +{ + struct dp_display_private *dp; + struct drm_connector *connector; + + dp = container_of(dp_display, struct dp_display_private, dp_display); + + connector = dp->dp_display.connector; + drm_helper_hpd_irq_event(connector->dev); +} + static int dp_display_send_hpd_notification(struct dp_display_private *dp, bool hpd) { - struct drm_bridge *bridge = dp->dp_display.bridge; + if ((hpd && dp->dp_display.link_ready) || + (!hpd && !dp->dp_display.link_ready)) { + drm_dbg_dp(dp->drm_dev, "HPD already %s\n", + (hpd ? "on" : "off")); + return 0; + } /* reset video pattern flag on disconnect */ if (!hpd) { @@ -348,7 +364,7 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp, drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n", dp->dp_display.connector_type, hpd); - drm_bridge_hpd_notify(bridge, dp->dp_display.link_ready); + dp_display_send_hpd_event(&dp->dp_display); return 0; } From b7cdccc6a849568775f738b1e233f751a8fed013 Mon Sep 17 00:00:00 2001 From: Ryan Lin Date: Wed, 28 Feb 2024 11:39:21 -0700 Subject: [PATCH 17/32] drm/amd/display: Add monitor patch for specific eDP [WHY] Some eDP panels' ext caps don't write initial values. The value of dpcd_addr (0x317) can be random and the backlight control interface will be incorrect. [HOW] Add new panel patches to remove sink ext caps. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org # 6.5.x Cc: Tsung-hua Lin Cc: Chris Chi Reviewed-by: Wayne Lin Acked-by: Alex Hung Signed-off-by: Ryan Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 85b7f58a7f35..c27063305a13 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -67,6 +67,8 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */ case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB): case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B): + case drm_edid_encode_panel_id('B', 'O', 'E', 0x092A): + case drm_edid_encode_panel_id('L', 'G', 'D', 0x06D1): DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.remove_sink_ext_caps = true; break; @@ -120,6 +122,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps( edid_caps->edid_hdmi = connector->display_info.is_hdmi; + apply_edid_quirks(edid_buf, edid_caps); + sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads); if (sad_count <= 0) return result; @@ -146,8 +150,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps( else edid_caps->speaker_flags = DEFAULT_SPEAKER_LOCATION; - apply_edid_quirks(edid_buf, edid_caps); - kfree(sads); kfree(sadb); From 7e10d87e63f7f9c324d533bb4369e35bb19ab9a9 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 21 Feb 2024 14:30:18 +0100 Subject: [PATCH 18/32] drm/xe: Add uapi for dumpable bos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the flag XE_VM_BIND_FLAG_DUMPABLE to notify devcoredump that this mapping should be dumped. This is not hooked up, but the uapi should be ready before merging. It's likely easier to dump the contents of the bo's at devcoredump readout time, so it's better if the bos will stay unmodified after a hang. The NEEDS_CPU_MAPPING flag is removed as requirement. Signed-off-by: Maarten Lankhorst Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240221133024.898315-3-maarten.lankhorst@linux.intel.com (cherry picked from commit 76a86b58d2b3de31e88acb487ebfa0c3cc7c41d2) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 3 ++- include/uapi/drm/xe_drm.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 921ca28d49dd..945c89b5e4b5 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2722,7 +2722,8 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, #define SUPPORTED_FLAGS \ (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL) + DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ + DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 6d11ee9e571a..4f010949f812 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -931,6 +931,7 @@ struct drm_xe_vm_bind_op { #define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) #define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) +#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) /** @flags: Bind flags */ __u32 flags; From b6f4fb397db09024c189834d638abbd21bf00769 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 26 Dec 2023 08:11:14 -0800 Subject: [PATCH 19/32] drm/xe/uapi: Remove DRM_XE_VM_BIND_FLAG_ASYNC comment left over MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a comment left over of commit d3d767396a02 ("drm/xe/uapi: Remove sync binds"). Fixes: d3d767396a02 ("drm/xe/uapi: Remove sync binds") Reviewed-by: Rodrigo Vivi Cc: Matthew Brost Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20231226172321.61518-1-jose.souza@intel.com (cherry picked from commit f031c3a7af8ea06790dd0a71872c4f0175084baa) Signed-off-by: Thomas Hellström --- include/uapi/drm/xe_drm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 4f010949f812..a7274a99d456 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -832,7 +832,6 @@ struct drm_xe_vm_destroy { * * and the @flags can be: * - %DRM_XE_VM_BIND_FLAG_READONLY - * - %DRM_XE_VM_BIND_FLAG_ASYNC * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the * MAP operation immediately rather than deferring the MAP to the page * fault handler. From eaa367a0317ea4cbc7aa60f25829c89c0e12717b Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 22 Feb 2024 18:23:56 -0500 Subject: [PATCH 20/32] drm/xe/uapi: Remove unused flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those cases missed in previous uAPI cleanups were mostly accidentally brought in from i915 or created to exercise the possibilities of gpuvm but they are not used by userspace yet, so let's remove them. They can still be brought back later if needed. v2: - Fix XE_VM_FLAG_FAULT_MODE support in xe_lrc.c (Brian Welty) - Leave DRM_XE_VM_BIND_OP_UNMAP_ALL (José Roberto de Souza) - Ensure invalid flag values are rejected (Rodrigo Vivi) v3: Rebase after removal of persistent exec_queues (Francois Dugast) v4: Rodrigo: Rebase after the new dumpable flag. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Thomas Hellström Cc: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232356.175431-1-rodrigo.vivi@intel.com (cherry picked from commit 84a1ed5e67565b09b8fd22a26754d2897de55ce0) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_exec_queue.c | 88 +----------------------- drivers/gpu/drm/xe/xe_exec_queue_types.h | 10 --- drivers/gpu/drm/xe/xe_lrc.c | 10 +-- drivers/gpu/drm/xe/xe_vm.c | 12 +--- drivers/gpu/drm/xe/xe_vm_types.h | 4 -- include/uapi/drm/xe_drm.h | 19 ----- 6 files changed, 6 insertions(+), 137 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 3acfd4f07666..49223026c89f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -309,85 +309,6 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * return q->ops->set_timeslice(q, value); } -static int exec_queue_set_preemption_timeout(struct xe_device *xe, - struct xe_exec_queue *q, u64 value, - bool create) -{ - u32 min = 0, max = 0; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - return q->ops->set_preempt_timeout(q, value); -} - -static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - u32 min = 0, max = 0; - - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - return q->ops->set_job_timeout(q, value); -} - -static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_trigger = value; - - return 0; -} - -static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_notify = value; - - return 0; -} - -static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - if (value > DRM_XE_ACC_GRANULARITY_64M) - return -EINVAL; - - q->usm.acc_granularity = value; - - return 0; -} - typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create); @@ -395,11 +316,6 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -418,7 +334,9 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(exec_queue_set_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad)) + XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 947bbc4b285d..36f4901d8d7e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -150,16 +150,6 @@ struct xe_exec_queue { spinlock_t lock; } compute; - /** @usm: unified shared memory state */ - struct { - /** @acc_trigger: access counter trigger */ - u32 acc_trigger; - /** @acc_notify: access counter notify */ - u32 acc_notify; - /** @acc_granularity: access counter granularity */ - u32 acc_granularity; - } usm; - /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 0ec5ad2539f1..b38319d2801e 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -682,8 +682,6 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) #define PVC_CTX_ASID (0x2e + 1) #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) -#define ACC_GRANULARITY_S 20 -#define ACC_NOTIFY_S 16 int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size) @@ -754,13 +752,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, - (q->usm.acc_granularity << - ACC_GRANULARITY_S) | vm->usm.asid); - if (xe->info.has_usm && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, - (q->usm.acc_notify << ACC_NOTIFY_S) | - q->usm.acc_trigger); + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 945c89b5e4b5..1d82616aa935 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2117,10 +2117,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.immediate = - flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; op->map.pat_index = pat_index; } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { @@ -2313,8 +2309,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? VMA_CREATE_FLAG_IS_NULL : 0; @@ -2445,7 +2439,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, - op->map.immediate || !xe_vm_in_fault_mode(vm), + !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2720,9 +2714,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, return 0; } -#define SUPPORTED_FLAGS \ - (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ +#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index a603cc2eb56b..0f220b5d2e7b 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -288,10 +288,6 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; - /** @immediate: Immediate bind */ - bool immediate; - /** @read_only: Read only */ - bool read_only; /** @is_null: is NULL binding */ bool is_null; /** @pat_index: The pat index to use for this operation. */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index a7274a99d456..bb0c8a994116 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -831,10 +831,6 @@ struct drm_xe_vm_destroy { * - %DRM_XE_VM_BIND_OP_PREFETCH * * and the @flags can be: - * - %DRM_XE_VM_BIND_FLAG_READONLY - * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the - * MAP operation immediately rather than deferring the MAP to the page - * fault handler. * - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page * tables are setup with a special bit which indicates writes are * dropped and all reads return zero. In the future, the NULL flags @@ -927,8 +923,6 @@ struct drm_xe_vm_bind_op { /** @op: Bind operation to perform */ __u32 op; -#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) /** @flags: Bind flags */ @@ -1045,19 +1039,6 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 -/* Monitor 128KB contiguous region with 4K sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_128K 0 -/* Monitor 2MB contiguous region with 64KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_2M 1 -/* Monitor 16MB contiguous region with 512KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_16M 2 -/* Monitor 64MB contiguous region with 2M sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_64M 3 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; From dc15bd0aa7b5ba77bb216394b368c6f9aedbf2f4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Feb 2024 15:20:19 -0800 Subject: [PATCH 21/32] drm/xe: Fix execlist splat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although execlist submission is not supported it should be kept in a basic working state as it can be used for very early hardware bring up. Fix the below splat. WARNING: CPU: 3 PID: 11 at drivers/gpu/drm/xe/xe_execlist.c:217 execlist_run_job+0x1c2/0x220 [xe] Modules linked in: xe drm_kunit_helpers drm_gpuvm drm_ttm_helper ttm drm_exec drm_suballoc_helper drm_buddy gpu_sched mei_pxp mei_hdcp wmi_bmof x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul snd_hda_intel ghash_clmulni_intel snd_intel_dspcfg snd_hda_codec snd_hwdep snd_hda_core video snd_pcm mei_me mei wmi fuse e1000e i2c_i801 ptp i2c_smbus pps_core intel_lpss_pci CPU: 3 PID: 11 Comm: kworker/u16:0 Tainted: G U 6.8.0-rc3-guc+ #1046 Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS TGLSFWI1.R00.3243.A01.2006102133 06/10/2020 Workqueue: rcs0 drm_sched_run_job_work [gpu_sched] RIP: 0010:execlist_run_job+0x1c2/0x220 [xe] Code: 8b f8 03 00 00 4c 89 39 e9 e2 fe ff ff 49 8d 7d 20 be ff ff ff ff e8 ed fd a6 e1 85 c0 0f 85 e1 fe ff ff 0f 0b e9 da fe ff ff <0f> 0b 0f 0b 41 83 fc 03 0f 86 8a fe ff ff 0f 0b e9 83 fe ff ff be RSP: 0018:ffffc9000013bdb8 EFLAGS: 00010246 RAX: ffff888105021a00 RBX: ffff888105078400 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffc9000013bd14 RDI: ffffc90001609090 RBP: ffff88811e3f0040 R08: 0000000000000088 R09: 00000000ffffff81 R10: 0000000000000001 R11: ffff88810c10c000 R12: 00000000fffffffe R13: ffff888109b72c28 R14: ffff8881050784a0 R15: ffff888105078408 FS: 0000000000000000(0000) GS:ffff88849f980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000563459d130f8 CR3: 000000000563a001 CR4: 0000000000f70ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0x7f/0x170 ? execlist_run_job+0x1c2/0x220 [xe] ? report_bug+0x1c7/0x1d0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? execlist_run_job+0x1c2/0x220 [xe] ? execlist_run_job+0x2c/0x220 [xe] drm_sched_run_job_work+0x246/0x3f0 [gpu_sched] ? process_one_work+0x18d/0x4e0 process_one_work+0x1f7/0x4e0 worker_thread+0x1da/0x3e0 ? __pfx_worker_thread+0x10/0x10 kthread+0xfe/0x130 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fixes: 9b9529ce379a ("drm/xe: Rename engine to exec_queue") Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232021.3911545-2-matthew.brost@intel.com (cherry picked from commit ddadc7120d4be7a40a9745924339c472c5850d14) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_execlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 42d01bbbf7d0..acb4d9f38fd7 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -212,7 +212,7 @@ static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl) { struct xe_execlist_port *port = exl->port; - enum xe_exec_queue_priority priority = exl->active_priority; + enum xe_exec_queue_priority priority = exl->q->sched_props.priority; XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET); XE_WARN_ON(priority < 0); From ccff0b21ebe0cbe3f402edb27b0b1fd22a9d08aa Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Feb 2024 15:20:21 -0800 Subject: [PATCH 22/32] drm/xe: Don't support execlists in xe_gt_tlb_invalidation layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xe_gt_tlb_invalidation layer implements TLB invalidations for a GuC backend. Simply return if in execlists mode. A follow up may properly implement the xe_gt_tlb_invalidation layer for both GuC and execlists. Fixes: a9351846d945 ("drm/xe: Break of TLB invalidation into its own file") Cc: Rodrigo Vivi Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232021.3911545-4-matthew.brost@intel.com (cherry picked from commit a9e483dda3efa5b9aae5d9eef94d2c3a878d9bea) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 7eef23a00d77..f4c485289dbe 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -247,6 +247,14 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, xe_gt_assert(gt, vma); + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) { + if (fence) + __invalidation_fence_signal(fence); + + return 0; + } + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ if (!xe->info.has_range_tlb_invalidation) { @@ -317,6 +325,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) struct drm_printer p = drm_err_printer(__func__); int ret; + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) + return 0; + /* * XXX: See above, this algorithm only works if seqno are always in * order From a41f6b0db58fe3cc2686e4065db48ebf44effa36 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Feb 2024 07:55:54 -0800 Subject: [PATCH 23/32] drm/xe: Use vmalloc for array of bind allocation in bind IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use vmalloc in effort to allow a user pass in a large number of binds in an IOCTL (mesa use case). Also use array allocations rather open coding the size calculation. v2: Use __GFP_ACCOUNT for allocations (Thomas) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240226155554.103384-1-matthew.brost@intel.com (cherry picked from commit 35ed1d2bfff7b1969e7f99f3641a83ea54f037e2) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 1d82616aa935..041b29439c4b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2740,8 +2740,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, u64 __user *bind_user = u64_to_user_ptr(args->vector_of_binds); - *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) * - args->num_binds, GFP_KERNEL); + *bind_ops = kvmalloc_array(args->num_binds, + sizeof(struct drm_xe_vm_bind_op), + GFP_KERNEL | __GFP_ACCOUNT); if (!*bind_ops) return -ENOMEM; @@ -2831,7 +2832,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, free_bind_ops: if (args->num_binds > 1) - kfree(*bind_ops); + kvfree(*bind_ops); return err; } @@ -2919,13 +2920,15 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (args->num_binds) { - bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL); + bos = kvcalloc(args->num_binds, sizeof(*bos), + GFP_KERNEL | __GFP_ACCOUNT); if (!bos) { err = -ENOMEM; goto release_vm_lock; } - ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL); + ops = kvcalloc(args->num_binds, sizeof(*ops), + GFP_KERNEL | __GFP_ACCOUNT); if (!ops) { err = -ENOMEM; goto release_vm_lock; @@ -3066,10 +3069,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (i = 0; bos && i < args->num_binds; ++i) xe_bo_put(bos[i]); - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; @@ -3093,10 +3096,10 @@ put_exec_queue: if (q) xe_exec_queue_put(q); free_objs: - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; } From 14d4d0ad0ab5aa980cf71a82da1297b28b274de1 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 14 Feb 2024 16:53:53 -0800 Subject: [PATCH 24/32] drm/xe: get rid of MAX_BINDS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa has been issuing a single bind operation per ioctl since xe.ko changed to GPUVA due xe.ko bug #746. If I change Mesa to try again to issue every single bind operation it can in the same ioctl, it hits the MAX_BINDS assertion when running Vulkan conformance tests. Test dEQP-VK.sparse_resources.transfer_queue.3d.rgba32i.1024_128_8 issues 960 bind operations in a single ioctl, it's the most I could find in the conformance suite. I don't see a reason to keep the MAX_BINDS restriction: it doesn't seem to be preventing any specific issue. If the number is too big for the memory allocations, then those will fail. Nothing related to num_binds seems to be using the stack. Let's just get rid of it. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Testcase: dEQP-VK.sparse_resources.transfer_queue.3d.rgba32i.1024_128_8 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/746 Cc: Matthew Brost Signed-off-by: Paulo Zanoni Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240215005353.1295420-1-paulo.r.zanoni@intel.com (cherry picked from commit ba6bbdc6eaef92998ec7f323c9e1211d344d2556) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 041b29439c4b..75b44777067e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2719,8 +2719,6 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) -#define MAX_BINDS 512 /* FIXME: Picking random upper limit */ - static int vm_bind_ioctl_check_args(struct xe_device *xe, struct drm_xe_vm_bind *args, struct drm_xe_vm_bind_op **bind_ops) @@ -2732,8 +2730,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions) || - XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS)) + if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; if (args->num_binds > 1) { From 12cb2b21c2d037a4299028fc56ac941185992e5e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Feb 2024 13:46:37 +0100 Subject: [PATCH 25/32] drm/xe/mmio: fix build warning for BAR resize on 32-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clang complains about a nonsensical test on builds with a 32-bit phys_addr_t, which means resizing will always fail: drivers/gpu/drm/xe/xe_mmio.c:109:23: error: result of comparison of constant 4294967296 with expression of type 'resource_size_t' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare] 109 | root_res->start > 0x100000000ull) | ~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~ Previously, BAR resize was always disallowed on 32-bit kernels, but this apparently changed recently. Since 32-bit machines can in theory support PAE/LPAE for large address spaces, this may end up useful, so change the driver to shut up the warning but still work when phys_addr_t/resource_size_t is 64 bit wide. Fixes: 9a6e6c14bfde ("drm/xe/mmio: Use non-atomic writeq/readq variant for 32b") Signed-off-by: Arnd Bergmann Reviewed-by: Lucas De Marchi Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240226124736.1272949-2-arnd@kernel.org Signed-off-by: Lucas De Marchi (cherry picked from commit f5d3983366c0b88ec388b3407b29c1c0862ee2b8) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 5f6b53ea5528..02f7808f28ca 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -105,7 +105,7 @@ static void xe_resize_vram_bar(struct xe_device *xe) pci_bus_for_each_resource(root, root_res, i) { if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && - root_res->start > 0x100000000ull) + (u64)root_res->start > 0x100000000ul) break; } From a09946a9a903e809abab9e0fb813dbf5a32084f5 Mon Sep 17 00:00:00 2001 From: Priyanka Dandamudi Date: Tue, 20 Feb 2024 10:17:48 +0530 Subject: [PATCH 26/32] drm/xe/xe_bo_move: Enhance xe_bo_move trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhanced xe_bo_move trace to be more readable. It will help to show the migration details. Src and dst details. v2: Modify trace_xe_bo_move(), it takes the integer mem_type rather than a string. Make mem_type_to_name() extern, it will be used by trace.(Thomas) v3: Move mem_type_to_name() to xe_bo.[ch] (Thomas, Matt) v4: Add device details to reduce ambiquity related to vram0/vram1. (Oak) v5: Rename mem_type_to_name to xe_mem_type_to_name. (Thomas) v6: Optimised code to use xe_bo_device(__entry->bo). (Thomas) Cc: Thomas Hellström Cc: Oak Zeng Cc: Kempczynski Zbigniew Cc: Matthew Brost Cc: Brian Welty Signed-off-by: Priyanka Dandamudi Reviewed-by: Oak Zeng Reviewed-by: Thomas Hellström Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240220044748.948496-1-priyanka.dandamudi@intel.com (cherry picked from commit a0df2cc858c309a8bc2e87b4274772587aa25e05) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_bo.c | 11 +++++++++-- drivers/gpu/drm/xe/xe_bo.h | 1 + drivers/gpu/drm/xe/xe_drm_client.c | 12 ++---------- drivers/gpu/drm/xe/xe_trace.h | 25 ++++++++++++++++++++++--- 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 0b0e262e2166..f2ea188663ac 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -28,6 +28,14 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" +const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = { + [XE_PL_SYSTEM] = "system", + [XE_PL_TT] = "gtt", + [XE_PL_VRAM0] = "vram0", + [XE_PL_VRAM1] = "vram1", + [XE_PL_STOLEN] = "stolen" +}; + static const struct ttm_place sys_placement_flags = { .fpfn = 0, .lpfn = 0, @@ -713,8 +721,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, migrate = xe->tiles[0].migrate; xe_assert(xe, migrate); - - trace_xe_bo_move(bo); + trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type); xe_device_mem_access_get(xe); if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 9b1279aca127..8be42ac6cd07 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -243,6 +243,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo); int xe_bo_restore_pinned(struct xe_bo *bo); extern struct ttm_device_funcs xe_ttm_funcs; +extern const char *const xe_mem_type_to_name[]; int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 82d1305e831f..6040e4d22b28 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -131,14 +131,6 @@ static void bo_meminfo(struct xe_bo *bo, static void show_meminfo(struct drm_printer *p, struct drm_file *file) { - static const char *const mem_type_to_name[TTM_NUM_MEM_TYPES] = { - [XE_PL_SYSTEM] = "system", - [XE_PL_TT] = "gtt", - [XE_PL_VRAM0] = "vram0", - [XE_PL_VRAM1] = "vram1", - [4 ... 6] = NULL, - [XE_PL_STOLEN] = "stolen" - }; struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {}; struct xe_file *xef = file->driver_priv; struct ttm_device *bdev = &xef->xe->ttm; @@ -171,7 +163,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) spin_unlock(&client->bos_lock); for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { - if (!mem_type_to_name[mem_type]) + if (!xe_mem_type_to_name[mem_type]) continue; man = ttm_manager_type(bdev, mem_type); @@ -182,7 +174,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) DRM_GEM_OBJECT_RESIDENT | (mem_type != XE_PL_SYSTEM ? 0 : DRM_GEM_OBJECT_PURGEABLE), - mem_type_to_name[mem_type]); + xe_mem_type_to_name[mem_type]); } } } diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 95163c303f3e..0cce98a6b14b 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -12,6 +12,7 @@ #include #include +#include "xe_bo.h" #include "xe_bo_types.h" #include "xe_exec_queue_types.h" #include "xe_gpu_scheduler_types.h" @@ -100,9 +101,27 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, TP_ARGS(bo) ); -DEFINE_EVENT(xe_bo, xe_bo_move, - TP_PROTO(struct xe_bo *bo), - TP_ARGS(bo) +TRACE_EVENT(xe_bo_move, + TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement), + TP_ARGS(bo, new_placement, old_placement), + TP_STRUCT__entry( + __field(struct xe_bo *, bo) + __field(size_t, size) + __field(u32, new_placement) + __field(u32, old_placement) + __array(char, device_id, 12) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->size = bo->size; + __entry->new_placement = new_placement; + __entry->old_placement = old_placement; + strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); + ), + TP_printk("migrate object %p [size %zu] from %s to %s device_id:%s", + __entry->bo, __entry->size, xe_mem_type_to_name[__entry->old_placement], + xe_mem_type_to_name[__entry->new_placement], __entry->device_id) ); DECLARE_EVENT_CLASS(xe_exec_queue, From 4ca5c82988e73f51587e2d7564d44f99429c111a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Feb 2024 06:41:24 -0800 Subject: [PATCH 27/32] drm/xe: Use pointers in trace events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a0df2cc858c3 ("drm/xe/xe_bo_move: Enhance xe_bo_move trace") inadvertently reverted commit 8d038f49c1f3 ("drm/xe: Fix cast on trace variable"), breaking the build on 32bits. As noted by Ville, there's no point in converting the pointers to u64 and add casts everywhere. In fact, it's better to just use %p and let the address be hashed. Convert all the cases in xe_trace.h to use pointers. Cc: Ville Syrjälä Cc: Matt Roper Cc: Priyanka Dandamudi Cc: Oak Zeng Cc: Thomas Hellström Signed-off-by: Lucas De Marchi Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240222144125.2862546-1-lucas.demarchi@intel.com (cherry picked from commit 7a975748d4dc0a524c99a390c6f74b7097ef8cf7) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_trace.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 0cce98a6b14b..3b97633d81d8 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -27,16 +27,16 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, TP_ARGS(fence), TP_STRUCT__entry( - __field(u64, fence) + __field(struct xe_gt_tlb_invalidation_fence *, fence) __field(int, seqno) ), TP_fast_assign( - __entry->fence = (u64)fence; + __entry->fence = fence; __entry->seqno = fence->seqno; ), - TP_printk("fence=0x%016llx, seqno=%d", + TP_printk("fence=%p, seqno=%d", __entry->fence, __entry->seqno) ); @@ -83,16 +83,16 @@ DECLARE_EVENT_CLASS(xe_bo, TP_STRUCT__entry( __field(size_t, size) __field(u32, flags) - __field(u64, vm) + __field(struct xe_vm *, vm) ), TP_fast_assign( __entry->size = bo->size; __entry->flags = bo->flags; - __entry->vm = (unsigned long)bo->vm; + __entry->vm = bo->vm; ), - TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx", + TP_printk("size=%zu, flags=0x%02x, vm=%p", __entry->size, __entry->flags, __entry->vm) ); @@ -346,16 +346,16 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_STRUCT__entry( __field(u64, ctx) __field(u32, seqno) - __field(u64, fence) + __field(struct xe_hw_fence *, fence) ), TP_fast_assign( __entry->ctx = fence->dma.context; __entry->seqno = fence->dma.seqno; - __entry->fence = (unsigned long)fence; + __entry->fence = fence; ), - TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u", + TP_printk("ctx=0x%016llx, fence=%p, seqno=%u", __entry->ctx, __entry->fence, __entry->seqno) ); @@ -384,7 +384,7 @@ DECLARE_EVENT_CLASS(xe_vma, TP_ARGS(vma), TP_STRUCT__entry( - __field(u64, vma) + __field(struct xe_vma *, vma) __field(u32, asid) __field(u64, start) __field(u64, end) @@ -392,14 +392,14 @@ DECLARE_EVENT_CLASS(xe_vma, ), TP_fast_assign( - __entry->vma = (unsigned long)vma; + __entry->vma = vma; __entry->asid = xe_vma_vm(vma)->usm.asid; __entry->start = xe_vma_start(vma); __entry->end = xe_vma_end(vma) - 1; __entry->ptr = xe_vma_userptr(vma); ), - TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,", + TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", __entry->vma, __entry->asid, __entry->start, __entry->end, __entry->ptr) ) @@ -484,16 +484,16 @@ DECLARE_EVENT_CLASS(xe_vm, TP_ARGS(vm), TP_STRUCT__entry( - __field(u64, vm) + __field(struct xe_vm *, vm) __field(u32, asid) ), TP_fast_assign( - __entry->vm = (unsigned long)vm; + __entry->vm = vm; __entry->asid = vm->usm.asid; ), - TP_printk("vm=0x%016llx, asid=0x%05x", __entry->vm, + TP_printk("vm=%p, asid=0x%05x", __entry->vm, __entry->asid) ); From 86b3cd6d0713b3b1cb4e17dbddd4d4a2bff98d60 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 15 Feb 2024 20:11:51 +0200 Subject: [PATCH 28/32] drm/xe: Expose user fence from xe_sync_entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By allowing getting reference to user fence, we can control the lifetime outside of sync entries. This is needed to allow vma to track the associated user fence that was provided with bind ioctl. v2: xe_user_fence can be kept opaque (Jani, Matt) v3: indent fix (Matt) Cc: Thomas Hellström Cc: Matthew Brost Cc: Jani Nikula Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240215181152.450082-2-mika.kuoppala@linux.intel.com (cherry picked from commit 977e5b82e0901480bc201342d39f855fc0a2ef47) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_sync.c | 58 ++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_sync.h | 4 +++ drivers/gpu/drm/xe/xe_sync_types.h | 2 +- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index aab92bee1d7c..02c9577fe418 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -19,7 +19,7 @@ #include "xe_macros.h" #include "xe_sched_job_types.h" -struct user_fence { +struct xe_user_fence { struct xe_device *xe; struct kref refcount; struct dma_fence_cb cb; @@ -27,31 +27,32 @@ struct user_fence { struct mm_struct *mm; u64 __user *addr; u64 value; + int signalled; }; static void user_fence_destroy(struct kref *kref) { - struct user_fence *ufence = container_of(kref, struct user_fence, + struct xe_user_fence *ufence = container_of(kref, struct xe_user_fence, refcount); mmdrop(ufence->mm); kfree(ufence); } -static void user_fence_get(struct user_fence *ufence) +static void user_fence_get(struct xe_user_fence *ufence) { kref_get(&ufence->refcount); } -static void user_fence_put(struct user_fence *ufence) +static void user_fence_put(struct xe_user_fence *ufence) { kref_put(&ufence->refcount, user_fence_destroy); } -static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, - u64 value) +static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, + u64 value) { - struct user_fence *ufence; + struct xe_user_fence *ufence; ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) @@ -69,7 +70,7 @@ static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, static void user_fence_worker(struct work_struct *w) { - struct user_fence *ufence = container_of(w, struct user_fence, worker); + struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); @@ -80,10 +81,11 @@ static void user_fence_worker(struct work_struct *w) } wake_up_all(&ufence->xe->ufence_wq); + WRITE_ONCE(ufence->signalled, 1); user_fence_put(ufence); } -static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) +static void kick_ufence(struct xe_user_fence *ufence, struct dma_fence *fence) { INIT_WORK(&ufence->worker, user_fence_worker); queue_work(ufence->xe->ordered_wq, &ufence->worker); @@ -92,7 +94,7 @@ static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { - struct user_fence *ufence = container_of(cb, struct user_fence, cb); + struct xe_user_fence *ufence = container_of(cb, struct xe_user_fence, cb); kick_ufence(ufence, fence); } @@ -340,3 +342,39 @@ err_out: return ERR_PTR(-ENOMEM); } + +/** + * xe_sync_ufence_get() - Get user fence from sync + * @sync: input sync + * + * Get a user fence reference from sync. + * + * Return: xe_user_fence pointer with reference + */ +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync) +{ + user_fence_get(sync->ufence); + + return sync->ufence; +} + +/** + * xe_sync_ufence_put() - Put user fence reference + * @ufence: user fence reference + * + */ +void xe_sync_ufence_put(struct xe_user_fence *ufence) +{ + user_fence_put(ufence); +} + +/** + * xe_sync_ufence_get_status() - Get user fence status + * @ufence: user fence + * + * Return: 1 if signalled, 0 not signalled, <0 on error + */ +int xe_sync_ufence_get_status(struct xe_user_fence *ufence) +{ + return READ_ONCE(ufence->signalled); +} diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index f43cdcaca6c5..0fd0d51208e6 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -38,4 +38,8 @@ static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync) return !!sync->ufence; } +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync); +void xe_sync_ufence_put(struct xe_user_fence *ufence); +int xe_sync_ufence_get_status(struct xe_user_fence *ufence); + #endif diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 852db5e7884f..30ac3f51993b 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,7 +18,7 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain *chain_fence; - struct user_fence *ufence; + struct xe_user_fence *ufence; u64 addr; u64 timeline_value; u32 type; From 785f4cc0689f32ab615f043d7889d17eb4f37061 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 15 Feb 2024 20:11:52 +0200 Subject: [PATCH 29/32] drm/xe: Deny unbinds if uapi ufence pending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If user fence was provided for MAP in vm_bind_ioctl and it has still not been signalled, deny UNMAP of said vma with EBUSY as long as unsignalled fence exists. This guarantees that MAP vs UNMAP sequences won't escape under the radar if we ever want to track the client's state wrt to completed and accessible MAPs. By means of intercepting the ufence release signalling. v2: find ufence with num_fences > 1 (Matt) v3: careful on clearing vma ufence (Matt) Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1159 Cc: Thomas Hellström Cc: Matthew Brost Cc: Joonas Lahtinen Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240215181152.450082-3-mika.kuoppala@linux.intel.com (cherry picked from commit 158900ade92cce5ab85a06d618eb51e6c7ffb28a) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 37 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm_types.h | 7 ++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 75b44777067e..3b21afe5b488 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -897,6 +897,11 @@ static void xe_vma_destroy_late(struct xe_vma *vma) struct xe_device *xe = vm->xe; bool read_only = xe_vma_read_only(vma); + if (vma->ufence) { + xe_sync_ufence_put(vma->ufence); + vma->ufence = NULL; + } + if (xe_vma_is_userptr(vma)) { struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; @@ -1608,6 +1613,16 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, trace_xe_vma_unbind(vma); + if (vma->ufence) { + struct xe_user_fence * const f = vma->ufence; + + if (!xe_sync_ufence_get_status(f)) + return ERR_PTR(-EBUSY); + + vma->ufence = NULL; + xe_sync_ufence_put(f); + } + if (number_tiles > 1) { fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); @@ -1741,6 +1756,21 @@ err_fences: return ERR_PTR(err); } +static struct xe_user_fence * +find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) +{ + unsigned int i; + + for (i = 0; i < num_syncs; i++) { + struct xe_sync_entry *e = &syncs[i]; + + if (xe_sync_is_ufence(e)) + return xe_sync_ufence_get(e); + } + + return NULL; +} + static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool immediate, bool first_op, @@ -1748,9 +1778,16 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); + struct xe_user_fence *ufence; xe_vm_assert_held(vm); + ufence = find_ufence_get(syncs, num_syncs); + if (vma->ufence && ufence) + xe_sync_ufence_put(vma->ufence); + + vma->ufence = ufence ?: vma->ufence; + if (immediate) { fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, last_op); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 0f220b5d2e7b..7300eea5394b 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -19,6 +19,7 @@ struct xe_bo; struct xe_sync_entry; +struct xe_user_fence; struct xe_vm; #define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS @@ -104,6 +105,12 @@ struct xe_vma { * @pat_index: The pat index to use when encoding the PTEs for this vma. */ u16 pat_index; + + /** + * @ufence: The user fence that was provided with MAP. + * Needs to be signalled before UNMAP can be processed. + */ + struct xe_user_fence *ufence; }; /** From 8188cae3cc3d8018ec97ca9ab8caa3acc69a056d Mon Sep 17 00:00:00 2001 From: Priyanka Dandamudi Date: Wed, 21 Feb 2024 15:49:50 +0530 Subject: [PATCH 30/32] drm/xe/xe_trace: Add move_lacks_source detail to xe_bo_move trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add move_lacks_source detail to xe_bo_move trace to make it readable that is to check if it is migrate clear or migrate copy. Cc: Thomas Hellström Signed-off-by: Priyanka Dandamudi Reviewed-by: Thomas Hellström Fixes: a09946a9a903 ("drm/xe/xe_bo_move: Enhance xe_bo_move trace") Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240221101950.1019312-1-priyanka.dandamudi@intel.com (cherry picked from commit 8034f6b070cc3716e81b1846f8a4ca5339c3f29b) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_trace.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index f2ea188663ac..4d3b80ec906d 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -721,7 +721,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, migrate = xe->tiles[0].migrate; xe_assert(xe, migrate); - trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type); + trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source); xe_device_mem_access_get(xe); if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 3b97633d81d8..4ddc55527f9a 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -102,14 +102,16 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, ); TRACE_EVENT(xe_bo_move, - TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement), - TP_ARGS(bo, new_placement, old_placement), + TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, + bool move_lacks_source), + TP_ARGS(bo, new_placement, old_placement, move_lacks_source), TP_STRUCT__entry( __field(struct xe_bo *, bo) __field(size_t, size) __field(u32, new_placement) __field(u32, old_placement) __array(char, device_id, 12) + __field(bool, move_lacks_source) ), TP_fast_assign( @@ -118,9 +120,11 @@ TRACE_EVENT(xe_bo_move, __entry->new_placement = new_placement; __entry->old_placement = old_placement; strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); + __entry->move_lacks_source = move_lacks_source; ), - TP_printk("migrate object %p [size %zu] from %s to %s device_id:%s", - __entry->bo, __entry->size, xe_mem_type_to_name[__entry->old_placement], + TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", + __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, + xe_mem_type_to_name[__entry->old_placement], xe_mem_type_to_name[__entry->new_placement], __entry->device_id) ); From f7916c47f66d778817068d86e5c9b5e511e23c86 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 26 Feb 2024 17:16:10 +1000 Subject: [PATCH 31/32] nouveau: report byte usage in VRAM usage. Turns out usage is always in bytes not shifted. Fixes: 72fa02fdf833 ("nouveau: add an ioctl to report vram usage") Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index cd14f993bdd1..80f74ee0fc78 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -269,7 +269,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) break; case NOUVEAU_GETPARAM_VRAM_USED: { struct ttm_resource_manager *vram_mgr = ttm_manager_type(&drm->ttm.bdev, TTM_PL_VRAM); - getparam->value = (u64)ttm_resource_manager_usage(vram_mgr) << PAGE_SHIFT; + getparam->value = (u64)ttm_resource_manager_usage(vram_mgr); break; } default: From f6ecfdad359a01c7fd8a3bcfde3ef0acdf107e6e Mon Sep 17 00:00:00 2001 From: Sid Pranjale Date: Thu, 29 Feb 2024 21:52:05 +0530 Subject: [PATCH 32/32] drm/nouveau: keep DMA buffers required for suspend/resume Nouveau deallocates a few buffers post GPU init which are required for GPU suspend/resume to function correctly. This is likely not as big an issue on systems where the NVGPU is the only GPU, but on multi-GPU set ups it leads to a regression where the kernel module errors and results in a system-wide rendering freeze. This commit addresses that regression by moving the two buffers required for suspend and resume to be deallocated at driver unload instead of post init. Fixes: 042b5f83841fb ("drm/nouveau: fix several DMA buffer leaks") Signed-off-by: Sid Pranjale Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index a64c81385682..a73a5b589790 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1054,8 +1054,6 @@ r535_gsp_postinit(struct nvkm_gsp *gsp) /* Release the DMA buffers that were needed only for boot and init */ nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw); nvkm_gsp_mem_dtor(gsp, &gsp->libos); - nvkm_gsp_mem_dtor(gsp, &gsp->rmargs); - nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta); return ret; } @@ -2163,6 +2161,8 @@ r535_gsp_dtor(struct nvkm_gsp *gsp) r535_gsp_dtor_fws(gsp); + nvkm_gsp_mem_dtor(gsp, &gsp->rmargs); + nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta); nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem); nvkm_gsp_mem_dtor(gsp, &gsp->loginit); nvkm_gsp_mem_dtor(gsp, &gsp->logintr);