linux/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c

587 lines
17 KiB
C
Raw Normal View History

/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2008-2015 Intel Corporation
*/
#include <linux/oom.h>
#include <linux/sched/mm.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>
#include "gt/intel_gt_requests.h"
#include "i915_trace.h"
/* Report whether get_nr_swap_pages() currently returns a positive count. */
static bool swap_available(void)
{
	if (get_nr_swap_pages() > 0)
		return true;

	return false;
}
static bool can_release_pages(struct drm_i915_gem_object *obj)
{
/* Consider only shrinkable ojects. */
if (!i915_gem_object_is_shrinkable(obj))
return false;
drm/i915: Replace obj->pin_global with obj->frontbuffer obj->pin_global was originally used as a means to keep the shrinker off the active scanout, but we use the vma->pin_count itself for that and the obj->frontbuffer to delay shrinking active framebuffers. The other role that obj->pin_global gained was for spotting display objects inside GEM and working harder to keep those coherent; for which we can again simply inspect obj->frontbuffer directly. Coming up next, we will want to manipulate the pin_global counter outside of the principle locks, so would need to make pin_global atomic. However, since obj->frontbuffer is already managed atomically, it makes sense to use that the primary key for display objects instead of having pin_global. Ville pointed out the principle difference is that obj->frontbuffer is set for as long as an intel_framebuffer is attached to an object, but obj->pin_global was only raised for as long as the object was active. In practice, this means that we consider the object as being on the scanout for longer than is strictly required, causing us to be more proactive in flushing -- though it should be true that we would have flushed eventually when the back became the front, except that on the flip path that flush is async but when hit from another ioctl it will be synchronous. v2: i915_gem_object_is_framebuffer() Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190902040303.14195-5-chris@chris-wilson.co.uk
2019-09-02 05:02:47 +01:00
/*
* We can only return physical pages to the system if we can either
* discard the contents (because the user has marked them as being
* purgeable) or if we can move their contents out to swap.
*/
return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
}
static int drop_pages(struct drm_i915_gem_object *obj,
unsigned long shrink, bool trylock_vm)
{
unsigned long flags;
flags = 0;
if (shrink & I915_SHRINK_ACTIVE)
drm/i915: Use trylock in shrinker for ggtt on bsw vt-d and bxt, v2. The stop_machine() lock may allocate memory, but is called inside vm->mutex, which is taken in the shrinker. This will cause a lockdep splat, as can be seen below: <4>[ 462.585762] ====================================================== <4>[ 462.585768] WARNING: possible circular locking dependency detected <4>[ 462.585773] 5.12.0-rc5-CI-Trybot_7644+ #1 Tainted: G U <4>[ 462.585779] ------------------------------------------------------ <4>[ 462.585783] i915_selftest/5540 is trying to acquire lock: <4>[ 462.585788] ffffffff826440b0 (cpu_hotplug_lock){++++}-{0:0}, at: stop_machine+0x12/0x30 <4>[ 462.585814] but task is already holding lock: <4>[ 462.585818] ffff888125369c70 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x38e/0xb40 [i915] <4>[ 462.586301] which lock already depends on the new lock. <4>[ 462.586305] the existing dependency chain (in reverse order) is: <4>[ 462.586309] -> #2 (&vm->mutex/1){+.+.}-{3:3}: <4>[ 462.586323] i915_gem_shrinker_taints_mutex+0x2d/0x50 [i915] <4>[ 462.586719] i915_address_space_init+0x12d/0x130 [i915] <4>[ 462.587092] ppgtt_init+0x4e/0x80 [i915] <4>[ 462.587467] gen8_ppgtt_create+0x3e/0x5c0 [i915] <4>[ 462.587828] i915_ppgtt_create+0x28/0xf0 [i915] <4>[ 462.588203] intel_gt_init+0x123/0x370 [i915] <4>[ 462.588572] i915_gem_init+0x129/0x1f0 [i915] <4>[ 462.588971] i915_driver_probe+0x753/0xd80 [i915] <4>[ 462.589320] i915_pci_probe+0x43/0x1d0 [i915] <4>[ 462.589671] pci_device_probe+0x9e/0x110 <4>[ 462.589680] really_probe+0xea/0x410 <4>[ 462.589690] driver_probe_device+0xd9/0x140 <4>[ 462.589697] device_driver_attach+0x4a/0x50 <4>[ 462.589704] __driver_attach+0x83/0x140 <4>[ 462.589711] bus_for_each_dev+0x75/0xc0 <4>[ 462.589718] bus_add_driver+0x14b/0x1f0 <4>[ 462.589724] driver_register+0x66/0xb0 <4>[ 462.589731] i915_init+0x70/0x87 [i915] <4>[ 462.590053] do_one_initcall+0x56/0x2e0 <4>[ 462.590061] do_init_module+0x55/0x200 <4>[ 462.590068] 
load_module+0x2703/0x2990 <4>[ 462.590074] __do_sys_finit_module+0xad/0x110 <4>[ 462.590080] do_syscall_64+0x33/0x80 <4>[ 462.590089] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 462.590096] -> #1 (fs_reclaim){+.+.}-{0:0}: <4>[ 462.590109] fs_reclaim_acquire+0x9f/0xd0 <4>[ 462.590118] kmem_cache_alloc_trace+0x3d/0x430 <4>[ 462.590126] intel_cpuc_prepare+0x3b/0x1b0 <4>[ 462.590133] cpuhp_invoke_callback+0x9e/0x890 <4>[ 462.590141] _cpu_up+0xa4/0x130 <4>[ 462.590147] cpu_up+0x82/0x90 <4>[ 462.590153] bringup_nonboot_cpus+0x4a/0x60 <4>[ 462.590159] smp_init+0x21/0x5c <4>[ 462.590167] kernel_init_freeable+0x8a/0x1b7 <4>[ 462.590175] kernel_init+0x5/0xff <4>[ 462.590181] ret_from_fork+0x22/0x30 <4>[ 462.590187] -> #0 (cpu_hotplug_lock){++++}-{0:0}: <4>[ 462.590199] __lock_acquire+0x1520/0x2590 <4>[ 462.590207] lock_acquire+0xd1/0x3d0 <4>[ 462.590213] cpus_read_lock+0x39/0xc0 <4>[ 462.590219] stop_machine+0x12/0x30 <4>[ 462.590226] bxt_vtd_ggtt_insert_entries__BKL+0x36/0x50 [i915] <4>[ 462.590601] ggtt_bind_vma+0x5d/0x80 [i915] <4>[ 462.590970] i915_vma_bind+0xdc/0x1c0 [i915] <4>[ 462.591374] i915_vma_pin_ww+0x435/0xb40 [i915] <4>[ 462.591779] make_obj_busy+0xcb/0x330 [i915] <4>[ 462.592170] igt_mmap_offset_exhaustion+0x45f/0x4c0 [i915] <4>[ 462.592562] __i915_subtests.cold.7+0x42/0x92 [i915] <4>[ 462.592995] __run_selftests.part.3+0x10d/0x172 [i915] <4>[ 462.593428] i915_live_selftests.cold.5+0x1f/0x47 [i915] <4>[ 462.593860] i915_pci_probe+0x93/0x1d0 [i915] <4>[ 462.594210] pci_device_probe+0x9e/0x110 <4>[ 462.594217] really_probe+0xea/0x410 <4>[ 462.594226] driver_probe_device+0xd9/0x140 <4>[ 462.594233] device_driver_attach+0x4a/0x50 <4>[ 462.594240] __driver_attach+0x83/0x140 <4>[ 462.594247] bus_for_each_dev+0x75/0xc0 <4>[ 462.594254] bus_add_driver+0x14b/0x1f0 <4>[ 462.594260] driver_register+0x66/0xb0 <4>[ 462.594267] i915_init+0x70/0x87 [i915] <4>[ 462.594586] do_one_initcall+0x56/0x2e0 <4>[ 462.594592] do_init_module+0x55/0x200 <4>[ 462.594599] 
load_module+0x2703/0x2990 <4>[ 462.594605] __do_sys_finit_module+0xad/0x110 <4>[ 462.594612] do_syscall_64+0x33/0x80 <4>[ 462.594618] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 462.594625] other info that might help us debug this: <4>[ 462.594629] Chain exists of: cpu_hotplug_lock --> fs_reclaim --> &vm->mutex/1 <4>[ 462.594645] Possible unsafe locking scenario: <4>[ 462.594648] CPU0 CPU1 <4>[ 462.594652] ---- ---- <4>[ 462.594655] lock(&vm->mutex/1); <4>[ 462.594664] lock(fs_reclaim); <4>[ 462.594671] lock(&vm->mutex/1); <4>[ 462.594679] lock(cpu_hotplug_lock); <4>[ 462.594686] *** DEADLOCK *** <4>[ 462.594690] 4 locks held by i915_selftest/5540: <4>[ 462.594696] #0: ffff888100fbc240 (&dev->mutex){....}-{3:3}, at: device_driver_attach+0x18/0x50 <4>[ 462.594715] #1: ffffc900006cb9a0 (reservation_ww_class_acquire){+.+.}-{0:0}, at: make_obj_busy+0x81/0x330 [i915] <4>[ 462.595118] #2: ffff88812a6081e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: make_obj_busy+0x21f/0x330 [i915] <4>[ 462.595519] #3: ffff888125369c70 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x38e/0xb40 [i915] <4>[ 462.595934] stack backtrace: <4>[ 462.595939] CPU: 0 PID: 5540 Comm: i915_selftest Tainted: G U 5.12.0-rc5-CI-Trybot_7644+ #1 <4>[ 462.595947] Hardware name: GOOGLE Kefka/Kefka, BIOS MrChromebox 02/04/2018 <4>[ 462.595952] Call Trace: <4>[ 462.595961] dump_stack+0x7f/0xad <4>[ 462.595974] check_noncircular+0x12e/0x150 <4>[ 462.595982] ? save_stack.isra.17+0x3f/0x70 <4>[ 462.595991] ? drm_mm_insert_node_in_range+0x34a/0x5b0 <4>[ 462.596000] ? i915_vma_pin_ww+0x9ec/0xb40 [i915] <4>[ 462.596410] __lock_acquire+0x1520/0x2590 <4>[ 462.596419] ? do_init_module+0x55/0x200 <4>[ 462.596429] lock_acquire+0xd1/0x3d0 <4>[ 462.596435] ? stop_machine+0x12/0x30 <4>[ 462.596445] ? gen8_ggtt_insert_entries+0xf0/0xf0 [i915] <4>[ 462.596816] cpus_read_lock+0x39/0xc0 <4>[ 462.596824] ? 
stop_machine+0x12/0x30 <4>[ 462.596831] stop_machine+0x12/0x30 <4>[ 462.596839] bxt_vtd_ggtt_insert_entries__BKL+0x36/0x50 [i915] <4>[ 462.597210] ggtt_bind_vma+0x5d/0x80 [i915] <4>[ 462.597580] i915_vma_bind+0xdc/0x1c0 [i915] <4>[ 462.597986] i915_vma_pin_ww+0x435/0xb40 [i915] <4>[ 462.598395] ? make_obj_busy+0xcb/0x330 [i915] <4>[ 462.598786] make_obj_busy+0xcb/0x330 [i915] <4>[ 462.599180] ? 0xffffffff81000000 <4>[ 462.599187] ? debug_mutex_unlock+0x50/0xa0 <4>[ 462.599198] igt_mmap_offset_exhaustion+0x45f/0x4c0 [i915] <4>[ 462.599592] __i915_subtests.cold.7+0x42/0x92 [i915] <4>[ 462.600026] ? i915_perf_selftests+0x20/0x20 [i915] <4>[ 462.600422] ? __i915_nop_setup+0x10/0x10 [i915] <4>[ 462.600820] __run_selftests.part.3+0x10d/0x172 [i915] <4>[ 462.601253] i915_live_selftests.cold.5+0x1f/0x47 [i915] <4>[ 462.601686] i915_pci_probe+0x93/0x1d0 [i915] <4>[ 462.602037] ? _raw_spin_unlock_irqrestore+0x3d/0x60 <4>[ 462.602047] pci_device_probe+0x9e/0x110 <4>[ 462.602057] really_probe+0xea/0x410 <4>[ 462.602067] driver_probe_device+0xd9/0x140 <4>[ 462.602075] device_driver_attach+0x4a/0x50 <4>[ 462.602084] __driver_attach+0x83/0x140 <4>[ 462.602091] ? device_driver_attach+0x50/0x50 <4>[ 462.602099] ? device_driver_attach+0x50/0x50 <4>[ 462.602107] bus_for_each_dev+0x75/0xc0 <4>[ 462.602116] bus_add_driver+0x14b/0x1f0 <4>[ 462.602124] driver_register+0x66/0xb0 <4>[ 462.602133] i915_init+0x70/0x87 [i915] <4>[ 462.602453] ? 0xffffffffa0606000 <4>[ 462.602458] do_one_initcall+0x56/0x2e0 <4>[ 462.602466] ? kmem_cache_alloc_trace+0x374/0x430 <4>[ 462.602476] do_init_module+0x55/0x200 <4>[ 462.602484] load_module+0x2703/0x2990 <4>[ 462.602500] ? __do_sys_finit_module+0xad/0x110 <4>[ 462.602507] __do_sys_finit_module+0xad/0x110 <4>[ 462.602519] do_syscall_64+0x33/0x80 <4>[ 462.602527] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 462.602535] RIP: 0033:0x7fab69d8d89d Changes since v1: - Add lockdep annotations during init, to ensure that lockdep is primed. 
This also fixes a false positive when reading /proc/lockdep_stats during module reload. Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210426102351.921874-1-maarten.lankhorst@linux.intel.com Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2021-04-26 12:23:51 +02:00
flags |= I915_GEM_OBJECT_UNBIND_ACTIVE;
if (!(shrink & I915_SHRINK_BOUND))
drm/i915: Use trylock in shrinker for ggtt on bsw vt-d and bxt, v2. The stop_machine() lock may allocate memory, but is called inside vm->mutex, which is taken in the shrinker. This will cause a lockdep splat, as can be seen below: <4>[ 462.585762] ====================================================== <4>[ 462.585768] WARNING: possible circular locking dependency detected <4>[ 462.585773] 5.12.0-rc5-CI-Trybot_7644+ #1 Tainted: G U <4>[ 462.585779] ------------------------------------------------------ <4>[ 462.585783] i915_selftest/5540 is trying to acquire lock: <4>[ 462.585788] ffffffff826440b0 (cpu_hotplug_lock){++++}-{0:0}, at: stop_machine+0x12/0x30 <4>[ 462.585814] but task is already holding lock: <4>[ 462.585818] ffff888125369c70 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x38e/0xb40 [i915] <4>[ 462.586301] which lock already depends on the new lock. <4>[ 462.586305] the existing dependency chain (in reverse order) is: <4>[ 462.586309] -> #2 (&vm->mutex/1){+.+.}-{3:3}: <4>[ 462.586323] i915_gem_shrinker_taints_mutex+0x2d/0x50 [i915] <4>[ 462.586719] i915_address_space_init+0x12d/0x130 [i915] <4>[ 462.587092] ppgtt_init+0x4e/0x80 [i915] <4>[ 462.587467] gen8_ppgtt_create+0x3e/0x5c0 [i915] <4>[ 462.587828] i915_ppgtt_create+0x28/0xf0 [i915] <4>[ 462.588203] intel_gt_init+0x123/0x370 [i915] <4>[ 462.588572] i915_gem_init+0x129/0x1f0 [i915] <4>[ 462.588971] i915_driver_probe+0x753/0xd80 [i915] <4>[ 462.589320] i915_pci_probe+0x43/0x1d0 [i915] <4>[ 462.589671] pci_device_probe+0x9e/0x110 <4>[ 462.589680] really_probe+0xea/0x410 <4>[ 462.589690] driver_probe_device+0xd9/0x140 <4>[ 462.589697] device_driver_attach+0x4a/0x50 <4>[ 462.589704] __driver_attach+0x83/0x140 <4>[ 462.589711] bus_for_each_dev+0x75/0xc0 <4>[ 462.589718] bus_add_driver+0x14b/0x1f0 <4>[ 462.589724] driver_register+0x66/0xb0 <4>[ 462.589731] i915_init+0x70/0x87 [i915] <4>[ 462.590053] do_one_initcall+0x56/0x2e0 <4>[ 462.590061] do_init_module+0x55/0x200 <4>[ 462.590068] 
load_module+0x2703/0x2990 <4>[ 462.590074] __do_sys_finit_module+0xad/0x110 <4>[ 462.590080] do_syscall_64+0x33/0x80 <4>[ 462.590089] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 462.590096] -> #1 (fs_reclaim){+.+.}-{0:0}: <4>[ 462.590109] fs_reclaim_acquire+0x9f/0xd0 <4>[ 462.590118] kmem_cache_alloc_trace+0x3d/0x430 <4>[ 462.590126] intel_cpuc_prepare+0x3b/0x1b0 <4>[ 462.590133] cpuhp_invoke_callback+0x9e/0x890 <4>[ 462.590141] _cpu_up+0xa4/0x130 <4>[ 462.590147] cpu_up+0x82/0x90 <4>[ 462.590153] bringup_nonboot_cpus+0x4a/0x60 <4>[ 462.590159] smp_init+0x21/0x5c <4>[ 462.590167] kernel_init_freeable+0x8a/0x1b7 <4>[ 462.590175] kernel_init+0x5/0xff <4>[ 462.590181] ret_from_fork+0x22/0x30 <4>[ 462.590187] -> #0 (cpu_hotplug_lock){++++}-{0:0}: <4>[ 462.590199] __lock_acquire+0x1520/0x2590 <4>[ 462.590207] lock_acquire+0xd1/0x3d0 <4>[ 462.590213] cpus_read_lock+0x39/0xc0 <4>[ 462.590219] stop_machine+0x12/0x30 <4>[ 462.590226] bxt_vtd_ggtt_insert_entries__BKL+0x36/0x50 [i915] <4>[ 462.590601] ggtt_bind_vma+0x5d/0x80 [i915] <4>[ 462.590970] i915_vma_bind+0xdc/0x1c0 [i915] <4>[ 462.591374] i915_vma_pin_ww+0x435/0xb40 [i915] <4>[ 462.591779] make_obj_busy+0xcb/0x330 [i915] <4>[ 462.592170] igt_mmap_offset_exhaustion+0x45f/0x4c0 [i915] <4>[ 462.592562] __i915_subtests.cold.7+0x42/0x92 [i915] <4>[ 462.592995] __run_selftests.part.3+0x10d/0x172 [i915] <4>[ 462.593428] i915_live_selftests.cold.5+0x1f/0x47 [i915] <4>[ 462.593860] i915_pci_probe+0x93/0x1d0 [i915] <4>[ 462.594210] pci_device_probe+0x9e/0x110 <4>[ 462.594217] really_probe+0xea/0x410 <4>[ 462.594226] driver_probe_device+0xd9/0x140 <4>[ 462.594233] device_driver_attach+0x4a/0x50 <4>[ 462.594240] __driver_attach+0x83/0x140 <4>[ 462.594247] bus_for_each_dev+0x75/0xc0 <4>[ 462.594254] bus_add_driver+0x14b/0x1f0 <4>[ 462.594260] driver_register+0x66/0xb0 <4>[ 462.594267] i915_init+0x70/0x87 [i915] <4>[ 462.594586] do_one_initcall+0x56/0x2e0 <4>[ 462.594592] do_init_module+0x55/0x200 <4>[ 462.594599] 
load_module+0x2703/0x2990 <4>[ 462.594605] __do_sys_finit_module+0xad/0x110 <4>[ 462.594612] do_syscall_64+0x33/0x80 <4>[ 462.594618] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 462.594625] other info that might help us debug this: <4>[ 462.594629] Chain exists of: cpu_hotplug_lock --> fs_reclaim --> &vm->mutex/1 <4>[ 462.594645] Possible unsafe locking scenario: <4>[ 462.594648] CPU0 CPU1 <4>[ 462.594652] ---- ---- <4>[ 462.594655] lock(&vm->mutex/1); <4>[ 462.594664] lock(fs_reclaim); <4>[ 462.594671] lock(&vm->mutex/1); <4>[ 462.594679] lock(cpu_hotplug_lock); <4>[ 462.594686] *** DEADLOCK *** <4>[ 462.594690] 4 locks held by i915_selftest/5540: <4>[ 462.594696] #0: ffff888100fbc240 (&dev->mutex){....}-{3:3}, at: device_driver_attach+0x18/0x50 <4>[ 462.594715] #1: ffffc900006cb9a0 (reservation_ww_class_acquire){+.+.}-{0:0}, at: make_obj_busy+0x81/0x330 [i915] <4>[ 462.595118] #2: ffff88812a6081e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: make_obj_busy+0x21f/0x330 [i915] <4>[ 462.595519] #3: ffff888125369c70 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x38e/0xb40 [i915] <4>[ 462.595934] stack backtrace: <4>[ 462.595939] CPU: 0 PID: 5540 Comm: i915_selftest Tainted: G U 5.12.0-rc5-CI-Trybot_7644+ #1 <4>[ 462.595947] Hardware name: GOOGLE Kefka/Kefka, BIOS MrChromebox 02/04/2018 <4>[ 462.595952] Call Trace: <4>[ 462.595961] dump_stack+0x7f/0xad <4>[ 462.595974] check_noncircular+0x12e/0x150 <4>[ 462.595982] ? save_stack.isra.17+0x3f/0x70 <4>[ 462.595991] ? drm_mm_insert_node_in_range+0x34a/0x5b0 <4>[ 462.596000] ? i915_vma_pin_ww+0x9ec/0xb40 [i915] <4>[ 462.596410] __lock_acquire+0x1520/0x2590 <4>[ 462.596419] ? do_init_module+0x55/0x200 <4>[ 462.596429] lock_acquire+0xd1/0x3d0 <4>[ 462.596435] ? stop_machine+0x12/0x30 <4>[ 462.596445] ? gen8_ggtt_insert_entries+0xf0/0xf0 [i915] <4>[ 462.596816] cpus_read_lock+0x39/0xc0 <4>[ 462.596824] ? 
stop_machine+0x12/0x30 <4>[ 462.596831] stop_machine+0x12/0x30 <4>[ 462.596839] bxt_vtd_ggtt_insert_entries__BKL+0x36/0x50 [i915] <4>[ 462.597210] ggtt_bind_vma+0x5d/0x80 [i915] <4>[ 462.597580] i915_vma_bind+0xdc/0x1c0 [i915] <4>[ 462.597986] i915_vma_pin_ww+0x435/0xb40 [i915] <4>[ 462.598395] ? make_obj_busy+0xcb/0x330 [i915] <4>[ 462.598786] make_obj_busy+0xcb/0x330 [i915] <4>[ 462.599180] ? 0xffffffff81000000 <4>[ 462.599187] ? debug_mutex_unlock+0x50/0xa0 <4>[ 462.599198] igt_mmap_offset_exhaustion+0x45f/0x4c0 [i915] <4>[ 462.599592] __i915_subtests.cold.7+0x42/0x92 [i915] <4>[ 462.600026] ? i915_perf_selftests+0x20/0x20 [i915] <4>[ 462.600422] ? __i915_nop_setup+0x10/0x10 [i915] <4>[ 462.600820] __run_selftests.part.3+0x10d/0x172 [i915] <4>[ 462.601253] i915_live_selftests.cold.5+0x1f/0x47 [i915] <4>[ 462.601686] i915_pci_probe+0x93/0x1d0 [i915] <4>[ 462.602037] ? _raw_spin_unlock_irqrestore+0x3d/0x60 <4>[ 462.602047] pci_device_probe+0x9e/0x110 <4>[ 462.602057] really_probe+0xea/0x410 <4>[ 462.602067] driver_probe_device+0xd9/0x140 <4>[ 462.602075] device_driver_attach+0x4a/0x50 <4>[ 462.602084] __driver_attach+0x83/0x140 <4>[ 462.602091] ? device_driver_attach+0x50/0x50 <4>[ 462.602099] ? device_driver_attach+0x50/0x50 <4>[ 462.602107] bus_for_each_dev+0x75/0xc0 <4>[ 462.602116] bus_add_driver+0x14b/0x1f0 <4>[ 462.602124] driver_register+0x66/0xb0 <4>[ 462.602133] i915_init+0x70/0x87 [i915] <4>[ 462.602453] ? 0xffffffffa0606000 <4>[ 462.602458] do_one_initcall+0x56/0x2e0 <4>[ 462.602466] ? kmem_cache_alloc_trace+0x374/0x430 <4>[ 462.602476] do_init_module+0x55/0x200 <4>[ 462.602484] load_module+0x2703/0x2990 <4>[ 462.602500] ? __do_sys_finit_module+0xad/0x110 <4>[ 462.602507] __do_sys_finit_module+0xad/0x110 <4>[ 462.602519] do_syscall_64+0x33/0x80 <4>[ 462.602527] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 462.602535] RIP: 0033:0x7fab69d8d89d Changes since v1: - Add lockdep annotations during init, to ensure that lockdep is primed. 
This also fixes a false positive when reading /proc/lockdep_stats during module reload. Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210426102351.921874-1-maarten.lankhorst@linux.intel.com Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2021-04-26 12:23:51 +02:00
flags |= I915_GEM_OBJECT_UNBIND_TEST;
if (trylock_vm)
flags |= I915_GEM_OBJECT_UNBIND_VM_TRYLOCK;
if (i915_gem_object_unbind(obj, flags) == 0)
return true;
return false;
}
drm/i915/ttm: add tt shmem backend For cached objects we can allocate our pages directly in shmem. This should make it possible(in a later patch) to utilise the existing i915-gem shrinker code for such objects. For now this is still disabled. v2(Thomas): - Add optional try_to_writeback hook for objects. Importantly we need to check if the object is even still shrinkable; in between us dropping the shrinker LRU lock and acquiring the object lock it could for example have been moved. Also we need to differentiate between "lazy" shrinking and the immediate writeback mode. Also later we need to handle objects which don't even have mm.pages, so bundling this into put_pages() would require somehow handling that edge case, hence just letting the ttm backend handle everything in try_to_writeback doesn't seem too bad. v3(Thomas): - Likely a bad idea to touch the object from the unpopulate hook, since it's not possible to hold a reference, without also creating circular dependency, so likely this is too fragile. For now just ensure we at least mark the pages as dirty/accessed when called from the shrinker on WILLNEED objects. - s/try_to_writeback/shrinker_release_pages, since this can do more than just writeback. - Get rid of do_backup boolean and just set the SWAPPED flag prior to calling unpopulate. - Keep shmem_tt as lowest priority for the TTM LRU bo_swapout walk, since these just get skipped anyway. We can try to come up with something better later. v4(Thomas): - s/PCI_DMA/DMA/. Also drop NO_KERNEL_MAPPING and NO_WARN, which apparently doesn't do anything with streaming mappings. - Just pass along the error for ->truncate, and assume nothing. 
Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Christian König <christian.koenig@amd.com> Cc: Oak Zeng <oak.zeng@intel.com> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Acked-by: Oak Zeng <oak.zeng@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20211018091055.1998191-2-matthew.auld@intel.com
2021-10-18 10:10:49 +01:00
static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
drm/i915: Start writeback from the shrinker When we are called to relieve mempressue via the shrinker, the only way we can make progress is either by discarding unwanted pages (those objects that userspace has marked MADV_DONTNEED) or by reclaiming the dirty objects via swap. As we know that is the only way to make further progress, we can initiate the writeback as we invalidate the objects. This means the objects we put onto the inactive anon lru list are already marked for reclaim+writeback and so will trigger a wait upon the writeback inside direct reclaim, greatly improving the success rate of direct reclaim on i915 objects. The corollary is that we may start a slow swap on opportunistic mempressure from the likes of the compaction + migration kthreads. This is limited by those threads only being allowed to shrink idle pages, but also that if we reactivate the page before it is swapped out by gpu activity, we only page the cost of repinning the page. The cost is most felt when an object is reused after mempressure, which hopefully excludes the latency sensitive tasks (as we are just extending the impact of swap thrashing to them). Apparently this is not the first time we've had this idea. Back in commit 5537252b6b6d ("drm/i915: Invalidate our pages under memory pressure") we wanted to start writeback but settled on invalidate after Hugh Dickins warned us about a possibility of a deadlock within shmemfs if we started writeback from shrink_slab. Looking at the callchain, using writeback from i915_gem_shrink should be equivalent to the pageout also employed by shrink_slab, i.e. it should not be any riskier afaict. v2: Leave mmapings intact. At this point, the only mmapings of our objects will be via CPU mmaps on the shmemfs filp, which are out-of-scope for our LRU tracking. Instead leave those pages to the inactive anon LRU page list for aging and pageout as normal. 
v3: Be selective on which paths trigger writeback, in particular excluding paths shrinking just to reclaim vm space (e.g. mmap, vmap reapers) and avoid starting writeback on the entire process space from within the pm freezer. References: https://bugs.freedesktop.org/show_bug.cgi?id=108686 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Matthew Auld <matthew.auld@intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Michal Hocko <mhocko@suse.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20190420115539.29081-1-chris@chris-wilson.co.uk
2019-04-20 12:55:39 +01:00
{
if (obj->ops->shrink) {
unsigned int shrink_flags = 0;
if (!(flags & I915_SHRINK_ACTIVE))
shrink_flags |= I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT;
if (flags & I915_SHRINK_WRITEBACK)
shrink_flags |= I915_GEM_OBJECT_SHRINK_WRITEBACK;
return obj->ops->shrink(obj, shrink_flags);
}
drm/i915/ttm: add tt shmem backend For cached objects we can allocate our pages directly in shmem. This should make it possible(in a later patch) to utilise the existing i915-gem shrinker code for such objects. For now this is still disabled. v2(Thomas): - Add optional try_to_writeback hook for objects. Importantly we need to check if the object is even still shrinkable; in between us dropping the shrinker LRU lock and acquiring the object lock it could for example have been moved. Also we need to differentiate between "lazy" shrinking and the immediate writeback mode. Also later we need to handle objects which don't even have mm.pages, so bundling this into put_pages() would require somehow handling that edge case, hence just letting the ttm backend handle everything in try_to_writeback doesn't seem too bad. v3(Thomas): - Likely a bad idea to touch the object from the unpopulate hook, since it's not possible to hold a reference, without also creating circular dependency, so likely this is too fragile. For now just ensure we at least mark the pages as dirty/accessed when called from the shrinker on WILLNEED objects. - s/try_to_writeback/shrinker_release_pages, since this can do more than just writeback. - Get rid of do_backup boolean and just set the SWAPPED flag prior to calling unpopulate. - Keep shmem_tt as lowest priority for the TTM LRU bo_swapout walk, since these just get skipped anyway. We can try to come up with something better later. v4(Thomas): - s/PCI_DMA/DMA/. Also drop NO_KERNEL_MAPPING and NO_WARN, which apparently doesn't do anything with streaming mappings. - Just pass along the error for ->truncate, and assume nothing. 
Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Christian König <christian.koenig@amd.com> Cc: Oak Zeng <oak.zeng@intel.com> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Acked-by: Oak Zeng <oak.zeng@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20211018091055.1998191-2-matthew.auld@intel.com
2021-10-18 10:10:49 +01:00
return 0;
drm/i915: Start writeback from the shrinker When we are called to relieve mempressue via the shrinker, the only way we can make progress is either by discarding unwanted pages (those objects that userspace has marked MADV_DONTNEED) or by reclaiming the dirty objects via swap. As we know that is the only way to make further progress, we can initiate the writeback as we invalidate the objects. This means the objects we put onto the inactive anon lru list are already marked for reclaim+writeback and so will trigger a wait upon the writeback inside direct reclaim, greatly improving the success rate of direct reclaim on i915 objects. The corollary is that we may start a slow swap on opportunistic mempressure from the likes of the compaction + migration kthreads. This is limited by those threads only being allowed to shrink idle pages, but also that if we reactivate the page before it is swapped out by gpu activity, we only page the cost of repinning the page. The cost is most felt when an object is reused after mempressure, which hopefully excludes the latency sensitive tasks (as we are just extending the impact of swap thrashing to them). Apparently this is not the first time we've had this idea. Back in commit 5537252b6b6d ("drm/i915: Invalidate our pages under memory pressure") we wanted to start writeback but settled on invalidate after Hugh Dickins warned us about a possibility of a deadlock within shmemfs if we started writeback from shrink_slab. Looking at the callchain, using writeback from i915_gem_shrink should be equivalent to the pageout also employed by shrink_slab, i.e. it should not be any riskier afaict. v2: Leave mmapings intact. At this point, the only mmapings of our objects will be via CPU mmaps on the shmemfs filp, which are out-of-scope for our LRU tracking. Instead leave those pages to the inactive anon LRU page list for aging and pageout as normal. 
v3: Be selective on which paths trigger writeback, in particular excluding paths shrinking just to reclaim vm space (e.g. mmap, vmap reapers) and avoid starting writeback on the entire process space from within the pm freezer. References: https://bugs.freedesktop.org/show_bug.cgi?id=108686 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Matthew Auld <matthew.auld@intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Michal Hocko <mhocko@suse.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20190420115539.29081-1-chris@chris-wilson.co.uk
2019-04-20 12:55:39 +01:00
}
/**
* i915_gem_shrink - Shrink buffer object caches
* @ww: i915 gem ww acquire ctx, or NULL
* @i915: i915 device
* @target: amount of memory to make available, in pages
* @nr_scanned: optional output for number of pages scanned (incremental)
* @shrink: control flags for selecting cache types
*
* This function is the main interface to the shrinker. It will try to release
* up to @target pages of main memory backing storage from buffer objects.
* Selection of the specific caches can be done with @shrink. This is e.g. useful
* when purgeable objects should be removed from caches preferentially.
*
* Note that it's not guaranteed that released amount is actually available as
* free system memory - the pages might still be in use due to other reasons
* (like cpu mmaps) or the mm core has reused them before we could grab them.
* Therefore code that needs to explicitly shrink buffer objects caches (e.g. to
* avoid deadlocks in memory reclaim) must fall back to i915_gem_shrink_all().
*
* Also note that any kind of pinning (both per-vma address space pins and
* backing storage pins at the buffer object level) result in the shrinker code
* having to skip the object.
*
* Returns:
* The number of pages of backing storage actually released.
*/
unsigned long
i915_gem_shrink(struct i915_gem_ww_ctx *ww,
struct drm_i915_private *i915,
unsigned long target,
unsigned long *nr_scanned,
unsigned int shrink)
{
const struct {
struct list_head *list;
unsigned int bit;
} phases[] = {
{ &i915->mm.purge_list, ~0u },
{
&i915->mm.shrink_list,
I915_SHRINK_BOUND | I915_SHRINK_UNBOUND
},
{ NULL, 0 },
}, *phase;
intel_wakeref_t wakeref = 0;
unsigned long count = 0;
unsigned long scanned = 0;
int err = 0;
drm/i915: Use trylock in shrinker for ggtt on bsw vt-d and bxt, v2. The stop_machine() lock may allocate memory, but is called inside vm->mutex, which is taken in the shrinker. This will cause a lockdep splat, as can be seen below: <4>[ 462.585762] ====================================================== <4>[ 462.585768] WARNING: possible circular locking dependency detected <4>[ 462.585773] 5.12.0-rc5-CI-Trybot_7644+ #1 Tainted: G U <4>[ 462.585779] ------------------------------------------------------ <4>[ 462.585783] i915_selftest/5540 is trying to acquire lock: <4>[ 462.585788] ffffffff826440b0 (cpu_hotplug_lock){++++}-{0:0}, at: stop_machine+0x12/0x30 <4>[ 462.585814] but task is already holding lock: <4>[ 462.585818] ffff888125369c70 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x38e/0xb40 [i915] <4>[ 462.586301] which lock already depends on the new lock. <4>[ 462.586305] the existing dependency chain (in reverse order) is: <4>[ 462.586309] -> #2 (&vm->mutex/1){+.+.}-{3:3}: <4>[ 462.586323] i915_gem_shrinker_taints_mutex+0x2d/0x50 [i915] <4>[ 462.586719] i915_address_space_init+0x12d/0x130 [i915] <4>[ 462.587092] ppgtt_init+0x4e/0x80 [i915] <4>[ 462.587467] gen8_ppgtt_create+0x3e/0x5c0 [i915] <4>[ 462.587828] i915_ppgtt_create+0x28/0xf0 [i915] <4>[ 462.588203] intel_gt_init+0x123/0x370 [i915] <4>[ 462.588572] i915_gem_init+0x129/0x1f0 [i915] <4>[ 462.588971] i915_driver_probe+0x753/0xd80 [i915] <4>[ 462.589320] i915_pci_probe+0x43/0x1d0 [i915] <4>[ 462.589671] pci_device_probe+0x9e/0x110 <4>[ 462.589680] really_probe+0xea/0x410 <4>[ 462.589690] driver_probe_device+0xd9/0x140 <4>[ 462.589697] device_driver_attach+0x4a/0x50 <4>[ 462.589704] __driver_attach+0x83/0x140 <4>[ 462.589711] bus_for_each_dev+0x75/0xc0 <4>[ 462.589718] bus_add_driver+0x14b/0x1f0 <4>[ 462.589724] driver_register+0x66/0xb0 <4>[ 462.589731] i915_init+0x70/0x87 [i915] <4>[ 462.590053] do_one_initcall+0x56/0x2e0 <4>[ 462.590061] do_init_module+0x55/0x200 <4>[ 462.590068] 
load_module+0x2703/0x2990 <4>[ 462.590074] __do_sys_finit_module+0xad/0x110 <4>[ 462.590080] do_syscall_64+0x33/0x80 <4>[ 462.590089] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 462.590096] -> #1 (fs_reclaim){+.+.}-{0:0}: <4>[ 462.590109] fs_reclaim_acquire+0x9f/0xd0 <4>[ 462.590118] kmem_cache_alloc_trace+0x3d/0x430 <4>[ 462.590126] intel_cpuc_prepare+0x3b/0x1b0 <4>[ 462.590133] cpuhp_invoke_callback+0x9e/0x890 <4>[ 462.590141] _cpu_up+0xa4/0x130 <4>[ 462.590147] cpu_up+0x82/0x90 <4>[ 462.590153] bringup_nonboot_cpus+0x4a/0x60 <4>[ 462.590159] smp_init+0x21/0x5c <4>[ 462.590167] kernel_init_freeable+0x8a/0x1b7 <4>[ 462.590175] kernel_init+0x5/0xff <4>[ 462.590181] ret_from_fork+0x22/0x30 <4>[ 462.590187] -> #0 (cpu_hotplug_lock){++++}-{0:0}: <4>[ 462.590199] __lock_acquire+0x1520/0x2590 <4>[ 462.590207] lock_acquire+0xd1/0x3d0 <4>[ 462.590213] cpus_read_lock+0x39/0xc0 <4>[ 462.590219] stop_machine+0x12/0x30 <4>[ 462.590226] bxt_vtd_ggtt_insert_entries__BKL+0x36/0x50 [i915] <4>[ 462.590601] ggtt_bind_vma+0x5d/0x80 [i915] <4>[ 462.590970] i915_vma_bind+0xdc/0x1c0 [i915] <4>[ 462.591374] i915_vma_pin_ww+0x435/0xb40 [i915] <4>[ 462.591779] make_obj_busy+0xcb/0x330 [i915] <4>[ 462.592170] igt_mmap_offset_exhaustion+0x45f/0x4c0 [i915] <4>[ 462.592562] __i915_subtests.cold.7+0x42/0x92 [i915] <4>[ 462.592995] __run_selftests.part.3+0x10d/0x172 [i915] <4>[ 462.593428] i915_live_selftests.cold.5+0x1f/0x47 [i915] <4>[ 462.593860] i915_pci_probe+0x93/0x1d0 [i915] <4>[ 462.594210] pci_device_probe+0x9e/0x110 <4>[ 462.594217] really_probe+0xea/0x410 <4>[ 462.594226] driver_probe_device+0xd9/0x140 <4>[ 462.594233] device_driver_attach+0x4a/0x50 <4>[ 462.594240] __driver_attach+0x83/0x140 <4>[ 462.594247] bus_for_each_dev+0x75/0xc0 <4>[ 462.594254] bus_add_driver+0x14b/0x1f0 <4>[ 462.594260] driver_register+0x66/0xb0 <4>[ 462.594267] i915_init+0x70/0x87 [i915] <4>[ 462.594586] do_one_initcall+0x56/0x2e0 <4>[ 462.594592] do_init_module+0x55/0x200 <4>[ 462.594599] 
load_module+0x2703/0x2990 <4>[ 462.594605] __do_sys_finit_module+0xad/0x110 <4>[ 462.594612] do_syscall_64+0x33/0x80 <4>[ 462.594618] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 462.594625] other info that might help us debug this: <4>[ 462.594629] Chain exists of: cpu_hotplug_lock --> fs_reclaim --> &vm->mutex/1 <4>[ 462.594645] Possible unsafe locking scenario: <4>[ 462.594648] CPU0 CPU1 <4>[ 462.594652] ---- ---- <4>[ 462.594655] lock(&vm->mutex/1); <4>[ 462.594664] lock(fs_reclaim); <4>[ 462.594671] lock(&vm->mutex/1); <4>[ 462.594679] lock(cpu_hotplug_lock); <4>[ 462.594686] *** DEADLOCK *** <4>[ 462.594690] 4 locks held by i915_selftest/5540: <4>[ 462.594696] #0: ffff888100fbc240 (&dev->mutex){....}-{3:3}, at: device_driver_attach+0x18/0x50 <4>[ 462.594715] #1: ffffc900006cb9a0 (reservation_ww_class_acquire){+.+.}-{0:0}, at: make_obj_busy+0x81/0x330 [i915] <4>[ 462.595118] #2: ffff88812a6081e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: make_obj_busy+0x21f/0x330 [i915] <4>[ 462.595519] #3: ffff888125369c70 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x38e/0xb40 [i915] <4>[ 462.595934] stack backtrace: <4>[ 462.595939] CPU: 0 PID: 5540 Comm: i915_selftest Tainted: G U 5.12.0-rc5-CI-Trybot_7644+ #1 <4>[ 462.595947] Hardware name: GOOGLE Kefka/Kefka, BIOS MrChromebox 02/04/2018 <4>[ 462.595952] Call Trace: <4>[ 462.595961] dump_stack+0x7f/0xad <4>[ 462.595974] check_noncircular+0x12e/0x150 <4>[ 462.595982] ? save_stack.isra.17+0x3f/0x70 <4>[ 462.595991] ? drm_mm_insert_node_in_range+0x34a/0x5b0 <4>[ 462.596000] ? i915_vma_pin_ww+0x9ec/0xb40 [i915] <4>[ 462.596410] __lock_acquire+0x1520/0x2590 <4>[ 462.596419] ? do_init_module+0x55/0x200 <4>[ 462.596429] lock_acquire+0xd1/0x3d0 <4>[ 462.596435] ? stop_machine+0x12/0x30 <4>[ 462.596445] ? gen8_ggtt_insert_entries+0xf0/0xf0 [i915] <4>[ 462.596816] cpus_read_lock+0x39/0xc0 <4>[ 462.596824] ? 
stop_machine+0x12/0x30 <4>[ 462.596831] stop_machine+0x12/0x30 <4>[ 462.596839] bxt_vtd_ggtt_insert_entries__BKL+0x36/0x50 [i915] <4>[ 462.597210] ggtt_bind_vma+0x5d/0x80 [i915] <4>[ 462.597580] i915_vma_bind+0xdc/0x1c0 [i915] <4>[ 462.597986] i915_vma_pin_ww+0x435/0xb40 [i915] <4>[ 462.598395] ? make_obj_busy+0xcb/0x330 [i915] <4>[ 462.598786] make_obj_busy+0xcb/0x330 [i915] <4>[ 462.599180] ? 0xffffffff81000000 <4>[ 462.599187] ? debug_mutex_unlock+0x50/0xa0 <4>[ 462.599198] igt_mmap_offset_exhaustion+0x45f/0x4c0 [i915] <4>[ 462.599592] __i915_subtests.cold.7+0x42/0x92 [i915] <4>[ 462.600026] ? i915_perf_selftests+0x20/0x20 [i915] <4>[ 462.600422] ? __i915_nop_setup+0x10/0x10 [i915] <4>[ 462.600820] __run_selftests.part.3+0x10d/0x172 [i915] <4>[ 462.601253] i915_live_selftests.cold.5+0x1f/0x47 [i915] <4>[ 462.601686] i915_pci_probe+0x93/0x1d0 [i915] <4>[ 462.602037] ? _raw_spin_unlock_irqrestore+0x3d/0x60 <4>[ 462.602047] pci_device_probe+0x9e/0x110 <4>[ 462.602057] really_probe+0xea/0x410 <4>[ 462.602067] driver_probe_device+0xd9/0x140 <4>[ 462.602075] device_driver_attach+0x4a/0x50 <4>[ 462.602084] __driver_attach+0x83/0x140 <4>[ 462.602091] ? device_driver_attach+0x50/0x50 <4>[ 462.602099] ? device_driver_attach+0x50/0x50 <4>[ 462.602107] bus_for_each_dev+0x75/0xc0 <4>[ 462.602116] bus_add_driver+0x14b/0x1f0 <4>[ 462.602124] driver_register+0x66/0xb0 <4>[ 462.602133] i915_init+0x70/0x87 [i915] <4>[ 462.602453] ? 0xffffffffa0606000 <4>[ 462.602458] do_one_initcall+0x56/0x2e0 <4>[ 462.602466] ? kmem_cache_alloc_trace+0x374/0x430 <4>[ 462.602476] do_init_module+0x55/0x200 <4>[ 462.602484] load_module+0x2703/0x2990 <4>[ 462.602500] ? __do_sys_finit_module+0xad/0x110 <4>[ 462.602507] __do_sys_finit_module+0xad/0x110 <4>[ 462.602519] do_syscall_64+0x33/0x80 <4>[ 462.602527] entry_SYSCALL_64_after_hwframe+0x44/0xae <4>[ 462.602535] RIP: 0033:0x7fab69d8d89d Changes since v1: - Add lockdep annotations during init, to ensure that lockdep is primed. 
This also fixes a false positive when reading /proc/lockdep_stats during module reload. Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210426102351.921874-1-maarten.lankhorst@linux.intel.com Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
2021-04-26 12:23:51 +02:00
/* CHV + VTD workaround use stop_machine(); need to trylock vm->mutex */
bool trylock_vm = !ww && intel_vm_no_concurrent_access_wa(i915);
trace_i915_gem_shrink(i915, target, shrink);
/*
* Unbinding of objects will require HW access; Let us not wake the
* device just to recover a little memory. If absolutely necessary,
* we will force the wake during oom-notifier.
*/
if (shrink & I915_SHRINK_BOUND) {
wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm);
if (!wakeref)
shrink &= ~I915_SHRINK_BOUND;
}
/*
* When shrinking the active list, we should also consider active
* contexts. Active contexts are pinned until they are retired, and
* so can not be simply unbound to retire and unpin their pages. To
* shrink the contexts, we must wait until the gpu is idle and
* completed its switch to the kernel context. In short, we do
* not have a good mechanism for idling a specific context, but
* what we can do is give them a kick so that we do not keep idle
* contexts around longer than is necessary.
*/
if (shrink & I915_SHRINK_ACTIVE)
/* Retire requests to unpin all idle contexts */
intel_gt_retire_requests(to_gt(i915));
/*
* As we may completely rewrite the (un)bound list whilst unbinding
* (due to retiring requests) we have to strictly process only
* one element of the list at the time, and recheck the list
* on every iteration.
*
* In particular, we must hold a reference whilst removing the
* object as we may end up waiting for and/or retiring the objects.
* This might release the final reference (held by the active list)
* and result in the object being freed from under us. This is
* similar to the precautions the eviction code must take whilst
* removing objects.
*
* Also note that although these lists do not hold a reference to
* the object we can safely grab one here: The final object
* unreferencing and the bound_list are both protected by the
* dev->struct_mutex and so we won't ever be able to observe an
* object on the bound_list with a reference count equals 0.
*/
for (phase = phases; phase->list; phase++) {
struct list_head still_in_list;
struct drm_i915_gem_object *obj;
unsigned long flags;
if ((shrink & phase->bit) == 0)
continue;
INIT_LIST_HEAD(&still_in_list);
/*
* We serialize our access to unreferenced objects through
* the use of the struct_mutex. While the objects are not
* yet freed (due to RCU then a workqueue) we still want
* to be able to shrink their pages, so they remain on
* the unbound/bound list until actually freed.
*/
spin_lock_irqsave(&i915->mm.obj_lock, flags);
while (count < target &&
(obj = list_first_entry_or_null(phase->list,
typeof(*obj),
mm.link))) {
list_move_tail(&obj->mm.link, &still_in_list);
if (shrink & I915_SHRINK_VMAPS &&
!is_vmalloc_addr(obj->mm.mapping))
continue;
if (!(shrink & I915_SHRINK_ACTIVE) &&
i915_gem_object_is_framebuffer(obj))
continue;
if (!can_release_pages(obj))
continue;
if (!kref_get_unless_zero(&obj->base.refcount))
continue;
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
/* May arrive from get_pages on another bo */
if (!ww) {
if (!i915_gem_object_trylock(obj, NULL))
goto skip;
} else {
err = i915_gem_object_lock(obj, ww);
if (err)
goto skip;
}
if (drop_pages(obj, shrink, trylock_vm) &&
!__i915_gem_object_put_pages(obj) &&
!try_to_writeback(obj, shrink))
count += obj->base.size >> PAGE_SHIFT;
if (!ww)
i915_gem_object_unlock(obj);
scanned += obj->base.size >> PAGE_SHIFT;
skip:
i915_gem_object_put(obj);
spin_lock_irqsave(&i915->mm.obj_lock, flags);
if (err)
break;
}
list_splice_tail(&still_in_list, phase->list);
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
if (err)
break;
}
if (shrink & I915_SHRINK_BOUND)
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
if (err)
return err;
if (nr_scanned)
*nr_scanned += scanned;
return count;
}
/**
* i915_gem_shrink_all - Shrink buffer object caches completely
* @i915: i915 device
*
* This is a simple wraper around i915_gem_shrink() to aggressively shrink all
* caches completely. It also first waits for and retires all outstanding
* requests to also be able to release backing storage for active objects.
*
* This should only be used in code to intentionally quiescent the gpu or as a
* last-ditch effort when memory seems to have run out.
*
* Returns:
* The number of pages of backing storage actually released.
*/
unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
{
intel_wakeref_t wakeref;
unsigned long freed = 0;
drm/i915: Enable lockless lookup of request tracking via RCU If we enable RCU for the requests (providing a grace period where we can inspect a "dead" request before it is freed), we can allow callers to carefully perform lockless lookup of an active request. However, by enabling deferred freeing of requests, we can potentially hog a lot of memory when dealing with tens of thousands of requests per second - with a quick insertion of a synchronize_rcu() inside our shrinker callback, that issue disappears. v2: Currently, it is our responsibility to handle reclaim i.e. to avoid hogging memory with the delayed slab frees. At the moment, we wait for a grace period in the shrinker, and block for all RCU callbacks on oom. Suggested alternatives focus on flushing our RCU callback when we have a certain number of outstanding request frees, and blocking on that flush after a second high watermark. (So rather than wait for the system to run out of memory, we stop issuing requests - both are nondeterministic.) Paul E. McKenney wrote: Another approach is synchronize_rcu() after some largish number of requests. The advantage of this approach is that it throttles the production of callbacks at the source. The corresponding disadvantage is that it slows things up. Another approach is to use call_rcu(), but if the previous call_rcu() is still in flight, block waiting for it. Yet another approach is the get_state_synchronize_rcu() / cond_synchronize_rcu() pair. The idea is to do something like this: cond_synchronize_rcu(cookie); cookie = get_state_synchronize_rcu(); You would of course do an initial get_state_synchronize_rcu() to get things going. This would not block unless there was less than one grace period's worth of time between invocations. But this assumes a busy system, where there is almost always a grace period in flight. 
But you can make that happen as follows: cond_synchronize_rcu(cookie); cookie = get_state_synchronize_rcu(); call_rcu(&my_rcu_head, noop_function); Note that you need additional code to make sure that the old callback has completed before doing a new one. Setting and clearing a flag with appropriate memory ordering control suffices (e.g,. smp_load_acquire() and smp_store_release()). v3: More comments on compiler and processor order of operations within the RCU lookup and discover we can use rcu_access_pointer() here instead. v4: Wrap i915_gem_active_get_rcu() to take the rcu_read_lock itself. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: "Goel, Akash" <akash.goel@intel.com> Cc: Josh Triplett <josh@joshtriplett.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-25-git-send-email-chris@chris-wilson.co.uk
2016-08-04 16:32:41 +01:00
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
freed = i915_gem_shrink(NULL, i915, -1UL, NULL,
I915_SHRINK_BOUND |
drm/i915: Avoid recursing onto active vma from the shrinker We mark the vma as active while binding it in order to protect outselves from being shrunk under mempressure. This only works if we are strict in not attempting to shrink active objects. <6> [472.618968] Workqueue: events_unbound fence_work [i915] <4> [472.618970] Call Trace: <4> [472.618974] ? __schedule+0x2e5/0x810 <4> [472.618978] schedule+0x37/0xe0 <4> [472.618982] schedule_preempt_disabled+0xf/0x20 <4> [472.618984] __mutex_lock+0x281/0x9c0 <4> [472.618987] ? mark_held_locks+0x49/0x70 <4> [472.618989] ? _raw_spin_unlock_irqrestore+0x47/0x60 <4> [472.619038] ? i915_vma_unbind+0xae/0x110 [i915] <4> [472.619084] ? i915_vma_unbind+0xae/0x110 [i915] <4> [472.619122] i915_vma_unbind+0xae/0x110 [i915] <4> [472.619165] i915_gem_object_unbind+0x1dc/0x400 [i915] <4> [472.619208] i915_gem_shrink+0x328/0x660 [i915] <4> [472.619250] ? i915_gem_shrink_all+0x38/0x60 [i915] <4> [472.619282] i915_gem_shrink_all+0x38/0x60 [i915] <4> [472.619325] vm_alloc_page.constprop.25+0x1aa/0x240 [i915] <4> [472.619330] ? rcu_read_lock_sched_held+0x4d/0x80 <4> [472.619363] ? __alloc_pd+0xb/0x30 [i915] <4> [472.619366] ? module_assert_mutex_or_preempt+0xf/0x30 <4> [472.619368] ? __module_address+0x23/0xe0 <4> [472.619371] ? is_module_address+0x26/0x40 <4> [472.619374] ? static_obj+0x34/0x50 <4> [472.619376] ? 
lockdep_init_map+0x4d/0x1e0 <4> [472.619407] setup_page_dma+0xd/0x90 [i915] <4> [472.619437] alloc_pd+0x29/0x50 [i915] <4> [472.619470] __gen8_ppgtt_alloc+0x443/0x6b0 [i915] <4> [472.619503] gen8_ppgtt_alloc+0xd7/0x300 [i915] <4> [472.619535] ppgtt_bind_vma+0x2a/0xe0 [i915] <4> [472.619577] __vma_bind+0x26/0x40 [i915] <4> [472.619611] fence_work+0x1c/0x90 [i915] <4> [472.619617] process_one_work+0x26a/0x620 Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200221221818.2861432-1-chris@chris-wilson.co.uk
2020-02-21 22:18:18 +00:00
I915_SHRINK_UNBOUND);
}
drm/i915: Enable lockless lookup of request tracking via RCU If we enable RCU for the requests (providing a grace period where we can inspect a "dead" request before it is freed), we can allow callers to carefully perform lockless lookup of an active request. However, by enabling deferred freeing of requests, we can potentially hog a lot of memory when dealing with tens of thousands of requests per second - with a quick insertion of a synchronize_rcu() inside our shrinker callback, that issue disappears. v2: Currently, it is our responsibility to handle reclaim i.e. to avoid hogging memory with the delayed slab frees. At the moment, we wait for a grace period in the shrinker, and block for all RCU callbacks on oom. Suggested alternatives focus on flushing our RCU callback when we have a certain number of outstanding request frees, and blocking on that flush after a second high watermark. (So rather than wait for the system to run out of memory, we stop issuing requests - both are nondeterministic.) Paul E. McKenney wrote: Another approach is synchronize_rcu() after some largish number of requests. The advantage of this approach is that it throttles the production of callbacks at the source. The corresponding disadvantage is that it slows things up. Another approach is to use call_rcu(), but if the previous call_rcu() is still in flight, block waiting for it. Yet another approach is the get_state_synchronize_rcu() / cond_synchronize_rcu() pair. The idea is to do something like this: cond_synchronize_rcu(cookie); cookie = get_state_synchronize_rcu(); You would of course do an initial get_state_synchronize_rcu() to get things going. This would not block unless there was less than one grace period's worth of time between invocations. But this assumes a busy system, where there is almost always a grace period in flight. 
But you can make that happen as follows: cond_synchronize_rcu(cookie); cookie = get_state_synchronize_rcu(); call_rcu(&my_rcu_head, noop_function); Note that you need additional code to make sure that the old callback has completed before doing a new one. Setting and clearing a flag with appropriate memory ordering control suffices (e.g,. smp_load_acquire() and smp_store_release()). v3: More comments on compiler and processor order of operations within the RCU lookup and discover we can use rcu_access_pointer() here instead. v4: Wrap i915_gem_active_get_rcu() to take the rcu_read_lock itself. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: "Goel, Akash" <akash.goel@intel.com> Cc: Josh Triplett <josh@joshtriplett.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-25-git-send-email-chris@chris-wilson.co.uk
2016-08-04 16:32:41 +01:00
return freed;
}
static unsigned long
i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
{
struct drm_i915_private *i915 =
container_of(shrinker, struct drm_i915_private, mm.shrinker);
drm/i915: Report all objects with allocated pages to the shrinker Currently, we try to report to the shrinker the precise number of objects (pages) that are available to be reaped at this moment. This requires searching all objects with allocated pages to see if they fulfill the search criteria, and this count is performed quite frequently. (The shrinker tries to free ~128 pages on each invocation, before which we count all the objects; counting takes longer than unbinding the objects!) If we take the pragmatic view that with sufficient desire, all objects are eventually reapable (they become inactive, or no longer used as framebuffer etc), we can simply return the count of pinned pages maintained during get_pages/put_pages rather than walk the lists every time. The downside is that we may (slightly) over-report the number of objects/pages we could shrink and so penalize ourselves by shrinking more than required. This is mitigated by keeping the order in which we shrink objects such that we avoid penalizing active and frequently used objects, and if memory is so tight that we need to free them we would need to anyway. v2: Only expose shrinkable objects to the shrinker; a small reduction in not considering stolen and foreign objects. v3: Restore the tracking from a "backup" copy from before the gem/ split Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190530203500.26272-2-chris@chris-wilson.co.uk
2019-05-30 21:35:00 +01:00
unsigned long num_objects;
unsigned long count;
drm/i915: Report all objects with allocated pages to the shrinker Currently, we try to report to the shrinker the precise number of objects (pages) that are available to be reaped at this moment. This requires searching all objects with allocated pages to see if they fulfill the search criteria, and this count is performed quite frequently. (The shrinker tries to free ~128 pages on each invocation, before which we count all the objects; counting takes longer than unbinding the objects!) If we take the pragmatic view that with sufficient desire, all objects are eventually reapable (they become inactive, or no longer used as framebuffer etc), we can simply return the count of pinned pages maintained during get_pages/put_pages rather than walk the lists every time. The downside is that we may (slightly) over-report the number of objects/pages we could shrink and so penalize ourselves by shrinking more than required. This is mitigated by keeping the order in which we shrink objects such that we avoid penalizing active and frequently used objects, and if memory is so tight that we need to free them we would need to anyway. v2: Only expose shrinkable objects to the shrinker; a small reduction in not considering stolen and foreign objects. v3: Restore the tracking from a "backup" copy from before the gem/ split Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190530203500.26272-2-chris@chris-wilson.co.uk
2019-05-30 21:35:00 +01:00
count = READ_ONCE(i915->mm.shrink_memory) >> PAGE_SHIFT;
num_objects = READ_ONCE(i915->mm.shrink_count);
drm/i915: Report all objects with allocated pages to the shrinker Currently, we try to report to the shrinker the precise number of objects (pages) that are available to be reaped at this moment. This requires searching all objects with allocated pages to see if they fulfill the search criteria, and this count is performed quite frequently. (The shrinker tries to free ~128 pages on each invocation, before which we count all the objects; counting takes longer than unbinding the objects!) If we take the pragmatic view that with sufficient desire, all objects are eventually reapable (they become inactive, or no longer used as framebuffer etc), we can simply return the count of pinned pages maintained during get_pages/put_pages rather than walk the lists every time. The downside is that we may (slightly) over-report the number of objects/pages we could shrink and so penalize ourselves by shrinking more than required. This is mitigated by keeping the order in which we shrink objects such that we avoid penalizing active and frequently used objects, and if memory is so tight that we need to free them we would need to anyway. v2: Only expose shrinkable objects to the shrinker; a small reduction in not considering stolen and foreign objects. v3: Restore the tracking from a "backup" copy from before the gem/ split Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190530203500.26272-2-chris@chris-wilson.co.uk
2019-05-30 21:35:00 +01:00
/*
* Update our preferred vmscan batch size for the next pass.
* Our rough guess for an effective batch size is roughly 2
* available GEM objects worth of pages. That is we don't want
* the shrinker to fire, until it is worth the cost of freeing an
* entire GEM object.
*/
if (num_objects) {
unsigned long avg = 2 * count / num_objects;
i915->mm.shrinker.batch =
max((i915->mm.shrinker.batch + avg) >> 1,
128ul /* default SHRINK_BATCH */);
}
return count;
}
static unsigned long
i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
struct drm_i915_private *i915 =
container_of(shrinker, struct drm_i915_private, mm.shrinker);
unsigned long freed;
sc->nr_scanned = 0;
freed = i915_gem_shrink(NULL, i915,
sc->nr_to_scan,
&sc->nr_scanned,
I915_SHRINK_BOUND |
drm/i915: Pull i915_vma_pin under the vm->mutex Replace the struct_mutex requirement for pinning the i915_vma with the local vm->mutex instead. Note that the vm->mutex is tainted by the shrinker (we require unbinding from inside fs-reclaim) and so we cannot allocate while holding that mutex. Instead we have to preallocate workers to do allocate and apply the PTE updates after we have we reserved their slot in the drm_mm (using fences to order the PTE writes with the GPU work and with later unbind). In adding the asynchronous vma binding, one subtle requirement is to avoid coupling the binding fence into the backing object->resv. That is the asynchronous binding only applies to the vma timeline itself and not to the pages as that is a more global timeline (the binding of one vma does not need to be ordered with another vma, nor does the implicit GEM fencing depend on a vma, only on writes to the backing store). Keeping the vma binding distinct from the backing store timelines is verified by a number of async gem_exec_fence and gem_exec_schedule tests. The way we do this is quite simple, we keep the fence for the vma binding separate and only wait on it as required, and never add it to the obj->resv itself. Another consequence in reducing the locking around the vma is the destruction of the vma is no longer globally serialised by struct_mutex. A natural solution would be to add a kref to i915_vma, but that requires decoupling the reference cycles, possibly by introducing a new i915_mm_pages object that is own by both obj->mm and vma->pages. However, we have not taken that route due to the overshadowing lmem/ttm discussions, and instead play a series of complicated games with trylocks to (hopefully) ensure that only one destruction path is called! v2: Add some commentary, and some helpers to reduce patch churn. 
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 14:39:58 +01:00
I915_SHRINK_UNBOUND);
if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
intel_wakeref_t wakeref;
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
freed += i915_gem_shrink(NULL, i915,
sc->nr_to_scan - sc->nr_scanned,
&sc->nr_scanned,
I915_SHRINK_ACTIVE |
I915_SHRINK_BOUND |
drm/i915: Start writeback from the shrinker When we are called to relieve mempressue via the shrinker, the only way we can make progress is either by discarding unwanted pages (those objects that userspace has marked MADV_DONTNEED) or by reclaiming the dirty objects via swap. As we know that is the only way to make further progress, we can initiate the writeback as we invalidate the objects. This means the objects we put onto the inactive anon lru list are already marked for reclaim+writeback and so will trigger a wait upon the writeback inside direct reclaim, greatly improving the success rate of direct reclaim on i915 objects. The corollary is that we may start a slow swap on opportunistic mempressure from the likes of the compaction + migration kthreads. This is limited by those threads only being allowed to shrink idle pages, but also that if we reactivate the page before it is swapped out by gpu activity, we only page the cost of repinning the page. The cost is most felt when an object is reused after mempressure, which hopefully excludes the latency sensitive tasks (as we are just extending the impact of swap thrashing to them). Apparently this is not the first time we've had this idea. Back in commit 5537252b6b6d ("drm/i915: Invalidate our pages under memory pressure") we wanted to start writeback but settled on invalidate after Hugh Dickins warned us about a possibility of a deadlock within shmemfs if we started writeback from shrink_slab. Looking at the callchain, using writeback from i915_gem_shrink should be equivalent to the pageout also employed by shrink_slab, i.e. it should not be any riskier afaict. v2: Leave mmapings intact. At this point, the only mmapings of our objects will be via CPU mmaps on the shmemfs filp, which are out-of-scope for our LRU tracking. Instead leave those pages to the inactive anon LRU page list for aging and pageout as normal. 
v3: Be selective on which paths trigger writeback, in particular excluding paths shrinking just to reclaim vm space (e.g. mmap, vmap reapers) and avoid starting writeback on the entire process space from within the pm freezer. References: https://bugs.freedesktop.org/show_bug.cgi?id=108686 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Matthew Auld <matthew.auld@intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Michal Hocko <mhocko@suse.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20190420115539.29081-1-chris@chris-wilson.co.uk
2019-04-20 12:55:39 +01:00
I915_SHRINK_UNBOUND |
I915_SHRINK_WRITEBACK);
}
}
drm/i915: Don't call synchronize_rcu_expedited under struct_mutex Only call synchronize_rcu_expedited after unlocking struct_mutex to avoid deadlock because the workqueues depend on struct_mutex. >From original patch by Andrea: synchronize_rcu/synchronize_sched/synchronize_rcu_expedited() will hang until its own workqueues are run. The i915 gem workqueues will wait on the struct_mutex to be released. So we cannot wait for a quiescent state using those rcu primitives while holding the struct_mutex or it creates a circular lock dependency resulting in kernel hangs (which is reproducible but goes undetected by lockdep). kswapd0 D 0 700 2 0x00000000 Call Trace: ? __schedule+0x1a5/0x660 ? schedule+0x36/0x80 ? _synchronize_rcu_expedited.constprop.65+0x2ef/0x300 ? wake_up_bit+0x20/0x20 ? rcu_stall_kick_kthreads.part.54+0xc0/0xc0 ? rcu_exp_wait_wake+0x530/0x530 ? i915_gem_shrink+0x34b/0x4b0 ? i915_gem_shrinker_scan+0x7c/0x90 ? i915_gem_shrinker_scan+0x7c/0x90 ? shrink_slab.part.61.constprop.72+0x1c1/0x3a0 ? shrink_zone+0x154/0x160 ? kswapd+0x40a/0x720 ? kthread+0xf4/0x130 ? try_to_free_pages+0x450/0x450 ? kthread_create_on_node+0x40/0x40 ? ret_from_fork+0x23/0x30 plasmashell D 0 4657 4614 0x00000000 Call Trace: ? __schedule+0x1a5/0x660 ? schedule+0x36/0x80 ? schedule_preempt_disabled+0xe/0x10 ? __mutex_lock.isra.4+0x1c9/0x790 ? i915_gem_close_object+0x26/0xc0 ? i915_gem_close_object+0x26/0xc0 ? drm_gem_object_release_handle+0x48/0x90 ? drm_gem_handle_delete+0x50/0x80 ? drm_ioctl+0x1fa/0x420 ? drm_gem_handle_create+0x40/0x40 ? pipe_write+0x391/0x410 ? __vfs_write+0xc6/0x120 ? do_vfs_ioctl+0x8b/0x5d0 ? SyS_ioctl+0x3b/0x70 ? entry_SYSCALL_64_fastpath+0x13/0x94 kworker/0:0 D 0 29186 2 0x00000000 Workqueue: events __i915_gem_free_work Call Trace: ? __schedule+0x1a5/0x660 ? schedule+0x36/0x80 ? schedule_preempt_disabled+0xe/0x10 ? __mutex_lock.isra.4+0x1c9/0x790 ? del_timer_sync+0x44/0x50 ? update_curr+0x57/0x110 ? __i915_gem_free_objects+0x31/0x300 ? 
__i915_gem_free_objects+0x31/0x300 ? __i915_gem_free_work+0x2d/0x40 ? process_one_work+0x13a/0x3b0 ? worker_thread+0x4a/0x460 ? kthread+0xf4/0x130 ? process_one_work+0x3b0/0x3b0 ? kthread_create_on_node+0x40/0x40 ? ret_from_fork+0x23/0x30 Fixes: 3d3d18f086cd ("drm/i915: Avoid rcu_barrier() from reclaim paths (shrinker)") Reported-by: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Jani Nikula <jani.nikula@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
2017-04-07 13:49:34 +03:00
return sc->nr_scanned ? freed : SHRINK_STOP;
}
static int
i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
{
struct drm_i915_private *i915 =
container_of(nb, struct drm_i915_private, mm.oom_notifier);
struct drm_i915_gem_object *obj;
unsigned long unevictable, available, freed_pages;
intel_wakeref_t wakeref;
unsigned long flags;
freed_pages = 0;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
freed_pages += i915_gem_shrink(NULL, i915, -1UL, NULL,
I915_SHRINK_BOUND |
drm/i915: Start writeback from the shrinker When we are called to relieve mempressue via the shrinker, the only way we can make progress is either by discarding unwanted pages (those objects that userspace has marked MADV_DONTNEED) or by reclaiming the dirty objects via swap. As we know that is the only way to make further progress, we can initiate the writeback as we invalidate the objects. This means the objects we put onto the inactive anon lru list are already marked for reclaim+writeback and so will trigger a wait upon the writeback inside direct reclaim, greatly improving the success rate of direct reclaim on i915 objects. The corollary is that we may start a slow swap on opportunistic mempressure from the likes of the compaction + migration kthreads. This is limited by those threads only being allowed to shrink idle pages, but also that if we reactivate the page before it is swapped out by gpu activity, we only page the cost of repinning the page. The cost is most felt when an object is reused after mempressure, which hopefully excludes the latency sensitive tasks (as we are just extending the impact of swap thrashing to them). Apparently this is not the first time we've had this idea. Back in commit 5537252b6b6d ("drm/i915: Invalidate our pages under memory pressure") we wanted to start writeback but settled on invalidate after Hugh Dickins warned us about a possibility of a deadlock within shmemfs if we started writeback from shrink_slab. Looking at the callchain, using writeback from i915_gem_shrink should be equivalent to the pageout also employed by shrink_slab, i.e. it should not be any riskier afaict. v2: Leave mmapings intact. At this point, the only mmapings of our objects will be via CPU mmaps on the shmemfs filp, which are out-of-scope for our LRU tracking. Instead leave those pages to the inactive anon LRU page list for aging and pageout as normal. 
v3: Be selective on which paths trigger writeback, in particular excluding paths shrinking just to reclaim vm space (e.g. mmap, vmap reapers) and avoid starting writeback on the entire process space from within the pm freezer. References: https://bugs.freedesktop.org/show_bug.cgi?id=108686 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Matthew Auld <matthew.auld@intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Michal Hocko <mhocko@suse.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20190420115539.29081-1-chris@chris-wilson.co.uk
2019-04-20 12:55:39 +01:00
I915_SHRINK_UNBOUND |
I915_SHRINK_WRITEBACK);
/* Because we may be allocating inside our own driver, we cannot
* assert that there are no objects with pinned pages that are not
* being pointed to by hardware.
*/
available = unevictable = 0;
spin_lock_irqsave(&i915->mm.obj_lock, flags);
list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
if (!can_release_pages(obj))
unevictable += obj->base.size >> PAGE_SHIFT;
else
available += obj->base.size >> PAGE_SHIFT;
}
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
if (freed_pages || available)
pr_info("Purging GPU memory, %lu pages freed, "
"%lu pages still pinned, %lu pages left available.\n",
freed_pages, unevictable, available);
*(unsigned long *)ptr += freed_pages;
return NOTIFY_DONE;
}
static int
i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
{
struct drm_i915_private *i915 =
container_of(nb, struct drm_i915_private, mm.vmap_notifier);
struct i915_vma *vma, *next;
unsigned long freed_pages = 0;
intel_wakeref_t wakeref;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
freed_pages += i915_gem_shrink(NULL, i915, -1UL, NULL,
I915_SHRINK_BOUND |
I915_SHRINK_UNBOUND |
I915_SHRINK_VMAPS);
/* We also want to clear any cached iomaps as they wrap vmap */
mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
list_for_each_entry_safe(vma, next,
&to_gt(i915)->ggtt->vm.bound_list, vm_link) {
unsigned long count = vma->node.size >> PAGE_SHIFT;
struct drm_i915_gem_object *obj = vma->obj;
drm/i915: Stop tracking MRU activity on VMA Our goal is to remove struct_mutex and replace it with fine grained locking. One of the thorny issues is our eviction logic for reclaiming space for an execbuffer (or GTT mmaping, among a few other examples). While eviction itself is easy to move under a per-VM mutex, performing the activity tracking is less agreeable. One solution is not to do any MRU tracking and do a simple coarse evaluation during eviction of active/inactive, with a loose temporal ordering of last insertion/evaluation. That keeps all the locking constrained to when we are manipulating the VM itself, neatly avoiding the tricky handling of possible recursive locking during execbuf and elsewhere. Note that discarding the MRU (currently implemented as a pair of lists, to avoid scanning the active list for a NONBLOCKING search) is unlikely to impact upon our efficiency to reclaim VM space (where we think a LRU model is best) as our current strategy is to use random idle replacement first before doing a search, and over time the use of softpinned 48b per-ppGTT is growing (thereby eliminating any need to perform any eviction searches, in theory at least) with the remaining users being found on much older devices (gen2-gen6). v2: Changelog and commentary rewritten to elaborate on the duality of a single list being both an inactive and active list. v3: Consolidate bool parameters into a single set of flags; don't comment on the duality of a single variable being a multiplicity of bits. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-1-chris@chris-wilson.co.uk
2019-01-28 10:23:52 +00:00
if (!vma->iomap || i915_vma_is_active(vma))
continue;
if (!i915_gem_object_trylock(obj, NULL))
continue;
drm/i915: Pull i915_vma_pin under the vm->mutex Replace the struct_mutex requirement for pinning the i915_vma with the local vm->mutex instead. Note that the vm->mutex is tainted by the shrinker (we require unbinding from inside fs-reclaim) and so we cannot allocate while holding that mutex. Instead we have to preallocate workers to do allocate and apply the PTE updates after we have we reserved their slot in the drm_mm (using fences to order the PTE writes with the GPU work and with later unbind). In adding the asynchronous vma binding, one subtle requirement is to avoid coupling the binding fence into the backing object->resv. That is the asynchronous binding only applies to the vma timeline itself and not to the pages as that is a more global timeline (the binding of one vma does not need to be ordered with another vma, nor does the implicit GEM fencing depend on a vma, only on writes to the backing store). Keeping the vma binding distinct from the backing store timelines is verified by a number of async gem_exec_fence and gem_exec_schedule tests. The way we do this is quite simple, we keep the fence for the vma binding separate and only wait on it as required, and never add it to the obj->resv itself. Another consequence in reducing the locking around the vma is the destruction of the vma is no longer globally serialised by struct_mutex. A natural solution would be to add a kref to i915_vma, but that requires decoupling the reference cycles, possibly by introducing a new i915_mm_pages object that is own by both obj->mm and vma->pages. However, we have not taken that route due to the overshadowing lmem/ttm discussions, and instead play a series of complicated games with trylocks to (hopefully) ensure that only one destruction path is called! v2: Add some commentary, and some helpers to reduce patch churn. 
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 14:39:58 +01:00
if (__i915_vma_unbind(vma) == 0)
freed_pages += count;
i915_gem_object_unlock(obj);
}
mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
*(unsigned long *)ptr += freed_pages;
return NOTIFY_DONE;
}
/*
 * Set up and register the three reclaim hooks owned by this file, in order:
 * the shrinker, the OOM notifier and the vmap purge notifier. A non-zero
 * result from any registration only trips drm_WARN_ON(); nothing is returned
 * to the caller.
 */
void i915_gem_driver_register__shrinker(struct drm_i915_private *i915)
{
/* Shrinker callbacks and tuning values. */
i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
i915->mm.shrinker.seeks = DEFAULT_SEEKS;
i915->mm.shrinker.batch = 4096;
drm_WARN_ON(&i915->drm, register_shrinker(&i915->mm.shrinker));
/* OOM notifier, handled by i915_gem_shrinker_oom(). */
i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
drm_WARN_ON(&i915->drm, register_oom_notifier(&i915->mm.oom_notifier));
/* vmap purge notifier, handled by i915_gem_shrinker_vmap(). */
i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
drm_WARN_ON(&i915->drm,
register_vmap_purge_notifier(&i915->mm.vmap_notifier));
}
/*
 * Unregister the reclaim hooks in the reverse of the order used by
 * i915_gem_driver_register__shrinker(): vmap purge notifier, OOM notifier,
 * then the shrinker. Only the two notifier removals are wrapped in
 * drm_WARN_ON().
 */
void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915)
{
drm_WARN_ON(&i915->drm,
unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
drm_WARN_ON(&i915->drm,
unregister_oom_notifier(&i915->mm.oom_notifier));
unregister_shrinker(&i915->mm.shrinker);
}
drm/i915: Return immediately if trylock fails for direct-reclaim Ignore trying to shrink from i915 if we fail to acquire the struct_mutex in the shrinker while performing direct-reclaim. The trade-off being (much) lower latency for non-i915 clients at an increased risk of being unable to obtain a page from direct-reclaim without hitting the oom-notifier. The proviso being that we still keep trying hard to obtain the lock for kswapd so that we can reap under heavy memory pressure. v2: Taint all mutexes taken within the shrinker with the struct_mutex subclass as an early warning system, and drop I915_SHRINK_ACTIVE from vmap to reduce the number of dangerous paths. We also have to drop I915_SHRINK_ACTIVE from oom-notifier to be able to make the same claim that ACTIVE is only used from outside context, which fits in with a longer strategy of avoiding stalls due to scanning active during shrinking. The danger in using the subclass struct_mutex is that we declare ourselves more knowledgeable than lockdep and deprive ourselves of automatic coverage. Instead, we require ourselves to mark up any mutex taken inside the shrinker in order to detect lock-inversion, and if we miss any we are doomed to a deadlock at the worst possible moment. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190107115509.12523-1-chris@chris-wilson.co.uk
2019-01-07 11:54:24 +00:00
void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
struct mutex *mutex)
{
if (!IS_ENABLED(CONFIG_LOCKDEP))
return;
fs_reclaim_acquire(GFP_KERNEL);
drm/i915: Return immediately if trylock fails for direct-reclaim Ignore trying to shrink from i915 if we fail to acquire the struct_mutex in the shrinker while performing direct-reclaim. The trade-off being (much) lower latency for non-i915 clients at an increased risk of being unable to obtain a page from direct-reclaim without hitting the oom-notifier. The proviso being that we still keep trying to hard obtain the lock for kswapd so that we can reap under heavy memory pressure. v2: Taint all mutexes taken within the shrinker with the struct_mutex subclass as an early warning system, and drop I915_SHRINK_ACTIVE from vmap to reduce the number of dangerous paths. We also have to drop I915_SHRINK_ACTIVE from oom-notifier to be able to make the same claim that ACTIVE is only used from outside context, which fits in with a longer strategy of avoiding stalls due to scanning active during shrinking. The danger in using the subclass struct_mutex is that we declare ourselves more knowledgable than lockdep and deprive ourselves of automatic coverage. Instead, we require ourselves to mark up any mutex taken inside the shrinker in order to detect lock-inversion, and if we miss any we are doomed to a deadlock at the worst possible moment. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190107115509.12523-1-chris@chris-wilson.co.uk
2019-01-07 11:54:24 +00:00
mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
2019-09-19 12:09:40 -04:00
mutex_release(&mutex->dep_map, _RET_IP_);
drm/i915: Return immediately if trylock fails for direct-reclaim Ignore trying to shrink from i915 if we fail to acquire the struct_mutex in the shrinker while performing direct-reclaim. The trade-off being (much) lower latency for non-i915 clients at an increased risk of being unable to obtain a page from direct-reclaim without hitting the oom-notifier. The proviso being that we still keep trying to hard obtain the lock for kswapd so that we can reap under heavy memory pressure. v2: Taint all mutexes taken within the shrinker with the struct_mutex subclass as an early warning system, and drop I915_SHRINK_ACTIVE from vmap to reduce the number of dangerous paths. We also have to drop I915_SHRINK_ACTIVE from oom-notifier to be able to make the same claim that ACTIVE is only used from outside context, which fits in with a longer strategy of avoiding stalls due to scanning active during shrinking. The danger in using the subclass struct_mutex is that we declare ourselves more knowledgable than lockdep and deprive ourselves of automatic coverage. Instead, we require ourselves to mark up any mutex taken inside the shrinker in order to detect lock-inversion, and if we miss any we are doomed to a deadlock at the worst possible moment. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190107115509.12523-1-chris@chris-wilson.co.uk
2019-01-07 11:54:24 +00:00
fs_reclaim_release(GFP_KERNEL);
}
/* Look up the drm_i915_private for a GEM object by passing obj->base.dev to to_i915(). */
#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
/**
 * i915_gem_object_make_unshrinkable - Hide the object from the shrinker.
 * @obj: The GEM object.
 *
 * By default, every object type that supports shrinking (see IS_SHRINKABLE)
 * becomes visible to the shrinker once its system memory pages have been
 * allocated. This call takes a shrink pin on @obj and, when it is the first
 * pin, removes the object from whichever shrinker list it is on.
 *
 * This is typically used for special kernel internal objects that can't be
 * easily processed by the shrinker, like if they are perma-pinned.
 */
void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915;
	unsigned long flags;

	/*
	 * We can only be called while the pages are pinned or when the pages
	 * are released. If pinned, we should only be called from a single
	 * caller under controlled conditions; and on release only one caller
	 * may release us. The two cases must never overlap.
	 *
	 * Fast path: the object already holds a shrink pin (count != 0), so
	 * taking another one needs no list manipulation.
	 */
	if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0))
		return;

	i915 = obj_to_i915(obj);

	spin_lock_irqsave(&i915->mm.obj_lock, flags);
	/* Only the 0 -> 1 transition unlinks the object and its accounting. */
	if (atomic_fetch_inc(&obj->mm.shrink_pin) == 0 &&
	    !list_empty(&obj->mm.link)) {
		list_del_init(&obj->mm.link);
		i915->mm.shrink_count--;
		i915->mm.shrink_memory -= obj->base.size;
	}
	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}
drm/i915/ttm: move shrinker management into adjust_lru We currently just evict lmem objects to system memory when under memory pressure. For this case we might lack the usual object mm.pages, which effectively hides the pages from the i915-gem shrinker, until we actually "attach" the TT to the object, or in the case of lmem-only objects it just gets migrated back to lmem when touched again. For all cases we can just adjust the i915 shrinker LRU each time we also adjust the TTM LRU. The two cases we care about are: 1) When something is moved by TTM, including when initially populating an object. Importantly this covers the case where TTM moves something from lmem <-> smem, outside of the normal get_pages() interface, which should still ensure the shmem pages underneath are reclaimable. 2) When calling into i915_gem_object_unlock(). The unlock should ensure the object is removed from the shrinker LRU, if it was indeed swapped out, or just purged, when the shrinker drops the object lock. v2(Thomas): - Handle managing the shrinker LRU in adjust_lru, where it is always safe to touch the object. v3(Thomas): - Pretty much a re-write. This time piggy back off the shrink_pin stuff, which actually seems to fit quite well for what we want here. v4(Thomas): - Just use a simple boolean for tracking ttm_shrinkable. v5: - Ensure we call adjust_lru when faulting the object, to ensure the pages are visible to the shrinker, if needed. - Add back the adjust_lru when in i915_ttm_move (Thomas) v6(Reported-by: kernel test robot <lkp@intel.com>): - Remove unused i915_tt Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> #v4 Link: https://patchwork.freedesktop.org/patch/msgid/20211018091055.1998191-6-matthew.auld@intel.com
2021-10-18 10:10:53 +01:00
/*
 * Drop one shrink pin from @obj and, if that was the last pin, append the
 * object to the tail of @head and add it to the shrinker accounting.
 *
 * @obj:  The GEM object; ignored unless i915_gem_object_is_shrinkable().
 * @head: The list the object is appended to on the final unpin.
 */
static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
					       struct list_head *head)
{
	struct drm_i915_private *i915;
	unsigned long flags;

	if (!i915_gem_object_is_shrinkable(obj))
		return;

	/* Fast path: other pins remain (count != 1), just drop ours. */
	if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1))
		return;

	i915 = obj_to_i915(obj);

	spin_lock_irqsave(&i915->mm.obj_lock, flags);
	GEM_BUG_ON(!kref_read(&obj->base.refcount));
	/* Re-check under the lock; only the 1 -> 0 transition links us in. */
	if (!atomic_dec_and_test(&obj->mm.shrink_pin))
		goto unlock;

	GEM_BUG_ON(!list_empty(&obj->mm.link));

	list_add_tail(&obj->mm.link, head);
	i915->mm.shrink_count++;
	i915->mm.shrink_memory += obj->base.size;

unlock:
	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}
drm/i915/ttm: move shrinker management into adjust_lru We currently just evict lmem objects to system memory when under memory pressure. For this case we might lack the usual object mm.pages, which effectively hides the pages from the i915-gem shrinker, until we actually "attach" the TT to the object, or in the case of lmem-only objects it just gets migrated back to lmem when touched again. For all cases we can just adjust the i915 shrinker LRU each time we also adjust the TTM LRU. The two cases we care about are: 1) When something is moved by TTM, including when initially populating an object. Importantly this covers the case where TTM moves something from lmem <-> smem, outside of the normal get_pages() interface, which should still ensure the shmem pages underneath are reclaimable. 2) When calling into i915_gem_object_unlock(). The unlock should ensure the object is removed from the shrinker LRU, if it was indeed swapped out, or just purged, when the shrinker drops the object lock. v2(Thomas): - Handle managing the shrinker LRU in adjust_lru, where it is always safe to touch the object. v3(Thomas): - Pretty much a re-write. This time piggy back off the shrink_pin stuff, which actually seems to fit quite well for what we want here. v4(Thomas): - Just use a simple boolean for tracking ttm_shrinkable. v5: - Ensure we call adjust_lru when faulting the object, to ensure the pages are visible to the shrinker, if needed. - Add back the adjust_lru when in i915_ttm_move (Thomas) v6(Reported-by: kernel test robot <lkp@intel.com>): - Remove unused i915_tt Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> #v4 Link: https://patchwork.freedesktop.org/patch/msgid/20211018091055.1998191-6-matthew.auld@intel.com
2021-10-18 10:10:53 +01:00
/**
* __i915_gem_object_make_shrinkable - Move the object to the tail of the
* shrinkable list. Objects on this list might be swapped out. Used with
* WILLNEED objects.
* @obj: The GEM object.
*
* DO NOT USE. This is intended to be called on very special objects that don't
* yet have mm.pages, but are guaranteed to have potentially reclaimable pages
* underneath.
*/
void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
{
___i915_gem_object_make_shrinkable(obj,
&obj_to_i915(obj)->mm.shrink_list);
}
/**
* __i915_gem_object_make_purgeable - Move the object to the tail of the
* purgeable list. Objects on this list might be swapped out. Used with
* DONTNEED objects.
* @obj: The GEM object.
*
* DO NOT USE. This is intended to be called on very special objects that don't
* yet have mm.pages, but are guaranteed to have potentially reclaimable pages
* underneath.
*/
void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
{
___i915_gem_object_make_shrinkable(obj,
&obj_to_i915(obj)->mm.purge_list);
}
/**
 * i915_gem_object_make_shrinkable - Move the object to the tail of the
 * shrinkable list.
 * @obj: The GEM object.
 *
 * Objects on the shrinkable list might be swapped out. Used with WILLNEED
 * objects.
 *
 * MUST only be called on objects which have backing pages; this is checked
 * with GEM_BUG_ON().
 *
 * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
 */
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(obj));

	__i915_gem_object_make_shrinkable(obj);
}
/**
 * i915_gem_object_make_purgeable - Move the object to the tail of the
 * purgeable list.
 * @obj: The GEM object.
 *
 * Used with DONTNEED objects. Unlike with shrinkable objects, the shrinker
 * will attempt to discard the backing pages, instead of trying to swap them
 * out.
 *
 * MUST only be called on objects which have backing pages; this is checked
 * with GEM_BUG_ON().
 *
 * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
 */
void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(obj));

	__i915_gem_object_make_purgeable(obj);
}