// SPDX-License-Identifier: GPL-2.0-only
/*
 * kvm asynchronous fault support
 *
 * Copyright 2010 Red Hat, Inc.
 *
 * Author:
 *	Gleb Natapov <gleb@redhat.com>
 */

#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include "async_pf.h"
#include <trace/events/kvm.h>

static struct kmem_cache *async_pf_cache;
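
/* Create the slab cache used to allocate async #PF work items. */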
int kvm_async_pf_init(void)
{
	async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);

	if (!async_pf_cache)
		return -ENOMEM;

	return 0;
}
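
/* Destroy the work item cache; the counterpart to kvm_async_pf_init(). */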
void kvm_async_pf_deinit(void)
{
	kmem_cache_destroy(async_pf_cache);
	async_pf_cache = NULL;
}
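
/* Initialize a vCPU's async #PF lists and lock when the vCPU is created. */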
void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
{
	INIT_LIST_HEAD(&vcpu->async_pf.done);
	INIT_LIST_HEAD(&vcpu->async_pf.queue);
	spin_lock_init(&vcpu->async_pf.lock);
}
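
/*
 * Workqueue callback: fault in the page the guest is waiting on, then move
 * the item to the vCPU's "done" list and wake the vCPU so that it can
 * observe the completion.
 */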
static void async_pf_execute(struct work_struct *work)
{
	struct kvm_async_pf *apf =
		container_of(work, struct kvm_async_pf, work);
	struct kvm_vcpu *vcpu = apf->vcpu;
	struct mm_struct *mm = vcpu->kvm->mm;
	unsigned long addr = apf->addr;
	gpa_t cr2_or_gpa = apf->cr2_or_gpa;
	int locked = 1;
	bool first;

	might_sleep();

	/*
	 * Attempt to pin the VM's host address space, and simply skip gup() if
	 * acquiring a pin fails, i.e. if the process is exiting.  Note, KVM
	 * holds a reference to its associated mm_struct until the very end of
	 * kvm_destroy_vm(), i.e. the struct itself won't be freed before this
	 * work item is fully processed.
	 */
	if (mmget_not_zero(mm)) {
		mmap_read_lock(mm);
		get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
		if (locked)
			mmap_read_unlock(mm);
		mmput(mm);
	}

	/*
	 * Notify and kick the vCPU even if faulting in the page failed, e.g.
	 * so that the vCPU can retry the fault synchronously.
	 */
	if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
		kvm_arch_async_page_present(vcpu, apf);

	spin_lock(&vcpu->async_pf.lock);
	first = list_empty(&vcpu->async_pf.done);
	list_add_tail(&apf->link, &vcpu->async_pf.done);
	spin_unlock(&vcpu->async_pf.lock);

	/*
	 * The apf struct may be freed by kvm_check_async_pf_completion() as
	 * soon as the lock is dropped.  Nullify it to prevent improper usage.
	 */
	apf = NULL;

	if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
		kvm_arch_async_page_present_queued(vcpu);

	trace_kvm_async_pf_completed(addr, cr2_or_gpa);

	__kvm_vcpu_wake_up(vcpu);
}

static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work)
{
	/*
	 * The async #PF is "done", but KVM must wait for the work item itself,
	 * i.e. async_pf_execute(), to run to completion.  If KVM is a module,
	 * KVM must ensure *no* code owned by the KVM module can be run after
	 * the last call to module_put().  Note, flushing the work item is
	 * always required when the item is taken off the completion queue.
	 * E.g. even if the vCPU handles the item in the "normal" path, the VM
	 * could be terminated before async_pf_execute() completes.
	 *
	 * Wake-all events skip the queue and go straight to done, i.e. don't
	 * need to be flushed (but sanity check that the work wasn't queued).
	 */
	if (work->wakeup_all)
		WARN_ON_ONCE(work->work.func);
	else
		flush_work(&work->work);

	kmem_cache_free(async_pf_cache, work);
}
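
/*
 * Cancel all still-queued work items and flush/free everything on the "done"
 * list, e.g. when the vCPU (and thus its VM) is being destroyed.
 */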
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
{
	/* cancel outstanding work queue item */
	while (!list_empty(&vcpu->async_pf.queue)) {
		struct kvm_async_pf *work =
			list_first_entry(&vcpu->async_pf.queue,
					 typeof(*work), queue);
		list_del(&work->queue);

#ifdef CONFIG_KVM_ASYNC_PF_SYNC
		flush_work(&work->work);
#else
		if (cancel_work_sync(&work->work))
			kmem_cache_free(async_pf_cache, work);
#endif
	}
Revert "KVM: async_pf: avoid recursive flushing of work items"
Now that KVM does NOT gift async #PF workers a "struct kvm" reference,
don't bother skipping "done" workers when flushing/canceling queued
workers, as the deadlock that was being fudged around can no longer occur.
When workers, i.e. async_pf_execute(), were gifted a referenced, it was
possible for a worker to put the last reference and trigger VM destruction,
i.e. trigger flushing of a workqueue from a worker in said workqueue.
Note, there is no actual lock, the deadlock was that a worker will be
stuck waiting for itself (the workqueue code simulates a lock/unlock via
lock_map_{acquire,release}()).
Skipping "done" workers isn't problematic per se, but using work->vcpu as
a "done" flag is confusing, e.g. it's not clear that async_pf.lock is
acquired to protect the work->vcpu, NOT the processing of async_pf.queue
(which is protected by vcpu->mutex).
This reverts commit 22583f0d9c85e60c9860bc8a0ebff59fe08be6d7.
Suggested-by: Xu Yilun <yilun.xu@linux.intel.com>
Link: https://lore.kernel.org/r/20240423191649.2885257-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2024-04-23 12:16:49 -07:00

	spin_lock(&vcpu->async_pf.lock);
	while (!list_empty(&vcpu->async_pf.done)) {
		struct kvm_async_pf *work =
			list_first_entry(&vcpu->async_pf.done,
					 typeof(*work), link);

		list_del(&work->link);

		spin_unlock(&vcpu->async_pf.lock);
		kvm_flush_and_free_async_pf_work(work);
		spin_lock(&vcpu->async_pf.lock);
	}
	spin_unlock(&vcpu->async_pf.lock);

	vcpu->async_pf.queued = 0;
}
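
/*
 * Deliver "page ready" notifications for completed items; typically invoked
 * from the vCPU run loop while the arch is able to dequeue such an event.
 */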
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;

	while (!list_empty_careful(&vcpu->async_pf.done) &&
	      kvm_arch_can_dequeue_async_page_present(vcpu)) {
		spin_lock(&vcpu->async_pf.lock);
		work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
					link);
		list_del(&work->link);
		spin_unlock(&vcpu->async_pf.lock);

		kvm_arch_async_page_ready(vcpu, work);
		if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
			kvm_arch_async_page_present(vcpu, work);

		list_del(&work->queue);
		vcpu->async_pf.queued--;
		kvm_flush_and_free_async_pf_work(work);
	}
}

/*
 * Try to schedule a job to handle the page fault asynchronously. Returns
 * 'true' on success, 'false' on failure (the page fault then has to be
 * handled synchronously).
 */
bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
			unsigned long hva, struct kvm_arch_async_pf *arch)
{
	struct kvm_async_pf *work;

	if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
		return false;

	/* Arch specific code should not do async PF in this case */
	if (unlikely(kvm_is_error_hva(hva)))
		return false;

	/*
	 * do alloc nowait since if we are going to sleep anyway we
	 * may as well sleep faulting in page
	 */
	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
	if (!work)
		return false;

	work->wakeup_all = false;
	work->vcpu = vcpu;
	work->cr2_or_gpa = cr2_or_gpa;
	work->addr = hva;
	work->arch = *arch;

	INIT_WORK(&work->work, async_pf_execute);

	list_add_tail(&work->queue, &vcpu->async_pf.queue);
	vcpu->async_pf.queued++;
	work->notpresent_injected = kvm_arch_async_page_not_present(vcpu, work);

	schedule_work(&work->work);

	return true;
}
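
/*
 * Illustrative sketch only (an assumption, not lifted from any arch): arch
 * code typically calls kvm_setup_async_pf() from its page fault path with a
 * gfn-to-hva translation, e.g. via kvm_vcpu_gfn_to_hva():
 *
 *	if (kvm_setup_async_pf(vcpu, cr2_or_gpa,
 *			       kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch))
 *		return;
 *
 * where 'true' means the fault will complete asynchronously, and 'false'
 * means the fault must be handled synchronously.
 */

/*
 * Queue a "wakeup all" pseudo work item on the vCPU's "done" list to signal
 * that all outstanding async #PFs are resolved; such items are flagged via
 * wakeup_all and never run on a workqueue.
 */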
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;
	bool first;

	if (!list_empty_careful(&vcpu->async_pf.done))
		return 0;

	work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
	if (!work)
		return -ENOMEM;

	work->wakeup_all = true;
	INIT_LIST_HEAD(&work->queue); /* for list_del to work */

	spin_lock(&vcpu->async_pf.lock);
	first = list_empty(&vcpu->async_pf.done);
	list_add_tail(&work->link, &vcpu->async_pf.done);
	spin_unlock(&vcpu->async_pf.lock);

	if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
		kvm_arch_async_page_present_queued(vcpu);

	vcpu->async_pf.queued++;
	return 0;
}