2018-08-22 10:20:00 +03:00
// SPDX-License-Identifier: GPL-2.0
/*
* KVM dirty page logging test
*
* Copyright ( C ) 2018 , Red Hat , Inc .
*/
2018-09-18 20:54:34 +03:00
# define _GNU_SOURCE /* for program_invocation_name */
2018-08-22 10:20:00 +03:00
# include <stdio.h>
# include <stdlib.h>
# include <unistd.h>
# include <time.h>
# include <pthread.h>
# include <linux/bitmap.h>
# include <linux/bitops.h>
# include "test_util.h"
# include "kvm_util.h"
2018-09-18 20:54:28 +03:00
# include "processor.h"
2018-08-22 10:20:00 +03:00
2018-09-18 20:54:32 +03:00
/* ID of the single vCPU created for the test */
#define VCPU_ID				1

/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX		1

/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM		0xc0000000

/* How many pages to dirty for each guest loop */
#define TEST_PAGES_PER_LOOP		1024

/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
#define TEST_HOST_LOOP_N		32UL

/* Interval for each host loop (ms) */
#define TEST_HOST_LOOP_INTERVAL		10UL
2018-08-22 10:20:00 +03:00
2019-07-31 18:15:25 +03:00
/*
 * Dirty bitmaps are always little endian, so we need to swap on big endian.
 *
 * On s390x every *_bit_le() helper XORs the bit number with
 * BITOP_LE_SWIZZLE, i.e. ((BITS_PER_LONG-1) & ~0x7), before calling the
 * native helper. That mask is 56 when BITS_PER_LONG is 64: it only has
 * bits above the low three set, so the XOR changes which byte of the long
 * is addressed and leaves the bit position inside that byte alone.
 */
# if defined(__s390x__)
# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
# define test_bit_le(nr, addr) \
test_bit ( ( nr ) ^ BITOP_LE_SWIZZLE , addr )
# define set_bit_le(nr, addr) \
set_bit ( ( nr ) ^ BITOP_LE_SWIZZLE , addr )
# define clear_bit_le(nr, addr) \
clear_bit ( ( nr ) ^ BITOP_LE_SWIZZLE , addr )
# define test_and_set_bit_le(nr, addr) \
test_and_set_bit ( ( nr ) ^ BITOP_LE_SWIZZLE , addr )
# define test_and_clear_bit_le(nr, addr) \
test_and_clear_bit ( ( nr ) ^ BITOP_LE_SWIZZLE , addr )
# else
/* Everywhere else the _le names are plain aliases of the native helpers. */
# define test_bit_le test_bit
# define set_bit_le set_bit
# define clear_bit_le clear_bit
# define test_and_set_bit_le test_and_set_bit
# define test_and_clear_bit_le test_and_clear_bit
# endif
2018-08-22 10:20:00 +03:00
/*
2018-09-18 20:54:32 +03:00
* Guest / Host shared variables . Ensure addr_gva2hva ( ) and / or
* sync_global_to / from_guest ( ) are used when accessing from
* the host . READ / WRITE_ONCE ( ) should also be used with anything
* that may change .
2018-08-22 10:20:00 +03:00
*/
2018-09-18 20:54:32 +03:00
static uint64_t host_page_size ;
static uint64_t guest_page_size ;
2018-09-18 20:54:34 +03:00
static uint64_t guest_num_pages ;
2018-09-18 20:54:32 +03:00
static uint64_t random_array [ TEST_PAGES_PER_LOOP ] ;
static uint64_t iteration ;
2018-08-22 10:20:00 +03:00
2018-09-18 20:54:36 +03:00
/*
2018-11-06 16:57:08 +03:00
* Guest physical memory offset of the testing memory slot .
* This will be set to the topmost valid physical address minus
* the test memory size .
*/
static uint64_t guest_test_phys_mem ;
/*
* Guest virtual memory offset of the testing memory slot .
* Must not conflict with identity mapped test code .
2018-09-18 20:54:36 +03:00
*/
2018-11-06 16:57:07 +03:00
static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM ;
2018-09-18 20:54:36 +03:00
2018-08-22 10:20:00 +03:00
/*
2018-09-18 20:54:32 +03:00
* Continuously write to the first 8 bytes of a random pages within
* the testing memory region .
2018-08-22 10:20:00 +03:00
*/
2018-09-18 20:54:32 +03:00
static void guest_code ( void )
2018-08-22 10:20:00 +03:00
{
2019-07-31 18:15:25 +03:00
uint64_t addr ;
2018-09-18 20:54:32 +03:00
int i ;
2018-08-22 10:20:00 +03:00
2019-07-31 18:15:25 +03:00
/*
* On s390x , all pages of a 1 M segment are initially marked as dirty
* when a page of the segment is written to for the very first time .
* To compensate this specialty in this test , we need to touch all
* pages during the first iteration .
*/
for ( i = 0 ; i < guest_num_pages ; i + + ) {
addr = guest_test_virt_mem + i * guest_page_size ;
* ( uint64_t * ) addr = READ_ONCE ( iteration ) ;
}
2018-08-22 10:20:00 +03:00
while ( true ) {
for ( i = 0 ; i < TEST_PAGES_PER_LOOP ; i + + ) {
2019-07-31 18:15:25 +03:00
addr = guest_test_virt_mem ;
2018-09-18 20:54:34 +03:00
addr + = ( READ_ONCE ( random_array [ i ] ) % guest_num_pages )
2018-09-18 20:54:32 +03:00
* guest_page_size ;
addr & = ~ ( host_page_size - 1 ) ;
* ( uint64_t * ) addr = READ_ONCE ( iteration ) ;
2018-08-22 10:20:00 +03:00
}
2018-09-18 20:54:32 +03:00
2018-08-22 10:20:00 +03:00
/* Tell the host that we need more random numbers */
GUEST_SYNC ( 1 ) ;
}
}
2018-09-18 20:54:32 +03:00
/* Host variables */

/* Set by run_test() to make vcpu_worker() leave its run loop. */
static bool host_quit;

/* Points to the test VM memory region on which we track dirty logs */
static void *host_test_mem;
/* Size of that region, counted in host pages */
static uint64_t host_num_pages;

/* For statistics only */
static uint64_t host_dirty_count;
static uint64_t host_clear_count;
static uint64_t host_track_next_count;

/*
 * We use this bitmap to track pages that should have their dirty
 * bit set in the _next_ iteration. For example, if we detect that a
 * page's value changed to the current iteration while its bit is
 * clear in the latest bitmap, then the system must report that write
 * in the next get dirty log call.
 */
static unsigned long *host_bmap_track;
2018-08-22 10:20:00 +03:00
2018-09-18 20:54:32 +03:00
/*
 * Fill an array with fresh random() values.
 *
 * @guest_array: destination array; must have room for @size elements.
 * @size:        number of elements to write; 0 leaves the array untouched.
 */
static void generate_random_array(uint64_t *guest_array, uint64_t size)
{
	uint64_t i;

	for (i = 0; i < size; i++)
		guest_array[i] = random();
}
2018-09-18 20:54:32 +03:00
static void * vcpu_worker ( void * data )
2018-08-22 10:20:00 +03:00
{
int ret ;
struct kvm_vm * vm = data ;
2018-09-18 20:54:32 +03:00
uint64_t * guest_array ;
uint64_t pages_count = 0 ;
2018-08-22 10:20:00 +03:00
struct kvm_run * run ;
run = vcpu_state ( vm , VCPU_ID ) ;
2018-09-18 20:54:32 +03:00
guest_array = addr_gva2hva ( vm , ( vm_vaddr_t ) random_array ) ;
2018-08-22 10:20:00 +03:00
generate_random_array ( guest_array , TEST_PAGES_PER_LOOP ) ;
while ( ! READ_ONCE ( host_quit ) ) {
2018-09-18 20:54:32 +03:00
/* Let the guest dirty the random pages */
2018-08-22 10:20:00 +03:00
ret = _vcpu_run ( vm , VCPU_ID ) ;
2019-05-17 12:04:45 +03:00
TEST_ASSERT ( ret = = 0 , " vcpu_run failed: %d \n " , ret ) ;
2019-05-27 15:30:06 +03:00
if ( get_ucall ( vm , VCPU_ID , NULL ) = = UCALL_SYNC ) {
2018-08-22 10:20:00 +03:00
pages_count + = TEST_PAGES_PER_LOOP ;
generate_random_array ( guest_array , TEST_PAGES_PER_LOOP ) ;
} else {
TEST_ASSERT ( false ,
" Invalid guest sync status: "
" exit_reason=%s \n " ,
exit_reason_str ( run - > exit_reason ) ) ;
}
}
2018-09-18 20:54:32 +03:00
DEBUG ( " Dirtied % " PRIu64 " pages \n " , pages_count ) ;
2018-08-22 10:20:00 +03:00
return NULL ;
}
2018-09-18 20:54:32 +03:00
static void vm_dirty_log_verify ( unsigned long * bmap )
2018-08-22 10:20:00 +03:00
{
uint64_t page ;
2018-09-18 20:54:32 +03:00
uint64_t * value_ptr ;
2018-09-18 20:54:34 +03:00
uint64_t step = host_page_size > = guest_page_size ? 1 :
guest_page_size / host_page_size ;
2018-08-22 10:20:00 +03:00
2018-09-18 20:54:34 +03:00
for ( page = 0 ; page < host_num_pages ; page + = step ) {
2018-09-18 20:54:32 +03:00
value_ptr = host_test_mem + page * host_page_size ;
2018-08-22 10:20:00 +03:00
/* If this is a special page that we were tracking... */
2019-07-31 18:15:25 +03:00
if ( test_and_clear_bit_le ( page , host_bmap_track ) ) {
2018-08-22 10:20:00 +03:00
host_track_next_count + + ;
2019-07-31 18:15:25 +03:00
TEST_ASSERT ( test_bit_le ( page , bmap ) ,
2018-08-22 10:20:00 +03:00
" Page % " PRIu64 " should have its dirty bit "
" set in this iteration but it is missing " ,
page ) ;
}
2019-07-31 18:15:25 +03:00
if ( test_bit_le ( page , bmap ) ) {
2018-08-22 10:20:00 +03:00
host_dirty_count + + ;
/*
* If the bit is set , the value written onto
* the corresponding page should be either the
* previous iteration number or the current one .
*/
TEST_ASSERT ( * value_ptr = = iteration | |
* value_ptr = = iteration - 1 ,
" Set page % " PRIu64 " value % " PRIu64
" incorrect (iteration=% " PRIu64 " ) " ,
page , * value_ptr , iteration ) ;
} else {
host_clear_count + + ;
/*
* If cleared , the value written can be any
* value smaller or equals to the iteration
* number . Note that the value can be exactly
* ( iteration - 1 ) if that write can happen
* like this :
*
* ( 1 ) increase loop count to " iteration-1 "
* ( 2 ) write to page P happens ( with value
* " iteration-1 " )
* ( 3 ) get dirty log for " iteration-1 " ; we ' ll
* see that page P bit is set ( dirtied ) ,
* and not set the bit in host_bmap_track
* ( 4 ) increase loop count to " iteration "
* ( which is current iteration )
* ( 5 ) get dirty log for current iteration ,
* we ' ll see that page P is cleared , with
* value " iteration-1 " .
*/
TEST_ASSERT ( * value_ptr < = iteration ,
" Clear page % " PRIu64 " value % " PRIu64
" incorrect (iteration=% " PRIu64 " ) " ,
page , * value_ptr , iteration ) ;
if ( * value_ptr = = iteration ) {
/*
* This page is _just_ modified ; it
* should report its dirtyness in the
* next run
*/
2019-07-31 18:15:25 +03:00
set_bit_le ( page , host_bmap_track ) ;
2018-08-22 10:20:00 +03:00
}
}
}
}
2018-09-18 20:54:34 +03:00
static struct kvm_vm * create_vm ( enum vm_guest_mode mode , uint32_t vcpuid ,
2019-08-30 04:36:16 +03:00
uint64_t extra_mem_pages , void * guest_code )
2018-08-22 10:20:00 +03:00
{
2018-09-18 20:54:34 +03:00
struct kvm_vm * vm ;
uint64_t extra_pg_pages = extra_mem_pages / 512 * 2 ;
2019-08-30 04:36:16 +03:00
vm = _vm_create ( mode , DEFAULT_GUEST_PHY_PAGES + extra_pg_pages , O_RDWR ) ;
2018-09-18 20:54:34 +03:00
kvm_vm_elf_load ( vm , program_invocation_name , 0 , 0 ) ;
# ifdef __x86_64__
vm_create_irqchip ( vm ) ;
# endif
vm_vcpu_add_default ( vm , vcpuid , guest_code ) ;
return vm ;
2018-08-22 10:20:00 +03:00
}
2019-08-30 04:36:17 +03:00
# define DIRTY_MEM_BITS 30 /* 1G */
# define PAGE_SHIFT_4K 12
2018-09-18 20:54:34 +03:00
static void run_test ( enum vm_guest_mode mode , unsigned long iterations ,
2018-11-06 16:57:09 +03:00
unsigned long interval , uint64_t phys_offset )
2018-08-22 10:20:00 +03:00
{
2018-09-18 20:54:36 +03:00
unsigned int guest_pa_bits , guest_page_shift ;
2018-08-22 10:20:00 +03:00
pthread_t vcpu_thread ;
struct kvm_vm * vm ;
2018-09-18 20:54:36 +03:00
uint64_t max_gfn ;
2018-09-18 20:54:32 +03:00
unsigned long * bmap ;
2018-08-22 10:20:00 +03:00
2019-08-30 04:36:17 +03:00
/*
* We reserve page table for 2 times of extra dirty mem which
* will definitely cover the original ( 1 G + ) test range . Here
* we do the calculation with 4 K page size which is the
* smallest so the page number will be enough for all archs
* ( e . g . , 64 K page size guest will need even less memory for
* page tables ) .
*/
vm = create_vm ( mode , VCPU_ID ,
2ul < < ( DIRTY_MEM_BITS - PAGE_SHIFT_4K ) ,
guest_code ) ;
2018-09-18 20:54:34 +03:00
switch ( mode ) {
case VM_MODE_P52V48_4K :
2019-08-30 04:36:18 +03:00
case VM_MODE_PXXV48_4K :
2018-09-18 20:54:36 +03:00
guest_pa_bits = 52 ;
2018-09-18 20:54:34 +03:00
guest_page_shift = 12 ;
break ;
case VM_MODE_P52V48_64K :
2018-09-18 20:54:36 +03:00
guest_pa_bits = 52 ;
guest_page_shift = 16 ;
break ;
2018-11-06 16:57:12 +03:00
case VM_MODE_P48V48_4K :
guest_pa_bits = 48 ;
guest_page_shift = 12 ;
break ;
case VM_MODE_P48V48_64K :
guest_pa_bits = 48 ;
guest_page_shift = 16 ;
break ;
2018-09-18 20:54:36 +03:00
case VM_MODE_P40V48_4K :
guest_pa_bits = 40 ;
guest_page_shift = 12 ;
break ;
2018-09-18 20:54:35 +03:00
case VM_MODE_P40V48_64K :
2018-09-18 20:54:36 +03:00
guest_pa_bits = 40 ;
2018-09-18 20:54:34 +03:00
guest_page_shift = 16 ;
break ;
default :
TEST_ASSERT ( false , " Unknown guest mode, mode: 0x%x " , mode ) ;
2018-08-22 10:20:00 +03:00
}
2018-09-18 20:54:34 +03:00
DEBUG ( " Testing guest mode: %s \n " , vm_guest_mode_string ( mode ) ) ;
2018-08-22 10:20:00 +03:00
2018-11-06 16:57:06 +03:00
# ifdef __x86_64__
/*
* FIXME
* The x86_64 kvm selftests framework currently only supports a
* single PML4 which restricts the number of physical address
* bits we can change to 39.
*/
guest_pa_bits = 39 ;
# endif
2018-09-18 20:54:36 +03:00
max_gfn = ( 1ul < < ( guest_pa_bits - guest_page_shift ) ) - 1 ;
2018-09-18 20:54:34 +03:00
guest_page_size = ( 1ul < < guest_page_shift ) ;
2019-04-17 16:28:44 +03:00
/*
* A little more than 1 G of guest page sized pages . Cover the
* case where the size is not aligned to 64 pages .
*/
2019-08-30 04:36:17 +03:00
guest_num_pages = ( 1ul < < ( DIRTY_MEM_BITS - guest_page_shift ) ) + 16 ;
2019-07-31 18:15:25 +03:00
# ifdef __s390x__
/* Round up to multiple of 1M (segment size) */
guest_num_pages = ( guest_num_pages + 0xff ) & ~ 0xffUL ;
# endif
2018-09-18 20:54:32 +03:00
host_page_size = getpagesize ( ) ;
2018-09-18 20:54:34 +03:00
host_num_pages = ( guest_num_pages * guest_page_size ) / host_page_size +
! ! ( ( guest_num_pages * guest_page_size ) % host_page_size ) ;
2018-09-18 20:54:32 +03:00
2018-11-06 16:57:09 +03:00
if ( ! phys_offset ) {
2018-11-06 16:57:07 +03:00
guest_test_phys_mem = ( max_gfn - guest_num_pages ) * guest_page_size ;
guest_test_phys_mem & = ~ ( host_page_size - 1 ) ;
2018-11-06 16:57:09 +03:00
} else {
guest_test_phys_mem = phys_offset ;
2018-09-18 20:54:36 +03:00
}
2019-07-31 18:15:25 +03:00
# ifdef __s390x__
/* Align to 1M (segment size) */
guest_test_phys_mem & = ~ ( ( 1 < < 20 ) - 1 ) ;
# endif
2018-11-06 16:57:07 +03:00
DEBUG ( " guest physical test memory offset: 0x%lx \n " , guest_test_phys_mem ) ;
2018-09-18 20:54:36 +03:00
2018-09-18 20:54:32 +03:00
bmap = bitmap_alloc ( host_num_pages ) ;
host_bmap_track = bitmap_alloc ( host_num_pages ) ;
2018-08-22 10:20:00 +03:00
kvm: introduce manual dirty log reprotect
There are two problems with KVM_GET_DIRTY_LOG. First, and less important,
it can take kvm->mmu_lock for an extended period of time. Second, its user
can actually see many false positives in some cases. The latter is due
to a benign race like this:
1. KVM_GET_DIRTY_LOG returns a set of dirty pages and write protects
them.
2. The guest modifies the pages, causing them to be marked ditry.
3. Userspace actually copies the pages.
4. KVM_GET_DIRTY_LOG returns those pages as dirty again, even though
they were not written to since (3).
This is especially a problem for large guests, where the time between
(1) and (3) can be substantial. This patch introduces a new
capability which, when enabled, makes KVM_GET_DIRTY_LOG not
write-protect the pages it returns. Instead, userspace has to
explicitly clear the dirty log bits just before using the content
of the page. The new KVM_CLEAR_DIRTY_LOG ioctl can also operate on a
64-page granularity rather than requiring to sync a full memslot;
this way, the mmu_lock is taken for small amounts of time, and
only a small amount of time will pass between write protection
of pages and the sending of their content.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-10-23 03:36:47 +03:00
# ifdef USE_CLEAR_DIRTY_LOG
struct kvm_enable_cap cap = { } ;
2019-05-08 12:15:47 +03:00
cap . cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 ;
kvm: introduce manual dirty log reprotect
There are two problems with KVM_GET_DIRTY_LOG. First, and less important,
it can take kvm->mmu_lock for an extended period of time. Second, its user
can actually see many false positives in some cases. The latter is due
to a benign race like this:
1. KVM_GET_DIRTY_LOG returns a set of dirty pages and write protects
them.
2. The guest modifies the pages, causing them to be marked ditry.
3. Userspace actually copies the pages.
4. KVM_GET_DIRTY_LOG returns those pages as dirty again, even though
they were not written to since (3).
This is especially a problem for large guests, where the time between
(1) and (3) can be substantial. This patch introduces a new
capability which, when enabled, makes KVM_GET_DIRTY_LOG not
write-protect the pages it returns. Instead, userspace has to
explicitly clear the dirty log bits just before using the content
of the page. The new KVM_CLEAR_DIRTY_LOG ioctl can also operate on a
64-page granularity rather than requiring to sync a full memslot;
this way, the mmu_lock is taken for small amounts of time, and
only a small amount of time will pass between write protection
of pages and the sending of their content.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-10-23 03:36:47 +03:00
cap . args [ 0 ] = 1 ;
vm_enable_cap ( vm , & cap ) ;
# endif
2018-08-22 10:20:00 +03:00
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add ( vm , VM_MEM_SRC_ANONYMOUS ,
2018-11-06 16:57:07 +03:00
guest_test_phys_mem ,
2018-08-22 10:20:00 +03:00
TEST_MEM_SLOT_INDEX ,
2018-09-18 20:54:34 +03:00
guest_num_pages ,
2018-08-22 10:20:00 +03:00
KVM_MEM_LOG_DIRTY_PAGES ) ;
2018-11-06 16:57:07 +03:00
/* Do mapping for the dirty track memory slot */
virt_map ( vm , guest_test_virt_mem , guest_test_phys_mem ,
2018-09-18 20:54:34 +03:00
guest_num_pages * guest_page_size , 0 ) ;
2018-09-18 20:54:32 +03:00
/* Cache the HVA pointer of the region */
2018-11-06 16:57:07 +03:00
host_test_mem = addr_gpa2hva ( vm , ( vm_paddr_t ) guest_test_phys_mem ) ;
2018-08-22 10:20:00 +03:00
2018-09-18 20:54:32 +03:00
# ifdef __x86_64__
2018-08-22 10:20:00 +03:00
vcpu_set_cpuid ( vm , VCPU_ID , kvm_get_supported_cpuid ( ) ) ;
2018-09-18 20:54:32 +03:00
# endif
# ifdef __aarch64__
2019-07-31 18:15:23 +03:00
ucall_init ( vm , NULL ) ;
2018-09-18 20:54:32 +03:00
# endif
2018-08-22 10:20:00 +03:00
2018-09-18 20:54:34 +03:00
/* Export the shared variables to the guest */
2018-09-18 20:54:32 +03:00
sync_global_to_guest ( vm , host_page_size ) ;
sync_global_to_guest ( vm , guest_page_size ) ;
2018-11-06 16:57:07 +03:00
sync_global_to_guest ( vm , guest_test_virt_mem ) ;
2018-09-18 20:54:34 +03:00
sync_global_to_guest ( vm , guest_num_pages ) ;
2018-08-22 10:20:00 +03:00
/* Start the iterations */
2018-09-18 20:54:32 +03:00
iteration = 1 ;
sync_global_to_guest ( vm , iteration ) ;
2018-09-18 20:54:34 +03:00
host_quit = false ;
host_dirty_count = 0 ;
host_clear_count = 0 ;
host_track_next_count = 0 ;
2018-08-22 10:20:00 +03:00
pthread_create ( & vcpu_thread , NULL , vcpu_worker , vm ) ;
2018-09-18 20:54:32 +03:00
while ( iteration < iterations ) {
2018-08-22 10:20:00 +03:00
/* Give the vcpu thread some time to dirty some pages */
usleep ( interval * 1000 ) ;
kvm_vm_get_dirty_log ( vm , TEST_MEM_SLOT_INDEX , bmap ) ;
kvm: introduce manual dirty log reprotect
There are two problems with KVM_GET_DIRTY_LOG. First, and less important,
it can take kvm->mmu_lock for an extended period of time. Second, its user
can actually see many false positives in some cases. The latter is due
to a benign race like this:
1. KVM_GET_DIRTY_LOG returns a set of dirty pages and write protects
them.
2. The guest modifies the pages, causing them to be marked ditry.
3. Userspace actually copies the pages.
4. KVM_GET_DIRTY_LOG returns those pages as dirty again, even though
they were not written to since (3).
This is especially a problem for large guests, where the time between
(1) and (3) can be substantial. This patch introduces a new
capability which, when enabled, makes KVM_GET_DIRTY_LOG not
write-protect the pages it returns. Instead, userspace has to
explicitly clear the dirty log bits just before using the content
of the page. The new KVM_CLEAR_DIRTY_LOG ioctl can also operate on a
64-page granularity rather than requiring to sync a full memslot;
this way, the mmu_lock is taken for small amounts of time, and
only a small amount of time will pass between write protection
of pages and the sending of their content.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-10-23 03:36:47 +03:00
# ifdef USE_CLEAR_DIRTY_LOG
kvm_vm_clear_dirty_log ( vm , TEST_MEM_SLOT_INDEX , bmap , 0 ,
2019-04-17 16:28:44 +03:00
host_num_pages ) ;
kvm: introduce manual dirty log reprotect
There are two problems with KVM_GET_DIRTY_LOG. First, and less important,
it can take kvm->mmu_lock for an extended period of time. Second, its user
can actually see many false positives in some cases. The latter is due
to a benign race like this:
1. KVM_GET_DIRTY_LOG returns a set of dirty pages and write protects
them.
2. The guest modifies the pages, causing them to be marked ditry.
3. Userspace actually copies the pages.
4. KVM_GET_DIRTY_LOG returns those pages as dirty again, even though
they were not written to since (3).
This is especially a problem for large guests, where the time between
(1) and (3) can be substantial. This patch introduces a new
capability which, when enabled, makes KVM_GET_DIRTY_LOG not
write-protect the pages it returns. Instead, userspace has to
explicitly clear the dirty log bits just before using the content
of the page. The new KVM_CLEAR_DIRTY_LOG ioctl can also operate on a
64-page granularity rather than requiring to sync a full memslot;
this way, the mmu_lock is taken for small amounts of time, and
only a small amount of time will pass between write protection
of pages and the sending of their content.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-10-23 03:36:47 +03:00
# endif
2018-09-18 20:54:32 +03:00
vm_dirty_log_verify ( bmap ) ;
iteration + + ;
sync_global_to_guest ( vm , iteration ) ;
2018-08-22 10:20:00 +03:00
}
/* Tell the vcpu thread to quit */
host_quit = true ;
pthread_join ( vcpu_thread , NULL ) ;
DEBUG ( " Total bits checked: dirty (% " PRIu64 " ), clear (% " PRIu64 " ), "
" track_next (% " PRIu64 " ) \n " , host_dirty_count , host_clear_count ,
host_track_next_count ) ;
free ( bmap ) ;
free ( host_bmap_track ) ;
2018-09-18 20:54:32 +03:00
ucall_uninit ( vm ) ;
2018-08-22 10:20:00 +03:00
kvm_vm_free ( vm ) ;
2018-09-18 20:54:34 +03:00
}
2018-11-06 16:57:10 +03:00
struct vm_guest_mode_params {
2018-09-18 20:54:34 +03:00
bool supported ;
bool enabled ;
} ;
2018-11-06 16:57:10 +03:00
struct vm_guest_mode_params vm_guest_mode_params [ NUM_VM_MODES ] ;
# define vm_guest_mode_params_init(mode, supported, enabled) \
( { \
vm_guest_mode_params [ mode ] = ( struct vm_guest_mode_params ) { supported , enabled } ; \
} )
2018-09-18 20:54:34 +03:00
static void help ( char * name )
{
int i ;
puts ( " " ) ;
2018-09-18 20:54:36 +03:00
printf ( " usage: %s [-h] [-i iterations] [-I interval] "
2018-11-06 16:57:08 +03:00
" [-p offset] [-m mode] \n " , name ) ;
2018-09-18 20:54:34 +03:00
puts ( " " ) ;
printf ( " -i: specify iteration counts (default: % " PRIu64 " ) \n " ,
TEST_HOST_LOOP_N ) ;
printf ( " -I: specify interval in ms (default: % " PRIu64 " ms) \n " ,
TEST_HOST_LOOP_INTERVAL ) ;
2018-11-06 16:57:08 +03:00
printf ( " -p: specify guest physical test memory offset \n "
" Warning: a low offset can conflict with the loaded test code. \n " ) ;
2018-09-18 20:54:34 +03:00
printf ( " -m: specify the guest mode ID to test "
" (default: test all supported modes) \n "
" This option may be used multiple times. \n "
" Guest mode IDs: \n " ) ;
for ( i = 0 ; i < NUM_VM_MODES ; + + i ) {
2018-11-06 16:57:10 +03:00
printf ( " %d: %s%s \n " , i , vm_guest_mode_string ( i ) ,
vm_guest_mode_params [ i ] . supported ? " (supported) " : " " ) ;
2018-09-18 20:54:34 +03:00
}
puts ( " " ) ;
exit ( 0 ) ;
}
int main ( int argc , char * argv [ ] )
{
unsigned long iterations = TEST_HOST_LOOP_N ;
unsigned long interval = TEST_HOST_LOOP_INTERVAL ;
bool mode_selected = false ;
2018-11-06 16:57:09 +03:00
uint64_t phys_offset = 0 ;
2019-05-17 12:04:45 +03:00
unsigned int mode ;
2018-09-18 20:54:34 +03:00
int opt , i ;
2019-05-17 12:04:45 +03:00
# ifdef __aarch64__
unsigned int host_ipa_limit ;
# endif
2018-09-18 20:54:34 +03:00
kvm: introduce manual dirty log reprotect
There are two problems with KVM_GET_DIRTY_LOG. First, and less important,
it can take kvm->mmu_lock for an extended period of time. Second, its user
can actually see many false positives in some cases. The latter is due
to a benign race like this:
1. KVM_GET_DIRTY_LOG returns a set of dirty pages and write protects
them.
2. The guest modifies the pages, causing them to be marked ditry.
3. Userspace actually copies the pages.
4. KVM_GET_DIRTY_LOG returns those pages as dirty again, even though
they were not written to since (3).
This is especially a problem for large guests, where the time between
(1) and (3) can be substantial. This patch introduces a new
capability which, when enabled, makes KVM_GET_DIRTY_LOG not
write-protect the pages it returns. Instead, userspace has to
explicitly clear the dirty log bits just before using the content
of the page. The new KVM_CLEAR_DIRTY_LOG ioctl can also operate on a
64-page granularity rather than requiring to sync a full memslot;
this way, the mmu_lock is taken for small amounts of time, and
only a small amount of time will pass between write protection
of pages and the sending of their content.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-10-23 03:36:47 +03:00
# ifdef USE_CLEAR_DIRTY_LOG
2019-05-08 12:15:47 +03:00
if ( ! kvm_check_cap ( KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 ) ) {
kvm: introduce manual dirty log reprotect
There are two problems with KVM_GET_DIRTY_LOG. First, and less important,
it can take kvm->mmu_lock for an extended period of time. Second, its user
can actually see many false positives in some cases. The latter is due
to a benign race like this:
1. KVM_GET_DIRTY_LOG returns a set of dirty pages and write protects
them.
2. The guest modifies the pages, causing them to be marked ditry.
3. Userspace actually copies the pages.
4. KVM_GET_DIRTY_LOG returns those pages as dirty again, even though
they were not written to since (3).
This is especially a problem for large guests, where the time between
(1) and (3) can be substantial. This patch introduces a new
capability which, when enabled, makes KVM_GET_DIRTY_LOG not
write-protect the pages it returns. Instead, userspace has to
explicitly clear the dirty log bits just before using the content
of the page. The new KVM_CLEAR_DIRTY_LOG ioctl can also operate on a
64-page granularity rather than requiring to sync a full memslot;
this way, the mmu_lock is taken for small amounts of time, and
only a small amount of time will pass between write protection
of pages and the sending of their content.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-10-23 03:36:47 +03:00
fprintf ( stderr , " KVM_CLEAR_DIRTY_LOG not available, skipping tests \n " ) ;
exit ( KSFT_SKIP ) ;
}
# endif
2018-11-06 16:57:10 +03:00
# ifdef __x86_64__
2019-08-30 04:36:18 +03:00
vm_guest_mode_params_init ( VM_MODE_PXXV48_4K , true , true ) ;
2018-11-06 16:57:10 +03:00
# endif
# ifdef __aarch64__
vm_guest_mode_params_init ( VM_MODE_P40V48_4K , true , true ) ;
vm_guest_mode_params_init ( VM_MODE_P40V48_64K , true , true ) ;
2018-11-06 16:57:12 +03:00
host_ipa_limit = kvm_check_cap ( KVM_CAP_ARM_VM_IPA_SIZE ) ;
if ( host_ipa_limit > = 52 )
vm_guest_mode_params_init ( VM_MODE_P52V48_64K , true , true ) ;
if ( host_ipa_limit > = 48 ) {
vm_guest_mode_params_init ( VM_MODE_P48V48_4K , true , true ) ;
vm_guest_mode_params_init ( VM_MODE_P48V48_64K , true , true ) ;
}
2018-11-06 16:57:10 +03:00
# endif
2019-07-31 18:15:25 +03:00
# ifdef __s390x__
vm_guest_mode_params_init ( VM_MODE_P40V48_4K , true , true ) ;
# endif
2018-11-06 16:57:10 +03:00
2018-11-06 16:57:08 +03:00
while ( ( opt = getopt ( argc , argv , " hi:I:p:m: " ) ) ! = - 1 ) {
2018-09-18 20:54:34 +03:00
switch ( opt ) {
case ' i ' :
iterations = strtol ( optarg , NULL , 10 ) ;
break ;
case ' I ' :
interval = strtol ( optarg , NULL , 10 ) ;
break ;
2018-11-06 16:57:08 +03:00
case ' p ' :
2018-11-06 16:57:09 +03:00
phys_offset = strtoull ( optarg , NULL , 0 ) ;
2018-09-18 20:54:36 +03:00
break ;
2018-09-18 20:54:34 +03:00
case ' m ' :
if ( ! mode_selected ) {
for ( i = 0 ; i < NUM_VM_MODES ; + + i )
2018-11-06 16:57:10 +03:00
vm_guest_mode_params [ i ] . enabled = false ;
2018-09-18 20:54:34 +03:00
mode_selected = true ;
}
mode = strtoul ( optarg , NULL , 10 ) ;
TEST_ASSERT ( mode < NUM_VM_MODES ,
" Guest mode ID %d too big " , mode ) ;
2018-11-06 16:57:10 +03:00
vm_guest_mode_params [ mode ] . enabled = true ;
2018-09-18 20:54:34 +03:00
break ;
case ' h ' :
default :
help ( argv [ 0 ] ) ;
break ;
}
}
TEST_ASSERT ( iterations > 2 , " Iterations must be greater than two " ) ;
TEST_ASSERT ( interval > 0 , " Interval must be greater than zero " ) ;
DEBUG ( " Test iterations: % " PRIu64 " , interval: % " PRIu64 " (ms) \n " ,
iterations , interval ) ;
srandom ( time ( 0 ) ) ;
for ( i = 0 ; i < NUM_VM_MODES ; + + i ) {
2018-11-06 16:57:10 +03:00
if ( ! vm_guest_mode_params [ i ] . enabled )
2018-09-18 20:54:34 +03:00
continue ;
2018-11-06 16:57:10 +03:00
TEST_ASSERT ( vm_guest_mode_params [ i ] . supported ,
2018-09-18 20:54:34 +03:00
" Guest mode ID %d (%s) not supported. " ,
2018-11-06 16:57:10 +03:00
i , vm_guest_mode_string ( i ) ) ;
run_test ( i , iterations , interval , phys_offset ) ;
2018-09-18 20:54:34 +03:00
}
2018-08-22 10:20:00 +03:00
return 0 ;
}