2015-09-04 15:47:23 -07:00
/*
* Stress userfaultfd syscall .
*
* Copyright ( C ) 2015 Red Hat , Inc .
*
* This work is licensed under the terms of the GNU GPL , version 2. See
* the COPYING file in the top - level directory .
*
* This test allocates two virtual areas and bounces the physical
* memory across the two virtual areas ( from area_src to area_dst )
* using userfaultfd .
*
* There are three threads running per CPU :
*
* 1 ) one per - CPU thread takes a per - page pthread_mutex in a random
* page of the area_dst ( while the physical page may still be in
* area_src ) , and increments a per - page counter in the same page ,
* and checks its value against a verification region .
*
* 2 ) another per - CPU thread handles the userfaults generated by
* thread 1 above . userfaultfd blocking reads or poll ( ) modes are
* exercised interleaved .
*
 * 3) one last per-CPU thread transfers the memory in the background
 *    at maximum bandwidth (if not already transferred by thread
 *    2). Each cpu thread takes care of transferring a portion of the
 *    area.
*
* When all threads of type 3 completed the transfer , one bounce is
* complete . area_src and area_dst are then swapped . All threads are
* respawned and so the bounce is immediately restarted in the
* opposite direction .
*
* per - CPU threads 1 by triggering userfaults inside
* pthread_mutex_lock will also verify the atomicity of the memory
* transfer ( UFFDIO_COPY ) .
*
* The program takes two parameters : the amounts of physical memory in
* megabytes ( MiB ) of the area and the number of bounces to execute .
*
* # 100 MiB 99999 bounces
* . / userfaultfd 100 99999
*
* # 1 GiB 99 bounces
* . / userfaultfd 1000 99
*
* # 10 MiB - ~ 6 GiB 999 bounces , continue forever unless an error triggers
* while . / userfaultfd $ [ RANDOM % 6000 + 10 ] 999 ; do true ; done
*/
# define _GNU_SOURCE
# include <stdio.h>
# include <errno.h>
# include <unistd.h>
# include <stdlib.h>
# include <sys/types.h>
# include <sys/stat.h>
# include <fcntl.h>
# include <time.h>
# include <signal.h>
# include <poll.h>
# include <string.h>
# include <sys/mman.h>
# include <sys/syscall.h>
# include <sys/ioctl.h>
2017-02-22 15:44:06 -08:00
# include <sys/wait.h>
2015-09-04 15:47:23 -07:00
# include <pthread.h>
2015-09-22 14:58:52 -07:00
# include <linux/userfaultfd.h>
2015-09-04 15:47:23 -07:00
2015-09-22 14:58:58 -07:00
# ifdef __NR_userfaultfd
2015-09-04 15:47:23 -07:00
/* Sizing of the test, filled in by main() before the stress run starts. */
static unsigned long nr_cpus;
static unsigned long nr_pages;
static unsigned long nr_pages_per_cpu;
static unsigned long page_size;

/* Mode bits taken from the low bits of the remaining bounce counter. */
#define BOUNCE_RANDOM		(1<<0)
#define BOUNCE_RACINGFAULTS	(1<<1)
#define BOUNCE_VERIFY		(1<<2)
#define BOUNCE_POLL		(1<<3)

/* Remaining bounces; decremented once per pass by userfaultfd_stress(). */
static int bounces;

#ifdef HUGETLB_TEST
/* hugetlbfs file backing both areas, opened by main(). */
static int huge_fd;
/* Address of the mapping that starts at offset 0 of huge_fd. */
static char *huge_fd_off0;
#endif

/* Expected per-page counter values, indexed by page number. */
static unsigned long long *count_verify;

/* The userfaultfd descriptor and the flags captured for it via fcntl(). */
static int uffd;
static int uffd_flags;
/* Set to 1 by stress() to make the locking threads leave their loop. */
static int finished;
/* Two descriptors per CPU (read end, write end) created with pipe2(). */
static int *pipefd;

/* Source and destination areas; swapped after every bounce. */
static char *area_src;
static char *area_dst;
/* One page of zero bytes used for comparisons. */
static char *zeropage;

/* Attributes shared by every thread created by the test. */
pthread_attr_t attr;
/*
 * area_mutex(area, nr): pointer to the pthread_mutex_t stored at byte
 * offset 0 of page number nr inside area (pages are page_size bytes).
 */
# define area_mutex(___area, ___nr) \
( ( pthread_mutex_t * ) ( ( ___area ) + ( ___nr ) * page_size ) )
/*
 * area_count(area, nr): pointer to the per-page counter of page nr.
 *
 * The counter lives in the same page, right after the pthread_mutex_t:
 * the address (page start + sizeof(pthread_mutex_t)) is rounded up to
 * the next multiple of sizeof(unsigned long long) so the counter is
 * naturally aligned, which avoids misaligned-access faults on non-x86
 * architectures. The pointer is volatile-qualified, so every access
 * through it is an actual memory access.
 */
# define area_count(___area, ___nr) \
( ( volatile unsigned long long * ) ( ( unsigned long ) \
( ( ___area ) + ( ___nr ) * page_size + \
sizeof ( pthread_mutex_t ) + \
sizeof ( unsigned long long ) - 1 ) & \
~ ( unsigned long ) ( sizeof ( unsigned long long ) \
- 1 ) ) )
2017-02-22 15:43:46 -08:00
# if !defined(HUGETLB_TEST) && !defined(SHMEM_TEST)
2017-02-22 15:43:07 -08:00
2017-02-22 15:43:46 -08:00
/* Anonymous memory */
2017-02-22 15:43:07 -08:00
/*
 * Bitmask of the range ioctls that UFFDIO_REGISTER must advertise for
 * this memory type: wake, copy and zeropage.
 */
#define EXPECTED_IOCTLS \
	((1 << _UFFDIO_WAKE) | (1 << _UFFDIO_COPY) | (1 << _UFFDIO_ZEROPAGE))
/*
 * Discard the contents of the nr_pages * page_size bytes starting at
 * rel_area with madvise(MADV_DONTNEED).
 *
 * Returns 0 on success; on failure reports the error with perror() and
 * returns 1.
 */
static int release_pages(char *rel_area)
{
	if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED) == 0)
		return 0;

	perror(" madvise ");
	return 1;
}
/*
 * Allocate nr_pages * page_size bytes aligned to page_size and store the
 * address in *alloc_area. On failure a message is printed to stderr and
 * *alloc_area is set to NULL.
 */
static void allocate_area(void **alloc_area)
{
	int rc = posix_memalign(alloc_area, page_size, nr_pages * page_size);

	if (rc == 0)
		return;

	fprintf(stderr, " out of memory \n ");
	*alloc_area = NULL;
}
2017-02-22 15:43:46 -08:00
# else /* HUGETLB_TEST or SHMEM_TEST */
2017-02-22 15:43:07 -08:00
2017-02-22 15:43:40 -08:00
/*
 * For the hugetlb and shmem variants the ioctls that UFFDIO_REGISTER is
 * expected to advertise are the ones in UFFD_API_RANGE_IOCTLS_BASIC.
 */
# define EXPECTED_IOCTLS UFFD_API_RANGE_IOCTLS_BASIC
2017-02-22 15:43:07 -08:00
2017-02-22 15:43:46 -08:00
# ifdef HUGETLB_TEST
/* HugeTLB memory */
2017-02-22 15:43:07 -08:00
static int release_pages ( char * rel_area )
{
int ret = 0 ;
if ( fallocate ( huge_fd , FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE ,
rel_area = = huge_fd_off0 ? 0 :
nr_pages * page_size ,
nr_pages * page_size ) ) {
perror ( " fallocate " ) ;
ret = 1 ;
}
return ret ;
}
/*
 * Map nr_pages * page_size bytes of the hugetlbfs file and store the
 * address in *alloc_area (NULL, after a message on stderr, if mmap
 * fails).
 *
 * The file offset is chosen by comparing the value currently held in
 * *alloc_area with area_src: when they are equal the mapping starts at
 * offset 0, otherwise at nr_pages * page_size. After the mapping is
 * stored, the same comparison decides whether huge_fd_off0 is updated.
 */
static void allocate_area(void **alloc_area)
{
	/* Must be decided before *alloc_area is overwritten below. */
	off_t file_off = (*alloc_area == area_src) ? 0 : nr_pages * page_size;
	void *map;

	map = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_HUGETLB, huge_fd, file_off);
	*alloc_area = map;
	if (map == MAP_FAILED) {
		fprintf(stderr, " mmap of hugetlbfs file failed \n ");
		*alloc_area = NULL;
	}

	/* Re-read *alloc_area: alloc_area may point at area_src itself. */
	if (*alloc_area == area_src)
		huge_fd_off0 = *alloc_area;
}
2017-02-22 15:43:46 -08:00
# elif defined(SHMEM_TEST)
/* Shared memory */
/*
 * Release the nr_pages * page_size bytes starting at rel_area with
 * madvise(MADV_REMOVE).
 *
 * Returns 0 on success; on failure reports the error with perror() and
 * returns 1.
 */
static int release_pages(char *rel_area)
{
	if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE) == 0)
		return 0;

	perror(" madvise ");
	return 1;
}
/*
 * Create a shared anonymous mapping of nr_pages * page_size bytes and
 * store its address in *alloc_area. If mmap fails a message is printed
 * to stderr and *alloc_area is set to NULL.
 */
static void allocate_area(void **alloc_area)
{
	void *map = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
			 MAP_ANONYMOUS | MAP_SHARED, -1, 0);

	if (map == MAP_FAILED) {
		fprintf(stderr, " shared memory mmap failed \n ");
		map = NULL;
	}
	*alloc_area = map;
}
# else /* SHMEM_TEST */
# error "Undefined test type"
2017-02-22 15:43:07 -08:00
# endif /* HUGETLB_TEST */
2017-02-22 15:43:46 -08:00
# endif /* !defined(HUGETLB_TEST) && !defined(SHMEM_TEST) */
2015-09-04 15:47:23 -07:00
/*
 * Byte-wise comparison of the first n bytes of str1 and str2.
 *
 * Returns 0 when all n bytes are equal (including n == 0), 1 as soon as
 * a differing byte is found.
 *
 * Neither buffer is written, so both parameters are const-qualified.
 * The bytes are read through volatile-qualified pointers so that every
 * byte is really loaded, one at a time and in order; the callers in this
 * file compare memory that may be changing underneath the comparison.
 */
static int my_bcmp(const char *str1, const char *str2, size_t n)
{
	const volatile char *a = str1;
	const volatile char *b = str2;
	size_t i;

	for (i = 0; i < n; i++) {
		if (a[i] != b[i])
			return 1;
	}
	return 0;
}
static void * locking_thread ( void * arg )
{
unsigned long cpu = ( unsigned long ) arg ;
struct random_data rand ;
unsigned long page_nr = * ( & ( page_nr ) ) ; /* uninitialized warning */
int32_t rand_nr ;
unsigned long long count ;
char randstate [ 64 ] ;
unsigned int seed ;
time_t start ;
if ( bounces & BOUNCE_RANDOM ) {
seed = ( unsigned int ) time ( NULL ) - bounces ;
if ( ! ( bounces & BOUNCE_RACINGFAULTS ) )
seed + = cpu ;
bzero ( & rand , sizeof ( rand ) ) ;
bzero ( & randstate , sizeof ( randstate ) ) ;
if ( initstate_r ( seed , randstate , sizeof ( randstate ) , & rand ) )
fprintf ( stderr , " srandom_r error \n " ) , exit ( 1 ) ;
} else {
page_nr = - bounces ;
if ( ! ( bounces & BOUNCE_RACINGFAULTS ) )
page_nr + = cpu * nr_pages_per_cpu ;
}
while ( ! finished ) {
if ( bounces & BOUNCE_RANDOM ) {
if ( random_r ( & rand , & rand_nr ) )
fprintf ( stderr , " random_r 1 error \n " ) , exit ( 1 ) ;
page_nr = rand_nr ;
if ( sizeof ( page_nr ) > sizeof ( rand_nr ) ) {
if ( random_r ( & rand , & rand_nr ) )
fprintf ( stderr , " random_r 2 error \n " ) , exit ( 1 ) ;
2015-09-08 14:58:25 -07:00
page_nr | = ( ( ( unsigned long ) rand_nr ) < < 16 ) < <
16 ;
2015-09-04 15:47:23 -07:00
}
} else
page_nr + = 1 ;
page_nr % = nr_pages ;
start = time ( NULL ) ;
if ( bounces & BOUNCE_VERIFY ) {
count = * area_count ( area_dst , page_nr ) ;
if ( ! count )
fprintf ( stderr ,
" page_nr %lu wrong count %Lu %Lu \n " ,
page_nr , count ,
count_verify [ page_nr ] ) , exit ( 1 ) ;
/*
* We can ' t use bcmp ( or memcmp ) because that
* returns 0 erroneously if the memory is
* changing under it ( even if the end of the
* page is never changing and always
* different ) .
*/
# if 1
if ( ! my_bcmp ( area_dst + page_nr * page_size , zeropage ,
page_size ) )
fprintf ( stderr ,
" my_bcmp page_nr %lu wrong count %Lu %Lu \n " ,
page_nr , count ,
count_verify [ page_nr ] ) , exit ( 1 ) ;
# else
unsigned long loops ;
loops = 0 ;
/* uncomment the below line to test with mutex */
/* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
while ( ! bcmp ( area_dst + page_nr * page_size , zeropage ,
page_size ) ) {
loops + = 1 ;
if ( loops > 10 )
break ;
}
/* uncomment below line to test with mutex */
/* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
if ( loops ) {
fprintf ( stderr ,
" page_nr %lu all zero thread %lu %p %lu \n " ,
page_nr , cpu , area_dst + page_nr * page_size ,
loops ) ;
if ( loops > 10 )
exit ( 1 ) ;
}
# endif
}
pthread_mutex_lock ( area_mutex ( area_dst , page_nr ) ) ;
count = * area_count ( area_dst , page_nr ) ;
if ( count ! = count_verify [ page_nr ] ) {
fprintf ( stderr ,
" page_nr %lu memory corruption %Lu %Lu \n " ,
page_nr , count ,
count_verify [ page_nr ] ) , exit ( 1 ) ;
}
count + + ;
* area_count ( area_dst , page_nr ) = count_verify [ page_nr ] = count ;
pthread_mutex_unlock ( area_mutex ( area_dst , page_nr ) ) ;
if ( time ( NULL ) - start > 1 )
fprintf ( stderr ,
" userfault too slow %ld "
" possible false positive with overcommit \n " ,
time ( NULL ) - start ) ;
}
return NULL ;
}
2017-02-22 15:44:04 -08:00
static int copy_page ( int ufd , unsigned long offset )
2015-09-04 15:47:23 -07:00
{
struct uffdio_copy uffdio_copy ;
if ( offset > = nr_pages * page_size )
fprintf ( stderr , " unexpected offset %lu \n " ,
offset ) , exit ( 1 ) ;
uffdio_copy . dst = ( unsigned long ) area_dst + offset ;
uffdio_copy . src = ( unsigned long ) area_src + offset ;
uffdio_copy . len = page_size ;
uffdio_copy . mode = 0 ;
uffdio_copy . copy = 0 ;
2017-02-22 15:44:04 -08:00
if ( ioctl ( ufd , UFFDIO_COPY , & uffdio_copy ) ) {
2015-09-04 15:47:23 -07:00
/* real retval in ufdio_copy.copy */
if ( uffdio_copy . copy ! = - EEXIST )
fprintf ( stderr , " UFFDIO_COPY error %Ld \n " ,
uffdio_copy . copy ) , exit ( 1 ) ;
} else if ( uffdio_copy . copy ! = page_size ) {
fprintf ( stderr , " UFFDIO_COPY unexpected copy %Ld \n " ,
uffdio_copy . copy ) , exit ( 1 ) ;
} else
return 1 ;
return 0 ;
}
static void * uffd_poll_thread ( void * arg )
{
unsigned long cpu = ( unsigned long ) arg ;
struct pollfd pollfd [ 2 ] ;
struct uffd_msg msg ;
2017-02-22 15:44:06 -08:00
struct uffdio_register uffd_reg ;
2015-09-04 15:47:23 -07:00
int ret ;
unsigned long offset ;
char tmp_chr ;
unsigned long userfaults = 0 ;
pollfd [ 0 ] . fd = uffd ;
pollfd [ 0 ] . events = POLLIN ;
pollfd [ 1 ] . fd = pipefd [ cpu * 2 ] ;
pollfd [ 1 ] . events = POLLIN ;
for ( ; ; ) {
ret = poll ( pollfd , 2 , - 1 ) ;
if ( ! ret )
fprintf ( stderr , " poll error %d \n " , ret ) , exit ( 1 ) ;
if ( ret < 0 )
perror ( " poll " ) , exit ( 1 ) ;
if ( pollfd [ 1 ] . revents & POLLIN ) {
if ( read ( pollfd [ 1 ] . fd , & tmp_chr , 1 ) ! = 1 )
fprintf ( stderr , " read pipefd error \n " ) ,
exit ( 1 ) ;
break ;
}
if ( ! ( pollfd [ 0 ] . revents & POLLIN ) )
fprintf ( stderr , " pollfd[0].revents %d \n " ,
pollfd [ 0 ] . revents ) , exit ( 1 ) ;
ret = read ( uffd , & msg , sizeof ( msg ) ) ;
if ( ret < 0 ) {
if ( errno = = EAGAIN )
continue ;
perror ( " nonblocking read error " ) , exit ( 1 ) ;
}
2017-02-22 15:44:06 -08:00
switch ( msg . event ) {
default :
2015-09-04 15:47:23 -07:00
fprintf ( stderr , " unexpected msg event %u \n " ,
msg . event ) , exit ( 1 ) ;
2017-02-22 15:44:06 -08:00
break ;
case UFFD_EVENT_PAGEFAULT :
if ( msg . arg . pagefault . flags & UFFD_PAGEFAULT_FLAG_WRITE )
fprintf ( stderr , " unexpected write fault \n " ) , exit ( 1 ) ;
offset = ( char * ) ( unsigned long ) msg . arg . pagefault . address -
area_dst ;
offset & = ~ ( page_size - 1 ) ;
if ( copy_page ( uffd , offset ) )
userfaults + + ;
break ;
case UFFD_EVENT_FORK :
uffd = msg . arg . fork . ufd ;
pollfd [ 0 ] . fd = uffd ;
break ;
2017-02-24 14:56:02 -08:00
case UFFD_EVENT_REMOVE :
uffd_reg . range . start = msg . arg . remove . start ;
uffd_reg . range . len = msg . arg . remove . end -
msg . arg . remove . start ;
2017-02-22 15:44:06 -08:00
if ( ioctl ( uffd , UFFDIO_UNREGISTER , & uffd_reg . range ) )
2017-02-24 14:56:02 -08:00
fprintf ( stderr , " remove failure \n " ) , exit ( 1 ) ;
2017-02-22 15:44:06 -08:00
break ;
case UFFD_EVENT_REMAP :
area_dst = ( char * ) ( unsigned long ) msg . arg . remap . to ;
break ;
}
2015-09-04 15:47:23 -07:00
}
return ( void * ) userfaults ;
}
/*
 * Start-up handshake for uffd_read_thread(): userfaultfd_stress() locks
 * it once up front, stress() locks it again right after creating each
 * read thread, and the thread unlocks it after resetting its fault
 * counter.
 */
pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER ;
static void * uffd_read_thread ( void * arg )
{
unsigned long * this_cpu_userfaults ;
struct uffd_msg msg ;
unsigned long offset ;
int ret ;
this_cpu_userfaults = ( unsigned long * ) arg ;
* this_cpu_userfaults = 0 ;
pthread_mutex_unlock ( & uffd_read_mutex ) ;
/* from here cancellation is ok */
for ( ; ; ) {
ret = read ( uffd , & msg , sizeof ( msg ) ) ;
if ( ret ! = sizeof ( msg ) ) {
if ( ret < 0 )
perror ( " blocking read error " ) , exit ( 1 ) ;
else
fprintf ( stderr , " short read \n " ) , exit ( 1 ) ;
}
if ( msg . event ! = UFFD_EVENT_PAGEFAULT )
fprintf ( stderr , " unexpected msg event %u \n " ,
msg . event ) , exit ( 1 ) ;
if ( bounces & BOUNCE_VERIFY & &
msg . arg . pagefault . flags & UFFD_PAGEFAULT_FLAG_WRITE )
fprintf ( stderr , " unexpected write fault \n " ) , exit ( 1 ) ;
2015-09-08 14:58:25 -07:00
offset = ( char * ) ( unsigned long ) msg . arg . pagefault . address -
area_dst ;
2015-09-04 15:47:23 -07:00
offset & = ~ ( page_size - 1 ) ;
2017-02-22 15:44:04 -08:00
if ( copy_page ( uffd , offset ) )
2015-09-04 15:47:23 -07:00
( * this_cpu_userfaults ) + + ;
}
return ( void * ) NULL ;
}
static void * background_thread ( void * arg )
{
unsigned long cpu = ( unsigned long ) arg ;
unsigned long page_nr ;
for ( page_nr = cpu * nr_pages_per_cpu ;
page_nr < ( cpu + 1 ) * nr_pages_per_cpu ;
page_nr + + )
2017-02-22 15:44:04 -08:00
copy_page ( uffd , page_nr * page_size ) ;
2015-09-04 15:47:23 -07:00
return NULL ;
}
static int stress ( unsigned long * userfaults )
{
unsigned long cpu ;
pthread_t locking_threads [ nr_cpus ] ;
pthread_t uffd_threads [ nr_cpus ] ;
pthread_t background_threads [ nr_cpus ] ;
void * * _userfaults = ( void * * ) userfaults ;
finished = 0 ;
for ( cpu = 0 ; cpu < nr_cpus ; cpu + + ) {
if ( pthread_create ( & locking_threads [ cpu ] , & attr ,
locking_thread , ( void * ) cpu ) )
return 1 ;
if ( bounces & BOUNCE_POLL ) {
if ( pthread_create ( & uffd_threads [ cpu ] , & attr ,
uffd_poll_thread , ( void * ) cpu ) )
return 1 ;
} else {
if ( pthread_create ( & uffd_threads [ cpu ] , & attr ,
uffd_read_thread ,
& _userfaults [ cpu ] ) )
return 1 ;
pthread_mutex_lock ( & uffd_read_mutex ) ;
}
if ( pthread_create ( & background_threads [ cpu ] , & attr ,
background_thread , ( void * ) cpu ) )
return 1 ;
}
for ( cpu = 0 ; cpu < nr_cpus ; cpu + + )
if ( pthread_join ( background_threads [ cpu ] , NULL ) )
return 1 ;
/*
* Be strict and immediately zap area_src , the whole area has
* been transferred already by the background treads . The
* area_src could then be faulted in in a racy way by still
* running uffdio_threads reading zeropages after we zapped
* area_src ( but they ' re guaranteed to get - EEXIST from
* UFFDIO_COPY without writing zero pages into area_dst
* because the background threads already completed ) .
*/
2017-02-22 15:43:07 -08:00
if ( release_pages ( area_src ) )
2015-09-04 15:47:23 -07:00
return 1 ;
for ( cpu = 0 ; cpu < nr_cpus ; cpu + + ) {
char c ;
if ( bounces & BOUNCE_POLL ) {
if ( write ( pipefd [ cpu * 2 + 1 ] , & c , 1 ) ! = 1 ) {
fprintf ( stderr , " pipefd write error \n " ) ;
return 1 ;
}
if ( pthread_join ( uffd_threads [ cpu ] , & _userfaults [ cpu ] ) )
return 1 ;
} else {
if ( pthread_cancel ( uffd_threads [ cpu ] ) )
return 1 ;
if ( pthread_join ( uffd_threads [ cpu ] , NULL ) )
return 1 ;
}
}
finished = 1 ;
for ( cpu = 0 ; cpu < nr_cpus ; cpu + + )
if ( pthread_join ( locking_threads [ cpu ] , NULL ) )
return 1 ;
return 0 ;
}
2017-02-22 15:44:06 -08:00
static int userfaultfd_open ( int features )
2015-09-04 15:47:23 -07:00
{
struct uffdio_api uffdio_api ;
uffd = syscall ( __NR_userfaultfd , O_CLOEXEC | O_NONBLOCK ) ;
if ( uffd < 0 ) {
fprintf ( stderr ,
" userfaultfd syscall not available in this kernel \n " ) ;
return 1 ;
}
uffd_flags = fcntl ( uffd , F_GETFD , NULL ) ;
uffdio_api . api = UFFD_API ;
2017-02-22 15:44:06 -08:00
uffdio_api . features = features ;
2015-09-04 15:47:23 -07:00
if ( ioctl ( uffd , UFFDIO_API , & uffdio_api ) ) {
fprintf ( stderr , " UFFDIO_API \n " ) ;
return 1 ;
}
if ( uffdio_api . api ! = UFFD_API ) {
fprintf ( stderr , " UFFDIO_API error %Lu \n " , uffdio_api . api ) ;
return 1 ;
}
2017-02-22 15:44:01 -08:00
return 0 ;
}
2017-02-22 15:44:06 -08:00
/*
 * For the non-cooperative userfaultfd test we fork() a process that
 * generates pagefaults, mremaps the area monitored by the userfaultfd
 * and finally releases the monitored area.
 * For anonymous and shared memory the area is divided into two parts:
 * the first part is accessed before mremap, and the second part is
 * accessed after mremap. Since hugetlbfs does not support mremap, the
 * entire monitored area is accessed in a single pass for HUGETLB_TEST.
 * The release of the pages currently generates an event only for
 * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked for
 * hugetlb and shmem.
 */
static int faulting_process ( void )
{
unsigned long nr ;
unsigned long long count ;
# ifndef HUGETLB_TEST
unsigned long split_nr_pages = ( nr_pages + 1 ) / 2 ;
# else
unsigned long split_nr_pages = nr_pages ;
# endif
for ( nr = 0 ; nr < split_nr_pages ; nr + + ) {
count = * area_count ( area_dst , nr ) ;
if ( count ! = count_verify [ nr ] ) {
fprintf ( stderr ,
" nr %lu memory corruption %Lu %Lu \n " ,
nr , count ,
count_verify [ nr ] ) , exit ( 1 ) ;
}
}
# ifndef HUGETLB_TEST
area_dst = mremap ( area_dst , nr_pages * page_size , nr_pages * page_size ,
MREMAP_MAYMOVE | MREMAP_FIXED , area_src ) ;
if ( area_dst = = MAP_FAILED )
perror ( " mremap " ) , exit ( 1 ) ;
for ( ; nr < nr_pages ; nr + + ) {
count = * area_count ( area_dst , nr ) ;
if ( count ! = count_verify [ nr ] ) {
fprintf ( stderr ,
" nr %lu memory corruption %Lu %Lu \n " ,
nr , count ,
count_verify [ nr ] ) , exit ( 1 ) ;
}
}
# ifndef SHMEM_TEST
if ( release_pages ( area_dst ) )
return 1 ;
for ( nr = 0 ; nr < nr_pages ; nr + + ) {
if ( my_bcmp ( area_dst + nr * page_size , zeropage , page_size ) )
fprintf ( stderr , " nr %lu is not zero \n " , nr ) , exit ( 1 ) ;
}
# endif /* SHMEM_TEST */
# endif /* HUGETLB_TEST */
return 0 ;
}
2017-02-22 15:44:10 -08:00
static int uffdio_zeropage ( int ufd , unsigned long offset )
{
struct uffdio_zeropage uffdio_zeropage ;
int ret ;
unsigned long has_zeropage = EXPECTED_IOCTLS & ( 1 < < _UFFDIO_ZEROPAGE ) ;
if ( offset > = nr_pages * page_size )
fprintf ( stderr , " unexpected offset %lu \n " ,
offset ) , exit ( 1 ) ;
uffdio_zeropage . range . start = ( unsigned long ) area_dst + offset ;
uffdio_zeropage . range . len = page_size ;
uffdio_zeropage . mode = 0 ;
ret = ioctl ( ufd , UFFDIO_ZEROPAGE , & uffdio_zeropage ) ;
if ( ret ) {
/* real retval in ufdio_zeropage.zeropage */
if ( has_zeropage ) {
if ( uffdio_zeropage . zeropage = = - EEXIST )
fprintf ( stderr , " UFFDIO_ZEROPAGE -EEXIST \n " ) ,
exit ( 1 ) ;
else
fprintf ( stderr , " UFFDIO_ZEROPAGE error %Ld \n " ,
uffdio_zeropage . zeropage ) , exit ( 1 ) ;
} else {
if ( uffdio_zeropage . zeropage ! = - EINVAL )
fprintf ( stderr ,
" UFFDIO_ZEROPAGE not -EINVAL %Ld \n " ,
uffdio_zeropage . zeropage ) , exit ( 1 ) ;
}
} else if ( has_zeropage ) {
if ( uffdio_zeropage . zeropage ! = page_size ) {
fprintf ( stderr , " UFFDIO_ZEROPAGE unexpected %Ld \n " ,
uffdio_zeropage . zeropage ) , exit ( 1 ) ;
} else
return 1 ;
} else {
fprintf ( stderr ,
" UFFDIO_ZEROPAGE succeeded %Ld \n " ,
uffdio_zeropage . zeropage ) , exit ( 1 ) ;
}
return 0 ;
}
/* exercise UFFDIO_ZEROPAGE */
static int userfaultfd_zeropage_test ( void )
{
struct uffdio_register uffdio_register ;
unsigned long expected_ioctls ;
printf ( " testing UFFDIO_ZEROPAGE: " ) ;
fflush ( stdout ) ;
if ( release_pages ( area_dst ) )
return 1 ;
if ( userfaultfd_open ( 0 ) < 0 )
return 1 ;
uffdio_register . range . start = ( unsigned long ) area_dst ;
uffdio_register . range . len = nr_pages * page_size ;
uffdio_register . mode = UFFDIO_REGISTER_MODE_MISSING ;
if ( ioctl ( uffd , UFFDIO_REGISTER , & uffdio_register ) )
fprintf ( stderr , " register failure \n " ) , exit ( 1 ) ;
expected_ioctls = EXPECTED_IOCTLS ;
if ( ( uffdio_register . ioctls & expected_ioctls ) ! =
expected_ioctls )
fprintf ( stderr ,
" unexpected missing ioctl for anon memory \n " ) ,
exit ( 1 ) ;
if ( uffdio_zeropage ( uffd , 0 ) ) {
if ( my_bcmp ( area_dst , zeropage , page_size ) )
fprintf ( stderr , " zeropage is not zero \n " ) , exit ( 1 ) ;
}
close ( uffd ) ;
printf ( " done. \n " ) ;
return 0 ;
}
2017-02-22 15:44:06 -08:00
static int userfaultfd_events_test ( void )
{
struct uffdio_register uffdio_register ;
unsigned long expected_ioctls ;
unsigned long userfaults ;
pthread_t uffd_mon ;
int err , features ;
pid_t pid ;
char c ;
2017-02-24 14:56:02 -08:00
printf ( " testing events (fork, remap, remove): " ) ;
2017-02-22 15:44:06 -08:00
fflush ( stdout ) ;
if ( release_pages ( area_dst ) )
return 1 ;
features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
2017-02-24 14:56:02 -08:00
UFFD_FEATURE_EVENT_REMOVE ;
2017-02-22 15:44:06 -08:00
if ( userfaultfd_open ( features ) < 0 )
return 1 ;
fcntl ( uffd , F_SETFL , uffd_flags | O_NONBLOCK ) ;
uffdio_register . range . start = ( unsigned long ) area_dst ;
uffdio_register . range . len = nr_pages * page_size ;
uffdio_register . mode = UFFDIO_REGISTER_MODE_MISSING ;
if ( ioctl ( uffd , UFFDIO_REGISTER , & uffdio_register ) )
fprintf ( stderr , " register failure \n " ) , exit ( 1 ) ;
expected_ioctls = EXPECTED_IOCTLS ;
if ( ( uffdio_register . ioctls & expected_ioctls ) ! =
expected_ioctls )
fprintf ( stderr ,
" unexpected missing ioctl for anon memory \n " ) ,
exit ( 1 ) ;
if ( pthread_create ( & uffd_mon , & attr , uffd_poll_thread , NULL ) )
perror ( " uffd_poll_thread create " ) , exit ( 1 ) ;
pid = fork ( ) ;
if ( pid < 0 )
perror ( " fork " ) , exit ( 1 ) ;
if ( ! pid )
return faulting_process ( ) ;
waitpid ( pid , & err , 0 ) ;
if ( err )
fprintf ( stderr , " faulting process failed \n " ) , exit ( 1 ) ;
if ( write ( pipefd [ 1 ] , & c , sizeof ( c ) ) ! = sizeof ( c ) )
perror ( " pipe write " ) , exit ( 1 ) ;
if ( pthread_join ( uffd_mon , ( void * * ) & userfaults ) )
return 1 ;
close ( uffd ) ;
printf ( " userfaults: %ld \n " , userfaults ) ;
return userfaults ! = nr_pages ;
}
2017-02-22 15:44:01 -08:00
static int userfaultfd_stress ( void )
{
void * area ;
char * tmp_area ;
unsigned long nr ;
struct uffdio_register uffdio_register ;
unsigned long cpu ;
int err ;
unsigned long userfaults [ nr_cpus ] ;
allocate_area ( ( void * * ) & area_src ) ;
if ( ! area_src )
return 1 ;
allocate_area ( ( void * * ) & area_dst ) ;
if ( ! area_dst )
return 1 ;
2017-02-22 15:44:06 -08:00
if ( userfaultfd_open ( 0 ) < 0 )
2017-02-22 15:44:01 -08:00
return 1 ;
2015-09-04 15:47:23 -07:00
count_verify = malloc ( nr_pages * sizeof ( unsigned long long ) ) ;
if ( ! count_verify ) {
perror ( " count_verify " ) ;
return 1 ;
}
for ( nr = 0 ; nr < nr_pages ; nr + + ) {
* area_mutex ( area_src , nr ) = ( pthread_mutex_t )
PTHREAD_MUTEX_INITIALIZER ;
count_verify [ nr ] = * area_count ( area_src , nr ) = 1 ;
2015-09-22 14:59:00 -07:00
/*
* In the transition between 255 to 256 , powerpc will
* read out of order in my_bcmp and see both bytes as
* zero , so leave a placeholder below always non - zero
* after the count , to avoid my_bcmp to trigger false
* positives .
*/
* ( area_count ( area_src , nr ) + 1 ) = 1 ;
2015-09-04 15:47:23 -07:00
}
pipefd = malloc ( sizeof ( int ) * nr_cpus * 2 ) ;
if ( ! pipefd ) {
perror ( " pipefd " ) ;
return 1 ;
}
for ( cpu = 0 ; cpu < nr_cpus ; cpu + + ) {
if ( pipe2 ( & pipefd [ cpu * 2 ] , O_CLOEXEC | O_NONBLOCK ) ) {
perror ( " pipe " ) ;
return 1 ;
}
}
if ( posix_memalign ( & area , page_size , page_size ) ) {
fprintf ( stderr , " out of memory \n " ) ;
return 1 ;
}
zeropage = area ;
bzero ( zeropage , page_size ) ;
pthread_mutex_lock ( & uffd_read_mutex ) ;
pthread_attr_init ( & attr ) ;
pthread_attr_setstacksize ( & attr , 16 * 1024 * 1024 ) ;
2015-09-22 14:59:03 -07:00
err = 0 ;
2015-09-04 15:47:23 -07:00
while ( bounces - - ) {
unsigned long expected_ioctls ;
printf ( " bounces: %d, mode: " , bounces ) ;
if ( bounces & BOUNCE_RANDOM )
printf ( " rnd " ) ;
if ( bounces & BOUNCE_RACINGFAULTS )
printf ( " racing " ) ;
if ( bounces & BOUNCE_VERIFY )
printf ( " ver " ) ;
if ( bounces & BOUNCE_POLL )
printf ( " poll " ) ;
printf ( " , " ) ;
fflush ( stdout ) ;
if ( bounces & BOUNCE_POLL )
fcntl ( uffd , F_SETFL , uffd_flags | O_NONBLOCK ) ;
else
fcntl ( uffd , F_SETFL , uffd_flags & ~ O_NONBLOCK ) ;
/* register */
uffdio_register . range . start = ( unsigned long ) area_dst ;
uffdio_register . range . len = nr_pages * page_size ;
uffdio_register . mode = UFFDIO_REGISTER_MODE_MISSING ;
if ( ioctl ( uffd , UFFDIO_REGISTER , & uffdio_register ) ) {
fprintf ( stderr , " register failure \n " ) ;
return 1 ;
}
2017-02-22 15:43:07 -08:00
expected_ioctls = EXPECTED_IOCTLS ;
2015-09-04 15:47:23 -07:00
if ( ( uffdio_register . ioctls & expected_ioctls ) ! =
expected_ioctls ) {
fprintf ( stderr ,
" unexpected missing ioctl for anon memory \n " ) ;
return 1 ;
}
/*
* The madvise done previously isn ' t enough : some
* uffd_thread could have read userfaults ( one of
* those already resolved by the background thread )
* and it may be in the process of calling
* UFFDIO_COPY . UFFDIO_COPY will read the zapped
* area_src and it would map a zero page in it ( of
* course such a UFFDIO_COPY is perfectly safe as it ' d
* return - EEXIST ) . The problem comes at the next
* bounce though : that racing UFFDIO_COPY would
* generate zeropages in the area_src , so invalidating
* the previous MADV_DONTNEED . Without this additional
* MADV_DONTNEED those zeropages leftovers in the
* area_src would lead to - EEXIST failure during the
* next bounce , effectively leaving a zeropage in the
* area_dst .
*
* Try to comment this out madvise to see the memory
* corruption being caught pretty quick .
*
* khugepaged is also inhibited to collapse THP after
* MADV_DONTNEED only after the UFFDIO_REGISTER , so it ' s
* required to MADV_DONTNEED here .
*/
2017-02-22 15:43:07 -08:00
if ( release_pages ( area_dst ) )
2015-09-04 15:47:23 -07:00
return 1 ;
/* bounce pass */
if ( stress ( userfaults ) )
return 1 ;
/* unregister */
if ( ioctl ( uffd , UFFDIO_UNREGISTER , & uffdio_register . range ) ) {
fprintf ( stderr , " register failure \n " ) ;
return 1 ;
}
/* verification */
if ( bounces & BOUNCE_VERIFY ) {
for ( nr = 0 ; nr < nr_pages ; nr + + ) {
if ( * area_count ( area_dst , nr ) ! = count_verify [ nr ] ) {
fprintf ( stderr ,
" error area_count %Lu %Lu %lu \n " ,
* area_count ( area_src , nr ) ,
count_verify [ nr ] ,
nr ) ;
2015-09-22 14:59:03 -07:00
err = 1 ;
2015-09-04 15:47:23 -07:00
bounces = 0 ;
}
}
}
/* prepare next bounce */
tmp_area = area_src ;
area_src = area_dst ;
area_dst = tmp_area ;
printf ( " userfaults: " ) ;
for ( cpu = 0 ; cpu < nr_cpus ; cpu + + )
printf ( " %lu " , userfaults [ cpu ] ) ;
printf ( " \n " ) ;
}
2017-02-22 15:44:06 -08:00
if ( err )
return err ;
close ( uffd ) ;
2017-02-22 15:44:10 -08:00
return userfaultfd_zeropage_test ( ) | | userfaultfd_events_test ( ) ;
2015-09-04 15:47:23 -07:00
}
2017-02-22 15:43:07 -08:00
# ifndef HUGETLB_TEST
2015-09-04 15:47:23 -07:00
/*
 * Entry point for the non-hugetlb builds.
 *
 * argv[1] is the area size in MiB and argv[2] the number of bounces.
 * Exits with status 1 on bad usage and 2 when the per-page mutex and
 * counters do not fit in one page; otherwise returns the result of
 * userfaultfd_stress().
 */
int main(int argc, char **argv)
{
	if (argc < 3) {
		fprintf(stderr, " Usage: <MiB> <bounces> \n ");
		exit(1);
	}

	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	page_size = sysconf(_SC_PAGE_SIZE);

	/* The mutex, the counter and its placeholder must fit in a page. */
	if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
	    > page_size) {
		fprintf(stderr, " Impossible to run this test \n ");
		exit(2);
	}

	nr_pages_per_cpu = atol(argv[1]) * 1024 * 1024 / page_size /
			   nr_cpus;
	if (nr_pages_per_cpu == 0) {
		fprintf(stderr, " invalid MiB \n ");
		fprintf(stderr, " Usage: <MiB> <bounces> \n ");
		exit(1);
	}

	bounces = atoi(argv[2]);
	if (bounces <= 0) {
		fprintf(stderr, " invalid bounces \n ");
		fprintf(stderr, " Usage: <MiB> <bounces> \n ");
		exit(1);
	}

	nr_pages = nr_pages_per_cpu * nr_cpus;
	printf(" nr_pages: %lu, nr_pages_per_cpu: %lu \n ",
	       nr_pages, nr_pages_per_cpu);

	return userfaultfd_stress();
}
2015-09-22 14:58:58 -07:00
2017-02-22 15:43:07 -08:00
# else /* HUGETLB_TEST */
/*
* Copied from mlock2 - tests . c
*/
/*
 * Look up the "Hugepagesize:" line of /proc/meminfo.
 *
 * Returns the value converted from kB to bytes, or 0 when the file
 * cannot be opened or no such line is found.
 */
unsigned long default_huge_page_size(void)
{
	unsigned long hps = 0;
	char *line = NULL;
	size_t linelen = 0;
	/*
	 * The path and the mode must not carry surrounding blanks: with
	 * them fopen() can never succeed and this function always
	 * returns 0.
	 */
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return 0;

	while (getline(&line, &linelen, f) > 0) {
		if (sscanf(line, " Hugepagesize: %lu kB ", &hps) == 1) {
			hps <<= 10;	/* kB -> bytes */
			break;
		}
	}

	free(line);
	fclose(f);
	return hps;
}
/*
 * Entry point for the HUGETLB_TEST build.
 *
 * argv[1] is the area size in MiB, argv[2] the number of bounces and
 * argv[3] the path of the hugetlbfs file backing both areas; the file
 * is created if needed and truncated to size 0.
 *
 * Exits with status 1 on bad usage or file errors and 2 when the huge
 * page size cannot be determined or the per-page data does not fit in a
 * page; otherwise returns the result of userfaultfd_stress().
 */
int main(int argc, char **argv)
{
	if (argc < 4) {
		fprintf(stderr, " Usage: <MiB> <bounces> <hugetlbfs_file> \n ");
		exit(1);
	}

	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	page_size = default_huge_page_size();
	if (!page_size) {
		fprintf(stderr, " Unable to determine huge page size \n ");
		exit(2);
	}

	/* The mutex, the counter and its placeholder must fit in a page. */
	if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
	    > page_size) {
		fprintf(stderr, " Impossible to run this test \n ");
		exit(2);
	}

	nr_pages_per_cpu = atol(argv[1]) * 1024 * 1024 / page_size /
			   nr_cpus;
	if (!nr_pages_per_cpu) {
		fprintf(stderr, " invalid MiB \n ");
		/* Same three-argument usage text as the argc check above. */
		fprintf(stderr, " Usage: <MiB> <bounces> <hugetlbfs_file> \n ");
		exit(1);
	}

	bounces = atoi(argv[2]);
	if (bounces <= 0) {
		fprintf(stderr, " invalid bounces \n ");
		fprintf(stderr, " Usage: <MiB> <bounces> <hugetlbfs_file> \n ");
		exit(1);
	}

	nr_pages = nr_pages_per_cpu * nr_cpus;

	huge_fd = open(argv[3], O_CREAT | O_RDWR, 0755);
	if (huge_fd < 0) {
		fprintf(stderr, " Open of %s failed ", argv[3]);
		perror(" open ");
		exit(1);
	}
	if (ftruncate(huge_fd, 0)) {
		fprintf(stderr, " ftruncate %s to size 0 failed ", argv[3]);
		perror(" ftruncate ");
		exit(1);
	}

	printf(" nr_pages: %lu, nr_pages_per_cpu: %lu \n ",
	       nr_pages, nr_pages_per_cpu);

	return userfaultfd_stress();
}
# endif
2015-09-22 14:58:58 -07:00
# else /* __NR_userfaultfd */
# warning "missing __NR_userfaultfd definition"
/*
 * Fallback entry point used when __NR_userfaultfd is not defined: prints
 * a skip notice on stdout and reports success.
 */
int main(void)
{
	fputs(" skip: Skipping userfaultfd test (missing __NR_userfaultfd) \n ",
	      stdout);
	return 0;
}
# endif /* __NR_userfaultfd */