/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>
#include <linux/moduleparam.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/interface/hvm/dm_op.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

#define PRIV_VMA_LOCKED ((void *)1)

static unsigned int privcmd_dm_op_max_num = 16;
module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644);
MODULE_PARM_DESC(dm_op_max_nr_bufs,
		 "Maximum number of buffers per dm_op hypercall");

static unsigned int privcmd_dm_op_buf_max_size = 4096;
module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint,
		   0644);
MODULE_PARM_DESC(dm_op_buf_max_size,
		 "Maximum size of a dm_op hypercall buffer");

struct privcmd_data {
	domid_t domid;
};

static int privcmd_vma_range_is_mapped(
	       struct vm_area_struct *vma,
	       unsigned long addr,
	       unsigned long nr_pages);

static long privcmd_ioctl_hypercall(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	struct privcmd_hypercall hypercall;
	long ret;

	/* Disallow arbitrary hypercalls if restricted */
	if (data->domid != DOMID_INVALID)
		return -EPERM;

	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
		return -EFAULT;

	xen_preemptible_hcall_begin();
	ret = privcmd_call(hypercall.op,
			   hypercall.arg[0], hypercall.arg[1],
			   hypercall.arg[2], hypercall.arg[3],
			   hypercall.arg[4]);
	xen_preemptible_hcall_end();

	return ret;
}
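
/*
 * A minimal userspace sketch of IOCTL_PRIVCMD_HYPERCALL (illustrative,
 * not part of this file; it assumes the Xen public headers for
 * __HYPERVISOR_xen_version/XENVER_version and an unrestricted fd):
 *
 *	struct privcmd_hypercall call = {
 *		.op  = __HYPERVISOR_xen_version,
 *		.arg = { XENVER_version },
 *	};
 *	int fd = open("/dev/xen/privcmd", O_RDWR);
 *	long ver = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
 *
 * On a restricted fd (see privcmd_ioctl_restrict() below) the same call
 * fails with -EPERM before any hypercall is issued.
 */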

static void free_page_list(struct list_head *pages)
{
	struct page *p, *n;

	list_for_each_entry_safe(p, n, pages, lru)
		__free_page(p);

	INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
			unsigned nelem, size_t size,
			const void __user *data)
{
	unsigned pageidx;
	void *pagedata;
	int ret;

	if (size > PAGE_SIZE)
		return 0;

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* quiet, gcc */
	while (nelem--) {
		if (pageidx > PAGE_SIZE - size) {
			struct page *page = alloc_page(GFP_KERNEL);

			ret = -ENOMEM;
			if (page == NULL)
				goto fail;

			pagedata = page_address(page);

			list_add_tail(&page->lru, pagelist);
			pageidx = 0;
		}

		ret = -EFAULT;
		if (copy_from_user(pagedata + pageidx, data, size))
			goto fail;

		data += size;
		pageidx += size;
	}

	ret = 0;

fail:
	return ret;
}
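
/*
 * Layout note (a worked example, not additional ABI): with 4 KiB pages
 * and size == sizeof(xen_pfn_t) == 8 on 64-bit, each page in the list
 * holds PAGE_SIZE / size == 512 entries.  Because a fresh page is
 * started whenever pageidx > PAGE_SIZE - size, an entry never straddles
 * a page boundary, which is what lets the traverse_pages*() helpers
 * below hand the data back in fixed-size, per-page chunks.
 */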

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
			  struct list_head *pos,
			  int (*fn)(void *data, void *state),
			  void *state)
{
	void *pagedata;
	unsigned pageidx;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* hush, gcc */

	while (nelem--) {
		if (pageidx > PAGE_SIZE - size) {
			struct page *page;

			pos = pos->next;
			page = list_entry(pos, struct page, lru);
			pagedata = page_address(page);
			pageidx = 0;
		}

		ret = (*fn)(pagedata + pageidx, state);
		if (ret)
			break;
		pageidx += size;
	}

	return ret;
}

/*
 * Similar to traverse_pages, but use each page as a "block" of
 * data to be processed as one unit.
 */
static int traverse_pages_block(unsigned nelem, size_t size,
				struct list_head *pos,
				int (*fn)(void *data, int nr, void *state),
				void *state)
{
	void *pagedata;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	while (nelem) {
		int nr = (PAGE_SIZE / size);
		struct page *page;

		if (nr > nelem)
			nr = nelem;
		pos = pos->next;
		page = list_entry(pos, struct page, lru);
		pagedata = page_address(page);
		ret = (*fn)(pagedata, nr, state);
		if (ret)
			break;
		nelem -= nr;
	}

	return ret;
}

struct mmap_gfn_state {
	unsigned long va;
	struct vm_area_struct *vma;
	domid_t domain;
};

static int mmap_gfn_range(void *data, void *state)
{
	struct privcmd_mmap_entry *msg = data;
	struct mmap_gfn_state *st = state;
	struct vm_area_struct *vma = st->vma;
	int rc;

	/* Do not allow range to wrap the address space. */
	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
		return -EINVAL;

	/* Range chunks must be contiguous in va space. */
	if ((msg->va != st->va) ||
	    ((msg->va + (msg->npages << PAGE_SHIFT)) > vma->vm_end))
		return -EINVAL;

	rc = xen_remap_domain_gfn_range(vma,
					msg->va & PAGE_MASK,
					msg->mfn, msg->npages,
					vma->vm_page_prot,
					st->domain, NULL);
	if (rc < 0)
		return rc;

	st->va += msg->npages << PAGE_SHIFT;

	return 0;
}

static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	struct privcmd_mmap mmapcmd;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc;
	LIST_HEAD(pagelist);
	struct mmap_gfn_state state;

	/* We only support privcmd_ioctl_mmap_batch for auto translated. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return -ENOSYS;

	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
		return -EFAULT;

	/* If restriction is in place, check the domid matches */
	if (data->domid != DOMID_INVALID && data->domid != mmapcmd.dom)
		return -EPERM;

	rc = gather_array(&pagelist,
			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			  mmapcmd.entry);

	if (rc || list_empty(&pagelist))
		goto out;

	down_write(&mm->mmap_sem);

	{
		struct page *page = list_first_entry(&pagelist,
						     struct page, lru);
		struct privcmd_mmap_entry *msg = page_address(page);

		vma = find_vma(mm, msg->va);
		rc = -EINVAL;

		if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
			goto out_up;
		vma->vm_private_data = PRIV_VMA_LOCKED;
	}

	state.va = vma->vm_start;
	state.vma = vma;
	state.domain = mmapcmd.dom;

	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			    &pagelist,
			    mmap_gfn_range, &state);

out_up:
	up_write(&mm->mmap_sem);

out:
	free_page_list(&pagelist);

	return rc;
}
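
/*
 * Userspace sketch of the (non-batch) mmap ioctl (illustrative; fd, dom,
 * mfn and nr are placeholders the caller supplies, and addr comes from a
 * prior mmap() of the privcmd fd):
 *
 *	struct privcmd_mmap_entry ent = {
 *		.va = (unsigned long)addr,	// must be vm_start, per above
 *		.mfn = mfn,
 *		.npages = nr,
 *	};
 *	struct privcmd_mmap cmd = { .num = 1, .dom = dom, .entry = &ent };
 *	rc = ioctl(fd, IOCTL_PRIVCMD_MMAP, &cmd);
 */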

struct mmap_batch_state {
	domid_t domain;
	unsigned long va;
	struct vm_area_struct *vma;
	int index;
	/* A tristate:
	 *      0 for no errors
	 *      1 if at least one error has happened (and no
	 *          -ENOENT errors have happened)
	 *      -ENOENT if at least 1 -ENOENT has happened.
	 */
	int global_error;
	int version;

	/* User-space gfn array to store errors in the second pass for V1. */
	xen_pfn_t __user *user_gfn;
	/* User-space int array to store errors in the second pass for V2. */
	int __user *user_err;
};

/* auto translated dom0 note: if domU being created is PV, then gfn is
 * mfn(addr on bus). If it's auto xlated, then gfn is pfn (input to HAP).
 */
static int mmap_batch_fn(void *data, int nr, void *state)
{
	xen_pfn_t *gfnp = data;
	struct mmap_batch_state *st = state;
	struct vm_area_struct *vma = st->vma;
	struct page **pages = vma->vm_private_data;
	struct page **cur_pages = NULL;
	int ret;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		cur_pages = &pages[st->index];

	BUG_ON(nr < 0);
	ret = xen_remap_domain_gfn_array(st->vma, st->va & PAGE_MASK, gfnp, nr,
					 (int *)gfnp, st->vma->vm_page_prot,
					 st->domain, cur_pages);

	/* Adjust the global_error? */
	if (ret != nr) {
		if (ret == -ENOENT)
			st->global_error = -ENOENT;
		else {
			/* Record that at least one error has happened. */
			if (st->global_error == 0)
				st->global_error = 1;
		}
	}
	st->va += PAGE_SIZE * nr;
	st->index += nr;

	return 0;
}

static int mmap_return_error(int err, struct mmap_batch_state *st)
{
	int ret;

	if (st->version == 1) {
		if (err) {
			xen_pfn_t gfn;

			ret = get_user(gfn, st->user_gfn);
			if (ret < 0)
				return ret;
			/*
			 * V1 encodes the error codes in the 32bit top
			 * nibble of the gfn (with its known
			 * limitations vis-a-vis 64 bit callers).
			 */
			gfn |= (err == -ENOENT) ?
				PRIVCMD_MMAPBATCH_PAGED_ERROR :
				PRIVCMD_MMAPBATCH_MFN_ERROR;
			return __put_user(gfn, st->user_gfn++);
		} else
			st->user_gfn++;
	} else { /* st->version == 2 */
		if (err)
			return __put_user(err, st->user_err++);
		else
			st->user_err++;
	}

	return 0;
}
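
/*
 * Worked example of the V1 encoding above, assuming the uapi values in
 * xen/privcmd.h (PRIVCMD_MMAPBATCH_MFN_ERROR == 0xf0000000U and
 * PRIVCMD_MMAPBATCH_PAGED_ERROR == 0x80000000U): a paged-out frame
 * 0x1234 is handed back to a V1 caller as 0x80001234, and the caller
 * masks off the top nibble to recover the frame number.  V2 sidesteps
 * the 64-bit limitation by writing plain errnos to m.err[] instead.
 */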

static int mmap_return_errors(void *data, int nr, void *state)
{
	struct mmap_batch_state *st = state;
	int *errs = data;
	int i;
	int ret;

	for (i = 0; i < nr; i++) {
		ret = mmap_return_error(errs[i], st);
		if (ret < 0)
			return ret;
	}

	return 0;
}

/* Allocate pfns that are then mapped with gfns from foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 if success, otherwise -errno
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
	int rc;
	struct page **pages;

	pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
	if (pages == NULL)
		return -ENOMEM;

	rc = alloc_xenballooned_pages(numpgs, pages);
	if (rc != 0) {
		pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
			numpgs, rc);
		kfree(pages);
		return -ENOMEM;
	}

	BUG_ON(vma->vm_private_data != NULL);
	vma->vm_private_data = pages;

	return 0;
}

static const struct vm_operations_struct privcmd_vm_ops;

static long privcmd_ioctl_mmap_batch(
	struct file *file, void __user *udata, int version)
{
	struct privcmd_data *data = file->private_data;
	int ret;
	struct privcmd_mmapbatch_v2 m;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long nr_pages;
	LIST_HEAD(pagelist);
	struct mmap_batch_state state;

	switch (version) {
	case 1:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
			return -EFAULT;
		/* Returns per-frame error in m.arr. */
		m.err = NULL;
		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
			return -EFAULT;
		break;
	case 2:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
			return -EFAULT;
		/* Returns per-frame error code in m.err. */
		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
			return -EFAULT;
		break;
	default:
		return -EINVAL;
	}

	/* If restriction is in place, check the domid matches */
	if (data->domid != DOMID_INVALID && data->domid != m.dom)
		return -EPERM;

	nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE);
	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
		return -EINVAL;

	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

	if (ret)
		goto out;
	if (list_empty(&pagelist)) {
		ret = -EINVAL;
		goto out;
	}

	if (version == 2) {
		/* Zero error array now to only copy back actual errors. */
		if (clear_user(m.err, sizeof(int) * m.num)) {
			ret = -EFAULT;
			goto out;
		}
	}

	down_write(&mm->mmap_sem);

	vma = find_vma(mm, m.addr);
	if (!vma ||
	    vma->vm_ops != &privcmd_vm_ops) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Caller must either:
	 *
	 * Map the whole VMA range, which will also allocate all the
	 * pages required for the auto_translated_physmap case.
	 *
	 * Or
	 *
	 * Map unmapped holes left from a previous map attempt (e.g.,
	 * because those foreign frames were previously paged out).
	 */
	if (vma->vm_private_data == NULL) {
		if (m.addr != vma->vm_start ||
		    m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
			ret = -EINVAL;
			goto out_unlock;
		}
		if (xen_feature(XENFEAT_auto_translated_physmap)) {
			ret = alloc_empty_pages(vma, nr_pages);
			if (ret < 0)
				goto out_unlock;
		} else
			vma->vm_private_data = PRIV_VMA_LOCKED;
	} else {
		if (m.addr < vma->vm_start ||
		    m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
			ret = -EINVAL;
			goto out_unlock;
		}
		if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
			ret = -EINVAL;
			goto out_unlock;
		}
	}

	state.domain        = m.dom;
	state.vma           = vma;
	state.va            = m.addr;
	state.index         = 0;
	state.global_error  = 0;
	state.version       = version;

	BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0);
	/* mmap_batch_fn guarantees ret == 0 */
	BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
				    &pagelist, mmap_batch_fn, &state));

	up_write(&mm->mmap_sem);

	if (state.global_error) {
		/* Write back errors in second pass. */
		state.user_gfn = (xen_pfn_t *)m.arr;
		state.user_err = m.err;
		ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
					   &pagelist, mmap_return_errors, &state);
	} else
		ret = 0;

	/* If we have not had any EFAULT-like global errors then set the global
	 * error to -ENOENT if necessary. */
	if ((ret == 0) && (state.global_error == -ENOENT))
		ret = -ENOENT;

out:
	free_page_list(&pagelist);
	return ret;

out_unlock:
	up_write(&mm->mmap_sem);
	goto out;
}
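
/*
 * Userspace sketch of a V2 batch mapping (illustrative; fd, dom, nr and
 * the gfns[]/errs[] arrays are the caller's):
 *
 *	void *addr = mmap(NULL, nr << PAGE_SHIFT, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *	struct privcmd_mmapbatch_v2 batch = {
 *		.num = nr,
 *		.dom = dom,
 *		.addr = (__u64)(unsigned long)addr,
 *		.arr = gfns,
 *		.err = errs,
 *	};
 *	rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch);
 *
 * rc == -ENOENT means some frames were paged out; errs[] says which, and
 * those holes can be retried on the same VMA (the second branch above).
 */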

static int lock_pages(
	struct privcmd_dm_op_buf kbufs[], unsigned int num,
	struct page *pages[], unsigned int nr_pages)
{
	unsigned int i;

	for (i = 0; i < num; i++) {
		unsigned int requested;
		int pinned;

		requested = DIV_ROUND_UP(
			offset_in_page(kbufs[i].uptr) +
			kbufs[i].size, PAGE_SIZE);
		if (requested > nr_pages)
			return -ENOSPC;

		pinned = get_user_pages_fast(
			(unsigned long) kbufs[i].uptr,
			requested, FOLL_WRITE, pages);
		if (pinned < 0)
			return pinned;

		nr_pages -= pinned;
		pages += pinned;
	}

	return 0;
}

static void unlock_pages(struct page *pages[], unsigned int nr_pages)
{
	unsigned int i;

	if (!pages)
		return;

	for (i = 0; i < nr_pages; i++) {
		if (pages[i])
			put_page(pages[i]);
	}
}
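
/*
 * Worked example of the pinning arithmetic (illustrative): a 5000-byte
 * buffer starting 100 bytes into a 4 KiB page spans
 * DIV_ROUND_UP(100 + 5000, 4096) == 2 pages.  privcmd_ioctl_dm_op()
 * below sizes pages[] with the same formula, so the -ENOSPC check above
 * is purely defensive against the two calculations disagreeing.
 */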

static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	struct privcmd_dm_op kdata;
	struct privcmd_dm_op_buf *kbufs;
	unsigned int nr_pages = 0;
	struct page **pages = NULL;
	struct xen_dm_op_buf *xbufs = NULL;
	unsigned int i;
	long rc;

	if (copy_from_user(&kdata, udata, sizeof(kdata)))
		return -EFAULT;

	/* If restriction is in place, check the domid matches */
	if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
		return -EPERM;

	if (kdata.num == 0)
		return 0;

	if (kdata.num > privcmd_dm_op_max_num)
		return -E2BIG;

	kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL);
	if (!kbufs)
		return -ENOMEM;

	if (copy_from_user(kbufs, kdata.ubufs,
			   sizeof(*kbufs) * kdata.num)) {
		rc = -EFAULT;
		goto out;
	}

	for (i = 0; i < kdata.num; i++) {
		if (kbufs[i].size > privcmd_dm_op_buf_max_size) {
			rc = -E2BIG;
			goto out;
		}

		if (!access_ok(VERIFY_WRITE, kbufs[i].uptr,
			       kbufs[i].size)) {
			rc = -EFAULT;
			goto out;
		}

		nr_pages += DIV_ROUND_UP(
			offset_in_page(kbufs[i].uptr) +
			kbufs[i].size, PAGE_SIZE);
	}

	pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages) {
		rc = -ENOMEM;
		goto out;
	}

	xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL);
	if (!xbufs) {
		rc = -ENOMEM;
		goto out;
	}

	rc = lock_pages(kbufs, kdata.num, pages, nr_pages);
	if (rc)
		goto out;

	for (i = 0; i < kdata.num; i++) {
		set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
		xbufs[i].size = kbufs[i].size;
	}

	xen_preemptible_hcall_begin();
	rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs);
	xen_preemptible_hcall_end();

out:
	unlock_pages(pages, nr_pages);
	kfree(xbufs);
	kfree(pages);
	kfree(kbufs);

	return rc;
}
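
/*
 * Userspace sketch of a dm_op call (illustrative; the xen_dm_op payload
 * layout comes from xen/interface/hvm/dm_op.h and is elided here):
 *
 *	struct xen_dm_op op;		// filled in per dm_op.h
 *	struct privcmd_dm_op_buf buf = {
 *		.uptr = &op,
 *		.size = sizeof(op),
 *	};
 *	struct privcmd_dm_op dm = { .dom = dom, .num = 1, .ubufs = &buf };
 *	rc = ioctl(fd, IOCTL_PRIVCMD_DM_OP, &dm);
 *
 * The buffers stay pinned across the preemptible hypercall, which is why
 * lock_pages()/unlock_pages() bracket HYPERVISOR_dm_op() above.
 */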

static long privcmd_ioctl_restrict(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	domid_t dom;

	if (copy_from_user(&dom, udata, sizeof(dom)))
		return -EFAULT;

	/* Set restriction to the specified domain, or check it matches */
	if (data->domid == DOMID_INVALID)
		data->domid = dom;
	else if (data->domid != dom)
		return -EINVAL;

	return 0;
}
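
/*
 * Usage sketch (illustrative): a device model that should only ever
 * touch one guest drops its ambient privilege right after open():
 *
 *	int fd = open("/dev/xen/privcmd", O_RDWR);
 *	domid_t domid = 5;	// hypothetical guest
 *	ioctl(fd, IOCTL_PRIVCMD_RESTRICT, &domid);
 *
 * The restriction is one-way: afterwards, raw hypercalls on this fd fail
 * with -EPERM and the mmap/dm_op paths accept only this domid.
 */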
2009-02-09 23:05:49 +03:00
static long privcmd_ioctl ( struct file * file ,
unsigned int cmd , unsigned long data )
{
2017-02-13 20:03:22 +03:00
int ret = - ENOTTY ;
2009-02-09 23:05:49 +03:00
void __user * udata = ( void __user * ) data ;
switch ( cmd ) {
case IOCTL_PRIVCMD_HYPERCALL :
2017-02-13 20:03:24 +03:00
ret = privcmd_ioctl_hypercall ( file , udata ) ;
2009-02-09 23:05:49 +03:00
break ;
case IOCTL_PRIVCMD_MMAP :
2017-02-13 20:03:24 +03:00
ret = privcmd_ioctl_mmap ( file , udata ) ;
2009-02-09 23:05:49 +03:00
break ;
case IOCTL_PRIVCMD_MMAPBATCH :
2017-02-13 20:03:24 +03:00
ret = privcmd_ioctl_mmap_batch ( file , udata , 1 ) ;
2012-08-31 17:59:30 +04:00
break ;
case IOCTL_PRIVCMD_MMAPBATCH_V2 :
2017-02-13 20:03:24 +03:00
ret = privcmd_ioctl_mmap_batch ( file , udata , 2 ) ;
2009-02-09 23:05:49 +03:00
break ;
2017-02-13 20:03:23 +03:00
case IOCTL_PRIVCMD_DM_OP :
2017-02-13 20:03:24 +03:00
ret = privcmd_ioctl_dm_op ( file , udata ) ;
break ;
case IOCTL_PRIVCMD_RESTRICT :
ret = privcmd_ioctl_restrict ( file , udata ) ;
2017-02-13 20:03:23 +03:00
break ;
2009-02-09 23:05:49 +03:00
default :
break ;
}
return ret ;
}

static int privcmd_open(struct inode *ino, struct file *file)
{
	struct privcmd_data *data = kzalloc(sizeof(*data), GFP_KERNEL);

	if (!data)
		return -ENOMEM;

	/* DOMID_INVALID implies no restriction */
	data->domid = DOMID_INVALID;

	file->private_data = data;
	return 0;
}

static int privcmd_release(struct inode *ino, struct file *file)
{
	struct privcmd_data *data = file->private_data;

	kfree(data);
	return 0;
}

static void privcmd_close(struct vm_area_struct *vma)
{
	struct page **pages = vma->vm_private_data;
	int numpgs = vma_pages(vma);
	int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT;
	int rc;

	if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
		return;

	rc = xen_unmap_domain_gfn_range(vma, numgfns, pages);
	if (rc == 0)
		free_xenballooned_pages(numpgs, pages);
	else
		pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n",
			numpgs, rc);
	kfree(pages);
}

static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
	       vma, vma->vm_start, vma->vm_end,
	       vmf->pgoff, (void *)vmf->address);

	return VM_FAULT_SIGBUS;
}

static const struct vm_operations_struct privcmd_vm_ops = {
	.close = privcmd_close,
	.fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
	 * how to recreate these mappings */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
			 VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &privcmd_vm_ops;
	vma->vm_private_data = NULL;

	return 0;
}

/*
 * For MMAPBATCH*. This allows asserting the singleshot mapping
 * on a per pfn/pte basis. Mapping calls that fail with ENOENT
 * can be then retried until success.
 */
static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
			unsigned long addr, void *data)
{
	return pte_none(*pte) ? 0 : -EBUSY;
}

static int privcmd_vma_range_is_mapped(
	       struct vm_area_struct *vma,
	       unsigned long addr,
	       unsigned long nr_pages)
{
	return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
				   is_mapped_fn, NULL) != 0;
}

const struct file_operations xen_privcmd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = privcmd_ioctl,
	.open = privcmd_open,
	.release = privcmd_release,
	.mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/privcmd",
	.fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	err = misc_register(&privcmd_dev);
	if (err != 0) {
		pr_err("Could not register Xen privcmd device\n");
		return err;
	}
	return 0;
}

static void __exit privcmd_exit(void)
{
	misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);