2020-03-26 17:01:23 +03:00
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright ( C ) 2018 - 2020 Intel Corporation .
* Copyright ( C ) 2020 Red Hat , Inc .
*
* Author : Tiwei Bie < tiwei . bie @ intel . com >
* Jason Wang < jasowang @ redhat . com >
*
* Thanks to Michael S. Tsirkin for the valuable comments and
* suggestions, and thanks to Cunming Liang and Zhihong Wang for all
* their support.
*/
# include <linux/kernel.h>
# include <linux/module.h>
# include <linux/cdev.h>
# include <linux/device.h>
2020-05-29 11:03:01 +03:00
# include <linux/mm.h>
2020-03-26 17:01:23 +03:00
# include <linux/iommu.h>
# include <linux/uuid.h>
# include <linux/vdpa.h>
# include <linux/nospec.h>
# include <linux/vhost.h>
# include <linux/virtio_net.h>
# include "vhost.h"
2020-08-04 19:20:39 +03:00
enum {
2020-08-04 19:20:40 +03:00
VHOST_VDPA_BACKEND_FEATURES =
( 1ULL < < VHOST_BACKEND_F_IOTLB_MSG_V2 ) |
( 1ULL < < VHOST_BACKEND_F_IOTLB_BATCH ) ,
2020-08-04 19:20:39 +03:00
} ;
2020-03-26 17:01:23 +03:00
/*
 * Number of minors reserved for vhost-vdpa character devices; used both as
 * the size of the chrdev region and as the upper bound of the minor IDA.
 */
# define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
struct vhost_vdpa {
struct vhost_dev vdev ;
struct iommu_domain * domain ;
struct vhost_virtqueue * vqs ;
struct completion completion ;
struct vdpa_device * vdpa ;
struct device dev ;
struct cdev cdev ;
atomic_t opened ;
int nvqs ;
int virtio_id ;
int minor ;
2020-06-05 13:27:13 +03:00
struct eventfd_ctx * config_ctx ;
2020-08-04 19:20:40 +03:00
int in_batch ;
2020-10-23 12:00:42 +03:00
struct vdpa_iova_range range ;
2020-03-26 17:01:23 +03:00
} ;
/* Allocator for the minor numbers handed out in probe and returned in release_dev. */
static DEFINE_IDA ( vhost_vdpa_ida ) ;
/* Device number region obtained from alloc_chrdev_region() at module init. */
static dev_t vhost_vdpa_major ;
static void handle_vq_kick ( struct vhost_work * work )
{
struct vhost_virtqueue * vq = container_of ( work , struct vhost_virtqueue ,
poll . work ) ;
struct vhost_vdpa * v = container_of ( vq - > dev , struct vhost_vdpa , vdev ) ;
const struct vdpa_config_ops * ops = v - > vdpa - > config ;
ops - > kick_vq ( v - > vdpa , vq - v - > vqs ) ;
}
static irqreturn_t vhost_vdpa_virtqueue_cb ( void * private )
{
struct vhost_virtqueue * vq = private ;
2020-07-31 09:55:28 +03:00
struct eventfd_ctx * call_ctx = vq - > call_ctx . ctx ;
2020-03-26 17:01:23 +03:00
if ( call_ctx )
eventfd_signal ( call_ctx , 1 ) ;
return IRQ_HANDLED ;
}
2020-06-05 13:27:13 +03:00
static irqreturn_t vhost_vdpa_config_cb ( void * private )
{
struct vhost_vdpa * v = private ;
struct eventfd_ctx * config_ctx = v - > config_ctx ;
if ( config_ctx )
eventfd_signal ( config_ctx , 1 ) ;
return IRQ_HANDLED ;
}
2020-07-31 09:55:31 +03:00
/*
 * (Re)register the irq bypass producer of virtqueue @qid.
 *
 * Does nothing when the parent device has no get_vq_irq op. Otherwise any
 * previous producer registration is dropped first, and a new one is made
 * only if the queue has a call eventfd and the device reports a valid irq.
 * A registration failure is logged and otherwise ignored.
 */
static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_virtqueue *vq = &v->vqs[qid];
	int irq, err;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (irq < 0 || !vq->call_ctx.ctx)
		return;

	/* The call eventfd context doubles as the bypass token. */
	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;

	err = irq_bypass_register_producer(&vq->call_ctx.producer);
	if (unlikely(err))
		dev_info(&v->dev, " vq %u, irq bypass producer (token %p) registration fails, ret = %d \n ",
			 qid, vq->call_ctx.producer.token, err);
}
/* Unregister the irq bypass producer of virtqueue @qid. */
static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	irq_bypass_unregister_producer(&v->vqs[qid].call_ctx.producer);
}
2020-03-26 17:01:23 +03:00
static void vhost_vdpa_reset ( struct vhost_vdpa * v )
{
struct vdpa_device * vdpa = v - > vdpa ;
2020-07-27 17:58:18 +03:00
vdpa_reset ( vdpa ) ;
2020-08-04 19:20:40 +03:00
v - > in_batch = 0 ;
2020-03-26 17:01:23 +03:00
}
/*
 * VHOST_VDPA_GET_DEVICE_ID: copy the parent's virtio device ID (u32) to
 * userspace. Returns 0, or -EFAULT if the copy fails.
 */
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	u32 id = vdpa->config->get_device_id(vdpa);

	return copy_to_user(argp, &id, sizeof(id)) ? -EFAULT : 0;
}
/*
 * VHOST_VDPA_GET_STATUS: copy the device status byte to userspace.
 * Returns 0, or -EFAULT if the copy fails.
 */
static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	u8 cur = vdpa->config->get_status(vdpa);

	return copy_to_user(statusp, &cur, sizeof(cur)) ? -EFAULT : 0;
}
/*
 * VHOST_VDPA_SET_STATUS: write a new device status byte.
 *
 * Returns 0 on success, -EFAULT if the status cannot be read from
 * userspace, or -EINVAL if userspace tries to clear individual status bits
 * without resetting the status to 0.
 *
 * The irq bypass producers follow DRIVER_OK: they are registered once the
 * device has entered DRIVER_OK and unregistered before it leaves it.
 */
static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	int nvqs = v->nvqs;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless reset the
	 * status to 0. Use the snapshot taken above so that the check and
	 * the DRIVER_OK transitions below agree on the old status.
	 */
	if (status != 0 && (status_old & ~status) != 0)
		return -EINVAL;

	/*
	 * Drop the irq bypass producers while the device is still in
	 * DRIVER_OK. NOTE(review): the parent driver may release the vq
	 * interrupts when DRIVER_OK is cleared; confirm against the parent.
	 */
	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) &&
	    !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	ops->set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
	    !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	return 0;
}
static int vhost_vdpa_config_validate ( struct vhost_vdpa * v ,
struct vhost_vdpa_config * c )
{
long size = 0 ;
switch ( v - > virtio_id ) {
case VIRTIO_ID_NET :
size = sizeof ( struct virtio_net_config ) ;
break ;
}
if ( c - > len = = 0 )
return - EINVAL ;
if ( c - > len > size - c - > off )
return - E2BIG ;
return 0 ;
}
/*
 * VHOST_VDPA_GET_CONFIG: read config.len bytes of device config space at
 * config.off and copy them to the user buffer that follows the header.
 *
 * Returns 0 on success, -EFAULT on a failed user copy, -EINVAL if the
 * requested window fails validation, -ENOMEM if the bounce buffer cannot
 * be allocated.
 */
static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	unsigned long hdr_len = offsetof(struct vhost_vdpa_config, buf);
	struct vhost_vdpa_config config;
	long ret = 0;
	u8 *buf;

	/* Only the fixed header (everything before buf) is read here. */
	if (copy_from_user(&config, c, hdr_len))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;

	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(v->vdpa, config.off, buf, config.len);
	if (copy_to_user(c->buf, buf, config.len))
		ret = -EFAULT;

	kvfree(buf);
	return ret;
}
/*
 * VHOST_VDPA_SET_CONFIG: copy config.len bytes from the user buffer that
 * follows the header and write them to device config space at config.off.
 *
 * Returns 0 on success, -EFAULT if the header cannot be read, -EINVAL if
 * the window fails validation, or the error from vmemdup_user().
 */
static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	unsigned long hdr_len = offsetof(struct vhost_vdpa_config, buf);
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	u8 *data;

	if (copy_from_user(&config, c, hdr_len))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;

	data = vmemdup_user(c->buf, config.len);
	if (IS_ERR(data))
		return PTR_ERR(data);

	vdpa->config->set_config(vdpa, config.off, data, config.len);
	kvfree(data);

	return 0;
}
/*
 * VHOST_GET_FEATURES: copy the device feature bits to userspace.
 * Returns 0, or -EFAULT if the copy fails.
 */
static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	u64 bits = vdpa->config->get_features(vdpa);

	return copy_to_user(featurep, &bits, sizeof(bits)) ? -EFAULT : 0;
}
/*
 * VHOST_SET_FEATURES: set the driver feature bits on the device.
 *
 * Returns 0 on success, -EBUSY if FEATURES_OK is already set in the device
 * status, -EFAULT if the bits cannot be read from userspace, -EINVAL if
 * the device rejects them.
 */
static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	u64 bits;

	/* Features are frozen once negotiation has completed. */
	if (vdpa->config->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&bits, featurep, sizeof(bits)))
		return -EFAULT;

	return vdpa_set_features(vdpa, bits) ? -EINVAL : 0;
}
/*
 * VHOST_VDPA_GET_VRING_NUM: copy the device's maximum virtqueue size (u16)
 * to userspace. Returns 0, or -EFAULT if the copy fails.
 */
static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	u16 max_num = vdpa->config->get_vq_num_max(vdpa);

	return copy_to_user(argp, &max_num, sizeof(max_num)) ? -EFAULT : 0;
}
2020-06-05 13:27:13 +03:00
static void vhost_vdpa_config_put ( struct vhost_vdpa * v )
{
2021-03-11 16:52:56 +03:00
if ( v - > config_ctx ) {
2020-06-05 13:27:13 +03:00
eventfd_ctx_put ( v - > config_ctx ) ;
2021-03-11 16:52:56 +03:00
v - > config_ctx = NULL ;
}
2020-06-05 13:27:13 +03:00
}
/*
 * VHOST_VDPA_SET_CONFIG_CALL: bind (or, with VHOST_FILE_UNBIND, unbind) the
 * eventfd that is signalled on device config changes.
 *
 * Returns 0 on success, -EFAULT if the fd cannot be read from userspace,
 * or the error from eventfd_ctx_fdget(). In the latter case the previously
 * bound eventfd has already been released and no eventfd is left bound.
 */
static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	struct eventfd_ctx *ctx;
	int fd;

	cb.callback = vhost_vdpa_config_cb;
	/*
	 * vhost_vdpa_config_cb() interprets its argument as a
	 * struct vhost_vdpa pointer, so pass v itself, not v->vdpa.
	 */
	cb.private = v;

	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	/* After the swap, ctx holds the old context and config_ctx the new one. */
	swap(ctx, v->config_ctx);
	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx)) {
		long ret = PTR_ERR(v->config_ctx);

		/* Never leave an ERR_PTR behind for the callback to signal. */
		v->config_ctx = NULL;
		return ret;
	}

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}
2020-07-31 09:55:31 +03:00
2020-10-23 12:00:42 +03:00
/*
 * VHOST_VDPA_GET_IOVA_RANGE: copy the usable IOVA range to userspace.
 * Returns 0, or -EFAULT if the copy fails.
 */
static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range uapi_range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	return copy_to_user(argp, &uapi_range, sizeof(uapi_range)) ? -EFAULT : 0;
}
2020-03-26 17:01:23 +03:00
/*
 * Handle the per-virtqueue ioctls.
 *
 * The queue index is read from the first u32 at @argp. Returns -ENOBUFS if
 * it is out of range. VHOST_VDPA_SET_VRING_ENABLE is handled entirely here.
 * Every other command goes through vhost_vring_ioctl() and, on success, the
 * resulting virtqueue state is pushed to the parent device. For
 * VHOST_GET_VRING_BASE the device's available index is fetched first so
 * that the generic code reports it.
 */
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vring_state state;
	struct vdpa_vq_state vq_state;
	struct vhost_virtqueue *vq;
	struct vdpa_callback cb;
	u32 idx;
	long ret;

	ret = get_user(idx, (u32 __user *)argp);
	if (ret < 0)
		return ret;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	/* Clamp the index under speculation before using it as an array index. */
	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	if (cmd == VHOST_VDPA_SET_VRING_ENABLE) {
		if (copy_from_user(&state, argp, sizeof(state)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, state.num);
		return 0;
	}

	if (cmd == VHOST_GET_VRING_BASE) {
		ret = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (ret)
			return ret;
		vq->last_avail_idx = vq_state.avail_index;
	}

	ret = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (ret)
		return ret;

	/* ret is 0 from here on unless the device rejects the new state. */
	switch (cmd) {
	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;

	case VHOST_SET_VRING_ADDR:
		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			ret = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		vq_state.avail_index = vq->last_avail_idx;
		if (ops->set_vq_state(vdpa, idx, &vq_state))
			ret = -EINVAL;
		break;

	case VHOST_SET_VRING_CALL:
		if (!vq->call_ctx.ctx) {
			cb.callback = NULL;
			cb.private = NULL;
		} else {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;
	}

	return ret;
}
static long vhost_vdpa_unlocked_ioctl ( struct file * filep ,
unsigned int cmd , unsigned long arg )
{
struct vhost_vdpa * v = filep - > private_data ;
struct vhost_dev * d = & v - > vdev ;
void __user * argp = ( void __user * ) arg ;
2020-09-07 13:43:43 +03:00
u64 __user * featurep = argp ;
u64 features ;
2020-10-23 15:08:53 +03:00
long r = 0 ;
2020-03-26 17:01:23 +03:00
2020-09-07 13:43:43 +03:00
if ( cmd = = VHOST_SET_BACKEND_FEATURES ) {
2020-10-23 15:08:53 +03:00
if ( copy_from_user ( & features , featurep , sizeof ( features ) ) )
return - EFAULT ;
2020-09-07 13:43:43 +03:00
if ( features & ~ VHOST_VDPA_BACKEND_FEATURES )
return - EOPNOTSUPP ;
vhost_set_backend_features ( & v - > vdev , features ) ;
return 0 ;
}
2020-03-26 17:01:23 +03:00
mutex_lock ( & d - > mutex ) ;
switch ( cmd ) {
case VHOST_VDPA_GET_DEVICE_ID :
r = vhost_vdpa_get_device_id ( v , argp ) ;
break ;
case VHOST_VDPA_GET_STATUS :
r = vhost_vdpa_get_status ( v , argp ) ;
break ;
case VHOST_VDPA_SET_STATUS :
r = vhost_vdpa_set_status ( v , argp ) ;
break ;
case VHOST_VDPA_GET_CONFIG :
r = vhost_vdpa_get_config ( v , argp ) ;
break ;
case VHOST_VDPA_SET_CONFIG :
r = vhost_vdpa_set_config ( v , argp ) ;
break ;
case VHOST_GET_FEATURES :
r = vhost_vdpa_get_features ( v , argp ) ;
break ;
case VHOST_SET_FEATURES :
r = vhost_vdpa_set_features ( v , argp ) ;
break ;
case VHOST_VDPA_GET_VRING_NUM :
r = vhost_vdpa_get_vring_num ( v , argp ) ;
break ;
case VHOST_SET_LOG_BASE :
case VHOST_SET_LOG_FD :
r = - ENOIOCTLCMD ;
break ;
2020-06-05 13:27:13 +03:00
case VHOST_VDPA_SET_CONFIG_CALL :
r = vhost_vdpa_set_config_call ( v , argp ) ;
break ;
2020-09-07 13:43:43 +03:00
case VHOST_GET_BACKEND_FEATURES :
features = VHOST_VDPA_BACKEND_FEATURES ;
2020-10-23 15:08:53 +03:00
if ( copy_to_user ( featurep , & features , sizeof ( features ) ) )
r = - EFAULT ;
2020-09-07 13:43:43 +03:00
break ;
2020-10-23 12:00:42 +03:00
case VHOST_VDPA_GET_IOVA_RANGE :
r = vhost_vdpa_get_iova_range ( v , argp ) ;
break ;
2020-03-26 17:01:23 +03:00
default :
r = vhost_dev_ioctl ( & v - > vdev , cmd , argp ) ;
if ( r = = - ENOIOCTLCMD )
r = vhost_vdpa_vring_ioctl ( v , cmd , argp ) ;
break ;
}
mutex_unlock ( & d - > mutex ) ;
return r ;
}
/*
 * Remove every IOTLB entry overlapping [start, last]: unpin its pages
 * (marking them dirty first when the mapping has the write bit set),
 * subtract them from the mm's pinned_vm count and free the entry.
 */
static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct vhost_iotlb_map *map;
	unsigned long pfn, remaining;
	struct page *page;

	for (;;) {
		map = vhost_iotlb_itree_first(iotlb, start, last);
		if (!map)
			break;

		pfn = map->addr >> PAGE_SHIFT;
		remaining = map->size >> PAGE_SHIFT;
		while (remaining > 0) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
			pfn++;
			remaining--;
		}

		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
		vhost_iotlb_map_free(iotlb, map);
	}
}
static void vhost_vdpa_iotlb_free ( struct vhost_vdpa * v )
{
struct vhost_dev * dev = & v - > vdev ;
vhost_vdpa_iotlb_unmap ( v , 0ULL , 0ULL - 1 ) ;
kfree ( dev - > iotlb ) ;
dev - > iotlb = NULL ;
}
/*
 * Translate a vhost access permission into IOMMU protection flags.
 * IOMMU_CACHE is always set. An unknown permission triggers a warning
 * and yields IOMMU_CACHE alone.
 */
static int perm_to_iommu_flags(u32 perm)
{
	switch (perm) {
	case VHOST_ACCESS_RO:
		return IOMMU_READ | IOMMU_CACHE;
	case VHOST_ACCESS_WO:
		return IOMMU_WRITE | IOMMU_CACHE;
	case VHOST_ACCESS_RW:
		return IOMMU_WRITE | IOMMU_READ | IOMMU_CACHE;
	default:
		WARN(1, " invalidate vhost IOTLB permission \n ");
		return IOMMU_CACHE;
	}
}
/*
 * Record the mapping [iova, iova + size) -> pa in the IOTLB and install it
 * for the device through dma_map, set_map (skipped while a batch is open)
 * or the IOMMU domain, in that order of preference.
 *
 * On success the pages are added to the mm's pinned_vm count. On failure
 * the IOTLB range is removed again. Returns 0 or a negative errno.
 */
static int vhost_vdpa_map(struct vhost_vdpa *v,
			  u64 iova, u64 size, u64 pa, u32 perm)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_dev *dev = &v->vdev;
	u64 last = iova + size - 1;
	int err;

	err = vhost_iotlb_add_range(dev->iotlb, iova, last, pa, perm);
	if (err)
		return err;

	if (ops->dma_map)
		err = ops->dma_map(vdpa, iova, size, pa, perm);
	else if (!ops->set_map)
		err = iommu_map(v->domain, iova, pa, size,
				perm_to_iommu_flags(perm));
	else if (!v->in_batch)
		err = ops->set_map(vdpa, dev->iotlb);

	if (err) {
		vhost_iotlb_del_range(dev->iotlb, iova, last);
		return err;
	}

	atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
	return 0;
}
/*
 * Tear down the mappings in [iova, iova + size): drop the IOTLB entries
 * (unpinning their pages), then remove the translation from the device
 * through dma_unmap, set_map (skipped while a batch is open) or the IOMMU
 * domain.
 */
static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_dev *dev = &v->vdev;

	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, iova, size);
		return;
	}

	if (!ops->set_map) {
		iommu_unmap(v->domain, iova, size);
		return;
	}

	if (!v->in_batch)
		ops->set_map(vdpa, dev->iotlb);
}
/*
 * Handle a VHOST_IOTLB_UPDATE message: pin the userspace pages starting at
 * msg->uaddr and map them for the device starting at msg->iova. Pages are
 * pinned in batches of at most one page worth of struct page pointers, and
 * each run of physically contiguous pages is mapped with one
 * vhost_vdpa_map() call.
 *
 * Returns 0 on success; -EINVAL if the range falls outside v->range or
 * covers no pages; -EEXIST if it overlaps an existing IOTLB entry; -ENOMEM
 * if the bookkeeping page cannot be allocated, the RLIMIT_MEMLOCK check
 * fails or fewer pages than requested could be pinned; otherwise the error
 * returned by pin_user_pages() or vhost_vdpa_map().
 *
 * On failure, pages pinned but not yet mapped are unpinned here and the
 * already mapped part is undone through vhost_vdpa_unmap().
 */
static int vhost_vdpa_process_iotlb_update ( struct vhost_vdpa * v ,
struct vhost_iotlb_msg * msg )
{
struct vhost_dev * dev = & v - > vdev ;
struct vhost_iotlb * iotlb = dev - > iotlb ;
struct page * * page_list ;
2020-10-30 00:53:36 +03:00
/* Number of struct page pointers that fit in the single bookkeeping page. */
unsigned long list_size = PAGE_SIZE / sizeof ( struct page * ) ;
2020-03-26 17:01:23 +03:00
unsigned int gup_flags = FOLL_LONGTERM ;
2020-10-30 00:53:36 +03:00
/*
 * map_pfn..last_pfn is the contiguous pfn run collected so far but not
 * yet mapped; last_pfn == 0 means no page has been seen yet.
 */
unsigned long npages , cur_base , map_pfn , last_pfn = 0 ;
2020-11-06 02:26:33 +03:00
/*
 * nchunks counts the page_list batches pinned since the last successful
 * vhost_vdpa_map(); non-zero means pinned-but-unmapped pages exist.
 */
unsigned long lock_limit , sz2pin , nchunks , i ;
2020-03-26 17:01:23 +03:00
u64 iova = msg - > iova ;
2020-11-06 02:26:33 +03:00
long pinned ;
2020-03-26 17:01:23 +03:00
int ret = 0 ;
2020-10-23 12:00:42 +03:00
/*
 * The request must lie inside the usable IOVA range.
 * NOTE(review): msg->iova + msg->size - 1 is computed without an overflow
 * check here; confirm that callers reject a zero or wrapping size.
 */
if ( msg - > iova < v - > range . first | |
msg - > iova + msg - > size - 1 > v - > range . last )
return - EINVAL ;
2020-03-26 17:01:23 +03:00
/* Refuse to overlap an existing mapping. */
if ( vhost_iotlb_itree_first ( iotlb , msg - > iova ,
msg - > iova + msg - > size - 1 ) )
return - EEXIST ;
2020-11-06 02:26:33 +03:00
/* Limit the use of memory for bookkeeping */
2020-10-30 00:53:36 +03:00
page_list = ( struct page * * ) __get_free_page ( GFP_KERNEL ) ;
if ( ! page_list )
return - ENOMEM ;
2020-03-26 17:01:23 +03:00
/* Pin for writing when the mapping has the write bit set. */
if ( msg - > perm & VHOST_ACCESS_WO )
gup_flags | = FOLL_WRITE ;
/* Page count of the request, including the sub-page offset of iova. */
npages = PAGE_ALIGN ( msg - > size + ( iova & ~ PAGE_MASK ) ) > > PAGE_SHIFT ;
2020-11-06 02:26:33 +03:00
if ( ! npages ) {
ret = - EINVAL ;
goto free ;
}
2020-03-26 17:01:23 +03:00
2020-06-09 07:33:25 +03:00
/* The mmap lock is held for read until the unlock label below. */
mmap_read_lock ( dev - > mm ) ;
2020-03-26 17:01:23 +03:00
/* Enforce RLIMIT_MEMLOCK against what the mm already has pinned. */
lock_limit = rlimit ( RLIMIT_MEMLOCK ) > > PAGE_SHIFT ;
2020-11-06 02:26:33 +03:00
if ( npages + atomic64_read ( & dev - > mm - > pinned_vm ) > lock_limit ) {
2020-10-30 00:53:36 +03:00
ret = - ENOMEM ;
2020-11-06 02:26:33 +03:00
goto unlock ;
2020-10-03 08:02:10 +03:00
}
2020-03-26 17:01:23 +03:00
2020-10-30 00:53:36 +03:00
/* From here on both the user address and the iova are page aligned. */
cur_base = msg - > uaddr & PAGE_MASK ;
2020-10-03 08:02:10 +03:00
iova & = PAGE_MASK ;
2020-11-06 02:26:33 +03:00
nchunks = 0 ;
2020-10-30 00:53:36 +03:00
while ( npages ) {
2020-11-06 02:26:33 +03:00
/* Pin at most one page_list worth of pages per iteration. */
sz2pin = min_t ( unsigned long , npages , list_size ) ;
pinned = pin_user_pages ( cur_base , sz2pin ,
gup_flags , page_list , NULL ) ;
if ( sz2pin ! = pinned ) {
if ( pinned < 0 ) {
ret = pinned ;
} else {
/* A short pin counts as failure: release what was pinned. */
unpin_user_pages ( page_list , pinned ) ;
ret = - ENOMEM ;
}
2020-10-30 00:53:36 +03:00
goto out ;
2020-11-06 02:26:33 +03:00
}
nchunks + + ;
2020-10-30 00:53:36 +03:00
/* The very first pinned page starts the first contiguous run. */
if ( ! last_pfn )
map_pfn = page_to_pfn ( page_list [ 0 ] ) ;
2020-11-06 02:26:33 +03:00
for ( i = 0 ; i < pinned ; i + + ) {
2020-10-30 00:53:36 +03:00
unsigned long this_pfn = page_to_pfn ( page_list [ i ] ) ;
u64 csize ;
/* A pfn discontinuity ends the current run: map it now. */
if ( last_pfn & & ( this_pfn ! = last_pfn + 1 ) ) {
/* Pin a contiguous chunk of memory */
csize = ( last_pfn - map_pfn + 1 ) < < PAGE_SHIFT ;
2020-11-06 02:26:33 +03:00
ret = vhost_vdpa_map ( v , iova , csize ,
map_pfn < < PAGE_SHIFT ,
msg - > perm ) ;
if ( ret ) {
/*
 * Unpin the pages that are left unmapped
 * from this point on in the current
 * page_list. The remaining outstanding
 * ones which may stride across several
 * chunks will be covered in the common
 * error path subsequently.
 */
unpin_user_pages ( & page_list [ i ] ,
pinned - i ) ;
2020-10-30 00:53:36 +03:00
goto out ;
2020-11-06 02:26:33 +03:00
}
2020-10-30 00:53:36 +03:00
/* Start a new run at this page; everything before it is mapped. */
map_pfn = this_pfn ;
iova + = csize ;
2020-11-06 02:26:33 +03:00
nchunks = 0 ;
2020-03-26 17:01:23 +03:00
}
2020-10-30 00:53:36 +03:00
last_pfn = this_pfn ;
2020-03-26 17:01:23 +03:00
}
2020-10-30 00:53:36 +03:00
2020-11-06 02:26:33 +03:00
cur_base + = pinned < < PAGE_SHIFT ;
npages - = pinned ;
2020-03-26 17:01:23 +03:00
}
2020-10-30 00:53:36 +03:00
/* Pin the rest chunk */
ret = vhost_vdpa_map ( v , iova , ( last_pfn - map_pfn + 1 ) < < PAGE_SHIFT ,
map_pfn < < PAGE_SHIFT , msg - > perm ) ;
2020-03-26 17:01:23 +03:00
out :
2020-10-30 00:53:36 +03:00
if ( ret ) {
2020-11-06 02:26:33 +03:00
if ( nchunks ) {
unsigned long pfn ;
/*
 * Unpin the outstanding pages which are yet to be
 * mapped but haven't due to vdpa_map() or
 * pin_user_pages() failure.
 *
 * Mapped pages are accounted in vdpa_map(), hence
 * the corresponding unpinning will be handled by
 * vdpa_unmap().
 */
WARN_ON ( ! last_pfn ) ;
for ( pfn = map_pfn ; pfn < = last_pfn ; pfn + + )
unpin_user_page ( pfn_to_page ( pfn ) ) ;
}
2020-03-26 17:01:23 +03:00
/* Undo whatever part of the request was already mapped. */
vhost_vdpa_unmap ( v , msg - > iova , msg - > size ) ;
2020-10-30 00:53:36 +03:00
}
2020-11-06 02:26:33 +03:00
unlock :
2020-06-09 07:33:25 +03:00
mmap_read_unlock ( dev - > mm ) ;
2020-11-06 02:26:33 +03:00
free :
2020-10-30 00:53:36 +03:00
free_page ( ( unsigned long ) page_list ) ;
2020-03-26 17:01:23 +03:00
return ret ;
}
static int vhost_vdpa_process_iotlb_msg ( struct vhost_dev * dev ,
struct vhost_iotlb_msg * msg )
{
struct vhost_vdpa * v = container_of ( dev , struct vhost_vdpa , vdev ) ;
2020-08-04 19:20:40 +03:00
struct vdpa_device * vdpa = v - > vdpa ;
const struct vdpa_config_ops * ops = vdpa - > config ;
2020-03-26 17:01:23 +03:00
int r = 0 ;
2021-04-12 12:55:12 +03:00
mutex_lock ( & dev - > mutex ) ;
2020-03-26 17:01:23 +03:00
r = vhost_dev_check_owner ( dev ) ;
if ( r )
2021-04-12 12:55:12 +03:00
goto unlock ;
2020-03-26 17:01:23 +03:00
switch ( msg - > type ) {
case VHOST_IOTLB_UPDATE :
r = vhost_vdpa_process_iotlb_update ( v , msg ) ;
break ;
case VHOST_IOTLB_INVALIDATE :
vhost_vdpa_unmap ( v , msg - > iova , msg - > size ) ;
break ;
2020-08-04 19:20:40 +03:00
case VHOST_IOTLB_BATCH_BEGIN :
v - > in_batch = true ;
break ;
case VHOST_IOTLB_BATCH_END :
if ( v - > in_batch & & ops - > set_map )
ops - > set_map ( vdpa , dev - > iotlb ) ;
v - > in_batch = false ;
break ;
2020-03-26 17:01:23 +03:00
default :
r = - EINVAL ;
break ;
}
2021-04-12 12:55:12 +03:00
unlock :
mutex_unlock ( & dev - > mutex ) ;
2020-03-26 17:01:23 +03:00
return r ;
}
static ssize_t vhost_vdpa_chr_write_iter ( struct kiocb * iocb ,
struct iov_iter * from )
{
struct file * file = iocb - > ki_filp ;
struct vhost_vdpa * v = file - > private_data ;
struct vhost_dev * dev = & v - > vdev ;
return vhost_chr_write_iter ( dev , from ) ;
}
static int vhost_vdpa_alloc_domain ( struct vhost_vdpa * v )
{
struct vdpa_device * vdpa = v - > vdpa ;
const struct vdpa_config_ops * ops = vdpa - > config ;
struct device * dma_dev = vdpa_get_dma_dev ( vdpa ) ;
struct bus_type * bus ;
int ret ;
/* Device want to do DMA by itself */
if ( ops - > set_map | | ops - > dma_map )
return 0 ;
bus = dma_dev - > bus ;
if ( ! bus )
return - EFAULT ;
if ( ! iommu_capable ( bus , IOMMU_CAP_CACHE_COHERENCY ) )
return - ENOTSUPP ;
v - > domain = iommu_domain_alloc ( bus ) ;
if ( ! v - > domain )
return - EIO ;
ret = iommu_attach_device ( v - > domain , dma_dev ) ;
if ( ret )
goto err_attach ;
return 0 ;
err_attach :
iommu_domain_free ( v - > domain ) ;
return ret ;
}
static void vhost_vdpa_free_domain ( struct vhost_vdpa * v )
{
struct vdpa_device * vdpa = v - > vdpa ;
struct device * dma_dev = vdpa_get_dma_dev ( vdpa ) ;
if ( v - > domain ) {
iommu_detach_device ( v - > domain , dma_dev ) ;
iommu_domain_free ( v - > domain ) ;
}
v - > domain = NULL ;
}
2020-10-23 12:00:42 +03:00
static void vhost_vdpa_set_iova_range ( struct vhost_vdpa * v )
{
struct vdpa_iova_range * range = & v - > range ;
struct iommu_domain_geometry geo ;
struct vdpa_device * vdpa = v - > vdpa ;
const struct vdpa_config_ops * ops = vdpa - > config ;
if ( ops - > get_iova_range ) {
* range = ops - > get_iova_range ( vdpa ) ;
} else if ( v - > domain & &
! iommu_domain_get_attr ( v - > domain ,
DOMAIN_ATTR_GEOMETRY , & geo ) & &
geo . force_aperture ) {
range - > first = geo . aperture_start ;
range - > last = geo . aperture_end ;
} else {
range - > first = 0 ;
range - > last = ULLONG_MAX ;
}
}
2020-03-26 17:01:23 +03:00
static int vhost_vdpa_open ( struct inode * inode , struct file * filep )
{
struct vhost_vdpa * v ;
struct vhost_dev * dev ;
struct vhost_virtqueue * * vqs ;
int nvqs , i , r , opened ;
v = container_of ( inode - > i_cdev , struct vhost_vdpa , cdev ) ;
opened = atomic_cmpxchg ( & v - > opened , 0 , 1 ) ;
if ( opened )
return - EBUSY ;
nvqs = v - > nvqs ;
vhost_vdpa_reset ( v ) ;
vqs = kmalloc_array ( nvqs , sizeof ( * vqs ) , GFP_KERNEL ) ;
if ( ! vqs ) {
r = - ENOMEM ;
goto err ;
}
dev = & v - > vdev ;
for ( i = 0 ; i < nvqs ; i + + ) {
vqs [ i ] = & v - > vqs [ i ] ;
vqs [ i ] - > handle_kick = handle_vq_kick ;
}
2020-05-29 11:02:58 +03:00
vhost_dev_init ( dev , vqs , nvqs , 0 , 0 , 0 , false ,
2020-03-26 17:01:23 +03:00
vhost_vdpa_process_iotlb_msg ) ;
dev - > iotlb = vhost_iotlb_alloc ( 0 , 0 ) ;
if ( ! dev - > iotlb ) {
r = - ENOMEM ;
goto err_init_iotlb ;
}
r = vhost_vdpa_alloc_domain ( v ) ;
if ( r )
goto err_init_iotlb ;
2020-10-23 12:00:42 +03:00
vhost_vdpa_set_iova_range ( v ) ;
2020-03-26 17:01:23 +03:00
filep - > private_data = v ;
return 0 ;
err_init_iotlb :
vhost_dev_cleanup ( & v - > vdev ) ;
2020-09-21 21:23:01 +03:00
kfree ( vqs ) ;
2020-03-26 17:01:23 +03:00
err :
atomic_dec ( & v - > opened ) ;
return r ;
}
2020-07-31 09:55:31 +03:00
static void vhost_vdpa_clean_irq ( struct vhost_vdpa * v )
{
int i ;
2021-02-24 14:48:45 +03:00
for ( i = 0 ; i < v - > nvqs ; i + + )
vhost_vdpa_unsetup_vq_irq ( v , i ) ;
2020-07-31 09:55:31 +03:00
}
2020-03-26 17:01:23 +03:00
static int vhost_vdpa_release ( struct inode * inode , struct file * filep )
{
struct vhost_vdpa * v = filep - > private_data ;
struct vhost_dev * d = & v - > vdev ;
mutex_lock ( & d - > mutex ) ;
filep - > private_data = NULL ;
vhost_vdpa_reset ( v ) ;
vhost_dev_stop ( & v - > vdev ) ;
vhost_vdpa_iotlb_free ( v ) ;
vhost_vdpa_free_domain ( v ) ;
2020-06-05 13:27:13 +03:00
vhost_vdpa_config_put ( v ) ;
2020-07-31 09:55:31 +03:00
vhost_vdpa_clean_irq ( v ) ;
2020-03-26 17:01:23 +03:00
vhost_dev_cleanup ( & v - > vdev ) ;
kfree ( v - > vdev . vqs ) ;
mutex_unlock ( & d - > mutex ) ;
atomic_dec ( & v - > opened ) ;
complete ( & v - > completion ) ;
return 0 ;
}
2020-06-04 21:47:29 +03:00
# ifdef CONFIG_MMU
2020-05-29 11:03:01 +03:00
static vm_fault_t vhost_vdpa_fault ( struct vm_fault * vmf )
{
struct vhost_vdpa * v = vmf - > vma - > vm_file - > private_data ;
struct vdpa_device * vdpa = v - > vdpa ;
const struct vdpa_config_ops * ops = vdpa - > config ;
struct vdpa_notification_area notify ;
struct vm_area_struct * vma = vmf - > vma ;
u16 index = vma - > vm_pgoff ;
notify = ops - > get_vq_notification ( vdpa , index ) ;
vma - > vm_page_prot = pgprot_noncached ( vma - > vm_page_prot ) ;
if ( remap_pfn_range ( vma , vmf - > address & PAGE_MASK ,
notify . addr > > PAGE_SHIFT , PAGE_SIZE ,
vma - > vm_page_prot ) )
return VM_FAULT_SIGBUS ;
return VM_FAULT_NOPAGE ;
}
/* VMA operations for doorbell mappings; pages are mapped from the fault handler. */
static const struct vm_operations_struct vhost_vdpa_vm_ops = {
. fault = vhost_vdpa_fault ,
} ;
static int vhost_vdpa_mmap ( struct file * file , struct vm_area_struct * vma )
{
struct vhost_vdpa * v = vma - > vm_file - > private_data ;
struct vdpa_device * vdpa = v - > vdpa ;
const struct vdpa_config_ops * ops = vdpa - > config ;
struct vdpa_notification_area notify ;
2020-06-10 11:58:52 +03:00
unsigned long index = vma - > vm_pgoff ;
2020-05-29 11:03:01 +03:00
if ( vma - > vm_end - vma - > vm_start ! = PAGE_SIZE )
return - EINVAL ;
if ( ( vma - > vm_flags & VM_SHARED ) = = 0 )
return - EINVAL ;
if ( vma - > vm_flags & VM_READ )
return - EINVAL ;
if ( index > 65535 )
return - EINVAL ;
if ( ! ops - > get_vq_notification )
return - ENOTSUPP ;
/* To be safe and easily modelled by userspace, We only
* support the doorbell which sits on the page boundary and
* does not share the page with other registers .
*/
notify = ops - > get_vq_notification ( vdpa , index ) ;
if ( notify . addr & ( PAGE_SIZE - 1 ) )
return - EINVAL ;
if ( vma - > vm_end - vma - > vm_start ! = notify . size )
return - ENOTSUPP ;
2021-04-13 12:15:57 +03:00
vma - > vm_flags | = VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP ;
2020-05-29 11:03:01 +03:00
vma - > vm_ops = & vhost_vdpa_vm_ops ;
return 0 ;
}
2020-06-04 21:47:29 +03:00
# endif /* CONFIG_MMU */
2020-05-29 11:03:01 +03:00
2020-03-26 17:01:23 +03:00
static const struct file_operations vhost_vdpa_fops = {
. owner = THIS_MODULE ,
. open = vhost_vdpa_open ,
. release = vhost_vdpa_release ,
. write_iter = vhost_vdpa_chr_write_iter ,
. unlocked_ioctl = vhost_vdpa_unlocked_ioctl ,
2020-06-04 21:47:29 +03:00
# ifdef CONFIG_MMU
2020-05-29 11:03:01 +03:00
. mmap = vhost_vdpa_mmap ,
2020-06-04 21:47:29 +03:00
# endif /* CONFIG_MMU */
2020-03-26 17:01:23 +03:00
. compat_ioctl = compat_ptr_ioctl ,
} ;
static void vhost_vdpa_release_dev ( struct device * device )
{
struct vhost_vdpa * v =
container_of ( device , struct vhost_vdpa , dev ) ;
ida_simple_remove ( & vhost_vdpa_ida , v - > minor ) ;
kfree ( v - > vqs ) ;
kfree ( v ) ;
}
static int vhost_vdpa_probe ( struct vdpa_device * vdpa )
{
const struct vdpa_config_ops * ops = vdpa - > config ;
struct vhost_vdpa * v ;
2020-08-04 19:20:42 +03:00
int minor ;
2020-03-26 17:01:23 +03:00
int r ;
/* Currently, we only accept the network devices. */
if ( ops - > get_device_id ( vdpa ) ! = VIRTIO_ID_NET )
return - ENOTSUPP ;
v = kzalloc ( sizeof ( * v ) , GFP_KERNEL | __GFP_RETRY_MAYFAIL ) ;
if ( ! v )
return - ENOMEM ;
minor = ida_simple_get ( & vhost_vdpa_ida , 0 ,
VHOST_VDPA_DEV_MAX , GFP_KERNEL ) ;
if ( minor < 0 ) {
kfree ( v ) ;
return minor ;
}
atomic_set ( & v - > opened , 0 ) ;
v - > minor = minor ;
v - > vdpa = vdpa ;
2020-08-04 19:20:42 +03:00
v - > nvqs = vdpa - > nvqs ;
2020-03-26 17:01:23 +03:00
v - > virtio_id = ops - > get_device_id ( vdpa ) ;
device_initialize ( & v - > dev ) ;
v - > dev . release = vhost_vdpa_release_dev ;
v - > dev . parent = & vdpa - > dev ;
v - > dev . devt = MKDEV ( MAJOR ( vhost_vdpa_major ) , minor ) ;
2020-08-04 19:20:42 +03:00
v - > vqs = kmalloc_array ( v - > nvqs , sizeof ( struct vhost_virtqueue ) ,
2020-03-26 17:01:23 +03:00
GFP_KERNEL ) ;
if ( ! v - > vqs ) {
r = - ENOMEM ;
goto err ;
}
r = dev_set_name ( & v - > dev , " vhost-vdpa-%u " , minor ) ;
if ( r )
goto err ;
cdev_init ( & v - > cdev , & vhost_vdpa_fops ) ;
v - > cdev . owner = THIS_MODULE ;
r = cdev_device_add ( & v - > cdev , & v - > dev ) ;
if ( r )
goto err ;
init_completion ( & v - > completion ) ;
vdpa_set_drvdata ( vdpa , v ) ;
return 0 ;
err :
put_device ( & v - > dev ) ;
return r ;
}
static void vhost_vdpa_remove ( struct vdpa_device * vdpa )
{
struct vhost_vdpa * v = vdpa_get_drvdata ( vdpa ) ;
int opened ;
cdev_device_del ( & v - > cdev , & v - > dev ) ;
do {
opened = atomic_cmpxchg ( & v - > opened , 0 , 1 ) ;
if ( ! opened )
break ;
wait_for_completion ( & v - > completion ) ;
} while ( 1 ) ;
put_device ( & v - > dev ) ;
}
/* vDPA bus driver registered at module init; binds through probe/remove. */
static struct vdpa_driver vhost_vdpa_driver = {
. driver = {
. name = " vhost_vdpa " ,
} ,
. probe = vhost_vdpa_probe ,
. remove = vhost_vdpa_remove ,
} ;
/*
 * Module init: reserve the character device region, then register the
 * vDPA driver. The region is released again if registration fails.
 * Returns 0 or the error of the failing step.
 */
static int __init vhost_vdpa_init(void)
{
	int ret;

	ret = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				  " vhost-vdpa ");
	if (ret)
		return ret;

	ret = vdpa_register_driver(&vhost_vdpa_driver);
	if (ret)
		unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);

	return ret;
}
module_init(vhost_vdpa_init);
/*
 * Module exit: unregister the vDPA driver, then release the character
 * device region reserved at init.
 */
static void __exit vhost_vdpa_exit ( void )
{
vdpa_unregister_driver ( & vhost_vdpa_driver ) ;
unregister_chrdev_region ( vhost_vdpa_major , VHOST_VDPA_DEV_MAX ) ;
}
module_exit ( vhost_vdpa_exit ) ;
/* Module metadata. */
MODULE_VERSION ( " 0.0.1 " ) ;
MODULE_LICENSE ( " GPL v2 " ) ;
MODULE_AUTHOR ( " Intel Corporation " ) ;
MODULE_DESCRIPTION ( " vDPA-based vhost backend for virtio " ) ;