2019-06-04 11:11:15 +03:00
// SPDX-License-Identifier: GPL-2.0-only
2016-07-28 17:36:34 +03:00
/*
 * vhost transport for vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 */
# include <linux/miscdevice.h>
# include <linux/atomic.h>
# include <linux/module.h>
# include <linux/mutex.h>
# include <linux/vmalloc.h>
# include <net/sock.h>
# include <linux/virtio_vsock.h>
# include <linux/vhost.h>
2018-11-05 13:35:47 +03:00
# include <linux/hashtable.h>
2016-07-28 17:36:34 +03:00
# include <net/af_vsock.h>
# include "vhost.h"
/* CID reported by vhost_transport_get_local_cid() for the host side. */
#define VHOST_VSOCK_DEFAULT_HOST_CID	2

/* Max number of bytes transferred before requeueing the job.
 * Using this limit prevents one virtqueue from starving others.
 */
#define VHOST_VSOCK_WEIGHT		0x80000

/* Max number of packets transferred before requeueing the job.
 * Using this limit prevents one virtqueue from starving others with
 * small pkts.
 */
#define VHOST_VSOCK_PKT_WEIGHT		256
2016-07-28 17:36:34 +03:00
enum {
vhost/vsock: add IOTLB API support
This patch enables the IOTLB API support for vhost-vsock devices,
allowing the userspace to emulate an IOMMU for the guest.
These changes were made following vhost-net, in details this patch:
- exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb
device if the feature is acked
- implements VHOST_GET_BACKEND_FEATURES and
VHOST_SET_BACKEND_FEATURES ioctls
- calls vq_meta_prefetch() before vq processing to prefetch vq
metadata address in IOTLB
- provides .read_iter, .write_iter, and .poll callbacks for the
chardev; they are used by the userspace to exchange IOTLB messages
This patch was tested specifying "intel_iommu=strict" in the guest
kernel command line. I used QEMU with a patch applied [1] to fix a
simple issue (that patch was merged in QEMU v5.2.0):
$ qemu -M q35,accel=kvm,kernel-irqchip=split \
-drive file=fedora.qcow2,format=qcow2,if=virtio \
-device intel-iommu,intremap=on,device-iotlb=on \
-device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on
[1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20201223143638.123417-1-sgarzare@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
2020-12-23 17:36:38 +03:00
VHOST_VSOCK_FEATURES = VHOST_FEATURES |
2021-06-11 14:13:37 +03:00
( 1ULL < < VIRTIO_F_ACCESS_PLATFORM ) |
( 1ULL < < VIRTIO_VSOCK_F_SEQPACKET )
vhost/vsock: add IOTLB API support
This patch enables the IOTLB API support for vhost-vsock devices,
allowing the userspace to emulate an IOMMU for the guest.
These changes were made following vhost-net, in details this patch:
- exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb
device if the feature is acked
- implements VHOST_GET_BACKEND_FEATURES and
VHOST_SET_BACKEND_FEATURES ioctls
- calls vq_meta_prefetch() before vq processing to prefetch vq
metadata address in IOTLB
- provides .read_iter, .write_iter, and .poll callbacks for the
chardev; they are used by the userspace to exchange IOTLB messages
This patch was tested specifying "intel_iommu=strict" in the guest
kernel command line. I used QEMU with a patch applied [1] to fix a
simple issue (that patch was merged in QEMU v5.2.0):
$ qemu -M q35,accel=kvm,kernel-irqchip=split \
-drive file=fedora.qcow2,format=qcow2,if=virtio \
-device intel-iommu,intremap=on,device-iotlb=on \
-device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on
[1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20201223143638.123417-1-sgarzare@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
2020-12-23 17:36:38 +03:00
} ;
enum {
VHOST_VSOCK_BACKEND_FEATURES = ( 1ULL < < VHOST_BACKEND_F_IOTLB_MSG_V2 )
2016-07-28 17:36:34 +03:00
} ;
/* Used to track all the vhost_vsock instances on the system. */
2018-11-05 20:33:22 +03:00
static DEFINE_MUTEX ( vhost_vsock_mutex ) ;
2018-11-05 13:35:47 +03:00
static DEFINE_READ_MOSTLY_HASHTABLE ( vhost_vsock_hash , 8 ) ;
2016-07-28 17:36:34 +03:00
struct vhost_vsock {
struct vhost_dev dev ;
struct vhost_virtqueue vqs [ 2 ] ;
2018-11-05 20:33:22 +03:00
/* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
2018-11-05 13:35:47 +03:00
struct hlist_node hash ;
2016-07-28 17:36:34 +03:00
struct vhost_work send_pkt_work ;
spinlock_t send_pkt_list_lock ;
struct list_head send_pkt_list ; /* host->guest pending packets */
atomic_t queued_replies ;
u32 guest_cid ;
2021-06-11 14:13:37 +03:00
bool seqpacket_allow ;
2016-07-28 17:36:34 +03:00
} ;
/* Return the CID this transport uses for the host side; it is a fixed
 * constant for vhost-vsock.
 */
static u32 vhost_transport_get_local_cid(void)
{
	const u32 host_cid = VHOST_VSOCK_DEFAULT_HOST_CID;

	return host_cid;
}
2018-11-05 20:33:22 +03:00
/* Look up the vhost_vsock instance that owns @guest_cid.
 *
 * Returns the matching instance, or NULL when none is registered.
 * Callers that dereference the return value must hold vhost_vsock_mutex or
 * the RCU read lock.
 */
static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
{
	struct vhost_vsock *cur;

	hash_for_each_possible_rcu(vhost_vsock_hash, cur, hash, guest_cid) {
		u32 cid = cur->guest_cid;

		/* A CID of 0 marks an instance that has no CID yet; such
		 * entries never match.
		 */
		if (cid != 0 && cid == guest_cid)
			return cur;
	}

	return NULL;
}
static void
vhost_transport_do_send_pkt ( struct vhost_vsock * vsock ,
struct vhost_virtqueue * vq )
{
struct vhost_virtqueue * tx_vq = & vsock - > vqs [ VSOCK_VQ_TX ] ;
2019-05-17 07:29:51 +03:00
int pkts = 0 , total_len = 0 ;
2016-07-28 17:36:34 +03:00
bool added = false ;
bool restart_tx = false ;
mutex_lock ( & vq - > mutex ) ;
2020-03-31 22:27:57 +03:00
if ( ! vhost_vq_get_backend ( vq ) )
2016-07-28 17:36:34 +03:00
goto out ;
vhost/vsock: add IOTLB API support
This patch enables the IOTLB API support for vhost-vsock devices,
allowing the userspace to emulate an IOMMU for the guest.
These changes were made following vhost-net, in details this patch:
- exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb
device if the feature is acked
- implements VHOST_GET_BACKEND_FEATURES and
VHOST_SET_BACKEND_FEATURES ioctls
- calls vq_meta_prefetch() before vq processing to prefetch vq
metadata address in IOTLB
- provides .read_iter, .write_iter, and .poll callbacks for the
chardev; they are used by the userspace to exchange IOTLB messages
This patch was tested specifying "intel_iommu=strict" in the guest
kernel command line. I used QEMU with a patch applied [1] to fix a
simple issue (that patch was merged in QEMU v5.2.0):
$ qemu -M q35,accel=kvm,kernel-irqchip=split \
-drive file=fedora.qcow2,format=qcow2,if=virtio \
-device intel-iommu,intremap=on,device-iotlb=on \
-device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on
[1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20201223143638.123417-1-sgarzare@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
2020-12-23 17:36:38 +03:00
if ( ! vq_meta_prefetch ( vq ) )
goto out ;
2016-07-28 17:36:34 +03:00
/* Avoid further vmexits, we're already processing the virtqueue */
vhost_disable_notify ( & vsock - > dev , vq ) ;
2019-05-17 07:29:51 +03:00
do {
2016-07-28 17:36:34 +03:00
struct virtio_vsock_pkt * pkt ;
struct iov_iter iov_iter ;
unsigned out , in ;
size_t nbytes ;
2019-07-30 18:43:33 +03:00
size_t iov_len , payload_len ;
2016-07-28 17:36:34 +03:00
int head ;
2021-09-03 15:32:35 +03:00
u32 flags_to_restore = 0 ;
2016-07-28 17:36:34 +03:00
spin_lock_bh ( & vsock - > send_pkt_list_lock ) ;
if ( list_empty ( & vsock - > send_pkt_list ) ) {
spin_unlock_bh ( & vsock - > send_pkt_list_lock ) ;
vhost_enable_notify ( & vsock - > dev , vq ) ;
break ;
}
pkt = list_first_entry ( & vsock - > send_pkt_list ,
struct virtio_vsock_pkt , list ) ;
list_del_init ( & pkt - > list ) ;
spin_unlock_bh ( & vsock - > send_pkt_list_lock ) ;
head = vhost_get_vq_desc ( vq , vq - > iov , ARRAY_SIZE ( vq - > iov ) ,
& out , & in , NULL , NULL ) ;
if ( head < 0 ) {
spin_lock_bh ( & vsock - > send_pkt_list_lock ) ;
list_add ( & pkt - > list , & vsock - > send_pkt_list ) ;
spin_unlock_bh ( & vsock - > send_pkt_list_lock ) ;
break ;
}
if ( head = = vq - > num ) {
spin_lock_bh ( & vsock - > send_pkt_list_lock ) ;
list_add ( & pkt - > list , & vsock - > send_pkt_list ) ;
spin_unlock_bh ( & vsock - > send_pkt_list_lock ) ;
/* We cannot finish yet if more buffers snuck in while
* re - enabling notify .
*/
if ( unlikely ( vhost_enable_notify ( & vsock - > dev , vq ) ) ) {
vhost_disable_notify ( & vsock - > dev , vq ) ;
continue ;
}
break ;
}
if ( out ) {
virtio_transport_free_pkt ( pkt ) ;
vq_err ( vq , " Expected 0 output buffers, got %u \n " , out ) ;
break ;
}
2019-07-30 18:43:33 +03:00
iov_len = iov_length ( & vq - > iov [ out ] , in ) ;
if ( iov_len < sizeof ( pkt - > hdr ) ) {
virtio_transport_free_pkt ( pkt ) ;
vq_err ( vq , " Buffer len [%zu] too small \n " , iov_len ) ;
break ;
}
iov_iter_init ( & iov_iter , READ , & vq - > iov [ out ] , in , iov_len ) ;
payload_len = pkt - > len - pkt - > off ;
/* If the packet is greater than the space available in the
* buffer , we split it using multiple buffers .
*/
2021-06-11 14:13:37 +03:00
if ( payload_len > iov_len - sizeof ( pkt - > hdr ) ) {
2019-07-30 18:43:33 +03:00
payload_len = iov_len - sizeof ( pkt - > hdr ) ;
2021-06-11 14:13:37 +03:00
/* As we are copying pieces of large packet's buffer to
* small rx buffers , headers of packets in rx queue are
* created dynamically and are initialized with header
* of current packet ( except length ) . But in case of
2021-09-03 15:31:06 +03:00
* SOCK_SEQPACKET , we also must clear message delimeter
2021-09-03 15:32:35 +03:00
* bit ( VIRTIO_VSOCK_SEQ_EOM ) and MSG_EOR bit
* ( VIRTIO_VSOCK_SEQ_EOR ) if set . Otherwise ,
* there will be sequence of packets with these
* bits set . After initialized header will be copied to
* rx buffer , these required bits will be restored .
2021-06-11 14:13:37 +03:00
*/
2021-09-03 15:31:06 +03:00
if ( le32_to_cpu ( pkt - > hdr . flags ) & VIRTIO_VSOCK_SEQ_EOM ) {
pkt - > hdr . flags & = ~ cpu_to_le32 ( VIRTIO_VSOCK_SEQ_EOM ) ;
2021-09-03 15:32:35 +03:00
flags_to_restore | = VIRTIO_VSOCK_SEQ_EOM ;
if ( le32_to_cpu ( pkt - > hdr . flags ) & VIRTIO_VSOCK_SEQ_EOR ) {
pkt - > hdr . flags & = ~ cpu_to_le32 ( VIRTIO_VSOCK_SEQ_EOR ) ;
flags_to_restore | = VIRTIO_VSOCK_SEQ_EOR ;
}
2021-06-11 14:13:37 +03:00
}
}
2019-07-30 18:43:33 +03:00
/* Set the correct length in the header */
pkt - > hdr . len = cpu_to_le32 ( payload_len ) ;
2016-07-28 17:36:34 +03:00
nbytes = copy_to_iter ( & pkt - > hdr , sizeof ( pkt - > hdr ) , & iov_iter ) ;
if ( nbytes ! = sizeof ( pkt - > hdr ) ) {
virtio_transport_free_pkt ( pkt ) ;
vq_err ( vq , " Faulted on copying pkt hdr \n " ) ;
break ;
}
2019-07-30 18:43:33 +03:00
nbytes = copy_to_iter ( pkt - > buf + pkt - > off , payload_len ,
& iov_iter ) ;
if ( nbytes ! = payload_len ) {
2016-07-28 17:36:34 +03:00
virtio_transport_free_pkt ( pkt ) ;
vq_err ( vq , " Faulted on copying pkt buf \n " ) ;
break ;
}
2020-04-24 18:08:29 +03:00
/* Deliver to monitoring devices all packets that we
* will transmit .
2017-04-21 12:10:46 +03:00
*/
virtio_transport_deliver_tap_pkt ( pkt ) ;
2020-04-24 18:08:29 +03:00
vhost_add_used ( vq , head , sizeof ( pkt - > hdr ) + payload_len ) ;
added = true ;
2019-07-30 18:43:33 +03:00
pkt - > off + = payload_len ;
total_len + = payload_len ;
/* If we didn't send all the payload we can requeue the packet
* to send it with the next available buffer .
*/
if ( pkt - > off < pkt - > len ) {
2021-09-03 15:32:35 +03:00
pkt - > hdr . flags | = cpu_to_le32 ( flags_to_restore ) ;
2021-06-11 14:13:37 +03:00
2020-04-24 18:08:30 +03:00
/* We are queueing the same virtio_vsock_pkt to handle
* the remaining bytes , and we want to deliver it
* to monitoring devices in the next iteration .
*/
pkt - > tap_delivered = false ;
2019-07-30 18:43:33 +03:00
spin_lock_bh ( & vsock - > send_pkt_list_lock ) ;
list_add ( & pkt - > list , & vsock - > send_pkt_list ) ;
spin_unlock_bh ( & vsock - > send_pkt_list_lock ) ;
} else {
if ( pkt - > reply ) {
int val ;
val = atomic_dec_return ( & vsock - > queued_replies ) ;
/* Do we have resources to resume tx
* processing ?
*/
if ( val + 1 = = tx_vq - > num )
restart_tx = true ;
}
virtio_transport_free_pkt ( pkt ) ;
}
2019-05-17 07:29:51 +03:00
} while ( likely ( ! vhost_exceeds_weight ( vq , + + pkts , total_len ) ) ) ;
2016-07-28 17:36:34 +03:00
if ( added )
vhost_signal ( & vsock - > dev , vq ) ;
out :
mutex_unlock ( & vq - > mutex ) ;
if ( restart_tx )
vhost_poll_queue ( & tx_vq - > poll ) ;
}
static void vhost_transport_send_pkt_work ( struct vhost_work * work )
{
struct vhost_virtqueue * vq ;
struct vhost_vsock * vsock ;
vsock = container_of ( work , struct vhost_vsock , send_pkt_work ) ;
vq = & vsock - > vqs [ VSOCK_VQ_RX ] ;
vhost_transport_do_send_pkt ( vsock , vq ) ;
}
static int
vhost_transport_send_pkt ( struct virtio_vsock_pkt * pkt )
{
struct vhost_vsock * vsock ;
int len = pkt - > len ;
2018-11-05 13:35:47 +03:00
rcu_read_lock ( ) ;
2016-07-28 17:36:34 +03:00
/* Find the vhost_vsock according to guest context id */
vsock = vhost_vsock_get ( le64_to_cpu ( pkt - > hdr . dst_cid ) ) ;
if ( ! vsock ) {
2018-11-05 13:35:47 +03:00
rcu_read_unlock ( ) ;
2016-07-28 17:36:34 +03:00
virtio_transport_free_pkt ( pkt ) ;
return - ENODEV ;
}
if ( pkt - > reply )
atomic_inc ( & vsock - > queued_replies ) ;
spin_lock_bh ( & vsock - > send_pkt_list_lock ) ;
list_add_tail ( & pkt - > list , & vsock - > send_pkt_list ) ;
spin_unlock_bh ( & vsock - > send_pkt_list_lock ) ;
vhost_work_queue ( & vsock - > dev , & vsock - > send_pkt_work ) ;
2018-11-05 13:35:47 +03:00
rcu_read_unlock ( ) ;
2016-07-28 17:36:34 +03:00
return len ;
}
2017-03-15 04:32:15 +03:00
static int
vhost_transport_cancel_pkt ( struct vsock_sock * vsk )
{
struct vhost_vsock * vsock ;
struct virtio_vsock_pkt * pkt , * n ;
int cnt = 0 ;
2018-11-05 13:35:47 +03:00
int ret = - ENODEV ;
2017-03-15 04:32:15 +03:00
LIST_HEAD ( freeme ) ;
2018-11-05 13:35:47 +03:00
rcu_read_lock ( ) ;
2017-03-15 04:32:15 +03:00
/* Find the vhost_vsock according to guest context id */
vsock = vhost_vsock_get ( vsk - > remote_addr . svm_cid ) ;
if ( ! vsock )
2018-11-05 13:35:47 +03:00
goto out ;
2017-03-15 04:32:15 +03:00
spin_lock_bh ( & vsock - > send_pkt_list_lock ) ;
list_for_each_entry_safe ( pkt , n , & vsock - > send_pkt_list , list ) {
if ( pkt - > vsk ! = vsk )
continue ;
list_move ( & pkt - > list , & freeme ) ;
}
spin_unlock_bh ( & vsock - > send_pkt_list_lock ) ;
list_for_each_entry_safe ( pkt , n , & freeme , list ) {
if ( pkt - > reply )
cnt + + ;
list_del ( & pkt - > list ) ;
virtio_transport_free_pkt ( pkt ) ;
}
if ( cnt ) {
struct vhost_virtqueue * tx_vq = & vsock - > vqs [ VSOCK_VQ_TX ] ;
int new_cnt ;
new_cnt = atomic_sub_return ( cnt , & vsock - > queued_replies ) ;
if ( new_cnt + cnt > = tx_vq - > num & & new_cnt < tx_vq - > num )
vhost_poll_queue ( & tx_vq - > poll ) ;
}
2018-11-05 13:35:47 +03:00
ret = 0 ;
out :
rcu_read_unlock ( ) ;
return ret ;
2017-03-15 04:32:15 +03:00
}
2016-07-28 17:36:34 +03:00
static struct virtio_vsock_pkt *
vhost_vsock_alloc_pkt ( struct vhost_virtqueue * vq ,
unsigned int out , unsigned int in )
{
struct virtio_vsock_pkt * pkt ;
struct iov_iter iov_iter ;
size_t nbytes ;
size_t len ;
if ( in ! = 0 ) {
vq_err ( vq , " Expected 0 input buffers, got %u \n " , in ) ;
return NULL ;
}
pkt = kzalloc ( sizeof ( * pkt ) , GFP_KERNEL ) ;
if ( ! pkt )
return NULL ;
len = iov_length ( vq - > iov , out ) ;
iov_iter_init ( & iov_iter , WRITE , vq - > iov , out , len ) ;
nbytes = copy_from_iter ( & pkt - > hdr , sizeof ( pkt - > hdr ) , & iov_iter ) ;
if ( nbytes ! = sizeof ( pkt - > hdr ) ) {
vq_err ( vq , " Expected %zu bytes for pkt->hdr, got %zu bytes \n " ,
sizeof ( pkt - > hdr ) , nbytes ) ;
kfree ( pkt ) ;
return NULL ;
}
2021-06-11 14:13:37 +03:00
pkt - > len = le32_to_cpu ( pkt - > hdr . len ) ;
2016-07-28 17:36:34 +03:00
/* No payload */
if ( ! pkt - > len )
return pkt ;
/* The pkt is too big */
if ( pkt - > len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE ) {
kfree ( pkt ) ;
return NULL ;
}
2022-09-28 09:45:38 +03:00
pkt - > buf = kvmalloc ( pkt - > len , GFP_KERNEL ) ;
2016-07-28 17:36:34 +03:00
if ( ! pkt - > buf ) {
kfree ( pkt ) ;
return NULL ;
}
2019-07-30 18:43:30 +03:00
pkt - > buf_len = pkt - > len ;
2016-07-28 17:36:34 +03:00
nbytes = copy_from_iter ( pkt - > buf , pkt - > len , & iov_iter ) ;
if ( nbytes ! = pkt - > len ) {
vq_err ( vq , " Expected %u byte payload, got %zu bytes \n " ,
pkt - > len , nbytes ) ;
virtio_transport_free_pkt ( pkt ) ;
return NULL ;
}
return pkt ;
}
/* Is there space left for replies to rx packets? */
static bool vhost_vsock_more_replies ( struct vhost_vsock * vsock )
{
struct vhost_virtqueue * vq = & vsock - > vqs [ VSOCK_VQ_TX ] ;
int val ;
smp_rmb ( ) ; /* paired with atomic_inc() and atomic_dec_return() */
val = atomic_read ( & vsock - > queued_replies ) ;
return val < vq - > num ;
}
2021-06-11 14:13:37 +03:00
static bool vhost_transport_seqpacket_allow ( u32 remote_cid ) ;
2019-11-14 12:57:40 +03:00
static struct virtio_transport vhost_transport = {
. transport = {
2019-11-14 12:57:48 +03:00
. module = THIS_MODULE ,
2019-11-14 12:57:40 +03:00
. get_local_cid = vhost_transport_get_local_cid ,
. init = virtio_transport_do_socket_init ,
. destruct = virtio_transport_destruct ,
. release = virtio_transport_release ,
. connect = virtio_transport_connect ,
. shutdown = virtio_transport_shutdown ,
. cancel_pkt = vhost_transport_cancel_pkt ,
. dgram_enqueue = virtio_transport_dgram_enqueue ,
. dgram_dequeue = virtio_transport_dgram_dequeue ,
. dgram_bind = virtio_transport_dgram_bind ,
. dgram_allow = virtio_transport_dgram_allow ,
. stream_enqueue = virtio_transport_stream_enqueue ,
. stream_dequeue = virtio_transport_stream_dequeue ,
. stream_has_data = virtio_transport_stream_has_data ,
. stream_has_space = virtio_transport_stream_has_space ,
. stream_rcvhiwat = virtio_transport_stream_rcvhiwat ,
. stream_is_active = virtio_transport_stream_is_active ,
. stream_allow = virtio_transport_stream_allow ,
2021-06-11 14:13:37 +03:00
. seqpacket_dequeue = virtio_transport_seqpacket_dequeue ,
. seqpacket_enqueue = virtio_transport_seqpacket_enqueue ,
. seqpacket_allow = vhost_transport_seqpacket_allow ,
. seqpacket_has_data = virtio_transport_seqpacket_has_data ,
2019-11-14 12:57:40 +03:00
. notify_poll_in = virtio_transport_notify_poll_in ,
. notify_poll_out = virtio_transport_notify_poll_out ,
. notify_recv_init = virtio_transport_notify_recv_init ,
. notify_recv_pre_block = virtio_transport_notify_recv_pre_block ,
. notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue ,
. notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue ,
. notify_send_init = virtio_transport_notify_send_init ,
. notify_send_pre_block = virtio_transport_notify_send_pre_block ,
. notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue ,
. notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue ,
2019-11-14 12:57:42 +03:00
. notify_buffer_size = virtio_transport_notify_buffer_size ,
2019-11-14 12:57:40 +03:00
} ,
. send_pkt = vhost_transport_send_pkt ,
} ;
2021-06-11 14:13:37 +03:00
/* Report whether SOCK_SEQPACKET may be used towards @remote_cid.
 *
 * Returns the seqpacket_allow flag of the device that owns @remote_cid, or
 * false when no such device is registered.
 */
static bool vhost_transport_seqpacket_allow(u32 remote_cid)
{
	struct vhost_vsock *vsock;
	bool allowed;

	/* The lookup result is dereferenced, so stay inside the RCU
	 * read-side section until the flag has been read.
	 */
	rcu_read_lock();
	vsock = vhost_vsock_get(remote_cid);
	allowed = vsock ? vsock->seqpacket_allow : false;
	rcu_read_unlock();

	return allowed;
}
2016-07-28 17:36:34 +03:00
static void vhost_vsock_handle_tx_kick ( struct vhost_work * work )
{
struct vhost_virtqueue * vq = container_of ( work , struct vhost_virtqueue ,
poll . work ) ;
struct vhost_vsock * vsock = container_of ( vq - > dev , struct vhost_vsock ,
dev ) ;
struct virtio_vsock_pkt * pkt ;
2019-05-17 07:29:51 +03:00
int head , pkts = 0 , total_len = 0 ;
2016-07-28 17:36:34 +03:00
unsigned int out , in ;
bool added = false ;
mutex_lock ( & vq - > mutex ) ;
2020-03-31 22:27:57 +03:00
if ( ! vhost_vq_get_backend ( vq ) )
2016-07-28 17:36:34 +03:00
goto out ;
vhost/vsock: add IOTLB API support
This patch enables the IOTLB API support for vhost-vsock devices,
allowing the userspace to emulate an IOMMU for the guest.
These changes were made following vhost-net, in details this patch:
- exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb
device if the feature is acked
- implements VHOST_GET_BACKEND_FEATURES and
VHOST_SET_BACKEND_FEATURES ioctls
- calls vq_meta_prefetch() before vq processing to prefetch vq
metadata address in IOTLB
- provides .read_iter, .write_iter, and .poll callbacks for the
chardev; they are used by the userspace to exchange IOTLB messages
This patch was tested specifying "intel_iommu=strict" in the guest
kernel command line. I used QEMU with a patch applied [1] to fix a
simple issue (that patch was merged in QEMU v5.2.0):
$ qemu -M q35,accel=kvm,kernel-irqchip=split \
-drive file=fedora.qcow2,format=qcow2,if=virtio \
-device intel-iommu,intremap=on,device-iotlb=on \
-device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on
[1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20201223143638.123417-1-sgarzare@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
2020-12-23 17:36:38 +03:00
if ( ! vq_meta_prefetch ( vq ) )
goto out ;
2016-07-28 17:36:34 +03:00
vhost_disable_notify ( & vsock - > dev , vq ) ;
2019-05-17 07:29:51 +03:00
do {
2016-07-28 17:36:34 +03:00
if ( ! vhost_vsock_more_replies ( vsock ) ) {
/* Stop tx until the device processes already
* pending replies . Leave tx virtqueue
* callbacks disabled .
*/
goto no_more_replies ;
}
head = vhost_get_vq_desc ( vq , vq - > iov , ARRAY_SIZE ( vq - > iov ) ,
& out , & in , NULL , NULL ) ;
if ( head < 0 )
break ;
if ( head = = vq - > num ) {
if ( unlikely ( vhost_enable_notify ( & vsock - > dev , vq ) ) ) {
vhost_disable_notify ( & vsock - > dev , vq ) ;
continue ;
}
break ;
}
pkt = vhost_vsock_alloc_pkt ( vq , out , in ) ;
if ( ! pkt ) {
vq_err ( vq , " Faulted on pkt \n " ) ;
continue ;
}
2021-11-22 19:35:25 +03:00
total_len + = sizeof ( pkt - > hdr ) + pkt - > len ;
2016-08-04 16:52:53 +03:00
2017-04-21 12:10:46 +03:00
/* Deliver to monitoring devices all received packets */
virtio_transport_deliver_tap_pkt ( pkt ) ;
2016-07-28 17:36:34 +03:00
/* Only accept correctly addressed packets */
2019-12-06 17:39:12 +03:00
if ( le64_to_cpu ( pkt - > hdr . src_cid ) = = vsock - > guest_cid & &
le64_to_cpu ( pkt - > hdr . dst_cid ) = =
vhost_transport_get_local_cid ( ) )
2019-11-14 12:57:40 +03:00
virtio_transport_recv_pkt ( & vhost_transport , pkt ) ;
2016-07-28 17:36:34 +03:00
else
virtio_transport_free_pkt ( pkt ) ;
2021-11-22 19:35:24 +03:00
vhost_add_used ( vq , head , 0 ) ;
2016-07-28 17:36:34 +03:00
added = true ;
2019-05-17 07:29:51 +03:00
} while ( likely ( ! vhost_exceeds_weight ( vq , + + pkts , total_len ) ) ) ;
2016-07-28 17:36:34 +03:00
no_more_replies :
if ( added )
vhost_signal ( & vsock - > dev , vq ) ;
out :
mutex_unlock ( & vq - > mutex ) ;
}
static void vhost_vsock_handle_rx_kick ( struct vhost_work * work )
{
struct vhost_virtqueue * vq = container_of ( work , struct vhost_virtqueue ,
poll . work ) ;
struct vhost_vsock * vsock = container_of ( vq - > dev , struct vhost_vsock ,
dev ) ;
vhost_transport_do_send_pkt ( vsock , vq ) ;
}
static int vhost_vsock_start ( struct vhost_vsock * vsock )
{
2017-01-19 13:43:53 +03:00
struct vhost_virtqueue * vq ;
2016-07-28 17:36:34 +03:00
size_t i ;
int ret ;
mutex_lock ( & vsock - > dev . mutex ) ;
ret = vhost_dev_check_owner ( & vsock - > dev ) ;
if ( ret )
goto err ;
for ( i = 0 ; i < ARRAY_SIZE ( vsock - > vqs ) ; i + + ) {
2017-01-19 13:43:53 +03:00
vq = & vsock - > vqs [ i ] ;
2016-07-28 17:36:34 +03:00
mutex_lock ( & vq - > mutex ) ;
if ( ! vhost_vq_access_ok ( vq ) ) {
ret = - EFAULT ;
goto err_vq ;
}
2020-03-31 22:27:57 +03:00
if ( ! vhost_vq_get_backend ( vq ) ) {
vhost_vq_set_backend ( vq , vsock ) ;
2017-01-19 13:43:53 +03:00
ret = vhost_vq_init_access ( vq ) ;
if ( ret )
goto err_vq ;
2016-07-28 17:36:34 +03:00
}
mutex_unlock ( & vq - > mutex ) ;
}
2020-05-01 07:38:40 +03:00
/* Some packets may have been queued before the device was started,
* let ' s kick the send worker to send them .
*/
vhost_work_queue ( & vsock - > dev , & vsock - > send_pkt_work ) ;
2016-07-28 17:36:34 +03:00
mutex_unlock ( & vsock - > dev . mutex ) ;
return 0 ;
err_vq :
2020-03-31 22:27:57 +03:00
vhost_vq_set_backend ( vq , NULL ) ;
2017-01-19 13:43:53 +03:00
mutex_unlock ( & vq - > mutex ) ;
2016-07-28 17:36:34 +03:00
for ( i = 0 ; i < ARRAY_SIZE ( vsock - > vqs ) ; i + + ) {
2017-01-19 13:43:53 +03:00
vq = & vsock - > vqs [ i ] ;
2016-07-28 17:36:34 +03:00
mutex_lock ( & vq - > mutex ) ;
2020-03-31 22:27:57 +03:00
vhost_vq_set_backend ( vq , NULL ) ;
2016-07-28 17:36:34 +03:00
mutex_unlock ( & vq - > mutex ) ;
}
err :
mutex_unlock ( & vsock - > dev . mutex ) ;
return ret ;
}
2022-02-22 12:47:42 +03:00
/* Stop the device by clearing the backend of every virtqueue.
 *
 * When @check_owner is true the caller must pass vhost_dev_check_owner();
 * its error is returned and nothing is changed. With @check_owner false
 * the function always returns 0.
 */
static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
{
	size_t idx;
	int ret = 0;

	mutex_lock(&vsock->dev.mutex);

	if (check_owner)
		ret = vhost_dev_check_owner(&vsock->dev);

	if (!ret) {
		for (idx = 0; idx < ARRAY_SIZE(vsock->vqs); idx++) {
			struct vhost_virtqueue *vq = &vsock->vqs[idx];

			mutex_lock(&vq->mutex);
			vhost_vq_set_backend(vq, NULL);
			mutex_unlock(&vq->mutex);
		}
	}

	mutex_unlock(&vsock->dev.mutex);
	return ret;
}
/* Release the memory of @vsock. kvfree() is used because the structure is
 * allocated with kvmalloc() in vhost_vsock_dev_open().
 */
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
	kvfree(vsock);
}
static int vhost_vsock_dev_open ( struct inode * inode , struct file * file )
{
struct vhost_virtqueue * * vqs ;
struct vhost_vsock * vsock ;
int ret ;
/* This struct is large and allocation could fail, fall back to vmalloc
* if there is no other way .
*/
2017-07-13 00:36:45 +03:00
vsock = kvmalloc ( sizeof ( * vsock ) , GFP_KERNEL | __GFP_RETRY_MAYFAIL ) ;
2017-05-09 01:57:15 +03:00
if ( ! vsock )
return - ENOMEM ;
2016-07-28 17:36:34 +03:00
vqs = kmalloc_array ( ARRAY_SIZE ( vsock - > vqs ) , sizeof ( * vqs ) , GFP_KERNEL ) ;
if ( ! vqs ) {
ret = - ENOMEM ;
goto out ;
}
2017-11-09 16:29:10 +03:00
vsock - > guest_cid = 0 ; /* no CID assigned yet */
2016-07-28 17:36:34 +03:00
atomic_set ( & vsock - > queued_replies , 0 ) ;
vqs [ VSOCK_VQ_TX ] = & vsock - > vqs [ VSOCK_VQ_TX ] ;
vqs [ VSOCK_VQ_RX ] = & vsock - > vqs [ VSOCK_VQ_RX ] ;
vsock - > vqs [ VSOCK_VQ_TX ] . handle_kick = vhost_vsock_handle_tx_kick ;
vsock - > vqs [ VSOCK_VQ_RX ] . handle_kick = vhost_vsock_handle_rx_kick ;
2019-05-17 07:29:49 +03:00
vhost_dev_init ( & vsock - > dev , vqs , ARRAY_SIZE ( vsock - > vqs ) ,
UIO_MAXIOV , VHOST_VSOCK_PKT_WEIGHT ,
2020-05-29 11:02:58 +03:00
VHOST_VSOCK_WEIGHT , true , NULL ) ;
2016-07-28 17:36:34 +03:00
file - > private_data = vsock ;
spin_lock_init ( & vsock - > send_pkt_list_lock ) ;
INIT_LIST_HEAD ( & vsock - > send_pkt_list ) ;
vhost_work_init ( & vsock - > send_pkt_work , vhost_transport_send_pkt_work ) ;
return 0 ;
out :
vhost_vsock_free ( vsock ) ;
return ret ;
}
static void vhost_vsock_flush ( struct vhost_vsock * vsock )
{
2022-05-17 21:08:50 +03:00
vhost_dev_flush ( & vsock - > dev ) ;
2016-07-28 17:36:34 +03:00
}
static void vhost_vsock_reset_orphans ( struct sock * sk )
{
struct vsock_sock * vsk = vsock_sk ( sk ) ;
/* vmci_transport.c doesn't take sk_lock here either. At least we're
* under vsock_table_lock so the sock cannot disappear while we ' re
* executing .
*/
2018-12-06 22:14:34 +03:00
/* If the peer is still valid, no need to reset connection */
if ( vhost_vsock_get ( vsk - > remote_addr . svm_cid ) )
return ;
/* If the close timeout is pending, let it expire. This avoids races
* with the timeout callback .
*/
if ( vsk - > close_work_scheduled )
return ;
sock_set_flag ( sk , SOCK_DONE ) ;
vsk - > peer_shutdown = SHUTDOWN_MASK ;
sk - > sk_state = SS_UNCONNECTED ;
sk - > sk_err = ECONNRESET ;
2021-06-28 01:48:21 +03:00
sk_error_report ( sk ) ;
2016-07-28 17:36:34 +03:00
}
static int vhost_vsock_dev_release ( struct inode * inode , struct file * file )
{
struct vhost_vsock * vsock = file - > private_data ;
2018-11-05 20:33:22 +03:00
mutex_lock ( & vhost_vsock_mutex ) ;
2018-11-05 13:35:47 +03:00
if ( vsock - > guest_cid )
hash_del_rcu ( & vsock - > hash ) ;
2018-11-05 20:33:22 +03:00
mutex_unlock ( & vhost_vsock_mutex ) ;
2016-07-28 17:36:34 +03:00
2018-11-05 13:35:47 +03:00
/* Wait for other CPUs to finish using vsock */
synchronize_rcu ( ) ;
2016-07-28 17:36:34 +03:00
/* Iterating over all connections for all CIDs to find orphans is
* inefficient . Room for improvement here . */
2022-03-11 05:00:16 +03:00
vsock_for_each_connected_socket ( & vhost_transport . transport ,
vhost_vsock_reset_orphans ) ;
2016-07-28 17:36:34 +03:00
2022-02-22 12:47:42 +03:00
/* Don't check the owner, because we are in the release path, so we
* need to stop the vsock device in any case .
* vhost_vsock_stop ( ) can not fail in this case , so we don ' t need to
* check the return code .
*/
vhost_vsock_stop ( vsock , false ) ;
2016-07-28 17:36:34 +03:00
vhost_vsock_flush ( vsock ) ;
vhost_dev_stop ( & vsock - > dev ) ;
spin_lock_bh ( & vsock - > send_pkt_list_lock ) ;
while ( ! list_empty ( & vsock - > send_pkt_list ) ) {
struct virtio_vsock_pkt * pkt ;
pkt = list_first_entry ( & vsock - > send_pkt_list ,
struct virtio_vsock_pkt , list ) ;
list_del_init ( & pkt - > list ) ;
virtio_transport_free_pkt ( pkt ) ;
}
spin_unlock_bh ( & vsock - > send_pkt_list_lock ) ;
2017-12-24 19:08:58 +03:00
vhost_dev_cleanup ( & vsock - > dev ) ;
2016-07-28 17:36:34 +03:00
kfree ( vsock - > dev . vqs ) ;
vhost_vsock_free ( vsock ) ;
return 0 ;
}
/* Assign @guest_cid to @vsock and (re)register it in the CID hash.
 *
 * Returns 0 on success, -EINVAL for reserved or out-of-range CIDs, and
 * -EADDRINUSE when the CID is used by the guest->host transport or by
 * another vhost_vsock instance.
 */
static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
{
	struct vhost_vsock *other;
	int ret = 0;

	/* Refuse reserved CIDs (everything up to VMADDR_CID_HOST, and
	 * U32_MAX) as well as anything above U32_MAX: 64-bit CIDs are not
	 * yet supported.
	 */
	if (guest_cid <= VMADDR_CID_HOST || guest_cid >= U32_MAX)
		return -EINVAL;

	/* Refuse if CID is assigned to the guest->host transport (i.e. nested
	 * VM), to make the loopback work.
	 */
	if (vsock_find_cid(guest_cid))
		return -EADDRINUSE;

	mutex_lock(&vhost_vsock_mutex);

	/* Refuse if CID is already in use */
	other = vhost_vsock_get(guest_cid);
	if (other && other != vsock) {
		ret = -EADDRINUSE;
		goto unlock;
	}

	/* Drop the previous registration, if any, before re-adding */
	if (vsock->guest_cid)
		hash_del_rcu(&vsock->hash);

	vsock->guest_cid = guest_cid;
	hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);

unlock:
	mutex_unlock(&vhost_vsock_mutex);
	return ret;
}
/*
 * Record the feature bits acked by userspace on every virtqueue.
 *
 * @vsock:    device being configured
 * @features: feature bitmap passed to VHOST_SET_FEATURES
 *
 * Returns 0 on success, -EOPNOTSUPP when @features contains a bit outside
 * VHOST_VSOCK_FEATURES, and -EFAULT when logging was requested but
 * vhost_log_access_ok() fails or when the device IOTLB cannot be
 * initialised.
 */
static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
{
	struct vhost_virtqueue *vq;
	int i;

	if (features & ~VHOST_VSOCK_FEATURES)
		return -EOPNOTSUPP;

	mutex_lock(&vsock->dev.mutex);

	if ((features & (1ULL << VHOST_F_LOG_ALL)) &&
	    !vhost_log_access_ok(&vsock->dev))
		goto err;

	if (features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)) {
		if (vhost_init_device_iotlb(&vsock->dev, true))
			goto err;
	}

	/*
	 * Derive the flag from the current feature set on every call instead
	 * of only ever setting it: otherwise a later VHOST_SET_FEATURES that
	 * clears VIRTIO_VSOCK_F_SEQPACKET would leave seqpacket_allow stuck
	 * at true.
	 */
	vsock->seqpacket_allow =
		!!(features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET));

	/* Publish the acked features to each virtqueue under its own lock. */
	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];
		mutex_lock(&vq->mutex);
		vq->acked_features = features;
		mutex_unlock(&vq->mutex);
	}

	mutex_unlock(&vsock->dev.mutex);
	return 0;

err:
	mutex_unlock(&vsock->dev.mutex);
	return -EFAULT;
}
static long vhost_vsock_dev_ioctl ( struct file * f , unsigned int ioctl ,
unsigned long arg )
{
struct vhost_vsock * vsock = f - > private_data ;
void __user * argp = ( void __user * ) arg ;
u64 guest_cid ;
u64 features ;
int start ;
int r ;
switch ( ioctl ) {
case VHOST_VSOCK_SET_GUEST_CID :
if ( copy_from_user ( & guest_cid , argp , sizeof ( guest_cid ) ) )
return - EFAULT ;
return vhost_vsock_set_cid ( vsock , guest_cid ) ;
case VHOST_VSOCK_SET_RUNNING :
if ( copy_from_user ( & start , argp , sizeof ( start ) ) )
return - EFAULT ;
if ( start )
return vhost_vsock_start ( vsock ) ;
else
2022-02-22 12:47:42 +03:00
return vhost_vsock_stop ( vsock , true ) ;
2016-07-28 17:36:34 +03:00
case VHOST_GET_FEATURES :
features = VHOST_VSOCK_FEATURES ;
if ( copy_to_user ( argp , & features , sizeof ( features ) ) )
return - EFAULT ;
return 0 ;
case VHOST_SET_FEATURES :
if ( copy_from_user ( & features , argp , sizeof ( features ) ) )
return - EFAULT ;
return vhost_vsock_set_features ( vsock , features ) ;
vhost/vsock: add IOTLB API support
This patch enables the IOTLB API support for vhost-vsock devices,
allowing the userspace to emulate an IOMMU for the guest.
These changes were made following vhost-net, in details this patch:
- exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb
device if the feature is acked
- implements VHOST_GET_BACKEND_FEATURES and
VHOST_SET_BACKEND_FEATURES ioctls
- calls vq_meta_prefetch() before vq processing to prefetch vq
metadata address in IOTLB
- provides .read_iter, .write_iter, and .poll callbacks for the
chardev; they are used by the userspace to exchange IOTLB messages
This patch was tested specifying "intel_iommu=strict" in the guest
kernel command line. I used QEMU with a patch applied [1] to fix a
simple issue (that patch was merged in QEMU v5.2.0):
$ qemu -M q35,accel=kvm,kernel-irqchip=split \
-drive file=fedora.qcow2,format=qcow2,if=virtio \
-device intel-iommu,intremap=on,device-iotlb=on \
-device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on
[1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20201223143638.123417-1-sgarzare@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
2020-12-23 17:36:38 +03:00
case VHOST_GET_BACKEND_FEATURES :
features = VHOST_VSOCK_BACKEND_FEATURES ;
if ( copy_to_user ( argp , & features , sizeof ( features ) ) )
return - EFAULT ;
return 0 ;
case VHOST_SET_BACKEND_FEATURES :
if ( copy_from_user ( & features , argp , sizeof ( features ) ) )
return - EFAULT ;
if ( features & ~ VHOST_VSOCK_BACKEND_FEATURES )
return - EOPNOTSUPP ;
vhost_set_backend_features ( & vsock - > dev , features ) ;
return 0 ;
2016-07-28 17:36:34 +03:00
default :
mutex_lock ( & vsock - > dev . mutex ) ;
r = vhost_dev_ioctl ( & vsock - > dev , ioctl , argp ) ;
if ( r = = - ENOIOCTLCMD )
r = vhost_vring_ioctl ( & vsock - > dev , ioctl , argp ) ;
else
vhost_vsock_flush ( vsock ) ;
mutex_unlock ( & vsock - > dev . mutex ) ;
return r ;
}
}
vhost/vsock: add IOTLB API support
This patch enables the IOTLB API support for vhost-vsock devices,
allowing the userspace to emulate an IOMMU for the guest.
These changes follow the vhost-net implementation; in detail, this patch:
- exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb
device if the feature is acked
- implements VHOST_GET_BACKEND_FEATURES and
VHOST_SET_BACKEND_FEATURES ioctls
- calls vq_meta_prefetch() before vq processing to prefetch vq
metadata address in IOTLB
- provides .read_iter, .write_iter, and .poll callbacks for the
chardev; they are used by the userspace to exchange IOTLB messages
This patch was tested specifying "intel_iommu=strict" in the guest
kernel command line. I used QEMU with a patch applied [1] to fix a
simple issue (that patch was merged in QEMU v5.2.0):
$ qemu -M q35,accel=kvm,kernel-irqchip=split \
-drive file=fedora.qcow2,format=qcow2,if=virtio \
-device intel-iommu,intremap=on,device-iotlb=on \
-device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on
[1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20201223143638.123417-1-sgarzare@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
2020-12-23 17:36:38 +03:00
static ssize_t vhost_vsock_chr_read_iter ( struct kiocb * iocb , struct iov_iter * to )
{
struct file * file = iocb - > ki_filp ;
struct vhost_vsock * vsock = file - > private_data ;
struct vhost_dev * dev = & vsock - > dev ;
int noblock = file - > f_flags & O_NONBLOCK ;
return vhost_chr_read_iter ( dev , to , noblock ) ;
}
static ssize_t vhost_vsock_chr_write_iter ( struct kiocb * iocb ,
struct iov_iter * from )
{
struct file * file = iocb - > ki_filp ;
struct vhost_vsock * vsock = file - > private_data ;
struct vhost_dev * dev = & vsock - > dev ;
return vhost_chr_write_iter ( dev , from ) ;
}
/*
 * .poll handler: delegates to vhost_chr_poll() for the vhost device
 * attached to @file.
 */
static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait)
{
	struct vhost_vsock *vsock = file->private_data;

	return vhost_chr_poll(file, &vsock->dev, wait);
}
2016-07-28 17:36:34 +03:00
static const struct file_operations vhost_vsock_fops = {
. owner = THIS_MODULE ,
. open = vhost_vsock_dev_open ,
. release = vhost_vsock_dev_release ,
. llseek = noop_llseek ,
. unlocked_ioctl = vhost_vsock_dev_ioctl ,
2018-09-11 18:23:00 +03:00
. compat_ioctl = compat_ptr_ioctl ,
vhost/vsock: add IOTLB API support
This patch enables the IOTLB API support for vhost-vsock devices,
allowing the userspace to emulate an IOMMU for the guest.
These changes were made following vhost-net, in details this patch:
- exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb
device if the feature is acked
- implements VHOST_GET_BACKEND_FEATURES and
VHOST_SET_BACKEND_FEATURES ioctls
- calls vq_meta_prefetch() before vq processing to prefetch vq
metadata address in IOTLB
- provides .read_iter, .write_iter, and .poll callbacks for the
chardev; they are used by the userspace to exchange IOTLB messages
This patch was tested specifying "intel_iommu=strict" in the guest
kernel command line. I used QEMU with a patch applied [1] to fix a
simple issue (that patch was merged in QEMU v5.2.0):
$ qemu -M q35,accel=kvm,kernel-irqchip=split \
-drive file=fedora.qcow2,format=qcow2,if=virtio \
-device intel-iommu,intremap=on,device-iotlb=on \
-device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on
[1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20201223143638.123417-1-sgarzare@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
2020-12-23 17:36:38 +03:00
. read_iter = vhost_vsock_chr_read_iter ,
. write_iter = vhost_vsock_chr_write_iter ,
. poll = vhost_vsock_chr_poll ,
2016-07-28 17:36:34 +03:00
} ;
/*
 * Misc device descriptor registered by vhost_vsock_init(): ties the
 * VHOST_VSOCK_MINOR minor number and the device name to vhost_vsock_fops.
 */
static struct miscdevice vhost_vsock_misc = {
2017-05-10 17:19:18 +03:00
. minor = VHOST_VSOCK_MINOR ,
2016-07-28 17:36:34 +03:00
. name = " vhost-vsock " ,
. fops = & vhost_vsock_fops ,
} ;
/*
 * Module init: register the vhost transport with the vsock core as the
 * host->guest (H2G) transport, then register the misc device.
 *
 * Returns 0 on success or the error code of whichever registration failed.
 */
static int __init vhost_vsock_init(void)
{
	int ret;

	ret = vsock_core_register(&vhost_transport.transport,
				  VSOCK_TRANSPORT_F_H2G);
	if (ret < 0)
		return ret;

	ret = misc_register(&vhost_vsock_misc);
	if (ret) {
		/*
		 * Roll back the transport registration; otherwise a failed
		 * module load would leave the vsock core holding a pointer
		 * to a transport whose module is gone.
		 */
		vsock_core_unregister(&vhost_transport.transport);
		return ret;
	}

	return 0;
}
/*
 * Module exit: tear down in the reverse order of vhost_vsock_init().
 */
static void __exit vhost_vsock_exit(void)
{
	/* Remove the misc device first ... */
	misc_deregister(&vhost_vsock_misc);

	/* ... then detach the transport from the vsock core. */
	vsock_core_unregister(&vhost_transport.transport);
}
/* Module entry and exit hooks. */
module_init ( vhost_vsock_init ) ;
module_exit ( vhost_vsock_exit ) ;
/* Module metadata. */
MODULE_LICENSE ( " GPL v2 " ) ;
MODULE_AUTHOR ( " Asias He " ) ;
MODULE_DESCRIPTION ( " vhost transport for vsock " ) ;
2017-05-10 17:19:18 +03:00
/*
 * Aliases for the misc minor and the device node name.
 * NOTE(review): presumably these allow the module to be loaded on demand
 * when the device node is opened - confirm.
 */
MODULE_ALIAS_MISCDEV ( VHOST_VSOCK_MINOR ) ;
MODULE_ALIAS ( " devname:vhost-vsock " ) ;