2017-07-05 23:08:39 +03:00
/*
* ( c ) 2017 Stefano Stabellini < stefano @ aporeto . com >
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*/
2017-07-06 21:01:00 +03:00
# include <linux/inet.h>
2017-07-05 23:08:39 +03:00
# include <linux/kthread.h>
# include <linux/list.h>
# include <linux/radix-tree.h>
# include <linux/module.h>
# include <linux/semaphore.h>
# include <linux/wait.h>
2017-07-06 21:01:00 +03:00
# include <net/sock.h>
# include <net/inet_common.h>
# include <net/inet_connection_sock.h>
# include <net/request_sock.h>
2017-07-05 23:08:39 +03:00
# include <xen/events.h>
# include <xen/grant_table.h>
# include <xen/xen.h>
# include <xen/xenbus.h>
# include <xen/interface/io/pvcalls.h>
2017-07-06 20:59:17 +03:00
/* Protocol version string that pvcalls_back_probe() writes to xenstore. */
# define PVCALLS_VERSIONS "1"
/*
 * Largest data-ring order accepted from a frontend; also the value that
 * pvcalls_back_probe() writes to xenstore as the maximum page order.
 */
# define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
2017-07-05 23:08:48 +03:00
/*
 * Driver-wide state: the list of connected frontends and the semaphore
 * taken around modifications of that list. Both members are initialised in
 * pvcalls_back_init().
 */
struct pvcalls_back_global {
/* struct pvcalls_fedata entries, linked through their "list" member */
struct list_head frontends ;
/* held while adding to "frontends" and around backend_disconnect() */
struct semaphore frontends_lock ;
} pvcalls_back_global ;
2017-07-06 20:59:29 +03:00
/*
 * Per-frontend data structure. It contains pointers to the command
 * ring, its event channel, a list of active sockets and a tree of
 * passive sockets.
 */
struct pvcalls_fedata {
/* link in pvcalls_back_global.frontends */
struct list_head list ;
/* xenbus device this frontend state belongs to */
struct xenbus_device * dev ;
/* shared command ring page mapped in backend_connect() */
struct xen_pvcalls_sring * sring ;
/* backend view of the command ring, set up with BACK_RING_INIT() */
struct xen_pvcalls_back_ring ring ;
/* irq bound to the command ring event channel; -1 until bound */
int irq ;
/* active sockets: struct sock_mapping entries linked through "list" */
struct list_head socket_mappings ;
/* passive sockets (initialised in backend_connect(), not used yet here) */
struct radix_tree_root socketpass_mappings ;
/* held while modifying socket_mappings */
struct semaphore socket_lock ;
} ;
2017-07-06 21:01:06 +03:00
/*
 * Per-socket I/O worker: a work item and the workqueue allocated for it in
 * pvcalls_new_active_socket().
 */
struct pvcalls_ioworker {
/* work item whose handler is pvcalls_back_ioworker() */
struct work_struct register_work ;
/* workqueue created with alloc_workqueue() for this socket */
struct workqueue_struct * wq ;
} ;
/*
 * State of one active (connected) socket: the kernel socket plus the
 * frontend-provided indexes page, data ring and event channel used to
 * exchange data with the frontend.
 */
struct sock_mapping {
/* link in pvcalls_fedata.socket_mappings */
struct list_head list ;
/* owning frontend */
struct pvcalls_fedata * fedata ;
/* kernel socket backing this mapping */
struct socket * sock ;
/* identifier taken from the frontend's request */
uint64_t id ;
/* grant reference of the indexes page */
grant_ref_t ref ;
/* mapped indexes page (shared with the frontend) */
struct pvcalls_data_intf * ring ;
/* mapped data ring: (1 << ring_order) pages */
void * bytes ;
/* in/out pointers into "bytes" */
struct pvcalls_data data ;
/* ring order copied once from the shared indexes page */
uint32_t ring_order ;
/* irq bound to this socket's event channel */
int irq ;
/* NOTE(review): read/write/release are not used in the code visible here */
atomic_t read ;
atomic_t write ;
/* set to 1 when the mapping is created */
atomic_t io ;
atomic_t release ;
/* the socket's original sk_data_ready callback, saved before override */
void ( * saved_data_ready ) ( struct sock * sk ) ;
struct pvcalls_ioworker ioworker ;
} ;
/*
 * Forward declarations: both functions are referenced by
 * pvcalls_new_active_socket() before their definitions appear.
 */
static irqreturn_t pvcalls_back_conn_event ( int irq , void * sock_map ) ;
static int pvcalls_back_release_active ( struct xenbus_device * dev ,
struct pvcalls_fedata * fedata ,
struct sock_mapping * map ) ;
/*
 * Work handler installed on sock_mapping.ioworker.register_work.
 * Placeholder: the body is currently empty.
 */
static void pvcalls_back_ioworker ( struct work_struct * work )
{
}
2017-07-06 21:00:00 +03:00
static int pvcalls_back_socket ( struct xenbus_device * dev ,
struct xen_pvcalls_request * req )
{
2017-07-06 21:01:00 +03:00
struct pvcalls_fedata * fedata ;
int ret ;
struct xen_pvcalls_response * rsp ;
fedata = dev_get_drvdata ( & dev - > dev ) ;
if ( req - > u . socket . domain ! = AF_INET | |
req - > u . socket . type ! = SOCK_STREAM | |
( req - > u . socket . protocol ! = IPPROTO_IP & &
req - > u . socket . protocol ! = AF_INET ) )
ret = - EAFNOSUPPORT ;
else
ret = 0 ;
/* leave the actual socket allocation for later */
rsp = RING_GET_RESPONSE ( & fedata - > ring , fedata - > ring . rsp_prod_pvt + + ) ;
rsp - > req_id = req - > req_id ;
rsp - > cmd = req - > cmd ;
rsp - > u . socket . id = req - > u . socket . id ;
rsp - > ret = ret ;
2017-07-06 21:00:00 +03:00
return 0 ;
}
2017-07-06 21:01:06 +03:00
static void pvcalls_sk_state_change ( struct sock * sock )
{
struct sock_mapping * map = sock - > sk_user_data ;
struct pvcalls_data_intf * intf ;
if ( map = = NULL )
return ;
intf = map - > ring ;
intf - > in_error = - ENOTCONN ;
notify_remote_via_irq ( map - > irq ) ;
}
/*
 * sk_data_ready callback installed on active sockets by
 * pvcalls_new_active_socket(). Placeholder: the body is currently empty.
 */
static void pvcalls_sk_data_ready ( struct sock * sock )
{
}
/*
 * Create the backend state for a newly connected (active) socket.
 *
 * @fedata: frontend the socket belongs to
 * @id:     socket identifier chosen by the frontend
 * @ref:    grant reference of the indexes page
 * @evtchn: frontend event channel port for this socket
 * @sock:   already-connected kernel socket
 *
 * Maps the indexes page and the data ring, binds the event channel, creates
 * the per-socket workqueue, links the mapping into fedata->socket_mappings
 * and finally overrides the socket's data_ready/state_change callbacks.
 *
 * Returns the new mapping, or NULL on failure. On failure after allocation
 * the partially built mapping is handed to pvcalls_back_release_active().
 * NOTE(review): @sock is not released here; callers are expected to do so.
 */
static struct sock_mapping *pvcalls_new_active_socket(
		struct pvcalls_fedata *fedata,
		uint64_t id,
		grant_ref_t ref,
		uint32_t evtchn,
		struct socket *sock)
{
	int ret;
	struct sock_mapping *map;
	void *page;

	map = kzalloc(sizeof(*map), GFP_KERNEL);
	if (map == NULL)
		return NULL;

	/*
	 * Every "goto out" below happens before the mapping is added to
	 * fedata->socket_mappings, yet the error path calls list_del().
	 * Make the node a valid empty list so that list_del() does not
	 * dereference the NULL pointers left by kzalloc().
	 */
	INIT_LIST_HEAD(&map->list);

	map->fedata = fedata;
	map->sock = sock;
	map->id = id;
	map->ref = ref;

	ret = xenbus_map_ring_valloc(fedata->dev, &ref, 1, &page);
	if (ret < 0)
		goto out;
	map->ring = page;
	map->ring_order = map->ring->ring_order;
	/* first read the order, then map the data ring */
	virt_rmb();
	if (map->ring_order > MAX_RING_ORDER) {
		pr_warn(" %s frontend requested ring_order %u, which is > MAX (%u) \n ",
			__func__, map->ring_order, MAX_RING_ORDER);
		goto out;
	}
	ret = xenbus_map_ring_valloc(fedata->dev, map->ring->ref,
				     (1 << map->ring_order), &page);
	if (ret < 0)
		goto out;
	map->bytes = page;

	ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
						    evtchn,
						    pvcalls_back_conn_event,
						    0,
						    " pvcalls-backend ",
						    map);
	if (ret < 0)
		goto out;
	map->irq = ret;

	/* the data ring is split in two: "in" first, "out" right after it */
	map->data.in = map->bytes;
	map->data.out = map->bytes + XEN_FLEX_RING_SIZE(map->ring_order);

	map->ioworker.wq = alloc_workqueue(" pvcalls_io ", WQ_UNBOUND, 1);
	if (!map->ioworker.wq)
		goto out;
	atomic_set(&map->io, 1);
	INIT_WORK(&map->ioworker.register_work, pvcalls_back_ioworker);

	down(&fedata->socket_lock);
	list_add_tail(&map->list, &fedata->socket_mappings);
	up(&fedata->socket_lock);

	/* swap in our callbacks, remembering the original data_ready */
	write_lock_bh(&map->sock->sk->sk_callback_lock);
	map->saved_data_ready = map->sock->sk->sk_data_ready;
	map->sock->sk->sk_user_data = map;
	map->sock->sk->sk_data_ready = pvcalls_sk_data_ready;
	map->sock->sk->sk_state_change = pvcalls_sk_state_change;
	write_unlock_bh(&map->sock->sk->sk_callback_lock);

	return map;
out:
	down(&fedata->socket_lock);
	list_del(&map->list);
	pvcalls_back_release_active(fedata->dev, fedata, map);
	up(&fedata->socket_lock);
	return NULL;
}
2017-07-06 21:00:00 +03:00
static int pvcalls_back_connect ( struct xenbus_device * dev ,
struct xen_pvcalls_request * req )
2017-07-06 21:01:06 +03:00
{
struct pvcalls_fedata * fedata ;
int ret = - EINVAL ;
struct socket * sock ;
struct sock_mapping * map ;
struct xen_pvcalls_response * rsp ;
struct sockaddr * sa = ( struct sockaddr * ) & req - > u . connect . addr ;
fedata = dev_get_drvdata ( & dev - > dev ) ;
if ( req - > u . connect . len < sizeof ( sa - > sa_family ) | |
req - > u . connect . len > sizeof ( req - > u . connect . addr ) | |
sa - > sa_family ! = AF_INET )
goto out ;
ret = sock_create ( AF_INET , SOCK_STREAM , 0 , & sock ) ;
if ( ret < 0 )
goto out ;
ret = inet_stream_connect ( sock , sa , req - > u . connect . len , 0 ) ;
if ( ret < 0 ) {
sock_release ( sock ) ;
goto out ;
}
map = pvcalls_new_active_socket ( fedata ,
req - > u . connect . id ,
req - > u . connect . ref ,
req - > u . connect . evtchn ,
sock ) ;
if ( ! map ) {
ret = - EFAULT ;
sock_release ( map - > sock ) ;
}
out :
rsp = RING_GET_RESPONSE ( & fedata - > ring , fedata - > ring . rsp_prod_pvt + + ) ;
rsp - > req_id = req - > req_id ;
rsp - > cmd = req - > cmd ;
rsp - > u . connect . id = req - > u . connect . id ;
rsp - > ret = ret ;
return 0 ;
}
/*
 * Tear down an active socket mapping. Called from the error path of
 * pvcalls_new_active_socket() with fedata->socket_lock held.
 * Placeholder: currently does nothing and returns 0.
 */
static int pvcalls_back_release_active ( struct xenbus_device * dev ,
struct pvcalls_fedata * fedata ,
struct sock_mapping * map )
2017-07-06 21:00:00 +03:00
{
return 0 ;
}
/*
 * Handler for PVCALLS_RELEASE requests.
 * Placeholder: queues no response and returns 0.
 */
static int pvcalls_back_release ( struct xenbus_device * dev ,
struct xen_pvcalls_request * req )
{
return 0 ;
}
/*
 * Handler for PVCALLS_BIND requests.
 * Placeholder: queues no response and returns 0.
 */
static int pvcalls_back_bind ( struct xenbus_device * dev ,
struct xen_pvcalls_request * req )
{
return 0 ;
}
/*
 * Handler for PVCALLS_LISTEN requests.
 * Placeholder: queues no response and returns 0.
 */
static int pvcalls_back_listen ( struct xenbus_device * dev ,
struct xen_pvcalls_request * req )
{
return 0 ;
}
/*
 * Handler for PVCALLS_ACCEPT requests.
 * Placeholder: queues no response and returns 0.
 */
static int pvcalls_back_accept ( struct xenbus_device * dev ,
struct xen_pvcalls_request * req )
{
return 0 ;
}
/*
 * Handler for PVCALLS_POLL requests.
 * Placeholder: queues no response and returns 0.
 */
static int pvcalls_back_poll ( struct xenbus_device * dev ,
struct xen_pvcalls_request * req )
{
return 0 ;
}
static int pvcalls_back_handle_cmd ( struct xenbus_device * dev ,
struct xen_pvcalls_request * req )
{
int ret = 0 ;
switch ( req - > cmd ) {
case PVCALLS_SOCKET :
ret = pvcalls_back_socket ( dev , req ) ;
break ;
case PVCALLS_CONNECT :
ret = pvcalls_back_connect ( dev , req ) ;
break ;
case PVCALLS_RELEASE :
ret = pvcalls_back_release ( dev , req ) ;
break ;
case PVCALLS_BIND :
ret = pvcalls_back_bind ( dev , req ) ;
break ;
case PVCALLS_LISTEN :
ret = pvcalls_back_listen ( dev , req ) ;
break ;
case PVCALLS_ACCEPT :
ret = pvcalls_back_accept ( dev , req ) ;
break ;
case PVCALLS_POLL :
ret = pvcalls_back_poll ( dev , req ) ;
break ;
default :
{
struct pvcalls_fedata * fedata ;
struct xen_pvcalls_response * rsp ;
fedata = dev_get_drvdata ( & dev - > dev ) ;
rsp = RING_GET_RESPONSE (
& fedata - > ring , fedata - > ring . rsp_prod_pvt + + ) ;
rsp - > req_id = req - > req_id ;
rsp - > cmd = req - > cmd ;
rsp - > ret = - ENOTSUPP ;
break ;
}
}
return ret ;
}
static void pvcalls_back_work ( struct pvcalls_fedata * fedata )
{
int notify , notify_all = 0 , more = 1 ;
struct xen_pvcalls_request req ;
struct xenbus_device * dev = fedata - > dev ;
while ( more ) {
while ( RING_HAS_UNCONSUMED_REQUESTS ( & fedata - > ring ) ) {
RING_COPY_REQUEST ( & fedata - > ring ,
fedata - > ring . req_cons + + ,
& req ) ;
if ( ! pvcalls_back_handle_cmd ( dev , & req ) ) {
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY (
& fedata - > ring , notify ) ;
notify_all + = notify ;
}
}
if ( notify_all ) {
notify_remote_via_irq ( fedata - > irq ) ;
notify_all = 0 ;
}
RING_FINAL_CHECK_FOR_REQUESTS ( & fedata - > ring , more ) ;
}
}
2017-07-06 20:59:29 +03:00
static irqreturn_t pvcalls_back_event ( int irq , void * dev_id )
{
2017-07-06 21:00:00 +03:00
struct xenbus_device * dev = dev_id ;
struct pvcalls_fedata * fedata = NULL ;
if ( dev = = NULL )
return IRQ_HANDLED ;
fedata = dev_get_drvdata ( & dev - > dev ) ;
if ( fedata = = NULL )
return IRQ_HANDLED ;
pvcalls_back_work ( fedata ) ;
2017-07-06 20:59:29 +03:00
return IRQ_HANDLED ;
}
2017-07-06 21:01:06 +03:00
/*
 * Interrupt handler bound to each active socket's event channel by
 * pvcalls_new_active_socket(); sock_map is the struct sock_mapping passed
 * at bind time. Placeholder: does nothing and returns IRQ_HANDLED.
 */
static irqreturn_t pvcalls_back_conn_event ( int irq , void * sock_map )
{
return IRQ_HANDLED ;
}
2017-07-06 20:59:17 +03:00
static int backend_connect ( struct xenbus_device * dev )
{
2017-07-06 20:59:29 +03:00
int err , evtchn ;
grant_ref_t ring_ref ;
struct pvcalls_fedata * fedata = NULL ;
fedata = kzalloc ( sizeof ( struct pvcalls_fedata ) , GFP_KERNEL ) ;
if ( ! fedata )
return - ENOMEM ;
fedata - > irq = - 1 ;
err = xenbus_scanf ( XBT_NIL , dev - > otherend , " port " , " %u " ,
& evtchn ) ;
if ( err ! = 1 ) {
err = - EINVAL ;
xenbus_dev_fatal ( dev , err , " reading %s/event-channel " ,
dev - > otherend ) ;
goto error ;
}
err = xenbus_scanf ( XBT_NIL , dev - > otherend , " ring-ref " , " %u " , & ring_ref ) ;
if ( err ! = 1 ) {
err = - EINVAL ;
xenbus_dev_fatal ( dev , err , " reading %s/ring-ref " ,
dev - > otherend ) ;
goto error ;
}
err = bind_interdomain_evtchn_to_irq ( dev - > otherend_id , evtchn ) ;
if ( err < 0 )
goto error ;
fedata - > irq = err ;
err = request_threaded_irq ( fedata - > irq , NULL , pvcalls_back_event ,
IRQF_ONESHOT , " pvcalls-back " , dev ) ;
if ( err < 0 )
goto error ;
err = xenbus_map_ring_valloc ( dev , & ring_ref , 1 ,
( void * * ) & fedata - > sring ) ;
if ( err < 0 )
goto error ;
BACK_RING_INIT ( & fedata - > ring , fedata - > sring , XEN_PAGE_SIZE * 1 ) ;
fedata - > dev = dev ;
INIT_LIST_HEAD ( & fedata - > socket_mappings ) ;
INIT_RADIX_TREE ( & fedata - > socketpass_mappings , GFP_KERNEL ) ;
sema_init ( & fedata - > socket_lock , 1 ) ;
dev_set_drvdata ( & dev - > dev , fedata ) ;
down ( & pvcalls_back_global . frontends_lock ) ;
list_add_tail ( & fedata - > list , & pvcalls_back_global . frontends ) ;
up ( & pvcalls_back_global . frontends_lock ) ;
2017-07-06 20:59:17 +03:00
return 0 ;
2017-07-06 20:59:29 +03:00
error :
if ( fedata - > irq > = 0 )
unbind_from_irqhandler ( fedata - > irq , dev ) ;
if ( fedata - > sring ! = NULL )
xenbus_unmap_ring_vfree ( dev , fedata - > sring ) ;
kfree ( fedata ) ;
return err ;
2017-07-06 20:59:17 +03:00
}
/*
 * Disconnect from a frontend. Called by set_backend_state() with
 * pvcalls_back_global.frontends_lock held.
 * Placeholder: currently does nothing and returns 0.
 */
static int backend_disconnect ( struct xenbus_device * dev )
{
return 0 ;
}
2017-07-05 23:08:39 +03:00
static int pvcalls_back_probe ( struct xenbus_device * dev ,
const struct xenbus_device_id * id )
{
2017-07-06 20:59:17 +03:00
int err , abort ;
struct xenbus_transaction xbt ;
again :
abort = 1 ;
err = xenbus_transaction_start ( & xbt ) ;
if ( err ) {
pr_warn ( " %s cannot create xenstore transaction \n " , __func__ ) ;
return err ;
}
err = xenbus_printf ( xbt , dev - > nodename , " versions " , " %s " ,
PVCALLS_VERSIONS ) ;
if ( err ) {
pr_warn ( " %s write out 'versions' failed \n " , __func__ ) ;
goto abort ;
}
err = xenbus_printf ( xbt , dev - > nodename , " max-page-order " , " %u " ,
MAX_RING_ORDER ) ;
if ( err ) {
pr_warn ( " %s write out 'max-page-order' failed \n " , __func__ ) ;
goto abort ;
}
err = xenbus_printf ( xbt , dev - > nodename , " function-calls " ,
XENBUS_FUNCTIONS_CALLS ) ;
if ( err ) {
pr_warn ( " %s write out 'function-calls' failed \n " , __func__ ) ;
goto abort ;
}
abort = 0 ;
abort :
err = xenbus_transaction_end ( xbt , abort ) ;
if ( err ) {
if ( err = = - EAGAIN & & ! abort )
goto again ;
pr_warn ( " %s cannot complete xenstore transaction \n " , __func__ ) ;
return err ;
}
if ( abort )
return - EFAULT ;
xenbus_switch_state ( dev , XenbusStateInitWait ) ;
2017-07-05 23:08:39 +03:00
return 0 ;
}
2017-07-06 20:59:17 +03:00
static void set_backend_state ( struct xenbus_device * dev ,
enum xenbus_state state )
{
while ( dev - > state ! = state ) {
switch ( dev - > state ) {
case XenbusStateClosed :
switch ( state ) {
case XenbusStateInitWait :
case XenbusStateConnected :
xenbus_switch_state ( dev , XenbusStateInitWait ) ;
break ;
case XenbusStateClosing :
xenbus_switch_state ( dev , XenbusStateClosing ) ;
break ;
default :
__WARN ( ) ;
}
break ;
case XenbusStateInitWait :
case XenbusStateInitialised :
switch ( state ) {
case XenbusStateConnected :
backend_connect ( dev ) ;
xenbus_switch_state ( dev , XenbusStateConnected ) ;
break ;
case XenbusStateClosing :
case XenbusStateClosed :
xenbus_switch_state ( dev , XenbusStateClosing ) ;
break ;
default :
__WARN ( ) ;
}
break ;
case XenbusStateConnected :
switch ( state ) {
case XenbusStateInitWait :
case XenbusStateClosing :
case XenbusStateClosed :
down ( & pvcalls_back_global . frontends_lock ) ;
backend_disconnect ( dev ) ;
up ( & pvcalls_back_global . frontends_lock ) ;
xenbus_switch_state ( dev , XenbusStateClosing ) ;
break ;
default :
__WARN ( ) ;
}
break ;
case XenbusStateClosing :
switch ( state ) {
case XenbusStateInitWait :
case XenbusStateConnected :
case XenbusStateClosed :
xenbus_switch_state ( dev , XenbusStateClosed ) ;
break ;
default :
__WARN ( ) ;
}
break ;
default :
__WARN ( ) ;
}
}
}
2017-07-05 23:08:39 +03:00
static void pvcalls_back_changed ( struct xenbus_device * dev ,
enum xenbus_state frontend_state )
{
2017-07-06 20:59:17 +03:00
switch ( frontend_state ) {
case XenbusStateInitialising :
set_backend_state ( dev , XenbusStateInitWait ) ;
break ;
case XenbusStateInitialised :
case XenbusStateConnected :
set_backend_state ( dev , XenbusStateConnected ) ;
break ;
case XenbusStateClosing :
set_backend_state ( dev , XenbusStateClosing ) ;
break ;
case XenbusStateClosed :
set_backend_state ( dev , XenbusStateClosed ) ;
if ( xenbus_dev_is_online ( dev ) )
break ;
device_unregister ( & dev - > dev ) ;
break ;
case XenbusStateUnknown :
set_backend_state ( dev , XenbusStateClosed ) ;
device_unregister ( & dev - > dev ) ;
break ;
default :
xenbus_dev_fatal ( dev , - EINVAL , " saw state %d at frontend " ,
frontend_state ) ;
break ;
}
2017-07-05 23:08:39 +03:00
}
/*
 * remove callback of the xenbus driver.
 * Placeholder: currently does nothing and returns 0.
 */
static int pvcalls_back_remove ( struct xenbus_device * dev )
{
return 0 ;
}
/*
 * uevent callback of the xenbus driver.
 * Placeholder: adds nothing to env and returns 0.
 */
static int pvcalls_back_uevent ( struct xenbus_device * xdev ,
struct kobj_uevent_env * env )
{
return 0 ;
}
/* Device id table of the driver; the empty-string entry ends the table. */
static const struct xenbus_device_id pvcalls_back_ids [ ] = {
{ " pvcalls " } ,
{ " " }
} ;
/* xenbus driver descriptor registered by pvcalls_back_init(). */
static struct xenbus_driver pvcalls_back_driver = {
. ids = pvcalls_back_ids ,
. probe = pvcalls_back_probe ,
. remove = pvcalls_back_remove ,
. uevent = pvcalls_back_uevent ,
. otherend_changed = pvcalls_back_changed ,
} ;
2017-07-05 23:08:48 +03:00
/*
 * Module entry point: register the pvcalls backend with xenbus.
 *
 * Returns -ENODEV when not running on Xen, the (negative) registration
 * error if xenbus_register_backend() fails, and 0 otherwise.
 */
static int __init pvcalls_back_init(void)
{
	int ret;

	if (!xen_domain())
		return -ENODEV;

	/*
	 * Set up the global lock and list before registering: once the
	 * driver is registered its callbacks may run, and backend_connect()
	 * takes frontends_lock and appends to frontends.
	 */
	sema_init(&pvcalls_back_global.frontends_lock, 1);
	INIT_LIST_HEAD(&pvcalls_back_global.frontends);

	ret = xenbus_register_backend(&pvcalls_back_driver);
	if (ret < 0)
		return ret;

	return 0;
}

module_init(pvcalls_back_init);