2006-12-04 16:02:08 +03:00
/*
* Unix SMB / CIFS implementation .
* Wrap Infiniband calls .
*
* Copyright ( C ) Sven Oehme < oehmes @ de . ibm . com > 2006
*
* Major code contributions by Peter Somogyi < psomogyi @ gamax . hu >
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 675 Mass Ave , Cambridge , MA 02139 , USA .
*/
#include "ibwrapper.h"

#include <assert.h>
#include <limits.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <rdma/rdma_cma.h>

#include "lib/events/events.h"
#include "ibwrapper_internal.h"
#include "lib/util/dlinklist.h"
2006-12-04 16:02:08 +03:00
/* Size in bytes of the buffer that keeps the most recent error message. */
# define IBW_LASTERR_BUFSIZE 512
/* Most recent error message; filled in by the functions of this file and handed out by ibw_getLastError(). */
static char ibw_lasterr [ IBW_LASTERR_BUFSIZE ] ;
2006-12-12 21:09:16 +03:00
/* Forward declaration: the handler is defined further down but is already passed to event_add_fd() in ibw_setup_cq_qp(). */
static void ibw_event_handler_verbs ( struct event_context * ev ,
struct fd_event * fde , uint16_t flags , void * private_data ) ;
2006-12-13 13:02:49 +03:00
static int ibw_init_memory ( struct ibw_conn * conn )
2006-12-04 16:02:08 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx_priv * pctx = talloc_get_type ( conn - > ctx - > internal , struct ibw_ctx_priv ) ;
struct ibw_conn_priv * pconn = talloc_get_type ( conn - > internal , struct ibw_conn_priv ) ;
2006-12-11 21:56:15 +03:00
2006-12-12 21:09:16 +03:00
int i ;
2006-12-13 13:02:49 +03:00
struct ibw_wr * p ;
2006-12-11 21:56:15 +03:00
2006-12-12 21:09:16 +03:00
pconn - > buf = memalign ( pctx - > page_size , pctx - > max_msg_size ) ;
2006-12-11 21:56:15 +03:00
if ( ! pconn - > buf ) {
sprintf ( ibw_lasterr , " couldn't allocate work buf \n " ) ;
return - 1 ;
}
2006-12-12 21:09:16 +03:00
pconn - > mr = ibv_reg_mr ( pctx - > pd , pconn - > buf ,
pctx - > qsize * pctx - > max_msg_size , IBV_ACCESS_LOCAL_WRITE ) ;
2006-12-11 21:56:15 +03:00
if ( ! pconn - > mr ) {
2006-12-12 21:09:16 +03:00
sprintf ( ibw_lasterr , " couldn't allocate mr \n " ) ;
2006-12-11 21:56:15 +03:00
return - 1 ;
}
2006-12-13 13:02:49 +03:00
pconn - > wr_index = talloc_size ( pconn , pctx - > qsize * sizeof ( struct ibw_wr * ) ) ;
2006-12-11 21:56:15 +03:00
2006-12-12 21:09:16 +03:00
for ( i = 0 ; i < pctx - > qsize ; i + + ) {
2006-12-13 13:02:49 +03:00
p = pconn - > wr_index [ i ] = talloc_zero ( pconn , struct ibw_wr ) ;
2006-12-12 21:09:16 +03:00
p - > msg = pconn - > buf + ( i * pctx - > max_msg_size ) ;
2006-12-11 21:56:15 +03:00
p - > wr_id = i ;
DLIST_ADD ( pconn - > mr_list_avail , p ) ;
}
return 0 ;
2006-12-04 16:02:08 +03:00
}
2006-12-04 21:48:11 +03:00
static int ibw_ctx_priv_destruct ( void * ptr )
2006-12-04 16:02:08 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx * pctx = talloc_get_type ( ctx - > internal , struct ibw_ctx_priv ) ;
2006-12-04 16:02:08 +03:00
assert ( pctx ! = NULL ) ;
2006-12-06 20:49:46 +03:00
if ( pctx - > pd ) {
ibv_dealloc_pd ( pctx - > pd ) ;
pctx - > pd = NULL ;
}
/* destroy cm */
2006-12-04 21:48:11 +03:00
if ( pctx - > cm_channel ) {
rdma_destroy_event_channel ( pctx - > cm_channel ) ;
pctx - > cm_channel = NULL ;
}
2006-12-06 20:49:46 +03:00
if ( pctx - > cm_channel_event ) {
/* TODO: do we have to do this here? */
talloc_free ( pctx - > cm_channel_event ) ;
pctx - > cm_channel_event = NULL ;
}
if ( pctx - > cm_id ) {
rdma_destroy_id ( pctx - > cm_id ) ;
pctx - > cm_id = NULL ;
}
2006-12-04 21:48:11 +03:00
}
static int ibw_ctx_destruct ( void * ptr )
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx * ctx = talloc_get_type ( ptr , struct ibw_ctx ) ;
2006-12-04 21:48:11 +03:00
assert ( ctx ! = NULL ) ;
2006-12-04 16:02:08 +03:00
return 0 ;
}
2006-12-04 21:48:11 +03:00
static int ibw_conn_priv_destruct ( void * ptr )
{
2006-12-13 13:02:49 +03:00
struct ibw_conn * pconn = talloc_get_type ( ptr , struct ibw_conn_priv ) ;
2006-12-04 21:48:11 +03:00
assert ( pconn ! = NULL ) ;
2006-12-11 21:56:15 +03:00
/* free memory regions */
if ( pconn - > mr ) {
ibv_dereg_mr ( pconn - > mr ) ;
pconn - > mr = NULL ;
}
if ( pconn - > buf ) {
free ( pconn - > buf ) ; /* memalign-ed */
pconn - > buf = NULL ;
}
/* pconn->wr_index is freed by talloc */
/* pconn->wr_index[i] are freed by talloc */
/* destroy verbs */
if ( pconn - > cm_id - > qp ) {
ibv_destroy_qp ( pconn - > qp ) ;
pconn - > qp = NULL ;
}
if ( pconn - > cq ) {
ibv_destroy_cq ( pconn - > cq ) ;
pconn - > cq = NULL ;
}
2006-12-12 21:09:16 +03:00
if ( pconn - > verbs_channel ) {
ibv_destroy_comp_channel ( pconn - > verbs_channel ) ;
pconn - > verbs_channel = NULL ;
}
if ( pconn - > verbs_channel_event ) {
/* TODO: do we have to do this here? */
talloc_free ( pconn - > verbs_channel_event ) ;
pconn - > verbs_channel_event = NULL ;
}
2006-12-11 21:56:15 +03:00
if ( pconn - > cm_id ) {
rdma_destroy_id ( pctx - > cm_id ) ;
pctx - > cm_id = NULL ;
}
2006-12-04 21:48:11 +03:00
}
static int ibw_conn_destruct ( void * ptr )
{
2006-12-13 13:02:49 +03:00
struct ibw_conn * conn = talloc_get_type ( ptr , struct ibw_conn ) ;
struct ibw_ctx * ctx ;
2006-12-04 21:48:11 +03:00
assert ( conn ! = NULL ) ;
ctx = ibw_conn - > ctx ;
assert ( ctx ! = NULL ) ;
2006-12-05 15:09:48 +03:00
DLIST_REMOVE ( ctx - > conn_list , conn ) ;
2006-12-04 21:48:11 +03:00
return 0 ;
}
2006-12-13 13:02:49 +03:00
static struct ibw_conn * ibw_conn_new ( struct ibw_ctx * ctx )
2006-12-04 21:48:11 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_conn * conn ;
struct ibw_conn_priv * pconn ;
2006-12-04 21:48:11 +03:00
2006-12-13 13:02:49 +03:00
conn = talloc_zero ( ctx , struct ibw_conn ) ;
2006-12-04 21:48:11 +03:00
assert ( conn ! = NULL ) ;
2006-12-13 13:02:49 +03:00
talloc_set_destructor ( conn , struct ibw_conn_destruct ) ;
2006-12-04 21:48:11 +03:00
2006-12-13 13:02:49 +03:00
pconn = talloc_zero ( ctx , struct ibw_conn_priv ) ;
2006-12-04 21:48:11 +03:00
assert ( pconn ! = NULL ) ;
2006-12-13 13:02:49 +03:00
talloc_set_destructor ( pconn , struct ibw_conn_priv_destruct ) ;
2006-12-04 21:48:11 +03:00
conn - > ctx = ctx ;
2006-12-05 15:09:48 +03:00
DLIST_ADD ( ctx - > conn_list , conn ) ;
2006-12-04 21:48:11 +03:00
return conn ;
}
2006-12-04 16:02:08 +03:00
2006-12-13 13:02:49 +03:00
static int ibw_setup_cq_qp ( struct ibw_conn * conn )
2006-12-11 21:56:15 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx_priv * pctx = talloc_get_type ( conn - > ctx - > internal , struct ibw_ctx_priv ) ;
struct ibw_conn_priv * pconn = talloc_get_type ( conn - > internal , struct ibw_conn_priv ) ;
2006-12-11 21:56:15 +03:00
struct ibv_qp_init_attr init_attr ;
int rc ;
2006-12-12 21:09:16 +03:00
/* init mr */
2006-12-11 21:56:15 +03:00
if ( ibw_init_memory ( conn ) )
return - 1 ;
2006-12-12 21:09:16 +03:00
/* init verbs */
pconn - > verbs_channel = ibv_create_comp_channel ( pconn - > cm_id - > verbs ) ;
if ( ! pconn - > verbs_channel ) {
sprintf ( ibw_lasterr , " ibv_create_comp_channel failed %d \n " , errno ) ;
goto cleanup ;
}
DEBUG ( 10 , " created channel %p \n " , pconn - > channel ) ;
pconn - > verbs_channel_event = event_add_fd ( pctx - > ectx , conn ,
pconn - > verbs_channel - > fd , EVENT_FD_READ , ibw_event_handler_verbs , conn ) ;
/* init cq */
pconn - > cq = ibv_create_cq ( conn - > cm_id - > verbs , pctx - > qsize ,
conn , pconn - > verbs_channel , 0 ) ;
2006-12-11 21:56:15 +03:00
if ( cq = = NULL ) {
sprintf ( ibw_lasterr , " ibv_create_cq failed \n " ) ;
return - 1 ;
}
2006-12-12 21:09:16 +03:00
rc = ibv_req_notify_cq ( pconn - > cq , 0 ) ;
2006-12-11 21:56:15 +03:00
if ( rc ) {
sprintf ( ibw_lasterr , " ibv_req_notify_cq failed with %d \n " , rc ) ;
return rc ;
}
2006-12-12 21:09:16 +03:00
/* init qp */
2006-12-11 21:56:15 +03:00
memset ( & init_attr , 0 , sizeof ( init_attr ) ) ;
init_attr . cap . max_send_wr = pctx - > opts . max_send_wr ;
init_attr . cap . max_recv_wr = pctx - > opts . max_recv_wr ;
init_attr . cap . max_recv_sge = 1 ;
init_attr . cap . max_send_sge = 1 ;
init_attr . qp_type = IBV_QPT_RC ;
2006-12-12 21:09:16 +03:00
init_attr . send_cq = pconn - > cq ;
init_attr . recv_cq = pconn - > cq ;
2006-12-11 21:56:15 +03:00
rc = rdma_create_qp ( conn - > cm_id , pctx - > pd , & init_attr ) ;
if ( rc ) {
sprintf ( ibw_lasterr , " rdma_create_qp (%d) failed with %d \n " , is_server , rc ) ;
return rc ;
}
/* elase result is in pconn->cm_id->qp */
return rc ;
}
2006-12-13 13:02:49 +03:00
static int ibw_refill_cq_recv ( struct ibw_conn * conn )
2006-12-11 21:56:15 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx_priv * pctx = talloc_get_type ( conn - > ctx - > internal , struct ibw_ctx_priv ) ;
struct ibw_conn_priv * pconn = talloc_get_type ( conn - > internal , struct ibw_conn_priv ) ;
2006-12-12 21:09:16 +03:00
int i , rc ;
struct ibv_sge list = {
. addr = ( uintptr_t ) NULL ,
. length = pctx - > max_msg_size ,
. lkey = pconn - > mr - > lkey
} ;
struct ibv_recv_wr wr = {
. wr_id = 0 ,
. sg_list = & list ,
. num_sge = 1 ,
} ;
struct ibv_recv_wr * bad_wr ;
2006-12-13 13:02:49 +03:00
struct ibw_wr * p = pconn - > wr_list_avail ;
2006-12-12 21:09:16 +03:00
if ( p = = NULL ) {
sprintf ( ibw_last_err , " out of wr_list_avail " ) ;
DEBUG ( 0 , ibw_last_err ) ;
return - 1 ;
}
DLIST_REMOVE ( pconn - > wr_list_avail , p ) ;
DLIST_ADD ( pconn - > wr_list_used , p ) ;
list . addr = p - > msg ;
wr . wr_id = p - > wr_id ;
rc = ibv_post_recv ( pconn - > qp , & wr , & bad_wr ) ;
if ( rc ) {
sprintf ( ibw_lasterr , " ibv_post_recv failed with %d \n " , rc ) ;
DEBUG ( 0 , ibw_last_err ) ;
return - 2 ;
}
return 0 ;
}
2006-12-13 13:02:49 +03:00
static int ibw_fill_cq ( struct ibw_conn * conn )
2006-12-12 21:09:16 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx_priv * pctx = talloc_get_type ( conn - > ctx - > internal , struct ibw_ctx_priv ) ;
struct ibw_conn_priv * pconn = talloc_get_type ( conn - > internal , struct ibw_conn_priv ) ;
2006-12-12 21:09:16 +03:00
int i , rc ;
struct ibv_sge list = {
. addr = ( uintptr_t ) NULL ,
. length = pctx - > max_msg_size ,
. lkey = pconn - > mr - > lkey
} ;
struct ibv_recv_wr wr = {
. wr_id = 0 ,
. sg_list = & list ,
. num_sge = 1 ,
} ;
struct ibv_recv_wr * bad_wr ;
2006-12-13 13:02:49 +03:00
struct ibw_wr * p ;
2006-12-12 21:09:16 +03:00
for ( i = pctx - > opts . max_recv_wr ; i ! = 0 ; i - - ) {
p = pconn - > wr_list_avail ;
if ( p = = NULL ) {
sprintf ( ibw_last_err , " out of wr_list_avail " ) ;
DEBUG ( 0 , ibw_last_err ) ;
return - 1 ;
}
DLIST_REMOVE ( pconn - > wr_list_avail , p ) ;
DLIST_ADD ( pconn - > wr_list_used , p ) ;
list . addr = p - > msg ;
wr . wr_id = p - > wr_id ;
rc = ibv_post_recv ( pconn - > qp , & wr , & bad_wr ) ;
if ( rc ) {
sprintf ( ibw_lasterr , " ibv_post_recv failed with %d \n " , rc ) ;
DEBUG ( 0 , ibw_last_err ) ;
return - 2 ;
}
}
return 0 ;
2006-12-11 21:56:15 +03:00
}
2006-12-13 13:02:49 +03:00
static int ibw_manage_connect ( struct ibw_conn * conn , struct rdma_cm_id * cma_id )
2006-12-06 20:49:46 +03:00
{
struct rdma_conn_param conn_param ;
int rc ;
2006-12-11 21:56:15 +03:00
rc = ibw_setup_cq_qp ( conn ) ;
if ( rc )
return - 1 ;
2006-12-06 20:49:46 +03:00
/* cm connect */
memset ( & conn_param , 0 , sizeof conn_param ) ;
conn_param . responder_resources = 1 ;
conn_param . initiator_depth = 1 ;
conn_param . retry_count = 10 ;
rc = rdma_connect ( cma_id , & conn_param ) ;
if ( rc )
sprintf ( ibw_lasterr , " rdma_connect error %d \n " , rc ) ;
return rc ;
}
static void ibw_event_handler_cm ( struct event_context * ev ,
2006-12-04 16:02:08 +03:00
struct fd_event * fde , uint16_t flags , void * private_data )
{
2006-12-04 21:48:11 +03:00
int rc ;
2006-12-13 13:02:49 +03:00
struct ibw_ctx * ctx = talloc_get_type ( private_data , struct ibw_ctx ) ;
struct ibw_ctx_priv * pctx = talloc_get_type ( ctx - > internal , struct ibw_ctx_priv ) ;
struct ibw_conn * conn = NULL ;
struct ibw_conn_priv * pconn = NULL ;
2006-12-06 20:49:46 +03:00
struct rdma_cm_id * cma_id = NULL ;
2006-12-05 20:48:16 +03:00
struct rdma_cm_event * event = NULL ;
2006-12-04 21:48:11 +03:00
assert ( ctx ! = NULL ) ;
2006-12-11 21:56:15 +03:00
rc = rdma_get_cm_event ( pctx - > cm_channel , & event ) ;
2006-12-04 21:48:11 +03:00
if ( rc ) {
ctx - > state = IBWS_ERROR ;
sprintf ( ibw_lasterr , " rdma_get_cm_event error %d \n " , rc ) ;
2006-12-12 21:09:16 +03:00
goto error ;
2006-12-04 21:48:11 +03:00
}
2006-12-06 20:49:46 +03:00
cma_id = event - > id ;
2006-12-04 21:48:11 +03:00
2006-12-06 20:49:46 +03:00
DEBUG ( 10 , " cma_event type %d cma_id %p (%s) \n " , event - > event , id ,
( cma_id = = ctx - > cm_id ) ? " parent " : " child " ) ;
2006-12-04 21:48:11 +03:00
switch ( event - > event ) {
case RDMA_CM_EVENT_ADDR_RESOLVED :
2006-12-06 20:49:46 +03:00
/* continuing from ibw_connect ... */
rc = rdma_resolve_route ( cma_id , 2000 ) ;
2006-12-05 15:17:41 +03:00
if ( rc ) {
2006-12-11 21:56:15 +03:00
ctx - > state = ERROR ;
2006-12-05 15:17:41 +03:00
sprintf ( ibw_lasterr , " rdma_resolve_route error %d \n " , rc ) ;
2006-12-12 21:09:16 +03:00
goto error ;
2006-12-04 21:48:11 +03:00
}
2006-12-06 20:49:46 +03:00
/* continued at RDMA_CM_EVENT_ROUTE_RESOLVED */
2006-12-04 21:48:11 +03:00
break ;
case RDMA_CM_EVENT_ROUTE_RESOLVED :
2006-12-06 20:49:46 +03:00
/* after RDMA_CM_EVENT_ADDR_RESOLVED: */
2006-12-12 21:09:16 +03:00
assert ( cma_id - > context ! = NULL ) ;
2006-12-13 13:02:49 +03:00
conn = talloc_get_type ( cma_id - > context , struct ibw_conn ) ;
2006-12-06 20:49:46 +03:00
2006-12-11 21:56:15 +03:00
rc = ibw_manage_connect ( conn , cma_id ) ;
2006-12-06 20:49:46 +03:00
if ( rc )
2006-12-12 21:09:16 +03:00
goto error ;
2006-12-06 20:49:46 +03:00
2006-12-04 21:48:11 +03:00
break ;
2006-12-05 20:48:16 +03:00
2006-12-04 21:48:11 +03:00
case RDMA_CM_EVENT_CONNECT_REQUEST :
2006-12-05 20:48:16 +03:00
ctx - > state = IBWS_CONNECT_REQUEST ;
2006-12-06 20:49:46 +03:00
conn = ibw_conn_new ( ctx ) ;
2006-12-13 13:02:49 +03:00
pconn = talloc_get_type ( conn - > internal , struct ibw_conn_priv ) ;
2006-12-06 20:49:46 +03:00
pconn - > cm_id = cma_id ; /* !!! event will be freed but id not */
cma_id - > context = ( void * ) conn ;
DEBUG ( 10 , " pconn->cm_id %p \n " , pconn - > cm_id ) ;
conn - > state = IBWC_INIT ;
pctx - > connstate_func ( ctx , conn ) ;
/* continued at ibw_accept when invoked by the func above */
if ( ! pconn - > is_accepted ) {
talloc_free ( conn ) ;
DEBUG ( 10 , " pconn->cm_id %p wasn't accepted \n " , pconn - > cm_id ) ;
2006-12-11 21:56:15 +03:00
} else {
if ( ibw_setup_cq_qp ( ctx , conn ) )
2006-12-12 21:09:16 +03:00
goto error ;
2006-12-06 20:49:46 +03:00
}
/* TODO: clarify whether if it's needed by upper layer: */
ctx - > state = IBWS_READY ;
pctx - > connstate_func ( ctx , NULL ) ;
/* NOTE: more requests can arrive until RDMA_CM_EVENT_ESTABLISHED ! */
2006-12-04 21:48:11 +03:00
break ;
case RDMA_CM_EVENT_ESTABLISHED :
2006-12-06 20:49:46 +03:00
/* expected after ibw_accept and ibw_connect[not directly] */
2006-12-11 21:56:15 +03:00
DEBUG ( 0 , " ESTABLISHED (conn: %u) \n " , cma_id - > context ) ;
2006-12-13 13:02:49 +03:00
conn = talloc_get_type ( cma_id - > context , struct ibw_conn ) ;
2006-12-06 20:49:46 +03:00
assert ( conn ! = NULL ) ; /* important assumption */
2006-12-11 21:56:15 +03:00
/* client conn is up */
2006-12-06 20:49:46 +03:00
conn - > state = IBWC_CONNECTED ;
/* both ctx and conn have changed */
pctx - > connstate_func ( ctx , conn ) ;
2006-12-04 21:48:11 +03:00
break ;
case RDMA_CM_EVENT_ADDR_ERROR :
case RDMA_CM_EVENT_ROUTE_ERROR :
case RDMA_CM_EVENT_CONNECT_ERROR :
case RDMA_CM_EVENT_UNREACHABLE :
case RDMA_CM_EVENT_REJECTED :
2006-12-12 21:09:16 +03:00
sprintf ( ibw_lasterr , " cma event %d, error %d \n " , event - > event , event - > status ) ;
goto error ;
2006-12-04 21:48:11 +03:00
case RDMA_CM_EVENT_DISCONNECTED :
2006-12-06 20:49:46 +03:00
if ( cma_id ! = ctx - > cm_id ) {
DEBUG ( 0 , " client DISCONNECT event \n " ) ;
2006-12-13 13:02:49 +03:00
conn = talloc_get_type ( cma_id - > context , struct ibw_conn ) ;
2006-12-06 20:49:46 +03:00
conn - > state = IBWC_DISCONNECTED ;
pctx - > connstate_func ( NULL , conn ) ;
talloc_free ( conn ) ;
2006-12-11 21:56:15 +03:00
2006-12-12 21:09:16 +03:00
/* if we are the last... */
2006-12-11 21:56:15 +03:00
if ( ctx - > conn_list = = NULL )
rdma_disconnect ( ctx - > cm_id ) ;
2006-12-06 20:49:46 +03:00
} else {
DEBUG ( 0 , " server DISCONNECT event \n " ) ;
ctx - > state = IBWS_STOPPED ; /* ??? TODO: try it... */
2006-12-11 21:56:15 +03:00
/* talloc_free(ctx) should be called within or after this func */
2006-12-06 20:49:46 +03:00
pctx - > connstate_func ( ctx , NULL ) ;
}
2006-12-04 21:48:11 +03:00
break ;
case RDMA_CM_EVENT_DEVICE_REMOVAL :
2006-12-12 21:09:16 +03:00
sprintf ( ibw_lasterr , " cma detected device removal! \n " ) ;
goto error ;
2006-12-04 21:48:11 +03:00
default :
2006-12-12 21:09:16 +03:00
sprintf ( ibw_lasterr , " unknown event %d \n " , event - > event ) ;
goto error ;
2006-12-06 20:49:46 +03:00
}
2006-12-05 20:48:16 +03:00
if ( ( rc = rdma_ack_cm_event ( event ) ) ) {
2006-12-06 20:49:46 +03:00
sprintf ( ibw_lasterr , " rdma_ack_cm_event failed with %d \n " ) ;
2006-12-12 21:09:16 +03:00
goto error ;
}
return ;
error :
DEBUG ( 0 , " cm event handler: %s " , ibw_lasterr ) ;
if ( cma_id ! = ctx - > cm_id ) {
2006-12-13 13:02:49 +03:00
conn = talloc_get_type ( cma_id - > context , struct ibw_conn ) ;
2006-12-12 21:09:16 +03:00
if ( conn )
conn - > state = IBWC_ERROR ;
pctx - > connstate_func ( NULL , conn ) ;
} else {
ctx - > state = IBWS_ERROR ;
pctx - > connstate_func ( ctx , NULL ) ;
2006-12-05 20:48:16 +03:00
}
2006-12-04 16:02:08 +03:00
}
2006-12-06 20:49:46 +03:00
static void ibw_event_handler_verbs ( struct event_context * ev ,
struct fd_event * fde , uint16_t flags , void * private_data )
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx * conn = talloc_get_type ( private_data , struct ibw_conn ) ;
struct ibw_ctx_priv * pconn = talloc_get_type ( conn - > internal , struct ibw_ctx_priv ) ;
struct ibw_ctx * pctx = talloc_get_type ( conn - > ctx - > internal , struct ibw_ctx_priv ) ;
2006-12-12 21:09:16 +03:00
struct ibv_wc wc ;
int rc ;
rc = ibv_poll_cq ( conn - > cq , 1 , & wc ) ;
if ( rc ! = 1 ) {
sprintf ( ibw_lasterr , " ibv_poll_cq error %d \n " , rc ) ;
goto error ;
}
if ( wc . status ) {
sprintf ( ibw_lasterr , " cq completion failed status %d \n " ,
wc . status ) ;
goto error ;
}
switch ( wc . opcode ) {
case IBV_WC_SEND :
{
2006-12-13 13:02:49 +03:00
struct ibw_wr * p ;
2006-12-12 21:09:16 +03:00
DEBUG ( 10 , " send completion \n " ) ;
assert ( pconn - > qp - > qp_num = = wc . qp_num ) ;
assert ( wc . wr_id < pctx - > qsize ) ;
p = pconn - > wr_index [ wc . wr_id ] ;
DLIST_REMOVE ( pconn - > wr_list_used , p ) ;
DLIST_ADD ( pconn - > wr_list_avail , p ) ;
}
break ;
case IBV_WC_RDMA_WRITE :
DEBUG ( 10 , " rdma write completion \n " ) ;
break ;
2006-12-06 20:49:46 +03:00
2006-12-12 21:09:16 +03:00
case IBV_WC_RDMA_READ :
DEBUG ( 10 , " rdma read completion \n " ) ;
break ;
case IBV_WC_RECV :
{
2006-12-13 13:02:49 +03:00
struct ibw_wr * p ;
2006-12-12 21:09:16 +03:00
assert ( pconn - > qp - > qp_num = = wc . qp_num ) ;
assert ( wc . wr_id < pctx - > qsize ) ;
p = pconn - > wr_index [ wc . wr_id ] ;
DLIST_REMOVE ( pconn - > wr_list_used , p ) ;
DLIST_ADD ( pconn - > wr_list_avail , p ) ;
DEBUG ( 10 , " recv completion \n " ) ;
assert ( wc . byte_len < = pctx - > max_msg_size ) ;
pctx - > receive_func ( conn , p - > msg , wc . byte_len ) ;
if ( ibw_refill_cq_recv ( conn ) )
goto error ;
}
break ;
default :
sprintf ( ibw_lasterr , " unknown completion %d \n " , wc . opcode ) ;
goto error ;
}
2006-12-11 21:56:15 +03:00
2006-12-12 21:09:16 +03:00
return ;
error :
DEBUG ( 0 , ibw_lasterr ) ;
conn - > status = IBWC_ERROR ;
pctx - > connstate_func ( NULL , conn ) ;
2006-12-06 20:49:46 +03:00
}
2006-12-13 13:02:49 +03:00
static int ibw_process_init_attrs ( struct ibw_initattr * attr , int nattr , struct ibw_opts * opts )
2006-12-04 16:02:08 +03:00
{
2006-12-11 21:56:15 +03:00
int i , mtu ;
2006-12-04 16:02:08 +03:00
char * name , * value ;
2006-12-11 21:56:15 +03:00
opts - > max_send_wr = 256 ;
opts - > max_recv_wr = 1024 ;
2006-12-04 16:02:08 +03:00
for ( i = 0 ; i < nattr ; i + + ) {
name = attr [ i ] . name ;
value = attr [ i ] . value ;
assert ( name ! = NULL & & value ! = NULL ) ;
2006-12-11 21:56:15 +03:00
if ( strcmp ( name , " max_send_wr " ) = = 0 )
opts - > max_send_wr = atoi ( value ) ;
else if ( strcmp ( name , " max_recv_wr " ) = = 0 )
opts - > max_recv_wr = atoi ( value ) ;
2006-12-04 16:02:08 +03:00
else {
sprintf ( ibw_lasterr , " ibw_init: unknown name %s \n " , name ) ;
return - 1 ;
}
}
return 0 ;
}
2006-12-13 13:02:49 +03:00
struct ibw_ctx * ibw_init ( struct ibw_initattr * attr , int nattr ,
2006-12-04 16:02:08 +03:00
void * ctx_userdata ,
ibw_connstate_fn_t ibw_connstate ,
2006-12-05 20:48:16 +03:00
ibw_receive_fn_t ibw_receive ,
2006-12-12 21:09:16 +03:00
event_content * ectx ,
int max_msg_size )
2006-12-04 16:02:08 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx * ctx = talloc_zero ( NULL , struct ibw_ctx ) ;
struct ibw_ctx_priv * pctx ;
2006-12-04 16:02:08 +03:00
int rc ;
2006-12-04 21:48:11 +03:00
/* initialize basic data structures */
2006-12-04 16:02:08 +03:00
memset ( ibw_lasterr , 0 , IBW_LASTERR_BUFSIZE ) ;
assert ( ctx ! = NULL ) ;
ibw_lasterr [ 0 ] = ' \0 ' ;
2006-12-04 21:48:11 +03:00
talloc_set_destructor ( ctx , ibw_ctx_destruct ) ;
2006-12-04 16:02:08 +03:00
ctx - > userdata = userdata ;
2006-12-13 13:02:49 +03:00
pctx = talloc_zero ( ctx , struct ibw_ctx_priv ) ;
2006-12-04 21:48:11 +03:00
talloc_set_destructor ( pctx , ibw_ctx_priv_destruct ) ;
2006-12-04 16:02:08 +03:00
ctx - > internal = ( void * ) pctx ;
assert ( pctx ! = NULL ) ;
pctx - > connstate_func = ibw_connstate ;
pctx - > receive_func = ibw_receive ;
2006-12-05 20:48:16 +03:00
pctx - > ectx = ectx ;
2006-12-04 16:02:08 +03:00
/* process attributes */
if ( ibw_process_init_attrs ( attr , nattr , pctx - > opts ) )
goto cleanup ;
2006-12-06 20:49:46 +03:00
/* init cm */
2006-12-04 16:02:08 +03:00
pctx - > cm_channel = rdma_create_event_channel ( ) ;
if ( ! pctx - > cm_channel ) {
2006-12-11 21:56:15 +03:00
sprintf ( ibw_lasterr , " rdma_create_event_channel error %d \n " , errno ) ;
2006-12-04 16:02:08 +03:00
goto cleanup ;
}
pctx - > cm_channel_event = event_add_fd ( pctx - > ectx , pctx ,
2006-12-06 20:49:46 +03:00
pctx - > cm_channel - > fd , EVENT_FD_READ , ibw_event_handler_cm , ctx ) ;
2006-12-04 16:02:08 +03:00
2006-12-12 21:09:16 +03:00
rc = rdma_create_id ( pctx - > cm_channel , & pctx - > cm_id , ctx , RDMA_PS_TCP ) ;
2006-12-04 16:02:08 +03:00
if ( rc ) {
rc = errno ;
sprintf ( ibw_lasterr , " rdma_create_id error %d \n " , rc ) ;
goto cleanup ;
}
DEBUG ( 10 , " created cm_id %p \n " , pctx - > cm_id ) ;
2006-12-06 20:49:46 +03:00
/* init verbs */
pctx - > pd = ibv_alloc_pd ( pctx - > cmid - > verbs ) ;
if ( ! pctx - > pd ) {
sprintf ( ibw_lasterr , " ibv_alloc_pd failed %d \n " , errno ) ;
goto cleanup ;
}
DEBUG ( 10 , " created pd %p \n " , pctx - > pd ) ;
2006-12-11 21:56:15 +03:00
pctx - > pagesize = sysconf ( _SC_PAGESIZE ) ;
2006-12-12 21:09:16 +03:00
pctx - > qsize = pctx - > opts . max_send_wr + pctx - > opts . max_recv_wr ;
pctx - > max_msg_size = max_msg_size ;
2006-12-04 16:02:08 +03:00
return ctx ;
2006-12-11 21:56:15 +03:00
/* don't put code here */
2006-12-04 16:02:08 +03:00
cleanup :
2006-12-06 20:49:46 +03:00
DEBUG ( 0 , ibw_lasterr ) ;
2006-12-11 21:56:15 +03:00
2006-12-04 16:02:08 +03:00
if ( ctx )
talloc_free ( ctx ) ;
return NULL ;
}
2006-12-13 13:02:49 +03:00
int ibw_stop ( struct ibw_ctx * ctx )
2006-12-04 16:02:08 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx_priv * pctx = ( struct ibw_ctx_priv * ) ctx - > internal ;
struct ibw_conn * p ;
2006-12-04 16:02:08 +03:00
2006-12-12 21:09:16 +03:00
for ( p = ctx - > conn_list ; p ! = NULL ; p = p - > next ) {
if ( ctx - > state = = IBWC_ERROR | | ctx - > state = = IBWC_CONNECTED ) {
if ( ibw_disconnect ( p ) )
return - 1 ;
}
}
return 0 ;
2006-12-04 16:02:08 +03:00
}
2006-12-13 13:02:49 +03:00
int ibw_bind ( struct ibw_ctx * ctx , struct sockaddr_in * my_addr )
2006-12-04 16:02:08 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx_priv * pctx = ( struct ibw_ctx_priv * ) ctx - > internal ;
2006-12-04 21:48:11 +03:00
int rc ;
2006-12-06 20:49:46 +03:00
rc = rdma_bind_addr ( pctx - > cm_id , ( struct sockaddr * ) my_addr ) ;
2006-12-04 21:48:11 +03:00
if ( rc ) {
sprintf ( ibw_lasterr , " rdma_bind_addr error %d \n " , rc ) ;
2006-12-06 20:49:46 +03:00
DEBUG ( 0 , ibw_lasterr ) ;
2006-12-04 21:48:11 +03:00
return rc ;
}
2006-12-06 20:49:46 +03:00
DEBUG ( 10 , " rdma_bind_addr successful \n " ) ;
2006-12-04 21:48:11 +03:00
return 0 ;
2006-12-04 16:02:08 +03:00
}
2006-12-13 13:02:49 +03:00
int ibw_listen ( struct ibw_ctx * ctx , int backlog )
2006-12-04 16:02:08 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx_priv * pctx = talloc_get_type ( ctx - > internal , struct ibw_ctx_priv ) ;
2006-12-06 20:49:46 +03:00
int rc ;
DEBUG_LOG ( " rdma_listen... \n " ) ;
2006-12-11 21:56:15 +03:00
rc = rdma_listen ( pctx - > cm_id , backlog ) ;
2006-12-06 20:49:46 +03:00
if ( rc ) {
sprintf ( ibw_lasterr , " rdma_listen failed: %d \n " , ret ) ;
DEBUG ( 0 , ibw_lasterr ) ;
return rc ;
}
2006-12-04 21:48:11 +03:00
return 0 ;
2006-12-04 16:02:08 +03:00
}
2006-12-13 13:02:49 +03:00
int ibw_accept ( struct ibw_ctx * ctx , struct ibw_conn * conn , void * conn_userdata )
2006-12-04 16:02:08 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx_priv * pctx = talloc_get_type ( ctx - > internal , struct ibw_ctx_priv ) ;
struct ibw_conn_priv * pconn = talloc_get_type ( conn - > internal , struct ibw_conn_priv ) ;
2006-12-06 20:49:46 +03:00
struct rdma_conn_param conn_param ;
2006-12-11 21:56:15 +03:00
conn - > conn_userdata = conn_userdata ;
2006-12-06 20:49:46 +03:00
memset ( & conn_param , 0 , sizeof ( struct rdma_conn_param ) ) ;
conn_param . responder_resources = 1 ;
conn_param . initiator_depth = 1 ;
rc = rdma_accept ( pconn - > cm_id , & conn_param ) ;
if ( rc ) {
sprintf ( ibw_lasterr , " rdma_accept failed %d \n " , rc ) ;
DEBUG ( 0 , ibw_lasterr ) ;
return - 1 ; ;
}
pconn - > is_accepted = 1 ;
/* continued at RDMA_CM_EVENT_ESTABLISHED */
2006-12-04 21:48:11 +03:00
return 0 ;
2006-12-04 16:02:08 +03:00
}
2006-12-13 13:02:49 +03:00
int ibw_connect ( struct ibw_ctx * ctx , struct sockaddr_in * serv_addr , void * conn_userdata )
2006-12-04 16:02:08 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx_priv * pctx = talloc_get_type ( ctx - > internal , struct ibw_ctx_priv ) ;
struct ibw_conn * conn = NULL ;
2006-12-06 20:49:46 +03:00
int rc ;
conn = ibw_conn_new ( ctx ) ;
conn - > conn_userdata = conn_userdata ;
2006-12-13 13:02:49 +03:00
pconn = talloc_get_type ( conn - > internal , struct ibw_conn_priv ) ;
2006-12-06 20:49:46 +03:00
rc = rdma_create_id ( pctx - > cm_channel , & pconn - > cm_id , conn , RDMA_PS_TCP ) ;
if ( rc ) {
rc = errno ;
sprintf ( ibw_lasterr , " rdma_create_id error %d \n " , rc ) ;
return rc ;
}
assert ( ctx - > state = = IBWS_READY ) ;
rc = rdma_resolve_addr ( pconn - > cm_id , NULL , ( struct sockaddr * ) & serv_addr , 2000 ) ;
if ( rc ) {
sprintf ( ibw_lasterr , " rdma_resolve_addr error %d \n " , rc ) ;
DEBUG ( 0 , ibw_lasterr ) ;
return - 1 ;
}
/* continued at RDMA_CM_EVENT_ADDR_RESOLVED */
2006-12-04 21:48:11 +03:00
return 0 ;
2006-12-04 16:02:08 +03:00
}
2006-12-13 13:02:49 +03:00
void ibw_disconnect ( struct ibw_conn * conn )
2006-12-04 16:02:08 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_conn_priv * pconn = talloc_get_type ( conn - > internal , struct ibw_conn_priv ) ;
struct ibw_ctx * ctx = conn - > ctx ;
struct ibw_ctx_priv * pctx = talloc_get_type ( ctx - > internal ) ;
2006-12-11 21:56:15 +03:00
rdma_disconnect ( pctx - > cm_id ) ;
/* continued at RDMA_CM_EVENT_DISCONNECTED */
2006-12-04 21:48:11 +03:00
return 0 ;
2006-12-04 16:02:08 +03:00
}
2006-12-13 13:02:49 +03:00
int ibw_alloc_send_buf ( struct ibw_conn * conn , void * * buf , void * * key )
2006-12-04 16:02:08 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_conn_priv * pconn = talloc_get_type ( conn - > internal , struct ibw_conn_priv ) ;
struct ibw_ctx_priv * pctx = talloc_get_type ( conn - > ctx - > internal , struct ibw_ctx_priv ) ;
struct ibw_wr * p = pctx - > wr_list_avail ;
2006-12-04 21:48:11 +03:00
2006-12-11 21:56:15 +03:00
if ( p = = NULL ) {
sprintf ( ibw_last_err , " insufficient wr chunks \n " ) ;
return - 1 ;
}
DLIST_REMOVE ( pctx - > wr_list_avail , p ) ;
DLIST_ADD ( pctx - > wr_list_used , p ) ;
* buf = ( void * ) p - > msg ;
* key = ( void * ) p ;
return pctx - > buf ;
2006-12-04 16:02:08 +03:00
}
2006-12-13 13:02:49 +03:00
int ibw_send ( struct ibw_conn * conn , void * buf , void * key , int n )
2006-12-04 16:02:08 +03:00
{
2006-12-13 13:02:49 +03:00
struct ibw_ctx_priv pctx = talloc_get_type ( conn - > ctx - > internal , struct ibw_ctx_priv ) ;
struct ibw_wr * p = talloc_get_type ( key , struct ibw_wr ) ;
2006-12-11 21:56:15 +03:00
struct ibv_sge list = {
. addr = ( uintptr_t ) p - > msg ,
. length = n ,
. lkey = pctx - > mr - > lkey
} ;
struct ibv_send_wr wr = {
. wr_id = p - > wr_id ,
. sg_list = & list ,
. num_sge = 1 ,
. opcode = IBV_WR_SEND ,
. send_flags = IBV_SEND_SIGNALED ,
} ;
struct ibv_send_wr * bad_wr ;
assert ( p - > msg = = ( char * ) buf ) ;
2006-12-12 21:09:16 +03:00
assert ( n < = pctx - > max_msg_size ) ;
2006-12-11 21:56:15 +03:00
return ibv_post_send ( conn - > qp , & wr , & bad_wr ) ;
2006-12-04 16:02:08 +03:00
}
/*
 * Return the text of the most recent error recorded in ibw_lasterr.
 * The returned pointer refers to the file's static buffer; later errors
 * overwrite it.
 */
const char *ibw_getLastError()
{
	const char *message = &ibw_lasterr[0];

	return message;
}