/*
 * Copyright (c) 2015 Oracle.  All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc.  All rights reserved.
 */

/* Lightweight memory registration using Fast Memory Regions (FMR).
 * Referred to sometimes as MTHCAFMR mode.
 *
 * FMR uses synchronous memory registration and deregistration.
 * FMR registration is known to be fast, but FMR deregistration
 * can take tens of usecs to complete.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using the
 * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
 * finished, the Memory Region is unmapped using the ib_unmap_fmr
 * verb (fmr_op_unmap).
 */

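/* A rough sketch of the verb lifecycle, based on the calls made in
 * this file:
 *
 *      ib_alloc_fmr()    - fmr_op_init_mr(), when an MW is set up
 *      ib_map_phys_fmr() - fmr_op_map(), once per registered chunk segment
 *      ib_unmap_fmr()    - fmr_op_unmap_sync() and __fmr_unmap()
 *      ib_dealloc_fmr()  - fmr_op_release_mr(), when an MW is destroyed
 */
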
#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY        RPCDBG_TRANS
#endif

/* Maximum scatter/gather per FMR */
#define RPCRDMA_MAX_FMR_SGES    (64)

/* Access mode of externally registered pages */
enum {
        RPCRDMA_FMR_ACCESS_FLAGS        = IB_ACCESS_REMOTE_WRITE |
                                          IB_ACCESS_REMOTE_READ,
};

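/* fmr_is_supported - check whether "fmr" mode can be used with a device
 *
 * FMR support is indicated by the device providing the ->alloc_fmr
 * verb; if it is absent, this registration mode cannot be used.
 */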
bool
fmr_is_supported(struct rpcrdma_ia *ia)
{
        if (!ia->ri_device->alloc_fmr) {
                pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n",
                        ia->ri_device->name);
                return false;
        }
        return true;
}

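/* fmr_op_init_mr - set up the FMR and scatterlist behind one rpcrdma_mw
 *
 * Allocates the array of DMA addresses handed to ib_map_phys_fmr(),
 * the scatterlist used for DMA mapping, and the FMR itself, all sized
 * for RPCRDMA_MAX_FMR_SGES segments.
 */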
static int
fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
{
        static struct ib_fmr_attr fmr_attr = {
                .max_pages      = RPCRDMA_MAX_FMR_SGES,
                .max_maps       = 1,
                .page_shift     = PAGE_SHIFT
        };

        mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
                                       sizeof(u64), GFP_KERNEL);
        if (!mw->fmr.fm_physaddrs)
                goto out_free;

        mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
                            sizeof(*mw->mw_sg), GFP_KERNEL);
        if (!mw->mw_sg)
                goto out_free;

        sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES);

        mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
                                     &fmr_attr);
        if (IS_ERR(mw->fmr.fm_mr))
                goto out_fmr_err;

        return 0;

out_fmr_err:
        dprintk("RPC:       %s: ib_alloc_fmr returned %ld\n", __func__,
                PTR_ERR(mw->fmr.fm_mr));

out_free:
        kfree(mw->mw_sg);
        kfree(mw->fmr.fm_physaddrs);
        return -ENOMEM;
}

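/* __fmr_unmap - invalidate one FMR
 *
 * ib_unmap_fmr() takes a list of FMRs; build a one-entry list on the
 * stack so a single MW can be invalidated synchronously.
 */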
static int
__fmr_unmap(struct rpcrdma_mw *mw)
{
        LIST_HEAD(l);
        int rc;

        list_add(&mw->fmr.fm_mr->list, &l);
        rc = ib_unmap_fmr(&l);
        list_del_init(&mw->fmr.fm_mr->list);
        return rc;
}

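/* fmr_op_release_mr - free all resources behind one rpcrdma_mw
 *
 * Frees the DMA address array, the scatterlist, and the FMR itself,
 * unmapping the FMR first so that ib_dealloc_fmr() cannot fail with
 * EBUSY. Called via ->ro_release_mr, and from fmr_op_recover_mr()
 * when an MR cannot be recovered.
 */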
static void
fmr_op_release_mr(struct rpcrdma_mw *r)
{
        LIST_HEAD(unmap_list);
        int rc;

        /* Ensure MW is not on any rl_registered list */
        if (!list_empty(&r->mw_list))
                list_del(&r->mw_list);

        kfree(r->fmr.fm_physaddrs);
        kfree(r->mw_sg);

        /* In case this one was left mapped, try to unmap it
         * to prevent dealloc_fmr from failing with EBUSY
         */
        rc = __fmr_unmap(r);
        if (rc)
                pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
                       r, rc);

        rc = ib_dealloc_fmr(r->fmr.fm_mr);
        if (rc)
                pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
                       r, rc);

        kfree(r);
}

/* Reset of a single FMR.
 */
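/* Invoked via ->ro_recover_mr, and from the unmap error paths in this
 * file, when an FMR is in an unknown state. The MW is invalidated and
 * DMA-unmapped; on success it goes back to the free pool, otherwise it
 * is released entirely.
 */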
static void
fmr_op_recover_mr(struct rpcrdma_mw *mw)
{
        struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
        int rc;

        /* ORDER: invalidate first */
        rc = __fmr_unmap(mw);

        /* ORDER: then DMA unmap */
        ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
                        mw->mw_sg, mw->mw_nents, mw->mw_dir);
        if (rc)
                goto out_release;

        rpcrdma_put_mw(r_xprt, mw);
        r_xprt->rx_stats.mrs_recovered++;
        return;

out_release:
        pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw);
        r_xprt->rx_stats.mrs_orphaned++;

        spin_lock(&r_xprt->rx_buf.rb_mwlock);
        list_del(&mw->mw_all);
        spin_unlock(&r_xprt->rx_buf.rb_mwlock);

        fmr_op_release_mr(mw);
}

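/* fmr_op_open - set transport limits for FMR mode
 *
 * Sets ri_max_segs to the number of FMRs needed to cover
 * RPCRDMA_MAX_DATA_SEGS, with a floor of one.
 */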
static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
            struct rpcrdma_create_data_internal *cdata)
{
        ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
                                RPCRDMA_MAX_FMR_SGES);
        return 0;
}

/* FMR mode conveys up to 64 pages of payload per chunk segment.
 */
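/* The result below is capped both by RPCRDMA_MAX_DATA_SEGS and by how
 * many FMRs fit in a transport header (RPCRDMA_MAX_HDR_SEGS). As an
 * illustration only (not the real constant values): if
 * RPCRDMA_MAX_HDR_SEGS were 8, the second term would allow 8 * 64 = 512
 * pages, and the smaller of that and RPCRDMA_MAX_DATA_SEGS would win.
 */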
static size_t
fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
        return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
                     RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
}

/* Use the ib_map_phys_fmr() verb to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
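/* In outline: pull an MW from the buffer pool, build a scatterlist
 * covering up to RPCRDMA_MAX_FMR_SGES segments (stopping early at a
 * page-alignment hole), DMA-map it, copy the resulting DMA addresses
 * into the fm_physaddrs array, and hand that array to ib_map_phys_fmr().
 * On success the caller gets back the MW, whose handle, length, and
 * offset describe the registered region.
 */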
static int
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
           int nsegs, bool writing, struct rpcrdma_mw **out)
{
        struct rpcrdma_mr_seg *seg1 = seg;
        int len, pageoff, i, rc;
        struct rpcrdma_mw *mw;
        u64 *dma_pages;

        mw = rpcrdma_get_mw(r_xprt);
        if (!mw)
                return -ENOBUFS;

        pageoff = offset_in_page(seg1->mr_offset);
        seg1->mr_offset -= pageoff;     /* start of page */
        seg1->mr_len += pageoff;
        len = -pageoff;
        if (nsegs > RPCRDMA_MAX_FMR_SGES)
                nsegs = RPCRDMA_MAX_FMR_SGES;
        for (i = 0; i < nsegs;) {
                if (seg->mr_page)
                        sg_set_page(&mw->mw_sg[i],
                                    seg->mr_page,
                                    seg->mr_len,
                                    offset_in_page(seg->mr_offset));
                else
                        sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
                                   seg->mr_len);
                len += seg->mr_len;
                ++seg;
                ++i;

                /* Check for holes */
                if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
                    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
                        break;
        }
        mw->mw_nents = i;
        mw->mw_dir = rpcrdma_data_dir(writing);
        if (i == 0)
                goto out_dmamap_err;

        if (!ib_dma_map_sg(r_xprt->rx_ia.ri_device,
                           mw->mw_sg, mw->mw_nents, mw->mw_dir))
                goto out_dmamap_err;

        for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
                dma_pages[i] = sg_dma_address(&mw->mw_sg[i]);
        rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents,
                             dma_pages[0]);
        if (rc)
                goto out_maperr;

        mw->mw_handle = mw->fmr.fm_mr->rkey;
        mw->mw_length = len;
        mw->mw_offset = dma_pages[0] + pageoff;

        *out = mw;
        return mw->mw_nents;

out_dmamap_err:
        pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
               mw->mw_sg, mw->mw_nents);
        rpcrdma_defer_mr_recovery(mw);
        return -EIO;

out_maperr:
        pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
               len, (unsigned long long)dma_pages[0],
               pageoff, mw->mw_nents, rc);
        rpcrdma_defer_mr_recovery(mw);
        return -EIO;
}

/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 *
 * Caller ensures that req->rl_registered is not empty.
 */
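/* No completion event needs to be waited for here: as noted at the top
 * of this file, FMR deregistration is synchronous, so once ib_unmap_fmr()
 * returns, the MRs can be DMA-unmapped and returned to the free list.
 */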
static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
        struct rpcrdma_mw *mw, *tmp;
        LIST_HEAD(unmap_list);
        int rc;

        dprintk("RPC:       %s: req %p\n", __func__, req);

        /* ORDER: Invalidate all of the req's MRs first
         *
         * ib_unmap_fmr() is slow, so use a single call instead
         * of one call per mapped FMR.
         */
        list_for_each_entry(mw, &req->rl_registered, mw_list)
                list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
        r_xprt->rx_stats.local_inv_needed++;
        rc = ib_unmap_fmr(&unmap_list);
        if (rc)
                goto out_reset;

        /* ORDER: Now DMA unmap all of the req's MRs, and return
         * them to the free MW list.
         */
        list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
                list_del_init(&mw->mw_list);
                list_del_init(&mw->fmr.fm_mr->list);
                ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
                                mw->mw_sg, mw->mw_nents, mw->mw_dir);
                rpcrdma_put_mw(r_xprt, mw);
        }

        return;

out_reset:
        pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);

        list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
                list_del_init(&mw->fmr.fm_mr->list);
                fmr_op_recover_mr(mw);
        }
}

/* Use a slow, safe mechanism to invalidate all memory regions
 * that were registered for "req".
 */
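/* When "sync" is true, each MW is recovered inline via fmr_op_recover_mr();
 * otherwise it is handed to rpcrdma_defer_mr_recovery() to be dealt with
 * later.
 */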
static void
fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
                  bool sync)
{
        struct rpcrdma_mw *mw;

        while (!list_empty(&req->rl_registered)) {
                mw = rpcrdma_pop_mw(&req->rl_registered);
                if (sync)
                        fmr_op_recover_mr(mw);
                else
                        rpcrdma_defer_mr_recovery(mw);
        }
}

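/* The ->ro_* method table for "fmr" mode. The rest of the transport
 * calls through these pointers; fmr_is_supported() above is presumably
 * the gate used when this mode is chosen over other registration modes.
 */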
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
        .ro_map                         = fmr_op_map,
        .ro_unmap_sync                  = fmr_op_unmap_sync,
        .ro_unmap_safe                  = fmr_op_unmap_safe,
        .ro_recover_mr                  = fmr_op_recover_mr,
        .ro_open                        = fmr_op_open,
        .ro_maxpages                    = fmr_op_maxpages,
        .ro_init_mr                     = fmr_op_init_mr,
        .ro_release_mr                  = fmr_op_release_mr,
        .ro_displayname                 = "fmr",
        .ro_send_w_inv_ok               = 0,
};