// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2015 HGST, a Western Digital Company.
 */
#include <linux/err.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

#include "core_priv.h"

#include <trace/events/rdma_core.h>

/* Max size for shared CQ, may require tuning */
#define IB_MAX_SHARED_CQ_SZ 4096U

/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH			16
#define IB_POLL_BATCH_DIRECT		8

/* # of WCs to iterate over before yielding */
#define IB_POLL_BUDGET_IRQ		256
#define IB_POLL_BUDGET_WORKQUEUE	65536

#define IB_POLL_FLAGS \
	(IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
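
/*
 * Completion moderation profiles used by RDMA DIM. Each initializer is a
 * struct dim_cq_moder in {usec, pkts, comps, cq_period_mode} order:
 * rdma_dim() picks an index into this table and ib_cq_rdma_dim_work()
 * below applies the chosen usec/comps pair through ops.modify_cq().
 */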
static const struct dim_cq_moder
rdma_dim_prof[RDMA_DIM_PARAMS_NUM_PROFILES] = {
	{1,   0, 1,  0},
	{1,   0, 4,  0},
	{2,   0, 4,  0},
	{2,   0, 8,  0},
	{4,   0, 8,  0},
	{16,  0, 8,  0},
	{16,  0, 16, 0},
	{32,  0, 16, 0},
	{32,  0, 32, 0},
};

static void ib_cq_rdma_dim_work(struct work_struct *w)
{
	struct dim *dim = container_of(w, struct dim, work);
	struct ib_cq *cq = dim->priv;

	u16 usec = rdma_dim_prof[dim->profile_ix].usec;
	u16 comps = rdma_dim_prof[dim->profile_ix].comps;

	dim->state = DIM_START_MEASURE;

	trace_cq_modify(cq, comps, usec);
	cq->device->ops.modify_cq(cq, comps, usec);
}

static void rdma_dim_init(struct ib_cq *cq)
{
	struct dim *dim;

	if (!cq->device->ops.modify_cq || !cq->device->use_cq_dim ||
	    cq->poll_ctx == IB_POLL_DIRECT)
		return;

	dim = kzalloc(sizeof(struct dim), GFP_KERNEL);
	if (!dim)
		return;

	dim->state = DIM_START_MEASURE;
	dim->tune_state = DIM_GOING_RIGHT;
	dim->profile_ix = RDMA_DIM_START_PROFILE;
	dim->priv = cq;
	cq->dim = dim;

	INIT_WORK(&dim->work, ib_cq_rdma_dim_work);
}

static void rdma_dim_destroy(struct ib_cq *cq)
{
	if (!cq->dim)
		return;

	cancel_work_sync(&cq->dim->work);
	kfree(cq->dim);
}

static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
{
	int rc;

	rc = ib_poll_cq(cq, num_entries, wc);
	trace_cq_poll(cq, num_entries, rc);
	return rc;
}

static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
			   int batch)
{
	int i, n, completed = 0;

	trace_cq_process(cq);

	/*
	 * budget might be (-1) if the caller does not
	 * want to bound this call, thus we need unsigned
	 * minimum here.
	 */
	while ((n = __poll_cq(cq, min_t(u32, batch,
					budget - completed), wcs)) > 0) {
		for (i = 0; i < n; i++) {
			struct ib_wc *wc = &wcs[i];

			if (wc->wr_cqe)
				wc->wr_cqe->done(cq, wc);
			else
				WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
		}

		completed += n;

		if (n != batch || (budget != -1 && completed >= budget))
			break;
	}

	return completed;
}

/**
 * ib_process_cq_direct - process a CQ in caller context
 * @cq:		CQ to process
 * @budget:	number of CQEs to poll for
 *
 * This function is used to process all outstanding CQ entries.
 * It does not offload CQ processing to a different context and does
 * not ask for completion interrupts from the HCA.
 * Using direct processing on CQ with non IB_POLL_DIRECT type may trigger
 * concurrent processing.
 *
 * Note: do not pass -1 as %budget unless it is guaranteed that the number
 * of completions that will be processed is small.
 */
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
	struct ib_wc wcs[IB_POLL_BATCH_DIRECT];

	return __ib_process_cq(cq, budget, wcs, IB_POLL_BATCH_DIRECT);
}
EXPORT_SYMBOL(ib_process_cq_direct);
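
/*
 * Illustrative usage sketch (not part of this file): a ULP that allocated
 * its CQ with IB_POLL_DIRECT reaps completions from its own context, for
 * example while spinning on a request. All "my_" names are hypothetical.
 *
 *	static void my_ulp_wait(struct my_ulp_queue *q)
 *	{
 *		while (!q->done) {
 *			ib_process_cq_direct(q->cq, 8);
 *			cpu_relax();
 *		}
 *	}
 */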

static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
{
	WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq);
}

static int ib_poll_handler(struct irq_poll *iop, int budget)
{
	struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
	struct dim *dim = cq->dim;
	int completed;

	completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
	if (completed < budget) {
		irq_poll_complete(&cq->iop);
		if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) {
			trace_cq_reschedule(cq);
			irq_poll_sched(&cq->iop);
		}
	}

	if (dim)
		rdma_dim(dim, completed);

	return completed;
}

static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
{
	trace_cq_schedule(cq);
	irq_poll_sched(&cq->iop);
}

static void ib_cq_poll_work(struct work_struct *work)
{
	struct ib_cq *cq = container_of(work, struct ib_cq, work);
	int completed;

	completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, cq->wc,
				    IB_POLL_BATCH);
	if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
	    ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
		queue_work(cq->comp_wq, &cq->work);
	else if (cq->dim)
		rdma_dim(cq->dim, completed);
}

static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
{
	trace_cq_schedule(cq);
	queue_work(cq->comp_wq, &cq->work);
}

/**
 * __ib_alloc_cq - allocate a completion queue
 * @dev:		device to allocate the CQ for
 * @private:		driver private data, accessible from cq->cq_context
 * @nr_cqe:		number of CQEs to allocate
 * @comp_vector:	HCA completion vectors for this CQ
 * @poll_ctx:		context to poll the CQ from.
 * @caller:		module owner name.
 *
 * This is the proper interface to allocate a CQ for in-kernel users. A
 * CQ allocated with this interface will automatically be polled from the
 * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
 * to use this CQ abstraction.
 */
struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
			    int comp_vector, enum ib_poll_context poll_ctx,
			    const char *caller)
{
	struct ib_cq_init_attr cq_attr = {
		.cqe		= nr_cqe,
		.comp_vector	= comp_vector,
	};
	struct ib_cq *cq;
	int ret = -ENOMEM;

	cq = rdma_zalloc_drv_obj(dev, ib_cq);
	if (!cq)
		return ERR_PTR(ret);

	cq->device = dev;
	cq->cq_context = private;
	cq->poll_ctx = poll_ctx;
	atomic_set(&cq->usecnt, 0);
	cq->comp_vector = comp_vector;

	cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
	if (!cq->wc)
		goto out_free_cq;

	rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
	rdma_restrack_set_name(&cq->res, caller);

	ret = dev->ops.create_cq(cq, &cq_attr, NULL);
	if (ret)
		goto out_free_wc;

	rdma_dim_init(cq);

	switch (cq->poll_ctx) {
	case IB_POLL_DIRECT:
		cq->comp_handler = ib_cq_completion_direct;
		break;
	case IB_POLL_SOFTIRQ:
		cq->comp_handler = ib_cq_completion_softirq;

		irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
		break;
	case IB_POLL_WORKQUEUE:
	case IB_POLL_UNBOUND_WORKQUEUE:
		cq->comp_handler = ib_cq_completion_workqueue;
		INIT_WORK(&cq->work, ib_cq_poll_work);
		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
		cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
				ib_comp_wq : ib_comp_unbound_wq;
		break;
	default:
		ret = -EINVAL;
		goto out_destroy_cq;
	}

	rdma_restrack_add(&cq->res);
	trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
	return cq;

out_destroy_cq:
	rdma_dim_destroy(cq);
	cq->device->ops.destroy_cq(cq, NULL);
out_free_wc:
	rdma_restrack_put(&cq->res);
	kfree(cq->wc);
out_free_cq:
	kfree(cq);
	trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(__ib_alloc_cq);
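
/*
 * Illustrative usage sketch (not part of this file): ULPs normally go
 * through the ib_alloc_cq() wrapper, which supplies KBUILD_MODNAME as
 * @caller, embed a struct ib_cqe in each request and set wr->wr_cqe so
 * the completion is dispatched to their ->done() handler from the chosen
 * polling context. All "my_" names below are hypothetical.
 *
 *	static void my_send_done(struct ib_cq *cq, struct ib_wc *wc)
 *	{
 *		struct my_request *req =
 *			container_of(wc->wr_cqe, struct my_request, cqe);
 *
 *		my_complete_request(req, wc->status);
 *	}
 *
 *	cq = ib_alloc_cq(dev, ctx, 128, 0, IB_POLL_SOFTIRQ);
 *	if (IS_ERR(cq))
 *		return PTR_ERR(cq);
 *	...
 *	req->cqe.done = my_send_done;
 *	wr.wr_cqe = &req->cqe;
 *	...
 *	ib_free_cq(cq);
 */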

/**
 * __ib_alloc_cq_any - allocate a completion queue
 * @dev:		device to allocate the CQ for
 * @private:		driver private data, accessible from cq->cq_context
 * @nr_cqe:		number of CQEs to allocate
 * @poll_ctx:		context to poll the CQ from
 * @caller:		module owner name
 *
 * Attempt to spread ULP Completion Queues over each device's interrupt
 * vectors. A simple best-effort mechanism is used.
 */
struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
				int nr_cqe, enum ib_poll_context poll_ctx,
				const char *caller)
{
	static atomic_t counter;
	int comp_vector = 0;

	if (dev->num_comp_vectors > 1)
		comp_vector =
			atomic_inc_return(&counter) %
			min_t(int, dev->num_comp_vectors, num_online_cpus());

	return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
			     caller);
}
EXPORT_SYMBOL(__ib_alloc_cq_any);

/**
 * ib_free_cq - free a completion queue
 * @cq:		completion queue to free.
 */
void ib_free_cq(struct ib_cq *cq)
{
	int ret;

	if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
		return;
	if (WARN_ON_ONCE(cq->cqe_used))
		return;

	switch (cq->poll_ctx) {
	case IB_POLL_DIRECT:
		break;
	case IB_POLL_SOFTIRQ:
		irq_poll_disable(&cq->iop);
		break;
	case IB_POLL_WORKQUEUE:
	case IB_POLL_UNBOUND_WORKQUEUE:
		cancel_work_sync(&cq->work);
		break;
	default:
		WARN_ON_ONCE(1);
	}

	rdma_dim_destroy(cq);
	trace_cq_free(cq);
	ret = cq->device->ops.destroy_cq(cq, NULL);
	WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
	rdma_restrack_del(&cq->res);
	kfree(cq->wc);
	kfree(cq);
}
EXPORT_SYMBOL(ib_free_cq);

void ib_cq_pool_cleanup(struct ib_device *dev)
{
	struct ib_cq *cq, *n;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) {
		list_for_each_entry_safe(cq, n, &dev->cq_pools[i],
					 pool_entry) {
			WARN_ON(cq->cqe_used);
			list_del(&cq->pool_entry);
			cq->shared = false;
			ib_free_cq(cq);
		}
	}
}

static int ib_alloc_cqs(struct ib_device *dev, unsigned int nr_cqes,
			enum ib_poll_context poll_ctx)
{
	LIST_HEAD(tmp_list);
	unsigned int nr_cqs, i;
	struct ib_cq *cq, *n;
	int ret;

	if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
		WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
		return -EINVAL;
	}

	/*
	 * Allocate at least as many CQEs as requested, and otherwise
	 * a reasonable batch size so that we can share CQs between
	 * multiple users instead of allocating a larger number of CQs.
	 */
	nr_cqes = min_t(unsigned int, dev->attrs.max_cqe,
			max(nr_cqes, IB_MAX_SHARED_CQ_SZ));
	nr_cqs = min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
	for (i = 0; i < nr_cqs; i++) {
		cq = ib_alloc_cq(dev, NULL, nr_cqes, i, poll_ctx);
		if (IS_ERR(cq)) {
			ret = PTR_ERR(cq);
			goto out_free_cqs;
		}
		cq->shared = true;
		list_add_tail(&cq->pool_entry, &tmp_list);
	}

	spin_lock_irq(&dev->cq_pools_lock);
	list_splice(&tmp_list, &dev->cq_pools[poll_ctx]);
	spin_unlock_irq(&dev->cq_pools_lock);

	return 0;

out_free_cqs:
	list_for_each_entry_safe(cq, n, &tmp_list, pool_entry) {
		cq->shared = false;
		ib_free_cq(cq);
	}
	return ret;
}

/**
 * ib_cq_pool_get() - Find the least used completion queue that matches
 *   a given cpu hint (or least used for wild card affinity) and fits
 *   nr_cqe.
 * @dev: rdma device
 * @nr_cqe: number of needed cqe entries
 * @comp_vector_hint: completion vector hint (-1) for the driver to assign
 *   a comp vector based on internal counter
 * @poll_ctx: cq polling context
 *
 * Finds a cq that satisfies @comp_vector_hint and @nr_cqe requirements and
 * claims entries in it for us. In case there is no available cq, allocate
 * a new cq with the requirements and add it to the device pool.
 * IB_POLL_DIRECT cannot be used for shared cqs so it is not a valid value
 * for @poll_ctx.
 */
struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
			     int comp_vector_hint,
			     enum ib_poll_context poll_ctx)
{
	static unsigned int default_comp_vector;
	unsigned int vector, num_comp_vectors;
	struct ib_cq *cq, *found = NULL;
	int ret;

	if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
		WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
		return ERR_PTR(-EINVAL);
	}

	num_comp_vectors =
		min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
	/* Project the affinity to the device completion vector range */
	if (comp_vector_hint < 0) {
		comp_vector_hint =
			(READ_ONCE(default_comp_vector) + 1) % num_comp_vectors;
		WRITE_ONCE(default_comp_vector, comp_vector_hint);
	}
	vector = comp_vector_hint % num_comp_vectors;

	/*
	 * Find the least used CQ with correct affinity and
	 * enough free CQ entries
	 */
	while (!found) {
		spin_lock_irq(&dev->cq_pools_lock);
		list_for_each_entry(cq, &dev->cq_pools[poll_ctx],
				    pool_entry) {
			/*
			 * Check to see if we have found a CQ with the
			 * correct completion vector
			 */
			if (vector != cq->comp_vector)
				continue;
			if (cq->cqe_used + nr_cqe > cq->cqe)
				continue;
			found = cq;
			break;
		}

		if (found) {
			found->cqe_used += nr_cqe;
			spin_unlock_irq(&dev->cq_pools_lock);

			return found;
		}
		spin_unlock_irq(&dev->cq_pools_lock);

		/*
		 * Didn't find a match or ran out of CQs in the device
		 * pool, allocate a new array of CQs.
		 */
		ret = ib_alloc_cqs(dev, nr_cqe, poll_ctx);
		if (ret)
			return ERR_PTR(ret);
	}

	return found;
}
EXPORT_SYMBOL(ib_cq_pool_get);

/**
 * ib_cq_pool_put - Return a CQ taken from a shared pool.
 * @cq: The CQ to return.
 * @nr_cqe: The max number of cqes that the user had requested.
 */
void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe)
{
	if (WARN_ON_ONCE(nr_cqe > cq->cqe_used))
		return;

	spin_lock_irq(&cq->device->cq_pools_lock);
	cq->cqe_used -= nr_cqe;
	spin_unlock_irq(&cq->device->cq_pools_lock);
}
EXPORT_SYMBOL(ib_cq_pool_put);
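
/*
 * Illustrative usage sketch (not part of this file): a ULP that only needs
 * a slice of a CQ can borrow entries from the shared pool instead of
 * allocating its own CQ, and return them when its queue pair goes away.
 * "my_queue_depth" and "qp" below are hypothetical.
 *
 *	qp->cq = ib_cq_pool_get(dev, my_queue_depth, -1, IB_POLL_SOFTIRQ);
 *	if (IS_ERR(qp->cq))
 *		return PTR_ERR(qp->cq);
 *	...
 *	ib_cq_pool_put(qp->cq, my_queue_depth);
 */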