/*
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * PCIe NTB Perf Linux driver
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/ntb.h>
#include <linux/mutex.h>

#define DRIVER_NAME		"ntb_perf"
#define DRIVER_DESCRIPTION	"PCIe NTB Performance Measurement Tool"

#define DRIVER_LICENSE		"Dual BSD/GPL"
#define DRIVER_VERSION		"1.0"
#define DRIVER_AUTHOR		"Dave Jiang <dave.jiang@intel.com>"

#define PERF_LINK_DOWN_TIMEOUT	10
#define PERF_VERSION		0xffff0001
#define MAX_THREADS		32
#define MAX_TEST_SIZE		SZ_1M
#define MAX_SRCS		32
#define DMA_OUT_RESOURCE_TO	msecs_to_jiffies(50)
#define DMA_RETRIES		20
#define SZ_4G			(1ULL << 32)
#define MAX_SEG_ORDER		20 /* no larger than 1M for kmalloc buffer */

#define PIDX			NTB_DEF_PEER_IDX

MODULE_LICENSE(DRIVER_LICENSE);
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESCRIPTION);

static struct dentry *perf_debugfs_dir;

static unsigned long max_mw_size;
module_param(max_mw_size, ulong, 0644);
MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");

static unsigned int seg_order = 19; /* 512K */
module_param(seg_order, uint, 0644);
MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");

static unsigned int run_order = 32; /* 4G */
module_param(run_order, uint, 0644);
MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");

static bool use_dma; /* default to 0 */
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use a DMA engine to measure performance");
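
/*
 * Peer memory window state: the ioremapped outbound window
 * (phys_addr/phys_size/vbase) that this side writes into, plus the locally
 * allocated coherent buffer (virt_addr/dma_addr) that is programmed as the
 * inbound translation so the peer's writes land in local memory.
 */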
struct perf_mw {
	phys_addr_t	phys_addr;
	resource_size_t	phys_size;
	resource_size_t	xlat_align;
	resource_size_t	xlat_align_size;
	void __iomem	*vbase;
	size_t		xlat_size;
	size_t		buf_size;
	void		*virt_addr;
	dma_addr_t	dma_addr;
};

struct perf_ctx;
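
/*
 * Per-thread test context: the kthread doing the copies, its optional DMA
 * channel, the source buffers it cycles through, and the results (status,
 * bytes copied, elapsed time) reported back through debugfs.
 */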
struct pthr_ctx {
	struct task_struct	*thread;
	struct perf_ctx		*perf;
	atomic_t		dma_sync;
	struct dma_chan		*dma_chan;
	int			dma_prep_err;
	int			src_idx;
	void			*srcs[MAX_SRCS];
	wait_queue_head_t	*wq;
	int			status;
	u64			copied;
	u64			diff_us;
};
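
/*
 * Per-device driver context: the NTB device, memory window state, link
 * tracking, debugfs entries and the pool of measurement threads.
 */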
struct perf_ctx {
	struct ntb_dev		*ntb;
	spinlock_t		db_lock;
	struct perf_mw		mw;
	bool			link_is_up;
	struct delayed_work	link_work;
	wait_queue_head_t	link_wq;
	struct dentry		*debugfs_node_dir;
	struct dentry		*debugfs_run;
	struct dentry		*debugfs_threads;
	u8			perf_threads;
	/* mutex ensures only one set of threads run at once */
	struct mutex		run_mutex;
	struct pthr_ctx		pthr_ctx[MAX_THREADS];
	atomic_t		tsync;
	atomic_t		tdone;
};
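
/*
 * Scratchpad register layout used to exchange the protocol version and the
 * local memory window size with the peer during link bring-up.
 */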
enum {
	VERSION = 0,
	MW_SZ_HIGH,
	MW_SZ_LOW,
	MAX_SPAD
};
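
/*
 * Link event callback: on link-up, kick the delayed handshake work; on
 * link-down, cancel a still-pending handshake and mark the link as down.
 */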
static void perf_link_event(void *ctx)
{
	struct perf_ctx *perf = ctx;

	if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) {
		schedule_delayed_work(&perf->link_work, 2*HZ);
	} else {
		dev_dbg(&perf->ntb->pdev->dev, "link down\n");

		if (!perf->link_is_up)
			cancel_delayed_work_sync(&perf->link_work);

		perf->link_is_up = false;
	}
}

static void perf_db_event(void *ctx, int vec)
{
	struct perf_ctx *perf = ctx;
	u64 db_bits, db_mask;

	db_mask = ntb_db_vector_mask(perf->ntb, vec);
	db_bits = ntb_db_read(perf->ntb);

	dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
		vec, db_mask, db_bits);
}

static const struct ntb_ctx_ops perf_ops = {
	.link_event = perf_link_event,
	.db_event = perf_db_event,
};

static void perf_copy_callback(void *data)
{
	struct pthr_ctx *pctx = data;

	atomic_dec(&pctx->dma_sync);
}
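
/*
 * Copy one segment of @size bytes from @src into the peer memory window at
 * @dst.  Without use_dma this is a plain memcpy_toio(); with use_dma the
 * segment is mapped and handed to the dmaengine as a memcpy descriptor, and
 * dma_sync counts descriptors still in flight.
 */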
static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst,
			 char *src, size_t size)
{
	struct perf_ctx *perf = pctx->perf;
	struct dma_async_tx_descriptor *txd;
	struct dma_chan *chan = pctx->dma_chan;
	struct dma_device *device;
	struct dmaengine_unmap_data *unmap;
	dma_cookie_t cookie;
	size_t src_off, dst_off;
	struct perf_mw *mw = &perf->mw;
	void __iomem *vbase;
	void __iomem *dst_vaddr;
	dma_addr_t dst_phys;
	int retries = 0;

	if (!use_dma) {
		memcpy_toio(dst, src, size);
		return size;
	}

	if (!chan) {
		dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
		return -EINVAL;
	}

	device = chan->device;
	src_off = (uintptr_t)src & ~PAGE_MASK;
	dst_off = (uintptr_t __force)dst & ~PAGE_MASK;

	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
		return -ENODEV;

	vbase = mw->vbase;
	dst_vaddr = dst;
	dst_phys = mw->phys_addr + (dst_vaddr - vbase);

	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = size;
	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
				      src_off, size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[0]))
		goto err_get_unmap;

	unmap->to_cnt = 1;

	do {
		txd = device->device_prep_dma_memcpy(chan, dst_phys,
						     unmap->addr[0],
						     size, DMA_PREP_INTERRUPT);
		if (!txd) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(DMA_OUT_RESOURCE_TO);
		}
	} while (!txd && (++retries < DMA_RETRIES));

	if (!txd) {
		pctx->dma_prep_err++;
		goto err_get_unmap;
	}

	txd->callback = perf_copy_callback;
	txd->callback_param = pctx;
	dma_set_unmap(txd, unmap);

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		goto err_set_unmap;

	dmaengine_unmap_put(unmap);

	atomic_inc(&pctx->dma_sync);
	dma_async_issue_pending(chan);

	return size;

err_set_unmap:
	dmaengine_unmap_put(unmap);
err_get_unmap:
	dmaengine_unmap_put(unmap);
	return 0;
}
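
/*
 * Stream @total bytes through the peer memory window in @buf_size chunks,
 * wrapping back to the start of the window every @win_size bytes, and record
 * the number of bytes copied and the elapsed time for this thread.
 */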
static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
			  u64 buf_size, u64 win_size, u64 total)
{
	int chunks, total_chunks, i;
	int copied_chunks = 0;
	u64 copied = 0, result;
	char __iomem *tmp = dst;
	u64 perf, diff_us;
	ktime_t kstart, kstop, kdiff;
	unsigned long last_sleep = jiffies;

	chunks = div64_u64(win_size, buf_size);
	total_chunks = div64_u64(total, buf_size);
	kstart = ktime_get();

	for (i = 0; i < total_chunks; i++) {
		result = perf_copy(pctx, tmp, src, buf_size);
		copied += result;
		copied_chunks++;
		if (copied_chunks == chunks) {
			tmp = dst;
			copied_chunks = 0;
		} else
			tmp += buf_size;

		/* Probably should schedule every 5s to prevent soft hang. */
		if (unlikely((jiffies - last_sleep) > 5 * HZ)) {
			last_sleep = jiffies;
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(1);
		}

		if (unlikely(kthread_should_stop()))
			break;
	}

	if (use_dma) {
		pr_debug("%s: All DMA descriptors submitted\n", current->comm);
		while (atomic_read(&pctx->dma_sync) != 0) {
			if (kthread_should_stop())
				break;
			msleep(20);
		}
	}

	kstop = ktime_get();
	kdiff = ktime_sub(kstop, kstart);
	diff_us = ktime_to_us(kdiff);

	pr_debug("%s: copied %llu bytes\n", current->comm, copied);
	pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us);

	perf = div64_u64(copied, diff_us);

	pr_debug("%s: MBytes/s: %llu\n", current->comm, perf);

	pctx->copied = copied;
	pctx->diff_us = diff_us;

	return 0;
}
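
/* Prefer a DMA channel that lives on the same NUMA node as the NTB device. */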
static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
{
	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
}
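
/*
 * Measurement thread: optionally grabs a DMA channel, allocates source
 * buffers, waits for all sibling threads to be ready, runs the transfer,
 * then parks until threads_cleanup() stops it so its exit status can be
 * collected.
 */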
static int ntb_perf_thread(void *data)
{
	struct pthr_ctx *pctx = data;
	struct perf_ctx *perf = pctx->perf;
	struct pci_dev *pdev = perf->ntb->pdev;
	struct perf_mw *mw = &perf->mw;
	char __iomem *dst;
	u64 win_size, buf_size, total;
	void *src;
	int rc, node, i;
	struct dma_chan *dma_chan = NULL;

	pr_debug("kthread %s starting...\n", current->comm);

	node = dev_to_node(&pdev->dev);

	if (use_dma && !pctx->dma_chan) {
		dma_cap_mask_t dma_mask;

		dma_cap_zero(dma_mask);
		dma_cap_set(DMA_MEMCPY, dma_mask);
		dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
					       (void *)(unsigned long)node);
		if (!dma_chan) {
			pr_warn("%s: cannot acquire DMA channel, quitting\n",
				current->comm);
			return -ENODEV;
		}
		pctx->dma_chan = dma_chan;
	}

	for (i = 0; i < MAX_SRCS; i++) {
		pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
		if (!pctx->srcs[i]) {
			rc = -ENOMEM;
			goto err;
		}
	}

	win_size = mw->phys_size;
	buf_size = 1ULL << seg_order;
	total = 1ULL << run_order;

	if (buf_size > MAX_TEST_SIZE)
		buf_size = MAX_TEST_SIZE;

	dst = (char __iomem *)mw->vbase;

	atomic_inc(&perf->tsync);
	while (atomic_read(&perf->tsync) != perf->perf_threads)
		schedule();

	src = pctx->srcs[pctx->src_idx];
	pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);

	rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);

	atomic_dec(&perf->tsync);

	if (rc < 0) {
		pr_err("%s: failed\n", current->comm);
		rc = -ENXIO;
		goto err;
	}

	for (i = 0; i < MAX_SRCS; i++) {
		kfree(pctx->srcs[i]);
		pctx->srcs[i] = NULL;
	}

	atomic_inc(&perf->tdone);
	wake_up(pctx->wq);
	rc = 0;
	goto done;

err:
	for (i = 0; i < MAX_SRCS; i++) {
		kfree(pctx->srcs[i]);
		pctx->srcs[i] = NULL;
	}

	if (dma_chan) {
		dma_release_channel(dma_chan);
		pctx->dma_chan = NULL;
	}

done:
	/* Wait until we are told to stop */
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop())
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);

	return rc;
}

static void perf_free_mw(struct perf_ctx *perf)
{
	struct perf_mw *mw = &perf->mw;
	struct pci_dev *pdev = perf->ntb->pdev;

	if (!mw->virt_addr)
		return;

	ntb_mw_clear_trans(perf->ntb, PIDX, 0);
	dma_free_coherent(&pdev->dev, mw->buf_size,
			  mw->virt_addr, mw->dma_addr);
	mw->xlat_size = 0;
	mw->buf_size = 0;
	mw->virt_addr = NULL;
}
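
/*
 * Allocate a DMA-coherent buffer rounded up to the hardware alignment
 * requirements and program it as the inbound translation of memory window 0,
 * so that the peer's writes land in local memory.
 */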
static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
{
	struct perf_mw *mw = &perf->mw;
	size_t xlat_size, buf_size;
	int rc;

	if (!size)
		return -EINVAL;

	xlat_size = round_up(size, mw->xlat_align_size);
	buf_size = round_up(size, mw->xlat_align);

	if (mw->xlat_size == xlat_size)
		return 0;

	if (mw->buf_size)
		perf_free_mw(perf);

	mw->xlat_size = xlat_size;
	mw->buf_size = buf_size;

	mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
					   &mw->dma_addr, GFP_KERNEL);
	if (!mw->virt_addr) {
		mw->xlat_size = 0;
		mw->buf_size = 0;
		return -ENOMEM;
	}

	rc = ntb_mw_set_trans(perf->ntb, PIDX, 0, mw->dma_addr, mw->xlat_size);
	if (rc) {
		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
		perf_free_mw(perf);
		return -EIO;
	}

	return 0;
}
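
/*
 * Delayed link-up handshake: publish the local window size and protocol
 * version through the peer's scratchpads, read back the peer's version and
 * window size, size the inbound buffer to match, and mark the link usable.
 */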
static void perf_link_work(struct work_struct *work)
{
	struct perf_ctx *perf =
		container_of(work, struct perf_ctx, link_work.work);
	struct ntb_dev *ndev = perf->ntb;
	struct pci_dev *pdev = ndev->pdev;
	u32 val;
	u64 size;
	int rc;

	dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);

	size = perf->mw.phys_size;

	if (max_mw_size && size > max_mw_size)
		size = max_mw_size;

	ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
	ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
	ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);

	/* now read what peer wrote */
	val = ntb_spad_read(ndev, VERSION);
	if (val != PERF_VERSION) {
		dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
		goto out;
	}

	val = ntb_spad_read(ndev, MW_SZ_HIGH);
	size = (u64)val << 32;

	val = ntb_spad_read(ndev, MW_SZ_LOW);
	size |= val;

	dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);

	rc = perf_set_mw(perf, size);
	if (rc)
		goto out1;

	perf->link_is_up = true;
	wake_up(&perf->link_wq);

	return;

out1:
	perf_free_mw(perf);

out:
	if (ntb_link_is_up(ndev, NULL, NULL) == 1)
		schedule_delayed_work(&perf->link_work,
				      msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
}

static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
{
	struct perf_mw *mw;
	int rc;

	mw = &perf->mw;

	rc = ntb_mw_get_align(ntb, PIDX, 0, &mw->xlat_align,
			      &mw->xlat_align_size, NULL);
	if (rc)
		return rc;

	rc = ntb_peer_mw_get_addr(ntb, 0, &mw->phys_addr, &mw->phys_size);
	if (rc)
		return rc;

	perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
	if (!mw->vbase)
		return -ENOMEM;

	return 0;
}
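
/*
 * Reading the debugfs "run" file reports "running" while a measurement is in
 * progress, otherwise one line per thread with the bytes copied, elapsed time
 * and resulting throughput.
 */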
static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
				size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_off = 0;
	struct pthr_ctx *pctx;
	int i;
	u64 rate;

	if (!perf)
		return 0;

	buf = kmalloc(1024, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (mutex_is_locked(&perf->run_mutex)) {
		out_off = scnprintf(buf, 64, "running\n");
		goto read_from_buf;
	}

	for (i = 0; i < MAX_THREADS; i++) {
		pctx = &perf->pthr_ctx[i];

		if (pctx->status == -ENODATA)
			break;

		if (pctx->status) {
			out_off += scnprintf(buf + out_off, 1024 - out_off,
					     "%d: error %d\n", i,
					     pctx->status);
			continue;
		}

		rate = div64_u64(pctx->copied, pctx->diff_us);
		out_off += scnprintf(buf + out_off, 1024 - out_off,
				     "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
				     i, pctx->copied, pctx->diff_us, rate);
	}

read_from_buf:
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off);
	kfree(buf);

	return ret;
}

static void threads_cleanup(struct perf_ctx *perf)
{
	struct pthr_ctx *pctx;
	int i;

	for (i = 0; i < MAX_THREADS; i++) {
		pctx = &perf->pthr_ctx[i];
		if (pctx->thread) {
			pctx->status = kthread_stop(pctx->thread);
			pctx->thread = NULL;
		}
	}
}

static void perf_clear_thread_status(struct perf_ctx *perf)
{
	int i;

	for (i = 0; i < MAX_THREADS; i++)
		perf->pthr_ctx[i].status = -ENODATA;
}
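
/*
 * Writing to the debugfs "run" file starts a measurement: clamp the module
 * parameters, spawn one kthread per requested thread on the device's NUMA
 * node, wait for all of them to finish, then collect their exit status via
 * threads_cleanup().
 */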
static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
				 size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	int node, i;
	DECLARE_WAIT_QUEUE_HEAD(wq);

	if (wait_event_interruptible(perf->link_wq, perf->link_is_up))
		return -ENOLINK;

	if (perf->perf_threads == 0)
		return -EINVAL;

	if (!mutex_trylock(&perf->run_mutex))
		return -EBUSY;

	perf_clear_thread_status(perf);

	if (perf->perf_threads > MAX_THREADS) {
		perf->perf_threads = MAX_THREADS;
		pr_info("Reset total threads to: %u\n", MAX_THREADS);
	}

	/* no greater than 1M */
	if (seg_order > MAX_SEG_ORDER) {
		seg_order = MAX_SEG_ORDER;
		pr_info("Fix seg_order to %u\n", seg_order);
	}

	if (run_order < seg_order) {
		run_order = seg_order;
		pr_info("Fix run_order to %u\n", run_order);
	}

	node = dev_to_node(&perf->ntb->pdev->dev);
	atomic_set(&perf->tdone, 0);

	/* launch kernel thread */
	for (i = 0; i < perf->perf_threads; i++) {
		struct pthr_ctx *pctx;

		pctx = &perf->pthr_ctx[i];
		atomic_set(&pctx->dma_sync, 0);
		pctx->perf = perf;
		pctx->wq = &wq;
		pctx->thread =
			kthread_create_on_node(ntb_perf_thread,
					       (void *)pctx,
					       node, "ntb_perf %d", i);
		if (IS_ERR(pctx->thread)) {
			pctx->thread = NULL;
			goto err;
		} else {
			wake_up_process(pctx->thread);
		}
	}

	wait_event_interruptible(wq,
		atomic_read(&perf->tdone) == perf->perf_threads);

	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return count;

err:
	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return -ENXIO;
}

static const struct file_operations ntb_perf_debugfs_run = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.read = debugfs_run_read,
	.write = debugfs_run_write,
};

static int perf_debugfs_setup(struct perf_ctx *perf)
{
	struct pci_dev *pdev = perf->ntb->pdev;

	if (!debugfs_initialized())
		return -ENODEV;

	if (!perf_debugfs_dir) {
		perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
		if (!perf_debugfs_dir)
			return -ENODEV;
	}

	perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
						    perf_debugfs_dir);
	if (!perf->debugfs_node_dir)
		return -ENODEV;

	perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
						perf->debugfs_node_dir, perf,
						&ntb_perf_debugfs_run);
	if (!perf->debugfs_run)
		return -ENODEV;

	perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
						  perf->debugfs_node_dir,
						  &perf->perf_threads);
	if (!perf->debugfs_threads)
		return -ENODEV;

	return 0;
}
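
/*
 * A typical run from userspace, assuming debugfs is mounted at
 * /sys/kernel/debug and the NTB device sits at the (illustrative) PCI
 * address 0000:01:00.0:
 *
 *   echo 4 > /sys/kernel/debug/ntb_perf/0000:01:00.0/threads
 *   echo 1 > /sys/kernel/debug/ntb_perf/0000:01:00.0/run
 *   cat /sys/kernel/debug/ntb_perf/0000:01:00.0/run
 */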
static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct pci_dev *pdev = ntb->pdev;
	struct perf_ctx *perf;
	int node;
	int rc = 0;

	if (ntb_spad_count(ntb) < MAX_SPAD) {
		dev_err(&ntb->dev, "Not enough scratch pad registers for %s",
			DRIVER_NAME);
		return -EIO;
	}

	if (!ntb->ops->mw_set_trans) {
		dev_err(&ntb->dev, "Need inbound MW based NTB API\n");
		return -EINVAL;
	}

	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
		dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n");

	node = dev_to_node(&pdev->dev);

	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
	if (!perf) {
		rc = -ENOMEM;
		goto err_perf;
	}

	perf->ntb = ntb;
	perf->perf_threads = 1;
	atomic_set(&perf->tsync, 0);
	mutex_init(&perf->run_mutex);
	spin_lock_init(&perf->db_lock);
	perf_setup_mw(ntb, perf);
	init_waitqueue_head(&perf->link_wq);
	INIT_DELAYED_WORK(&perf->link_work, perf_link_work);

	rc = ntb_set_ctx(ntb, perf, &perf_ops);
	if (rc)
		goto err_ctx;

	perf->link_is_up = false;
	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
	ntb_link_event(ntb);

	rc = perf_debugfs_setup(perf);
	if (rc)
		goto err_ctx;

	perf_clear_thread_status(perf);

	return 0;

err_ctx:
	cancel_delayed_work_sync(&perf->link_work);
	kfree(perf);
err_perf:
	return rc;
}

static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct perf_ctx *perf = ntb->ctx;
	int i;

	dev_dbg(&perf->ntb->dev, "%s called\n", __func__);

	mutex_lock(&perf->run_mutex);

	cancel_delayed_work_sync(&perf->link_work);

	ntb_clear_ctx(ntb);
	ntb_link_disable(ntb);

	debugfs_remove_recursive(perf_debugfs_dir);
	perf_debugfs_dir = NULL;

	if (use_dma) {
		for (i = 0; i < MAX_THREADS; i++) {
			struct pthr_ctx *pctx = &perf->pthr_ctx[i];

			if (pctx->dma_chan)
				dma_release_channel(pctx->dma_chan);
		}
	}

	kfree(perf);
}

static struct ntb_client perf_client = {
	.ops = {
		.probe = perf_probe,
		.remove = perf_remove,
	},
};
module_ntb_client(perf_client);