2008-07-29 22:33:57 -07:00
/*
* SN Platform GRU Driver
*
* KERNEL SERVICES THAT USE THE GRU
*
* Copyright ( c ) 2008 Silicon Graphics , Inc . All Rights Reserved .
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA
*/
# include <linux/kernel.h>
# include <linux/errno.h>
# include <linux/slab.h>
# include <linux/mm.h>
# include <linux/smp_lock.h>
# include <linux/spinlock.h>
# include <linux/device.h>
# include <linux/miscdevice.h>
# include <linux/proc_fs.h>
# include <linux/interrupt.h>
# include <linux/uaccess.h>
2009-06-17 16:28:22 -07:00
# include <linux/delay.h>
2008-07-29 22:33:57 -07:00
# include "gru.h"
# include "grulib.h"
# include "grutables.h"
# include "grukservices.h"
# include "gru_instructions.h"
# include <asm/uv/uv_hub.h>
/*
* Kernel GRU Usage
*
* The following is an interim algorithm for management of kernel GRU
* resources . This will likely be replaced when we better understand the
* kernel / user requirements .
*
2009-06-17 16:28:22 -07:00
* Blade percpu resources reserved for kernel use . These resources are
* reserved whenever the kernel context for the blade is loaded . Note
* that the kernel context is not guaranteed to be always available . It is
* loaded on demand & can be stolen by a user if the user demand exceeds the
* kernel demand . The kernel can always reload the kernel context but
* a SLEEP may be required ! ! ! .
2009-06-17 16:28:25 -07:00
*
* Async Overview :
*
* Each blade has one " kernel context " that owns GRU kernel resources
* located on the blade . Kernel drivers use GRU resources in this context
* for sending messages , zeroing memory , etc .
*
* The kernel context is dynamically loaded on demand . If it is not in
* use by the kernel , the kernel context can be unloaded & given to a user .
* The kernel context will be reloaded when needed . This may require that
* a context be stolen from a user .
* NOTE : frequent unloading / reloading of the kernel context is
* expensive . We are depending on batch schedulers , cpusets , sane
* drivers or some other mechanism to prevent the need for frequent
* stealing / reloading .
*
* The kernel context consists of two parts :
* - 1 CB & a few DSRs that are reserved for each cpu on the blade .
* Each cpu has its own private resources & does not share them
* with other cpus . These resources are used serially , ie ,
* locked , used & unlocked on each call to a function in
* grukservices .
* ( Now that we have dynamic loading of kernel contexts , I
* may rethink this & allow sharing between cpus . . . . )
*
* - Additional resources can be reserved long term & used directly
* by UV drivers located in the kernel . Drivers using these GRU
* resources can use asynchronous GRU instructions that send
* interrupts on completion .
* - these resources must be explicitly locked / unlocked
* - locked resources prevent ( obviously ) the kernel
* context from being unloaded .
* - drivers using these resources directly issue their own
* GRU instruction and must wait / check completion .
*
* When these resources are reserved , the caller can optionally
* associate a wait_queue with the resources and use asynchronous
* GRU instructions . When an async GRU instruction completes , the
* driver will do a wakeup on the event .
*
2008-07-29 22:33:57 -07:00
*/
2009-06-17 16:28:25 -07:00
/*
 * Async resource handles are simply (blade id + 1), so that 0 is never a
 * valid handle and can be returned to mean "reservation failed".
 * ASYNC_HAN_TO_BS() maps a handle straight to its gru_base[] entry.
 */
# define ASYNC_HAN_TO_BID(h) ((h) - 1)
# define ASYNC_BID_TO_HAN(b) ((b) + 1)
# define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)]
2009-04-02 16:59:10 -07:00
/*
 * Per-cpu share of the kernel context: number of CBRs and number of DSR
 * bytes reserved for each cpu, plus the DSR size expressed in GRU cache
 * lines.
 */
# define GRU_NUM_KERNEL_CBR 1
2008-07-29 22:33:57 -07:00
# define GRU_NUM_KERNEL_DSR_BYTES 256
2009-04-02 16:59:10 -07:00
# define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \
GRU_CACHE_LINE_BYTES )
2008-07-29 22:33:57 -07:00
/* GRU instruction attributes for all instructions */
# define IMA IMA_CB_DELAY
/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
# define __gru_cacheline_aligned__ \
__attribute__ ( ( __aligned__ ( GRU_CACHE_LINE_BYTES ) ) )
# define MAGIC 0x1234567887654321UL
/* Default retry count for GRU errors on kernel instructions */
# define EXCEPTION_RETRY_LIMIT 3
/* Status of message queue sections */
# define MQS_EMPTY 0
# define MQS_FULL 1
# define MQS_NOOP 2
/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
/*
 * Header placed at the start of the memory handed to
 * gru_create_message_queue(); message slots start at "data".
 * Layout is optimized for x86_64.
 *
 *   head    - queue head word; swapped by senders with a GRU compare-and-swap
 *             when they flip to the other half of the queue
 *   qlines  - number of cache lines in the queue as computed at create time
 *   hstatus - one word per queue half; set to 1 by gru_free_message() when
 *             the receiver crosses into the other half, and cleared /
 *             incremented by senders (GRU atomic ops) as a lock for
 *             switching halves
 *   next    - receiver's pointer to the next message slot to examine
 *   limit   - receiver wraps back to "start" when "next" reaches this
 *   start   - first message slot
 *   start2  - boundary between the first and second half of the queue
 */
struct message_queue {
union gru_mesqhead head __gru_cacheline_aligned__ ; /* CL 0 */
int qlines ; /* DW 1 */
long hstatus [ 2 ] ;
void * next __gru_cacheline_aligned__ ; /* CL 1 */
void * limit ;
void * start ;
void * start2 ;
char data ____cacheline_aligned ; /* CL 2 */
} ;
/*
 * First word in every message - used by mesq interface.
 *
 *   present  - MQS_* state of the slot
 *   present2 - saved copy of the byte in the second cache line that is
 *              overwritten with a "present" flag for 2-line messages
 *   lines    - message length in cache lines (1 or 2)
 *   fill     - not referenced in this file
 */
struct message_header {
char present ;
char present2 ;
char lines ;
char fill ;
} ;
/* Address of hstatus[h] given the base address (here: a GPA) of a queue */
# define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h]))
2009-06-17 16:28:22 -07:00
/*
* Reload the blade ' s kernel context into a GRU chiplet . Called holding
* the bs_kgts_sema for READ . Will steal user contexts if necessary .
*/
static void gru_load_kernel_context ( struct gru_blade_state * bs , int blade_id )
{
struct gru_state * gru ;
struct gru_thread_state * kgts ;
void * vaddr ;
2009-06-17 16:28:25 -07:00
int ctxnum , ncpus ;
2009-06-17 16:28:22 -07:00
up_read ( & bs - > bs_kgts_sema ) ;
down_write ( & bs - > bs_kgts_sema ) ;
if ( ! bs - > bs_kgts )
2009-06-17 16:28:25 -07:00
bs - > bs_kgts = gru_alloc_gts ( NULL , 0 , 0 , 0 , 0 ) ;
2009-06-17 16:28:22 -07:00
kgts = bs - > bs_kgts ;
if ( ! kgts - > ts_gru ) {
STAT ( load_kernel_context ) ;
2009-06-17 16:28:25 -07:00
ncpus = uv_blade_nr_possible_cpus ( blade_id ) ;
kgts - > ts_cbr_au_count = GRU_CB_COUNT_TO_AU (
GRU_NUM_KERNEL_CBR * ncpus + bs - > bs_async_cbrs ) ;
kgts - > ts_dsr_au_count = GRU_DS_BYTES_TO_AU (
GRU_NUM_KERNEL_DSR_BYTES * ncpus +
bs - > bs_async_dsr_bytes ) ;
2009-06-17 16:28:22 -07:00
while ( ! gru_assign_gru_context ( kgts , blade_id ) ) {
msleep ( 1 ) ;
gru_steal_context ( kgts , blade_id ) ;
}
gru_load_context ( kgts ) ;
gru = bs - > bs_kgts - > ts_gru ;
vaddr = gru - > gs_gru_base_vaddr ;
ctxnum = kgts - > ts_ctxnum ;
bs - > kernel_cb = get_gseg_base_address_cb ( vaddr , ctxnum , 0 ) ;
bs - > kernel_dsr = get_gseg_base_address_ds ( vaddr , ctxnum , 0 ) ;
}
downgrade_write ( & bs - > bs_kgts_sema ) ;
}
/*
* Lock & load the kernel context for the specified blade .
*/
static struct gru_blade_state * gru_lock_kernel_context ( int blade_id )
{
struct gru_blade_state * bs ;
STAT ( lock_kernel_context ) ;
bs = gru_base [ blade_id ] ;
down_read ( & bs - > bs_kgts_sema ) ;
if ( ! bs - > bs_kgts | | ! bs - > bs_kgts - > ts_gru )
gru_load_kernel_context ( bs , blade_id ) ;
return bs ;
}
/*
* Unlock the kernel context for the specified blade . Context is not
* unloaded but may be stolen before next use .
*/
static void gru_unlock_kernel_context ( int blade_id )
{
struct gru_blade_state * bs ;
bs = gru_base [ blade_id ] ;
up_read ( & bs - > bs_kgts_sema ) ;
STAT ( unlock_kernel_context ) ;
}
/*
* Reserve & get pointers to the DSR / CBRs reserved for the current cpu .
* - returns with preemption disabled
*/
2008-07-29 22:33:57 -07:00
static int gru_get_cpu_resources ( int dsr_bytes , void * * cb , void * * dsr )
{
struct gru_blade_state * bs ;
int lcpu ;
BUG_ON ( dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES ) ;
preempt_disable ( ) ;
2009-06-17 16:28:22 -07:00
bs = gru_lock_kernel_context ( uv_numa_blade_id ( ) ) ;
2008-07-29 22:33:57 -07:00
lcpu = uv_blade_processor_id ( ) ;
* cb = bs - > kernel_cb + lcpu * GRU_HANDLE_STRIDE ;
* dsr = bs - > kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES ;
return 0 ;
}
2009-06-17 16:28:22 -07:00
/*
 * Release the per-cpu resources obtained from gru_get_cpu_resources():
 * unlock the local blade's kernel context, then re-enable preemption.
 * The cb / dsr arguments are not used.
 */
static void gru_free_cpu_resources(void *cb, void *dsr)
{
	int blade_id = uv_numa_blade_id();

	gru_unlock_kernel_context(blade_id);
	preempt_enable();
}
2009-06-17 16:28:25 -07:00
/*
* Reserve GRU resources to be used asynchronously .
* Note : currently supports only 1 reservation per blade .
*
* input :
* blade_id - blade on which resources should be reserved
* cbrs - number of CBRs
* dsr_bytes - number of DSR bytes needed
* output :
* handle to identify resource
* ( 0 = async resources already reserved )
*/
unsigned long gru_reserve_async_resources ( int blade_id , int cbrs , int dsr_bytes ,
struct completion * cmp )
{
struct gru_blade_state * bs ;
struct gru_thread_state * kgts ;
int ret = 0 ;
bs = gru_base [ blade_id ] ;
down_write ( & bs - > bs_kgts_sema ) ;
/* Verify no resources already reserved */
if ( bs - > bs_async_dsr_bytes + bs - > bs_async_cbrs )
goto done ;
bs - > bs_async_dsr_bytes = dsr_bytes ;
bs - > bs_async_cbrs = cbrs ;
bs - > bs_async_wq = cmp ;
kgts = bs - > bs_kgts ;
/* Resources changed. Unload context if already loaded */
if ( kgts & & kgts - > ts_gru )
gru_unload_context ( kgts , 0 ) ;
ret = ASYNC_BID_TO_HAN ( blade_id ) ;
done :
up_write ( & bs - > bs_kgts_sema ) ;
return ret ;
}
/*
* Release async resources previously reserved .
*
* input :
* han - handle to identify resources
*/
void gru_release_async_resources ( unsigned long han )
{
struct gru_blade_state * bs = ASYNC_HAN_TO_BS ( han ) ;
down_write ( & bs - > bs_kgts_sema ) ;
bs - > bs_async_dsr_bytes = 0 ;
bs - > bs_async_cbrs = 0 ;
bs - > bs_async_wq = NULL ;
up_write ( & bs - > bs_kgts_sema ) ;
}
/*
* Wait for async GRU instructions to complete .
*
* input :
* han - handle to identify resources
*/
void gru_wait_async_cbr ( unsigned long han )
{
struct gru_blade_state * bs = ASYNC_HAN_TO_BS ( han ) ;
wait_for_completion ( bs - > bs_async_wq ) ;
mb ( ) ;
}
/*
* Lock previous reserved async GRU resources
*
* input :
* han - handle to identify resources
* output :
* cb - pointer to first CBR
* dsr - pointer to first DSR
*/
void gru_lock_async_resource ( unsigned long han , void * * cb , void * * dsr )
{
struct gru_blade_state * bs = ASYNC_HAN_TO_BS ( han ) ;
int blade_id = ASYNC_HAN_TO_BID ( han ) ;
int ncpus ;
gru_lock_kernel_context ( blade_id ) ;
ncpus = uv_blade_nr_possible_cpus ( blade_id ) ;
if ( cb )
* cb = bs - > kernel_cb + ncpus * GRU_HANDLE_STRIDE ;
if ( dsr )
* dsr = bs - > kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES ;
}
/*
 * Unlock previously reserved async GRU resources.
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_unlock_async_resource(unsigned long han)
{
	gru_unlock_kernel_context(ASYNC_HAN_TO_BID(han));
}
2009-06-17 16:28:22 -07:00
/*----------------------------------------------------------------------*/
2008-07-29 22:33:57 -07:00
int gru_get_cb_exception_detail ( void * cb ,
struct control_block_extended_exc_detail * excdet )
{
struct gru_control_block_extended * cbe ;
cbe = get_cbe ( GRUBASE ( cb ) , get_cb_number ( cb ) ) ;
2009-04-02 16:59:04 -07:00
prefetchw ( cbe ) ; /* Harmless on hardware, required for emulator */
2008-07-29 22:33:57 -07:00
excdet - > opc = cbe - > opccpy ;
excdet - > exopc = cbe - > exopccpy ;
excdet - > ecause = cbe - > ecause ;
excdet - > exceptdet0 = cbe - > idef1upd ;
excdet - > exceptdet1 = cbe - > idef3upd ;
return 0 ;
}
char * gru_get_cb_exception_detail_str ( int ret , void * cb ,
char * buf , int size )
{
struct gru_control_block_status * gen = ( void * ) cb ;
struct control_block_extended_exc_detail excdet ;
if ( ret > 0 & & gen - > istatus = = CBS_EXCEPTION ) {
gru_get_cb_exception_detail ( cb , & excdet ) ;
snprintf ( buf , size ,
" GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x, "
" excdet0 0x%lx, excdet1 0x%x " ,
gen , excdet . opc , excdet . exopc , excdet . ecause ,
excdet . exceptdet0 , excdet . exceptdet1 ) ;
} else {
snprintf ( buf , size , " No exception " ) ;
}
return buf ;
}
static int gru_wait_idle_or_exception ( struct gru_control_block_status * gen )
{
while ( gen - > istatus > = CBS_ACTIVE ) {
cpu_relax ( ) ;
barrier ( ) ;
}
return gen - > istatus ;
}
static int gru_retry_exception ( void * cb )
{
struct gru_control_block_status * gen = ( void * ) cb ;
struct control_block_extended_exc_detail excdet ;
int retry = EXCEPTION_RETRY_LIMIT ;
while ( 1 ) {
if ( gru_get_cb_message_queue_substatus ( cb ) )
break ;
if ( gru_wait_idle_or_exception ( gen ) = = CBS_IDLE )
return CBS_IDLE ;
gru_get_cb_exception_detail ( cb , & excdet ) ;
2009-06-17 16:28:27 -07:00
if ( ( excdet . ecause & ~ EXCEPTION_RETRY_BITS ) | |
( excdet . cbrexecstatus & CBR_EXS_ABORT_OCC ) )
2008-07-29 22:33:57 -07:00
break ;
if ( retry - - = = 0 )
break ;
gen - > icmd = 1 ;
gru_flush_cache ( gen ) ;
}
return CBS_EXCEPTION ;
}
int gru_check_status_proc ( void * cb )
{
struct gru_control_block_status * gen = ( void * ) cb ;
int ret ;
ret = gen - > istatus ;
if ( ret ! = CBS_EXCEPTION )
return ret ;
return gru_retry_exception ( cb ) ;
}
/*
 * Wait for a CB to leave the active state.  Exceptions are passed through
 * the retry logic; the final status is returned.
 */
int gru_wait_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int status = gru_wait_idle_or_exception(gen);

	if (status != CBS_EXCEPTION)
		return status;
	return gru_retry_exception(cb);
}
void gru_abort ( int ret , void * cb , char * str )
{
char buf [ GRU_EXC_STR_SIZE ] ;
panic ( " GRU FATAL ERROR: %s - %s \n " , str ,
gru_get_cb_exception_detail_str ( ret , cb , buf , sizeof ( buf ) ) ) ;
}
/*
 * Wait for a CB to complete; any non-zero final status is fatal and ends in
 * gru_abort().
 */
void gru_wait_abort_proc(void *cb)
{
	int status = gru_wait_proc(cb);

	if (status != 0)
		gru_abort(status, cb, " gru_wait_abort ");
}
/*------------------------------ MESSAGE QUEUES -----------------------------*/
/*
 * Internal status. These are NOT returned to the user: the send loop in
 * gru_send_message_gpa() reissues the message while it sees MQIE_AGAIN.
 */
# define MQIE_AGAIN -1 /* try again */
/*
* Save / restore the " present " flag that is in the second line of 2 - line
* messages
*/
static inline int get_present2 ( void * p )
{
struct message_header * mhdr = p + GRU_CACHE_LINE_BYTES ;
return mhdr - > present ;
}
static inline void restore_present2 ( void * p , int val )
{
struct message_header * mhdr = p + GRU_CACHE_LINE_BYTES ;
mhdr - > present = val ;
}
/*
* Create a message queue .
* qlines - message queue size in cache lines . Includes 2 - line header .
*/
2009-04-02 16:59:10 -07:00
int gru_create_message_queue ( struct gru_message_queue_desc * mqd ,
void * p , unsigned int bytes , int nasid , int vector , int apicid )
2008-07-29 22:33:57 -07:00
{
struct message_queue * mq = p ;
unsigned int qlines ;
qlines = bytes / GRU_CACHE_LINE_BYTES - 2 ;
memset ( mq , 0 , bytes ) ;
mq - > start = & mq - > data ;
mq - > start2 = & mq - > data + ( qlines / 2 - 1 ) * GRU_CACHE_LINE_BYTES ;
mq - > next = & mq - > data ;
mq - > limit = & mq - > data + ( qlines - 2 ) * GRU_CACHE_LINE_BYTES ;
mq - > qlines = qlines ;
mq - > hstatus [ 0 ] = 0 ;
mq - > hstatus [ 1 ] = 1 ;
mq - > head = gru_mesq_head ( 2 , qlines / 2 + 1 ) ;
2009-04-02 16:59:10 -07:00
mqd - > mq = mq ;
mqd - > mq_gpa = uv_gpa ( mq ) ;
mqd - > qlines = qlines ;
mqd - > interrupt_pnode = UV_NASID_TO_PNODE ( nasid ) ;
mqd - > interrupt_vector = vector ;
mqd - > interrupt_apicid = apicid ;
2008-07-29 22:33:57 -07:00
return 0 ;
}
EXPORT_SYMBOL_GPL ( gru_create_message_queue ) ;
/*
* Send a NOOP message to a message queue
* Returns :
* 0 - if queue is full after the send . This is the normal case
* but various races can change this .
* - 1 - if mesq sent successfully but queue not full
* > 0 - unexpected error . MQE_xxx returned
*/
2009-04-02 16:59:10 -07:00
static int send_noop_message ( void * cb , struct gru_message_queue_desc * mqd ,
void * mesg )
2008-07-29 22:33:57 -07:00
{
const struct message_header noop_header = {
. present = MQS_NOOP , . lines = 1 } ;
unsigned long m ;
int substatus , ret ;
struct message_header save_mhdr , * mhdr = mesg ;
STAT ( mesq_noop ) ;
save_mhdr = * mhdr ;
* mhdr = noop_header ;
2009-04-02 16:59:10 -07:00
gru_mesq ( cb , mqd - > mq_gpa , gru_get_tri ( mhdr ) , 1 , IMA ) ;
2008-07-29 22:33:57 -07:00
ret = gru_wait ( cb ) ;
if ( ret ) {
substatus = gru_get_cb_message_queue_substatus ( cb ) ;
switch ( substatus ) {
case CBSS_NO_ERROR :
STAT ( mesq_noop_unexpected_error ) ;
ret = MQE_UNEXPECTED_CB_ERR ;
break ;
case CBSS_LB_OVERFLOWED :
STAT ( mesq_noop_lb_overflow ) ;
ret = MQE_CONGESTION ;
break ;
case CBSS_QLIMIT_REACHED :
STAT ( mesq_noop_qlimit_reached ) ;
ret = 0 ;
break ;
case CBSS_AMO_NACKED :
STAT ( mesq_noop_amo_nacked ) ;
ret = MQE_CONGESTION ;
break ;
case CBSS_PUT_NACKED :
STAT ( mesq_noop_put_nacked ) ;
2009-04-02 16:59:10 -07:00
m = mqd - > mq_gpa + ( gru_get_amo_value_head ( cb ) < < 6 ) ;
2008-07-29 22:33:57 -07:00
gru_vstore ( cb , m , gru_get_tri ( mesg ) , XTYPE_CL , 1 , 1 ,
IMA ) ;
if ( gru_wait ( cb ) = = CBS_IDLE )
ret = MQIE_AGAIN ;
else
ret = MQE_UNEXPECTED_CB_ERR ;
break ;
case CBSS_PAGE_OVERFLOW :
default :
BUG ( ) ;
}
}
* mhdr = save_mhdr ;
return ret ;
}
/*
* Handle a gru_mesq full .
*/
2009-04-02 16:59:10 -07:00
static int send_message_queue_full ( void * cb , struct gru_message_queue_desc * mqd ,
void * mesg , int lines )
2008-07-29 22:33:57 -07:00
{
union gru_mesqhead mqh ;
unsigned int limit , head ;
unsigned long avalue ;
2009-04-02 16:59:10 -07:00
int half , qlines ;
2008-07-29 22:33:57 -07:00
/* Determine if switching to first/second half of q */
avalue = gru_get_amo_value ( cb ) ;
head = gru_get_amo_value_head ( cb ) ;
limit = gru_get_amo_value_limit ( cb ) ;
2009-04-02 16:59:10 -07:00
qlines = mqd - > qlines ;
2008-07-29 22:33:57 -07:00
half = ( limit ! = qlines ) ;
if ( half )
mqh = gru_mesq_head ( qlines / 2 + 1 , qlines ) ;
else
mqh = gru_mesq_head ( 2 , qlines / 2 + 1 ) ;
/* Try to get lock for switching head pointer */
2009-04-02 16:59:10 -07:00
gru_gamir ( cb , EOP_IR_CLR , HSTATUS ( mqd - > mq_gpa , half ) , XTYPE_DW , IMA ) ;
2008-07-29 22:33:57 -07:00
if ( gru_wait ( cb ) ! = CBS_IDLE )
goto cberr ;
if ( ! gru_get_amo_value ( cb ) ) {
STAT ( mesq_qf_locked ) ;
return MQE_QUEUE_FULL ;
}
/* Got the lock. Send optional NOP if queue not full, */
if ( head ! = limit ) {
2009-04-02 16:59:10 -07:00
if ( send_noop_message ( cb , mqd , mesg ) ) {
gru_gamir ( cb , EOP_IR_INC , HSTATUS ( mqd - > mq_gpa , half ) ,
2008-07-29 22:33:57 -07:00
XTYPE_DW , IMA ) ;
if ( gru_wait ( cb ) ! = CBS_IDLE )
goto cberr ;
STAT ( mesq_qf_noop_not_full ) ;
return MQIE_AGAIN ;
}
avalue + + ;
}
/* Then flip queuehead to other half of queue. */
2009-04-02 16:59:10 -07:00
gru_gamer ( cb , EOP_ERR_CSWAP , mqd - > mq_gpa , XTYPE_DW , mqh . val , avalue ,
IMA ) ;
2008-07-29 22:33:57 -07:00
if ( gru_wait ( cb ) ! = CBS_IDLE )
goto cberr ;
/* If not successfully in swapping queue head, clear the hstatus lock */
if ( gru_get_amo_value ( cb ) ! = avalue ) {
STAT ( mesq_qf_switch_head_failed ) ;
2009-04-02 16:59:10 -07:00
gru_gamir ( cb , EOP_IR_INC , HSTATUS ( mqd - > mq_gpa , half ) , XTYPE_DW ,
IMA ) ;
2008-07-29 22:33:57 -07:00
if ( gru_wait ( cb ) ! = CBS_IDLE )
goto cberr ;
}
return MQIE_AGAIN ;
cberr :
STAT ( mesq_qf_unexpected_error ) ;
return MQE_UNEXPECTED_CB_ERR ;
}
2009-04-02 16:59:10 -07:00
/*
* Send a cross - partition interrupt to the SSI that contains the target
* message queue . Normally , the interrupt is automatically delivered by hardware
* but some error conditions require explicit delivery .
*/
static void send_message_queue_interrupt ( struct gru_message_queue_desc * mqd )
{
if ( mqd - > interrupt_vector )
uv_hub_send_ipi ( mqd - > interrupt_pnode , mqd - > interrupt_apicid ,
mqd - > interrupt_vector ) ;
}
2009-06-17 16:28:23 -07:00
/*
* Handle a PUT failure . Note : if message was a 2 - line message , one of the
* lines might have successfully have been written . Before sending the
* message , " present " must be cleared in BOTH lines to prevent the receiver
* from prematurely seeing the full message .
*/
static int send_message_put_nacked ( void * cb , struct gru_message_queue_desc * mqd ,
void * mesg , int lines )
{
unsigned long m ;
m = mqd - > mq_gpa + ( gru_get_amo_value_head ( cb ) < < 6 ) ;
if ( lines = = 2 ) {
gru_vset ( cb , m , 0 , XTYPE_CL , lines , 1 , IMA ) ;
if ( gru_wait ( cb ) ! = CBS_IDLE )
return MQE_UNEXPECTED_CB_ERR ;
}
gru_vstore ( cb , m , gru_get_tri ( mesg ) , XTYPE_CL , lines , 1 , IMA ) ;
if ( gru_wait ( cb ) ! = CBS_IDLE )
return MQE_UNEXPECTED_CB_ERR ;
send_message_queue_interrupt ( mqd ) ;
return MQE_OK ;
}
2008-07-29 22:33:57 -07:00
/*
* Handle a gru_mesq failure . Some of these failures are software recoverable
* or retryable .
*/
2009-04-02 16:59:10 -07:00
static int send_message_failure ( void * cb , struct gru_message_queue_desc * mqd ,
void * mesg , int lines )
2008-07-29 22:33:57 -07:00
{
int substatus , ret = 0 ;
substatus = gru_get_cb_message_queue_substatus ( cb ) ;
switch ( substatus ) {
case CBSS_NO_ERROR :
STAT ( mesq_send_unexpected_error ) ;
ret = MQE_UNEXPECTED_CB_ERR ;
break ;
case CBSS_LB_OVERFLOWED :
STAT ( mesq_send_lb_overflow ) ;
ret = MQE_CONGESTION ;
break ;
case CBSS_QLIMIT_REACHED :
STAT ( mesq_send_qlimit_reached ) ;
2009-04-02 16:59:10 -07:00
ret = send_message_queue_full ( cb , mqd , mesg , lines ) ;
2008-07-29 22:33:57 -07:00
break ;
case CBSS_AMO_NACKED :
STAT ( mesq_send_amo_nacked ) ;
ret = MQE_CONGESTION ;
break ;
case CBSS_PUT_NACKED :
STAT ( mesq_send_put_nacked ) ;
2009-06-17 16:28:23 -07:00
ret = send_message_put_nacked ( cb , mqd , mesg , lines ) ;
2008-07-29 22:33:57 -07:00
break ;
default :
BUG ( ) ;
}
return ret ;
}
/*
* Send a message to a message queue
2009-04-02 16:59:10 -07:00
* mqd message queue descriptor
2008-07-29 22:33:57 -07:00
* mesg message . ust be vaddr within a GSEG
* bytes message size ( < = 2 CL )
*/
2009-04-02 16:59:10 -07:00
int gru_send_message_gpa ( struct gru_message_queue_desc * mqd , void * mesg ,
unsigned int bytes )
2008-07-29 22:33:57 -07:00
{
struct message_header * mhdr ;
void * cb ;
void * dsr ;
int istatus , clines , ret ;
STAT ( mesq_send ) ;
BUG_ON ( bytes < sizeof ( int ) | | bytes > 2 * GRU_CACHE_LINE_BYTES ) ;
2008-10-15 22:01:27 -07:00
clines = DIV_ROUND_UP ( bytes , GRU_CACHE_LINE_BYTES ) ;
2008-07-29 22:33:57 -07:00
if ( gru_get_cpu_resources ( bytes , & cb , & dsr ) )
return MQE_BUG_NO_RESOURCES ;
memcpy ( dsr , mesg , bytes ) ;
mhdr = dsr ;
mhdr - > present = MQS_FULL ;
mhdr - > lines = clines ;
if ( clines = = 2 ) {
mhdr - > present2 = get_present2 ( mhdr ) ;
restore_present2 ( mhdr , MQS_FULL ) ;
}
do {
ret = MQE_OK ;
2009-04-02 16:59:10 -07:00
gru_mesq ( cb , mqd - > mq_gpa , gru_get_tri ( mhdr ) , clines , IMA ) ;
2008-07-29 22:33:57 -07:00
istatus = gru_wait ( cb ) ;
if ( istatus ! = CBS_IDLE )
2009-04-02 16:59:10 -07:00
ret = send_message_failure ( cb , mqd , dsr , clines ) ;
2008-07-29 22:33:57 -07:00
} while ( ret = = MQIE_AGAIN ) ;
gru_free_cpu_resources ( cb , dsr ) ;
if ( ret )
STAT ( mesq_send_failed ) ;
return ret ;
}
EXPORT_SYMBOL_GPL ( gru_send_message_gpa ) ;
/*
* Advance the receive pointer for the queue to the next message .
*/
2009-04-02 16:59:10 -07:00
void gru_free_message ( struct gru_message_queue_desc * mqd , void * mesg )
2008-07-29 22:33:57 -07:00
{
2009-04-02 16:59:10 -07:00
struct message_queue * mq = mqd - > mq ;
2008-07-29 22:33:57 -07:00
struct message_header * mhdr = mq - > next ;
void * next , * pnext ;
int half = - 1 ;
int lines = mhdr - > lines ;
if ( lines = = 2 )
restore_present2 ( mhdr , MQS_EMPTY ) ;
mhdr - > present = MQS_EMPTY ;
pnext = mq - > next ;
next = pnext + GRU_CACHE_LINE_BYTES * lines ;
if ( next = = mq - > limit ) {
next = mq - > start ;
half = 1 ;
} else if ( pnext < mq - > start2 & & next > = mq - > start2 ) {
half = 0 ;
}
if ( half > = 0 )
mq - > hstatus [ half ] = 1 ;
mq - > next = next ;
}
EXPORT_SYMBOL_GPL ( gru_free_message ) ;
/*
* Get next message from message queue . Return NULL if no message
* present . User must call next_message ( ) to move to next message .
* rmq message queue
*/
2009-04-02 16:59:10 -07:00
void * gru_get_next_message ( struct gru_message_queue_desc * mqd )
2008-07-29 22:33:57 -07:00
{
2009-04-02 16:59:10 -07:00
struct message_queue * mq = mqd - > mq ;
2008-07-29 22:33:57 -07:00
struct message_header * mhdr = mq - > next ;
int present = mhdr - > present ;
/* skip NOOP messages */
STAT ( mesq_receive ) ;
while ( present = = MQS_NOOP ) {
2009-04-02 16:59:10 -07:00
gru_free_message ( mqd , mhdr ) ;
2008-07-29 22:33:57 -07:00
mhdr = mq - > next ;
present = mhdr - > present ;
}
/* Wait for both halves of 2 line messages */
if ( present = = MQS_FULL & & mhdr - > lines = = 2 & &
get_present2 ( mhdr ) = = MQS_EMPTY )
present = MQS_EMPTY ;
if ( ! present ) {
STAT ( mesq_receive_none ) ;
return NULL ;
}
if ( mhdr - > lines = = 2 )
restore_present2 ( mhdr , mhdr - > present2 ) ;
return mhdr ;
}
EXPORT_SYMBOL_GPL ( gru_get_next_message ) ;
/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
/*
* Copy a block of data using the GRU resources
*/
int gru_copy_gpa ( unsigned long dest_gpa , unsigned long src_gpa ,
unsigned int bytes )
{
void * cb ;
void * dsr ;
int ret ;
STAT ( copy_gpa ) ;
if ( gru_get_cpu_resources ( GRU_NUM_KERNEL_DSR_BYTES , & cb , & dsr ) )
return MQE_BUG_NO_RESOURCES ;
gru_bcopy ( cb , src_gpa , dest_gpa , gru_get_tri ( dsr ) ,
2009-04-02 16:59:10 -07:00
XTYPE_B , bytes , GRU_NUM_KERNEL_DSR_CL , IMA ) ;
2008-07-29 22:33:57 -07:00
ret = gru_wait ( cb ) ;
gru_free_cpu_resources ( cb , dsr ) ;
return ret ;
}
EXPORT_SYMBOL_GPL ( gru_copy_gpa ) ;
/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU */
2009-06-17 16:28:26 -07:00
static int quicktest0 ( unsigned long arg )
2008-07-29 22:33:57 -07:00
{
2009-06-17 16:28:22 -07:00
unsigned long word0 ;
unsigned long word1 ;
2008-07-29 22:33:57 -07:00
void * cb ;
2009-06-17 16:28:22 -07:00
void * dsr ;
2008-07-29 22:33:57 -07:00
unsigned long * p ;
2009-06-17 16:28:26 -07:00
int ret = - EIO ;
2008-07-29 22:33:57 -07:00
2009-06-17 16:28:22 -07:00
if ( gru_get_cpu_resources ( GRU_CACHE_LINE_BYTES , & cb , & dsr ) )
return MQE_BUG_NO_RESOURCES ;
p = dsr ;
2008-07-29 22:33:57 -07:00
word0 = MAGIC ;
2009-06-17 16:28:22 -07:00
word1 = 0 ;
2008-07-29 22:33:57 -07:00
2009-06-17 16:28:22 -07:00
gru_vload ( cb , uv_gpa ( & word0 ) , gru_get_tri ( dsr ) , XTYPE_DW , 1 , 1 , IMA ) ;
2009-06-17 16:28:26 -07:00
if ( gru_wait ( cb ) ! = CBS_IDLE ) {
printk ( KERN_DEBUG " GRU quicktest0: CBR failure 1 \n " ) ;
goto done ;
}
2008-07-29 22:33:57 -07:00
2009-06-17 16:28:26 -07:00
if ( * p ! = MAGIC ) {
printk ( KERN_DEBUG " GRU: quicktest0 bad magic 0x%lx \n " , * p ) ;
goto done ;
}
2009-06-17 16:28:22 -07:00
gru_vstore ( cb , uv_gpa ( & word1 ) , gru_get_tri ( dsr ) , XTYPE_DW , 1 , 1 , IMA ) ;
2009-06-17 16:28:26 -07:00
if ( gru_wait ( cb ) ! = CBS_IDLE ) {
printk ( KERN_DEBUG " GRU quicktest0: CBR failure 2 \n " ) ;
goto done ;
}
2008-07-29 22:33:57 -07:00
2009-06-17 16:28:22 -07:00
if ( word0 ! = word1 | | word1 ! = MAGIC ) {
2009-06-17 16:28:26 -07:00
printk ( KERN_DEBUG
" GRU quicktest0 err: found 0x%lx, expected 0x%lx \n " ,
2009-06-17 16:28:22 -07:00
word1 , MAGIC ) ;
2009-06-17 16:28:26 -07:00
goto done ;
2008-07-29 22:33:57 -07:00
}
2009-06-17 16:28:26 -07:00
ret = 0 ;
2008-07-29 22:33:57 -07:00
2009-06-17 16:28:26 -07:00
done :
gru_free_cpu_resources ( cb , dsr ) ;
return ret ;
2008-07-29 22:33:57 -07:00
}
2009-06-17 16:28:26 -07:00
# define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))
static int quicktest1 ( unsigned long arg )
{
struct gru_message_queue_desc mqd ;
void * p , * mq ;
unsigned long * dw ;
int i , ret = - EIO ;
char mes [ GRU_CACHE_LINE_BYTES ] , * m ;
/* Need 1K cacheline aligned that does not cross page boundary */
p = kmalloc ( 4096 , 0 ) ;
mq = ALIGNUP ( p , 1024 ) ;
memset ( mes , 0xee , sizeof ( mes ) ) ;
dw = mq ;
gru_create_message_queue ( & mqd , mq , 8 * GRU_CACHE_LINE_BYTES , 0 , 0 , 0 ) ;
for ( i = 0 ; i < 6 ; i + + ) {
mes [ 8 ] = i ;
do {
ret = gru_send_message_gpa ( & mqd , mes , sizeof ( mes ) ) ;
} while ( ret = = MQE_CONGESTION ) ;
if ( ret )
break ;
}
if ( ret ! = MQE_QUEUE_FULL | | i ! = 4 )
goto done ;
for ( i = 0 ; i < 6 ; i + + ) {
m = gru_get_next_message ( & mqd ) ;
if ( ! m | | m [ 8 ] ! = i )
break ;
gru_free_message ( & mqd , m ) ;
}
ret = ( i = = 4 ) ? 0 : - EIO ;
done :
kfree ( p ) ;
return ret ;
}
static int quicktest2 ( unsigned long arg )
{
static DECLARE_COMPLETION ( cmp ) ;
unsigned long han ;
int blade_id = 0 ;
int numcb = 4 ;
int ret = 0 ;
unsigned long * buf ;
void * cb0 , * cb ;
int i , k , istatus , bytes ;
bytes = numcb * 4 * 8 ;
buf = kmalloc ( bytes , GFP_KERNEL ) ;
if ( ! buf )
return - ENOMEM ;
ret = - EBUSY ;
han = gru_reserve_async_resources ( blade_id , numcb , 0 , & cmp ) ;
if ( ! han )
goto done ;
gru_lock_async_resource ( han , & cb0 , NULL ) ;
memset ( buf , 0xee , bytes ) ;
for ( i = 0 ; i < numcb ; i + + )
gru_vset ( cb0 + i * GRU_HANDLE_STRIDE , uv_gpa ( & buf [ i * 4 ] ) , 0 ,
XTYPE_DW , 4 , 1 , IMA_INTERRUPT ) ;
ret = 0 ;
for ( k = 0 ; k < numcb ; k + + ) {
gru_wait_async_cbr ( han ) ;
for ( i = 0 ; i < numcb ; i + + ) {
cb = cb0 + i * GRU_HANDLE_STRIDE ;
istatus = gru_check_status ( cb ) ;
if ( istatus = = CBS_ACTIVE )
continue ;
if ( istatus = = CBS_EXCEPTION )
ret = - EFAULT ;
else if ( buf [ i ] | | buf [ i + 1 ] | | buf [ i + 2 ] | |
buf [ i + 3 ] )
ret = - EIO ;
}
}
BUG_ON ( cmp . done ) ;
gru_unlock_async_resource ( han ) ;
gru_release_async_resources ( han ) ;
done :
kfree ( buf ) ;
return ret ;
}
/*
 * Debugging only.  User hook for various kernel tests of driver & gru.
 * The low byte of arg selects the test; unknown test numbers yield -EINVAL,
 * otherwise the result of the selected quicktest is returned.
 */
int gru_ktest(unsigned long arg)
{
	switch (arg & 0xff) {
	case 0:
		return quicktest0(arg);
	case 1:
		return quicktest1(arg);
	case 2:
		return quicktest2(arg);
	default:
		return -EINVAL;
	}
}
2008-07-29 22:33:57 -07:00
int gru_kservices_init ( struct gru_state * gru )
{
struct gru_blade_state * bs ;
2009-06-17 16:28:22 -07:00
2008-07-29 22:33:57 -07:00
bs = gru - > gs_blade ;
2009-06-17 16:28:22 -07:00
if ( gru ! = & bs - > bs_grus [ 0 ] )
2008-07-29 22:33:57 -07:00
return 0 ;
2009-06-17 16:28:22 -07:00
init_rwsem ( & bs - > bs_kgts_sema ) ;
2008-07-29 22:33:57 -07:00
return 0 ;
}
2009-04-02 16:59:11 -07:00
void gru_kservices_exit ( struct gru_state * gru )
{
struct gru_blade_state * bs ;
2009-06-17 16:28:22 -07:00
struct gru_thread_state * kgts ;
2009-04-02 16:59:11 -07:00
bs = gru - > gs_blade ;
2009-06-17 16:28:22 -07:00
if ( gru ! = & bs - > bs_grus [ 0 ] )
2009-04-02 16:59:11 -07:00
return ;
2009-06-17 16:28:22 -07:00
kgts = bs - > bs_kgts ;
if ( kgts & & kgts - > ts_gru )
gru_unload_context ( kgts , 0 ) ;
kfree ( kgts ) ;
2009-04-02 16:59:11 -07:00
}