/*
 * SN Platform GRU Driver
 *
 *              KERNEL SERVICES THAT USE THE GRU
 *
 * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#include "grukservices.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>

/*
 * Kernel GRU Usage
 *
 * The following is an interim algorithm for management of kernel GRU
 * resources. This will likely be replaced when we better understand the
 * kernel/user requirements.
 *
 * At boot time, the kernel permanently reserves a fixed number of
 * CBRs/DSRs for each cpu to use. The resources are all taken from
 * GRU chiplet 1 on the blade. This leaves the full set of resources
 * of chiplet 0 available to be allocated to a single user.
 */

/* Blade percpu resources PERMANENTLY reserved for kernel use */
#define GRU_NUM_KERNEL_CBR	1
#define GRU_NUM_KERNEL_DSR_BYTES 256
#define KERNEL_CTXNUM		15

/* GRU instruction attributes for all instructions */
#define IMA			IMA_CB_DELAY

/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
#define __gru_cacheline_aligned__                               \
	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))

#define MAGIC	0x1234567887654321UL

/* Default retry count for GRU errors on kernel instructions */
#define EXCEPTION_RETRY_LIMIT	3

/* Status of message queue sections */
#define MQS_EMPTY		0
#define MQS_FULL		1
#define MQS_NOOP		2

/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
/* optimized for x86_64 */
struct message_queue {
	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
	int			qlines;				/* DW 1 */
	long			hstatus[2];
	void			*next __gru_cacheline_aligned__;/* CL 1 */
	void			*limit;
	void			*start;
	void			*start2;
	char			data ____cacheline_aligned;	/* CL 2 */
};

/* First word in every message - used by mesq interface */
struct message_header {
	char	present;
	char	present2;
	char	lines;
	char	fill;
};

#define QLINES(mq)	((mq) + offsetof(struct message_queue, qlines))
#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))

static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
{
	struct gru_blade_state *bs;
	int lcpu;

	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
	preempt_disable();
	bs = gru_base[uv_numa_blade_id()];
	lcpu = uv_blade_processor_id();
	*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
	*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
	return 0;
}

static void gru_free_cpu_resources(void *cb, void *dsr)
{
	preempt_enable();
}

int gru_get_cb_exception_detail(void *cb,
		struct control_block_extended_exc_detail *excdet)
{
	struct gru_control_block_extended *cbe;

	cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
	prefetchw(cbe);		/* Harmless on hardware, required for emulator */
	excdet->opc = cbe->opccpy;
	excdet->exopc = cbe->exopccpy;
	excdet->ecause = cbe->ecause;
	excdet->exceptdet0 = cbe->idef1upd;
	excdet->exceptdet1 = cbe->idef3upd;
	return 0;
}

char *gru_get_cb_exception_detail_str(int ret, void *cb,
				      char *buf, int size)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;

	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
		gru_get_cb_exception_detail(cb, &excdet);
		snprintf(buf, size,
			"GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x, "
			"excdet0 0x%lx, excdet1 0x%x",
			gen, excdet.opc, excdet.exopc, excdet.ecause,
			excdet.exceptdet0, excdet.exceptdet1);
	} else {
		snprintf(buf, size, "No exception");
	}
	return buf;
}

static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
{
	while (gen->istatus >= CBS_ACTIVE) {
		cpu_relax();
		barrier();
	}
	return gen->istatus;
}

static int gru_retry_exception(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;
	int retry = EXCEPTION_RETRY_LIMIT;

	while (1)  {
		if (gru_get_cb_message_queue_substatus(cb))
			break;
		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
			return CBS_IDLE;

		gru_get_cb_exception_detail(cb, &excdet);
		if (excdet.ecause & ~EXCEPTION_RETRY_BITS)
			break;
		if (retry-- == 0)
			break;
		gen->icmd = 1;
		gru_flush_cache(gen);
	}
	return CBS_EXCEPTION;
}

int gru_check_status_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gen->istatus;
	if (ret != CBS_EXCEPTION)
		return ret;
	return gru_retry_exception(cb);
}

int gru_wait_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gru_wait_idle_or_exception(gen);
	if (ret == CBS_EXCEPTION)
		ret = gru_retry_exception(cb);

	return ret;
}

void gru_abort(int ret, void *cb, char *str)
{
	char buf[GRU_EXC_STR_SIZE];

	panic("GRU FATAL ERROR: %s - %s\n", str,
	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
}

void gru_wait_abort_proc(void *cb)
{
	int ret;

	ret = gru_wait_proc(cb);
	if (ret)
		gru_abort(ret, cb, "gru_wait_abort");
}

/*------------------------------ MESSAGE QUEUES -----------------------------*/

/* Internal status. These are NOT returned to the user. */
#define MQIE_AGAIN		-1	/* try again */

/*
 * Save/restore the "present" flag that is in the second line of 2-line
 * messages
 */
static inline int get_present2(void *p)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	return mhdr->present;
}

static inline void restore_present2(void *p, int val)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	mhdr->present = val;
}

/*
 * Create a message queue.
 *	bytes - size of the message queue in bytes (a whole number of cache
 *		lines). Includes the 2-line header.
 */
int gru_create_message_queue(void *p, unsigned int bytes)
{
	struct message_queue *mq = p;
	unsigned int qlines;

	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
	memset(mq, 0, bytes);
	mq->start = &mq->data;
	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
	mq->next = &mq->data;
	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
	mq->qlines = qlines;
	mq->hstatus[0] = 0;
	mq->hstatus[1] = 1;
	mq->head = gru_mesq_head(2, qlines / 2 + 1);
	return 0;
}
EXPORT_SYMBOL_GPL(gru_create_message_queue);
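
/*
 * Illustrative sketch only (not part of the driver): how a kernel client
 * might allocate and initialize a message queue and derive the GPA that
 * senders later pass to gru_send_message_gpa().  The 8-line queue size and
 * the kmalloc()-based allocation are assumptions for the example; the buffer
 * is assumed to be cache-line aligned.
 */
static int example_setup_mq(void **mq_ret, unsigned long *mq_gpa_ret)
{
	unsigned int bytes = 8 * GRU_CACHE_LINE_BYTES;	/* 2-line header + 6 data lines */
	void *mq = kmalloc(bytes, GFP_KERNEL);

	if (!mq)
		return -ENOMEM;
	gru_create_message_queue(mq, bytes);	/* builds header, halves, limits */
	*mq_ret = mq;				/* receiver uses the vaddr */
	*mq_gpa_ret = uv_gpa(mq);		/* senders address the queue by GPA */
	return 0;
}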

/*
 * Send a NOOP message to a message queue
 * Returns:
 *	 0 - if queue is full after the send. This is the normal case
 *	     but various races can change this.
 *	-1 - if mesq sent successfully but queue not full
 *	>0 - unexpected error. MQE_xxx returned
 */
static int send_noop_message(void *cb,
				unsigned long mq, void *mesg)
{
	const struct message_header noop_header = {
					.present = MQS_NOOP, .lines = 1};
	unsigned long m;
	int substatus, ret;
	struct message_header save_mhdr, *mhdr = mesg;

	STAT(mesq_noop);
	save_mhdr = *mhdr;
	*mhdr = noop_header;
	gru_mesq(cb, mq, gru_get_tri(mhdr), 1, IMA);
	ret = gru_wait(cb);

	if (ret) {
		substatus = gru_get_cb_message_queue_substatus(cb);
		switch (substatus) {
		case CBSS_NO_ERROR:
			STAT(mesq_noop_unexpected_error);
			ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_LB_OVERFLOWED:
			STAT(mesq_noop_lb_overflow);
			ret = MQE_CONGESTION;
			break;
		case CBSS_QLIMIT_REACHED:
			STAT(mesq_noop_qlimit_reached);
			ret = 0;
			break;
		case CBSS_AMO_NACKED:
			STAT(mesq_noop_amo_nacked);
			ret = MQE_CONGESTION;
			break;
		case CBSS_PUT_NACKED:
			STAT(mesq_noop_put_nacked);
			m = mq + (gru_get_amo_value_head(cb) << 6);
			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
						IMA);
			if (gru_wait(cb) == CBS_IDLE)
				ret = MQIE_AGAIN;
			else
				ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_PAGE_OVERFLOW:
		default:
			BUG();
		}
	}
	*mhdr = save_mhdr;
	return ret;
}

/*
 * Handle a gru_mesq full.
 */
static int send_message_queue_full(void *cb,
			   unsigned long mq, void *mesg, int lines)
{
	union gru_mesqhead mqh;
	unsigned int limit, head;
	unsigned long avalue;
	int half, qlines, save;

	/* Determine if switching to first/second half of q */
	avalue = gru_get_amo_value(cb);
	head = gru_get_amo_value_head(cb);
	limit = gru_get_amo_value_limit(cb);

	/*
	 * Fetch "qlines" from the queue header. Since the queue may be
	 * in memory that can't be accessed using socket addresses, use
	 * the GRU to access the data. Use DSR space from the message.
	 */
	save = *(int *)mesg;
	gru_vload(cb, QLINES(mq), gru_get_tri(mesg), XTYPE_W, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;
	qlines = *(int *)mesg;
	*(int *)mesg = save;
	half = (limit != qlines);

	if (half)
		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
	else
		mqh = gru_mesq_head(2, qlines / 2 + 1);

	/* Try to get lock for switching head pointer */
	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mq, half), XTYPE_DW, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;
	if (!gru_get_amo_value(cb)) {
		STAT(mesq_qf_locked);
		return MQE_QUEUE_FULL;
	}

	/* Got the lock. Send optional NOP if queue not full, */
	if (head != limit) {
		if (send_noop_message(cb, mq, mesg)) {
			gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half),
					XTYPE_DW, IMA);
			if (gru_wait(cb) != CBS_IDLE)
				goto cberr;
			STAT(mesq_qf_noop_not_full);
			return MQIE_AGAIN;
		}
		avalue++;
	}

	/* Then flip queuehead to other half of queue. */
	gru_gamer(cb, EOP_ERR_CSWAP, mq, XTYPE_DW, mqh.val, avalue, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;

	/* If swapping the queue head was not successful, clear the hstatus lock */
	if (gru_get_amo_value(cb) != avalue) {
		STAT(mesq_qf_switch_head_failed);
		gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), XTYPE_DW, IMA);
		if (gru_wait(cb) != CBS_IDLE)
			goto cberr;
	}
	return MQIE_AGAIN;
cberr:
	STAT(mesq_qf_unexpected_error);
	return MQE_UNEXPECTED_CB_ERR;
}

/*
 * Handle a gru_mesq failure. Some of these failures are software recoverable
 * or retryable.
 */
static int send_message_failure(void *cb,
				unsigned long mq,
				void *mesg,
				int lines)
{
	int substatus, ret = 0;
	unsigned long m;

	substatus = gru_get_cb_message_queue_substatus(cb);
	switch (substatus) {
	case CBSS_NO_ERROR:
		STAT(mesq_send_unexpected_error);
		ret = MQE_UNEXPECTED_CB_ERR;
		break;
	case CBSS_LB_OVERFLOWED:
		STAT(mesq_send_lb_overflow);
		ret = MQE_CONGESTION;
		break;
	case CBSS_QLIMIT_REACHED:
		STAT(mesq_send_qlimit_reached);
		ret = send_message_queue_full(cb, mq, mesg, lines);
		break;
	case CBSS_AMO_NACKED:
		STAT(mesq_send_amo_nacked);
		ret = MQE_CONGESTION;
		break;
	case CBSS_PUT_NACKED:
		STAT(mesq_send_put_nacked);
		m = mq + (gru_get_amo_value_head(cb) << 6);
		gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
		if (gru_wait(cb) == CBS_IDLE)
			ret = MQE_OK;
		else
			ret = MQE_UNEXPECTED_CB_ERR;
		break;
	default:
		BUG();
	}
	return ret;
}

/*
 * Send a message to a message queue
 *	cb	GRU control block to use to send message
 *	mq	message queue
 *	mesg	message. Must be vaddr within a GSEG
 *	bytes	message size (<= 2 CL)
 */
int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes)
{
	struct message_header *mhdr;
	void *cb;
	void *dsr;
	int istatus, clines, ret;

	STAT(mesq_send);
	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);

	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
	if (gru_get_cpu_resources(bytes, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	memcpy(dsr, mesg, bytes);
	mhdr = dsr;
	mhdr->present = MQS_FULL;
	mhdr->lines = clines;
	if (clines == 2) {
		mhdr->present2 = get_present2(mhdr);
		restore_present2(mhdr, MQS_FULL);
	}

	do {
		ret = MQE_OK;
		gru_mesq(cb, mq, gru_get_tri(mhdr), clines, IMA);
		istatus = gru_wait(cb);
		if (istatus != CBS_IDLE)
			ret = send_message_failure(cb, mq, dsr, clines);
	} while (ret == MQIE_AGAIN);
	gru_free_cpu_resources(cb, dsr);

	if (ret)
		STAT(mesq_send_failed);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_send_message_gpa);
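
/*
 * Illustrative sketch only (not part of the driver): sending a small message
 * to a queue addressed by its GPA (see the setup sketch above).  The payload
 * layout and the error mapping are assumptions for the example; the only
 * interface requirement is that the first word of every message is used by
 * the mesq interface for the header (struct message_header).
 */
struct example_msg {
	int	hdr;		/* reserved for struct message_header */
	int	payload;
};

static int example_send(unsigned long mq_gpa, int value)
{
	struct example_msg msg = { .payload = value };
	int ret;

	ret = gru_send_message_gpa(mq_gpa, &msg, sizeof(msg));
	if (ret == MQE_QUEUE_FULL)
		return -EAGAIN;		/* queue full; caller may retry later */
	return ret ? -EIO : 0;
}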

/*
 * Advance the receive pointer for the queue to the next message.
 */
void gru_free_message(void *rmq, void *mesg)
{
	struct message_queue *mq = rmq;
	struct message_header *mhdr = mq->next;
	void *next, *pnext;
	int half = -1;
	int lines = mhdr->lines;

	if (lines == 2)
		restore_present2(mhdr, MQS_EMPTY);
	mhdr->present = MQS_EMPTY;

	pnext = mq->next;
	next = pnext + GRU_CACHE_LINE_BYTES * lines;
	if (next == mq->limit) {
		next = mq->start;
		half = 1;
	} else if (pnext < mq->start2 && next >= mq->start2) {
		half = 0;
	}

	if (half >= 0)
		mq->hstatus[half] = 1;
	mq->next = next;
}
EXPORT_SYMBOL_GPL(gru_free_message);

/*
 * Get next message from message queue. Return NULL if no message
 * present. User must call gru_free_message() to move to the next message.
 *	rmq	message queue
 */
void *gru_get_next_message(void *rmq)
{
	struct message_queue *mq = rmq;
	struct message_header *mhdr = mq->next;
	int present = mhdr->present;

	/* skip NOOP messages */
	STAT(mesq_receive);
	while (present == MQS_NOOP) {
		gru_free_message(rmq, mhdr);
		mhdr = mq->next;
		present = mhdr->present;
	}

	/* Wait for both halves of 2 line messages */
	if (present == MQS_FULL && mhdr->lines == 2 &&
				get_present2(mhdr) == MQS_EMPTY)
		present = MQS_EMPTY;

	if (!present) {
		STAT(mesq_receive_none);
		return NULL;
	}

	if (mhdr->lines == 2)
		restore_present2(mhdr, mhdr->present2);

	return mhdr;
}
EXPORT_SYMBOL_GPL(gru_get_next_message);
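
/*
 * Illustrative sketch only (not part of the driver): draining a message
 * queue on the receive side.  process_payload() is a hypothetical consumer;
 * the loop follows the gru_get_next_message()/gru_free_message() contract
 * described above (a message stays valid until it is freed).
 */
static void example_drain_mq(void *mq, void (*process_payload)(void *msg))
{
	void *msg;

	while ((msg = gru_get_next_message(mq)) != NULL) {
		process_payload(msg);		/* consume while still present */
		gru_free_message(mq, msg);	/* advance to the next message */
	}
}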

/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/

/*
 * Copy a block of data using the GRU resources
 */
int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
				unsigned int bytes)
{
	void *cb;
	void *dsr;
	int ret;

	STAT(copy_gpa);
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_BYTES, IMA);
	ret = gru_wait(cb);
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_copy_gpa);
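
/*
 * Illustrative sketch only (not part of the driver): copying between two
 * kernel buffers through the GRU.  The buffers and the error mapping are
 * assumptions for the example; callers supply global physical addresses,
 * typically obtained with uv_gpa().
 */
static int example_copy(void *dst, void *src, unsigned int bytes)
{
	int ret;

	ret = gru_copy_gpa(uv_gpa(dst), uv_gpa(src), bytes);
	return ret ? -EIO : 0;	/* nonzero means no resources or a CB error */
}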

/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU		*/
static __cacheline_aligned unsigned long word0;
static __cacheline_aligned unsigned long word1;

static int quicktest(struct gru_state *gru)
{
	void *cb;
	void *ds;
	unsigned long *p;

	cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
	ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
	p = ds;
	word0 = MAGIC;

	gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		BUG();

	if (*(unsigned long *)ds != MAGIC)
		BUG();
	gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		BUG();

	if (word0 != word1 || word0 != MAGIC) {
		printk
		    ("GRU quicktest err: gru %d, found 0x%lx, expected 0x%lx\n",
		     gru->gs_gid, word1, MAGIC);
		BUG();		/* ZZZ should not be fatal */
	}

	return 0;
}

int gru_kservices_init(struct gru_state *gru)
{
	struct gru_blade_state *bs;
	struct gru_context_configuration_handle *cch;
	unsigned long cbr_map, dsr_map;
	int err, num, cpus_possible;

	/*
	 * Currently, resources are reserved ONLY on the second chiplet
	 * on each blade. This leaves ALL resources on chiplet 0 available
	 * for user code.
	 */
	bs = gru->gs_blade;
	if (gru != &bs->bs_grus[1])
		return 0;

	cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);

	num = GRU_NUM_KERNEL_CBR * cpus_possible;
	cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
	gru->gs_reserved_cbrs += num;

	num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
	dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
	gru->gs_reserved_dsr_bytes += num;

	gru->gs_active_contexts++;
	__set_bit(KERNEL_CTXNUM, &gru->gs_context_map);
	cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);

	bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr,
					KERNEL_CTXNUM, 0);
	bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr,
					KERNEL_CTXNUM, 0);

	lock_cch_handle(cch);
	cch->tfm_fault_bit_enable = 0;
	cch->tlb_int_enable = 0;
	cch->tfm_done_bit_enable = 0;
	cch->unmap_enable = 1;
	err = cch_allocate(cch, 0, cbr_map, dsr_map);
	if (err) {
		gru_dbg(grudev,
			"Unable to allocate kernel CCH: gru %d, err %d\n",
			gru->gs_gid, err);
		BUG();
	}
	err = cch_start(cch);
	if (err) {
		gru_dbg(grudev, "Unable to start kernel CCH: gru %d, err %d\n",
			gru->gs_gid, err);
		BUG();
	}
	unlock_cch_handle(cch);

	if (gru_options & GRU_QUICKLOOK)
		quicktest(gru);
	return 0;
}