2005-04-16 15:20:36 -07:00
/*
* File : mca . c
* Purpose : Generic MCA handling layer
*
* Copyright ( C ) 2003 Hewlett - Packard Co
* David Mosberger - Tang < davidm @ hpl . hp . com >
*
* Copyright ( C ) 2002 Dell Inc .
2008-01-07 10:11:57 +09:00
* Copyright ( C ) Matt Domsch < Matt_Domsch @ dell . com >
2005-04-16 15:20:36 -07:00
*
* Copyright ( C ) 2002 Intel
2008-01-07 10:11:57 +09:00
* Copyright ( C ) Jenna Hall < jenna . s . hall @ intel . com >
2005-04-16 15:20:36 -07:00
*
* Copyright ( C ) 2001 Intel
2008-01-07 10:11:57 +09:00
* Copyright ( C ) Fred Lewis < frederick . v . lewis @ intel . com >
2005-04-16 15:20:36 -07:00
*
* Copyright ( C ) 2000 Intel
2008-01-07 10:11:57 +09:00
* Copyright ( C ) Chuck Fleckenstein < cfleck @ co . intel . com >
2005-04-16 15:20:36 -07:00
*
2008-02-05 17:12:32 -06:00
* Copyright ( C ) 1999 , 2004 - 2008 Silicon Graphics , Inc .
2008-01-07 10:11:57 +09:00
* Copyright ( C ) Vijay Chander < vijay @ engr . sgi . com >
2005-04-16 15:20:36 -07:00
*
2008-01-07 10:11:57 +09:00
* Copyright ( C ) 2006 FUJITSU LIMITED
* Copyright ( C ) Hidetoshi Seto < seto . hidetoshi @ jp . fujitsu . com >
2005-04-16 15:20:36 -07:00
*
2008-01-07 10:11:57 +09:00
* 2000 - 03 - 29 Chuck Fleckenstein < cfleck @ co . intel . com >
* Fixed PAL / SAL update issues , began MCA bug fixes , logging issues ,
* added min save state dump , added INIT handler .
2005-04-16 15:20:36 -07:00
*
2008-01-07 10:11:57 +09:00
* 2001 - 01 - 03 Fred Lewis < frederick . v . lewis @ intel . com >
* Added setup of CMCI and CPEI IRQs , logging of corrected platform
* errors , completed code for logging of corrected & uncorrected
* machine check errors , and updated for conformance with Nov . 2000
* revision of the SAL 3.0 spec .
*
* 2002 - 01 - 04 Jenna Hall < jenna . s . hall @ intel . com >
* Aligned MCA stack to 16 bytes , added platform vs . CPU error flag ,
* set SAL default return values , changed error record structure to
* linked list , added init call to sal_get_state_info_size ( ) .
*
* 2002 - 03 - 25 Matt Domsch < Matt_Domsch @ dell . com >
* GUID cleanups .
*
* 2003 - 04 - 15 David Mosberger - Tang < davidm @ hpl . hp . com >
* Added INIT backtrace support .
2005-04-16 15:20:36 -07:00
*
* 2003 - 12 - 08 Keith Owens < kaos @ sgi . com >
2008-01-07 10:11:57 +09:00
* smp_call_function ( ) must not be called from interrupt context
* ( can deadlock on tasklist_lock ) .
* Use keventd to call smp_call_function ( ) .
2005-04-16 15:20:36 -07:00
*
* 2004 - 02 - 01 Keith Owens < kaos @ sgi . com >
2008-01-07 10:11:57 +09:00
* Avoid deadlock when using printk ( ) for MCA and INIT records .
* Delete all record printing code , moved to salinfo_decode in user
* space . Mark variables and functions static where possible .
* Delete dead variables and functions . Reorder to remove the need
* for forward declarations and to consolidate related code .
2005-09-11 17:22:53 +10:00
*
* 2005 - 08 - 12 Keith Owens < kaos @ sgi . com >
2008-01-07 10:11:57 +09:00
* Convert MCA / INIT handlers to use per event stacks and SAL / OS
* state .
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods which impact on
watchdogs and heartbeat systems in general. More patches to come that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot, the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-11-07 11:27:13 -08:00
*
* 2005 - 10 - 07 Keith Owens < kaos @ sgi . com >
* Add notify_die ( ) hooks .
2006-09-26 14:44:37 -07:00
*
* 2006 - 09 - 15 Hidetoshi Seto < seto . hidetoshi @ jp . fujitsu . com >
2008-01-07 10:11:57 +09:00
* Add printing support for MCA / INIT .
2007-05-18 17:17:17 -05:00
*
* 2007 - 04 - 27 Russ Anderson < rja @ sgi . com >
* Support multiple cpus going through OS_MCA in the same event .
2005-04-16 15:20:36 -07:00
*/
2008-03-28 14:27:05 -07:00
# include <linux/jiffies.h>
2005-04-16 15:20:36 -07:00
# include <linux/types.h>
# include <linux/init.h>
# include <linux/sched.h>
# include <linux/interrupt.h>
# include <linux/irq.h>
# include <linux/bootmem.h>
# include <linux/acpi.h>
# include <linux/timer.h>
# include <linux/module.h>
# include <linux/kernel.h>
# include <linux/smp.h>
# include <linux/workqueue.h>
2006-03-26 01:39:03 -08:00
# include <linux/cpumask.h>
2007-05-08 00:27:03 -07:00
# include <linux/kdebug.h>
2007-12-19 11:42:02 -08:00
# include <linux/cpu.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 17:04:11 +09:00
# include <linux/gfp.h>
2005-04-16 15:20:36 -07:00
# include <asm/delay.h>
# include <asm/machvec.h>
# include <asm/meminit.h>
# include <asm/page.h>
# include <asm/ptrace.h>
# include <asm/sal.h>
# include <asm/mca.h>
2006-12-07 09:51:35 -08:00
# include <asm/kexec.h>
2005-04-16 15:20:36 -07:00
# include <asm/irq.h>
# include <asm/hw_irq.h>
2008-04-03 11:02:58 -07:00
# include <asm/tlb.h>
2005-04-16 15:20:36 -07:00
2006-03-24 09:49:52 -08:00
# include "mca_drv.h"
2005-09-11 17:22:53 +10:00
# include "entry.h"
2005-04-16 15:20:36 -07:00
/*
 * IA64_MCA_DEBUG - debug-only printk wrapper.
 * Forwards its arguments to printk() when IA64_MCA_DEBUG_INFO is defined and
 * expands to nothing otherwise, so its arguments are not evaluated at all in
 * non-debug builds.
 */
# if defined(IA64_MCA_DEBUG_INFO)
# define IA64_MCA_DEBUG(fmt...) printk(fmt)
# else
# define IA64_MCA_DEBUG(fmt...)
# endif
2008-04-17 17:00:37 +09:00
/*
 * NOTIFY_INIT - hand an INIT event to notify_die().
 *
 * notify_die() is always called.  Only when it answers NOTIFY_STOP *and* the
 * caller passed spin == 1 does the macro call ia64_mca_spin(), passing the
 * name of the enclosing function.
 */
#define NOTIFY_INIT(event, regs, arg, spin)					\
do {										\
	if (notify_die((event), " INIT ", (regs), (arg), 0, 0) == NOTIFY_STOP) {	\
		if ((spin) == 1)						\
			ia64_mca_spin(__func__);				\
	}									\
} while (0)
/*
 * NOTIFY_MCA - hand an MCA event to notify_die().
 *
 * notify_die() is always called.  Only when it answers NOTIFY_STOP *and* the
 * caller passed spin == 1 does the macro call ia64_mca_spin(), passing the
 * name of the enclosing function.
 */
#define NOTIFY_MCA(event, regs, arg, spin)					\
do {										\
	if (notify_die((event), " MCA ", (regs), (arg), 0, 0) == NOTIFY_STOP) {	\
		if ((spin) == 1)						\
			ia64_mca_spin(__func__);				\
	}									\
} while (0)
2005-04-16 15:20:36 -07:00
/* Used by mca_asm.S */
DEFINE_PER_CPU ( u64 , ia64_mca_data ) ; /* == __per_cpu_mca[smp_processor_id()] */
DEFINE_PER_CPU ( u64 , ia64_mca_per_cpu_pte ) ; /* PTE to map per-CPU area */
DEFINE_PER_CPU ( u64 , ia64_mca_pal_pte ) ; /* PTE to map PAL code */
DEFINE_PER_CPU ( u64 , ia64_mca_pal_base ) ; /* vaddr PAL code granule */
2008-04-03 11:02:58 -07:00
DEFINE_PER_CPU ( u64 , ia64_mca_tr_reload ) ; /* Flag for TR reload */
2005-04-16 15:20:36 -07:00
unsigned long __per_cpu_mca [ NR_CPUS ] ;
/* In mca_asm.S */
2005-09-11 17:22:53 +10:00
extern void ia64_os_init_dispatch_monarch ( void ) ;
extern void ia64_os_init_dispatch_slave ( void ) ;
/*
 * CPU number of the current monarch, or -1 when none has been recorded.
 * ia64_mca_spin() compares it with smp_processor_id() to decide whether the
 * spinning CPU should flush the message buffer first.
 */
static int monarch_cpu = - 1 ;
2005-04-16 15:20:36 -07:00
static ia64_mc_info_t ia64_mc_info ;
/* Polling periods, expressed in jiffies (HZ jiffies per second). */
# define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */
# define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */
# define CMC_POLL_INTERVAL (1*60*HZ) /* 1 minute */
/*
 * Number of interrupt timestamps remembered by the CPE interrupt handler;
 * once that many CPE interrupts (the current one included) fall within HZ
 * jiffies, the handler switches to polling.
 */
# define CPE_HISTORY_LENGTH 5
/* NOTE(review): presumably the CMC counterpart; its user is not visible in this part of the file. */
# define CMC_HISTORY_LENGTH 5
2007-05-10 13:20:19 -07:00
# ifdef CONFIG_ACPI
2005-04-16 15:20:36 -07:00
static struct timer_list cpe_poll_timer ;
2007-05-10 13:20:19 -07:00
# endif
2005-04-16 15:20:36 -07:00
static struct timer_list cmc_poll_timer ;
/*
* This variable tells whether we are currently in polling mode .
* Start with this in the wrong state so we won ' t play w / timers
* before the system is ready .
*/
static int cmc_polling_enabled = 1 ;
/*
* Clearing this variable prevents CPE polling from getting activated
* in mca_late_init . Use it if your system doesn ' t provide a CPEI ,
* but encounters problems retrieving CPE logs . This should only be
* necessary for debugging .
*/
static int cpe_poll_enabled = 1 ;
extern void salinfo_log_wakeup ( int type , u8 * buffer , u64 size , int irqsafe ) ;
2006-03-12 08:52:20 -08:00
static int mca_init __initdata ;
2005-04-16 15:20:36 -07:00
2006-09-26 14:44:37 -07:00
/*
* limited & delayed printing support for MCA / INIT handler
*/
/* mprintk() is the short name used in this file for ia64_mca_printk(). */
# define mprintk(fmt...) ia64_mca_printk(fmt)
/* Capacity of the message ring in bytes; one slot always stays unused. */
# define MLOGBUF_SIZE (512+256*NR_CPUS)
/* Size of the on-stack scratch buffer used to format/print one chunk. */
# define MLOGBUF_MSGMAX 256
/* Ring buffer filled by ia64_mca_printk() and drained by ia64_mlogbuf_dump(). */
static char mlogbuf [ MLOGBUF_SIZE ] ;
static DEFINE_SPINLOCK ( mlogbuf_wlock ) ; /* mca context only */
static DEFINE_SPINLOCK ( mlogbuf_rlock ) ; /* normal context only */
/* Read position (advanced by the dumper); start == end means "empty". */
static unsigned long mlogbuf_start ;
/* Write position (advanced by ia64_mca_printk()). */
static unsigned long mlogbuf_end ;
/* Set to 1 by ia64_mlogbuf_finish() when it was asked to wait for the console. */
static unsigned int mlogbuf_finished = 0 ;
/* jiffies value recorded when an INIT found the reader lock busy; 0 = none. */
static unsigned long mlogbuf_timestamp = 0 ;
/* Console loglevel saved by BREAK_LOGLEVEL(); negative means nothing saved. */
static int loglevel_save = - 1 ;
/*
 * BREAK_LOGLEVEL - force urgent console output.
 * Sets oops_in_progress, remembers the current level in loglevel_save (only
 * when nothing is saved yet, i.e. loglevel_save < 0) and raises the level
 * passed in to 15.
 *
 * RESTORE_LOGLEVEL - undo BREAK_LOGLEVEL.
 * Puts the saved level back (if one was saved), marks loglevel_save as empty
 * and clears both mlogbuf_finished and oops_in_progress.
 *
 * The argument must be an lvalue.  Both macros are wrapped in
 * do { } while (0) so that they behave as a single statement, e.g. as the
 * body of an unbraced if.
 */
#define BREAK_LOGLEVEL(__console_loglevel)			\
do {								\
	oops_in_progress = 1;					\
	if (loglevel_save < 0)					\
		loglevel_save = (__console_loglevel);		\
	(__console_loglevel) = 15;				\
} while (0)

#define RESTORE_LOGLEVEL(__console_loglevel)			\
do {								\
	if (loglevel_save >= 0) {				\
		(__console_loglevel) = loglevel_save;		\
		loglevel_save = -1;				\
	}							\
	mlogbuf_finished = 0;					\
	oops_in_progress = 0;					\
} while (0)
/*
* Push messages into buffer , print them later if not urgent .
*/
void ia64_mca_printk ( const char * fmt , . . . )
{
va_list args ;
int printed_len ;
char temp_buf [ MLOGBUF_MSGMAX ] ;
char * p ;
va_start ( args , fmt ) ;
printed_len = vscnprintf ( temp_buf , sizeof ( temp_buf ) , fmt , args ) ;
va_end ( args ) ;
/* Copy the output into mlogbuf */
if ( oops_in_progress ) {
/* mlogbuf was abandoned, use printk directly instead. */
2014-03-15 13:11:18 -07:00
printk ( " %s " , temp_buf ) ;
2006-09-26 14:44:37 -07:00
} else {
spin_lock ( & mlogbuf_wlock ) ;
for ( p = temp_buf ; * p ; p + + ) {
unsigned long next = ( mlogbuf_end + 1 ) % MLOGBUF_SIZE ;
if ( next ! = mlogbuf_start ) {
mlogbuf [ mlogbuf_end ] = * p ;
mlogbuf_end = next ;
} else {
/* buffer full */
break ;
}
}
mlogbuf [ mlogbuf_end ] = ' \0 ' ;
spin_unlock ( & mlogbuf_wlock ) ;
}
}
EXPORT_SYMBOL ( ia64_mca_printk ) ;
/*
* Print buffered messages .
* NOTE : call this after returning normal context . ( ex . from salinfod )
*/
void ia64_mlogbuf_dump ( void )
{
char temp_buf [ MLOGBUF_MSGMAX ] ;
char * p ;
unsigned long index ;
unsigned long flags ;
unsigned int printed_len ;
/* Get output from mlogbuf */
while ( mlogbuf_start ! = mlogbuf_end ) {
temp_buf [ 0 ] = ' \0 ' ;
p = temp_buf ;
printed_len = 0 ;
spin_lock_irqsave ( & mlogbuf_rlock , flags ) ;
index = mlogbuf_start ;
while ( index ! = mlogbuf_end ) {
* p = mlogbuf [ index ] ;
index = ( index + 1 ) % MLOGBUF_SIZE ;
if ( ! * p )
break ;
p + + ;
if ( + + printed_len > = MLOGBUF_MSGMAX - 1 )
break ;
}
* p = ' \0 ' ;
if ( temp_buf [ 0 ] )
2014-03-15 13:11:18 -07:00
printk ( " %s " , temp_buf ) ;
2006-09-26 14:44:37 -07:00
mlogbuf_start = index ;
mlogbuf_timestamp = 0 ;
spin_unlock_irqrestore ( & mlogbuf_rlock , flags ) ;
}
}
EXPORT_SYMBOL ( ia64_mlogbuf_dump ) ;
/*
 * ia64_mlogbuf_finish - switch to urgent printing and flush mlogbuf.
 *
 * Use this when the system is about to go down or when messages must reach
 * the console immediately (e.g. recovery failed, a crash dump is about to be
 * taken, a rendezvous is taking too long).
 *
 * @wait: when non-zero, additionally announce and busy-wait a 5 second delay
 *        for the console and then set mlogbuf_finished; when zero, return
 *        right after the flush.
 *
 * NOTE: this should be called from the monarch.
 */
static void ia64_mlogbuf_finish(int wait)
{
	BREAK_LOGLEVEL(console_loglevel);

	/*
	 * The reader lock is re-initialised before dumping.
	 * NOTE(review): presumably so that a holder interrupted by the MCA/INIT
	 * cannot block the flush -- confirm.
	 */
	spin_lock_init(&mlogbuf_rlock);
	ia64_mlogbuf_dump();
	printk(KERN_EMERG " mlogbuf_finish: printing switched to urgent mode, "
	       " MCA/INIT might be dodgy or fail. \n ");

	if (wait) {
		/* wait for console */
		printk(" Delaying for 5 seconds... \n ");
		udelay(5 * 1000000);

		mlogbuf_finished = 1;
	}
}
/*
* Print buffered messages from INIT context .
*/
static void ia64_mlogbuf_dump_from_init ( void )
{
if ( mlogbuf_finished )
return ;
2008-03-28 14:27:05 -07:00
if ( mlogbuf_timestamp & &
time_before ( jiffies , mlogbuf_timestamp + 30 * HZ ) ) {
2006-09-26 14:44:37 -07:00
printk ( KERN_ERR " INIT: mlogbuf_dump is interrupted by INIT "
" and the system seems to be messed up. \n " ) ;
ia64_mlogbuf_finish ( 0 ) ;
return ;
}
if ( ! spin_trylock ( & mlogbuf_rlock ) ) {
printk ( KERN_ERR " INIT: mlogbuf_dump is interrupted by INIT. "
" Generated messages other than stack dump will be "
" buffered to mlogbuf and will be printed later. \n " ) ;
printk ( KERN_ERR " INIT: If messages would not printed after "
" this INIT, wait 30sec and assert INIT again. \n " ) ;
if ( ! mlogbuf_timestamp )
mlogbuf_timestamp = jiffies ;
return ;
}
spin_unlock ( & mlogbuf_rlock ) ;
ia64_mlogbuf_dump ( ) ;
}
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods which impact on
watchdogs and heartbeat systems in general. More patches to come that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot, the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-11-07 11:27:13 -08:00
static void inline
ia64_mca_spin ( const char * func )
{
2006-09-26 14:44:37 -07:00
if ( monarch_cpu = = smp_processor_id ( ) )
ia64_mlogbuf_finish ( 0 ) ;
mprintk ( KERN_EMERG " %s: spinning here, not returning to SAL \n " , func ) ;
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods which impact on
watchdogs and heartbeat systems in general. More patches to come that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot, the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-11-07 11:27:13 -08:00
while ( 1 )
cpu_relax ( ) ;
}
2005-04-16 15:20:36 -07:00
/*
* IA64_MCA log support
*/
# define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */
# define IA64_MAX_LOG_TYPES 4 /* MCA, INIT, CMC, CPE */
/*
 * Per-record-type log state: a pair of record buffers plus the bookkeeping
 * used by the IA64_LOG_* accessor macros.
 */
typedef struct ia64_state_log_s
{
spinlock_t isl_lock ; /* taken via IA64_LOG_LOCK() / IA64_LOG_UNLOCK() */
int isl_index ; /* slot of the "next" buffer; the "current" one is 1 - isl_index */
unsigned long isl_count ; /* incremented by IA64_LOG_INDEX_INC() each time the slots swap */
ia64_err_rec_t * isl_log [ IA64_MAX_LOGS ] ; /* need space to store header + error log */
} ia64_state_log_t ;
/* One entry per record type; indexed by the sal_info_type value. */
static ia64_state_log_t ia64_state_log [ IA64_MAX_LOG_TYPES ] ;
/*
 * Accessors for ia64_state_log[]; `it` is the record type used as index.
 *
 * IA64_LOG_ALLOCATE     allocate both record buffers with alloc_bootmem().
 * IA64_LOG_LOCK_INIT    initialise the per-type spinlock.
 * IA64_LOG_LOCK/UNLOCK  take/release that lock with interrupts saved; the
 *                       caller must declare `unsigned long s` for the flags.
 * IA64_LOG_NEXT_INDEX   slot of the "next" buffer (usable as an lvalue).
 * IA64_LOG_CURR_INDEX   the other slot, i.e. 1 - next.
 * IA64_LOG_INDEX_INC    swap the two slots and bump the record count.
 * IA64_LOG_INDEX_DEC    swap the two slots back (count is left untouched).
 * IA64_LOG_NEXT_BUFFER  buffer pointer for the "next" slot, as void *.
 * IA64_LOG_CURR_BUFFER  buffer pointer for the "current" slot, as void *.
 * IA64_LOG_COUNT        record counter (usable as an lvalue).
 *
 * Expression macros are fully parenthesised and statement macros use
 * do { } while (0), so each one is safe inside larger expressions and as the
 * body of an unbraced if/else.
 */
#define IA64_LOG_ALLOCATE(it, size)						\
do {										\
	ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] =			\
		(ia64_err_rec_t *)alloc_bootmem(size);				\
	ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] =			\
		(ia64_err_rec_t *)alloc_bootmem(size);				\
} while (0)
#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
#define IA64_LOG_LOCK(it)      spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
#define IA64_LOG_UNLOCK(it)    spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s)
#define IA64_LOG_NEXT_INDEX(it)    (ia64_state_log[it].isl_index)
#define IA64_LOG_CURR_INDEX(it)    (1 - ia64_state_log[it].isl_index)
#define IA64_LOG_INDEX_INC(it)							\
do {										\
	ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index;	\
	ia64_state_log[it].isl_count++;						\
} while (0)
#define IA64_LOG_INDEX_DEC(it)							\
	(ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index)
#define IA64_LOG_NEXT_BUFFER(it)   ((void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])))
#define IA64_LOG_CURR_BUFFER(it)   ((void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])))
#define IA64_LOG_COUNT(it)         (ia64_state_log[it].isl_count)
/*
* ia64_log_init
* Reset the OS ia64 log buffer
* Inputs : info_type ( SAL_INFO_TYPE_ { MCA , INIT , CMC , CPE } )
* Outputs : None
*/
2006-03-12 08:52:20 -08:00
static void __init
2005-04-16 15:20:36 -07:00
ia64_log_init ( int sal_info_type )
{
u64 max_size = 0 ;
IA64_LOG_NEXT_INDEX ( sal_info_type ) = 0 ;
IA64_LOG_LOCK_INIT ( sal_info_type ) ;
// SAL will tell us the maximum size of any error record of this type
max_size = ia64_sal_get_state_info_size ( sal_info_type ) ;
if ( ! max_size )
/* alloc_bootmem() doesn't like zero-sized allocations! */
return ;
// set up OS data structures to hold error info
IA64_LOG_ALLOCATE ( sal_info_type , max_size ) ;
memset ( IA64_LOG_CURR_BUFFER ( sal_info_type ) , 0 , max_size ) ;
memset ( IA64_LOG_NEXT_BUFFER ( sal_info_type ) , 0 , max_size ) ;
}
/*
 * ia64_log_get - fetch the current record of one type from SAL.
 *
 * Inputs  : sal_info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
 *           irq_safe      whether printing is allowed at this point
 * Outputs : return value  total record length, 0 when SAL returned nothing
 *           *buffer       pointer to the record (written only when the
 *                         returned length is non-zero)
 *
 * SAL writes into the "next" buffer while the per-type lock is held; on
 * success the buffer indices are swapped before the lock is released.
 */
static u64
ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
{
	sal_log_record_header_t *rec;
	u64 len;
	unsigned long s;	/* irq flags; the name is fixed by IA64_LOG_LOCK/UNLOCK */

	IA64_LOG_LOCK(sal_info_type);

	/* Get the process state information */
	rec = IA64_LOG_NEXT_BUFFER(sal_info_type);
	len = ia64_sal_get_state_info(sal_info_type, (u64 *)rec);
	if (!len) {
		IA64_LOG_UNLOCK(sal_info_type);
		return 0;
	}

	IA64_LOG_INDEX_INC(sal_info_type);
	IA64_LOG_UNLOCK(sal_info_type);

	if (irq_safe) {
		IA64_MCA_DEBUG(" %s: SAL error record type %d retrieved. Record length = %ld \n ",
			       __func__, sal_info_type, len);
	}

	*buffer = (u8 *)rec;
	return len;
}
/*
* ia64_mca_log_sal_error_record
*
* This function retrieves a specified error record type from SAL
* and wakes up any processes waiting for error records .
*
2005-09-11 17:22:53 +10:00
* Inputs : sal_info_type ( Type of error record MCA / CMC / CPE )
* FIXME : remove MCA and irq_safe .
2005-04-16 15:20:36 -07:00
*/
static void
ia64_mca_log_sal_error_record ( int sal_info_type )
{
u8 * buffer ;
sal_log_record_header_t * rh ;
u64 size ;
2005-09-11 17:22:53 +10:00
int irq_safe = sal_info_type ! = SAL_INFO_TYPE_MCA ;
2005-04-16 15:20:36 -07:00
# ifdef IA64_MCA_DEBUG_INFO
static const char * const rec_name [ ] = { " MCA " , " INIT " , " CMC " , " CPE " } ;
# endif
size = ia64_log_get ( sal_info_type , & buffer , irq_safe ) ;
if ( ! size )
return ;
salinfo_log_wakeup ( sal_info_type , buffer , size , irq_safe ) ;
if ( irq_safe )
IA64_MCA_DEBUG ( " CPU %d: SAL log contains %s error record \n " ,
smp_processor_id ( ) ,
sal_info_type < ARRAY_SIZE ( rec_name ) ? rec_name [ sal_info_type ] : " UNKNOWN " ) ;
/* Clear logs from corrected errors in case there's no user-level logger */
rh = ( sal_log_record_header_t * ) buffer ;
if ( rh - > severity = = sal_log_severity_corrected )
ia64_sal_clear_state_info ( sal_info_type ) ;
}
2006-03-24 09:49:52 -08:00
/*
* search_mca_table
* See if the MCA surfaced in an instruction range
* that has been tagged as recoverable .
*
* Inputs
* first First address range to check
* last Last address range to check
* ip Instruction pointer , address we are looking for
*
* Return value :
* 1 on Success ( in the table ) / 0 on Failure ( not in the table )
*/
int
search_mca_table ( const struct mca_table_entry * first ,
const struct mca_table_entry * last ,
unsigned long ip )
{
const struct mca_table_entry * curr ;
u64 curr_start , curr_end ;
curr = first ;
while ( curr < = last ) {
curr_start = ( u64 ) & curr - > start_addr + curr - > start_addr ;
curr_end = ( u64 ) & curr - > end_addr + curr - > end_addr ;
if ( ( ip > = curr_start ) & & ( ip < = curr_end ) ) {
return 1 ;
}
curr + + ;
}
return 0 ;
}
/*
 * mca_recover_range - look @addr up in the MCA table.
 *
 * Searches the entries between __start___mca_table and __stop___mca_table
 * (the latter is one past the last entry).  Returns 1 on a hit, 0 otherwise.
 */
int mca_recover_range(unsigned long addr)
{
	extern struct mca_table_entry __start___mca_table[];
	extern struct mca_table_entry __stop___mca_table[];
	const struct mca_table_entry *first = __start___mca_table;
	const struct mca_table_entry *last = __stop___mca_table - 1;

	return search_mca_table(first, last, addr);
}
EXPORT_SYMBOL_GPL(mca_recover_range);
2005-04-16 15:20:36 -07:00
# ifdef CONFIG_ACPI
2005-03-31 22:51:10 -05:00
int cpe_vector = - 1 ;
2005-11-11 14:32:40 -08:00
int ia64_cpe_irq = - 1 ;
2005-04-16 15:20:36 -07:00
static irqreturn_t
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers
Maintain a per-CPU global "struct pt_regs *" variable which can be used instead
of passing regs around manually through all ~1800 interrupt handlers in the
Linux kernel.
The regs pointer is used in few places, but it potentially costs both stack
space and code to pass it around. On the FRV arch, removing the regs parameter
from all the genirq function results in a 20% speed up of the IRQ exit path
(ie: from leaving timer_interrupt() to leaving do_IRQ()).
Where appropriate, an arch may override the generic storage facility and do
something different with the variable. On FRV, for instance, the address is
maintained in GR28 at all times inside the kernel as part of general exception
handling.
Having looked over the code, it appears that the parameter may be handed down
through up to twenty or so layers of functions. Consider a USB character
device attached to a USB hub, attached to a USB controller that posts its
interrupts through a cascaded auxiliary interrupt controller. A character
device driver may want to pass regs to the sysrq handler through the input
layer which adds another few layers of parameter passing.
I've build this code with allyesconfig for x86_64 and i386. I've runtested the
main part of the code on FRV and i386, though I can't test most of the drivers.
I've also done partial conversion for powerpc and MIPS - these at least compile
with minimal configurations.
This will affect all archs. Mostly the changes should be relatively easy.
Take do_IRQ(), store the regs pointer at the beginning, saving the old one:
struct pt_regs *old_regs = set_irq_regs(regs);
And put the old one back at the end:
set_irq_regs(old_regs);
Don't pass regs through to generic_handle_irq() or __do_IRQ().
In timer_interrupt(), this sort of change will be necessary:
- update_process_times(user_mode(regs));
- profile_tick(CPU_PROFILING, regs);
+ update_process_times(user_mode(get_irq_regs()));
+ profile_tick(CPU_PROFILING);
I'd like to move update_process_times()'s use of get_irq_regs() into itself,
except that i386, alone of the archs, uses something other than user_mode().
Some notes on the interrupt handling in the drivers:
(*) input_dev() is now gone entirely. The regs pointer is no longer stored in
the input_dev struct.
(*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does
something different depending on whether it's been supplied with a regs
pointer or not.
(*) Various IRQ handler function pointers have been moved to type
irq_handler_t.
Signed-Off-By: David Howells <dhowells@redhat.com>
(cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 14:55:46 +01:00
ia64_mca_cpe_int_handler ( int cpe_irq , void * arg )
2005-04-16 15:20:36 -07:00
{
static unsigned long cpe_history [ CPE_HISTORY_LENGTH ] ;
static int index ;
static DEFINE_SPINLOCK ( cpe_history_lock ) ;
IA64_MCA_DEBUG ( " %s: received interrupt vector = %#x on CPU %d \n " ,
2008-03-04 15:15:00 -08:00
__func__ , cpe_irq , smp_processor_id ( ) ) ;
2005-04-16 15:20:36 -07:00
/* SAL spec states this should run w/ interrupts enabled */
local_irq_enable ( ) ;
spin_lock ( & cpe_history_lock ) ;
if ( ! cpe_poll_enabled & & cpe_vector > = 0 ) {
int i , count = 1 ; /* we know 1 happened now */
unsigned long now = jiffies ;
for ( i = 0 ; i < CPE_HISTORY_LENGTH ; i + + ) {
if ( now - cpe_history [ i ] < = HZ )
count + + ;
}
IA64_MCA_DEBUG ( KERN_INFO " CPE threshold %d/%d \n " , count , CPE_HISTORY_LENGTH ) ;
if ( count > = CPE_HISTORY_LENGTH ) {
cpe_poll_enabled = 1 ;
spin_unlock ( & cpe_history_lock ) ;
disable_irq_nosync ( local_vector_to_irq ( IA64_CPE_VECTOR ) ) ;
/*
* Corrected errors will still be corrected , but
* make sure there ' s a log somewhere that indicates
* something is generating more than we can handle .
*/
printk ( KERN_WARNING " WARNING: Switching to polling CPE handler; error records may be lost \n " ) ;
mod_timer ( & cpe_poll_timer , jiffies + MIN_CPE_POLL_INTERVAL ) ;
/* lock already released, get out now */
2006-09-26 15:27:56 -07:00
goto out ;
2005-04-16 15:20:36 -07:00
} else {
cpe_history [ index + + ] = now ;
if ( index = = CPE_HISTORY_LENGTH )
index = 0 ;
}
}
spin_unlock ( & cpe_history_lock ) ;
2006-09-26 15:27:56 -07:00
out :
/* Get the CPE error record and log it */
ia64_mca_log_sal_error_record ( SAL_INFO_TYPE_CPE ) ;
2011-02-24 15:22:05 -08:00
local_irq_disable ( ) ;
2005-04-16 15:20:36 -07:00
return IRQ_HANDLED ;
}
# endif /* CONFIG_ACPI */
# ifdef CONFIG_ACPI
/*
* ia64_mca_register_cpev
*
* Register the corrected platform error vector with SAL .
*
* Inputs
* cpev Corrected Platform Error Vector number
*
* Outputs
* None
*/
2007-10-31 11:10:38 -05:00
void
2005-04-16 15:20:36 -07:00
ia64_mca_register_cpev ( int cpev )
{
/* Register the CPE interrupt vector with SAL */
struct ia64_sal_retval isrv ;
isrv = ia64_sal_mc_set_params ( SAL_MC_PARAM_CPE_INT , SAL_MC_PARAM_MECHANISM_INT , cpev , 0 , 0 ) ;
if ( isrv . status ) {
printk ( KERN_ERR " Failed to register Corrected Platform "
" Error interrupt vector with SAL (status %ld) \n " , isrv . status ) ;
return ;
}
IA64_MCA_DEBUG ( " %s: corrected platform error "
2008-03-04 15:15:00 -08:00
" vector %#x registered \n " , __func__ , cpev ) ;
2005-04-16 15:20:36 -07:00
}
# endif /* CONFIG_ACPI */
/*
* ia64_mca_cmc_vector_setup
*
* Setup the corrected machine check vector register in the processor .
* ( The interrupt is masked on boot . ia64_mca_late_init unmask this . )
* This function is invoked on a per - processor basis .
*
* Inputs
* None
*
* Outputs
* None
*/
2013-06-17 15:51:20 -04:00
void
2005-04-16 15:20:36 -07:00
ia64_mca_cmc_vector_setup ( void )
{
cmcv_reg_t cmcv ;
cmcv . cmcv_regval = 0 ;
cmcv . cmcv_mask = 1 ; /* Mask/disable interrupt at first */
cmcv . cmcv_vector = IA64_CMC_VECTOR ;
ia64_setreg ( _IA64_REG_CR_CMCV , cmcv . cmcv_regval ) ;
2008-03-04 15:15:00 -08:00
IA64_MCA_DEBUG ( " %s: CPU %d corrected machine check vector %#x registered. \n " ,
__func__ , smp_processor_id ( ) , IA64_CMC_VECTOR ) ;
2005-04-16 15:20:36 -07:00
IA64_MCA_DEBUG ( " %s: CPU %d CMCV = %#016lx \n " ,
2008-03-04 15:15:00 -08:00
__func__ , smp_processor_id ( ) , ia64_getreg ( _IA64_REG_CR_CMCV ) ) ;
2005-04-16 15:20:36 -07:00
}
/*
* ia64_mca_cmc_vector_disable
*
* Mask the corrected machine check vector register in the processor .
* This function is invoked on a per - processor basis .
*
* Inputs
* dummy ( unused )
*
* Outputs
* None
*/
static void
ia64_mca_cmc_vector_disable ( void * dummy )
{
cmcv_reg_t cmcv ;
cmcv . cmcv_regval = ia64_getreg ( _IA64_REG_CR_CMCV ) ;
cmcv . cmcv_mask = 1 ; /* Mask/disable interrupt */
ia64_setreg ( _IA64_REG_CR_CMCV , cmcv . cmcv_regval ) ;
2008-03-04 15:15:00 -08:00
IA64_MCA_DEBUG ( " %s: CPU %d corrected machine check vector %#x disabled. \n " ,
__func__ , smp_processor_id ( ) , cmcv . cmcv_vector ) ;
2005-04-16 15:20:36 -07:00
}
/*
* ia64_mca_cmc_vector_enable
*
* Unmask the corrected machine check vector register in the processor .
* This function is invoked on a per - processor basis .
*
* Inputs
* dummy ( unused )
*
* Outputs
* None
*/
static void
ia64_mca_cmc_vector_enable ( void * dummy )
{
cmcv_reg_t cmcv ;
cmcv . cmcv_regval = ia64_getreg ( _IA64_REG_CR_CMCV ) ;
cmcv . cmcv_mask = 0 ; /* Unmask/enable interrupt */
ia64_setreg ( _IA64_REG_CR_CMCV , cmcv . cmcv_regval ) ;
2008-03-04 15:15:00 -08:00
IA64_MCA_DEBUG ( " %s: CPU %d corrected machine check vector %#x enabled. \n " ,
__func__ , smp_processor_id ( ) , cmcv . cmcv_vector ) ;
2005-04-16 15:20:36 -07:00
}
/*
* ia64_mca_cmc_vector_disable_keventd
*
* Called via keventd ( smp_call_function ( ) is not safe in interrupt context ) to
* disable the cmc interrupt vector .
*/
static void
2006-12-05 19:36:26 +00:00
ia64_mca_cmc_vector_disable_keventd ( struct work_struct * unused )
2005-04-16 15:20:36 -07:00
{
2008-05-09 09:39:44 +02:00
on_each_cpu ( ia64_mca_cmc_vector_disable , NULL , 0 ) ;
2005-04-16 15:20:36 -07:00
}
/*
* ia64_mca_cmc_vector_enable_keventd
*
* Called via keventd ( smp_call_function ( ) is not safe in interrupt context ) to
* enable the cmc interrupt vector .
*/
static void
2006-12-05 19:36:26 +00:00
ia64_mca_cmc_vector_enable_keventd ( struct work_struct * unused )
2005-04-16 15:20:36 -07:00
{
2008-05-09 09:39:44 +02:00
on_each_cpu ( ia64_mca_cmc_vector_enable , NULL , 0 ) ;
2005-04-16 15:20:36 -07:00
}
/*
 * ia64_mca_wakeup
 *
 *	Wake up one particular cpu by sending it an inter-cpu interrupt on
 *	IA64_MCA_WAKEUP_VECTOR.
 *
 *  Inputs  :   cpu	(id of the cpu to wake)
 *  Outputs :   None
 */
static void
ia64_mca_wakeup(int cpu)
{
	platform_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0);
}
/*
* ia64_mca_wakeup_all
*
2007-09-19 16:58:31 -05:00
* Wakeup all the slave cpus which have rendez ' ed previously .
2005-04-16 15:20:36 -07:00
*
* Inputs : None
* Outputs : None
*/
static void
ia64_mca_wakeup_all ( void )
{
int cpu ;
/* Clear the Rendez checkin flag for all cpus */
2005-10-13 12:01:18 -07:00
for_each_online_cpu ( cpu ) {
2005-04-16 15:20:36 -07:00
if ( ia64_mc_info . imi_rendez_checkin [ cpu ] = = IA64_MCA_RENDEZ_CHECKIN_DONE )
ia64_mca_wakeup ( cpu ) ;
}
}
/*
* ia64_mca_rendez_interrupt_handler
*
* This is handler used to put slave processors into spinloop
* while the monarch processor does the mca handling and later
2007-09-19 16:58:31 -05:00
* wake each slave up once the monarch is done . The state
* IA64_MCA_RENDEZ_CHECKIN_DONE indicates the cpu is rendez ' ed
* in SAL . The state IA64_MCA_RENDEZ_CHECKIN_NOTDONE indicates
* the cpu has come out of OS rendezvous .
2005-04-16 15:20:36 -07:00
*
* Inputs : None
* Outputs : None
*/
static irqreturn_t
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers
Maintain a per-CPU global "struct pt_regs *" variable which can be used instead
of passing regs around manually through all ~1800 interrupt handlers in the
Linux kernel.
The regs pointer is used in few places, but it potentially costs both stack
space and code to pass it around. On the FRV arch, removing the regs parameter
from all the genirq function results in a 20% speed up of the IRQ exit path
(ie: from leaving timer_interrupt() to leaving do_IRQ()).
Where appropriate, an arch may override the generic storage facility and do
something different with the variable. On FRV, for instance, the address is
maintained in GR28 at all times inside the kernel as part of general exception
handling.
Having looked over the code, it appears that the parameter may be handed down
through up to twenty or so layers of functions. Consider a USB character
device attached to a USB hub, attached to a USB controller that posts its
interrupts through a cascaded auxiliary interrupt controller. A character
device driver may want to pass regs to the sysrq handler through the input
layer which adds another few layers of parameter passing.
I've build this code with allyesconfig for x86_64 and i386. I've runtested the
main part of the code on FRV and i386, though I can't test most of the drivers.
I've also done partial conversion for powerpc and MIPS - these at least compile
with minimal configurations.
This will affect all archs. Mostly the changes should be relatively easy.
Take do_IRQ(), store the regs pointer at the beginning, saving the old one:
struct pt_regs *old_regs = set_irq_regs(regs);
And put the old one back at the end:
set_irq_regs(old_regs);
Don't pass regs through to generic_handle_irq() or __do_IRQ().
In timer_interrupt(), this sort of change will be necessary:
- update_process_times(user_mode(regs));
- profile_tick(CPU_PROFILING, regs);
+ update_process_times(user_mode(get_irq_regs()));
+ profile_tick(CPU_PROFILING);
I'd like to move update_process_times()'s use of get_irq_regs() into itself,
except that i386, alone of the archs, uses something other than user_mode().
Some notes on the interrupt handling in the drivers:
(*) input_dev() is now gone entirely. The regs pointer is no longer stored in
the input_dev struct.
(*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does
something different depending on whether it's been supplied with a regs
pointer or not.
(*) Various IRQ handler function pointers have been moved to type
irq_handler_t.
Signed-Off-By: David Howells <dhowells@redhat.com>
(cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 14:55:46 +01:00
ia64_mca_rendez_int_handler ( int rendez_irq , void * arg )
2005-04-16 15:20:36 -07:00
{
unsigned long flags ;
int cpu = smp_processor_id ( ) ;
2006-04-03 15:26:12 +10:00
struct ia64_mca_notify_die nd =
{ . sos = NULL , . monarch_cpu = & monarch_cpu } ;
2005-04-16 15:20:36 -07:00
/* Mask all interrupts */
local_irq_save ( flags ) ;
2008-04-17 17:00:37 +09:00
NOTIFY_MCA ( DIE_MCA_RENDZVOUS_ENTER , get_irq_regs ( ) , ( long ) & nd , 1 ) ;
2005-04-16 15:20:36 -07:00
ia64_mc_info . imi_rendez_checkin [ cpu ] = IA64_MCA_RENDEZ_CHECKIN_DONE ;
/* Register with the SAL monarch that the slave has
* reached SAL
*/
ia64_sal_mc_rendez ( ) ;
2008-04-17 17:00:37 +09:00
NOTIFY_MCA ( DIE_MCA_RENDZVOUS_PROCESS , get_irq_regs ( ) , ( long ) & nd , 1 ) ;
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods which impact on
watchdogs and heartbeat systems in general. More patches to come that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot, the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-11-07 11:27:13 -08:00
2005-09-11 17:22:53 +10:00
/* Wait for the monarch cpu to exit. */
while ( monarch_cpu ! = - 1 )
cpu_relax ( ) ; /* spin until monarch leaves */
2005-04-16 15:20:36 -07:00
2008-04-17 17:00:37 +09:00
NOTIFY_MCA ( DIE_MCA_RENDZVOUS_LEAVE , get_irq_regs ( ) , ( long ) & nd , 1 ) ;
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods which impact on
watchdogs and heartbeat systems in general. More patches to come that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot, the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-11-07 11:27:13 -08:00
2007-09-19 16:58:31 -05:00
ia64_mc_info . imi_rendez_checkin [ cpu ] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE ;
2005-04-16 15:20:36 -07:00
/* Enable all interrupts */
local_irq_restore ( flags ) ;
return IRQ_HANDLED ;
}
/*
* ia64_mca_wakeup_int_handler
*
* The interrupt handler for processing the inter - cpu interrupt to the
* slave cpu which was spinning in the rendez loop .
* Since this spinning is done by turning off the interrupts and
* polling on the wakeup - interrupt bit in the IRR , there is
* nothing useful to be done in the handler .
*
* Inputs : wakeup_irq ( Wakeup - interrupt bit )
* arg ( Interrupt handler specific argument )
* Outputs : None
*
*/
static irqreturn_t
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers
Maintain a per-CPU global "struct pt_regs *" variable which can be used instead
of passing regs around manually through all ~1800 interrupt handlers in the
Linux kernel.
The regs pointer is used in few places, but it potentially costs both stack
space and code to pass it around. On the FRV arch, removing the regs parameter
from all the genirq function results in a 20% speed up of the IRQ exit path
(ie: from leaving timer_interrupt() to leaving do_IRQ()).
Where appropriate, an arch may override the generic storage facility and do
something different with the variable. On FRV, for instance, the address is
maintained in GR28 at all times inside the kernel as part of general exception
handling.
Having looked over the code, it appears that the parameter may be handed down
through up to twenty or so layers of functions. Consider a USB character
device attached to a USB hub, attached to a USB controller that posts its
interrupts through a cascaded auxiliary interrupt controller. A character
device driver may want to pass regs to the sysrq handler through the input
layer which adds another few layers of parameter passing.
I've build this code with allyesconfig for x86_64 and i386. I've runtested the
main part of the code on FRV and i386, though I can't test most of the drivers.
I've also done partial conversion for powerpc and MIPS - these at least compile
with minimal configurations.
This will affect all archs. Mostly the changes should be relatively easy.
Take do_IRQ(), store the regs pointer at the beginning, saving the old one:
struct pt_regs *old_regs = set_irq_regs(regs);
And put the old one back at the end:
set_irq_regs(old_regs);
Don't pass regs through to generic_handle_irq() or __do_IRQ().
In timer_interrupt(), this sort of change will be necessary:
- update_process_times(user_mode(regs));
- profile_tick(CPU_PROFILING, regs);
+ update_process_times(user_mode(get_irq_regs()));
+ profile_tick(CPU_PROFILING);
I'd like to move update_process_times()'s use of get_irq_regs() into itself,
except that i386, alone of the archs, uses something other than user_mode().
Some notes on the interrupt handling in the drivers:
(*) input_dev() is now gone entirely. The regs pointer is no longer stored in
the input_dev struct.
(*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does
something different depending on whether it's been supplied with a regs
pointer or not.
(*) Various IRQ handler function pointers have been moved to type
irq_handler_t.
Signed-Off-By: David Howells <dhowells@redhat.com>
(cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 14:55:46 +01:00
ia64_mca_wakeup_int_handler ( int wakeup_irq , void * arg )
2005-04-16 15:20:36 -07:00
{
return IRQ_HANDLED ;
}
/* Function pointer for extra MCA recovery */
int ( * ia64_mca_ucmc_extension )
2005-09-11 17:22:53 +10:00
( void * , struct ia64_sal_os_state * )
2005-04-16 15:20:36 -07:00
= NULL ;
int
2005-09-11 17:22:53 +10:00
ia64_reg_MCA_extension ( int ( * fn ) ( void * , struct ia64_sal_os_state * ) )
2005-04-16 15:20:36 -07:00
{
if ( ia64_mca_ucmc_extension )
return 1 ;
ia64_mca_ucmc_extension = fn ;
return 0 ;
}
/*
 * Remove the extra MCA recovery hook, if one is currently registered.
 */
void
ia64_unreg_MCA_extension(void)
{
	if (ia64_mca_ucmc_extension != NULL) {
		ia64_mca_ucmc_extension = NULL;
	}
}

EXPORT_SYMBOL(ia64_reg_MCA_extension);
EXPORT_SYMBOL(ia64_unreg_MCA_extension);
2005-09-11 17:22:53 +10:00
/*
 * Copy one saved register together with its NaT bit.
 *
 *   fr    address of the source register value
 *   fnat  NaT collection word covering the source
 *   tr    address that receives the register value
 *   tnat  NaT collection word covering the destination (updated in place)
 *
 * The bit used in each collection word is selected by bits 3..8 of the
 * corresponding register address, i.e. (address >> 3) & 63.
 */
static inline void
copy_reg(const u64 *fr, u64 fnat, unsigned long *tr, unsigned long *tnat)
{
	const u64 src_slot = ((unsigned long)fr >> 3) & 63;
	const u64 dst_slot = ((unsigned long)tr >> 3) & 63;
	const u64 nat_bit = (fnat >> src_slot) & 1;

	*tr = *fr;

	/* Replace the destination's NaT bit with the source's. */
	*tnat = (*tnat & ~(1UL << dst_slot)) | (nat_bit << dst_slot);
}
2006-02-08 13:41:04 +11:00
/* Change the comm field on the MCA/INT task to include the pid that
* was interrupted , it makes for easier debugging . If that pid was 0
* ( swapper or nested MCA / INIT ) then use the start of the previous comm
* field suffixed with its cpu .
*/
static void
2006-07-03 00:25:41 -07:00
ia64_mca_modify_comm ( const struct task_struct * previous_current )
2006-02-08 13:41:04 +11:00
{
char * p , comm [ sizeof ( current - > comm ) ] ;
if ( previous_current - > pid )
snprintf ( comm , sizeof ( comm ) , " %s %d " ,
current - > comm , previous_current - > pid ) ;
else {
int l ;
if ( ( p = strchr ( previous_current - > comm , ' ' ) ) )
l = p - previous_current - > comm ;
else
l = strlen ( previous_current - > comm ) ;
snprintf ( comm , sizeof ( comm ) , " %s %*s %d " ,
current - > comm , l , previous_current - > comm ,
task_thread_info ( previous_current ) - > cpu ) ;
}
memcpy ( current - > comm , comm , sizeof ( current - > comm ) ) ;
}
2009-10-01 17:55:16 -04:00
static void
[IA64] Save I-resources to ia64_sal_os_state
This is a patch related to this discussion.
http://www.spinics.net/lists/linux-ia64/msg07605.html
When INIT is sent, ip/psr/pfs register is stored to the I-resources
(iip/ipsr/ifs registers), and they are copied in the min-state save
area(pmsa_{iip,ipsr,ifs}).
Therefore, in creating pt_regs at ia64_mca_modify_original_stack(),
cr_{iip,ipsr,ifs} should be derived from pmsa_{iip,ipsr,ifs}. But
current code copies pmsa_{xip,xpsr,xfs} to cr_{iip,ipsr,ifs}
when PSR.ic is 0.
finish_pt_regs(struct pt_regs *regs, const pal_min_state_area_t *ms,
unsigned long *nat)
{
(snip)
if (ia64_psr(regs)->ic) {
regs->cr_iip = ms->pmsa_iip;
regs->cr_ipsr = ms->pmsa_ipsr;
regs->cr_ifs = ms->pmsa_ifs;
} else {
regs->cr_iip = ms->pmsa_xip;
regs->cr_ipsr = ms->pmsa_xpsr;
regs->cr_ifs = ms->pmsa_xfs;
}
It's ok when PSR.ic is not 0. But when PSR.ic is 0, this could be
a problem when we investigate kernel as the value of regs->cr_iip does
not point to where INIT really interrupted.
At first I tried to change finish_pt_regs() so that it uses always
pmsa_{iip,ipsr,ifs} for cr_{iip,ipsr,ifs}, but Keith Owens pointed out
it could cause another problem if I change it.
>The only problem I can think of is an MCA/INIT
>arriving while code like SAVE_MIN or SAVE_REST is executing. Back
>tracing at that point using pmsa_iip is going to be a problem, you have
>no idea what state the registers or stack are in.
I confirmed he was right, so I decided to keep it as-is and to
save pmsa_{iip,ipsr,ifs} to ia64_sal_os_state for debugging.
An attached patch is just adding new members into ia64_sal_os_state to
save pmsa_{iip,ipsr,ifs}.
Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2009-11-19 16:39:22 -05:00
finish_pt_regs ( struct pt_regs * regs , struct ia64_sal_os_state * sos ,
2009-10-01 17:55:16 -04:00
unsigned long * nat )
{
[IA64] Save I-resources to ia64_sal_os_state
This is a patch related to this discussion.
http://www.spinics.net/lists/linux-ia64/msg07605.html
When INIT is sent, ip/psr/pfs register is stored to the I-resources
(iip/ipsr/ifs registers), and they are copied in the min-state save
area(pmsa_{iip,ipsr,ifs}).
Therefore, in creating pt_regs at ia64_mca_modify_original_stack(),
cr_{iip,ipsr,ifs} should be derived from pmsa_{iip,ipsr,ifs}. But
current code copies pmsa_{xip,xpsr,xfs} to cr_{iip,ipsr,ifs}
when PSR.ic is 0.
finish_pt_regs(struct pt_regs *regs, const pal_min_state_area_t *ms,
unsigned long *nat)
{
(snip)
if (ia64_psr(regs)->ic) {
regs->cr_iip = ms->pmsa_iip;
regs->cr_ipsr = ms->pmsa_ipsr;
regs->cr_ifs = ms->pmsa_ifs;
} else {
regs->cr_iip = ms->pmsa_xip;
regs->cr_ipsr = ms->pmsa_xpsr;
regs->cr_ifs = ms->pmsa_xfs;
}
It's ok when PSR.ic is not 0. But when PSR.ic is 0, this could be
a problem when we investigate kernel as the value of regs->cr_iip does
not point to where INIT really interrupted.
At first I tried to change finish_pt_regs() so that it uses always
pmsa_{iip,ipsr,ifs} for cr_{iip,ipsr,ifs}, but Keith Owens pointed out
it could cause another problem if I change it.
>The only problem I can think of is an MCA/INIT
>arriving while code like SAVE_MIN or SAVE_REST is executing. Back
>tracing at that point using pmsa_iip is going to be a problem, you have
>no idea what state the registers or stack are in.
I confirmed he was right, so I decided to keep it as-is and to
save pmsa_{iip,ipsr,ifs} to ia64_sal_os_state for debugging.
An attached patch is just adding new members into ia64_sal_os_state to
save pmsa_{iip,ipsr,ifs}.
Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2009-11-19 16:39:22 -05:00
const pal_min_state_area_t * ms = sos - > pal_min_state ;
2009-10-01 17:55:16 -04:00
const u64 * bank ;
/* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
* pmsa_ { xip , xpsr , xfs }
*/
if ( ia64_psr ( regs ) - > ic ) {
regs - > cr_iip = ms - > pmsa_iip ;
regs - > cr_ipsr = ms - > pmsa_ipsr ;
regs - > cr_ifs = ms - > pmsa_ifs ;
} else {
regs - > cr_iip = ms - > pmsa_xip ;
regs - > cr_ipsr = ms - > pmsa_xpsr ;
regs - > cr_ifs = ms - > pmsa_xfs ;
[IA64] Save I-resources to ia64_sal_os_state
This is a patch related to this discussion.
http://www.spinics.net/lists/linux-ia64/msg07605.html
When INIT is sent, ip/psr/pfs register is stored to the I-resources
(iip/ipsr/ifs registers), and they are copied in the min-state save
area(pmsa_{iip,ipsr,ifs}).
Therefore, in creating pt_regs at ia64_mca_modify_original_stack(),
cr_{iip,ipsr,ifs} should be derived from pmsa_{iip,ipsr,ifs}. But
current code copies pmsa_{xip,xpsr,xfs} to cr_{iip,ipsr,ifs}
when PSR.ic is 0.
finish_pt_regs(struct pt_regs *regs, const pal_min_state_area_t *ms,
unsigned long *nat)
{
(snip)
if (ia64_psr(regs)->ic) {
regs->cr_iip = ms->pmsa_iip;
regs->cr_ipsr = ms->pmsa_ipsr;
regs->cr_ifs = ms->pmsa_ifs;
} else {
regs->cr_iip = ms->pmsa_xip;
regs->cr_ipsr = ms->pmsa_xpsr;
regs->cr_ifs = ms->pmsa_xfs;
}
It's ok when PSR.ic is not 0. But when PSR.ic is 0, this could be
a problem when we investigate kernel as the value of regs->cr_iip does
not point to where INIT really interrupted.
At first I tried to change finish_pt_regs() so that it uses always
pmsa_{iip,ipsr,ifs} for cr_{iip,ipsr,ifs}, but Keith Owens pointed out
it could cause another problem if I change it.
>The only problem I can think of is an MCA/INIT
>arriving while code like SAVE_MIN or SAVE_REST is executing. Back
>tracing at that point using pmsa_iip is going to be a problem, you have
>no idea what state the registers or stack are in.
I confirmed he was right, so I decided to keep it as-is and to
save pmsa_{iip,ipsr,ifs} to ia64_sal_os_state for debugging.
An attached patch is just adding new members into ia64_sal_os_state to
save pmsa_{iip,ipsr,ifs}.
Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2009-11-19 16:39:22 -05:00
sos - > iip = ms - > pmsa_iip ;
sos - > ipsr = ms - > pmsa_ipsr ;
sos - > ifs = ms - > pmsa_ifs ;
2009-10-01 17:55:16 -04:00
}
regs - > pr = ms - > pmsa_pr ;
regs - > b0 = ms - > pmsa_br0 ;
regs - > ar_rsc = ms - > pmsa_rsc ;
copy_reg ( & ms - > pmsa_gr [ 1 - 1 ] , ms - > pmsa_nat_bits , & regs - > r1 , nat ) ;
copy_reg ( & ms - > pmsa_gr [ 2 - 1 ] , ms - > pmsa_nat_bits , & regs - > r2 , nat ) ;
copy_reg ( & ms - > pmsa_gr [ 3 - 1 ] , ms - > pmsa_nat_bits , & regs - > r3 , nat ) ;
copy_reg ( & ms - > pmsa_gr [ 8 - 1 ] , ms - > pmsa_nat_bits , & regs - > r8 , nat ) ;
copy_reg ( & ms - > pmsa_gr [ 9 - 1 ] , ms - > pmsa_nat_bits , & regs - > r9 , nat ) ;
copy_reg ( & ms - > pmsa_gr [ 10 - 1 ] , ms - > pmsa_nat_bits , & regs - > r10 , nat ) ;
copy_reg ( & ms - > pmsa_gr [ 11 - 1 ] , ms - > pmsa_nat_bits , & regs - > r11 , nat ) ;
copy_reg ( & ms - > pmsa_gr [ 12 - 1 ] , ms - > pmsa_nat_bits , & regs - > r12 , nat ) ;
copy_reg ( & ms - > pmsa_gr [ 13 - 1 ] , ms - > pmsa_nat_bits , & regs - > r13 , nat ) ;
copy_reg ( & ms - > pmsa_gr [ 14 - 1 ] , ms - > pmsa_nat_bits , & regs - > r14 , nat ) ;
copy_reg ( & ms - > pmsa_gr [ 15 - 1 ] , ms - > pmsa_nat_bits , & regs - > r15 , nat ) ;
if ( ia64_psr ( regs ) - > bn )
bank = ms - > pmsa_bank1_gr ;
else
bank = ms - > pmsa_bank0_gr ;
copy_reg ( & bank [ 16 - 16 ] , ms - > pmsa_nat_bits , & regs - > r16 , nat ) ;
copy_reg ( & bank [ 17 - 16 ] , ms - > pmsa_nat_bits , & regs - > r17 , nat ) ;
copy_reg ( & bank [ 18 - 16 ] , ms - > pmsa_nat_bits , & regs - > r18 , nat ) ;
copy_reg ( & bank [ 19 - 16 ] , ms - > pmsa_nat_bits , & regs - > r19 , nat ) ;
copy_reg ( & bank [ 20 - 16 ] , ms - > pmsa_nat_bits , & regs - > r20 , nat ) ;
copy_reg ( & bank [ 21 - 16 ] , ms - > pmsa_nat_bits , & regs - > r21 , nat ) ;
copy_reg ( & bank [ 22 - 16 ] , ms - > pmsa_nat_bits , & regs - > r22 , nat ) ;
copy_reg ( & bank [ 23 - 16 ] , ms - > pmsa_nat_bits , & regs - > r23 , nat ) ;
copy_reg ( & bank [ 24 - 16 ] , ms - > pmsa_nat_bits , & regs - > r24 , nat ) ;
copy_reg ( & bank [ 25 - 16 ] , ms - > pmsa_nat_bits , & regs - > r25 , nat ) ;
copy_reg ( & bank [ 26 - 16 ] , ms - > pmsa_nat_bits , & regs - > r26 , nat ) ;
copy_reg ( & bank [ 27 - 16 ] , ms - > pmsa_nat_bits , & regs - > r27 , nat ) ;
copy_reg ( & bank [ 28 - 16 ] , ms - > pmsa_nat_bits , & regs - > r28 , nat ) ;
copy_reg ( & bank [ 29 - 16 ] , ms - > pmsa_nat_bits , & regs - > r29 , nat ) ;
copy_reg ( & bank [ 30 - 16 ] , ms - > pmsa_nat_bits , & regs - > r30 , nat ) ;
copy_reg ( & bank [ 31 - 16 ] , ms - > pmsa_nat_bits , & regs - > r31 , nat ) ;
}
2005-09-11 17:22:53 +10:00
/* On entry to this routine, we are running on the per cpu stack, see
* mca_asm . h . The original stack has not been touched by this event . Some of
* the original stack ' s registers will be in the RBS on this stack . This stack
* also contains a partial pt_regs and switch_stack , the rest of the data is in
* PAL minstate .
*
* The first thing to do is modify the original stack to look like a blocked
* task so we can run backtrace on the original task . Also mark the per cpu
* stack as current to ensure that we use the correct task state , it also means
* that we can do backtrace on the MCA / INIT handler code itself .
*/
2006-07-03 00:25:41 -07:00
static struct task_struct *
2005-09-11 17:22:53 +10:00
ia64_mca_modify_original_stack ( struct pt_regs * regs ,
const struct switch_stack * sw ,
struct ia64_sal_os_state * sos ,
const char * type )
{
2006-02-08 13:41:04 +11:00
char * p ;
2005-09-11 17:22:53 +10:00
ia64_va va ;
extern char ia64_leave_kernel [ ] ; /* Need asm address, not function descriptor */
const pal_min_state_area_t * ms = sos - > pal_min_state ;
2006-07-03 00:25:41 -07:00
struct task_struct * previous_current ;
2005-09-11 17:22:53 +10:00
struct pt_regs * old_regs ;
struct switch_stack * old_sw ;
unsigned size = sizeof ( struct pt_regs ) +
sizeof ( struct switch_stack ) + 16 ;
2009-05-22 13:49:49 -07:00
unsigned long * old_bspstore , * old_bsp ;
unsigned long * new_bspstore , * new_bsp ;
unsigned long old_unat , old_rnat , new_rnat , nat ;
2005-09-11 17:22:53 +10:00
u64 slots , loadrs = regs - > loadrs ;
u64 r12 = ms - > pmsa_gr [ 12 - 1 ] , r13 = ms - > pmsa_gr [ 13 - 1 ] ;
u64 ar_bspstore = regs - > ar_bspstore ;
u64 ar_bsp = regs - > ar_bspstore + ( loadrs > > 16 ) ;
const char * msg ;
int cpu = smp_processor_id ( ) ;
previous_current = curr_task ( cpu ) ;
2016-09-20 20:29:40 +02:00
ia64_set_curr_task ( cpu , current ) ;
2005-09-11 17:22:53 +10:00
if ( ( p = strchr ( current - > comm , ' ' ) ) )
* p = ' \0 ' ;
/* Best effort attempt to cope with MCA/INIT delivered while in
* physical mode .
*/
regs - > cr_ipsr = ms - > pmsa_ipsr ;
if ( ia64_psr ( regs ) - > dt = = 0 ) {
va . l = r12 ;
if ( va . f . reg = = 0 ) {
va . f . reg = 7 ;
r12 = va . l ;
}
va . l = r13 ;
if ( va . f . reg = = 0 ) {
va . f . reg = 7 ;
r13 = va . l ;
}
}
if ( ia64_psr ( regs ) - > rt = = 0 ) {
va . l = ar_bspstore ;
if ( va . f . reg = = 0 ) {
va . f . reg = 7 ;
ar_bspstore = va . l ;
}
va . l = ar_bsp ;
if ( va . f . reg = = 0 ) {
va . f . reg = 7 ;
ar_bsp = va . l ;
}
}
/* mca_asm.S ia64_old_stack() cannot assume that the dirty registers
* have been copied to the old stack , the old stack may fail the
* validation tests below . So ia64_old_stack ( ) must restore the dirty
* registers from the new stack . The old and new bspstore probably
* have different alignments , so loadrs calculated on the old bsp
* cannot be used to restore from the new bsp . Calculate a suitable
* loadrs for the new stack and save it in the new pt_regs , where
* ia64_old_stack ( ) can get it .
*/
2009-05-22 13:49:49 -07:00
old_bspstore = ( unsigned long * ) ar_bspstore ;
old_bsp = ( unsigned long * ) ar_bsp ;
2005-09-11 17:22:53 +10:00
slots = ia64_rse_num_regs ( old_bspstore , old_bsp ) ;
2009-05-22 13:49:49 -07:00
new_bspstore = ( unsigned long * ) ( ( u64 ) current + IA64_RBS_OFFSET ) ;
2005-09-11 17:22:53 +10:00
new_bsp = ia64_rse_skip_regs ( new_bspstore , slots ) ;
regs - > loadrs = ( new_bsp - new_bspstore ) * 8 < < 16 ;
/* Verify the previous stack state before we change it */
if ( user_mode ( regs ) ) {
msg = " occurred in user space " ;
2006-02-08 13:41:04 +11:00
/* previous_current is guaranteed to be valid when the task was
* in user space , so . . .
*/
ia64_mca_modify_comm ( previous_current ) ;
2005-09-11 17:22:53 +10:00
goto no_mod ;
}
2006-03-24 09:49:52 -08:00
2007-05-18 17:17:17 -05:00
if ( r13 ! = sos - > prev_IA64_KR_CURRENT ) {
msg = " inconsistent previous current and r13 " ;
goto no_mod ;
}
2006-03-24 09:49:52 -08:00
if ( ! mca_recover_range ( ms - > pmsa_iip ) ) {
if ( ( r12 - r13 ) > = KERNEL_STACK_SIZE ) {
msg = " inconsistent r12 and r13 " ;
goto no_mod ;
}
if ( ( ar_bspstore - r13 ) > = KERNEL_STACK_SIZE ) {
msg = " inconsistent ar.bspstore and r13 " ;
goto no_mod ;
}
va . p = old_bspstore ;
if ( va . f . reg < 5 ) {
msg = " old_bspstore is in the wrong region " ;
goto no_mod ;
}
if ( ( ar_bsp - r13 ) > = KERNEL_STACK_SIZE ) {
msg = " inconsistent ar.bsp and r13 " ;
goto no_mod ;
}
size + = ( ia64_rse_skip_regs ( old_bspstore , slots ) - old_bspstore ) * 8 ;
if ( ar_bspstore + size > r12 ) {
msg = " no room for blocked state " ;
goto no_mod ;
}
2005-09-11 17:22:53 +10:00
}
2006-02-08 13:41:04 +11:00
ia64_mca_modify_comm ( previous_current ) ;
2005-09-11 17:22:53 +10:00
/* Make the original task look blocked. First stack a struct pt_regs,
* describing the state at the time of interrupt . mca_asm . S built a
* partial pt_regs , copy it and fill in the blanks using minstate .
*/
p = ( char * ) r12 - sizeof ( * regs ) ;
old_regs = ( struct pt_regs * ) p ;
memcpy ( old_regs , regs , sizeof ( * regs ) ) ;
old_regs - > loadrs = loadrs ;
old_unat = old_regs - > ar_unat ;
[IA64] Save I-resources to ia64_sal_os_state
This is a patch related to this discussion.
http://www.spinics.net/lists/linux-ia64/msg07605.html
When INIT is sent, ip/psr/pfs register is stored to the I-resources
(iip/ipsr/ifs registers), and they are copied in the min-state save
area(pmsa_{iip,ipsr,ifs}).
Therefore, in creating pt_regs at ia64_mca_modify_original_stack(),
cr_{iip,ipsr,ifs} should be derived from pmsa_{iip,ipsr,ifs}. But
current code copies pmsa_{xip,xpsr,xfs} to cr_{iip,ipsr,ifs}
when PSR.ic is 0.
finish_pt_regs(struct pt_regs *regs, const pal_min_state_area_t *ms,
unsigned long *nat)
{
(snip)
if (ia64_psr(regs)->ic) {
regs->cr_iip = ms->pmsa_iip;
regs->cr_ipsr = ms->pmsa_ipsr;
regs->cr_ifs = ms->pmsa_ifs;
} else {
regs->cr_iip = ms->pmsa_xip;
regs->cr_ipsr = ms->pmsa_xpsr;
regs->cr_ifs = ms->pmsa_xfs;
}
It's ok when PSR.ic is not 0. But when PSR.ic is 0, this could be
a problem when we investigate kernel as the value of regs->cr_iip does
not point to where INIT really interrupted.
At first I tried to change finish_pt_regs() so that it uses always
pmsa_{iip,ipsr,ifs} for cr_{iip,ipsr,ifs}, but Keith Owens pointed out
it could cause another problem if I change it.
>The only problem I can think of is an MCA/INIT
>arriving while code like SAVE_MIN or SAVE_REST is executing. Back
>tracing at that point using pmsa_iip is going to be a problem, you have
>no idea what state the registers or stack are in.
I confirmed he was right, so I decided to keep it as-is and to
save pmsa_{iip,ipsr,ifs} to ia64_sal_os_state for debugging.
An attached patch is just adding new members into ia64_sal_os_state to
save pmsa_{iip,ipsr,ifs}.
Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2009-11-19 16:39:22 -05:00
finish_pt_regs ( old_regs , sos , & old_unat ) ;
2005-09-11 17:22:53 +10:00
/* Next stack a struct switch_stack. mca_asm.S built a partial
* switch_stack , copy it and fill in the blanks using pt_regs and
* minstate .
*
* In the synthesized switch_stack , b0 points to ia64_leave_kernel ,
* ar . pfs is set to 0.
*
* unwind . c : : unw_unwind ( ) does special processing for interrupt frames .
* It checks if the PRED_NON_SYSCALL predicate is set , if the predicate
* is clear then unw_unwind ( ) does _not_ adjust bsp over pt_regs . Not
* that this is documented , of course . Set PRED_NON_SYSCALL in the
* switch_stack on the original stack so it will unwind correctly when
* unwind . c reads pt_regs .
*
* thread . ksp is updated to point to the synthesized switch_stack .
*/
p - = sizeof ( struct switch_stack ) ;
old_sw = ( struct switch_stack * ) p ;
memcpy ( old_sw , sw , sizeof ( * sw ) ) ;
old_sw - > caller_unat = old_unat ;
old_sw - > ar_fpsr = old_regs - > ar_fpsr ;
copy_reg ( & ms - > pmsa_gr [ 4 - 1 ] , ms - > pmsa_nat_bits , & old_sw - > r4 , & old_unat ) ;
copy_reg ( & ms - > pmsa_gr [ 5 - 1 ] , ms - > pmsa_nat_bits , & old_sw - > r5 , & old_unat ) ;
copy_reg ( & ms - > pmsa_gr [ 6 - 1 ] , ms - > pmsa_nat_bits , & old_sw - > r6 , & old_unat ) ;
copy_reg ( & ms - > pmsa_gr [ 7 - 1 ] , ms - > pmsa_nat_bits , & old_sw - > r7 , & old_unat ) ;
old_sw - > b0 = ( u64 ) ia64_leave_kernel ;
old_sw - > b1 = ms - > pmsa_br1 ;
old_sw - > ar_pfs = 0 ;
old_sw - > ar_unat = old_unat ;
old_sw - > pr = old_regs - > pr | ( 1UL < < PRED_NON_SYSCALL ) ;
previous_current - > thread . ksp = ( u64 ) p - 16 ;
/* Finally copy the original stack's registers back to its RBS.
* Registers from ar . bspstore through ar . bsp at the time of the event
* are in the current RBS , copy them back to the original stack . The
* copy must be done register by register because the original bspstore
* and the current one have different alignments , so the saved RNAT
* data occurs at different places .
*
* mca_asm does cover , so the old_bsp already includes all registers at
* the time of MCA / INIT . It also does flushrs , so all registers before
* this function have been written to backing store on the MCA / INIT
* stack .
*/
new_rnat = ia64_get_rnat ( ia64_rse_rnat_addr ( new_bspstore ) ) ;
old_rnat = regs - > ar_rnat ;
while ( slots - - ) {
if ( ia64_rse_is_rnat_slot ( new_bspstore ) ) {
new_rnat = ia64_get_rnat ( new_bspstore + + ) ;
}
if ( ia64_rse_is_rnat_slot ( old_bspstore ) ) {
* old_bspstore + + = old_rnat ;
old_rnat = 0 ;
}
nat = ( new_rnat > > ia64_rse_slot_num ( new_bspstore ) ) & 1UL ;
old_rnat & = ~ ( 1UL < < ia64_rse_slot_num ( old_bspstore ) ) ;
old_rnat | = ( nat < < ia64_rse_slot_num ( old_bspstore ) ) ;
* old_bspstore + + = * new_bspstore + + ;
}
old_sw - > ar_bspstore = ( unsigned long ) old_bspstore ;
old_sw - > ar_rnat = old_rnat ;
sos - > prev_task = previous_current ;
return previous_current ;
no_mod :
2008-11-17 10:18:08 +09:00
mprintk ( KERN_INFO " cpu %d, %s %s, original stack not modified \n " ,
2005-09-11 17:22:53 +10:00
smp_processor_id ( ) , type , msg ) ;
2009-10-01 17:55:16 -04:00
old_unat = regs - > ar_unat ;
[IA64] Save I-resources to ia64_sal_os_state
This is a patch related to this discussion.
http://www.spinics.net/lists/linux-ia64/msg07605.html
When INIT is sent, ip/psr/pfs register is stored to the I-resources
(iip/ipsr/ifs registers), and they are copied in the min-state save
area(pmsa_{iip,ipsr,ifs}).
Therefore, in creating pt_regs at ia64_mca_modify_original_stack(),
cr_{iip,ipsr,ifs} should be derived from pmsa_{iip,ipsr,ifs}. But
current code copies pmsa_{xip,xpsr,xfs} to cr_{iip,ipsr,ifs}
when PSR.ic is 0.
finish_pt_regs(struct pt_regs *regs, const pal_min_state_area_t *ms,
unsigned long *nat)
{
(snip)
if (ia64_psr(regs)->ic) {
regs->cr_iip = ms->pmsa_iip;
regs->cr_ipsr = ms->pmsa_ipsr;
regs->cr_ifs = ms->pmsa_ifs;
} else {
regs->cr_iip = ms->pmsa_xip;
regs->cr_ipsr = ms->pmsa_xpsr;
regs->cr_ifs = ms->pmsa_xfs;
}
It's ok when PSR.ic is not 0. But when PSR.ic is 0, this could be
a problem when we investigate kernel as the value of regs->cr_iip does
not point to where INIT really interrupted.
At first I tried to change finish_pt_regs() so that it uses always
pmsa_{iip,ipsr,ifs} for cr_{iip,ipsr,ifs}, but Keith Owens pointed out
it could cause another problem if I change it.
>The only problem I can think of is an MCA/INIT
>arriving while code like SAVE_MIN or SAVE_REST is executing. Back
>tracing at that point using pmsa_iip is going to be a problem, you have
>no idea what state the registers or stack are in.
I confirmed he was right, so I decided to keep it as-is and to
save pmsa_{iip,ipsr,ifs} to ia64_sal_os_state for debugging.
An attached patch is just adding new members into ia64_sal_os_state to
save pmsa_{iip,ipsr,ifs}.
Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2009-11-19 16:39:22 -05:00
finish_pt_regs ( regs , sos , & old_unat ) ;
2005-09-11 17:22:53 +10:00
return previous_current ;
}
/* The monarch/slave interaction is based on monarch_cpu and requires that all
* slaves have entered rendezvous before the monarch leaves . If any cpu has
* not entered rendezvous yet then wait a bit . The assumption is that any
* slave that has not rendezvoused after a reasonable time is never going to do
* so . In this context , slave includes cpus that respond to the MCA rendezvous
* interrupt , as well as cpus that receive the INIT slave event .
*/
static void
2006-04-11 14:59:41 +10:00
ia64_wait_for_slaves ( int monarch , const char * type )
2005-09-11 17:22:53 +10:00
{
2007-09-20 13:59:12 -05:00
int c , i , wait ;
/*
* wait 5 seconds total for slaves ( arbitrary )
*/
for ( i = 0 ; i < 5000 ; i + + ) {
wait = 0 ;
for_each_online_cpu ( c ) {
if ( c = = monarch )
continue ;
if ( ia64_mc_info . imi_rendez_checkin [ c ]
= = IA64_MCA_RENDEZ_CHECKIN_NOTDONE ) {
udelay ( 1000 ) ; /* short wait */
wait = 1 ;
break ;
}
2005-09-11 17:22:53 +10:00
}
2007-09-20 13:59:12 -05:00
if ( ! wait )
goto all_in ;
2005-09-11 17:22:53 +10:00
}
2007-09-20 13:59:12 -05:00
2006-09-26 14:44:37 -07:00
/*
* Maybe slave ( s ) dead . Print buffered messages immediately .
*/
ia64_mlogbuf_finish ( 0 ) ;
mprintk ( KERN_INFO " OS %s slave did not rendezvous on cpu " , type ) ;
2006-02-08 13:40:59 +11:00
for_each_online_cpu ( c ) {
if ( c = = monarch )
continue ;
if ( ia64_mc_info . imi_rendez_checkin [ c ] = = IA64_MCA_RENDEZ_CHECKIN_NOTDONE )
2006-09-26 14:44:37 -07:00
mprintk ( " %d " , c ) ;
2006-02-08 13:40:59 +11:00
}
2006-09-26 14:44:37 -07:00
mprintk ( " \n " ) ;
2006-02-08 13:40:59 +11:00
return ;
all_in :
2006-09-26 14:44:37 -07:00
mprintk ( KERN_INFO " All OS %s slaves have reached rendezvous \n " , type ) ;
2006-02-08 13:40:59 +11:00
return ;
2005-09-11 17:22:53 +10:00
}
2008-04-03 11:02:58 -07:00
/* mca_insert_tr
 *
 * Re-insert the translation register entries recorded for this cpu in
 * ia64_idtrs[], switching the region register (rid) around each insert
 * when the recorded value differs from the live one.
 *
 * iord: 1: itr, 2: dtr
 *
 * The per-cpu table is indexed as (iord - 1) * IA64_TR_ALLOC_MAX + slot,
 * so each bank occupies IA64_TR_ALLOC_MAX consecutive entries.
 */
static void mca_insert_tr(u64 iord)
{
	int i;
	u64 old_rr;
	struct ia64_tr_entry *p;
	unsigned long psr;
	int cpu = smp_processor_id();

	/* No table for this cpu, nothing to reload. */
	if (!ia64_idtrs[cpu])
		return;

	psr = ia64_clear_ic();
	for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
		/*
		 * Pick the entry that belongs to slot i.  Omitting "+ i"
		 * would test the first entry of the bank on every pass and
		 * never look at the slots this loop is iterating over.
		 */
		p = ia64_idtrs[cpu] + (iord - 1) * IA64_TR_ALLOC_MAX + i;

		/* Bit 0 of the pte clear: entry is skipped. */
		if (!(p->pte & 0x1))
			continue;

		/* Temporarily install the region register the entry was recorded with. */
		old_rr = ia64_get_rr(p->ifa);
		if (old_rr != p->rr) {
			ia64_set_rr(p->ifa, p->rr);
			ia64_srlz_d();
		}

		/* Purge first, then insert into the requested bank(s). */
		ia64_ptr(iord, p->ifa, p->itir >> 2);
		ia64_srlz_i();
		if (iord & 0x1) {
			ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
			ia64_srlz_i();
		}
		if (iord & 0x2) {
			ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
			ia64_srlz_i();
		}

		/* Put the original region register back. */
		if (old_rr != p->rr) {
			ia64_set_rr(p->ifa, old_rr);
			ia64_srlz_d();
		}
	}
	ia64_set_psr(psr);
}
2005-04-16 15:20:36 -07:00
/*
2005-09-11 17:22:53 +10:00
* ia64_mca_handler
2005-04-16 15:20:36 -07:00
*
* This is uncorrectable machine check handler called from OS_MCA
* dispatch code which is in turn called from SAL_CHECK ( ) .
* This is the place where the core of OS MCA handling is done .
* Right now the logs are extracted and displayed in a well - defined
* format . This handler code is supposed to be run only on the
* monarch processor . Once the monarch is done with MCA handling
* further MCA logging is enabled by clearing logs .
* Monarch also has the duty of sending wakeup - IPIs to pull the
* slave processors out of rendezvous spinloop .
2007-05-18 17:17:17 -05:00
*
* If multiple processors call into OS_MCA , the first will become
* the monarch . Subsequent cpus will be recorded in the mca_cpu
* bitmask . After the first monarch has processed its MCA , it
* will wake up the next cpu in the mca_cpu bitmask and then go
* into the rendezvous loop . When all processors have serviced
* their MCA , the last monarch frees up the rest of the processors .
2005-04-16 15:20:36 -07:00
*
* Parameters :
*   regs , sw - passed on to ia64_mca_modify_original_stack ( ) ; regs is
*              also handed to the NOTIFY_MCA ( ) calls .
*   sos      - SAL to OS state ; this function writes sos - > monarch and ,
*              when the error was recovered , sos - > os_status .
*/
void
2005-09-11 17:22:53 +10:00
ia64_mca_handler ( struct pt_regs * regs , struct switch_stack * sw ,
struct ia64_sal_os_state * sos )
2005-04-16 15:20:36 -07:00
{
2005-09-11 17:22:53 +10:00
int recover , cpu = smp_processor_id ( ) ;
2006-07-03 00:25:41 -07:00
struct task_struct * previous_current ;
2006-04-03 15:26:12 +10:00
struct ia64_mca_notify_die nd =
2008-04-17 17:00:37 +09:00
{ . sos = sos , . monarch_cpu = & monarch_cpu , . data = & recover } ;
2007-05-18 17:17:17 -05:00
static atomic_t mca_count ;
static cpumask_t mca_cpu ;
2005-09-11 17:22:53 +10:00
2007-05-18 17:17:17 -05:00
/* The cpu that takes mca_count from 0 to 1 is the monarch ; any cpu
 * arriving while the count is already non-zero is queued in mca_cpu .
 */
if ( atomic_add_return ( 1 , & mca_count ) = = 1 ) {
monarch_cpu = cpu ;
sos - > monarch = 1 ;
} else {
2015-03-05 10:49:16 +10:30
cpumask_set_cpu ( cpu , & mca_cpu ) ;
2007-05-18 17:17:17 -05:00
sos - > monarch = 0 ;
}
2006-09-26 14:44:37 -07:00
mprintk ( KERN_INFO " Entered OS MCA handler. PSP=%lx cpu=%d "
" monarch=%ld \n " , sos - > proc_state_param , cpu , sos - > monarch ) ;
2006-02-08 13:40:59 +11:00
2005-09-11 17:22:53 +10:00
previous_current = ia64_mca_modify_original_stack ( regs , sw , sos , " MCA " ) ;
2007-05-18 17:17:17 -05:00
2008-04-17 17:00:37 +09:00
NOTIFY_MCA ( DIE_MCA_MONARCH_ENTER , regs , ( long ) & nd , 1 ) ;
2007-09-19 16:58:31 -05:00
/* Flag this cpu's check-in slot as CONCURRENT_MCA for the duration of
 * the handler ; it is reset to NOTDONE on both exit paths below .
 */
ia64_mc_info . imi_rendez_checkin [ cpu ] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA ;
2007-05-18 17:17:17 -05:00
if ( sos - > monarch ) {
ia64_wait_for_slaves ( cpu , " MCA " ) ;
2007-09-19 16:58:31 -05:00
/* Wakeup all the processors which are spinning in the
* rendezvous loop . They will leave SAL , then spin in the OS
* with interrupts disabled until this monarch cpu leaves the
* MCA handler . That gets control back to the OS so we can
* backtrace the other cpus , backtrace when spinning in SAL
* does not work .
*/
ia64_mca_wakeup_all ( ) ;
2007-05-18 17:17:17 -05:00
} else {
2015-03-05 10:49:16 +10:30
while ( cpumask_test_cpu ( cpu , & mca_cpu ) )
2007-05-18 17:17:17 -05:00
cpu_relax ( ) ; /* spin until monarch wakes us */
2008-04-17 16:59:52 +09:00
}
2008-04-17 17:00:37 +09:00
NOTIFY_MCA ( DIE_MCA_MONARCH_PROCESS , regs , ( long ) & nd , 1 ) ;
2005-09-11 17:22:53 +10:00
2005-04-16 15:20:36 -07:00
/* Get the MCA error record and log it */
ia64_mca_log_sal_error_record ( SAL_INFO_TYPE_MCA ) ;
2006-12-14 16:01:41 -06:00
/* MCA error recovery : recover is non-zero only when a ucmc extension
 * hook is installed and it returns non-zero for the current record .
 */
recover = ( ia64_mca_ucmc_extension
2005-04-16 15:20:36 -07:00
& & ia64_mca_ucmc_extension (
IA64_LOG_CURR_BUFFER ( SAL_INFO_TYPE_MCA ) ,
2005-09-11 17:22:53 +10:00
sos ) ) ;
2005-04-16 15:20:36 -07:00
if ( recover ) {
/* Downgrade the logged record to "corrected" and clear the SAL state */
sal_log_record_header_t * rh = IA64_LOG_CURR_BUFFER ( SAL_INFO_TYPE_MCA ) ;
rh - > severity = sal_log_severity_corrected ;
ia64_sal_clear_state_info ( SAL_INFO_TYPE_MCA ) ;
2005-09-11 17:22:53 +10:00
sos - > os_status = IA64_MCA_CORRECTED ;
2006-09-26 14:44:37 -07:00
} else {
/* Dump buffered message to console */
ia64_mlogbuf_finish ( 1 ) ;
2005-04-16 15:20:36 -07:00
}
2008-04-08 13:31:47 +09:00
ia64: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
At the end of the patch set all uses of __get_cpu_var have been removed so
the macro is removed too.
The patch set includes passes over all arches as well. Once these operations
are used throughout then specialized macros can be defined in non -x86
arches as well in order to optimize per cpu access by f.e. using a global
register that may be set to the per cpu base.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: linux-ia64@vger.kernel.org
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:47 -05:00
if ( __this_cpu_read ( ia64_mca_tr_reload ) ) {
2008-04-03 11:02:58 -07:00
mca_insert_tr ( 0x1 ) ; /*Reload dynamic itrs*/
mca_insert_tr ( 0x2 ) ; /*Reload dynamic dtrs*/
}
2008-04-17 10:14:51 -07:00
2008-04-17 17:00:37 +09:00
NOTIFY_MCA ( DIE_MCA_MONARCH_LEAVE , regs , ( long ) & nd , 1 ) ;
2005-04-16 15:20:36 -07:00
2007-05-18 17:17:17 -05:00
/* A count still above zero means other cpus are queued in mca_cpu */
if ( atomic_dec_return ( & mca_count ) > 0 ) {
int i ;
/* wake up the next monarch cpu,
* and put this cpu in the rendez loop .
*/
for_each_online_cpu ( i ) {
2015-03-05 10:49:16 +10:30
if ( cpumask_test_cpu ( i , & mca_cpu ) ) {
2007-05-18 17:17:17 -05:00
monarch_cpu = i ;
2015-03-05 10:49:16 +10:30
cpumask_clear_cpu ( i , & mca_cpu ) ; /* wake next cpu */
2007-05-18 17:17:17 -05:00
while ( monarch_cpu ! = - 1 )
cpu_relax ( ) ; /* spin until last cpu leaves */
2016-09-20 20:29:40 +02:00
ia64_set_curr_task ( cpu , previous_current ) ;
2007-09-19 16:58:31 -05:00
ia64_mc_info . imi_rendez_checkin [ cpu ]
= IA64_MCA_RENDEZ_CHECKIN_NOTDONE ;
2007-05-18 17:17:17 -05:00
return ;
}
}
}
2016-09-20 20:29:40 +02:00
/* Last cpu out : restore the interrupted task and reset the check-in slot */
ia64_set_curr_task ( cpu , previous_current ) ;
2007-09-19 16:58:31 -05:00
ia64_mc_info . imi_rendez_checkin [ cpu ] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE ;
monarch_cpu = - 1 ; /* This frees the slaves and previous monarchs */
2005-04-16 15:20:36 -07:00
}
2006-12-05 19:36:26 +00:00
/*
 * Work items used when switching the CMC handling mode :
 * cmc_disable_work is scheduled by ia64_mca_cmc_int_handler ( ) when it
 * switches to polling , cmc_enable_work by ia64_mca_cmc_int_caller ( )
 * when it returns to interrupt driven handling .
 */
static DECLARE_WORK ( cmc_disable_work , ia64_mca_cmc_vector_disable_keventd ) ;
static DECLARE_WORK ( cmc_enable_work , ia64_mca_cmc_vector_enable_keventd ) ;
2005-04-16 15:20:36 -07:00
/*
* ia64_mca_cmc_int_handler
*
* This is corrected machine check interrupt handler .
* Right now the logs are extracted and displayed in a well - defined
* format .
*
* Inputs
* interrupt number
* client data arg ptr
*
* Outputs
* None
*/
static irqreturn_t
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers
Maintain a per-CPU global "struct pt_regs *" variable which can be used instead
of passing regs around manually through all ~1800 interrupt handlers in the
Linux kernel.
The regs pointer is used in few places, but it potentially costs both stack
space and code to pass it around. On the FRV arch, removing the regs parameter
from all the genirq function results in a 20% speed up of the IRQ exit path
(ie: from leaving timer_interrupt() to leaving do_IRQ()).
Where appropriate, an arch may override the generic storage facility and do
something different with the variable. On FRV, for instance, the address is
maintained in GR28 at all times inside the kernel as part of general exception
handling.
Having looked over the code, it appears that the parameter may be handed down
through up to twenty or so layers of functions. Consider a USB character
device attached to a USB hub, attached to a USB controller that posts its
interrupts through a cascaded auxiliary interrupt controller. A character
device driver may want to pass regs to the sysrq handler through the input
layer which adds another few layers of parameter passing.
I've build this code with allyesconfig for x86_64 and i386. I've runtested the
main part of the code on FRV and i386, though I can't test most of the drivers.
I've also done partial conversion for powerpc and MIPS - these at least compile
with minimal configurations.
This will affect all archs. Mostly the changes should be relatively easy.
Take do_IRQ(), store the regs pointer at the beginning, saving the old one:
struct pt_regs *old_regs = set_irq_regs(regs);
And put the old one back at the end:
set_irq_regs(old_regs);
Don't pass regs through to generic_handle_irq() or __do_IRQ().
In timer_interrupt(), this sort of change will be necessary:
- update_process_times(user_mode(regs));
- profile_tick(CPU_PROFILING, regs);
+ update_process_times(user_mode(get_irq_regs()));
+ profile_tick(CPU_PROFILING);
I'd like to move update_process_times()'s use of get_irq_regs() into itself,
except that i386, alone of the archs, uses something other than user_mode().
Some notes on the interrupt handling in the drivers:
(*) input_dev() is now gone entirely. The regs pointer is no longer stored in
the input_dev struct.
(*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does
something different depending on whether it's been supplied with a regs
pointer or not.
(*) Various IRQ handler function pointers have been moved to type
irq_handler_t.
Signed-Off-By: David Howells <dhowells@redhat.com>
(cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 14:55:46 +01:00
ia64_mca_cmc_int_handler ( int cmc_irq , void * arg )
2005-04-16 15:20:36 -07:00
{
static unsigned long cmc_history [ CMC_HISTORY_LENGTH ] ;
static int index ;
static DEFINE_SPINLOCK ( cmc_history_lock ) ;
IA64_MCA_DEBUG ( " %s: received interrupt vector = %#x on CPU %d \n " ,
2008-03-04 15:15:00 -08:00
__func__ , cmc_irq , smp_processor_id ( ) ) ;
2005-04-16 15:20:36 -07:00
/* SAL spec states this should run w/ interrupts enabled */
local_irq_enable ( ) ;
spin_lock ( & cmc_history_lock ) ;
if ( ! cmc_polling_enabled ) {
int i , count = 1 ; /* we know 1 happened now */
unsigned long now = jiffies ;
for ( i = 0 ; i < CMC_HISTORY_LENGTH ; i + + ) {
if ( now - cmc_history [ i ] < = HZ )
count + + ;
}
IA64_MCA_DEBUG ( KERN_INFO " CMC threshold %d/%d \n " , count , CMC_HISTORY_LENGTH ) ;
if ( count > = CMC_HISTORY_LENGTH ) {
cmc_polling_enabled = 1 ;
spin_unlock ( & cmc_history_lock ) ;
2005-10-05 11:02:06 -06:00
/* If we're being hit with CMC interrupts, we won't
* ever execute the schedule_work ( ) below . Need to
* disable CMC interrupts on this processor now .
*/
ia64_mca_cmc_vector_disable ( NULL ) ;
2005-04-16 15:20:36 -07:00
schedule_work ( & cmc_disable_work ) ;
/*
* Corrected errors will still be corrected , but
* make sure there ' s a log somewhere that indicates
* something is generating more than we can handle .
*/
printk ( KERN_WARNING " WARNING: Switching to polling CMC handler; error records may be lost \n " ) ;
mod_timer ( & cmc_poll_timer , jiffies + CMC_POLL_INTERVAL ) ;
/* lock already released, get out now */
2006-09-26 15:27:56 -07:00
goto out ;
2005-04-16 15:20:36 -07:00
} else {
cmc_history [ index + + ] = now ;
if ( index = = CMC_HISTORY_LENGTH )
index = 0 ;
}
}
spin_unlock ( & cmc_history_lock ) ;
2006-09-26 15:27:56 -07:00
out :
/* Get the CMC error record and log it */
ia64_mca_log_sal_error_record ( SAL_INFO_TYPE_CMC ) ;
2012-02-08 15:32:13 +01:00
local_irq_disable ( ) ;
2005-04-16 15:20:36 -07:00
return IRQ_HANDLED ;
}
/*
* ia64_mca_cmc_int_caller
*
* Triggered by sw interrupt from CMC polling routine . Calls
* real interrupt handler and either triggers a sw interrupt
* on the next cpu or does cleanup at the end .
*
* Inputs
* interrupt number
* client data arg ptr
* Outputs
* handled
*/
static irqreturn_t
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers
Maintain a per-CPU global "struct pt_regs *" variable which can be used instead
of passing regs around manually through all ~1800 interrupt handlers in the
Linux kernel.
The regs pointer is used in few places, but it potentially costs both stack
space and code to pass it around. On the FRV arch, removing the regs parameter
from all the genirq function results in a 20% speed up of the IRQ exit path
(ie: from leaving timer_interrupt() to leaving do_IRQ()).
Where appropriate, an arch may override the generic storage facility and do
something different with the variable. On FRV, for instance, the address is
maintained in GR28 at all times inside the kernel as part of general exception
handling.
Having looked over the code, it appears that the parameter may be handed down
through up to twenty or so layers of functions. Consider a USB character
device attached to a USB hub, attached to a USB controller that posts its
interrupts through a cascaded auxiliary interrupt controller. A character
device driver may want to pass regs to the sysrq handler through the input
layer which adds another few layers of parameter passing.
I've build this code with allyesconfig for x86_64 and i386. I've runtested the
main part of the code on FRV and i386, though I can't test most of the drivers.
I've also done partial conversion for powerpc and MIPS - these at least compile
with minimal configurations.
This will affect all archs. Mostly the changes should be relatively easy.
Take do_IRQ(), store the regs pointer at the beginning, saving the old one:
struct pt_regs *old_regs = set_irq_regs(regs);
And put the old one back at the end:
set_irq_regs(old_regs);
Don't pass regs through to generic_handle_irq() or __do_IRQ().
In timer_interrupt(), this sort of change will be necessary:
- update_process_times(user_mode(regs));
- profile_tick(CPU_PROFILING, regs);
+ update_process_times(user_mode(get_irq_regs()));
+ profile_tick(CPU_PROFILING);
I'd like to move update_process_times()'s use of get_irq_regs() into itself,
except that i386, alone of the archs, uses something other than user_mode().
Some notes on the interrupt handling in the drivers:
(*) input_dev() is now gone entirely. The regs pointer is no longer stored in
the input_dev struct.
(*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does
something different depending on whether it's been supplied with a regs
pointer or not.
(*) Various IRQ handler function pointers have been moved to type
irq_handler_t.
Signed-Off-By: David Howells <dhowells@redhat.com>
(cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 14:55:46 +01:00
ia64_mca_cmc_int_caller ( int cmc_irq , void * arg )
2005-04-16 15:20:36 -07:00
{
static int start_count = - 1 ;
unsigned int cpuid ;
cpuid = smp_processor_id ( ) ;
/* If first cpu, update count */
if ( start_count = = - 1 )
start_count = IA64_LOG_COUNT ( SAL_INFO_TYPE_CMC ) ;
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers
Maintain a per-CPU global "struct pt_regs *" variable which can be used instead
of passing regs around manually through all ~1800 interrupt handlers in the
Linux kernel.
The regs pointer is used in few places, but it potentially costs both stack
space and code to pass it around. On the FRV arch, removing the regs parameter
from all the genirq function results in a 20% speed up of the IRQ exit path
(ie: from leaving timer_interrupt() to leaving do_IRQ()).
Where appropriate, an arch may override the generic storage facility and do
something different with the variable. On FRV, for instance, the address is
maintained in GR28 at all times inside the kernel as part of general exception
handling.
Having looked over the code, it appears that the parameter may be handed down
through up to twenty or so layers of functions. Consider a USB character
device attached to a USB hub, attached to a USB controller that posts its
interrupts through a cascaded auxiliary interrupt controller. A character
device driver may want to pass regs to the sysrq handler through the input
layer which adds another few layers of parameter passing.
I've build this code with allyesconfig for x86_64 and i386. I've runtested the
main part of the code on FRV and i386, though I can't test most of the drivers.
I've also done partial conversion for powerpc and MIPS - these at least compile
with minimal configurations.
This will affect all archs. Mostly the changes should be relatively easy.
Take do_IRQ(), store the regs pointer at the beginning, saving the old one:
struct pt_regs *old_regs = set_irq_regs(regs);
And put the old one back at the end:
set_irq_regs(old_regs);
Don't pass regs through to generic_handle_irq() or __do_IRQ().
In timer_interrupt(), this sort of change will be necessary:
- update_process_times(user_mode(regs));
- profile_tick(CPU_PROFILING, regs);
+ update_process_times(user_mode(get_irq_regs()));
+ profile_tick(CPU_PROFILING);
I'd like to move update_process_times()'s use of get_irq_regs() into itself,
except that i386, alone of the archs, uses something other than user_mode().
Some notes on the interrupt handling in the drivers:
(*) input_dev() is now gone entirely. The regs pointer is no longer stored in
the input_dev struct.
(*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does
something different depending on whether it's been supplied with a regs
pointer or not.
(*) Various IRQ handler function pointers have been moved to type
irq_handler_t.
Signed-Off-By: David Howells <dhowells@redhat.com>
(cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 14:55:46 +01:00
ia64_mca_cmc_int_handler ( cmc_irq , arg ) ;
2005-04-16 15:20:36 -07:00
2009-03-16 14:12:42 +10:30
cpuid = cpumask_next ( cpuid + 1 , cpu_online_mask ) ;
2005-04-16 15:20:36 -07:00
2009-03-16 14:12:42 +10:30
if ( cpuid < nr_cpu_ids ) {
2005-04-16 15:20:36 -07:00
platform_send_ipi ( cpuid , IA64_CMCP_VECTOR , IA64_IPI_DM_INT , 0 ) ;
} else {
/* If no log record, switch out of polling mode */
if ( start_count = = IA64_LOG_COUNT ( SAL_INFO_TYPE_CMC ) ) {
printk ( KERN_WARNING " Returning to interrupt driven CMC handler \n " ) ;
schedule_work ( & cmc_enable_work ) ;
cmc_polling_enabled = 0 ;
} else {
mod_timer ( & cmc_poll_timer , jiffies + CMC_POLL_INTERVAL ) ;
}
start_count = - 1 ;
}
return IRQ_HANDLED ;
}
/*
* ia64_mca_cmc_poll
*
* Poll for Corrected Machine Checks ( CMCs )
*
* Inputs : dummy ( unused )
* Outputs : None
*
*/
static void
ia64_mca_cmc_poll ( unsigned long dummy )
{
/* Trigger a CMC interrupt cascade */
2012-03-28 14:42:46 -07:00
platform_send_ipi ( cpumask_first ( cpu_online_mask ) , IA64_CMCP_VECTOR ,
IA64_IPI_DM_INT , 0 ) ;
2005-04-16 15:20:36 -07:00
}
/*
* ia64_mca_cpe_int_caller
*
* Triggered by sw interrupt from CPE polling routine . Calls
* real interrupt handler and either triggers a sw interrupt
* on the next cpu or does cleanup at the end .
*
* Inputs
* interrupt number
* client data arg ptr
* Outputs
* handled
*/
# ifdef CONFIG_ACPI
static irqreturn_t
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers
Maintain a per-CPU global "struct pt_regs *" variable which can be used instead
of passing regs around manually through all ~1800 interrupt handlers in the
Linux kernel.
The regs pointer is used in few places, but it potentially costs both stack
space and code to pass it around. On the FRV arch, removing the regs parameter
from all the genirq function results in a 20% speed up of the IRQ exit path
(ie: from leaving timer_interrupt() to leaving do_IRQ()).
Where appropriate, an arch may override the generic storage facility and do
something different with the variable. On FRV, for instance, the address is
maintained in GR28 at all times inside the kernel as part of general exception
handling.
Having looked over the code, it appears that the parameter may be handed down
through up to twenty or so layers of functions. Consider a USB character
device attached to a USB hub, attached to a USB controller that posts its
interrupts through a cascaded auxiliary interrupt controller. A character
device driver may want to pass regs to the sysrq handler through the input
layer which adds another few layers of parameter passing.
I've build this code with allyesconfig for x86_64 and i386. I've runtested the
main part of the code on FRV and i386, though I can't test most of the drivers.
I've also done partial conversion for powerpc and MIPS - these at least compile
with minimal configurations.
This will affect all archs. Mostly the changes should be relatively easy.
Take do_IRQ(), store the regs pointer at the beginning, saving the old one:
struct pt_regs *old_regs = set_irq_regs(regs);
And put the old one back at the end:
set_irq_regs(old_regs);
Don't pass regs through to generic_handle_irq() or __do_IRQ().
In timer_interrupt(), this sort of change will be necessary:
- update_process_times(user_mode(regs));
- profile_tick(CPU_PROFILING, regs);
+ update_process_times(user_mode(get_irq_regs()));
+ profile_tick(CPU_PROFILING);
I'd like to move update_process_times()'s use of get_irq_regs() into itself,
except that i386, alone of the archs, uses something other than user_mode().
Some notes on the interrupt handling in the drivers:
(*) input_dev() is now gone entirely. The regs pointer is no longer stored in
the input_dev struct.
(*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does
something different depending on whether it's been supplied with a regs
pointer or not.
(*) Various IRQ handler function pointers have been moved to type
irq_handler_t.
Signed-Off-By: David Howells <dhowells@redhat.com>
(cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 14:55:46 +01:00
ia64_mca_cpe_int_caller ( int cpe_irq , void * arg )
2005-04-16 15:20:36 -07:00
{
static int start_count = - 1 ;
static int poll_time = MIN_CPE_POLL_INTERVAL ;
unsigned int cpuid ;
cpuid = smp_processor_id ( ) ;
/* If first cpu, update count */
if ( start_count = = - 1 )
start_count = IA64_LOG_COUNT ( SAL_INFO_TYPE_CPE ) ;
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers
Maintain a per-CPU global "struct pt_regs *" variable which can be used instead
of passing regs around manually through all ~1800 interrupt handlers in the
Linux kernel.
The regs pointer is used in few places, but it potentially costs both stack
space and code to pass it around. On the FRV arch, removing the regs parameter
from all the genirq functions results in a 20% speed up of the IRQ exit path
(ie: from leaving timer_interrupt() to leaving do_IRQ()).
Where appropriate, an arch may override the generic storage facility and do
something different with the variable. On FRV, for instance, the address is
maintained in GR28 at all times inside the kernel as part of general exception
handling.
Having looked over the code, it appears that the parameter may be handed down
through up to twenty or so layers of functions. Consider a USB character
device attached to a USB hub, attached to a USB controller that posts its
interrupts through a cascaded auxiliary interrupt controller. A character
device driver may want to pass regs to the sysrq handler through the input
layer which adds another few layers of parameter passing.
I've built this code with allyesconfig for x86_64 and i386. I've run-tested the
main part of the code on FRV and i386, though I can't test most of the drivers.
I've also done partial conversion for powerpc and MIPS - these at least compile
with minimal configurations.
This will affect all archs. Mostly the changes should be relatively easy.
Take do_IRQ(), store the regs pointer at the beginning, saving the old one:
struct pt_regs *old_regs = set_irq_regs(regs);
And put the old one back at the end:
set_irq_regs(old_regs);
Don't pass regs through to generic_handle_irq() or __do_IRQ().
In timer_interrupt(), this sort of change will be necessary:
- update_process_times(user_mode(regs));
- profile_tick(CPU_PROFILING, regs);
+ update_process_times(user_mode(get_irq_regs()));
+ profile_tick(CPU_PROFILING);
I'd like to move update_process_times()'s use of get_irq_regs() into itself,
except that i386, alone of the archs, uses something other than user_mode().
Some notes on the interrupt handling in the drivers:
(*) input_dev() is now gone entirely. The regs pointer is no longer stored in
the input_dev struct.
(*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does
something different depending on whether it's been supplied with a regs
pointer or not.
(*) Various IRQ handler function pointers have been moved to type
irq_handler_t.
Signed-Off-By: David Howells <dhowells@redhat.com>
(cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 14:55:46 +01:00
ia64_mca_cpe_int_handler ( cpe_irq , arg ) ;
2005-04-16 15:20:36 -07:00
2009-03-16 14:12:42 +10:30
cpuid = cpumask_next ( cpuid + 1 , cpu_online_mask ) ;
2005-04-16 15:20:36 -07:00
if ( cpuid < NR_CPUS ) {
platform_send_ipi ( cpuid , IA64_CPEP_VECTOR , IA64_IPI_DM_INT , 0 ) ;
} else {
/*
* If a log was recorded , increase our polling frequency ,
* otherwise , backoff or return to interrupt mode .
*/
if ( start_count ! = IA64_LOG_COUNT ( SAL_INFO_TYPE_CPE ) ) {
poll_time = max ( MIN_CPE_POLL_INTERVAL , poll_time / 2 ) ;
} else if ( cpe_vector < 0 ) {
poll_time = min ( MAX_CPE_POLL_INTERVAL , poll_time * 2 ) ;
} else {
poll_time = MIN_CPE_POLL_INTERVAL ;
printk ( KERN_WARNING " Returning to interrupt driven CPE handler \n " ) ;
enable_irq ( local_vector_to_irq ( IA64_CPE_VECTOR ) ) ;
cpe_poll_enabled = 0 ;
}
if ( cpe_poll_enabled )
mod_timer ( & cpe_poll_timer , jiffies + poll_time ) ;
start_count = - 1 ;
}
return IRQ_HANDLED ;
}
/*
 * ia64_mca_cpe_poll
 *
 * Poll for Corrected Platform Errors (CPEs), trigger interrupt
 * on first cpu, from there it will trickle through all the cpus.
 *
 * The only action taken here is sending an IA64_CPEP_VECTOR IPI to the
 * first cpu in cpu_online_mask. Forwarding to the remaining cpus is not
 * done by this function; ia64_mca_cpe_int_caller() sends the same vector
 * on to a further online cpu after it has run the CPE handler.
 *
 * NOTE(review): the signature and the mod_timer(&cpe_poll_timer, ...)
 * call in ia64_mca_cpe_int_caller() suggest this is the callback of
 * cpe_poll_timer - confirm against the timer setup.
 *
 * Inputs  : dummy (unused)
 * Outputs : None
 *
 */
static void
ia64_mca_cpe_poll ( unsigned long dummy )
{
/* Trigger a CPE interrupt cascade */
2012-03-28 14:42:46 -07:00
platform_send_ipi ( cpumask_first ( cpu_online_mask ) , IA64_CPEP_VECTOR ,
IA64_IPI_DM_INT , 0 ) ;
2005-04-16 15:20:36 -07:00
}
2005-05-31 22:34:00 -07:00
# endif /* CONFIG_ACPI */
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods which impact on
watchdogs and heartbeat systems in general. More patches to come that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot, the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-11-07 11:27:13 -08:00
/*
 * default_monarch_init_process
 *
 * Notifier callback implementing the default reaction to
 * DIE_INIT_MONARCH_PROCESS. Any other event value is ignored, and with
 * CONFIG_KEXEC nothing is done while kdump_in_progress is set.
 *
 * Otherwise it:
 *   - raises the console loglevel and dumps the mlogbuf,
 *   - for every online cpu, reads prev_task from the SAL/OS state kept on
 *     the INIT stack of that cpu and prints its pid (for pid 0 the cpu and
 *     the task pointer are printed as well),
 *   - prints a backtrace of every thread, but only if tasklist_lock can be
 *     taken with read_trylock(); if the lock is busy the backtraces are
 *     skipped rather than waited for,
 *   - restores the console loglevel.
 *
 * Inputs  : self (unused), val (die event code), data (unused)
 * Outputs : always NOTIFY_DONE
 */
static int
default_monarch_init_process ( struct notifier_block * self , unsigned long val , void * data )
{
int c ;
struct task_struct * g , * t ;
/* Only the monarch PROCESS stage of an INIT event is handled here. */
if ( val ! = DIE_INIT_MONARCH_PROCESS )
return NOTIFY_DONE ;
2007-04-03 17:53:42 -07:00
/* Produce no dump at all while a kdump is in progress. */
# ifdef CONFIG_KEXEC
if ( atomic_read ( & kdump_in_progress ) )
return NOTIFY_DONE ;
# endif
2006-09-26 14:44:37 -07:00
/*
* FIXME : mlogbuf will brim over with INIT stack dumps .
* To enable show_stack from INIT , we use oops_in_progress which should
* be used in real oops . This would cause something wrong after INIT .
*/
BREAK_LOGLEVEL ( console_loglevel ) ;
ia64_mlogbuf_dump_from_init ( ) ;
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods which impact on
watchdogs and heartbeat systems in general. More patches to come that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot, the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-11-07 11:27:13 -08:00
printk ( KERN_ERR " Processes interrupted by INIT - " ) ;
for_each_online_cpu ( c ) {
struct ia64_sal_os_state * s ;
t = __va ( __per_cpu_mca [ c ] + IA64_MCA_CPU_INIT_STACK_OFFSET ) ;
s = ( struct ia64_sal_os_state * ) ( ( char * ) t + MCA_SOS_OFFSET ) ;
g = s - > prev_task ;
if ( g ) {
if ( g - > pid )
printk ( " %d " , g - > pid ) ;
else
printk ( " %d (cpu %d task 0x%p) " , g - > pid , task_cpu ( g ) , g ) ;
}
}
printk ( " \n \n " ) ;
if ( read_trylock ( & tasklist_lock ) ) {
do_each_thread ( g , t ) {
printk ( " \n Backtrace of pid %d (%s) \n " , t - > pid , t - > comm ) ;
show_stack ( t , NULL ) ;
} while_each_thread ( g , t ) ;
read_unlock ( & tasklist_lock ) ;
}
2006-09-26 14:44:37 -07:00
/* FIXME: This will not restore zapped printk locks. */
RESTORE_LOGLEVEL ( console_loglevel ) ;
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods which impact on
watchdogs and heartbeat systems in general. More patches to come that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot, the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-11-07 11:27:13 -08:00
return NOTIFY_DONE ;
}
2005-04-16 15:20:36 -07:00
/*
 * C portion of the OS INIT handler
 *
 * Called from ia64_os_init_dispatch
 *
 * Inputs : pointer to pt_regs where processor info was saved, the
 * switch_stack, and the SAL/OS state for this event. This code is used
 * for both monarch and slave INIT events, see sos->monarch.
 *
 * All INIT events switch to the INIT stack and change the previous process to
 * blocked status. If one of the INIT events is the monarch then we are
 * probably processing the nmi button/command. Use the monarch cpu to dump all
 * the processes. The slave INIT events all spin until the monarch cpu
 * returns. We can also get INIT slave events for MCA, in which case the MCA
 * process is the monarch.
 *
 * Flow as implemented below:
 *  - Every cpu issues DIE_INIT_ENTER, rewrites the original stack via
 *    ia64_mca_modify_original_stack() and sets os_status to
 *    IA64_INIT_RESUME.
 *  - Two prom workarounds may flip sos->monarch: the slave whose arrival
 *    makes the slave count equal num_online_cpus() is promoted, and any
 *    monarch after the first is demoted.
 *  - A slave records IA64_MCA_RENDEZ_CHECKIN_INIT, spins until monarch_cpu
 *    is set, issues the DIE_INIT_SLAVE_* notifications around a second spin
 *    that lasts until monarch_cpu is cleared again, then restores
 *    previous_current and returns. With CONFIG_KEXEC both spins also end
 *    as soon as kdump_in_progress is set.
 *  - The monarch publishes its cpu number in monarch_cpu, delays for five
 *    seconds, waits for the slaves, issues DIE_INIT_MONARCH_PROCESS and
 *    DIE_INIT_MONARCH_LEAVE, restores previous_current and finally sets
 *    monarch_cpu back to -1.
 *
 * Outputs : None
 */
2005-09-11 17:22:53 +10:00
2005-04-16 15:20:36 -07:00
void
2005-09-11 17:22:53 +10:00
ia64_init_handler ( struct pt_regs * regs , struct switch_stack * sw ,
struct ia64_sal_os_state * sos )
2005-04-16 15:20:36 -07:00
{
2005-09-11 17:22:53 +10:00
/*
 * Number of cpus currently inside this handler as slave and as monarch.
 * Each is incremented on entry by the matching workaround check below
 * and decremented again on the matching exit path.
 */
static atomic_t slaves ;
static atomic_t monarchs ;
2006-07-03 00:25:41 -07:00
struct task_struct * previous_current ;
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods which impact on
watchdogs and heartbeat systems in general. More patches to come that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot, the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-11-07 11:27:13 -08:00
int cpu = smp_processor_id ( ) ;
2006-04-03 15:26:12 +10:00
struct ia64_mca_notify_die nd =
{ . sos = sos , . monarch_cpu = & monarch_cpu } ;
2005-04-16 15:20:36 -07:00
2008-04-17 17:00:37 +09:00
NOTIFY_INIT ( DIE_INIT_ENTER , regs , ( long ) & nd , 0 ) ;
2006-04-03 15:26:12 +10:00
2006-09-26 14:44:37 -07:00
mprintk ( KERN_INFO " Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld \n " ,
2005-09-11 17:22:53 +10:00
sos - > proc_state_param , cpu , sos - > monarch ) ;
salinfo_log_wakeup ( SAL_INFO_TYPE_INIT , NULL , 0 , 0 ) ;
2005-04-16 15:20:36 -07:00
2005-09-11 17:22:53 +10:00
previous_current = ia64_mca_modify_original_stack ( regs , sw , sos , " INIT " ) ;
sos - > os_status = IA64_INIT_RESUME ;
/* FIXME: Workaround for broken proms that drive all INIT events as
* slaves . The last slave that enters is promoted to be a monarch .
* Remove this code in September 2006 , that gives platforms a year to
* fix their proms and get their customers updated .
2005-04-16 15:20:36 -07:00
*/
2005-09-11 17:22:53 +10:00
if ( ! sos - > monarch & & atomic_add_return ( 1 , & slaves ) = = num_online_cpus ( ) ) {
2006-09-26 14:44:37 -07:00
mprintk ( KERN_WARNING " %s: Promoting cpu %d to monarch. \n " ,
2008-03-04 15:15:00 -08:00
__func__ , cpu ) ;
2005-09-11 17:22:53 +10:00
atomic_dec ( & slaves ) ;
sos - > monarch = 1 ;
}
2005-04-16 15:20:36 -07:00
2005-09-11 17:22:53 +10:00
/* FIXME: Workaround for broken proms that drive all INIT events as
* monarchs . Second and subsequent monarchs are demoted to slaves .
* Remove this code in September 2006 , that gives platforms a year to
* fix their proms and get their customers updated .
*/
if ( sos - > monarch & & atomic_add_return ( 1 , & monarchs ) > 1 ) {
2006-09-26 14:44:37 -07:00
mprintk ( KERN_WARNING " %s: Demoting cpu %d to slave. \n " ,
2008-03-04 15:15:00 -08:00
__func__ , cpu ) ;
2005-09-11 17:22:53 +10:00
atomic_dec ( & monarchs ) ;
sos - > monarch = 0 ;
}
if ( ! sos - > monarch ) {
ia64_mc_info . imi_rendez_checkin [ cpu ] = IA64_MCA_RENDEZ_CHECKIN_INIT ;
2009-08-06 14:51:58 -07:00
# ifdef CONFIG_KEXEC
while ( monarch_cpu = = - 1 & & ! atomic_read ( & kdump_in_progress ) )
udelay ( 1000 ) ;
# else
2005-09-11 17:22:53 +10:00
while ( monarch_cpu = = - 1 )
2009-08-06 14:51:58 -07:00
cpu_relax ( ) ; /* spin until monarch enters */
# endif
2008-04-17 17:00:37 +09:00
NOTIFY_INIT ( DIE_INIT_SLAVE_ENTER , regs , ( long ) & nd , 1 ) ;
NOTIFY_INIT ( DIE_INIT_SLAVE_PROCESS , regs , ( long ) & nd , 1 ) ;
2009-08-06 14:51:58 -07:00
# ifdef CONFIG_KEXEC
while ( monarch_cpu ! = - 1 & & ! atomic_read ( & kdump_in_progress ) )
udelay ( 1000 ) ;
# else
2005-09-11 17:22:53 +10:00
while ( monarch_cpu ! = - 1 )
2009-08-06 14:51:58 -07:00
cpu_relax ( ) ; /* spin until monarch leaves */
# endif
2008-04-17 17:00:37 +09:00
NOTIFY_INIT ( DIE_INIT_SLAVE_LEAVE , regs , ( long ) & nd , 1 ) ;
2006-09-26 14:44:37 -07:00
mprintk ( " Slave on cpu %d returning to normal service. \n " , cpu ) ;
2016-09-20 20:29:40 +02:00
ia64_set_curr_task ( cpu , previous_current ) ;
2005-09-11 17:22:53 +10:00
ia64_mc_info . imi_rendez_checkin [ cpu ] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE ;
atomic_dec ( & slaves ) ;
return ;
}
monarch_cpu = cpu ;
2008-04-17 17:00:37 +09:00
NOTIFY_INIT ( DIE_INIT_MONARCH_ENTER , regs , ( long ) & nd , 1 ) ;
2005-09-11 17:22:53 +10:00
/*
* Wait for a bit . On some machines ( e . g . , HP ' s zx2000 and zx6000 , INIT can be
* generated via the BMC ' s command - line interface , but since the console is on the
* same serial line , the user will need some time to switch out of the BMC before
* the dump begins .
*/
2006-09-26 14:44:37 -07:00
mprintk ( " Delaying for 5 seconds... \n " ) ;
2005-09-11 17:22:53 +10:00
udelay ( 5 * 1000000 ) ;
2006-04-11 14:59:41 +10:00
ia64_wait_for_slaves ( cpu , " INIT " ) ;
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods which impact on
watchdogs and heartbeat systems in general. More patches to come that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot, the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-11-07 11:27:13 -08:00
/* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through
* to default_monarch_init_process ( ) above and just print all the
* tasks .
*/
2008-04-17 17:00:37 +09:00
NOTIFY_INIT ( DIE_INIT_MONARCH_PROCESS , regs , ( long ) & nd , 1 ) ;
NOTIFY_INIT ( DIE_INIT_MONARCH_LEAVE , regs , ( long ) & nd , 1 ) ;
2006-09-26 14:44:37 -07:00
mprintk ( " \n INIT dump complete. Monarch on cpu %d returning to normal service. \n " , cpu ) ;
2005-09-11 17:22:53 +10:00
/*
 * Monarch exit: drop the monarch count, restore the interrupted task as
 * current, and only then clear monarch_cpu. Clearing monarch_cpu is what
 * ends the second spin loop of the slaves above.
 */
atomic_dec ( & monarchs ) ;
2016-09-20 20:29:40 +02:00
ia64_set_curr_task ( cpu , previous_current ) ;
2005-09-11 17:22:53 +10:00
monarch_cpu = - 1 ;
return ;
2005-04-16 15:20:36 -07:00
}
/*
 * ia64_mca_disable_cpe_polling
 *
 * Boot option handler registered through the __setup() line that follows.
 * Clears cpe_poll_enabled so that CPE polling stays off.
 *
 * Inputs  : str (unused)
 * Outputs : always 1
 */
static int __init ia64_mca_disable_cpe_polling(char *str)
{
	cpe_poll_enabled = 0;

	return 1;
}

__setup(" disable_cpe_poll ", ia64_mca_disable_cpe_polling);
/* irqaction that dispatches to ia64_mca_cmc_int_handler(). */
static struct irqaction cmci_irqaction = {
	.name		= " cmc_hndlr ",
	.handler	= ia64_mca_cmc_int_handler,
};
/* irqaction that dispatches to ia64_mca_cmc_int_caller(). */
static struct irqaction cmcp_irqaction = {
	.name		= " cmc_poll ",
	.handler	= ia64_mca_cmc_int_caller,
};
/* irqaction that dispatches to ia64_mca_rendez_int_handler(). */
static struct irqaction mca_rdzv_irqaction = {
	.name		= " mca_rdzv ",
	.handler	= ia64_mca_rendez_int_handler,
};
/* irqaction that dispatches to ia64_mca_wakeup_int_handler(). */
static struct irqaction mca_wkup_irqaction = {
	.name		= " mca_wkup ",
	.handler	= ia64_mca_wakeup_int_handler,
};
# ifdef CONFIG_ACPI
/* irqaction that dispatches to ia64_mca_cpe_int_handler(). */
static struct irqaction mca_cpe_irqaction = {
	.name		= " cpe_hndlr ",
	.handler	= ia64_mca_cpe_int_handler,
};
/* irqaction that dispatches to ia64_mca_cpe_int_caller(). */
static struct irqaction mca_cpep_irqaction = {
	.name		= " cpe_poll ",
	.handler	= ia64_mca_cpe_int_caller,
};
# endif /* CONFIG_ACPI */
2005-09-11 17:22:53 +10:00
/*
 * format_mca_init_stack
 *
 * Minimal format of the MCA/INIT stacks. The pseudo processes that run on
 * these stacks can never sleep, they cannot return from the kernel to user
 * space, they do not appear in a normal ps listing. So there is no need to
 * format most of the fields.
 *
 * Inputs  : mca_data - base address of the area that holds the stack
 *           offset   - byte offset of the stack inside mca_data; the
 *                      task_struct is placed at exactly that address
 *           type     - name copied into the comm field of the task
 *           cpu      - cpu recorded in thread_info and set in cpus_allowed
 * Outputs : None
 */
2013-06-17 15:51:20 -04:00
static void
2005-09-11 17:22:53 +10:00
format_mca_init_stack ( void * mca_data , unsigned long offset ,
const char * type , int cpu )
{
struct task_struct * p = ( struct task_struct * ) ( ( char * ) mca_data + offset ) ;
struct thread_info * ti ;
/* Zero KERNEL_STACK_SIZE bytes starting at p; fields not set below stay 0. */
memset ( p , 0 , KERNEL_STACK_SIZE ) ;
2006-01-12 01:06:05 -08:00
ti = task_thread_info ( p ) ;
2005-09-11 17:22:53 +10:00
ti - > flags = _TIF_MCA_INIT ;
ti - > preempt_count = 1 ;
ti - > task = p ;
ti - > cpu = cpu ;
2007-05-09 02:35:17 -07:00
p - > stack = ti ;
2005-09-11 17:22:53 +10:00
p - > state = TASK_UNINTERRUPTIBLE ;
2015-03-05 10:49:16 +10:30
cpumask_set_cpu ( cpu , & p - > cpus_allowed ) ;
2005-09-11 17:22:53 +10:00
INIT_LIST_HEAD ( & p - > tasks ) ;
/* The pseudo task is its own parent, real parent and group leader. */
p - > parent = p - > real_parent = p - > group_leader = p ;
INIT_LIST_HEAD ( & p - > children ) ;
INIT_LIST_HEAD ( & p - > sibling ) ;
/*
 * At most sizeof(comm) - 1 bytes are copied, so the last byte of comm is
 * never written here. Termination relies on the memset above having
 * zeroed comm (assumes task_struct fits inside KERNEL_STACK_SIZE - TODO
 * confirm).
 */
strncpy ( p - > comm , type , sizeof ( p - > comm ) - 1 ) ;
}
2007-07-30 22:50:13 +02:00
/*
 * mca_bootmem
 *
 * Allocate room for one struct ia64_mca_cpu with __alloc_bootmem(),
 * passing KERNEL_STACK_SIZE as the second argument and 0 as the third
 * (presumably alignment and goal - confirm against __alloc_bootmem).
 *
 * Caller prevents this from being called after init; in this file
 * ia64_mca_cpu_init() only calls it while its first_time flag is set.
 *
 * Inputs  : None
 * Outputs : whatever __alloc_bootmem() returns
 */
2016-08-02 14:03:33 -07:00
static void * __ref mca_bootmem ( void )
2007-07-30 22:50:13 +02:00
{
2008-02-05 17:12:32 -06:00
return __alloc_bootmem ( sizeof ( struct ia64_mca_cpu ) ,
KERNEL_STACK_SIZE , 0 ) ;
2007-07-30 22:50:13 +02:00
}
/*
 * ia64_mca_cpu_init
 *
 * Do per-CPU MCA-related initialization for the cpu this runs on
 * (smp_processor_id()):
 *
 *  1. Find or allocate the struct ia64_mca_cpu of this cpu. An existing
 *     entry in __per_cpu_mca[] is reused. Otherwise the very first
 *     allocating call uses mca_bootmem() and all later ones use
 *     __get_free_pages(GFP_KERNEL, ...). Allocation failure panics.
 *  2. Format the MCA and the INIT stack inside that structure.
 *  3. Store the physical address of the structure in __per_cpu_mca[cpu]
 *     and in the per-cpu variable ia64_mca_data.
 *  4. Store a PTE built from cpu_data in ia64_mca_per_cpu_pte.
 *  5. If efi_get_pal_addr() returns an address, store the granule-rounded
 *     PAL base and a PTE for it in ia64_mca_pal_base / ia64_mca_pal_pte.
 *     When no PAL address is returned these two are left untouched.
 *
 * Inputs  : cpu_data - address whose physical location is used to build
 *           the PTE saved in step 4
 * Outputs : None
 */
2013-06-17 15:51:20 -04:00
void
2005-04-16 15:20:36 -07:00
ia64_mca_cpu_init ( void * cpu_data )
{
void * pal_vaddr ;
2008-02-05 17:12:32 -06:00
void * data ;
long sz = sizeof ( struct ia64_mca_cpu ) ;
int cpu = smp_processor_id ( ) ;
2005-11-11 14:32:40 -08:00
/* Set until the first allocation has been made; shared by all calls. */
static int first_time = 1 ;
2005-04-16 15:20:36 -07:00
2005-09-11 17:22:53 +10:00
/*
 * Structure will already be allocated if cpu has been online,
 * then offlined.
 */
2008-02-05 17:12:32 -06:00
if ( __per_cpu_mca [ cpu ] ) {
data = __va ( __per_cpu_mca [ cpu ] ) ;
} else {
/* Only the first allocation comes from bootmem. */
if ( first_time ) {
data = mca_bootmem ( ) ;
first_time = 0 ;
} else
2011-02-24 17:23:09 -05:00
data = ( void * ) __get_free_pages ( GFP_KERNEL ,
get_order ( sz ) ) ;
2008-02-05 17:12:32 -06:00
if ( ! data )
panic ( " Could not allocate MCA memory for cpu %d \n " ,
cpu ) ;
}
/* Both stacks are formatted on every call, including the reuse case. */
format_mca_init_stack ( data , offsetof ( struct ia64_mca_cpu , mca_stack ) ,
" MCA " , cpu ) ;
format_mca_init_stack ( data , offsetof ( struct ia64_mca_cpu , init_stack ) ,
" INIT " , cpu ) ;
ia64: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
At the end of the patch set all uses of __get_cpu_var have been removed so
the macro is removed too.
The patch set includes passes over all arches as well. Once these operations
are used throughout then specialized macros can be defined in non -x86
arches as well in order to optimize per cpu access by f.e. using a global
register that may be set to the per cpu base.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: linux-ia64@vger.kernel.org
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:47 -05:00
/* One physical address, stored in both the global table and the per-cpu variable. */
__this_cpu_write ( ia64_mca_data , ( __per_cpu_mca [ cpu ] = __pa ( data ) ) ) ;
2005-04-16 15:20:36 -07:00
/*
* Stash away a copy of the PTE needed to map the per - CPU page .
* We may need it during MCA recovery .
*/
ia64: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
At the end of the patch set all uses of __get_cpu_var have been removed so
the macro is removed too.
The patch set includes passes over all arches as well. Once these operations
are used throughout then specialized macros can be defined in non -x86
arches as well in order to optimize per cpu access by f.e. using a global
register that may be set to the per cpu base.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: linux-ia64@vger.kernel.org
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:47 -05:00
__this_cpu_write ( ia64_mca_per_cpu_pte ,
pte_val ( mk_pte_phys ( __pa ( cpu_data ) , PAGE_KERNEL ) ) ) ;
2005-04-16 15:20:36 -07:00
2005-09-11 17:22:53 +10:00
/*
* Also , stash away a copy of the PAL address and the PTE
* needed to map it .
*/
pal_vaddr = efi_get_pal_addr ( ) ;
2005-04-16 15:20:36 -07:00
/* No PAL address available: leave the PAL base and PTE as they are. */
if ( ! pal_vaddr )
return ;
ia64: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
At the end of the patch set all uses of __get_cpu_var have been removed so
the macro is removed too.
The patch set includes passes over all arches as well. Once these operations
are used throughout then specialized macros can be defined in non -x86
arches as well in order to optimize per cpu access by f.e. using a global
register that may be set to the per cpu base.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: linux-ia64@vger.kernel.org
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:47 -05:00
__this_cpu_write ( ia64_mca_pal_base ,
GRANULEROUNDDOWN ( ( unsigned long ) pal_vaddr ) ) ;
__this_cpu_write ( ia64_mca_pal_pte , pte_val ( mk_pte_phys ( __pa ( pal_vaddr ) ,
PAGE_KERNEL ) ) ) ;
2005-04-16 15:20:36 -07:00
}
2016-09-06 19:04:40 +02:00
static int ia64_mca_cpu_online ( unsigned int cpu )
2007-12-19 11:42:02 -08:00
{
unsigned long flags ;
local_irq_save ( flags ) ;
if ( ! cmc_polling_enabled )
ia64_mca_cmc_vector_enable ( NULL ) ;
local_irq_restore ( flags ) ;
2016-09-06 19:04:40 +02:00
return 0 ;
2007-12-19 11:42:02 -08:00
}
2005-04-16 15:20:36 -07:00
/*
* ia64_mca_init
*
* Do all the system level mca specific initialization .
*
* 1. Register spinloop and wakeup request interrupt vectors
*
* 2. Register OS_MCA handler entry point
*
* 3. Register OS_INIT handler entry point
*
* 4. Initialize MCA / CMC / INIT related log buffers maintained by the OS .
*
* Note that this initialization is done very early before some kernel
* services are available .
*
* Inputs : None
*
* Outputs : None
*/
void __init
ia64_mca_init ( void )
{
/*
 * Summary of the steps performed below, in order:
 *  - mark every slot of ia64_mc_info.imi_rendez_checkin as
 *    IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
 *  - register the rendezvous interrupt with SAL; when SAL answers with
 *    status -2 the call is retried with the timeout returned in isrv.v0,
 *    and each new timeout is announced via
 *    NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, ...);
 *  - register the wakeup interrupt with SAL;
 *  - register the OS_MCA entry point, using the fp and gp fields read
 *    through mca_hldlr_ptr;
 *  - register the monarch and slave OS_INIT entry points, both with the
 *    gp value read via ia64_getreg(_IA64_REG_GP);
 *  - add default_monarch_init_process to the die notifier chain;
 *  - initialize the MCA, INIT, CMC and CPE log areas and set mca_init.
 *
 * Every handler size handed to SAL is 0 (see the XXX comments below).
 *
 * Each failure logs a KERN_ERR message and returns at once: mca_init is
 * not set on those paths and earlier registrations are not undone.
 *
 * NOTE(review): timeout is declared unsigned long but is printed with
 * %ld in the rendezvous retry message; %lu would match the declared
 * type. The type of isrv.v0 is not visible here - confirm before
 * changing the second specifier.
 */
2005-09-11 17:22:53 +10:00
/*
 * The dispatch symbols are viewed as ia64_fptr_t so that their fp field
 * (and, for the MCA handler, the gp field) can be read further down.
 */
ia64_fptr_t * init_hldlr_ptr_monarch = ( ia64_fptr_t * ) ia64_os_init_dispatch_monarch ;
ia64_fptr_t * init_hldlr_ptr_slave = ( ia64_fptr_t * ) ia64_os_init_dispatch_slave ;
2005-04-16 15:20:36 -07:00
ia64_fptr_t * mca_hldlr_ptr = ( ia64_fptr_t * ) ia64_os_mca_dispatch ;
int i ;
2009-05-22 13:49:49 -07:00
long rc ;
2005-04-16 15:20:36 -07:00
struct ia64_sal_retval isrv ;
2009-05-22 13:49:49 -07:00
unsigned long timeout = IA64_MCA_RENDEZ_TIMEOUT ; /* platform specific */
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods which impact on
watchdogs and heartbeat systems in general. More patches to come that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot, the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-11-07 11:27:13 -08:00
static struct notifier_block default_init_monarch_nb = {
. notifier_call = default_monarch_init_process ,
. priority = 0 /* we need to notified last */
} ;
2005-04-16 15:20:36 -07:00
2008-03-04 15:15:00 -08:00
IA64_MCA_DEBUG ( " %s: begin \n " , __func__ ) ;
2005-04-16 15:20:36 -07:00
/* Clear the Rendez checkin flag for all cpus */
for ( i = 0 ; i < NR_CPUS ; i + + )
ia64_mc_info . imi_rendez_checkin [ i ] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE ;
/*
* Register the rendezvous spinloop and wakeup mechanism with SAL
*/
/* Register the rendezvous interrupt vector with SAL */
while ( 1 ) {
isrv = ia64_sal_mc_set_params ( SAL_MC_PARAM_RENDEZ_INT ,
SAL_MC_PARAM_MECHANISM_INT ,
IA64_MCA_RENDEZ_VECTOR ,
timeout ,
SAL_MC_PARAM_RZ_ALWAYS ) ;
rc = isrv . status ;
if ( rc = = 0 )
break ;
if ( rc = = - 2 ) {
printk ( KERN_INFO " Increasing MCA rendezvous timeout from "
" %ld to %ld milliseconds \n " , timeout , isrv . v0 ) ;
timeout = isrv . v0 ;
2008-04-17 17:00:37 +09:00
NOTIFY_MCA ( DIE_MCA_NEW_TIMEOUT , NULL , timeout , 0 ) ;
2005-04-16 15:20:36 -07:00
continue ;
}
printk ( KERN_ERR " Failed to register rendezvous interrupt "
" with SAL (status %ld) \n " , rc ) ;
return ;
}
/* Register the wakeup interrupt vector with SAL */
isrv = ia64_sal_mc_set_params ( SAL_MC_PARAM_RENDEZ_WAKEUP ,
SAL_MC_PARAM_MECHANISM_INT ,
IA64_MCA_WAKEUP_VECTOR ,
0 , 0 ) ;
rc = isrv . status ;
if ( rc ) {
printk ( KERN_ERR " Failed to register wakeup interrupt with SAL "
" (status %ld) \n " , rc ) ;
return ;
}
2008-03-04 15:15:00 -08:00
IA64_MCA_DEBUG ( " %s: registered MCA rendezvous spinloop and wakeup mech. \n " , __func__ ) ;
2005-04-16 15:20:36 -07:00
ia64_mc_info . imi_mca_handler = ia64_tpa ( mca_hldlr_ptr - > fp ) ;
/*
* XXX - disable SAL checksum by setting size to 0 ; should be
* ia64_tpa ( ia64_os_mca_dispatch_end ) - ia64_tpa ( ia64_os_mca_dispatch ) ;
*/
ia64_mc_info . imi_mca_handler_size = 0 ;
/* Register the os mca handler with SAL */
if ( ( rc = ia64_sal_set_vectors ( SAL_VECTOR_OS_MCA ,
ia64_mc_info . imi_mca_handler ,
ia64_tpa ( mca_hldlr_ptr - > gp ) ,
ia64_mc_info . imi_mca_handler_size ,
0 , 0 , 0 ) ) )
{
printk ( KERN_ERR " Failed to register OS MCA handler with SAL "
" (status %ld) \n " , rc ) ;
return ;
}
2008-03-04 15:15:00 -08:00
IA64_MCA_DEBUG ( " %s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx \n " , __func__ ,
2005-04-16 15:20:36 -07:00
ia64_mc_info . imi_mca_handler , ia64_tpa ( mca_hldlr_ptr - > gp ) ) ;
/*
* XXX - disable SAL checksum by setting size to 0 , should be
* size of the actual init handler in mca_asm . S .
*/
2005-09-11 17:22:53 +10:00
ia64_mc_info . imi_monarch_init_handler = ia64_tpa ( init_hldlr_ptr_monarch - > fp ) ;
2005-04-16 15:20:36 -07:00
ia64_mc_info . imi_monarch_init_handler_size = 0 ;
2005-09-11 17:22:53 +10:00
ia64_mc_info . imi_slave_init_handler = ia64_tpa ( init_hldlr_ptr_slave - > fp ) ;
2005-04-16 15:20:36 -07:00
ia64_mc_info . imi_slave_init_handler_size = 0 ;
2008-03-04 15:15:00 -08:00
IA64_MCA_DEBUG ( " %s: OS INIT handler at %lx \n " , __func__ ,
2005-04-16 15:20:36 -07:00
ia64_mc_info . imi_monarch_init_handler ) ;
/* Register the os init handler with SAL */
if ( ( rc = ia64_sal_set_vectors ( SAL_VECTOR_OS_INIT ,
ia64_mc_info . imi_monarch_init_handler ,
ia64_tpa ( ia64_getreg ( _IA64_REG_GP ) ) ,
ia64_mc_info . imi_monarch_init_handler_size ,
ia64_mc_info . imi_slave_init_handler ,
ia64_tpa ( ia64_getreg ( _IA64_REG_GP ) ) ,
ia64_mc_info . imi_slave_init_handler_size ) ) )
{
printk ( KERN_ERR " Failed to register m/s INIT handlers with SAL "
" (status %ld) \n " , rc ) ;
return ;
}
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods which impact on
watchdogs and heartbeat systems in general. More patches to come that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot, the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-11-07 11:27:13 -08:00
/*
 * Add default_init_monarch_nb (callback default_monarch_init_process)
 * to the die notifier chain; a non-zero result aborts initialization.
 */
if ( register_die_notifier ( & default_init_monarch_nb ) ) {
printk ( KERN_ERR " Failed to register default monarch INIT process \n " ) ;
return ;
}
2005-04-16 15:20:36 -07:00
2008-03-04 15:15:00 -08:00
IA64_MCA_DEBUG ( " %s: registered OS INIT handler with SAL \n " , __func__ ) ;
2005-04-16 15:20:36 -07:00
/* Initialize the areas set aside by the OS to buffer the
* platform / processor error states for MCA / INIT / CMC
* handling .
*/
ia64_log_init ( SAL_INFO_TYPE_MCA ) ;
ia64_log_init ( SAL_INFO_TYPE_INIT ) ;
ia64_log_init ( SAL_INFO_TYPE_CMC ) ;
ia64_log_init ( SAL_INFO_TYPE_CPE ) ;
/* Only reached when every step above succeeded; ia64_mca_late_init() tests this flag. */
mca_init = 1 ;
printk ( KERN_INFO " MCA related initialization done \n " ) ;
}
2013-03-20 10:30:15 -07:00
2005-04-16 15:20:36 -07:00
/*
2013-03-20 10:30:15 -07:00
* These pieces cannot be done in ia64_mca_init() because it is called before
* early_irq_init(), which would wipe out our percpu irq registrations. But we
* cannot leave them until ia64_mca_late_init() because by then all the other
* processors have been brought online and have set their own CMC vectors to
* point at a non-existent action. Called from arch_early_irq_init().
2005-04-16 15:20:36 -07:00
*/
2013-03-20 10:30:15 -07:00
void __init ia64_mca_irq_init(void)
{
	/*
	 * CMC interrupts are per-processor. Hook up the CMCI and CMCP
	 * handlers here and program the vector on the boot processor; the
	 * application processors set theirs up in smp_callin() (smpboot.c).
	 */
	register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction);
	register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction);
	ia64_mca_cmc_vector_setup();

	/* MCA rendezvous interrupt vector. */
	register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction);

	/* MCA wakeup interrupt vector. */
	register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction);

#ifdef CONFIG_ACPI
	/* CPEI/P handler, only built when ACPI support is configured. */
	register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
#endif
}
/*
* ia64_mca_late_init
*
* Opportunity to set up things that require initialization later
* than ia64_mca_init(). Set up a timer to poll for CPEs if the
* platform doesn't support an interrupt-driven mechanism.
*
* Inputs : None
* Outputs : Status
*/
static int __init
ia64_mca_late_init ( void )
{
/*
 * Late part of the MCA setup, run as a device_initcall.
 *
 * Does nothing unless ia64_mca_init() ran to completion and set
 * mca_init. Otherwise it prepares the CMC poll timer, clears
 * cmc_polling_enabled, registers ia64_mca_cpu_online() as the CPU
 * online callback and, with CONFIG_ACPI, sets up CPE handling either
 * through the CPEI interrupt or through polling.
 *
 * Always returns 0.
 */
if ( ! mca_init )
return 0 ;
2010-10-07 16:23:34 -07:00
2005-04-16 15:20:36 -07:00
/* Setup the CMCI/P vector and handler */
2015-06-11 10:18:17 +05:30
/* Prepare cmc_poll_timer with ia64_mca_cmc_poll as callback and 0 as its argument. */
setup_timer ( & cmc_poll_timer , ia64_mca_cmc_poll , 0UL ) ;
2005-04-16 15:20:36 -07:00
/* Unmask/enable the vector */
/* With the flag cleared, ia64_mca_cpu_online() calls ia64_mca_cmc_vector_enable(). */
cmc_polling_enabled = 0 ;
2016-09-06 19:04:40 +02:00
/*
 * Register the online callback; no teardown callback is supplied (NULL).
 * Presumably the callback is also run for CPUs that are already online -
 * confirm against the cpuhp documentation.
 * NOTE(review): the return value of cpuhp_setup_state() is not checked.
 */
cpuhp_setup_state ( CPUHP_AP_ONLINE_DYN , " ia64/mca:online " ,
ia64_mca_cpu_online , NULL ) ;
2008-03-04 15:15:00 -08:00
IA64_MCA_DEBUG ( " %s: CMCI/P setup and enabled. \n " , __func__ ) ;
2005-04-16 15:20:36 -07:00
# ifdef CONFIG_ACPI
/* Setup the CPEI/P vector and handler */
2005-05-09 15:03:00 -07:00
/* A negative cpe_vector is treated below as: platform has no CPEI. */
cpe_vector = acpi_request_vector ( ACPI_INTERRUPT_CPEI ) ;
2015-06-11 10:18:17 +05:30
setup_timer ( & cpe_poll_timer , ia64_mca_cpe_poll , 0UL ) ;
2005-04-16 15:20:36 -07:00
{
/*
 * NOTE(review): irq is unsigned, so the irq > 0 test below cannot
 * reject a negative value from local_vector_to_irq(), should that
 * function be able to return one - confirm its return type.
 */
unsigned int irq ;
if ( cpe_vector > = 0 ) {
/* If platform supports CPEI, enable the irq. */
2007-08-03 14:32:37 -05:00
irq = local_vector_to_irq ( cpe_vector ) ;
if ( irq > 0 ) {
cpe_poll_enabled = 0 ;
2011-03-24 16:44:38 +01:00
irq_set_status_flags ( irq , IRQ_PER_CPU ) ;
2007-08-03 14:32:37 -05:00
/* NOTE(review): the result of setup_irq() is ignored. */
setup_irq ( irq , & mca_cpe_irqaction ) ;
ia64_cpe_irq = irq ;
ia64_mca_register_cpev ( cpe_vector ) ;
IA64_MCA_DEBUG ( " %s: CPEI/P setup and enabled. \n " ,
2008-03-04 15:15:00 -08:00
__func__ ) ;
2007-08-03 14:32:37 -05:00
/* Interrupt-driven CPE handling is set up; skip the polling fallback. */
return 0 ;
2005-04-16 15:20:36 -07:00
}
2007-08-03 14:32:37 -05:00
/* No usable irq for the vector: log it and fall through to the polling check. */
printk ( KERN_ERR " %s: Failed to find irq for CPE "
" interrupt handler, vector %d \n " ,
2008-03-04 15:15:00 -08:00
__func__ , cpe_vector ) ;
2007-08-03 14:32:37 -05:00
}
/* If platform doesn't support CPEI, get the timer going. */
if ( cpe_poll_enabled ) {
/* Direct call of the poll routine; presumably it arms cpe_poll_timer itself - confirm. */
ia64_mca_cpe_poll ( 0UL ) ;
2008-03-04 15:15:00 -08:00
IA64_MCA_DEBUG ( " %s: CPEP setup and enabled. \n " , __func__ ) ;
2005-04-16 15:20:36 -07:00
}
}
# endif
return 0 ;
}
/* Have ia64_mca_late_init() invoked at the device_initcall stage. */
device_initcall ( ia64_mca_late_init ) ;