2005-04-16 15:20:36 -07:00
/*
2009-03-26 15:24:01 +01:00
* Machine check handler
2005-04-16 15:20:36 -07:00
*
2009-03-26 15:24:01 +01:00
 *    Copyright IBM Corp. 2000,2009
 *    Author(s): Ingo Adlung <adlung@de.ibm.com>,
 *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
 *		 Cornelia Huck <cornelia.huck@de.ibm.com>,
 *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
2005-04-16 15:20:36 -07:00
*/
# include <linux/init.h>
# include <linux/errno.h>
2009-04-14 15:36:18 +02:00
# include <linux/hardirq.h>
2006-05-01 12:16:14 -07:00
# include <linux/time.h>
2009-03-26 15:24:01 +01:00
# include <linux/module.h>
2005-04-16 15:20:36 -07:00
# include <asm/lowcore.h>
2009-03-26 15:24:01 +01:00
# include <asm/smp.h>
# include <asm/etr.h>
2009-06-12 10:26:21 +02:00
# include <asm/cputime.h>
2009-03-26 15:24:01 +01:00
# include <asm/nmi.h>
# include <asm/crw.h>
2005-04-16 15:20:36 -07:00
2005-06-25 14:55:30 -07:00
/*
 * Per-cpu record of machine check work. s390_do_machine_check() sets the
 * fields, s390_handle_mcck() copies the record, clears it and acts on it.
 */
struct mcck_struct {
/* non-zero: the current task has to be terminated */
int kill_task ;
/* non-zero: a channel report word is pending */
int channel_report ;
/* non-zero: a warning machine check is pending */
int warning ;
/* machine check interruption code, stored together with kill_task */
unsigned long long mcck_code ;
} ;
/* One record per cpu. */
static DEFINE_PER_CPU ( struct mcck_struct , cpu_mcck ) ;
2009-03-26 15:24:01 +01:00
/*
 * Called for machine check conditions that cannot be recovered from.
 * Calls smp_send_stop() and then disabled_wait() with the address this
 * function was called from. The msg argument is accepted but not used.
 * Never returns.
 */
static NORET_TYPE void s390_handle_damage(char *msg)
{
	unsigned long caller = (unsigned long) __builtin_return_address(0);

	smp_send_stop();
	disabled_wait(caller);
	/* Not reached as long as disabled_wait() does not return. */
	for (;;)
		;
}
2005-04-16 15:20:36 -07:00
/*
2005-06-25 14:55:30 -07:00
* Main machine check handler function . Will be called with interrupts enabled
* or disabled and machine checks enabled or disabled .
2005-04-16 15:20:36 -07:00
*/
2009-03-26 15:24:01 +01:00
void s390_handle_mcck ( void )
2005-04-16 15:20:36 -07:00
{
2005-06-25 14:55:30 -07:00
unsigned long flags ;
struct mcck_struct mcck ;
2005-04-16 15:20:36 -07:00
2005-06-25 14:55:30 -07:00
/*
* Disable machine checks and get the current state of accumulated
* machine checks . Afterwards delete the old state and enable machine
* checks again .
*/
local_irq_save ( flags ) ;
local_mcck_disable ( ) ;
mcck = __get_cpu_var ( cpu_mcck ) ;
memset ( & __get_cpu_var ( cpu_mcck ) , 0 , sizeof ( struct mcck_struct ) ) ;
clear_thread_flag ( TIF_MCCK_PENDING ) ;
local_mcck_enable ( ) ;
local_irq_restore ( flags ) ;
2005-04-16 15:20:36 -07:00
2005-06-25 14:55:30 -07:00
if ( mcck . channel_report )
2009-03-26 15:24:01 +01:00
crw_handle_channel_report ( ) ;
2009-03-26 15:24:02 +01:00
/*
* A warning may remain for a prolonged period on the bare iron .
* ( actually until the machine is powered off , or the problem is gone )
* So we just stop listening for the WARNING MCH and avoid continuously
* being interrupted . One caveat is however , that we must do this per
* processor and cannot use the smp version of ctl_clear_bit ( ) .
* On VM we only get one interrupt per virtally presented machinecheck .
* Though one suffices , we may get one interrupt per ( virtual ) cpu .
*/
2005-06-25 14:55:30 -07:00
if ( mcck . warning ) { /* WARNING pending ? */
2005-04-16 15:20:36 -07:00
static int mchchk_wng_posted = 0 ;
2009-03-26 15:24:02 +01:00
/* Use single cpu clear, as we cannot handle smp here. */
2005-04-16 15:20:36 -07:00
__ctl_clear_bit ( 14 , 24 ) ; /* Disable WARNING MCH */
if ( xchg ( & mchchk_wng_posted , 1 ) = = 0 )
2006-10-02 02:19:00 -07:00
kill_cad_pid ( SIGPWR , 1 ) ;
2005-04-16 15:20:36 -07:00
}
2005-06-25 14:55:30 -07:00
if ( mcck . kill_task ) {
local_irq_enable ( ) ;
printk ( KERN_EMERG " mcck: Terminating task because of machine "
" malfunction (code 0x%016llx). \n " , mcck . mcck_code ) ;
printk ( KERN_EMERG " mcck: task: %s, pid: %d. \n " ,
current - > comm , current - > pid ) ;
do_exit ( SIGSEGV ) ;
}
}
2008-05-21 13:37:34 +02:00
EXPORT_SYMBOL_GPL ( s390_handle_mcck ) ;
2005-06-25 14:55:30 -07:00
/*
* returns 0 if all registers could be validated
* returns 1 otherwise
*/
2009-03-26 15:24:01 +01:00
static int notrace s390_revalidate_registers ( struct mci * mci )
2005-06-25 14:55:30 -07:00
{
int kill_task ;
u64 tmpclock ;
u64 zero ;
void * fpt_save_area , * fpt_creg_save_area ;
kill_task = 0 ;
zero = 0 ;
2009-03-26 15:24:01 +01:00
if ( ! mci - > gr ) {
2005-06-25 14:55:30 -07:00
/*
* General purpose registers couldn ' t be restored and have
* unknown contents . Process needs to be terminated .
*/
kill_task = 1 ;
2009-03-26 15:24:01 +01:00
}
if ( ! mci - > fp ) {
2005-06-25 14:55:30 -07:00
/*
* Floating point registers can ' t be restored and
* therefore the process needs to be terminated .
*/
kill_task = 1 ;
2009-03-26 15:24:01 +01:00
}
2006-01-06 00:19:28 -08:00
# ifndef CONFIG_64BIT
2006-09-28 16:56:43 +02:00
asm volatile (
" ld 0,0(%0) \n "
" ld 2,8(%0) \n "
" ld 4,16(%0) \n "
" ld 6,24(%0) "
: : " a " ( & S390_lowcore . floating_pt_save_area ) ) ;
2005-06-25 14:55:30 -07:00
# endif
if ( MACHINE_HAS_IEEE ) {
2006-01-06 00:19:28 -08:00
# ifdef CONFIG_64BIT
2005-06-25 14:55:30 -07:00
fpt_save_area = & S390_lowcore . floating_pt_save_area ;
fpt_creg_save_area = & S390_lowcore . fpt_creg_save_area ;
# else
fpt_save_area = ( void * ) S390_lowcore . extended_save_area_addr ;
2009-03-26 15:24:01 +01:00
fpt_creg_save_area = fpt_save_area + 128 ;
2005-06-25 14:55:30 -07:00
# endif
if ( ! mci - > fc ) {
/*
* Floating point control register can ' t be restored .
* Task will be terminated .
*/
2006-09-28 16:56:43 +02:00
asm volatile ( " lfpc 0(%0) " : : " a " (&zero), " m " (zero)) ;
2005-06-25 14:55:30 -07:00
kill_task = 1 ;
2006-09-28 16:56:43 +02:00
} else
asm volatile ( " lfpc 0(%0) " : : " a " (fpt_creg_save_area)) ;
2005-06-25 14:55:30 -07:00
2006-09-28 16:56:43 +02:00
asm volatile (
" ld 0,0(%0) \n "
" ld 1,8(%0) \n "
" ld 2,16(%0) \n "
" ld 3,24(%0) \n "
" ld 4,32(%0) \n "
" ld 5,40(%0) \n "
" ld 6,48(%0) \n "
" ld 7,56(%0) \n "
" ld 8,64(%0) \n "
" ld 9,72(%0) \n "
" ld 10,80(%0) \n "
" ld 11,88(%0) \n "
" ld 12,96(%0) \n "
" ld 13,104(%0) \n "
" ld 14,112(%0) \n "
" ld 15,120(%0) \n "
: : " a " ( fpt_save_area ) ) ;
2005-06-25 14:55:30 -07:00
}
/* Revalidate access registers */
2006-09-28 16:56:43 +02:00
asm volatile (
" lam 0,15,0(%0) "
: : " a " ( & S390_lowcore . access_regs_save_area ) ) ;
2009-03-26 15:24:01 +01:00
if ( ! mci - > ar ) {
2005-06-25 14:55:30 -07:00
/*
* Access registers have unknown contents .
* Terminating task .
*/
kill_task = 1 ;
2009-03-26 15:24:01 +01:00
}
2005-06-25 14:55:30 -07:00
/* Revalidate control registers */
2009-03-26 15:24:01 +01:00
if ( ! mci - > cr ) {
2005-06-25 14:55:30 -07:00
/*
* Control registers have unknown contents .
* Can ' t recover and therefore stopping machine .
*/
s390_handle_damage ( " invalid control registers. " ) ;
2009-03-26 15:24:01 +01:00
} else {
2006-01-06 00:19:28 -08:00
# ifdef CONFIG_64BIT
2006-09-28 16:56:43 +02:00
asm volatile (
" lctlg 0,15,0(%0) "
: : " a " ( & S390_lowcore . cregs_save_area ) ) ;
2005-06-25 14:55:30 -07:00
# else
2006-09-28 16:56:43 +02:00
asm volatile (
" lctl 0,15,0(%0) "
: : " a " ( & S390_lowcore . cregs_save_area ) ) ;
2005-06-25 14:55:30 -07:00
# endif
2009-03-26 15:24:01 +01:00
}
2005-06-25 14:55:30 -07:00
/*
* We don ' t even try to revalidate the TOD register , since we simply
* can ' t write something sensible into that register .
*/
2006-01-06 00:19:28 -08:00
# ifdef CONFIG_64BIT
2005-06-25 14:55:30 -07:00
/*
* See if we can revalidate the TOD programmable register with its
* old contents ( should be zero ) otherwise set it to zero .
*/
if ( ! mci - > pr )
2006-09-28 16:56:43 +02:00
asm volatile (
" sr 0,0 \n "
" sckpf "
: : : " 0 " , " cc " ) ;
2005-06-25 14:55:30 -07:00
else
asm volatile (
2006-09-28 16:56:43 +02:00
" l 0,0(%0) \n "
" sckpf "
: : " a " ( & S390_lowcore . tod_progreg_save_area )
: " 0 " , " cc " ) ;
2005-06-25 14:55:30 -07:00
# endif
/* Revalidate clock comparator register */
2006-09-28 16:56:43 +02:00
asm volatile (
" stck 0(%1) \n "
" sckc 0(%1) "
: " =m " ( tmpclock ) : " a " ( & ( tmpclock ) ) : " cc " , " memory " ) ;
2005-06-25 14:55:30 -07:00
/* Check if old PSW is valid */
if ( ! mci - > wp )
/*
* Can ' t tell if we come from user or kernel mode
* - > stopping machine .
*/
s390_handle_damage ( " old psw invalid. " ) ;
if ( ! mci - > ms | | ! mci - > pm | | ! mci - > ia )
kill_task = 1 ;
return kill_task ;
}
2006-04-27 18:40:23 -07:00
# define MAX_IPD_COUNT 29
2006-05-01 12:16:14 -07:00
# define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */
2006-04-27 18:40:23 -07:00
2009-03-26 15:24:01 +01:00
# define ED_STP_ISLAND 6 /* External damage STP island check */
# define ED_STP_SYNC 7 /* External damage STP sync check */
# define ED_ETR_SYNC 12 /* External damage ETR sync check */
# define ED_ETR_SWITCH 13 /* External damage ETR switch to local */
2005-06-25 14:55:30 -07:00
/*
* machine check handler .
*/
2009-03-26 15:23:59 +01:00
void notrace s390_do_machine_check ( struct pt_regs * regs )
2005-06-25 14:55:30 -07:00
{
2009-03-26 15:24:01 +01:00
static int ipd_count ;
2006-04-27 18:40:23 -07:00
static DEFINE_SPINLOCK ( ipd_lock ) ;
static unsigned long long last_ipd ;
2009-03-26 15:24:01 +01:00
struct mcck_struct * mcck ;
2006-04-27 18:40:23 -07:00
unsigned long long tmp ;
2005-06-25 14:55:30 -07:00
struct mci * mci ;
int umode ;
2009-04-14 15:36:18 +02:00
nmi_enter ( ) ;
2008-12-31 15:11:41 +01:00
s390_idle_check ( ) ;
2006-07-03 00:25:00 -07:00
2005-06-25 14:55:30 -07:00
mci = ( struct mci * ) & S390_lowcore . mcck_interruption_code ;
mcck = & __get_cpu_var ( cpu_mcck ) ;
umode = user_mode ( regs ) ;
2009-03-26 15:24:01 +01:00
if ( mci - > sd ) {
2005-06-25 14:55:30 -07:00
/* System damage -> stopping machine */
s390_handle_damage ( " received system damage machine check. " ) ;
2009-03-26 15:24:01 +01:00
}
2005-06-25 14:55:30 -07:00
if ( mci - > pd ) {
if ( mci - > b ) {
/* Processing backup -> verify if we can survive this */
u64 z_mcic , o_mcic , t_mcic ;
2006-01-06 00:19:28 -08:00
# ifdef CONFIG_64BIT
2005-06-25 14:55:30 -07:00
z_mcic = ( 1ULL < < 63 | 1ULL < < 59 | 1ULL < < 29 ) ;
o_mcic = ( 1ULL < < 43 | 1ULL < < 42 | 1ULL < < 41 | 1ULL < < 40 |
1ULL < < 36 | 1ULL < < 35 | 1ULL < < 34 | 1ULL < < 32 |
1ULL < < 30 | 1ULL < < 21 | 1ULL < < 20 | 1ULL < < 17 |
1ULL < < 16 ) ;
# else
z_mcic = ( 1ULL < < 63 | 1ULL < < 59 | 1ULL < < 57 | 1ULL < < 50 |
1ULL < < 29 ) ;
o_mcic = ( 1ULL < < 43 | 1ULL < < 42 | 1ULL < < 41 | 1ULL < < 40 |
1ULL < < 36 | 1ULL < < 35 | 1ULL < < 34 | 1ULL < < 32 |
1ULL < < 30 | 1ULL < < 20 | 1ULL < < 17 | 1ULL < < 16 ) ;
# endif
t_mcic = * ( u64 * ) mci ;
if ( ( ( t_mcic & z_mcic ) ! = 0 ) | |
( ( t_mcic & o_mcic ) ! = o_mcic ) ) {
s390_handle_damage ( " processing backup machine "
" check with damage. " ) ;
}
2006-04-27 18:40:23 -07:00
/*
* Nullifying exigent condition , therefore we might
* retry this instruction .
*/
spin_lock ( & ipd_lock ) ;
tmp = get_clock ( ) ;
if ( ( ( tmp - last_ipd ) > > 12 ) < MAX_IPD_TIME )
ipd_count + + ;
else
ipd_count = 1 ;
last_ipd = tmp ;
if ( ipd_count = = MAX_IPD_COUNT )
s390_handle_damage ( " too many ipd retries. " ) ;
spin_unlock ( & ipd_lock ) ;
2009-03-26 15:24:01 +01:00
} else {
2005-06-25 14:55:30 -07:00
/* Processing damage -> stopping machine */
s390_handle_damage ( " received instruction processing "
" damage machine check. " ) ;
}
}
if ( s390_revalidate_registers ( mci ) ) {
if ( umode ) {
/*
* Couldn ' t restore all register contents while in
* user mode - > mark task for termination .
*/
mcck - > kill_task = 1 ;
mcck - > mcck_code = * ( unsigned long long * ) mci ;
set_thread_flag ( TIF_MCCK_PENDING ) ;
2009-03-26 15:24:01 +01:00
} else {
2005-06-25 14:55:30 -07:00
/*
* Couldn ' t restore all register contents while in
* kernel mode - > stopping machine .
*/
s390_handle_damage ( " unable to revalidate registers. " ) ;
2009-03-26 15:24:01 +01:00
}
2005-06-25 14:55:30 -07:00
}
2007-02-05 21:18:19 +01:00
if ( mci - > cd ) {
/* Timing facility damage */
s390_handle_damage ( " TOD clock damaged " ) ;
}
if ( mci - > ed & & mci - > ec ) {
/* External damage */
if ( S390_lowcore . external_damage_code & ( 1U < < ED_ETR_SYNC ) )
etr_sync_check ( ) ;
if ( S390_lowcore . external_damage_code & ( 1U < < ED_ETR_SWITCH ) )
etr_switch_to_local ( ) ;
2008-07-14 09:58:56 +02:00
if ( S390_lowcore . external_damage_code & ( 1U < < ED_STP_SYNC ) )
stp_sync_check ( ) ;
if ( S390_lowcore . external_damage_code & ( 1U < < ED_STP_ISLAND ) )
stp_island_check ( ) ;
2007-02-05 21:18:19 +01:00
}
2005-06-25 14:55:30 -07:00
if ( mci - > se )
/* Storage error uncorrected */
s390_handle_damage ( " received storage error uncorrected "
" machine check. " ) ;
if ( mci - > ke )
/* Storage key-error uncorrected */
s390_handle_damage ( " received storage key-error uncorrected "
" machine check. " ) ;
if ( mci - > ds & & mci - > fa )
/* Storage degradation */
s390_handle_damage ( " received storage degradation machine "
" check. " ) ;
if ( mci - > cp ) {
/* Channel report word pending */
mcck - > channel_report = 1 ;
set_thread_flag ( TIF_MCCK_PENDING ) ;
}
if ( mci - > w ) {
/* Warning pending */
mcck - > warning = 1 ;
set_thread_flag ( TIF_MCCK_PENDING ) ;
}
2009-04-14 15:36:18 +02:00
nmi_exit ( ) ;
2005-04-16 15:20:36 -07:00
}
2009-03-26 15:24:01 +01:00
/*
 * Initcall that enables the machine check subclasses this file deals with
 * by setting bits 25, 27 and 24 through ctl_set_bit(14, ...).
 * Always returns 0.
 */
static int __init machine_check_init(void)
{
	ctl_set_bit(14, 25);	/* enable external damage MCH */
	ctl_set_bit(14, 27);	/* enable system recovery MCH */
	ctl_set_bit(14, 24);	/* enable warning MCH */

	return 0;
}
arch_initcall(machine_check_init);