2013-02-15 13:11:57 +04:00
/*
* GHES / EDAC Linux driver
*
* This file may be distributed under the terms of the GNU General Public
* License version 2.
*
* Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com>
*
* Red Hat Inc. http://www.redhat.com
*/
2013-02-15 16:06:38 +04:00
# define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2013-02-15 13:11:57 +04:00
# include <acpi/ghes.h>
# include <linux/edac.h>
2013-02-14 16:11:08 +04:00
# include <linux/dmi.h>
2013-02-15 13:11:57 +04:00
# include "edac_core.h"
# define GHES_EDAC_REVISION " Ver: 1.0.0"
/*
 * Per-registration private data. It lives in the pvt_info area of the
 * memory controller and is chained into ghes_reglist.
 */
struct ghes_edac_pvt {
	struct list_head list;		/* node in ghes_reglist */
	struct ghes *ghes;		/* GHES instance this entry was registered for */
	struct mem_ctl_info *mci;	/* memory controller created for that instance */
};
/* Every registered struct ghes_edac_pvt, chained through its list member */
static LIST_HEAD(ghes_reglist);

/* Held around edac_mc_alloc()/edac_mc_add_mc() in ghes_edac_register() */
static DEFINE_MUTEX(ghes_edac_lock);

/* Memory controller number passed to edac_mc_alloc() by the next registration */
static int ghes_edac_mc_num;
2013-02-15 16:06:38 +04:00
2013-02-14 16:11:08 +04:00
/* Memory Device - Type 17 of SMBIOS spec */
/*
 * ghes_edac_dmidecode() casts a struct dmi_header pointer straight to this
 * layout, which is why the structure is packed: field order and widths
 * must not be changed.
 */
struct memdev_dmi_entry {
u8 type ;
u8 length ;
u16 handle ;
u16 phys_mem_array_handle ;
u16 mem_err_info_handle ;
/* total_width != data_width is what the driver treats as "has ECC bits" */
u16 total_width ;
u16 data_width ;
/*
 * 0xffff: size unknown; 0x7fff: real size is in extended_size;
 * otherwise bit 15 selects the unit of the low 15 bits.
 */
u16 size ;
u8 form_factor ;
u8 device_set ;
u8 device_locator ;
u8 bank_locator ;
/* Codes 0x12, 0x13, 0x14 and 0x18 are mapped to DDR/DDR2/FB-DDR2/DDR3 */
u8 memory_type ;
/* Bit flags; bits 6, 7, 9 and 13 are examined by ghes_edac_dmidecode() */
u16 type_detail ;
u16 speed ;
u8 manufacturer ;
u8 serial_number ;
u8 asset_tag ;
u8 part_number ;
u8 attributes ;
/* Used as a size in MiB when size == 0x7fff */
u32 extended_size ;
u16 conf_mem_clk_speed ;
} __attribute__ ( ( __packed__ ) ) ;
/* Cursor handed to ghes_edac_dmidecode() through dmi_walk() */
struct ghes_edac_dimm_fill {
	struct mem_ctl_info *mci;	/* controller whose DIMM slots get filled */
	unsigned count;			/* index of the next DIMM slot to fill */
};
/* Printable name of each memory type, indexed by its MEM_* value */
char *memory_type[] = {
	[MEM_EMPTY]	= " EMPTY ",
	[MEM_RESERVED]	= " RESERVED ",
	[MEM_UNKNOWN]	= " UNKNOWN ",
	[MEM_FPM]	= " FPM ",
	[MEM_EDO]	= " EDO ",
	[MEM_BEDO]	= " BEDO ",
	[MEM_SDR]	= " SDR ",
	[MEM_RDR]	= " RDR ",
	[MEM_DDR]	= " DDR ",
	[MEM_RDDR]	= " RDDR ",
	[MEM_RMBS]	= " RMBS ",
	[MEM_DDR2]	= " DDR2 ",
	[MEM_FB_DDR2]	= " FB_DDR2 ",
	[MEM_RDDR2]	= " RDDR2 ",
	[MEM_XDR]	= " XDR ",
	[MEM_DDR3]	= " DDR3 ",
	[MEM_RDDR3]	= " RDDR3 ",
};
static void ghes_edac_count_dimms ( const struct dmi_header * dh , void * arg )
{
int * num_dimm = arg ;
if ( dh - > type = = DMI_ENTRY_MEM_DEVICE )
( * num_dimm ) + + ;
}
static void ghes_edac_dmidecode ( const struct dmi_header * dh , void * arg )
{
struct ghes_edac_dimm_fill * dimm_fill = arg ;
struct mem_ctl_info * mci = dimm_fill - > mci ;
if ( dh - > type = = DMI_ENTRY_MEM_DEVICE ) {
struct memdev_dmi_entry * entry = ( struct memdev_dmi_entry * ) dh ;
struct dimm_info * dimm = EDAC_DIMM_PTR ( mci - > layers , mci - > dimms ,
mci - > n_layers ,
dimm_fill - > count , 0 , 0 ) ;
if ( entry - > size = = 0xffff ) {
2013-02-15 16:06:38 +04:00
pr_info ( " Can't get DIMM%i size \n " ,
dimm_fill - > count ) ;
2013-02-14 16:11:08 +04:00
dimm - > nr_pages = MiB_TO_PAGES ( 32 ) ; /* Unknown */
} else if ( entry - > size = = 0x7fff ) {
dimm - > nr_pages = MiB_TO_PAGES ( entry - > extended_size ) ;
} else {
if ( entry - > size & 1 < < 15 )
dimm - > nr_pages = MiB_TO_PAGES ( ( entry - > size &
0x7fff ) < < 10 ) ;
else
dimm - > nr_pages = MiB_TO_PAGES ( entry - > size ) ;
}
switch ( entry - > memory_type ) {
case 0x12 :
if ( entry - > type_detail & 1 < < 13 )
dimm - > mtype = MEM_RDDR ;
else
dimm - > mtype = MEM_DDR ;
break ;
case 0x13 :
if ( entry - > type_detail & 1 < < 13 )
dimm - > mtype = MEM_RDDR2 ;
else
dimm - > mtype = MEM_DDR2 ;
break ;
case 0x14 :
dimm - > mtype = MEM_FB_DDR2 ;
break ;
case 0x18 :
if ( entry - > type_detail & 1 < < 13 )
dimm - > mtype = MEM_RDDR3 ;
else
dimm - > mtype = MEM_DDR3 ;
break ;
default :
if ( entry - > type_detail & 1 < < 6 )
dimm - > mtype = MEM_RMBS ;
else if ( ( entry - > type_detail & ( ( 1 < < 7 ) | ( 1 < < 13 ) ) )
= = ( ( 1 < < 7 ) | ( 1 < < 13 ) ) )
dimm - > mtype = MEM_RDR ;
else if ( entry - > type_detail & 1 < < 7 )
dimm - > mtype = MEM_SDR ;
else if ( entry - > type_detail & 1 < < 9 )
dimm - > mtype = MEM_EDO ;
else
dimm - > mtype = MEM_UNKNOWN ;
}
/*
* Actually , we can only detect if the memory has bits for
* checksum or not
*/
if ( entry - > total_width = = entry - > data_width )
dimm - > edac_mode = EDAC_NONE ;
else
dimm - > edac_mode = EDAC_SECDED ;
dimm - > dtype = DEV_UNKNOWN ;
dimm - > grain = 128 ; /* Likely, worse case */
/*
* FIXME : It shouldn ' t be hard to also fill the DIMM labels
*/
if ( dimm - > nr_pages ) {
2013-02-15 16:06:38 +04:00
edac_dbg ( 1 , " DIMM%i: %s size = %d MB%s \n " ,
2013-02-14 16:11:08 +04:00
dimm_fill - > count , memory_type [ dimm - > mtype ] ,
PAGES_TO_MiB ( dimm - > nr_pages ) ,
( dimm - > edac_mode ! = EDAC_NONE ) ? " (ECC) " : " " ) ;
2013-02-15 16:06:38 +04:00
edac_dbg ( 2 , " \t type %d, detail 0x%02x, width %d(total %d) \n " ,
2013-02-14 16:11:08 +04:00
entry - > memory_type , entry - > type_detail ,
entry - > total_width , entry - > data_width ) ;
}
dimm_fill - > count + + ;
}
}
2013-02-15 13:11:57 +04:00
void ghes_edac_report_mem_error ( struct ghes * ghes , int sev ,
2013-02-15 13:36:27 +04:00
struct cper_sec_mem_err * mem_err )
2013-02-15 13:11:57 +04:00
{
2013-02-15 13:36:27 +04:00
enum hw_event_mc_err_type type ;
struct edac_raw_error_desc * e ;
struct mem_ctl_info * mci ;
struct ghes_edac_pvt * pvt = NULL ;
list_for_each_entry ( pvt , & ghes_reglist , list ) {
if ( ghes = = pvt - > ghes )
break ;
}
if ( ! pvt ) {
pr_err ( " Internal error: Can't find EDAC structure \n " ) ;
return ;
}
mci = pvt - > mci ;
e = & mci - > error_desc ;
/* Cleans the error report buffer */
memset ( e , 0 , sizeof ( * e ) ) ;
e - > error_count = 1 ;
e - > msg = " APEI " ;
strcpy ( e - > label , " unknown " ) ;
e - > other_detail = " " ;
if ( mem_err - > validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS ) {
e - > page_frame_number = mem_err - > physical_addr > > PAGE_SHIFT ;
e - > offset_in_page = mem_err - > physical_addr & ~ PAGE_MASK ;
e - > grain = ~ ( mem_err - > physical_addr_mask & ~ PAGE_MASK ) ;
}
switch ( sev ) {
case GHES_SEV_CORRECTED :
type = HW_EVENT_ERR_CORRECTED ;
break ;
case GHES_SEV_RECOVERABLE :
type = HW_EVENT_ERR_UNCORRECTED ;
break ;
case GHES_SEV_PANIC :
type = HW_EVENT_ERR_FATAL ;
break ;
default :
case GHES_SEV_NO :
type = HW_EVENT_ERR_INFO ;
}
sprintf ( e - > location ,
" node:%d card:%d module:%d bank:%d device:%d row: %d column:%d bit_pos:%d " ,
mem_err - > node , mem_err - > card , mem_err - > module ,
mem_err - > bank , mem_err - > device , mem_err - > row , mem_err - > column ,
mem_err - > bit_pos ) ;
edac_dbg ( 3 , " error at location %s \n " , e - > location ) ;
edac_raw_mc_handle_error ( type , mci , e ) ;
2013-02-15 13:11:57 +04:00
}
EXPORT_SYMBOL_GPL ( ghes_edac_report_mem_error ) ;
int ghes_edac_register ( struct ghes * ghes , struct device * dev )
{
2013-02-14 16:11:08 +04:00
bool fake = false ;
int rc , num_dimm = 0 ;
2013-02-15 13:11:57 +04:00
struct mem_ctl_info * mci ;
struct edac_mc_layer layers [ 1 ] ;
struct ghes_edac_pvt * pvt ;
2013-02-14 16:11:08 +04:00
struct ghes_edac_dimm_fill dimm_fill ;
/* Get the number of DIMMs */
dmi_walk ( ghes_edac_count_dimms , & num_dimm ) ;
/* Check if we've got a bogus BIOS */
if ( num_dimm = = 0 ) {
fake = true ;
num_dimm = 1 ;
}
2013-02-15 13:11:57 +04:00
layers [ 0 ] . type = EDAC_MC_LAYER_ALL_MEM ;
2013-02-14 16:11:08 +04:00
layers [ 0 ] . size = num_dimm ;
2013-02-15 13:11:57 +04:00
layers [ 0 ] . is_virt_csrow = true ;
/*
* We need to serialize edac_mc_alloc ( ) and edac_mc_add_mc ( ) ,
* to avoid duplicated memory controller numbers
*/
mutex_lock ( & ghes_edac_lock ) ;
mci = edac_mc_alloc ( ghes_edac_mc_num , ARRAY_SIZE ( layers ) , layers ,
sizeof ( * pvt ) ) ;
if ( ! mci ) {
2013-02-15 16:06:38 +04:00
pr_info ( " Can't allocate memory for EDAC data \n " ) ;
2013-02-15 13:11:57 +04:00
mutex_unlock ( & ghes_edac_lock ) ;
return - ENOMEM ;
}
pvt = mci - > pvt_info ;
memset ( pvt , 0 , sizeof ( * pvt ) ) ;
2013-02-15 13:36:27 +04:00
list_add_tail ( & pvt - > list , & ghes_reglist ) ;
2013-02-15 13:11:57 +04:00
pvt - > ghes = ghes ;
pvt - > mci = mci ;
mci - > pdev = dev ;
mci - > mtype_cap = MEM_FLAG_EMPTY ;
mci - > edac_ctl_cap = EDAC_FLAG_NONE ;
mci - > edac_cap = EDAC_FLAG_NONE ;
mci - > mod_name = " ghes_edac.c " ;
mci - > mod_ver = GHES_EDAC_REVISION ;
mci - > ctl_name = " ghes_edac " ;
mci - > dev_name = " ghes " ;
2013-02-15 16:06:38 +04:00
if ( ! ghes_edac_mc_num ) {
if ( ! fake ) {
pr_info ( " This EDAC driver relies on BIOS to enumerate memory and get error reports. \n " ) ;
pr_info ( " Unfortunately, not all BIOSes reflect the memory layout correctly. \n " ) ;
pr_info ( " So, the end result of using this driver varies from vendor to vendor. \n " ) ;
pr_info ( " If you find incorrect reports, please contact your hardware vendor \n " ) ;
pr_info ( " to correct its BIOS. \n " ) ;
pr_info ( " This system has %d DIMM sockets. \n " ,
num_dimm ) ;
} else {
pr_info ( " This system has a very crappy BIOS: It doesn't even list the DIMMS. \n " ) ;
pr_info ( " Its SMBIOS info is wrong. It is doubtful that the error report would \n " ) ;
pr_info ( " work on such system. Use this driver with caution \n " ) ;
}
}
2013-02-14 16:11:08 +04:00
if ( ! fake ) {
2013-02-15 15:45:00 +04:00
/*
* Fill DIMM info from DMI for the memory controller # 0
*
* Keep it in blank for the other memory controllers , as
* there ' s no reliable way to properly credit each DIMM to
* the memory controller , as different BIOSes fill the
* DMI bank location fields on different ways
*/
if ( ! ghes_edac_mc_num ) {
dimm_fill . count = 0 ;
dimm_fill . mci = mci ;
dmi_walk ( ghes_edac_dmidecode , & dimm_fill ) ;
}
2013-02-14 16:11:08 +04:00
} else {
struct dimm_info * dimm = EDAC_DIMM_PTR ( mci - > layers , mci - > dimms ,
mci - > n_layers , 0 , 0 , 0 ) ;
2013-02-15 13:11:57 +04:00
2013-02-15 16:06:38 +04:00
dimm - > nr_pages = 1 ;
2013-02-14 16:11:08 +04:00
dimm - > grain = 128 ;
dimm - > mtype = MEM_UNKNOWN ;
dimm - > dtype = DEV_UNKNOWN ;
dimm - > edac_mode = EDAC_SECDED ;
}
2013-02-15 13:11:57 +04:00
rc = edac_mc_add_mc ( mci ) ;
if ( rc < 0 ) {
2013-02-15 16:06:38 +04:00
pr_info ( " Can't register at EDAC core \n " ) ;
2013-02-15 13:11:57 +04:00
edac_mc_free ( mci ) ;
mutex_unlock ( & ghes_edac_lock ) ;
return - ENODEV ;
}
ghes_edac_mc_num + + ;
mutex_unlock ( & ghes_edac_lock ) ;
return 0 ;
}
EXPORT_SYMBOL_GPL ( ghes_edac_register ) ;
/*
 * Remove and free every memory controller registered for a GHES instance.
 *
 * @ghes: GHES instance whose entries in ghes_reglist are torn down.
 *
 * Each list entry is the pvt_info area of its memory controller (see
 * ghes_edac_register()), so it is unlinked before the controller is freed,
 * and the walk uses the _safe iterator because entries are removed while
 * iterating.
 */
void ghes_edac_unregister(struct ghes *ghes)
{
	struct mem_ctl_info *mci;
	struct ghes_edac_pvt *pvt, *next;

	list_for_each_entry_safe(pvt, next, &ghes_reglist, list) {
		if (ghes != pvt->ghes)
			continue;

		mci = pvt->mci;
		edac_mc_del_mc(mci->pdev);
		list_del(&pvt->list);
		edac_mc_free(mci);
	}
}
EXPORT_SYMBOL_GPL ( ghes_edac_unregister ) ;