2009-06-23 05:41:15 +04:00
/* Intel 7 core Memory Controller kernel module (Nehalem)
*
* This file may be distributed under the terms of the
* GNU General Public License version 2 only .
*
* Copyright ( c ) 2009 by :
* Mauro Carvalho Chehab < mchehab @ redhat . com >
*
* Red Hat Inc . http : //www.redhat.com
*
* Forked and adapted from the i5400_edac driver
*
* Based on the following public Intel datasheets :
* Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
* Datasheet , Volume 2 :
* http : //download.intel.com/design/processor/datashts/320835.pdf
* Intel Xeon Processor 5500 Series Datasheet Volume 2
* http : //www.intel.com/Assets/PDF/datasheet/321322.pdf
* also available at :
* http : //www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
*/
# include <linux/module.h>
# include <linux/init.h>
# include <linux/pci.h>
# include <linux/pci_ids.h>
# include <linux/slab.h>
2009-11-08 06:36:40 +03:00
# include <linux/delay.h>
2009-06-23 05:41:15 +04:00
# include <linux/edac.h>
# include <linux/mmzone.h>
2009-07-10 05:06:41 +04:00
# include <linux/edac_mce.h>
2009-09-05 09:35:08 +04:00
# include <linux/smp.h>
2009-09-03 06:52:36 +04:00
# include <asm/processor.h>
2009-06-23 05:41:15 +04:00
# include "edac_core.h"
2009-09-05 09:35:08 +04:00
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255 and are not reported by BIOS.
 * So far only devices with 2 sockets have been found. To support more QPI
 * (Quick Path Interconnect) sockets, just increment this number.
 */
# define MAX_SOCKET_BUSES 2
2009-06-23 05:41:15 +04:00
/*
 * Alter this version for the module when modifications are made
 */
#define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
#define EDAC_MOD_STR      "i7core_edac"

/*
 * Debug macros
 *
 * Both wrappers forward to the EDAC core print helpers with a fixed driver
 * prefix. "##arg" is the GNU comma-swallowing paste, so the macros can also
 * be used when no argument follows the format string.
 */
#define i7core_printk(level, fmt, arg...)			\
	edac_printk(level, " i7core ", fmt, ##arg)

#define i7core_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, " i7core ", fmt, ##arg)
/*
* i7core Memory Controller Registers
*/
2009-07-10 05:14:35 +04:00
/* OFFSETS for Device 0 Function 0 */
# define MC_CFG_CONTROL 0x90
2009-06-23 05:41:15 +04:00
/* OFFSETS for Device 3 Function 0 */
# define MC_CONTROL 0x48
# define MC_STATUS 0x4c
# define MC_MAX_DOD 0x64
2009-06-23 05:48:29 +04:00
/*
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 *
 * Each DIMMn_COR_ERR() helper extracts a 15-bit field from the register
 * value: bits 0-14, or bits 16-30 for DIMM1.
 */

#define MC_TEST_ERR_RCV1	0x60
  #define DIMM2_COR_ERR(reg)		((reg) & 0x7fff)

#define MC_TEST_ERR_RCV0	0x64
  #define DIMM1_COR_ERR(reg)		(((reg) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(reg)		((reg) & 0x7fff)

/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
#define MC_COR_ECC_CNT_0	0x80
#define MC_COR_ECC_CNT_1	0x84
#define MC_COR_ECC_CNT_2	0x88
#define MC_COR_ECC_CNT_3	0x8c
#define MC_COR_ECC_CNT_4	0x90
#define MC_COR_ECC_CNT_5	0x94

/* Upper (bits 16-30) and lower (bits 0-14) 15-bit fields of a counter */
#define DIMM_TOP_COR_ERR(reg)		(((reg) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(reg)		((reg) & 0x7fff)
2009-06-23 05:41:15 +04:00
/* OFFSETS for Devices 4,5 and 6 Function 0 */
2009-06-23 05:48:29 +04:00
# define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
# define THREE_DIMMS_PRESENT (1 << 24)
# define SINGLE_QUAD_RANK_PRESENT (1 << 23)
# define QUAD_RANK_PRESENT (1 << 22)
# define REGISTERED_DIMM (1 << 15)
2009-06-23 05:48:29 +04:00
#define MC_CHANNEL_MAPPER	0x60
  /*
   * Each channel owns a 6-bit group in the mapper value: the low 3 bits
   * feed WRLCH(), the high 3 bits feed RDLCH(). Both helpers return the
   * stored field minus one. "ch" is parenthesized so that expressions such
   * as "i + 1" can be passed safely.
   */
  #define RDLCH(r, ch)		((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> ((ch) * 6)) & 0x07) - 1)
2009-06-23 05:48:29 +04:00
#define MC_CHANNEL_RANK_PRESENT 0x7c
  #define RANK_PRESENT_MASK		0xffff

#define MC_CHANNEL_ADDR_MATCH	0xf0
#define MC_CHANNEL_ERROR_MASK	0xf8
#define MC_CHANNEL_ERROR_INJECT	0xfc
  /* Bit fields used with MC_CHANNEL_ERROR_INJECT */
  #define INJECT_ADDR_PARITY	0x10
  #define INJECT_ECC		0x08
  #define MASK_CACHELINE	0x06
  #define MASK_FULL_CACHELINE	0x06
  #define MASK_MSB32_CACHELINE	0x04
  #define MASK_LSB32_CACHELINE	0x02
  #define NO_MASK_CACHELINE	0x00
  #define REPEAT_EN		0x01
2009-06-23 05:41:15 +04:00
2009-06-23 05:48:29 +04:00
/* OFFSETS for Devices 4,5 and 6 Function 1 */
2009-08-06 04:36:35 +04:00
2009-06-23 05:48:29 +04:00
# define MC_DOD_CH_DIMM0 0x48
# define MC_DOD_CH_DIMM1 0x4c
# define MC_DOD_CH_DIMM2 0x50
# define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
# define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
# define DIMM_PRESENT_MASK (1 << 9)
# define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
2009-06-23 05:48:30 +04:00
# define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
# define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
# define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
# define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
2009-06-23 05:48:31 +04:00
# define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
i7core_edac: Memory info fixes and preparation for properly filling cswrow data
Now, memory size is properly displayed:
EDAC i7core: DOD Max limits: DIMMS: 2, 1-ranked, 8-banked
EDAC i7core: DOD Max rows x colums = 0x4000 x 0x400
EDAC i7core: Memory channel configuration:
EDAC i7core: Ch0 phy rd0, wr0 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: dimm 1 (0x00001288) 1024 Mb offset: 4, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: Ch1 phy rd1, wr1 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: Ch2 phy rd3, wr3 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
Still, as the way to retrieve csrows info is not known, it does a
mapping of what's available to csrows basic unit at edac core.
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2009-06-23 05:48:31 +04:00
# define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
2009-06-23 05:48:30 +04:00
# define MC_DOD_NUMCOL_MASK 3
# define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
2009-06-23 05:48:29 +04:00
2009-06-23 05:48:29 +04:00
#define MC_RANK_PRESENT		0x7c

/* Eight consecutive 32-bit registers, 0x80 through 0x9c */
#define MC_SAG_CH_0	0x80
#define MC_SAG_CH_1	0x84
#define MC_SAG_CH_2	0x88
#define MC_SAG_CH_3	0x8c
#define MC_SAG_CH_4	0x90
#define MC_SAG_CH_5	0x94
#define MC_SAG_CH_6	0x98
#define MC_SAG_CH_7	0x9c

/* Eight consecutive 32-bit registers, 0x40 through 0x5c */
#define MC_RIR_LIMIT_CH_0	0x40
#define MC_RIR_LIMIT_CH_1	0x44
#define MC_RIR_LIMIT_CH_2	0x48
#define MC_RIR_LIMIT_CH_3	0x4c
#define MC_RIR_LIMIT_CH_4	0x50
#define MC_RIR_LIMIT_CH_5	0x54
#define MC_RIR_LIMIT_CH_6	0x58
#define MC_RIR_LIMIT_CH_7	0x5c
#define MC_RIR_LIMIT_MASK	((1 << 10) - 1)		/* bits 0-9 */

#define MC_RIR_WAY_CH		0x80
  #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)	/* bits 3-13 */
  #define MC_RIR_WAY_RANK_MASK		0x7			/* bits 0-2 */
2009-06-23 05:41:15 +04:00
/*
 * i7core structs
 */

/* Array dimensions used by the driver-private structures */
#define NUM_CHANS	3
#define MAX_DIMMS	3		/* Max DIMMS per channel */
#define MAX_MCR_FUNC	4
#define MAX_CHAN_FUNC	3
2009-06-23 05:41:15 +04:00
/* Register values cached by get_dimm_config() from the MC function 0 device */
struct i7core_info {
	u32	mc_control;	/* value read at MC_CONTROL */
	u32	mc_status;	/* value read at MC_STATUS */
	u32	max_dod;	/* value read at MC_MAX_DOD */
	u32	ch_map;		/* value read at MC_CHANNEL_MAPPER */
};
2009-06-23 05:48:28 +04:00
/* Error injection parameters, filled in through the inject sysfs nodes */
struct i7core_inject {
	int	enable;		/* cleared by disable_inject() */

	u32	section;	/* accepted range is 0..3 */
	u32	type;
	u32	eccmask;

	/* Error address mask */
	int	channel;
	int	dimm;
	int	rank;
	int	bank;
	int	page;
	int	col;
};
2009-06-23 05:48:29 +04:00
struct i7core_channel {
2009-06-23 05:48:29 +04:00
u32 ranks ;
u32 dimms ;
2009-06-23 05:48:29 +04:00
} ;
2009-06-23 05:48:29 +04:00
/* One entry of the tables listing the PCI functions the driver looks for */
struct pci_id_descr {
	int	dev;		/* PCI device (slot) number */
	int	func;		/* PCI function number */
	int	dev_id;		/* PCI device ID */
	int	optional;	/* set to 1 for the RDIMM-only function */
};
2009-09-05 09:35:08 +04:00
/* Per-socket bookkeeping; instances are linked on i7core_edac_list */
struct i7core_dev {
	struct list_head	list;	/* link in i7core_edac_list */
	u8			socket;	/* key matched by get_i7core_dev() */
	struct pci_dev		**pdev;	/* n_devs entries, some may be NULL */
	int			n_devs;	/* number of entries in pdev[] */
	struct mem_ctl_info	*mci;
};
2009-06-23 05:41:15 +04:00
struct i7core_pvt {
2009-09-05 09:35:08 +04:00
struct pci_dev * pci_noncore ;
struct pci_dev * pci_mcr [ MAX_MCR_FUNC + 1 ] ;
struct pci_dev * pci_ch [ NUM_CHANS ] [ MAX_CHAN_FUNC + 1 ] ;
struct i7core_dev * i7core_dev ;
2009-07-15 13:56:23 +04:00
2009-06-23 05:41:15 +04:00
struct i7core_info info ;
2009-06-23 05:48:28 +04:00
struct i7core_inject inject ;
2009-09-05 09:35:08 +04:00
struct i7core_channel channel [ NUM_CHANS ] ;
2009-07-15 13:56:23 +04:00
2009-09-05 09:35:08 +04:00
int channels ; /* Number of active channels */
2009-06-23 05:48:29 +04:00
2009-09-05 09:35:08 +04:00
int ce_count_available ;
int csrow_map [ NUM_CHANS ] [ MAX_DIMMS ] ;
2009-09-03 06:49:59 +04:00
/* ECC corrected errors counts per udimm */
2009-09-05 09:35:08 +04:00
unsigned long udimm_ce_count [ MAX_DIMMS ] ;
int udimm_last_ce_count [ MAX_DIMMS ] ;
2009-09-03 06:49:59 +04:00
/* ECC corrected errors counts per rdimm */
2009-09-05 09:35:08 +04:00
unsigned long rdimm_ce_count [ NUM_CHANS ] [ MAX_DIMMS ] ;
int rdimm_last_ce_count [ NUM_CHANS ] [ MAX_DIMMS ] ;
2009-06-23 05:48:29 +04:00
2009-09-05 09:35:08 +04:00
unsigned int is_registered ;
2009-09-03 06:52:36 +04:00
2009-07-10 05:06:41 +04:00
/* mcelog glue */
struct edac_mce edac_mce ;
2009-10-04 17:15:40 +04:00
/* Fifo double buffers */
2009-07-10 05:06:41 +04:00
struct mce mce_entry [ MCE_LOG_LEN ] ;
2009-10-04 17:15:40 +04:00
struct mce mce_outentry [ MCE_LOG_LEN ] ;
/* Fifo in/out counters */
unsigned mce_in , mce_out ;
/* Count indicator to show errors not got */
unsigned mce_overrun ;
2009-06-23 05:41:15 +04:00
} ;
2009-09-05 07:52:11 +04:00
/* Static vars */
/* List of struct i7core_dev entries; get_i7core_dev() walks it by socket */
static LIST_HEAD ( i7core_edac_list ) ;
/* NOTE(review): presumably serializes access to i7core_edac_list - confirm */
static DEFINE_MUTEX ( i7core_edac_lock ) ;
2009-06-23 05:41:15 +04:00
2009-06-23 05:48:29 +04:00
/*
 * Expands to the designated initializers of the first three fields of a
 * struct pci_id_descr; further fields may be appended by the caller.
 */
#define PCI_DESCR(slot_nr, func_nr, pci_id)	\
	.dev = (slot_nr),			\
	.func = (func_nr),			\
	.dev_id = (pci_id)
2009-10-14 15:02:40 +04:00
struct pci_id_descr pci_dev_descr_i7core [ ] = {
2009-06-23 05:48:29 +04:00
/* Memory controller */
{ PCI_DESCR ( 3 , 0 , PCI_DEVICE_ID_INTEL_I7_MCR ) } ,
{ PCI_DESCR ( 3 , 1 , PCI_DEVICE_ID_INTEL_I7_MC_TAD ) } ,
2009-10-14 15:02:40 +04:00
/* Exists only for RDIMM */
{ PCI_DESCR ( 3 , 2 , PCI_DEVICE_ID_INTEL_I7_MC_RAS ) , . optional = 1 } ,
2009-06-23 05:48:29 +04:00
{ PCI_DESCR ( 3 , 4 , PCI_DEVICE_ID_INTEL_I7_MC_TEST ) } ,
/* Channel 0 */
{ PCI_DESCR ( 4 , 0 , PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL ) } ,
{ PCI_DESCR ( 4 , 1 , PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR ) } ,
{ PCI_DESCR ( 4 , 2 , PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK ) } ,
{ PCI_DESCR ( 4 , 3 , PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC ) } ,
/* Channel 1 */
{ PCI_DESCR ( 5 , 0 , PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL ) } ,
{ PCI_DESCR ( 5 , 1 , PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR ) } ,
{ PCI_DESCR ( 5 , 2 , PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK ) } ,
{ PCI_DESCR ( 5 , 3 , PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC ) } ,
/* Channel 2 */
{ PCI_DESCR ( 6 , 0 , PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL ) } ,
{ PCI_DESCR ( 6 , 1 , PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR ) } ,
{ PCI_DESCR ( 6 , 2 , PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK ) } ,
{ PCI_DESCR ( 6 , 3 , PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC ) } ,
2009-07-17 07:09:10 +04:00
/* Generic Non-core registers */
/*
* This is the PCI device on i7core and on Xeon 35 xx ( 8086 : 2 c41 )
* On Xeon 55 xx , however , it has a different id ( 8086 : 2 c40 ) . So ,
* the probing code needs to test for the other address in case of
* failure of this one
*/
2009-10-14 13:07:07 +04:00
{ PCI_DESCR ( 0 , 0 , PCI_DEVICE_ID_INTEL_I7_NONCORE ) } ,
2009-07-17 07:09:10 +04:00
2009-06-23 05:41:15 +04:00
} ;
2009-06-23 05:48:29 +04:00
2009-10-14 18:21:58 +04:00
struct pci_id_descr pci_dev_descr_lynnfield [ ] = {
{ PCI_DESCR ( 3 , 0 , PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR ) } ,
{ PCI_DESCR ( 3 , 1 , PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD ) } ,
{ PCI_DESCR ( 3 , 4 , PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST ) } ,
{ PCI_DESCR ( 4 , 0 , PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL ) } ,
{ PCI_DESCR ( 4 , 1 , PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR ) } ,
{ PCI_DESCR ( 4 , 2 , PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK ) } ,
{ PCI_DESCR ( 4 , 3 , PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC ) } ,
2009-10-14 20:44:37 +04:00
{ PCI_DESCR ( 5 , 0 , PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL ) } ,
{ PCI_DESCR ( 5 , 1 , PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR ) } ,
{ PCI_DESCR ( 5 , 2 , PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK ) } ,
{ PCI_DESCR ( 5 , 3 , PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC ) } ,
2009-10-14 18:21:58 +04:00
2009-10-14 20:31:06 +04:00
/*
* This is the PCI device has an alternate address on some
* processors like Core i7 860
*/
2009-10-14 18:21:58 +04:00
{ PCI_DESCR ( 0 , 0 , PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE ) } ,
} ;
2009-06-23 05:48:29 +04:00
/*
* pci_device_id table for which devices we are looking for
*/
static const struct pci_device_id i7core_pci_tbl [ ] __devinitdata = {
2009-07-11 01:39:53 +04:00
{ PCI_DEVICE ( PCI_VENDOR_ID_INTEL , PCI_DEVICE_ID_INTEL_X58_HUB_MGMT ) } ,
2009-10-14 20:31:06 +04:00
{ PCI_DEVICE ( PCI_VENDOR_ID_INTEL , PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0 ) } ,
2009-06-23 05:48:29 +04:00
{ 0 , } /* 0 terminated list. */
} ;
2009-06-23 05:41:15 +04:00
/* NOTE(review): presumably the EDAC PCI control handle, set up elsewhere in this file - confirm */
static struct edac_pci_ctl_info * i7core_pci ;
/****************************************************************************
			Ancillary status routines
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* MC_CONTROL bits */
2009-06-23 05:48:30 +04:00
# define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
# define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
2009-06-23 05:41:15 +04:00
/* MC_STATUS bits */
2009-09-03 06:46:59 +04:00
# define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
2009-06-23 05:48:30 +04:00
# define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
2009-06-23 05:41:15 +04:00
/* MC_MAX_DOD read functions */
2009-06-23 05:48:30 +04:00
/* Decode the 2-bit DIMM-count code (low two bits of @dimms): code + 1 */
static inline int numdimms(u32 dimms)
{
	const u32 code = dimms & 0x3;

	return code + 1;
}
2009-06-23 05:48:30 +04:00
/*
 * Decode the 2-bit rank code (low two bits of @rank):
 * 0 -> 1, 1 -> 2, 2 -> 4, 3 -> -EINVAL.
 */
static inline int numrank(u32 rank)
{
	const u32 code = rank & 0x3;

	if (code == 3)
		return -EINVAL;

	return 1 << code;
}
2009-06-23 05:48:30 +04:00
/*
 * Decode the 2-bit bank code (low two bits of @bank):
 * 0 -> 4, 1 -> 8, 2 -> 16, 3 -> -EINVAL.
 */
static inline int numbank(u32 bank)
{
	const u32 code = bank & 0x3;

	if (code == 3)
		return -EINVAL;

	return 4 << code;
}
2009-06-23 05:48:30 +04:00
/*
 * Decode the 3-bit row code (low three bits of @row):
 * codes 0..4 map to 1 << 12 .. 1 << 16, codes 5..7 give -EINVAL.
 */
static inline int numrow(u32 row)
{
	const u32 code = row & 0x7;

	if (code > 4)
		return -EINVAL;

	return 1 << (12 + code);
}
2009-06-23 05:48:30 +04:00
/*
 * Decode the 2-bit column code (low two bits of @col):
 * 0 -> 1 << 10, 1 -> 1 << 11, 2 -> 1 << 12, 3 -> -EINVAL.
 *
 * The lookup table has exactly the four entries the 2-bit index can reach.
 */
static inline int numcol(u32 col)
{
	static const int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};

	return cols[col & 0x3];
}
2009-09-05 09:35:08 +04:00
/*
 * Look up the i7core_dev entry of a socket on i7core_edac_list.
 * Returns the first entry whose ->socket equals @socket, or NULL if none.
 */
static struct i7core_dev *get_i7core_dev(u8 socket)
{
	struct i7core_dev *found = NULL;
	struct i7core_dev *cur;

	list_for_each_entry(cur, &i7core_edac_list, list) {
		if (cur->socket != socket)
			continue;
		found = cur;
		break;
	}

	return found;
}
2009-06-23 05:41:15 +04:00
/****************************************************************************
Memory check routines
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2009-07-15 13:56:23 +04:00
/*
 * Find the PCI device at @slot.@func among the devices recorded for @socket.
 * Returns NULL when the socket is unknown or no recorded device matches.
 */
static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
					  unsigned func)
{
	struct i7core_dev *sock_dev = get_i7core_dev(socket);
	struct pci_dev *cand;
	int idx;

	if (!sock_dev)
		return NULL;

	for (idx = 0; idx < sock_dev->n_devs; idx++) {
		cand = sock_dev->pdev[idx];

		/* Unpopulated entries are skipped */
		if (cand && PCI_SLOT(cand->devfn) == slot &&
		    PCI_FUNC(cand->devfn) == func)
			return cand;
	}

	return NULL;
}
2009-07-18 17:44:30 +04:00
/**
* i7core_get_active_channels ( ) - gets the number of channels and csrows
* @ socket : Quick Path Interconnect socket
* @ channels : Number of channels that will be returned
* @ csrows : Number of csrows found
*
* Since EDAC core needs to know in advance the number of available channels
* and csrows , in order to allocate memory for csrows / channels , it is needed
* to run two similar steps . At the first step , implemented on this function ,
* it checks the number of csrows / channels present at one socket .
* this is used in order to properly allocate the size of mci components .
*
* It should be noticed that none of the current available datasheets explain
* or even mention how csrows are seen by the memory controller . So , we need
* to add a fake description for csrows .
* So , this driver is attributing one DIMM memory for one csrow .
*/
2009-07-15 13:56:23 +04:00
static int i7core_get_active_channels ( u8 socket , unsigned * channels ,
unsigned * csrows )
2009-06-23 05:48:31 +04:00
{
struct pci_dev * pdev = NULL ;
int i , j ;
u32 status , control ;
* channels = 0 ;
* csrows = 0 ;
2009-07-15 13:56:23 +04:00
pdev = get_pdev_slot_func ( socket , 3 , 0 ) ;
2009-06-23 05:48:30 +04:00
if ( ! pdev ) {
2009-07-15 13:56:23 +04:00
i7core_printk ( KERN_ERR , " Couldn't find socket %d fn 3.0!!! \n " ,
socket ) ;
2009-06-23 05:48:30 +04:00
return - ENODEV ;
2009-06-23 05:48:30 +04:00
}
2009-06-23 05:48:30 +04:00
/* Device 3 function 0 reads */
pci_read_config_dword ( pdev , MC_STATUS , & status ) ;
pci_read_config_dword ( pdev , MC_CONTROL , & control ) ;
for ( i = 0 ; i < NUM_CHANS ; i + + ) {
2009-06-23 05:48:31 +04:00
u32 dimm_dod [ 3 ] ;
2009-06-23 05:48:30 +04:00
/* Check if the channel is active */
if ( ! ( control & ( 1 < < ( 8 + i ) ) ) )
continue ;
/* Check if the channel is disabled */
2009-06-23 05:48:31 +04:00
if ( status & ( 1 < < i ) )
2009-06-23 05:48:30 +04:00
continue ;
2009-07-15 13:56:23 +04:00
pdev = get_pdev_slot_func ( socket , i + 4 , 1 ) ;
2009-06-23 05:48:31 +04:00
if ( ! pdev ) {
2009-07-15 13:56:23 +04:00
i7core_printk ( KERN_ERR , " Couldn't find socket %d "
" fn %d.%d!!! \n " ,
socket , i + 4 , 1 ) ;
2009-06-23 05:48:31 +04:00
return - ENODEV ;
}
/* Devices 4-6 function 1 */
pci_read_config_dword ( pdev ,
MC_DOD_CH_DIMM0 , & dimm_dod [ 0 ] ) ;
pci_read_config_dword ( pdev ,
MC_DOD_CH_DIMM1 , & dimm_dod [ 1 ] ) ;
pci_read_config_dword ( pdev ,
MC_DOD_CH_DIMM2 , & dimm_dod [ 2 ] ) ;
2009-06-23 05:48:30 +04:00
( * channels ) + + ;
2009-06-23 05:48:31 +04:00
for ( j = 0 ; j < 3 ; j + + ) {
if ( ! DIMM_PRESENT ( dimm_dod [ j ] ) )
continue ;
( * csrows ) + + ;
}
2009-06-23 05:48:30 +04:00
}
2009-07-18 17:43:08 +04:00
debugf0 ( " Number of active channels on socket %d: %d \n " ,
2009-07-15 13:56:23 +04:00
socket , * channels ) ;
2009-06-23 05:48:30 +04:00
2009-06-23 05:48:30 +04:00
return 0 ;
}
2009-09-05 09:35:08 +04:00
static int get_dimm_config ( struct mem_ctl_info * mci , int * csrow )
2009-06-23 05:41:15 +04:00
{
struct i7core_pvt * pvt = mci - > pvt_info ;
2009-06-23 05:48:30 +04:00
struct csrow_info * csr ;
2009-06-23 05:48:30 +04:00
struct pci_dev * pdev ;
2009-07-15 16:02:32 +04:00
int i , j ;
i7core_edac: Memory info fixes and preparation for properly filling cswrow data
Now, memory size is properly displayed:
EDAC i7core: DOD Max limits: DIMMS: 2, 1-ranked, 8-banked
EDAC i7core: DOD Max rows x colums = 0x4000 x 0x400
EDAC i7core: Memory channel configuration:
EDAC i7core: Ch0 phy rd0, wr0 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: dimm 1 (0x00001288) 1024 Mb offset: 4, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: Ch1 phy rd1, wr1 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: Ch2 phy rd3, wr3 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
Still, as the way to retrieve csrows info is not known, it does a
mapping of what's available to csrows basic unit at edac core.
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2009-06-23 05:48:31 +04:00
unsigned long last_page = 0 ;
2009-06-23 05:48:30 +04:00
enum edac_type mode ;
2009-06-23 05:48:30 +04:00
enum mem_type mtype ;
2009-06-23 05:41:15 +04:00
2009-06-23 05:48:30 +04:00
/* Get data from the MC register, function 0 */
2009-09-05 09:35:08 +04:00
pdev = pvt - > pci_mcr [ 0 ] ;
2009-06-23 05:48:30 +04:00
if ( ! pdev )
2009-06-23 05:48:29 +04:00
return - ENODEV ;
2009-06-23 05:48:29 +04:00
/* Device 3 function 0 reads */
2009-06-23 05:48:30 +04:00
pci_read_config_dword ( pdev , MC_CONTROL , & pvt - > info . mc_control ) ;
pci_read_config_dword ( pdev , MC_STATUS , & pvt - > info . mc_status ) ;
pci_read_config_dword ( pdev , MC_MAX_DOD , & pvt - > info . max_dod ) ;
pci_read_config_dword ( pdev , MC_CHANNEL_MAPPER , & pvt - > info . ch_map ) ;
2009-06-23 05:48:29 +04:00
2009-07-21 01:48:18 +04:00
debugf0 ( " QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x \n " ,
2009-09-24 16:58:26 +04:00
pvt - > i7core_dev - > socket , pvt - > info . mc_control , pvt - > info . mc_status ,
2009-06-23 05:48:29 +04:00
pvt - > info . max_dod , pvt - > info . ch_map ) ;
2009-06-23 05:41:15 +04:00
2009-06-23 05:48:30 +04:00
if ( ECC_ENABLED ( pvt ) ) {
2009-06-23 05:48:31 +04:00
debugf0 ( " ECC enabled with x%d SDCC \n " , ECCx8 ( pvt ) ? 8 : 4 ) ;
2009-06-23 05:48:30 +04:00
if ( ECCx8 ( pvt ) )
mode = EDAC_S8ECD8ED ;
else
mode = EDAC_S4ECD4ED ;
} else {
2009-06-23 05:41:15 +04:00
debugf0 ( " ECC disabled \n " ) ;
2009-06-23 05:48:30 +04:00
mode = EDAC_NONE ;
}
2009-06-23 05:41:15 +04:00
/* FIXME: need to handle the error codes */
2009-07-21 01:48:18 +04:00
debugf0 ( " DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
" x%x x 0x%x \n " ,
2009-06-23 05:48:30 +04:00
numdimms ( pvt - > info . max_dod ) ,
numrank ( pvt - > info . max_dod > > 2 ) ,
2009-07-23 04:45:50 +04:00
numbank ( pvt - > info . max_dod > > 4 ) ,
2009-06-23 05:48:30 +04:00
numrow ( pvt - > info . max_dod > > 6 ) ,
numcol ( pvt - > info . max_dod > > 9 ) ) ;
2009-06-23 05:41:15 +04:00
2009-06-23 05:48:29 +04:00
for ( i = 0 ; i < NUM_CHANS ; i + + ) {
2009-06-23 05:48:30 +04:00
u32 data , dimm_dod [ 3 ] , value [ 8 ] ;
2009-06-23 05:48:29 +04:00
2009-10-14 18:21:58 +04:00
if ( ! pvt - > pci_ch [ i ] [ 0 ] )
continue ;
2009-06-23 05:48:29 +04:00
if ( ! CH_ACTIVE ( pvt , i ) ) {
debugf0 ( " Channel %i is not active \n " , i ) ;
continue ;
}
if ( CH_DISABLED ( pvt , i ) ) {
debugf0 ( " Channel %i is disabled \n " , i ) ;
continue ;
}
2009-06-23 05:48:29 +04:00
/* Devices 4-6 function 0 */
2009-09-05 09:35:08 +04:00
pci_read_config_dword ( pvt - > pci_ch [ i ] [ 0 ] ,
2009-06-23 05:48:29 +04:00
MC_CHANNEL_DIMM_INIT_PARAMS , & data ) ;
2009-09-05 09:35:08 +04:00
pvt - > channel [ i ] . ranks = ( data & QUAD_RANK_PRESENT ) ?
2009-07-15 13:56:23 +04:00
4 : 2 ;
2009-06-23 05:48:29 +04:00
2009-06-23 05:48:30 +04:00
if ( data & REGISTERED_DIMM )
mtype = MEM_RDDR3 ;
2009-09-03 06:52:36 +04:00
else
2009-06-23 05:48:30 +04:00
mtype = MEM_DDR3 ;
#if 0
2009-06-23 05:48:29 +04:00
if ( data & THREE_DIMMS_PRESENT )
pvt - > channel [ i ] . dimms = 3 ;
else if ( data & SINGLE_QUAD_RANK_PRESENT )
pvt - > channel [ i ] . dimms = 1 ;
else
pvt - > channel [ i ] . dimms = 2 ;
2009-06-23 05:48:30 +04:00
# endif
/* Devices 4-6 function 1 */
2009-09-05 09:35:08 +04:00
pci_read_config_dword ( pvt - > pci_ch [ i ] [ 1 ] ,
2009-06-23 05:48:30 +04:00
MC_DOD_CH_DIMM0 , & dimm_dod [ 0 ] ) ;
2009-09-05 09:35:08 +04:00
pci_read_config_dword ( pvt - > pci_ch [ i ] [ 1 ] ,
2009-06-23 05:48:30 +04:00
MC_DOD_CH_DIMM1 , & dimm_dod [ 1 ] ) ;
2009-09-05 09:35:08 +04:00
pci_read_config_dword ( pvt - > pci_ch [ i ] [ 1 ] ,
2009-06-23 05:48:30 +04:00
MC_DOD_CH_DIMM2 , & dimm_dod [ 2 ] ) ;
2009-06-23 05:48:29 +04:00
2009-06-23 05:48:30 +04:00
debugf0 ( " Ch%d phy rd%d, wr%d (0x%08x): "
2009-06-23 05:48:30 +04:00
" %d ranks, %cDIMMs \n " ,
2009-06-23 05:48:30 +04:00
i ,
RDLCH ( pvt - > info . ch_map , i ) , WRLCH ( pvt - > info . ch_map , i ) ,
data ,
2009-09-05 09:35:08 +04:00
pvt - > channel [ i ] . ranks ,
2009-06-23 05:48:31 +04:00
( data & REGISTERED_DIMM ) ? ' R ' : ' U ' ) ;
2009-06-23 05:48:30 +04:00
for ( j = 0 ; j < 3 ; j + + ) {
u32 banks , ranks , rows , cols ;
i7core_edac: Memory info fixes and preparation for properly filling cswrow data
Now, memory size is properly displayed:
EDAC i7core: DOD Max limits: DIMMS: 2, 1-ranked, 8-banked
EDAC i7core: DOD Max rows x colums = 0x4000 x 0x400
EDAC i7core: Memory channel configuration:
EDAC i7core: Ch0 phy rd0, wr0 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: dimm 1 (0x00001288) 1024 Mb offset: 4, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: Ch1 phy rd1, wr1 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: Ch2 phy rd3, wr3 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
Still, as the way to retrieve csrows info is not known, it does a
mapping of what's available to csrows basic unit at edac core.
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2009-06-23 05:48:31 +04:00
u32 size , npages ;
2009-06-23 05:48:30 +04:00
if ( ! DIMM_PRESENT ( dimm_dod [ j ] ) )
continue ;
banks = numbank ( MC_DOD_NUMBANK ( dimm_dod [ j ] ) ) ;
ranks = numrank ( MC_DOD_NUMRANK ( dimm_dod [ j ] ) ) ;
rows = numrow ( MC_DOD_NUMROW ( dimm_dod [ j ] ) ) ;
cols = numcol ( MC_DOD_NUMCOL ( dimm_dod [ j ] ) ) ;
i7core_edac: Memory info fixes and preparation for properly filling cswrow data
Now, memory size is properly displayed:
EDAC i7core: DOD Max limits: DIMMS: 2, 1-ranked, 8-banked
EDAC i7core: DOD Max rows x colums = 0x4000 x 0x400
EDAC i7core: Memory channel configuration:
EDAC i7core: Ch0 phy rd0, wr0 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: dimm 1 (0x00001288) 1024 Mb offset: 4, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: Ch1 phy rd1, wr1 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: Ch2 phy rd3, wr3 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
Still, as the way to retrieve csrows info is not known, it does a
mapping of what's available to csrows basic unit at edac core.
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2009-06-23 05:48:31 +04:00
/* DDR3 has 8 I/O banks */
size = ( rows * cols * banks * ranks ) > > ( 20 - 3 ) ;
2009-09-05 09:35:08 +04:00
pvt - > channel [ i ] . dimms + + ;
2009-06-23 05:48:30 +04:00
2009-07-21 01:48:18 +04:00
debugf0 ( " \t dimm %d %d Mb offset: %x, "
" bank: %d, rank: %d, row: %#x, col: %#x \n " ,
j , size ,
2009-06-23 05:48:30 +04:00
RANKOFFSET ( dimm_dod [ j ] ) ,
banks , ranks , rows , cols ) ;
2009-06-23 05:48:31 +04:00
# if PAGE_SHIFT > 20
npages = size > > ( PAGE_SHIFT - 20 ) ;
# else
npages = size < < ( 20 - PAGE_SHIFT ) ;
# endif
i7core_edac: Memory info fixes and preparation for properly filling cswrow data
Now, memory size is properly displayed:
EDAC i7core: DOD Max limits: DIMMS: 2, 1-ranked, 8-banked
EDAC i7core: DOD Max rows x colums = 0x4000 x 0x400
EDAC i7core: Memory channel configuration:
EDAC i7core: Ch0 phy rd0, wr0 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: dimm 1 (0x00001288) 1024 Mb offset: 4, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: Ch1 phy rd1, wr1 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: Ch2 phy rd3, wr3 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
Still, as the way to retrieve csrows info is not known, it does a
mapping of what's available to csrows basic unit at edac core.
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2009-06-23 05:48:31 +04:00
2009-07-15 16:02:32 +04:00
csr = & mci - > csrows [ * csrow ] ;
i7core_edac: Memory info fixes and preparation for properly filling cswrow data
Now, memory size is properly displayed:
EDAC i7core: DOD Max limits: DIMMS: 2, 1-ranked, 8-banked
EDAC i7core: DOD Max rows x colums = 0x4000 x 0x400
EDAC i7core: Memory channel configuration:
EDAC i7core: Ch0 phy rd0, wr0 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: dimm 1 (0x00001288) 1024 Mb offset: 4, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: Ch1 phy rd1, wr1 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
EDAC i7core: Ch2 phy rd3, wr3 (0x063f7c31): 2 ranks, UDIMMs
EDAC i7core: dimm 0 (0x00000288) 1024 Mb offset: 0, numbank: 8,
numrank: 1, numrow: 0x4000, numcol: 0x400
Still, as the way to retrieve csrows info is not known, it does a
mapping of what's available to csrows basic unit at edac core.
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2009-06-23 05:48:31 +04:00
csr - > first_page = last_page + 1 ;
last_page + = npages ;
csr - > last_page = last_page ;
csr - > nr_pages = npages ;
2009-06-23 05:48:30 +04:00
csr - > page_mask = 0 ;
2009-06-23 05:48:31 +04:00
csr - > grain = 8 ;
2009-07-15 16:02:32 +04:00
csr - > csrow_idx = * csrow ;
2009-06-23 05:48:31 +04:00
csr - > nr_channels = 1 ;
csr - > channels [ 0 ] . chan_idx = i ;
csr - > channels [ 0 ] . ce_count = 0 ;
2009-06-23 05:48:30 +04:00
2009-09-05 09:35:08 +04:00
pvt - > csrow_map [ i ] [ j ] = * csrow ;
2009-09-03 06:49:59 +04:00
2009-06-23 05:48:30 +04:00
switch ( banks ) {
case 4 :
csr - > dtype = DEV_X4 ;
break ;
case 8 :
csr - > dtype = DEV_X8 ;
break ;
case 16 :
csr - > dtype = DEV_X16 ;
break ;
default :
csr - > dtype = DEV_UNKNOWN ;
}
csr - > edac_mode = mode ;
csr - > mtype = mtype ;
2009-07-15 16:02:32 +04:00
( * csrow ) + + ;
2009-06-23 05:48:30 +04:00
}
2009-06-23 05:48:30 +04:00
2009-06-23 05:48:30 +04:00
pci_read_config_dword ( pdev , MC_SAG_CH_0 , & value [ 0 ] ) ;
pci_read_config_dword ( pdev , MC_SAG_CH_1 , & value [ 1 ] ) ;
pci_read_config_dword ( pdev , MC_SAG_CH_2 , & value [ 2 ] ) ;
pci_read_config_dword ( pdev , MC_SAG_CH_3 , & value [ 3 ] ) ;
pci_read_config_dword ( pdev , MC_SAG_CH_4 , & value [ 4 ] ) ;
pci_read_config_dword ( pdev , MC_SAG_CH_5 , & value [ 5 ] ) ;
pci_read_config_dword ( pdev , MC_SAG_CH_6 , & value [ 6 ] ) ;
pci_read_config_dword ( pdev , MC_SAG_CH_7 , & value [ 7 ] ) ;
2009-07-21 01:48:18 +04:00
debugf1 ( " \t [%i] DIVBY3 \t REMOVED \t OFFSET \n " , i ) ;
2009-06-23 05:48:30 +04:00
for ( j = 0 ; j < 8 ; j + + )
2009-07-21 01:48:18 +04:00
debugf1 ( " \t \t %#x \t %#x \t %#x \n " ,
2009-06-23 05:48:30 +04:00
( value [ j ] > > 27 ) & 0x1 ,
( value [ j ] > > 24 ) & 0x7 ,
( value [ j ] & & ( ( 1 < < 24 ) - 1 ) ) ) ;
2009-06-23 05:48:29 +04:00
}
2009-06-23 05:41:15 +04:00
return 0 ;
}
2009-06-23 05:48:28 +04:00
/****************************************************************************
Error insertion routines
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* The i7core has independent error injection features per channel.
   However, to keep the code simpler, we don't allow enabling error injection
   on more than one channel.
   Also, since a change to an inject parameter is applied only at enable,
   we disable error injection on all write calls to the sysfs nodes that
   control the error code injection.
 */
2009-06-23 05:48:29 +04:00
static int disable_inject ( struct mem_ctl_info * mci )
2009-06-23 05:48:28 +04:00
{
struct i7core_pvt * pvt = mci - > pvt_info ;
pvt - > inject . enable = 0 ;
2009-09-05 09:35:08 +04:00
if ( ! pvt - > pci_ch [ pvt - > inject . channel ] [ 0 ] )
2009-06-23 05:48:29 +04:00
return - ENODEV ;
2009-09-05 09:35:08 +04:00
pci_write_config_dword ( pvt - > pci_ch [ pvt - > inject . channel ] [ 0 ] ,
2009-08-06 03:27:15 +04:00
MC_CHANNEL_ERROR_INJECT , 0 ) ;
2009-06-23 05:48:29 +04:00
return 0 ;
2009-06-23 05:48:28 +04:00
}
/*
 * i7core inject_section sysfs attribute (store side)
 *
 * Parses a decimal bit mask from @data and keeps it in pvt->inject.section:
 *   bit 0 - refers to the lower 32-byte half cacheline
 *   bit 1 - refers to the upper 32-byte half cacheline
 *
 * A running injection is disabled before the value is parsed.
 * Returns @count on success, or -EIO when the input is not a number or is
 * larger than 3.
 */
static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
					    const char *data, size_t count)
{
	struct i7core_pvt *priv = mci->pvt_info;
	unsigned long section;

	if (priv->inject.enable)
		disable_inject(mci);

	if (strict_strtoul(data, 10, &section) < 0)
		return -EIO;
	if (section > 3)
		return -EIO;

	priv->inject.section = (u32)section;

	return count;
}
static ssize_t i7core_inject_section_show ( struct mem_ctl_info * mci ,
char * data )
{
struct i7core_pvt * pvt = mci - > pvt_info ;
return sprintf ( data , " 0x%08x \n " , pvt - > inject . section ) ;
}
/*
 * i7core inject_type sysfs attribute (store side)
 *
 * Parses a decimal bit mask from @data and keeps it in pvt->inject.type:
 *   bit 0 - repeat enable - Enable error repetition
 *   bit 1 - inject ECC error
 *   bit 2 - inject parity error
 *
 * A running injection is disabled before the value is parsed.
 * Returns @count on success, or -EIO when the input is not a number or is
 * larger than 7.
 */
static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
					 const char *data, size_t count)
{
	struct i7core_pvt *priv = mci->pvt_info;
	unsigned long type;

	if (priv->inject.enable)
		disable_inject(mci);

	if (strict_strtoul(data, 10, &type) < 0)
		return -EIO;
	if (type > 7)
		return -EIO;

	priv->inject.type = (u32)type;

	return count;
}
static ssize_t i7core_inject_type_show ( struct mem_ctl_info * mci ,
char * data )
{
struct i7core_pvt * pvt = mci - > pvt_info ;
return sprintf ( data , " 0x%08x \n " , pvt - > inject . type ) ;
}
/*
 * i7core inject_eccmask sysfs attribute (store side)
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bits set to a 1 will flip the corresponding ECC bit
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 *
 * The value is parsed as a decimal number (note that the show side prints
 * it in hexadecimal). A running injection is disabled before parsing.
 * Returns @count on success, or -EIO when the input is not a number.
 */
static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
					    const char *data, size_t count)
{
	struct i7core_pvt *priv = mci->pvt_info;
	unsigned long eccmask;

	if (priv->inject.enable)
		disable_inject(mci);

	if (strict_strtoul(data, 10, &eccmask) < 0)
		return -EIO;

	priv->inject.eccmask = (u32)eccmask;

	return count;
}
static ssize_t i7core_inject_eccmask_show ( struct mem_ctl_info * mci ,
char * data )
{
struct i7core_pvt * pvt = mci - > pvt_info ;
return sprintf ( data , " 0x%08x \n " , pvt - > inject . eccmask ) ;
}
/*
 * i7core inject_addrmatch attributes
 *
 * DECLARE_ADDR_MATCH(param, limit) generates the sysfs store/show pair for
 * one address-match criterion kept in pvt->inject.<param>.
 *
 * store: disables a running injection, then accepts either the "any"
 *	  keyword (stored as -1) or a decimal number strictly below @limit.
 *	  Returns the consumed byte count, or -EIO for anything else.
 *	  The number is parsed into an unsigned long, as strict_strtoul()
 *	  requires, so an over-large input is rejected by the range check
 *	  instead of wrapping to a negative value that would read as "any".
 * show:  prints the "any" keyword for a negative stored value and the
 *	  decimal number otherwise.
 */
#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
		struct mem_ctl_info *mci,			\
		const char *data, size_t count)			\
{								\
	struct i7core_pvt *pvt;					\
	unsigned long parsed;					\
	long value;						\
								\
	debugf1(" %s() \n ", __func__);				\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, " any ") ||			\
	    !strcasecmp(data, " any \n ")) {			\
		value = -1;					\
	} else {						\
		if (strict_strtoul(data, 10, &parsed) < 0 ||	\
		    parsed >= (limit))				\
			return -EIO;				\
		value = parsed;					\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	debugf1(" %s() pvt=%p \n ", __func__, pvt);		\
	if (pvt->inject.param < 0)				\
		return sprintf(data, " any \n ");		\
	else							\
		return sprintf(data, " %d \n ", pvt->inject.param); \
}
2009-09-24 01:56:47 +04:00
/*
 * ATTR_ADDR_MATCH(param) expands to one mcidev_sysfs_attribute initializer:
 * the attribute is named after @param, is world readable and owner
 * writable, and is wired to the i7core_inject_show_<param> and
 * i7core_inject_store_<param> handlers generated by DECLARE_ADDR_MATCH.
 */
# define ATTR_ADDR_MATCH(param) \
{ \
. attr = { \
. name = # param , \
. mode = ( S_IRUGO | S_IWUSR ) \
} , \
. show = i7core_inject_show_ # # param , \
. store = i7core_inject_store_ # # param , \
}
2009-06-23 05:48:28 +04:00
2009-09-24 01:56:47 +04:00
/*
 * Instantiate the store/show handlers for every address-match field.
 * The second argument is the exclusive upper bound accepted by the store
 * handler (e.g. channel accepts 0..2, col accepts 0..0x3fff).
 */
DECLARE_ADDR_MATCH ( channel , 3 ) ;
DECLARE_ADDR_MATCH ( dimm , 3 ) ;
DECLARE_ADDR_MATCH ( rank , 4 ) ;
DECLARE_ADDR_MATCH ( bank , 32 ) ;
DECLARE_ADDR_MATCH ( page , 0x10000 ) ;
DECLARE_ADDR_MATCH ( col , 0x4000 ) ;
2009-06-23 05:48:28 +04:00
2009-07-23 04:45:50 +04:00
/*
 * write_and_test - write a PCI config dword and verify it by reading back
 * @dev:   PCI device to program
 * @where: config space offset
 * @val:   value to write
 *
 * Up to 10 write/read-back attempts are made, sleeping 100 ms before every
 * attempt except the first one.
 *
 * Returns 0 as soon as the value read back equals @val; otherwise logs an
 * error with the last value read and returns -EINVAL.
 */
static int write_and_test(struct pci_dev *dev, int where, u32 val)
{
	u32 readback;
	int attempt = 0;

	debugf0(" setting pci %02x:%02x.%x reg=%02x value=%08x \n ",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val);

	while (attempt < 10) {
		/* Give the hardware some time before retrying */
		if (attempt != 0)
			msleep(100);

		pci_write_config_dword(dev, where, val);
		pci_read_config_dword(dev, where, &readback);

		if (readback == val)
			return 0;

		attempt++;
	}

	i7core_printk(KERN_ERR, " Error during set pci %02x:%02x.%x reg=%02x "
		" write=%08x. Read=%08x \n ",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val, readback);

	return -EINVAL;
}
2009-06-23 05:48:28 +04:00
/*
 * i7core_inject_enable_store - arm or disarm error injection
 *
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items makes the MCU ignore that matching
 * criterion for error injection.
 *
 * Note that the error only happens after a write operation on memory that
 * matches the condition. If REPEAT_EN is not enabled in the inject mask,
 * just one error is produced. Otherwise, it repeats until the inject mask
 * is cleared.
 *
 * Writing 0 disables injection; any other decimal number enables it.
 *
 * Returns @count on success, -ENODEV when no concrete channel is selected
 * (channel is "any", i.e. -1) or the channel device is missing, and -EIO
 * when @data is not a number. Errors are reported as negative values
 * because a sysfs store that returns 0 reports "no bytes consumed", which
 * makes the writer retry the same data.
 *
 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
 *	  is reliable enough to check if the MC is using the
 *	  three channels. However, this is not clear in the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *chdev;
	unsigned long enable;
	u32 injectmask;
	u64 mask = 0;
	int more_than_2_dimms;

	/* -1 means "any channel" and must never be used as an array index */
	if (pvt->inject.channel < 0)
		return -ENODEV;

	chdev = pvt->pci_ch[pvt->inject.channel][0];
	if (!chdev)
		return -ENODEV;

	if (strict_strtoul(data, 10, &enable) < 0)
		return -EIO;

	if (!enable) {
		disable_inject(mci);
		return count;
	}
	pvt->inject.enable = 1;

	/* The dimm/rank field widths depend on the number of dimms */
	more_than_2_dimms = pvt->channel[pvt->inject.channel].dimms > 2;

	/* Sets pvt->inject.dimm mask */
	if (pvt->inject.dimm < 0)
		mask |= 1LL << 41;
	else if (more_than_2_dimms)
		mask |= (pvt->inject.dimm & 0x3LL) << 35;
	else
		mask |= (pvt->inject.dimm & 0x1LL) << 36;

	/* Sets pvt->inject.rank mask */
	if (pvt->inject.rank < 0)
		mask |= 1LL << 40;
	else if (more_than_2_dimms)
		mask |= (pvt->inject.rank & 0x1LL) << 34;
	else
		mask |= (pvt->inject.rank & 0x3LL) << 34;

	/*
	 * Sets pvt->inject.bank mask.
	 * The bank field sits between the page field (ends at bit 29) and
	 * the rank field (starts at bit 34), so it is 4 bits wide. The
	 * previous 0x15 mask dropped bits 1 and 3 of the bank number and
	 * could spill bit 4 into the rank field.
	 */
	if (pvt->inject.bank < 0)
		mask |= 1LL << 39;
	else
		mask |= (pvt->inject.bank & 0xfLL) << 30;

	/* Sets pvt->inject.page mask */
	if (pvt->inject.page < 0)
		mask |= 1LL << 38;
	else
		mask |= (pvt->inject.page & 0xffffLL) << 14;

	/* Sets pvt->inject.column mask */
	if (pvt->inject.col < 0)
		mask |= 1LL << 37;
	else
		mask |= (pvt->inject.col & 0x3fffLL);

	/*
	 * bit    0: REPEAT_EN
	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit    3: INJECT_ECC
	 * bit    4: INJECT_ADDR_PARITY
	 */
	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);

	/* Unlock writes to registers - this register is write only */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 0x2);

	/* The 64-bit match value is written as two 32-bit halves */
	write_and_test(chdev, MC_CHANNEL_ADDR_MATCH, mask);
	write_and_test(chdev, MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	write_and_test(chdev, MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	write_and_test(chdev, MC_CHANNEL_ERROR_INJECT, injectmask);

	/*
	 * This is something undocumented, based on my tests
	 * Without writing 8 to this register, errors aren't injected. Not sure
	 * why.
	 */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 8);

	debugf0(" Error inject addr match 0x%016llx, ecc 0x%08x, "
		" inject 0x%08x \n ",
		(unsigned long long)mask, pvt->inject.eccmask, injectmask);

	return count;
}
static ssize_t i7core_inject_enable_show ( struct mem_ctl_info * mci ,
char * data )
{
struct i7core_pvt * pvt = mci - > pvt_info ;
2009-06-23 05:48:29 +04:00
u32 injectmask ;
2009-10-14 18:21:58 +04:00
if ( ! pvt - > pci_ch [ pvt - > inject . channel ] [ 0 ] )
return 0 ;
2009-09-05 09:35:08 +04:00
pci_read_config_dword ( pvt - > pci_ch [ pvt - > inject . channel ] [ 0 ] ,
2009-08-06 03:27:15 +04:00
MC_CHANNEL_ERROR_INJECT , & injectmask ) ;
2009-06-23 05:48:29 +04:00
debugf0 ( " Inject error read: 0x%018x \n " , injectmask ) ;
if ( injectmask & 0x0c )
pvt - > inject . enable = 1 ;
2009-06-23 05:48:28 +04:00
return sprintf ( data , " %d \n " , pvt - > inject . enable ) ;
}
2009-09-25 00:25:43 +04:00
/*
 * DECLARE_COUNTER(param) generates i7core_show_counter_<param>, the sysfs
 * show handler for pvt->udimm_ce_count[param].
 *
 * The counter is printed only when counts are available and the memory is
 * not registered; otherwise a "data unavailable" line is printed instead.
 */
#define DECLARE_COUNTER(param)					\
static ssize_t i7core_show_counter_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *priv = mci->pvt_info;		\
	int usable;						\
								\
	debugf1(" %s() \n ", __func__);				\
	usable = priv->ce_count_available &&			\
		 !priv->is_registered;				\
	if (!usable)						\
		return sprintf(data, " data unavailable \n ");	\
	return sprintf(data, " %lu \n ",			\
		       priv->udimm_ce_count[param]);		\
}
2009-06-23 05:48:29 +04:00
2009-09-25 00:25:43 +04:00
/*
 * ATTR_COUNTER(param) expands to the mcidev_sysfs_attribute initializer of
 * the read-only "udimm<param>" counter, served by
 * i7core_show_counter_<param>.
 *
 * No store handler exists for these counters, so the attribute is exposed
 * read-only instead of advertising a write permission that cannot work.
 */
#define ATTR_COUNTER(param)					\
	{							\
		.attr = {					\
			.name = __stringify(udimm##param),	\
			.mode = S_IRUGO				\
		},						\
		.show = i7core_show_counter_##param		\
	}
2009-06-23 05:48:29 +04:00
2009-09-25 00:25:43 +04:00
/* Show handlers for the three udimm corrected error counters (index 0..2) */
DECLARE_COUNTER ( 0 ) ;
DECLARE_COUNTER ( 1 ) ;
DECLARE_COUNTER ( 2 ) ;
2009-06-23 05:48:29 +04:00
2009-06-23 05:48:28 +04:00
/*
* Sysfs struct
*/
2009-09-24 01:56:47 +04:00
/*
 * Attributes of the inject_addrmatch group: one entry per address-match
 * field, closed by an entry whose name is NULL.
 */
static struct mcidev_sysfs_attribute i7core_addrmatch_attrs [ ] = {
ATTR_ADDR_MATCH ( channel ) ,
ATTR_ADDR_MATCH ( dimm ) ,
ATTR_ADDR_MATCH ( rank ) ,
ATTR_ADDR_MATCH ( bank ) ,
ATTR_ADDR_MATCH ( page ) ,
ATTR_ADDR_MATCH ( col ) ,
{ . attr = { . name = NULL } }
} ;
/* sysfs group that collects the address-match attributes */
static struct mcidev_sysfs_group i7core_inject_addrmatch = {
. name = " inject_addrmatch " ,
. mcidev_attr = i7core_addrmatch_attrs ,
} ;
2009-09-25 00:25:43 +04:00
static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs [ ] = {
ATTR_COUNTER ( 0 ) ,
ATTR_COUNTER ( 1 ) ,
ATTR_COUNTER ( 2 ) ,
} ;
/*
 * sysfs group that collects the udimm counter attributes. mci_bind_devs()
 * hooks it into i7core_sysfs_attrs when the memory is not registered.
 */
static struct mcidev_sysfs_group i7core_udimm_counters = {
. name = " all_channel_counts " ,
. mcidev_attr = i7core_udimm_counters_attrs ,
} ;
2009-09-24 01:56:47 +04:00
/*
 * Top-level sysfs attributes of the driver: the error injection controls,
 * the inject_addrmatch group and the inject_enable switch.
 *
 * The entry order matters: mci_bind_devs() addresses the second to last
 * entry by position (ARRAY_SIZE - 2) to plug in the udimm counters group,
 * so new entries must be added before the two trailing ones.
 */
static struct mcidev_sysfs_attribute i7core_sysfs_attrs [ ] = {
2009-06-23 05:48:28 +04:00
{
. attr = {
. name = " inject_section " ,
. mode = ( S_IRUGO | S_IWUSR )
} ,
. show = i7core_inject_section_show ,
. store = i7core_inject_section_store ,
} , {
. attr = {
. name = " inject_type " ,
. mode = ( S_IRUGO | S_IWUSR )
} ,
. show = i7core_inject_type_show ,
. store = i7core_inject_type_store ,
} , {
. attr = {
. name = " inject_eccmask " ,
. mode = ( S_IRUGO | S_IWUSR )
} ,
. show = i7core_inject_eccmask_show ,
. store = i7core_inject_eccmask_store ,
} , {
2009-09-24 01:56:47 +04:00
. grp = & i7core_inject_addrmatch ,
2009-06-23 05:48:28 +04:00
} , {
. attr = {
. name = " inject_enable " ,
. mode = ( S_IRUGO | S_IWUSR )
} ,
. show = i7core_inject_enable_show ,
. store = i7core_inject_enable_store ,
} ,
2009-09-25 00:25:43 +04:00
{ . attr = { . name = NULL } } , /* Reserved for udimm counters: filled in by mci_bind_devs() */
2009-09-24 16:59:13 +04:00
{ . attr = { . name = NULL } }
2009-06-23 05:48:28 +04:00
} ;
2009-06-23 05:41:15 +04:00
/****************************************************************************
Device initialization routines : put / get , init / exit
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* i7core_put_devices ' put ' all the devices that we have
* reserved via ' get '
*/
2009-09-05 19:15:20 +04:00
static void i7core_put_devices ( struct i7core_dev * i7core_dev )
2009-06-23 05:41:15 +04:00
{
2009-09-05 19:15:20 +04:00
int i ;
2009-06-23 05:41:15 +04:00
2009-09-06 06:06:50 +04:00
debugf0 ( __FILE__ " : %s() \n " , __func__ ) ;
2009-10-14 15:02:40 +04:00
for ( i = 0 ; i < i7core_dev - > n_devs ; i + + ) {
2009-09-06 06:06:50 +04:00
struct pci_dev * pdev = i7core_dev - > pdev [ i ] ;
if ( ! pdev )
continue ;
debugf0 ( " Removing dev %02x:%02x.%d \n " ,
pdev - > bus - > number ,
PCI_SLOT ( pdev - > devfn ) , PCI_FUNC ( pdev - > devfn ) ) ;
pci_dev_put ( pdev ) ;
}
2009-09-05 19:15:20 +04:00
kfree ( i7core_dev - > pdev ) ;
2009-09-06 06:06:50 +04:00
list_del ( & i7core_dev - > list ) ;
2009-09-05 19:15:20 +04:00
kfree ( i7core_dev ) ;
}
2009-09-05 07:52:11 +04:00
2009-09-05 19:15:20 +04:00
static void i7core_put_all_devices ( void )
{
2009-09-24 16:59:13 +04:00
struct i7core_dev * i7core_dev , * tmp ;
2009-09-05 19:15:20 +04:00
2009-09-24 16:59:13 +04:00
list_for_each_entry_safe ( i7core_dev , tmp , & i7core_edac_list , list )
2009-09-05 19:15:20 +04:00
i7core_put_devices ( i7core_dev ) ;
2009-06-23 05:41:15 +04:00
}
2009-10-14 15:02:40 +04:00
static void i7core_xeon_pci_fixup ( int dev_id )
2009-09-03 07:05:05 +04:00
{
struct pci_dev * pdev = NULL ;
int i ;
/*
* On Xeon 55 xx , the Intel Quckpath Arch Generic Non - core pci buses
* aren ' t announced by acpi . So , we need to use a legacy scan probing
* to detect them
*/
2009-10-14 15:02:40 +04:00
pdev = pci_get_device ( PCI_VENDOR_ID_INTEL , dev_id , NULL ) ;
2009-09-03 07:05:05 +04:00
if ( unlikely ( ! pdev ) ) {
2009-09-05 09:35:08 +04:00
for ( i = 0 ; i < MAX_SOCKET_BUSES ; i + + )
2009-09-03 07:05:05 +04:00
pcibios_scan_specific_bus ( 255 - i ) ;
}
}
2009-06-23 05:41:15 +04:00
/*
* i7core_get_devices Find and perform ' get ' operation on the MCH ' s
* device / functions we want to reference for this driver
*
* Need to ' get ' device 16 func 1 and func 2
*/
2009-10-14 15:02:40 +04:00
int i7core_get_onedevice ( struct pci_dev * * prev , int devno ,
struct pci_id_descr * dev_descr , unsigned n_devs )
2009-06-23 05:41:15 +04:00
{
2009-09-05 07:52:11 +04:00
struct i7core_dev * i7core_dev ;
2009-06-23 05:48:29 +04:00
struct pci_dev * pdev = NULL ;
2009-07-15 13:56:23 +04:00
u8 bus = 0 ;
u8 socket = 0 ;
2009-06-23 05:41:15 +04:00
2009-07-18 17:43:08 +04:00
pdev = pci_get_device ( PCI_VENDOR_ID_INTEL ,
2009-10-14 15:02:40 +04:00
dev_descr - > dev_id , * prev ) ;
2009-07-18 17:43:08 +04:00
/*
* On Xeon 55 xx , the Intel Quckpath Arch Generic Non - core regs
* is at addr 8086 : 2 c40 , instead of 8086 : 2 c41 . So , we need
* to probe for the alternate address in case of failure
*/
2009-10-14 15:02:40 +04:00
if ( dev_descr - > dev_id = = PCI_DEVICE_ID_INTEL_I7_NONCORE & & ! pdev )
2009-07-18 17:43:08 +04:00
pdev = pci_get_device ( PCI_VENDOR_ID_INTEL ,
2009-10-14 13:07:07 +04:00
PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT , * prev ) ;
2009-07-11 01:39:53 +04:00
2009-10-14 20:31:06 +04:00
if ( dev_descr - > dev_id = = PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE & & ! pdev )
pdev = pci_get_device ( PCI_VENDOR_ID_INTEL ,
PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT ,
* prev ) ;
2009-07-18 17:43:08 +04:00
if ( ! pdev ) {
if ( * prev ) {
* prev = pdev ;
return 0 ;
2009-07-11 01:39:53 +04:00
}
2009-10-14 15:02:40 +04:00
if ( dev_descr - > optional )
2009-07-18 17:43:08 +04:00
return 0 ;
2009-07-17 07:09:10 +04:00
2009-07-18 17:43:08 +04:00
i7core_printk ( KERN_ERR ,
" Device not found: dev %02x.%d PCI ID %04x:%04x \n " ,
2009-10-14 15:02:40 +04:00
dev_descr - > dev , dev_descr - > func ,
PCI_VENDOR_ID_INTEL , dev_descr - > dev_id ) ;
2009-07-15 13:56:23 +04:00
2009-07-18 17:43:08 +04:00
/* End of list, leave */
return - ENODEV ;
}
bus = pdev - > bus - > number ;
2009-07-15 13:56:23 +04:00
2009-07-18 17:43:08 +04:00
if ( bus = = 0x3f )
socket = 0 ;
else
socket = 255 - bus ;
2009-09-05 07:52:11 +04:00
i7core_dev = get_i7core_dev ( socket ) ;
if ( ! i7core_dev ) {
i7core_dev = kzalloc ( sizeof ( * i7core_dev ) , GFP_KERNEL ) ;
if ( ! i7core_dev )
return - ENOMEM ;
2009-10-14 15:02:40 +04:00
i7core_dev - > pdev = kzalloc ( sizeof ( * i7core_dev - > pdev ) * n_devs ,
2009-09-05 07:52:11 +04:00
GFP_KERNEL ) ;
if ( ! i7core_dev - > pdev )
return - ENOMEM ;
i7core_dev - > socket = socket ;
2009-10-14 15:02:40 +04:00
i7core_dev - > n_devs = n_devs ;
2009-09-05 07:52:11 +04:00
list_add_tail ( & i7core_dev - > list , & i7core_edac_list ) ;
2009-07-18 17:43:08 +04:00
}
2009-07-15 13:56:23 +04:00
2009-09-05 07:52:11 +04:00
if ( i7core_dev - > pdev [ devno ] ) {
2009-07-18 17:43:08 +04:00
i7core_printk ( KERN_ERR ,
" Duplicated device for "
" dev %02x:%02x.%d PCI ID %04x:%04x \n " ,
2009-10-14 15:02:40 +04:00
bus , dev_descr - > dev , dev_descr - > func ,
PCI_VENDOR_ID_INTEL , dev_descr - > dev_id ) ;
2009-07-18 17:43:08 +04:00
pci_dev_put ( pdev ) ;
return - ENODEV ;
}
2009-07-15 13:56:23 +04:00
2009-09-05 07:52:11 +04:00
i7core_dev - > pdev [ devno ] = pdev ;
2009-07-18 17:43:08 +04:00
/* Sanity check */
2009-10-14 15:02:40 +04:00
if ( unlikely ( PCI_SLOT ( pdev - > devfn ) ! = dev_descr - > dev | |
PCI_FUNC ( pdev - > devfn ) ! = dev_descr - > func ) ) {
2009-07-18 17:43:08 +04:00
i7core_printk ( KERN_ERR ,
" Device PCI ID %04x:%04x "
" has dev %02x:%02x.%d instead of dev %02x:%02x.%d \n " ,
2009-10-14 15:02:40 +04:00
PCI_VENDOR_ID_INTEL , dev_descr - > dev_id ,
2009-07-18 17:43:08 +04:00
bus , PCI_SLOT ( pdev - > devfn ) , PCI_FUNC ( pdev - > devfn ) ,
2009-10-14 15:02:40 +04:00
bus , dev_descr - > dev , dev_descr - > func ) ;
2009-07-18 17:43:08 +04:00
return - ENODEV ;
}
2009-06-23 05:48:30 +04:00
2009-07-18 17:43:08 +04:00
/* Be sure that the device is enabled */
if ( unlikely ( pci_enable_device ( pdev ) < 0 ) ) {
i7core_printk ( KERN_ERR ,
" Couldn't enable "
" dev %02x:%02x.%d PCI ID %04x:%04x \n " ,
2009-10-14 15:02:40 +04:00
bus , dev_descr - > dev , dev_descr - > func ,
PCI_VENDOR_ID_INTEL , dev_descr - > dev_id ) ;
2009-07-18 17:43:08 +04:00
return - ENODEV ;
}
2009-06-23 05:48:30 +04:00
2009-09-05 11:12:02 +04:00
debugf0 ( " Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x \n " ,
2009-10-14 15:02:40 +04:00
socket , bus , dev_descr - > dev ,
dev_descr - > func ,
PCI_VENDOR_ID_INTEL , dev_descr - > dev_id ) ;
2009-06-23 05:48:29 +04:00
2009-07-18 17:43:08 +04:00
* prev = pdev ;
2009-06-23 05:48:30 +04:00
2009-07-18 17:43:08 +04:00
return 0 ;
}
2009-06-23 05:41:15 +04:00
2009-10-14 15:02:40 +04:00
static int i7core_get_devices ( struct pci_id_descr dev_descr [ ] , unsigned n_devs )
2009-07-18 17:43:08 +04:00
{
2009-10-14 15:02:40 +04:00
int i , rc ;
2009-07-18 17:43:08 +04:00
struct pci_dev * pdev = NULL ;
2009-06-23 05:48:30 +04:00
2009-10-14 15:02:40 +04:00
for ( i = 0 ; i < n_devs ; i + + ) {
2009-07-18 17:43:08 +04:00
pdev = NULL ;
do {
2009-10-14 15:02:40 +04:00
rc = i7core_get_onedevice ( & pdev , i , & dev_descr [ i ] ,
n_devs ) ;
if ( rc < 0 ) {
2009-09-05 19:15:20 +04:00
i7core_put_all_devices ( ) ;
2009-07-18 17:43:08 +04:00
return - ENODEV ;
}
} while ( pdev ) ;
}
2009-09-05 07:52:11 +04:00
2009-06-23 05:48:30 +04:00
return 0 ;
}
2009-09-05 09:35:08 +04:00
static int mci_bind_devs ( struct mem_ctl_info * mci ,
struct i7core_dev * i7core_dev )
2009-06-23 05:48:30 +04:00
{
struct i7core_pvt * pvt = mci - > pvt_info ;
struct pci_dev * pdev ;
2009-09-05 09:35:08 +04:00
int i , func , slot ;
2009-06-23 05:48:30 +04:00
2009-09-05 09:35:08 +04:00
/* Associates i7core_dev and mci for future usage */
pvt - > i7core_dev = i7core_dev ;
i7core_dev - > mci = mci ;
2009-09-05 07:52:11 +04:00
2009-09-05 09:35:08 +04:00
pvt - > is_registered = 0 ;
2009-10-14 15:02:40 +04:00
for ( i = 0 ; i < i7core_dev - > n_devs ; i + + ) {
2009-09-05 09:35:08 +04:00
pdev = i7core_dev - > pdev [ i ] ;
if ( ! pdev )
2009-09-05 07:52:11 +04:00
continue ;
2009-09-05 09:35:08 +04:00
func = PCI_FUNC ( pdev - > devfn ) ;
slot = PCI_SLOT ( pdev - > devfn ) ;
if ( slot = = 3 ) {
if ( unlikely ( func > MAX_MCR_FUNC ) )
goto error ;
pvt - > pci_mcr [ func ] = pdev ;
} else if ( likely ( slot > = 4 & & slot < 4 + NUM_CHANS ) ) {
if ( unlikely ( func > MAX_CHAN_FUNC ) )
2009-06-23 05:48:30 +04:00
goto error ;
2009-09-05 09:35:08 +04:00
pvt - > pci_ch [ slot - 4 ] [ func ] = pdev ;
} else if ( ! slot & & ! func )
pvt - > pci_noncore = pdev ;
else
goto error ;
2009-06-23 05:48:30 +04:00
2009-09-05 09:35:08 +04:00
debugf0 ( " Associated fn %d.%d, dev = %p, socket %d \n " ,
PCI_SLOT ( pdev - > devfn ) , PCI_FUNC ( pdev - > devfn ) ,
pdev , i7core_dev - > socket ) ;
2009-09-03 06:52:36 +04:00
2009-09-05 09:35:08 +04:00
if ( PCI_SLOT ( pdev - > devfn ) = = 3 & &
PCI_FUNC ( pdev - > devfn ) = = 2 )
pvt - > is_registered = 1 ;
2009-06-23 05:41:15 +04:00
}
2009-07-10 05:14:35 +04:00
2009-09-25 00:25:43 +04:00
/*
* Add extra nodes to count errors on udimm
* For registered memory , this is not needed , since the counters
* are already displayed at the standard locations
*/
if ( ! pvt - > is_registered )
i7core_sysfs_attrs [ ARRAY_SIZE ( i7core_sysfs_attrs ) - 2 ] . grp =
& i7core_udimm_counters ;
2009-06-23 05:41:15 +04:00
return 0 ;
2009-06-23 05:48:30 +04:00
error :
i7core_printk ( KERN_ERR , " Device %d, function %d "
" is out of the expected range \n " ,
slot , func ) ;
return - EINVAL ;
2009-06-23 05:41:15 +04:00
}
2009-06-23 05:48:29 +04:00
/****************************************************************************
Error check routines
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2009-09-05 09:35:08 +04:00
static void i7core_rdimm_update_csrow ( struct mem_ctl_info * mci ,
2009-09-03 06:49:59 +04:00
int chan , int dimm , int add )
{
char * msg ;
struct i7core_pvt * pvt = mci - > pvt_info ;
2009-09-05 09:35:08 +04:00
int row = pvt - > csrow_map [ chan ] [ dimm ] , i ;
2009-09-03 06:49:59 +04:00
for ( i = 0 ; i < add ; i + + ) {
msg = kasprintf ( GFP_KERNEL , " Corrected error "
2009-09-05 09:35:08 +04:00
" (Socket=%d channel=%d dimm=%d) " ,
pvt - > i7core_dev - > socket , chan , dimm ) ;
2009-09-03 06:49:59 +04:00
edac_mc_handle_fbd_ce ( mci , row , 0 , msg ) ;
kfree ( msg ) ;
}
}
static void i7core_rdimm_update_ce_count ( struct mem_ctl_info * mci ,
2009-09-05 09:35:08 +04:00
int chan , int new0 , int new1 , int new2 )
2009-09-03 06:49:59 +04:00
{
struct i7core_pvt * pvt = mci - > pvt_info ;
int add0 = 0 , add1 = 0 , add2 = 0 ;
/* Updates CE counters if it is not the first time here */
2009-09-05 09:35:08 +04:00
if ( pvt - > ce_count_available ) {
2009-09-03 06:49:59 +04:00
/* Updates CE counters */
2009-09-05 09:35:08 +04:00
add2 = new2 - pvt - > rdimm_last_ce_count [ chan ] [ 2 ] ;
add1 = new1 - pvt - > rdimm_last_ce_count [ chan ] [ 1 ] ;
add0 = new0 - pvt - > rdimm_last_ce_count [ chan ] [ 0 ] ;
2009-09-03 06:49:59 +04:00
if ( add2 < 0 )
add2 + = 0x7fff ;
2009-09-05 09:35:08 +04:00
pvt - > rdimm_ce_count [ chan ] [ 2 ] + = add2 ;
2009-09-03 06:49:59 +04:00
if ( add1 < 0 )
add1 + = 0x7fff ;
2009-09-05 09:35:08 +04:00
pvt - > rdimm_ce_count [ chan ] [ 1 ] + = add1 ;
2009-09-03 06:49:59 +04:00
if ( add0 < 0 )
add0 + = 0x7fff ;
2009-09-05 09:35:08 +04:00
pvt - > rdimm_ce_count [ chan ] [ 0 ] + = add0 ;
2009-09-03 06:49:59 +04:00
} else
2009-09-05 09:35:08 +04:00
pvt - > ce_count_available = 1 ;
2009-09-03 06:49:59 +04:00
/* Store the new values */
2009-09-05 09:35:08 +04:00
pvt - > rdimm_last_ce_count [ chan ] [ 2 ] = new2 ;
pvt - > rdimm_last_ce_count [ chan ] [ 1 ] = new1 ;
pvt - > rdimm_last_ce_count [ chan ] [ 0 ] = new0 ;
2009-09-03 06:49:59 +04:00
/*updated the edac core */
if ( add0 ! = 0 )
2009-09-05 09:35:08 +04:00
i7core_rdimm_update_csrow ( mci , chan , 0 , add0 ) ;
2009-09-03 06:49:59 +04:00
if ( add1 ! = 0 )
2009-09-05 09:35:08 +04:00
i7core_rdimm_update_csrow ( mci , chan , 1 , add1 ) ;
2009-09-03 06:49:59 +04:00
if ( add2 ! = 0 )
2009-09-05 09:35:08 +04:00
i7core_rdimm_update_csrow ( mci , chan , 2 , add2 ) ;
2009-09-03 06:49:59 +04:00
}
2009-09-05 09:35:08 +04:00
static void i7core_rdimm_check_mc_ecc_err ( struct mem_ctl_info * mci )
2009-09-03 06:49:59 +04:00
{
struct i7core_pvt * pvt = mci - > pvt_info ;
u32 rcv [ 3 ] [ 2 ] ;
int i , new0 , new1 , new2 ;
/*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
2009-09-05 09:35:08 +04:00
pci_read_config_dword ( pvt - > pci_mcr [ 2 ] , MC_COR_ECC_CNT_0 ,
2009-09-03 06:49:59 +04:00
& rcv [ 0 ] [ 0 ] ) ;
2009-09-05 09:35:08 +04:00
pci_read_config_dword ( pvt - > pci_mcr [ 2 ] , MC_COR_ECC_CNT_1 ,
2009-09-03 06:49:59 +04:00
& rcv [ 0 ] [ 1 ] ) ;
2009-09-05 09:35:08 +04:00
pci_read_config_dword ( pvt - > pci_mcr [ 2 ] , MC_COR_ECC_CNT_2 ,
2009-09-03 06:49:59 +04:00
& rcv [ 1 ] [ 0 ] ) ;
2009-09-05 09:35:08 +04:00
pci_read_config_dword ( pvt - > pci_mcr [ 2 ] , MC_COR_ECC_CNT_3 ,
2009-09-03 06:49:59 +04:00
& rcv [ 1 ] [ 1 ] ) ;
2009-09-05 09:35:08 +04:00
pci_read_config_dword ( pvt - > pci_mcr [ 2 ] , MC_COR_ECC_CNT_4 ,
2009-09-03 06:49:59 +04:00
& rcv [ 2 ] [ 0 ] ) ;
2009-09-05 09:35:08 +04:00
pci_read_config_dword ( pvt - > pci_mcr [ 2 ] , MC_COR_ECC_CNT_5 ,
2009-09-03 06:49:59 +04:00
& rcv [ 2 ] [ 1 ] ) ;
for ( i = 0 ; i < 3 ; i + + ) {
debugf3 ( " MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x \n " ,
( i * 2 ) , rcv [ i ] [ 0 ] , ( i * 2 ) + 1 , rcv [ i ] [ 1 ] ) ;
/*if the channel has 3 dimms*/
2009-09-05 09:35:08 +04:00
if ( pvt - > channel [ i ] . dimms > 2 ) {
2009-09-03 06:49:59 +04:00
new0 = DIMM_BOT_COR_ERR ( rcv [ i ] [ 0 ] ) ;
new1 = DIMM_TOP_COR_ERR ( rcv [ i ] [ 0 ] ) ;
new2 = DIMM_BOT_COR_ERR ( rcv [ i ] [ 1 ] ) ;
} else {
new0 = DIMM_TOP_COR_ERR ( rcv [ i ] [ 0 ] ) +
DIMM_BOT_COR_ERR ( rcv [ i ] [ 0 ] ) ;
new1 = DIMM_TOP_COR_ERR ( rcv [ i ] [ 1 ] ) +
DIMM_BOT_COR_ERR ( rcv [ i ] [ 1 ] ) ;
new2 = 0 ;
}
2009-09-05 09:35:08 +04:00
i7core_rdimm_update_ce_count ( mci , i , new0 , new1 , new2 ) ;
2009-09-03 06:49:59 +04:00
}
}
2009-06-23 05:48:29 +04:00
/* This function is based on the device 3 function 4 registers as described on:
* Intel Xeon Processor 5500 Series Datasheet Volume 2
* http : //www.intel.com/Assets/PDF/datasheet/321322.pdf
* also available at :
* http : //www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
*/
2009-09-05 09:35:08 +04:00
static void i7core_udimm_check_mc_ecc_err ( struct mem_ctl_info * mci )
2009-06-23 05:48:29 +04:00
{
struct i7core_pvt * pvt = mci - > pvt_info ;
u32 rcv1 , rcv0 ;
int new0 , new1 , new2 ;
2009-09-05 09:35:08 +04:00
if ( ! pvt - > pci_mcr [ 4 ] ) {
2009-08-06 04:36:35 +04:00
debugf0 ( " %s MCR registers not found \n " , __func__ ) ;
2009-06-23 05:48:29 +04:00
return ;
}
2009-09-03 06:49:59 +04:00
/* Corrected test errors */
2009-09-05 09:35:08 +04:00
pci_read_config_dword ( pvt - > pci_mcr [ 4 ] , MC_TEST_ERR_RCV1 , & rcv1 ) ;
pci_read_config_dword ( pvt - > pci_mcr [ 4 ] , MC_TEST_ERR_RCV0 , & rcv0 ) ;
2009-06-23 05:48:29 +04:00
/* Store the new values */
new2 = DIMM2_COR_ERR ( rcv1 ) ;
new1 = DIMM1_COR_ERR ( rcv0 ) ;
new0 = DIMM0_COR_ERR ( rcv0 ) ;
/* Updates CE counters if it is not the first time here */
2009-09-05 09:35:08 +04:00
if ( pvt - > ce_count_available ) {
2009-06-23 05:48:29 +04:00
/* Updates CE counters */
int add0 , add1 , add2 ;
2009-09-05 09:35:08 +04:00
add2 = new2 - pvt - > udimm_last_ce_count [ 2 ] ;
add1 = new1 - pvt - > udimm_last_ce_count [ 1 ] ;
add0 = new0 - pvt - > udimm_last_ce_count [ 0 ] ;
2009-06-23 05:48:29 +04:00
if ( add2 < 0 )
add2 + = 0x7fff ;
2009-09-05 09:35:08 +04:00
pvt - > udimm_ce_count [ 2 ] + = add2 ;
2009-06-23 05:48:29 +04:00
if ( add1 < 0 )
add1 + = 0x7fff ;
2009-09-05 09:35:08 +04:00
pvt - > udimm_ce_count [ 1 ] + = add1 ;
2009-06-23 05:48:29 +04:00
if ( add0 < 0 )
add0 + = 0x7fff ;
2009-09-05 09:35:08 +04:00
pvt - > udimm_ce_count [ 0 ] + = add0 ;
2009-09-03 06:49:59 +04:00
if ( add0 | add1 | add2 )
i7core_printk ( KERN_ERR , " New Corrected error(s): "
" dimm0: +%d, dimm1: +%d, dimm2 +%d \n " ,
add0 , add1 , add2 ) ;
2009-06-23 05:48:29 +04:00
} else
2009-09-05 09:35:08 +04:00
pvt - > ce_count_available = 1 ;
2009-06-23 05:48:29 +04:00
/* Store the new values */
2009-09-05 09:35:08 +04:00
pvt - > udimm_last_ce_count [ 2 ] = new2 ;
pvt - > udimm_last_ce_count [ 1 ] = new1 ;
pvt - > udimm_last_ce_count [ 0 ] = new0 ;
2009-06-23 05:48:29 +04:00
}
2009-07-16 02:01:08 +04:00
/*
 * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
 * IA-32 Architectures Software Developer's Manual Volume 3B,
 * Nehalem is defined as family 0x06, model 0x1a.
 *
 * The MCA registers used here are the following ones:
 *     struct mce field	MCA Register
 *     m->status	MSR_IA32_MC8_STATUS
 *     m->addr		MSR_IA32_MC8_ADDR
 *     m->misc		MSR_IA32_MC8_MISC
 * In the case of Nehalem, the error information is masked at the .status
 * and .misc fields.
 */
2009-07-10 05:06:41 +04:00
static void i7core_mce_output_error ( struct mem_ctl_info * mci ,
struct mce * m )
{
2009-09-03 06:49:59 +04:00
struct i7core_pvt * pvt = mci - > pvt_info ;
2009-07-17 17:54:23 +04:00
char * type , * optype , * err , * msg ;
2009-07-16 02:01:08 +04:00
unsigned long error = m - > status & 0x1ff0000l ;
2009-07-17 17:54:23 +04:00
u32 optypenum = ( m - > status > > 4 ) & 0x07 ;
2009-07-16 02:01:08 +04:00
u32 core_err_cnt = ( m - > status > > 38 ) & & 0x7fff ;
u32 dimm = ( m - > misc > > 16 ) & 0x3 ;
u32 channel = ( m - > misc > > 18 ) & 0x3 ;
u32 syndrome = m - > misc > > 32 ;
u32 errnum = find_first_bit ( & error , 32 ) ;
2009-09-03 06:49:59 +04:00
int csrow ;
2009-07-16 02:01:08 +04:00
2009-07-17 17:28:15 +04:00
if ( m - > mcgstatus & 1 )
type = " FATAL " ;
else
type = " NON_FATAL " ;
2009-07-17 17:54:23 +04:00
switch ( optypenum ) {
2009-08-06 04:36:35 +04:00
case 0 :
optype = " generic undef request " ;
break ;
case 1 :
optype = " read error " ;
break ;
case 2 :
optype = " write error " ;
break ;
case 3 :
optype = " addr/cmd error " ;
break ;
case 4 :
optype = " scrubbing error " ;
break ;
default :
optype = " reserved " ;
break ;
2009-07-17 17:54:23 +04:00
}
2009-07-16 02:01:08 +04:00
switch ( errnum ) {
case 16 :
err = " read ECC error " ;
break ;
case 17 :
err = " RAS ECC error " ;
break ;
case 18 :
err = " write parity error " ;
break ;
case 19 :
err = " redundacy loss " ;
break ;
case 20 :
err = " reserved " ;
break ;
case 21 :
err = " memory range error " ;
break ;
case 22 :
err = " RTID out of range " ;
break ;
case 23 :
err = " address parity error " ;
break ;
case 24 :
err = " byte enable parity error " ;
break ;
default :
err = " unknown " ;
2009-07-10 05:06:41 +04:00
}
2009-07-16 02:53:24 +04:00
/* FIXME: should convert addr into bank and rank information */
2009-07-16 02:01:08 +04:00
msg = kasprintf ( GFP_ATOMIC ,
2009-09-05 09:35:08 +04:00
" %s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
2009-07-17 17:54:23 +04:00
" syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s)) \n " ,
2009-09-05 09:35:08 +04:00
type , ( long long ) m - > addr , m - > cpu , dimm , channel ,
2009-07-17 17:54:23 +04:00
syndrome , core_err_cnt , ( long long ) m - > status ,
( long long ) m - > misc , optype , err ) ;
2009-07-16 02:01:08 +04:00
debugf0 ( " %s " , msg ) ;
2009-07-10 05:06:41 +04:00
2009-09-05 09:35:08 +04:00
csrow = pvt - > csrow_map [ channel ] [ dimm ] ;
2009-09-03 06:49:59 +04:00
2009-07-10 05:06:41 +04:00
/* Call the helper to output message */
2009-09-03 06:49:59 +04:00
if ( m - > mcgstatus & 1 )
edac_mc_handle_fbd_ue ( mci , csrow , 0 ,
0 /* FIXME: should be channel here */ , msg ) ;
2009-09-05 09:35:08 +04:00
else if ( ! pvt - > is_registered )
2009-09-03 06:49:59 +04:00
edac_mc_handle_fbd_ce ( mci , csrow ,
0 /* FIXME: should be channel here */ , msg ) ;
2009-07-16 02:01:08 +04:00
kfree ( msg ) ;
2009-07-10 05:06:41 +04:00
}
2009-06-23 05:48:29 +04:00
/*
* i7core_check_error Retrieve and process errors reported by the
* hardware . Called by the Core module .
*/
static void i7core_check_error ( struct mem_ctl_info * mci )
{
2009-07-10 05:06:41 +04:00
struct i7core_pvt * pvt = mci - > pvt_info ;
int i ;
unsigned count = 0 ;
2009-10-04 17:15:40 +04:00
struct mce * m ;
2009-07-10 05:06:41 +04:00
2009-10-04 17:15:40 +04:00
/*
* MCE first step : Copy all mce errors into a temporary buffer
* We use a double buffering here , to reduce the risk of
* loosing an error .
*/
smp_rmb ( ) ;
2009-10-08 20:11:08 +04:00
count = ( pvt - > mce_out + MCE_LOG_LEN - pvt - > mce_in )
% MCE_LOG_LEN ;
2009-10-04 17:15:40 +04:00
if ( ! count )
return ;
2009-09-05 09:35:08 +04:00
2009-10-04 17:15:40 +04:00
m = pvt - > mce_outentry ;
2009-10-08 20:11:08 +04:00
if ( pvt - > mce_in + count > MCE_LOG_LEN ) {
unsigned l = MCE_LOG_LEN - pvt - > mce_in ;
2009-09-05 09:35:08 +04:00
2009-10-04 17:15:40 +04:00
memcpy ( m , & pvt - > mce_entry [ pvt - > mce_in ] , sizeof ( * m ) * l ) ;
smp_wmb ( ) ;
pvt - > mce_in = 0 ;
count - = l ;
m + = l ;
}
memcpy ( m , & pvt - > mce_entry [ pvt - > mce_in ] , sizeof ( * m ) * count ) ;
smp_wmb ( ) ;
pvt - > mce_in + = count ;
smp_rmb ( ) ;
if ( pvt - > mce_overrun ) {
i7core_printk ( KERN_ERR , " Lost %d memory errors \n " ,
pvt - > mce_overrun ) ;
smp_wmb ( ) ;
pvt - > mce_overrun = 0 ;
}
2009-07-10 05:06:41 +04:00
2009-10-04 17:15:40 +04:00
/*
* MCE second step : parse errors and display
*/
2009-07-10 05:06:41 +04:00
for ( i = 0 ; i < count ; i + + )
2009-10-04 17:15:40 +04:00
i7core_mce_output_error ( mci , & pvt - > mce_outentry [ i ] ) ;
2009-07-10 05:06:41 +04:00
2009-10-04 17:15:40 +04:00
/*
* Now , let ' s increment CE error counts
*/
2009-09-05 09:35:08 +04:00
if ( ! pvt - > is_registered )
i7core_udimm_check_mc_ecc_err ( mci ) ;
else
i7core_rdimm_check_mc_ecc_err ( mci ) ;
2009-06-23 05:48:29 +04:00
}
2009-07-10 05:06:41 +04:00
/*
* i7core_mce_check_error Replicates mcelog routine to get errors
* This routine simply queues mcelog errors , and
* return . The error itself should be handled later
* by i7core_check_error .
2009-10-05 16:40:09 +04:00
* WARNING : As this routine should be called at NMI time , extra care should
* be taken to avoid deadlocks , and to be as fast as possible .
2009-07-10 05:06:41 +04:00
*/
static int i7core_mce_check_error ( void * priv , struct mce * mce )
{
2009-07-17 17:28:15 +04:00
struct mem_ctl_info * mci = priv ;
struct i7core_pvt * pvt = mci - > pvt_info ;
2009-07-10 05:06:41 +04:00
2009-07-16 02:01:08 +04:00
/*
* Just let mcelog handle it if the error is
* outside the memory controller
*/
if ( ( ( mce - > status & 0xffff ) > > 7 ) ! = 1 )
return 0 ;
2009-07-16 02:53:24 +04:00
/* Bank 8 registers are the only ones that we know how to handle */
if ( mce - > bank ! = 8 )
return 0 ;
2009-11-08 06:36:40 +03:00
# ifdef CONFIG_SMP
2009-09-05 09:35:08 +04:00
/* Only handle if it is the right mc controller */
2009-10-05 16:40:09 +04:00
if ( cpu_data ( mce - > cpu ) . phys_proc_id ! = pvt - > i7core_dev - > socket )
2009-09-05 09:35:08 +04:00
return 0 ;
2009-11-08 06:36:40 +03:00
# endif
2009-09-05 09:35:08 +04:00
2009-10-04 17:15:40 +04:00
smp_rmb ( ) ;
2009-10-08 20:11:08 +04:00
if ( ( pvt - > mce_out + 1 ) % MCE_LOG_LEN = = pvt - > mce_in ) {
2009-10-04 17:15:40 +04:00
smp_wmb ( ) ;
pvt - > mce_overrun + + ;
return 0 ;
2009-07-10 05:06:41 +04:00
}
2009-10-05 16:40:09 +04:00
/* Copy memory error at the ringbuffer */
memcpy ( & pvt - > mce_entry [ pvt - > mce_out ] , mce , sizeof ( * mce ) ) ;
2009-10-04 17:15:40 +04:00
smp_wmb ( ) ;
2009-10-08 20:11:08 +04:00
pvt - > mce_out = ( pvt - > mce_out + 1 ) % MCE_LOG_LEN ;
2009-07-10 05:06:41 +04:00
2009-07-17 17:28:15 +04:00
/* Handle fatal errors immediately */
if ( mce - > mcgstatus & 1 )
i7core_check_error ( mci ) ;
2009-07-10 05:06:41 +04:00
/* Advice mcelog that the error were handled */
2009-07-16 02:01:08 +04:00
return 1 ;
2009-07-10 05:06:41 +04:00
}
2009-09-05 09:35:08 +04:00
static int i7core_register_mci ( struct i7core_dev * i7core_dev ,
int num_channels , int num_csrows )
2009-06-23 05:41:15 +04:00
{
struct mem_ctl_info * mci ;
struct i7core_pvt * pvt ;
2009-07-15 16:02:32 +04:00
int csrow = 0 ;
2009-09-05 09:35:08 +04:00
int rc ;
2009-06-23 05:41:15 +04:00
/* allocate a new MC control structure */
2009-09-05 11:12:02 +04:00
mci = edac_mc_alloc ( sizeof ( * pvt ) , num_csrows , num_channels ,
i7core_dev - > socket ) ;
2009-09-05 09:35:08 +04:00
if ( unlikely ( ! mci ) )
return - ENOMEM ;
2009-06-23 05:41:15 +04:00
debugf0 ( " MC: " __FILE__ " : %s(): mci = %p \n " , __func__ , mci ) ;
2009-09-05 09:35:08 +04:00
/* record ptr to the generic device */
mci - > dev = & i7core_dev - > pdev [ 0 ] - > dev ;
2009-06-23 05:41:15 +04:00
pvt = mci - > pvt_info ;
2009-06-23 05:48:30 +04:00
memset ( pvt , 0 , sizeof ( * pvt ) ) ;
2009-07-15 13:56:23 +04:00
2009-06-23 05:48:31 +04:00
/*
* FIXME : how to handle RDDR3 at MCI level ? It is possible to have
* Mixed RDDR3 / UDDR3 with Nehalem , provided that they are on different
* memory channels
*/
mci - > mtype_cap = MEM_FLAG_DDR3 ;
2009-06-23 05:41:15 +04:00
mci - > edac_ctl_cap = EDAC_FLAG_NONE ;
mci - > edac_cap = EDAC_FLAG_NONE ;
mci - > mod_name = " i7core_edac.c " ;
mci - > mod_ver = I7CORE_REVISION ;
2009-09-05 09:35:08 +04:00
mci - > ctl_name = kasprintf ( GFP_KERNEL , " i7 core #%d " ,
i7core_dev - > socket ) ;
mci - > dev_name = pci_name ( i7core_dev - > pdev [ 0 ] ) ;
2009-06-23 05:41:15 +04:00
mci - > ctl_page_to_phys = NULL ;
2009-09-24 01:56:47 +04:00
mci - > mc_driver_sysfs_attributes = i7core_sysfs_attrs ;
2009-06-23 05:48:29 +04:00
/* Set the function pointer to an actual operation function */
mci - > edac_check = i7core_check_error ;
2009-06-23 05:48:29 +04:00
2009-06-23 05:48:30 +04:00
/* Store pci devices at mci for faster access */
2009-09-05 09:35:08 +04:00
rc = mci_bind_devs ( mci , i7core_dev ) ;
2009-06-23 05:48:31 +04:00
if ( unlikely ( rc < 0 ) )
2009-09-05 09:35:08 +04:00
goto fail ;
2009-06-23 05:48:30 +04:00
/* Get dimm basic config */
2009-09-05 09:35:08 +04:00
get_dimm_config ( mci , & csrow ) ;
2009-06-23 05:48:30 +04:00
2009-06-23 05:41:15 +04:00
/* add this new MC control structure to EDAC's list of MCs */
2009-06-23 05:48:30 +04:00
if ( unlikely ( edac_mc_add_mc ( mci ) ) ) {
2009-06-23 05:41:15 +04:00
debugf0 ( " MC: " __FILE__
" : %s(): failed edac_mc_add_mc() \n " , __func__ ) ;
/* FIXME: perhaps some code should go here that disables error
* reporting if we just enabled it
*/
2009-06-23 05:48:30 +04:00
rc = - EINVAL ;
2009-09-05 09:35:08 +04:00
goto fail ;
2009-06-23 05:41:15 +04:00
}
/* allocating generic PCI control info */
2009-09-05 09:35:08 +04:00
i7core_pci = edac_pci_create_generic_ctl ( & i7core_dev - > pdev [ 0 ] - > dev ,
EDAC_MOD_STR ) ;
2009-06-23 05:48:31 +04:00
if ( unlikely ( ! i7core_pci ) ) {
2009-06-23 05:41:15 +04:00
printk ( KERN_WARNING
" %s(): Unable to create PCI control \n " ,
__func__ ) ;
printk ( KERN_WARNING
" %s(): PCI error report via EDAC not setup \n " ,
__func__ ) ;
}
2009-06-23 05:48:28 +04:00
/* Default error mask is any memory */
2009-06-23 05:48:30 +04:00
pvt - > inject . channel = 0 ;
2009-06-23 05:48:28 +04:00
pvt - > inject . dimm = - 1 ;
pvt - > inject . rank = - 1 ;
pvt - > inject . bank = - 1 ;
pvt - > inject . page = - 1 ;
pvt - > inject . col = - 1 ;
2009-07-10 05:06:41 +04:00
/* Registers on edac_mce in order to receive memory errors */
2009-07-17 17:28:15 +04:00
pvt - > edac_mce . priv = mci ;
2009-07-10 05:06:41 +04:00
pvt - > edac_mce . check_error = i7core_mce_check_error ;
rc = edac_mce_register ( & pvt - > edac_mce ) ;
2009-08-06 04:36:35 +04:00
if ( unlikely ( rc < 0 ) ) {
2009-07-10 05:06:41 +04:00
debugf0 ( " MC: " __FILE__
" : %s(): failed edac_mce_register() \n " , __func__ ) ;
2009-09-05 09:35:08 +04:00
}
fail :
edac_mc_free ( mci ) ;
return rc ;
}
/*
 * i7core_probe	Probe for ONE instance of device to see if it is
 *			present.
 *
 * Picks the PCI descriptor table that matches the processor, grabs the
 * devices and registers one memory controller for every entry found on
 * i7core_edac_list, all under i7core_edac_lock.
 *
 * return:
 *	0 for FOUND a device
 *	< 0 for error code
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	struct i7core_dev *sock_dev;
	int index = id->driver_data;
	int ret;

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(index >= 1))
		return -EINVAL;

	mutex_lock(&i7core_edac_lock);

	/* get the pci devices we want to reserve for our use */
	if (pdev->device != PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0) {
		printk(KERN_INFO "i7core_edac: detected a "
				 "Nehalem/Nehalem-EP processor\n");
		ret = i7core_get_devices(pci_dev_descr_i7core,
					 ARRAY_SIZE(pci_dev_descr_i7core));
	} else {
		printk(KERN_INFO "i7core_edac: detected a "
				 "Lynnfield processor\n");
		ret = i7core_get_devices(pci_dev_descr_lynnfield,
					 ARRAY_SIZE(pci_dev_descr_lynnfield));
	}
	if (unlikely(ret < 0))
		goto unlock;

	list_for_each_entry(sock_dev, &i7core_edac_list, list) {
		int nr_channels;
		int nr_csrows;

		/* Check the number of active and not disabled channels */
		ret = i7core_get_active_channels(sock_dev->socket,
						 &nr_channels, &nr_csrows);
		if (unlikely(ret < 0))
			goto put_devices;

		ret = i7core_register_mci(sock_dev, nr_channels, nr_csrows);
		if (unlikely(ret < 0))
			goto put_devices;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

put_devices:
	/* Give back every PCI device taken by i7core_get_devices() */
	i7core_put_all_devices();
unlock:
	mutex_unlock(&i7core_edac_lock);
	return ret;
}
/*
 * i7core_remove	destructor for one instance of device
 *
 * Releases the generic PCI control (if one was created) and then tears
 * down the memory controller of every entry on i7core_edac_list.
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct i7core_dev *cur, *next;

	debugf0(__FILE__ ": %s()\n", __func__);

	if (i7core_pci)
		edac_pci_release_generic_ctl(i7core_pci);

	/*
	 * we have a trouble here: pdev value for removal will be wrong, since
	 * it will point to the X58 register used to detect that the machine
	 * is a Nehalem or upper design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices
	 */

	mutex_lock(&i7core_edac_lock);
	list_for_each_entry_safe(cur, next, &i7core_edac_list, list) {
		struct mem_ctl_info *mci;
		struct i7core_pvt *pvt;
		struct i7core_dev *owner;

		mci = edac_mc_del_mc(&cur->pdev[0]->dev);
		if (!mci) {
			i7core_printk(KERN_ERR,
				      "Couldn't find mci for socket %d\n",
				      cur->socket);
			continue;
		}

		pvt = mci->pvt_info;
		/* Read the owner before the mci (and with it pvt) is freed */
		owner = pvt->i7core_dev;

		edac_mce_unregister(&pvt->edac_mce);
		kfree(mci->ctl_name);
		edac_mc_free(mci);
		i7core_put_devices(owner);
	}
	mutex_unlock(&i7core_edac_lock);
}
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 * i7core_driver	pci_driver structure for this module
 *
 * Ties the i7core_pci_tbl ID table to the probe and remove callbacks.
 */
static struct pci_driver i7core_driver = {
	.name		= "i7core_edac",
	.id_table	= i7core_pci_tbl,
	.probe		= i7core_probe,
	.remove		= __devexit_p(i7core_remove),
};
/*
 * i7core_init		Module entry function
 *			Try to initialize this module for its devices
 *
 * return:
 *	0 when the PCI driver was registered
 *	< 0 error code from pci_register_driver() otherwise
 */
static int __init i7core_init(void)
{
	int err;

	debugf2("MC: " __FILE__ ": %s()\n", __func__);

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	i7core_xeon_pci_fixup(pci_dev_descr_i7core[0].dev_id);

	err = pci_register_driver(&i7core_driver);
	if (err < 0) {
		i7core_printk(KERN_ERR,
			      "Failed to register device with error %d.\n",
			      err);
		return err;
	}

	return 0;
}
/*
 * i7core_exit()	Module exit function
 *			Unregister the driver
 */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);

	/* Drop the PCI driver registered by i7core_init() */
	pci_unregister_driver(&i7core_driver);
}
module_init ( i7core_init ) ;
module_exit ( i7core_exit ) ;
MODULE_LICENSE ( " GPL " ) ;
MODULE_AUTHOR ( " Mauro Carvalho Chehab <mchehab@redhat.com> " ) ;
MODULE_AUTHOR ( " Red Hat Inc. (http://www.redhat.com) " ) ;
MODULE_DESCRIPTION ( " MC Driver for Intel i7 Core memory controllers - "
I7CORE_REVISION ) ;
module_param ( edac_op_state , int , 0444 ) ;
MODULE_PARM_DESC ( edac_op_state , " EDAC Error Reporting state: 0=Poll,1=NMI " ) ;