From 24214449b00b94328e239d3c35cda3e6fe0f931b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 4 May 2012 18:28:21 +0200 Subject: [PATCH 01/10] x86, amd_nb: Export model 0x10 and later PCI id Add the F3 PCI id of F15h, model 0x10 to pci_ids.h and to the amd_nb code which generates the list of northbridges on an AMD box. Shorten define name while at it so that it fits into pci_ids.h. Acked-by: Clemens Ladisch Cc: Bjorn Helgaas Acked-by: Andreas Herrmann Signed-off-by: Borislav Petkov --- arch/x86/kernel/amd_nb.c | 1 + drivers/hwmon/k10temp.c | 5 +---- include/linux/pci_ids.h | 1 + 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index be16854591cc..153a0ee88fb1 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -16,6 +16,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, {} }; EXPORT_SYMBOL(amd_nb_misc_ids); diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 7356b5ec8f67..f2fe8078633b 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -33,9 +33,6 @@ static bool force; module_param(force, bool, 0444); MODULE_PARM_DESC(force, "force loading on processors with erratum 319"); -/* PCI-IDs for Northbridge devices not used anywhere else */ -#define PCI_DEVICE_ID_AMD_15H_M10H_NB_F3 0x1403 - /* CPUID function 0x80000001, ebx */ #define CPUID_PKGTYPE_MASK 0xf0000000 #define CPUID_PKGTYPE_F 0x00000000 @@ -213,7 +210,7 @@ static DEFINE_PCI_DEVICE_TABLE(k10temp_id_table) = { { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M10H_NB_F3) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, {} }; MODULE_DEVICE_TABLE(pci, k10temp_id_table); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ab741b0d0074..05fd02e44993 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -517,6 +517,7 @@ #define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 #define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 +#define PCI_DEVICE_ID_AMD_15H_M10H_F3 0x1403 #define PCI_DEVICE_ID_AMD_15H_NB_F0 0x1600 #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601 #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602 From 92e26e2a1a7d5fe6538e4a676f9c383966f894a7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 2 May 2012 16:20:49 +0200 Subject: [PATCH 02/10] x86, MCE, AMD: Remove shared banks sysfs linking The code used to create a symlink on all non-BSP cores of a node when the MCi_MISCj bank is present once per node. (This is generally the case with bank 4 on AMD). However, these sysfs links cause a bunch of problems with cpu off-/onlining testing and are, as such, a bit overengineered. IOW, there's nothing wrong with having normal sysfs files for the shared banks since the corresponding MSRs are replicated across each core anyway. Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 105 ++------------------------- 1 file changed, 7 insertions(+), 98 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f4873a64f46d..2a5dd30f9969 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -60,7 +60,6 @@ struct threshold_block { struct threshold_bank { struct kobject *kobj; struct threshold_block *blocks; - cpumask_var_t cpus; }; static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); @@ -224,8 +223,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) if (!block) per_cpu(bank_map, cpu) |= (1 << bank); - if (shared_bank[bank] && c->cpu_core_id) - break; memset(&b, 0, sizeof(b)); b.cpu = cpu; @@ -555,89 +552,35 @@ local_allocate_threshold_blocks(int cpu, unsigned int bank) MSR_IA32_MC0_MISC + bank * 4); } -/* symlinks sibling shared banks to first core. first core owns dir/files. */ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { - int i, err = 0; - struct threshold_bank *b = NULL; struct device *dev = per_cpu(mce_device, cpu); + struct threshold_bank *b = NULL; char name[32]; + int err = 0; sprintf(name, "threshold_bank%i", bank); -#ifdef CONFIG_SMP - if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ - i = cpumask_first(cpu_llc_shared_mask(cpu)); - - /* first core not up yet */ - if (cpu_data(i).cpu_core_id) - goto out; - - /* already linked */ - if (per_cpu(threshold_banks, cpu)[bank]) - goto out; - - b = per_cpu(threshold_banks, i)[bank]; - - if (!b) - goto out; - - err = sysfs_create_link(&dev->kobj, b->kobj, name); - if (err) - goto out; - - cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu)); - per_cpu(threshold_banks, cpu)[bank] = b; - - goto out; - } -#endif - b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); if (!b) { err = -ENOMEM; goto out; } - if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) { - kfree(b); - err = -ENOMEM; - goto out; - } b->kobj = kobject_create_and_add(name, &dev->kobj); - if (!b->kobj) + if (!b->kobj) { + err = -EINVAL; goto out_free; - -#ifndef CONFIG_SMP - cpumask_setall(b->cpus); -#else - cpumask_set_cpu(cpu, b->cpus); -#endif + } per_cpu(threshold_banks, cpu)[bank] = b; err = local_allocate_threshold_blocks(cpu, bank); - if (err) - goto out_free; - - for_each_cpu(i, b->cpus) { - if (i == cpu) - continue; - - dev = per_cpu(mce_device, i); - if (dev) - err = sysfs_create_link(&dev->kobj,b->kobj, name); - if (err) - goto out; - - per_cpu(threshold_banks, i)[bank] = b; - } - - goto out; + if (!err) + goto out; out_free: per_cpu(threshold_banks, cpu)[bank] = NULL; - free_cpumask_var(b->cpus); kfree(b); out: return err; @@ -660,12 +603,6 @@ static __cpuinit int threshold_create_device(unsigned int cpu) return err; } -/* - * let's be hotplug friendly. - * in case of multiple core processors, the first core always takes ownership - * of shared sysfs dir/files, and rest of the cores will be symlinked to it. - */ - static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) { @@ -689,9 +626,6 @@ static void deallocate_threshold_block(unsigned int cpu, static void threshold_remove_bank(unsigned int cpu, int bank) { struct threshold_bank *b; - struct device *dev; - char name[32]; - int i = 0; b = per_cpu(threshold_banks, cpu)[bank]; if (!b) @@ -699,36 +633,11 @@ static void threshold_remove_bank(unsigned int cpu, int bank) if (!b->blocks) goto free_out; - sprintf(name, "threshold_bank%i", bank); - -#ifdef CONFIG_SMP - /* sibling symlink */ - if (shared_bank[bank] && b->blocks->cpu != cpu) { - dev = per_cpu(mce_device, cpu); - sysfs_remove_link(&dev->kobj, name); - per_cpu(threshold_banks, cpu)[bank] = NULL; - - return; - } -#endif - - /* remove all sibling symlinks before unregistering */ - for_each_cpu(i, b->cpus) { - if (i == cpu) - continue; - - dev = per_cpu(mce_device, i); - if (dev) - sysfs_remove_link(&dev->kobj, name); - per_cpu(threshold_banks, i)[bank] = NULL; - } - deallocate_threshold_block(cpu, bank); free_out: kobject_del(b->kobj); kobject_put(b->kobj); - free_cpumask_var(b->cpus); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; } From 26ab256eaac7af26ecd9ba893b5159a3b38c8a1c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 2 May 2012 16:43:02 +0200 Subject: [PATCH 03/10] x86, MCE, AMD: Remove local_allocate_... wrapper It is unneeded now so drop it. Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 2a5dd30f9969..7fd02cac962b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -61,6 +61,7 @@ struct threshold_bank { struct kobject *kobj; struct threshold_block *blocks; }; + static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); static unsigned char shared_bank[NR_BANKS] = { @@ -545,13 +546,6 @@ out_free: return err; } -static __cpuinit long -local_allocate_threshold_blocks(int cpu, unsigned int bank) -{ - return allocate_threshold_blocks(cpu, bank, 0, - MSR_IA32_MC0_MISC + bank * 4); -} - static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { struct device *dev = per_cpu(mce_device, cpu); @@ -575,7 +569,8 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) per_cpu(threshold_banks, cpu)[bank] = b; - err = local_allocate_threshold_blocks(cpu, bank); + err = allocate_threshold_blocks(cpu, bank, 0, + MSR_IA32_MC0_MISC + bank * 4); if (!err) goto out; From 019f34fccfd5cf5ff1e722dafd9fe2bd54434e66 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 2 May 2012 17:16:59 +0200 Subject: [PATCH 04/10] x86, MCE, AMD: Move shared bank to node descriptor Well, instead of having a real bank 4 on the BSP of each node and symlinks on the remaining cores, we push it up into the amd_northbridge descriptor which now contains a pointer to the northbridge bank 4 because the bank is one per northbridge and, as such, belongs in the NB descriptor anyway. Each time we hotplug CPUs, we use the northbridge pointer to copy the shared bank into the per-CPU array of threshold_banks pointers, or destroy it when the last CPU on the node goes offline, or create it when the first comes online. Signed-off-by: Borislav Petkov --- arch/x86/include/asm/amd_nb.h | 21 ++++++ arch/x86/kernel/cpu/mcheck/mce_amd.c | 107 ++++++++++++++++++++++----- 2 files changed, 108 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 49ad773f4b9f..b3341e9cd8fd 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -26,10 +26,31 @@ struct amd_l3_cache { u8 subcaches[4]; }; +struct threshold_block { + unsigned int block; + unsigned int bank; + unsigned int cpu; + u32 address; + u16 interrupt_enable; + bool interrupt_capable; + u16 threshold_limit; + struct kobject kobj; + struct list_head miscj; +}; + +struct threshold_bank { + struct kobject *kobj; + struct threshold_block *blocks; + + /* initialized to the number of CPUs on the node sharing this bank */ + atomic_t cpus; +}; + struct amd_northbridge { struct pci_dev *misc; struct pci_dev *link; struct amd_l3_cache l3_cache; + struct threshold_bank *bank4; }; struct amd_northbridge_info { diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 7fd02cac962b..d67c9e56d609 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -45,23 +46,6 @@ #define MASK_BLKPTR_LO 0xFF000000 #define MCG_XBLK_ADDR 0xC0000400 -struct threshold_block { - unsigned int block; - unsigned int bank; - unsigned int cpu; - u32 address; - u16 interrupt_enable; - bool interrupt_capable; - u16 threshold_limit; - struct kobject kobj; - struct list_head miscj; -}; - -struct threshold_bank { - struct kobject *kobj; - struct threshold_block *blocks; -}; - static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); static unsigned char shared_bank[NR_BANKS] = { @@ -546,15 +530,62 @@ out_free: return err; } +static __cpuinit int __threshold_add_blocks(struct threshold_bank *b) +{ + struct list_head *head = &b->blocks->miscj; + struct threshold_block *pos = NULL; + struct threshold_block *tmp = NULL; + int err = 0; + + err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name); + if (err) + return err; + + list_for_each_entry_safe(pos, tmp, head, miscj) { + + err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name); + if (err) { + list_for_each_entry_safe_reverse(pos, tmp, head, miscj) + kobject_del(&pos->kobj); + + return err; + } + } + return err; +} + static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { struct device *dev = per_cpu(mce_device, cpu); + struct amd_northbridge *nb = NULL; struct threshold_bank *b = NULL; char name[32]; int err = 0; sprintf(name, "threshold_bank%i", bank); + if (shared_bank[bank]) { + + nb = node_to_amd_nb(amd_get_nb_id(cpu)); + WARN_ON(!nb); + + /* threshold descriptor already initialized on this node? */ + if (nb->bank4) { + /* yes, use it */ + b = nb->bank4; + err = kobject_add(b->kobj, &dev->kobj, name); + if (err) + goto out; + + per_cpu(threshold_banks, cpu)[bank] = b; + atomic_inc(&b->cpus); + + err = __threshold_add_blocks(b); + + goto out; + } + } + b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); if (!b) { err = -ENOMEM; @@ -569,15 +600,23 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) per_cpu(threshold_banks, cpu)[bank] = b; + if (shared_bank[bank]) { + atomic_set(&b->cpus, 1); + + /* nb is already initialized, see above */ + WARN_ON(nb->bank4); + nb->bank4 = b; + } + err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MC0_MISC + bank * 4); if (!err) goto out; -out_free: - per_cpu(threshold_banks, cpu)[bank] = NULL; + out_free: kfree(b); -out: + + out: return err; } @@ -618,16 +657,44 @@ static void deallocate_threshold_block(unsigned int cpu, per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; } +static void __threshold_remove_blocks(struct threshold_bank *b) +{ + struct threshold_block *pos = NULL; + struct threshold_block *tmp = NULL; + + kobject_del(b->kobj); + + list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj) + kobject_del(&pos->kobj); +} + static void threshold_remove_bank(unsigned int cpu, int bank) { + struct amd_northbridge *nb; struct threshold_bank *b; b = per_cpu(threshold_banks, cpu)[bank]; if (!b) return; + if (!b->blocks) goto free_out; + if (shared_bank[bank]) { + if (!atomic_dec_and_test(&b->cpus)) { + __threshold_remove_blocks(b); + per_cpu(threshold_banks, cpu)[bank] = NULL; + return; + } else { + /* + * the last CPU on this node using the shared bank is + * going away, remove that bank now. + */ + nb = node_to_amd_nb(amd_get_nb_id(cpu)); + nb->bank4 = NULL; + } + } + deallocate_threshold_block(cpu, bank); free_out: From 18c20f373b76a64270a991396b06542abaf9f530 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 27 Apr 2012 12:31:34 +0200 Subject: [PATCH 05/10] x86, MCE, AMD: Print decimal thresholding values If one sets the threshold limit, say to 25: $ echo 25 > machinecheck0/threshold_bank4/misc0/threshold_limit and then reads it back again, it gives $ cat machinecheck0/threshold_bank4/misc0/threshold_limit 19 which is actually 0x19 but we don't know that. Make all output decimal. Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index d67c9e56d609..0b1bb0e15881 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -308,7 +308,7 @@ struct threshold_attr { #define SHOW_FIELDS(name) \ static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ { \ - return sprintf(buf, "%lx\n", (unsigned long) b->name); \ + return sprintf(buf, "%lu\n", (unsigned long) b->name); \ } SHOW_FIELDS(interrupt_enable) SHOW_FIELDS(threshold_limit) @@ -379,7 +379,7 @@ static ssize_t show_error_count(struct threshold_block *b, char *buf) struct threshold_block_cross_cpu tbcc = { .tb = b, }; smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1); - return sprintf(buf, "%lx\n", tbcc.retval); + return sprintf(buf, "%lu\n", tbcc.retval); } static ssize_t store_error_count(struct threshold_block *b, From 2c9c42fa98c283961b7f6b6542fb4bf0c0539e15 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 27 Apr 2012 12:53:59 +0200 Subject: [PATCH 06/10] x86, MCE, AMD: Cleanup reading of error_count We have rdmsr_on_cpu() now so remove locally defined solution in favor of the generic one. No functionality change. Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 0b1bb0e15881..a7204ef37223 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -359,27 +359,14 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) return size; } -struct threshold_block_cross_cpu { - struct threshold_block *tb; - long retval; -}; - -static void local_error_count_handler(void *_tbcc) -{ - struct threshold_block_cross_cpu *tbcc = _tbcc; - struct threshold_block *b = tbcc->tb; - u32 low, high; - - rdmsr(b->address, low, high); - tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); -} - static ssize_t show_error_count(struct threshold_block *b, char *buf) { - struct threshold_block_cross_cpu tbcc = { .tb = b, }; + u32 lo, hi; - smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1); - return sprintf(buf, "%lu\n", tbcc.retval); + rdmsr_on_cpu(b->cpu, b->address, &lo, &hi); + + return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) - + (THRESHOLD_MAX - b->threshold_limit))); } static ssize_t store_error_count(struct threshold_block *b, From 6e927361bd403dbf5f6a2668a2a07df1f1b2daff Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 27 Apr 2012 15:37:25 +0200 Subject: [PATCH 07/10] x86, MCE, AMD: Make error_count read only Until now, writing to error count caused the code to reset the thresholding bank to the current thresholding limit and start counting errors from the beginning. This is misleading and unclear, and can be accomplished by writing the old thresholding limit into ->threshold_limit. Make error_count read-only with the functionality to show the current error count. Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index a7204ef37223..e5ed2c7cb4de 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -369,14 +369,10 @@ static ssize_t show_error_count(struct threshold_block *b, char *buf) (THRESHOLD_MAX - b->threshold_limit))); } -static ssize_t store_error_count(struct threshold_block *b, - const char *buf, size_t count) -{ - struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; - - smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); - return 1; -} +static struct threshold_attr error_count = { + .attr = {.name = __stringify(error_count), .mode = 0444 }, + .show = show_error_count, +}; #define RW_ATTR(val) \ static struct threshold_attr val = { \ @@ -387,7 +383,6 @@ static struct threshold_attr val = { \ RW_ATTR(interrupt_enable); RW_ATTR(threshold_limit); -RW_ATTR(error_count); static struct attribute *default_attrs[] = { &threshold_limit.attr, From 336d335a963a5a4f5d7f915b4d74ada5d7b4d05b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 4 May 2012 17:05:27 +0200 Subject: [PATCH 08/10] x86, MCE, AMD: Give proper names to the thresholding banks Having the banks numbered is ok but having real names which mean something to the user makes a lot more sense: /sys/devices/system/machinecheck/machinecheck0/ |-- bank0 |-- bank1 |-- bank2 |-- bank3 |-- bank4 |-- bank5 |-- bank6 |-- check_interval |-- cmci_disabled |-- combined_unit | |-- combined_unit | |-- error_count | |-- threshold_limit |-- dont_log_ce |-- execution_unit | |-- execution_unit | |-- error_count | |-- threshold_limit |-- ignore_ce |-- insn_fetch | |-- insn_fetch | |-- error_count | |-- threshold_limit |-- load_store | |-- load_store | |-- error_count | |-- threshold_limit |-- monarch_timeout |-- northbridge | |-- dram | | |-- error_count | | |-- interrupt_enable | | |-- threshold_limit | |-- ht_links | | |-- error_count | | |-- interrupt_enable | | |-- threshold_limit | |-- l3_cache | |-- error_count | |-- interrupt_enable | |-- threshold_limit ... Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 35 ++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index e5ed2c7cb4de..e20bdf8d7c59 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -46,6 +46,15 @@ #define MASK_BLKPTR_LO 0xFF000000 #define MCG_XBLK_ADDR 0xC0000400 +static const char * const th_names[] = { + "load_store", + "insn_fetch", + "combined_unit", + "", + "northbridge", + "execution_unit", +}; + static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); static unsigned char shared_bank[NR_BANKS] = { @@ -68,6 +77,26 @@ struct thresh_restart { u16 old_limit; }; +static const char * const bank4_names(struct threshold_block *b) +{ + switch (b->address) { + /* MSR4_MISC0 */ + case 0x00000413: + return "dram"; + + case 0xc0000408: + return "ht_links"; + + case 0xc0000409: + return "l3_cache"; + + default: + WARN(1, "Funny MSR: 0x%08x\n", b->address); + return ""; + } +}; + + static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) { /* @@ -481,7 +510,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, err = kobject_init_and_add(&b->kobj, &threshold_ktype, per_cpu(threshold_banks, cpu)[bank]->kobj, - "misc%i", block); + (bank == 4 ? bank4_names(b) : th_names[bank])); if (err) goto out_free; recurse: @@ -541,11 +570,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) struct device *dev = per_cpu(mce_device, cpu); struct amd_northbridge *nb = NULL; struct threshold_bank *b = NULL; - char name[32]; + const char *name = th_names[bank]; int err = 0; - sprintf(name, "threshold_bank%i", bank); - if (shared_bank[bank]) { nb = node_to_amd_nb(amd_get_nb_id(cpu)); From 11122570193a429f72014703ce6b849640f8d7e5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 7 May 2012 18:22:16 +0200 Subject: [PATCH 09/10] x86, MCE, AMD: Update copyrights and boilerplate Jacob is doing something else now so add myself as the loser who provides support. Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index e20bdf8d7c59..671b95a2ffb5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1,15 +1,17 @@ /* - * (c) 2005, 2006 Advanced Micro Devices, Inc. + * (c) 2005-2012 Advanced Micro Devices, Inc. * Your use of this code is subject to the terms and conditions of the * GNU general public license version 2. See "COPYING" or * http://www.gnu.org/licenses/gpl.html * * Written by Jacob Shin - AMD, Inc. * - * Support : jacob.shin@amd.com + * Support: borislav.petkov@amd.com * * April 2006 * - added support for AMD Family 0x10 processors + * May 2012 + * - major scrubbing * * All MC4_MISCi registers are shared between multi-cores */ From 6751ed65dc6642af64f7b8a440a75563c8aab7ae Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 11 Jul 2012 10:20:47 -0700 Subject: [PATCH 10/10] x86/mce: Fix siginfo_t->si_addr value for non-recoverable memory faults In commit dad1743e5993f1 ("x86/mce: Only restart instruction after machine check recovery if it is safe") we fixed mce_notify_process() to force a signal to the current process if it was not restartable (RIPV bit not set in MCG_STATUS). But doing it here means that the process doesn't get told the virtual address of the fault via siginfo_t->si_addr. This would prevent application level recovery from the fault. Make a new MF_MUST_KILL flag bit for memory_failure() et al. to use so that we will provide the right information with the signal. Signed-off-by: Tony Luck Acked-by: Borislav Petkov Cc: stable@kernel.org # 3.4+ --- arch/x86/kernel/cpu/mcheck/mce.c | 6 ++++-- include/linux/mm.h | 1 + mm/memory-failure.c | 14 ++++++++------ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index da27c5d2168a..c46ed494f002 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1186,6 +1186,7 @@ void mce_notify_process(void) { unsigned long pfn; struct mce_info *mi = mce_find_info(); + int flags = MF_ACTION_REQUIRED; if (!mi) mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); @@ -1200,8 +1201,9 @@ void mce_notify_process(void) * doomed. We still need to mark the page as poisoned and alert any * other users of the page. */ - if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || - mi->restartable == 0) { + if (!mi->restartable) + flags |= MF_MUST_KILL; + if (memory_failure(pfn, MCE_VECTOR, flags) < 0) { pr_err("Memory error not recovered"); force_sig(SIGBUS, current); } diff --git a/include/linux/mm.h b/include/linux/mm.h index b36d08ce5c57..f9f279cf5b1b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1591,6 +1591,7 @@ void vmemmap_populate_print_last(void); enum mf_flags { MF_COUNT_INCREASED = 1 << 0, MF_ACTION_REQUIRED = 1 << 1, + MF_MUST_KILL = 1 << 2, }; extern int memory_failure(unsigned long pfn, int trapno, int flags); extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ab1e7145e290..de4ce7058450 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -345,14 +345,14 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, * Also when FAIL is set do a force kill because something went * wrong earlier. */ -static void kill_procs(struct list_head *to_kill, int doit, int trapno, +static void kill_procs(struct list_head *to_kill, int forcekill, int trapno, int fail, struct page *page, unsigned long pfn, int flags) { struct to_kill *tk, *next; list_for_each_entry_safe (tk, next, to_kill, nd) { - if (doit) { + if (forcekill) { /* * In case something went wrong with munmapping * make sure the process doesn't catch the @@ -858,7 +858,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, struct address_space *mapping; LIST_HEAD(tokill); int ret; - int kill = 1; + int kill = 1, forcekill; struct page *hpage = compound_head(p); struct page *ppage; @@ -888,7 +888,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, * be called inside page lock (it's recommended but not enforced). */ mapping = page_mapping(hpage); - if (!PageDirty(hpage) && mapping && + if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping && mapping_cap_writeback_dirty(mapping)) { if (page_mkclean(hpage)) { SetPageDirty(hpage); @@ -965,12 +965,14 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, * Now that the dirty bit has been propagated to the * struct page and all unmaps done we can decide if * killing is needed or not. Only kill when the page - * was dirty, otherwise the tokill list is merely + * was dirty or the process is not restartable, + * otherwise the tokill list is merely * freed. When there was a problem unmapping earlier * use a more force-full uncatchable kill to prevent * any accesses to the poisoned memory. */ - kill_procs(&tokill, !!PageDirty(ppage), trapno, + forcekill = PageDirty(ppage) || (flags & MF_MUST_KILL); + kill_procs(&tokill, forcekill, trapno, ret != SWAP_SUCCESS, p, pfn, flags); return ret;