x86: Remove CONFIG_X86_OOSTORE
This was an optimization that made memcpy type benchmarks a little faster on ancient (circa 1998) IDT Winchip CPUs. In real-life workloads, it wasn't even noticeable, and I doubt anyone is running benchmarks on 16-year-old silicon any more. Given this code has likely seen very little use over the last decade, let's just remove it. Signed-off-by: Dave Jones <davej@fedoraproject.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
8712a00514
commit
09df7c4c80
@ -341,10 +341,6 @@ config X86_USE_3DNOW
|
|||||||
def_bool y
|
def_bool y
|
||||||
depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
|
depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
|
||||||
|
|
||||||
config X86_OOSTORE
|
|
||||||
def_bool y
|
|
||||||
depends on (MWINCHIP3D || MWINCHIPC6) && MTRR
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# P6_NOPs are a relatively minor optimization that require a family >=
|
# P6_NOPs are a relatively minor optimization that require a family >=
|
||||||
# 6 processor, except that it is broken on certain VIA chips.
|
# 6 processor, except that it is broken on certain VIA chips.
|
||||||
|
@ -85,11 +85,7 @@
|
|||||||
#else
|
#else
|
||||||
# define smp_rmb() barrier()
|
# define smp_rmb() barrier()
|
||||||
#endif
|
#endif
|
||||||
#ifdef CONFIG_X86_OOSTORE
|
#define smp_wmb() barrier()
|
||||||
# define smp_wmb() wmb()
|
|
||||||
#else
|
|
||||||
# define smp_wmb() barrier()
|
|
||||||
#endif
|
|
||||||
#define smp_read_barrier_depends() read_barrier_depends()
|
#define smp_read_barrier_depends() read_barrier_depends()
|
||||||
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
|
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
|
||||||
#else /* !SMP */
|
#else /* !SMP */
|
||||||
@ -100,7 +96,7 @@
|
|||||||
#define set_mb(var, value) do { var = value; barrier(); } while (0)
|
#define set_mb(var, value) do { var = value; barrier(); } while (0)
|
||||||
#endif /* SMP */
|
#endif /* SMP */
|
||||||
|
|
||||||
#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
|
#if defined(CONFIG_X86_PPRO_FENCE)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For either of these options x86 doesn't have a strong TSO memory
|
* For either of these options x86 doesn't have a strong TSO memory
|
||||||
|
@ -237,7 +237,7 @@ memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)
|
|||||||
|
|
||||||
static inline void flush_write_buffers(void)
|
static inline void flush_write_buffers(void)
|
||||||
{
|
{
|
||||||
#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
|
#if defined(CONFIG_X86_PPRO_FENCE)
|
||||||
asm volatile("lock; addl $0,0(%%esp)": : :"memory");
|
asm volatile("lock; addl $0,0(%%esp)": : :"memory");
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -26,10 +26,9 @@
|
|||||||
# define LOCK_PTR_REG "D"
|
# define LOCK_PTR_REG "D"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(CONFIG_X86_32) && \
|
#if defined(CONFIG_X86_32) && (defined(CONFIG_X86_PPRO_FENCE))
|
||||||
(defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE))
|
|
||||||
/*
|
/*
|
||||||
* On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
|
* On PPro SMP, we use a locked operation to unlock
|
||||||
* (PPro errata 66, 92)
|
* (PPro errata 66, 92)
|
||||||
*/
|
*/
|
||||||
# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
|
# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
|
||||||
|
@ -8,236 +8,6 @@
|
|||||||
|
|
||||||
#include "cpu.h"
|
#include "cpu.h"
|
||||||
|
|
||||||
#ifdef CONFIG_X86_OOSTORE
|
|
||||||
|
|
||||||
static u32 power2(u32 x)
|
|
||||||
{
|
|
||||||
u32 s = 1;
|
|
||||||
|
|
||||||
while (s <= x)
|
|
||||||
s <<= 1;
|
|
||||||
|
|
||||||
return s >>= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Set up an actual MCR
|
|
||||||
*/
|
|
||||||
static void centaur_mcr_insert(int reg, u32 base, u32 size, int key)
|
|
||||||
{
|
|
||||||
u32 lo, hi;
|
|
||||||
|
|
||||||
hi = base & ~0xFFF;
|
|
||||||
lo = ~(size-1); /* Size is a power of 2 so this makes a mask */
|
|
||||||
lo &= ~0xFFF; /* Remove the ctrl value bits */
|
|
||||||
lo |= key; /* Attribute we wish to set */
|
|
||||||
wrmsr(reg+MSR_IDT_MCR0, lo, hi);
|
|
||||||
mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Figure what we can cover with MCR's
|
|
||||||
*
|
|
||||||
* Shortcut: We know you can't put 4Gig of RAM on a winchip
|
|
||||||
*/
|
|
||||||
static u32 ramtop(void)
|
|
||||||
{
|
|
||||||
u32 clip = 0xFFFFFFFFUL;
|
|
||||||
u32 top = 0;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < e820.nr_map; i++) {
|
|
||||||
unsigned long start, end;
|
|
||||||
|
|
||||||
if (e820.map[i].addr > 0xFFFFFFFFUL)
|
|
||||||
continue;
|
|
||||||
/*
|
|
||||||
* Don't MCR over reserved space. Ignore the ISA hole
|
|
||||||
* we frob around that catastrophe already
|
|
||||||
*/
|
|
||||||
if (e820.map[i].type == E820_RESERVED) {
|
|
||||||
if (e820.map[i].addr >= 0x100000UL &&
|
|
||||||
e820.map[i].addr < clip)
|
|
||||||
clip = e820.map[i].addr;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
start = e820.map[i].addr;
|
|
||||||
end = e820.map[i].addr + e820.map[i].size;
|
|
||||||
if (start >= end)
|
|
||||||
continue;
|
|
||||||
if (end > top)
|
|
||||||
top = end;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* Everything below 'top' should be RAM except for the ISA hole.
|
|
||||||
* Because of the limited MCR's we want to map NV/ACPI into our
|
|
||||||
* MCR range for gunk in RAM
|
|
||||||
*
|
|
||||||
* Clip might cause us to MCR insufficient RAM but that is an
|
|
||||||
* acceptable failure mode and should only bite obscure boxes with
|
|
||||||
* a VESA hole at 15Mb
|
|
||||||
*
|
|
||||||
* The second case Clip sometimes kicks in is when the EBDA is marked
|
|
||||||
* as reserved. Again we fail safe with reasonable results
|
|
||||||
*/
|
|
||||||
if (top > clip)
|
|
||||||
top = clip;
|
|
||||||
|
|
||||||
return top;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Compute a set of MCR's to give maximum coverage
|
|
||||||
*/
|
|
||||||
static int centaur_mcr_compute(int nr, int key)
|
|
||||||
{
|
|
||||||
u32 mem = ramtop();
|
|
||||||
u32 root = power2(mem);
|
|
||||||
u32 base = root;
|
|
||||||
u32 top = root;
|
|
||||||
u32 floor = 0;
|
|
||||||
int ct = 0;
|
|
||||||
|
|
||||||
while (ct < nr) {
|
|
||||||
u32 fspace = 0;
|
|
||||||
u32 high;
|
|
||||||
u32 low;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Find the largest block we will fill going upwards
|
|
||||||
*/
|
|
||||||
high = power2(mem-top);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Find the largest block we will fill going downwards
|
|
||||||
*/
|
|
||||||
low = base/2;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Don't fill below 1Mb going downwards as there
|
|
||||||
* is an ISA hole in the way.
|
|
||||||
*/
|
|
||||||
if (base <= 1024*1024)
|
|
||||||
low = 0;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* See how much space we could cover by filling below
|
|
||||||
* the ISA hole
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (floor == 0)
|
|
||||||
fspace = 512*1024;
|
|
||||||
else if (floor == 512*1024)
|
|
||||||
fspace = 128*1024;
|
|
||||||
|
|
||||||
/* And forget ROM space */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Now install the largest coverage we get
|
|
||||||
*/
|
|
||||||
if (fspace > high && fspace > low) {
|
|
||||||
centaur_mcr_insert(ct, floor, fspace, key);
|
|
||||||
floor += fspace;
|
|
||||||
} else if (high > low) {
|
|
||||||
centaur_mcr_insert(ct, top, high, key);
|
|
||||||
top += high;
|
|
||||||
} else if (low > 0) {
|
|
||||||
base -= low;
|
|
||||||
centaur_mcr_insert(ct, base, low, key);
|
|
||||||
} else
|
|
||||||
break;
|
|
||||||
ct++;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* We loaded ct values. We now need to set the mask. The caller
|
|
||||||
* must do this bit.
|
|
||||||
*/
|
|
||||||
return ct;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void centaur_create_optimal_mcr(void)
|
|
||||||
{
|
|
||||||
int used;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Allocate up to 6 mcrs to mark as much of ram as possible
|
|
||||||
* as write combining and weak write ordered.
|
|
||||||
*
|
|
||||||
* To experiment with: Linux never uses stack operations for
|
|
||||||
* mmio spaces so we could globally enable stack operation wc
|
|
||||||
*
|
|
||||||
* Load the registers with type 31 - full write combining, all
|
|
||||||
* writes weakly ordered.
|
|
||||||
*/
|
|
||||||
used = centaur_mcr_compute(6, 31);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Wipe unused MCRs
|
|
||||||
*/
|
|
||||||
for (i = used; i < 8; i++)
|
|
||||||
wrmsr(MSR_IDT_MCR0+i, 0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void winchip2_create_optimal_mcr(void)
|
|
||||||
{
|
|
||||||
u32 lo, hi;
|
|
||||||
int used;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Allocate up to 6 mcrs to mark as much of ram as possible
|
|
||||||
* as write combining, weak store ordered.
|
|
||||||
*
|
|
||||||
* Load the registers with type 25
|
|
||||||
* 8 - weak write ordering
|
|
||||||
* 16 - weak read ordering
|
|
||||||
* 1 - write combining
|
|
||||||
*/
|
|
||||||
used = centaur_mcr_compute(6, 25);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Mark the registers we are using.
|
|
||||||
*/
|
|
||||||
rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
|
|
||||||
for (i = 0; i < used; i++)
|
|
||||||
lo |= 1<<(9+i);
|
|
||||||
wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Wipe unused MCRs
|
|
||||||
*/
|
|
||||||
|
|
||||||
for (i = used; i < 8; i++)
|
|
||||||
wrmsr(MSR_IDT_MCR0+i, 0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Handle the MCR key on the Winchip 2.
|
|
||||||
*/
|
|
||||||
static void winchip2_unprotect_mcr(void)
|
|
||||||
{
|
|
||||||
u32 lo, hi;
|
|
||||||
u32 key;
|
|
||||||
|
|
||||||
rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
|
|
||||||
lo &= ~0x1C0; /* blank bits 8-6 */
|
|
||||||
key = (lo>>17) & 7;
|
|
||||||
lo |= key<<6; /* replace with unlock key */
|
|
||||||
wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void winchip2_protect_mcr(void)
|
|
||||||
{
|
|
||||||
u32 lo, hi;
|
|
||||||
|
|
||||||
rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
|
|
||||||
lo &= ~0x1C0; /* blank bits 8-6 */
|
|
||||||
wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_X86_OOSTORE */
|
|
||||||
|
|
||||||
#define ACE_PRESENT (1 << 6)
|
#define ACE_PRESENT (1 << 6)
|
||||||
#define ACE_ENABLED (1 << 7)
|
#define ACE_ENABLED (1 << 7)
|
||||||
#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */
|
#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */
|
||||||
@ -362,20 +132,6 @@ static void init_centaur(struct cpuinfo_x86 *c)
|
|||||||
fcr_clr = DPDC;
|
fcr_clr = DPDC;
|
||||||
printk(KERN_NOTICE "Disabling bugged TSC.\n");
|
printk(KERN_NOTICE "Disabling bugged TSC.\n");
|
||||||
clear_cpu_cap(c, X86_FEATURE_TSC);
|
clear_cpu_cap(c, X86_FEATURE_TSC);
|
||||||
#ifdef CONFIG_X86_OOSTORE
|
|
||||||
centaur_create_optimal_mcr();
|
|
||||||
/*
|
|
||||||
* Enable:
|
|
||||||
* write combining on non-stack, non-string
|
|
||||||
* write combining on string, all types
|
|
||||||
* weak write ordering
|
|
||||||
*
|
|
||||||
* The C6 original lacks weak read order
|
|
||||||
*
|
|
||||||
* Note 0x120 is write only on Winchip 1
|
|
||||||
*/
|
|
||||||
wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
|
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
case 8:
|
case 8:
|
||||||
switch (c->x86_mask) {
|
switch (c->x86_mask) {
|
||||||
@ -392,40 +148,12 @@ static void init_centaur(struct cpuinfo_x86 *c)
|
|||||||
fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
|
fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
|
||||||
E2MMX|EAMD3D;
|
E2MMX|EAMD3D;
|
||||||
fcr_clr = DPDC;
|
fcr_clr = DPDC;
|
||||||
#ifdef CONFIG_X86_OOSTORE
|
|
||||||
winchip2_unprotect_mcr();
|
|
||||||
winchip2_create_optimal_mcr();
|
|
||||||
rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
|
|
||||||
/*
|
|
||||||
* Enable:
|
|
||||||
* write combining on non-stack, non-string
|
|
||||||
* write combining on string, all types
|
|
||||||
* weak write ordering
|
|
||||||
*/
|
|
||||||
lo |= 31;
|
|
||||||
wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
|
|
||||||
winchip2_protect_mcr();
|
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
case 9:
|
case 9:
|
||||||
name = "3";
|
name = "3";
|
||||||
fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
|
fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
|
||||||
E2MMX|EAMD3D;
|
E2MMX|EAMD3D;
|
||||||
fcr_clr = DPDC;
|
fcr_clr = DPDC;
|
||||||
#ifdef CONFIG_X86_OOSTORE
|
|
||||||
winchip2_unprotect_mcr();
|
|
||||||
winchip2_create_optimal_mcr();
|
|
||||||
rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
|
|
||||||
/*
|
|
||||||
* Enable:
|
|
||||||
* write combining on non-stack, non-string
|
|
||||||
* write combining on string, all types
|
|
||||||
* weak write ordering
|
|
||||||
*/
|
|
||||||
lo |= 31;
|
|
||||||
wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
|
|
||||||
winchip2_protect_mcr();
|
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
name = "??";
|
name = "??";
|
||||||
|
@ -40,11 +40,7 @@
|
|||||||
#define smp_rmb() barrier()
|
#define smp_rmb() barrier()
|
||||||
#endif /* CONFIG_X86_PPRO_FENCE */
|
#endif /* CONFIG_X86_PPRO_FENCE */
|
||||||
|
|
||||||
#ifdef CONFIG_X86_OOSTORE
|
|
||||||
#define smp_wmb() wmb()
|
|
||||||
#else /* CONFIG_X86_OOSTORE */
|
|
||||||
#define smp_wmb() barrier()
|
#define smp_wmb() barrier()
|
||||||
#endif /* CONFIG_X86_OOSTORE */
|
|
||||||
|
|
||||||
#define smp_read_barrier_depends() read_barrier_depends()
|
#define smp_read_barrier_depends() read_barrier_depends()
|
||||||
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
|
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user