linux/arch/powerpc/include/asm/cputable.h

545 lines
20 KiB
C
Raw Normal View History

#ifndef __ASM_POWERPC_CPUTABLE_H
#define __ASM_POWERPC_CPUTABLE_H
#define PPC_FEATURE_32 0x80000000
#define PPC_FEATURE_64 0x40000000
#define PPC_FEATURE_601_INSTR 0x20000000
#define PPC_FEATURE_HAS_ALTIVEC 0x10000000
#define PPC_FEATURE_HAS_FPU 0x08000000
#define PPC_FEATURE_HAS_MMU 0x04000000
#define PPC_FEATURE_HAS_4xxMAC 0x02000000
#define PPC_FEATURE_UNIFIED_CACHE 0x01000000
#define PPC_FEATURE_HAS_SPE 0x00800000
#define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000
#define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000
2005-10-22 10:51:34 +04:00
#define PPC_FEATURE_NO_TB 0x00100000
#define PPC_FEATURE_POWER4 0x00080000
#define PPC_FEATURE_POWER5 0x00040000
#define PPC_FEATURE_POWER5_PLUS 0x00020000
#define PPC_FEATURE_CELL 0x00010000
#define PPC_FEATURE_BOOKE 0x00008000
#define PPC_FEATURE_SMT 0x00004000
#define PPC_FEATURE_ICACHE_SNOOP 0x00002000
#define PPC_FEATURE_ARCH_2_05 0x00001000
#define PPC_FEATURE_PA6T 0x00000800
#define PPC_FEATURE_HAS_DFP 0x00000400
#define PPC_FEATURE_POWER6_EXT 0x00000200
#define PPC_FEATURE_ARCH_2_06 0x00000100
#define PPC_FEATURE_HAS_VSX 0x00000080
#define PPC_FEATURE_PSERIES_PERFMON_COMPAT \
0x00000040
#define PPC_FEATURE_TRUE_LE 0x00000002
#define PPC_FEATURE_PPC_LE 0x00000001
#ifdef __KERNEL__
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#ifndef __ASSEMBLY__
/* This structure can grow, it's real size is used by head.S code
* via the mkdefs mechanism.
*/
struct cpu_spec;
typedef void (*cpu_setup_t)(unsigned long offset, struct cpu_spec* spec);
typedef void (*cpu_restore_t)(void);
enum powerpc_oprofile_type {
PPC_OPROFILE_INVALID = 0,
PPC_OPROFILE_RS64 = 1,
PPC_OPROFILE_POWER4 = 2,
PPC_OPROFILE_G4 = 3,
PPC_OPROFILE_FSL_EMB = 4,
[POWERPC] cell: Add oprofile support Add PPU event-based and cycle-based profiling support to Oprofile for Cell. Oprofile is expected to collect data on all CPUs simultaneously. However, there is one set of performance counters per node. There are two hardware threads or virtual CPUs on each node. Hence, OProfile must multiplex in time the performance counter collection on the two virtual CPUs. The multiplexing of the performance counters is done by a virtual counter routine. Initially, the counters are configured to collect data on the even CPUs in the system, one CPU per node. In order to capture the PC for the virtual CPU when the performance counter interrupt occurs (the specified number of events between samples has occurred), the even processors are configured to handle the performance counter interrupts for their node. The virtual counter routine is called via a kernel timer after the virtual sample time. The routine stops the counters, saves the current counts, loads the last counts for the other virtual CPU on the node, sets interrupts to be handled by the other virtual CPU and restarts the counters, the virtual timer routine is scheduled to run again. The virtual sample time is kept relatively small to make sure sampling occurs on both CPUs on the node with a relatively small granularity. Whenever the counters overflow, the performance counter interrupt is called to collect the PC for the CPU where data is being collected. The oprofile driver relies on a firmware RTAS call to setup the debug bus to route the desired signals to the performance counter hardware to be counted. The RTAS call must set the routing registers appropriately in each of the islands to pass the signals down the debug bus as well as routing the signals from a particular island onto the bus. There is a second firmware RTAS call to reset the debug bus to the non pass thru state when the counters are not in use. Signed-off-by: Carl Love <carll@us.ibm.com> Signed-off-by: Maynard Johnson <mpjohn@us.ibm.com> Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-11-20 20:45:16 +03:00
PPC_OPROFILE_CELL = 5,
PPC_OPROFILE_PA6T = 6,
};
enum powerpc_pmc_type {
PPC_PMC_DEFAULT = 0,
PPC_PMC_IBM = 1,
PPC_PMC_PA6T = 2,
PPC_PMC_G4 = 3,
};
struct pt_regs;
extern int machine_check_generic(struct pt_regs *regs);
extern int machine_check_4xx(struct pt_regs *regs);
extern int machine_check_440A(struct pt_regs *regs);
extern int machine_check_e500mc(struct pt_regs *regs);
extern int machine_check_e500(struct pt_regs *regs);
extern int machine_check_e200(struct pt_regs *regs);
extern int machine_check_47x(struct pt_regs *regs);
[POWERPC] Fix performance monitor on machines with logical PVR Some IBM machines supply a "logical" PVR (processor version register) value in the device tree in the cpu nodes rather than the real PVR. This is used for instance to indicate that the processors in a POWER6 partition have been configured by the hypervisor to run in POWER5+ mode rather than POWER6 mode. To cope with this, we call identify_cpu a second time with the logical PVR value (the first call is with the real PVR value in the very early setup code). However, POWER5+ machines can also supply a logical PVR value, and use the same value (the value that indicates a v2.04 architecture compliant processor). This causes problems for code that uses the performance monitor (such as oprofile), because the PMU registers are different in POWER6 (even in POWER5+ mode) from the real POWER5+. This change works around this problem by taking out the PMU information from the cputable entries for the logical PVR values, and changing identify_cpu so that the second call to it won't overwrite the PMU information that was established by the first call (the one with the real PVR), but does update the other fields. Specifically, if the cputable entry for the logical PVR value has num_pmcs == 0, none of the PMU-related fields get used. So that we can create a mixed cputable entry, we now make cur_cpu_spec point to a single static struct cpu_spec, and copy stuff from cpu_specs[i] into it. This has the side-effect that we can now make cpu_specs[] be initdata. Ultimately it would be good to move the PMU-related fields out to a separate structure, pointed to by the cputable entries, and change identify_cpu so that it saves the PMU info pointer, copies the whole structure, and restores the PMU info pointer, rather than identify_cpu having to list all the fields that are *not* PMU-related. Signed-off-by: Paul Mackerras <paulus@samba.org> Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2007-10-04 08:18:01 +04:00
/* NOTE WELL: Update identify_cpu() if fields are added or removed! */
struct cpu_spec {
/* CPU is matched via (PVR & pvr_mask) == pvr_value */
unsigned int pvr_mask;
unsigned int pvr_value;
char *cpu_name;
unsigned long cpu_features; /* Kernel features */
unsigned int cpu_user_features; /* Userland features */
unsigned int mmu_features; /* MMU features */
/* cache line sizes */
unsigned int icache_bsize;
unsigned int dcache_bsize;
/* number of performance monitor counters */
unsigned int num_pmcs;
enum powerpc_pmc_type pmc_type;
/* this is called to initialize various CPU bits like L1 cache,
* BHT, SPD, etc... from head.S before branching to identify_machine
*/
cpu_setup_t cpu_setup;
/* Used to restore cpu setup on secondary processors and at resume */
cpu_restore_t cpu_restore;
/* Used by oprofile userspace to select the right counters */
char *oprofile_cpu_type;
/* Processor specific oprofile operations */
enum powerpc_oprofile_type oprofile_type;
/* Bit locations inside the mmcra change */
unsigned long oprofile_mmcra_sihv;
unsigned long oprofile_mmcra_sipr;
/* Bits to clear during an oprofile exception */
unsigned long oprofile_mmcra_clear;
/* Name of processor class, for the ELF AT_PLATFORM entry */
char *platform;
/* Processor specific machine check handling. Return negative
* if the error is fatal, 1 if it was fully recovered and 0 to
* pass up (not CPU originated) */
int (*machine_check)(struct pt_regs *regs);
};
extern struct cpu_spec *cur_cpu_spec;
extern unsigned int __start___ftr_fixup, __stop___ftr_fixup;
extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr);
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-20 05:47:18 +04:00
extern void do_feature_fixups(unsigned long value, void *fixup_start,
void *fixup_end);
extern const char *powerpc_base_platform;
#endif /* __ASSEMBLY__ */
/* CPU kernel features */
/* Retain the 32b definitions all use bottom half of word */
#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0000000000000001)
#define CPU_FTR_L2CR ASM_CONST(0x0000000000000002)
#define CPU_FTR_SPEC7450 ASM_CONST(0x0000000000000004)
#define CPU_FTR_ALTIVEC ASM_CONST(0x0000000000000008)
#define CPU_FTR_TAU ASM_CONST(0x0000000000000010)
#define CPU_FTR_CAN_DOZE ASM_CONST(0x0000000000000020)
#define CPU_FTR_USE_TB ASM_CONST(0x0000000000000040)
#define CPU_FTR_L2CSR ASM_CONST(0x0000000000000080)
#define CPU_FTR_601 ASM_CONST(0x0000000000000100)
#define CPU_FTR_DBELL ASM_CONST(0x0000000000000200)
#define CPU_FTR_CAN_NAP ASM_CONST(0x0000000000000400)
#define CPU_FTR_L3CR ASM_CONST(0x0000000000000800)
#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x0000000000001000)
#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x0000000000002000)
#define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x0000000000004000)
#define CPU_FTR_NO_DPM ASM_CONST(0x0000000000008000)
#define CPU_FTR_476_DD2 ASM_CONST(0x0000000000010000)
#define CPU_FTR_NEED_COHERENT ASM_CONST(0x0000000000020000)
#define CPU_FTR_NO_BTIC ASM_CONST(0x0000000000040000)
#define CPU_FTR_NODSISRALIGN ASM_CONST(0x0000000000100000)
#define CPU_FTR_PPC_LE ASM_CONST(0x0000000000200000)
#define CPU_FTR_REAL_LE ASM_CONST(0x0000000000400000)
#define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x0000000000800000)
#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x0000000001000000)
#define CPU_FTR_SPE ASM_CONST(0x0000000002000000)
#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x0000000004000000)
#define CPU_FTR_LWSYNC ASM_CONST(0x0000000008000000)
#define CPU_FTR_NOEXECUTE ASM_CONST(0x0000000010000000)
#define CPU_FTR_INDEXED_DCR ASM_CONST(0x0000000020000000)
/*
* Add the 64-bit processor unique features in the top half of the word;
* on 32-bit, make the names available but defined to be 0.
*/
#ifdef __powerpc64__
#define LONG_ASM_CONST(x) ASM_CONST(x)
#else
#define LONG_ASM_CONST(x) 0
#endif
#define CPU_FTR_HVMODE_206 LONG_ASM_CONST(0x0000000800000000)
#define CPU_FTR_IABR LONG_ASM_CONST(0x0000002000000000)
#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000004000000000)
#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000008000000000)
#define CPU_FTR_SMT LONG_ASM_CONST(0x0000010000000000)
#define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000200000000000)
#define CPU_FTR_PURR LONG_ASM_CONST(0x0000400000000000)
#define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000800000000000)
#define CPU_FTR_SPURR LONG_ASM_CONST(0x0001000000000000)
#define CPU_FTR_DSCR LONG_ASM_CONST(0x0002000000000000)
#define CPU_FTR_VSX LONG_ASM_CONST(0x0010000000000000)
#define CPU_FTR_SAO LONG_ASM_CONST(0x0020000000000000)
#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000)
#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0080000000000000)
#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0100000000000000)
powerpc: Feature nop out reservation clear when stcx checks address The POWER architecture does not require stcx to check that it is operating on the same address as the larx. This means it is possible for an an exception handler to execute a larx, get a reservation, decide not to do the stcx and then return back with an active reservation. If the interrupted code was in the middle of a larx/stcx sequence the stcx could incorrectly succeed. All recent POWER CPUs check the address before letting the stcx succeed so we can create a CPU feature and nop it out. As Ben suggested, we can only do this in our syscall path because there is a remote possibility some kernel code gets interrupted by an exception that ends up operating on the same cacheline. Thanks to Paul Mackerras and Derek Williams for the idea. To test this I used a very simple null syscall (actually getppid) testcase at http://ozlabs.org/~anton/junkcode/null_syscall.c I tested against 2.6.35-git10 with the following changes against the pseries_defconfig: CONFIG_VIRT_CPU_ACCOUNTING=n CONFIG_AUDIT=n CONFIG_PPC_4K_PAGES=n CONFIG_PPC_64K_PAGES=y CONFIG_FORCE_MAX_ZONEORDER=9 CONFIG_PPC_SUBPAGE_PROT=n CONFIG_FUNCTION_TRACER=n CONFIG_FUNCTION_GRAPH_TRACER=n CONFIG_IRQSOFF_TRACER=n CONFIG_STACK_TRACER=n to remove the overhead of virtual CPU accounting, syscall auditing and the ftrace mcount tracers. 64kB pages were enabled to minimise TLB misses. POWER6: +8.2% POWER7: +7.0% Another suggestion was to use a larx to something in the L1 instead of a stcx. This was almost as fast as removing the larx on POWER6, but only 3.5% faster on POWER7. We can use this to speed up the reservation clear in our exception exit code. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-11 05:40:27 +04:00
#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0200000000000000)
#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0400000000000000)
#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0800000000000000)
#ifndef __ASSEMBLY__
#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_SLB | MMU_FTR_TLBIEL | \
MMU_FTR_16M_PAGE)
/* We only set the altivec features if the kernel was compiled with altivec
* support
*/
#ifdef CONFIG_ALTIVEC
#define CPU_FTR_ALTIVEC_COMP CPU_FTR_ALTIVEC
#define PPC_FEATURE_HAS_ALTIVEC_COMP PPC_FEATURE_HAS_ALTIVEC
#else
#define CPU_FTR_ALTIVEC_COMP 0
#define PPC_FEATURE_HAS_ALTIVEC_COMP 0
#endif
/* We only set the VSX features if the kernel was compiled with VSX
* support
*/
#ifdef CONFIG_VSX
#define CPU_FTR_VSX_COMP CPU_FTR_VSX
#define PPC_FEATURE_HAS_VSX_COMP PPC_FEATURE_HAS_VSX
#else
#define CPU_FTR_VSX_COMP 0
#define PPC_FEATURE_HAS_VSX_COMP 0
#endif
/* We only set the spe features if the kernel was compiled with spe
* support
*/
#ifdef CONFIG_SPE
#define CPU_FTR_SPE_COMP CPU_FTR_SPE
#define PPC_FEATURE_HAS_SPE_COMP PPC_FEATURE_HAS_SPE
#define PPC_FEATURE_HAS_EFP_SINGLE_COMP PPC_FEATURE_HAS_EFP_SINGLE
#define PPC_FEATURE_HAS_EFP_DOUBLE_COMP PPC_FEATURE_HAS_EFP_DOUBLE
#else
#define CPU_FTR_SPE_COMP 0
#define PPC_FEATURE_HAS_SPE_COMP 0
#define PPC_FEATURE_HAS_EFP_SINGLE_COMP 0
#define PPC_FEATURE_HAS_EFP_DOUBLE_COMP 0
#endif
/* We need to mark all pages as being coherent if we're SMP or we have a
* 74[45]x and an MPC107 host bridge. Also 83xx and PowerQUICC II
* require it for PCI "streaming/prefetch" to work properly.
* This is also required by 52xx family.
*/
#if defined(CONFIG_SMP) || defined(CONFIG_MPC10X_BRIDGE) \
|| defined(CONFIG_PPC_83xx) || defined(CONFIG_8260) \
|| defined(CONFIG_PPC_MPC52xx)
#define CPU_FTR_COMMON CPU_FTR_NEED_COHERENT
#else
#define CPU_FTR_COMMON 0
#endif
/* The powersave features NAP & DOZE seems to confuse BDI when
debugging. So if a BDI is used, disable theses
*/
#ifndef CONFIG_BDI_SWITCH
#define CPU_FTR_MAYBE_CAN_DOZE CPU_FTR_CAN_DOZE
#define CPU_FTR_MAYBE_CAN_NAP CPU_FTR_CAN_NAP
#else
#define CPU_FTR_MAYBE_CAN_DOZE 0
#define CPU_FTR_MAYBE_CAN_NAP 0
#endif
#define CLASSIC_PPC (!defined(CONFIG_8xx) && !defined(CONFIG_4xx) && \
!defined(CONFIG_POWER3) && !defined(CONFIG_POWER4) && \
!defined(CONFIG_BOOKE))
#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
#define CPU_FTRS_603 (CPU_FTR_COMMON | \
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_604 (CPU_FTR_COMMON | \
CPU_FTR_USE_TB | CPU_FTR_PPC_LE)
#define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | \
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_740 (CPU_FTR_COMMON | \
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_PPC_LE)
#define CPU_FTRS_750 (CPU_FTR_COMMON | \
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_PPC_LE)
#define CPU_FTRS_750CL (CPU_FTRS_750)
#define CPU_FTRS_750FX1 (CPU_FTRS_750 | CPU_FTR_DUAL_PLL_750FX | CPU_FTR_NO_DPM)
#define CPU_FTRS_750FX2 (CPU_FTRS_750 | CPU_FTR_NO_DPM)
#define CPU_FTRS_750FX (CPU_FTRS_750 | CPU_FTR_DUAL_PLL_750FX)
#define CPU_FTRS_750GX (CPU_FTRS_750FX)
#define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | \
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_7400 (CPU_FTR_COMMON | \
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_7450_20 (CPU_FTR_COMMON | \
CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7450_21 (CPU_FTR_COMMON | \
CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7450_23 (CPU_FTR_COMMON | \
CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
#define CPU_FTRS_7455_1 (CPU_FTR_COMMON | \
CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \
CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
#define CPU_FTRS_7455_20 (CPU_FTR_COMMON | \
CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
#define CPU_FTRS_7455 (CPU_FTR_COMMON | \
CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7447_10 (CPU_FTR_COMMON | \
CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE | \
CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7447 (CPU_FTR_COMMON | \
CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7447A (CPU_FTR_COMMON | \
CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7448 (CPU_FTR_COMMON | \
CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_82XX (CPU_FTR_COMMON | \
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB)
#define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP)
#define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_COMMON)
#define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB)
#define CPU_FTRS_8XX (CPU_FTR_USE_TB)
#define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
CPU_FTR_INDEXED_DCR)
#define CPU_FTRS_47X (CPU_FTRS_440x6)
#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \
CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
CPU_FTR_NOEXECUTE)
#define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL)
#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
/* 64-bit CPUs */
#define CPU_FTRS_POWER3 (CPU_FTR_USE_TB | \
CPU_FTR_IABR | CPU_FTR_PPC_LE)
#define CPU_FTRS_RS64 (CPU_FTR_USE_TB | \
CPU_FTR_IABR | \
CPU_FTR_MMCRA | CPU_FTR_CTRL)
#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
powerpc: Feature nop out reservation clear when stcx checks address The POWER architecture does not require stcx to check that it is operating on the same address as the larx. This means it is possible for an an exception handler to execute a larx, get a reservation, decide not to do the stcx and then return back with an active reservation. If the interrupted code was in the middle of a larx/stcx sequence the stcx could incorrectly succeed. All recent POWER CPUs check the address before letting the stcx succeed so we can create a CPU feature and nop it out. As Ben suggested, we can only do this in our syscall path because there is a remote possibility some kernel code gets interrupted by an exception that ends up operating on the same cacheline. Thanks to Paul Mackerras and Derek Williams for the idea. To test this I used a very simple null syscall (actually getppid) testcase at http://ozlabs.org/~anton/junkcode/null_syscall.c I tested against 2.6.35-git10 with the following changes against the pseries_defconfig: CONFIG_VIRT_CPU_ACCOUNTING=n CONFIG_AUDIT=n CONFIG_PPC_4K_PAGES=n CONFIG_PPC_64K_PAGES=y CONFIG_FORCE_MAX_ZONEORDER=9 CONFIG_PPC_SUBPAGE_PROT=n CONFIG_FUNCTION_TRACER=n CONFIG_FUNCTION_GRAPH_TRACER=n CONFIG_IRQSOFF_TRACER=n CONFIG_STACK_TRACER=n to remove the overhead of virtual CPU accounting, syscall auditing and the ftrace mcount tracers. 64kB pages were enabled to minimise TLB misses. POWER6: +8.2% POWER7: +7.0% Another suggestion was to use a larx to something in the L1 instead of a stcx. This was almost as fast as removing the larx on POWER6, but only 3.5% faster on POWER7. We can use this to speed up the reservation clear in our exception exit code. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-11 05:40:27 +04:00
CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \
CPU_FTR_STCX_CHECKS_ADDRESS)
#define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \
powerpc: Feature nop out reservation clear when stcx checks address The POWER architecture does not require stcx to check that it is operating on the same address as the larx. This means it is possible for an an exception handler to execute a larx, get a reservation, decide not to do the stcx and then return back with an active reservation. If the interrupted code was in the middle of a larx/stcx sequence the stcx could incorrectly succeed. All recent POWER CPUs check the address before letting the stcx succeed so we can create a CPU feature and nop it out. As Ben suggested, we can only do this in our syscall path because there is a remote possibility some kernel code gets interrupted by an exception that ends up operating on the same cacheline. Thanks to Paul Mackerras and Derek Williams for the idea. To test this I used a very simple null syscall (actually getppid) testcase at http://ozlabs.org/~anton/junkcode/null_syscall.c I tested against 2.6.35-git10 with the following changes against the pseries_defconfig: CONFIG_VIRT_CPU_ACCOUNTING=n CONFIG_AUDIT=n CONFIG_PPC_4K_PAGES=n CONFIG_PPC_64K_PAGES=y CONFIG_FORCE_MAX_ZONEORDER=9 CONFIG_PPC_SUBPAGE_PROT=n CONFIG_FUNCTION_TRACER=n CONFIG_FUNCTION_GRAPH_TRACER=n CONFIG_IRQSOFF_TRACER=n CONFIG_STACK_TRACER=n to remove the overhead of virtual CPU accounting, syscall auditing and the ftrace mcount tracers. 64kB pages were enabled to minimise TLB misses. POWER6: +8.2% POWER7: +7.0% Another suggestion was to use a larx to something in the L1 instead of a stcx. This was almost as fast as removing the larx on POWER6, but only 3.5% faster on POWER7. We can use this to speed up the reservation clear in our exception exit code. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-11 05:40:27 +04:00
CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS)
#define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
powerpc: Feature nop out reservation clear when stcx checks address The POWER architecture does not require stcx to check that it is operating on the same address as the larx. This means it is possible for an an exception handler to execute a larx, get a reservation, decide not to do the stcx and then return back with an active reservation. If the interrupted code was in the middle of a larx/stcx sequence the stcx could incorrectly succeed. All recent POWER CPUs check the address before letting the stcx succeed so we can create a CPU feature and nop it out. As Ben suggested, we can only do this in our syscall path because there is a remote possibility some kernel code gets interrupted by an exception that ends up operating on the same cacheline. Thanks to Paul Mackerras and Derek Williams for the idea. To test this I used a very simple null syscall (actually getppid) testcase at http://ozlabs.org/~anton/junkcode/null_syscall.c I tested against 2.6.35-git10 with the following changes against the pseries_defconfig: CONFIG_VIRT_CPU_ACCOUNTING=n CONFIG_AUDIT=n CONFIG_PPC_4K_PAGES=n CONFIG_PPC_64K_PAGES=y CONFIG_FORCE_MAX_ZONEORDER=9 CONFIG_PPC_SUBPAGE_PROT=n CONFIG_FUNCTION_TRACER=n CONFIG_FUNCTION_GRAPH_TRACER=n CONFIG_IRQSOFF_TRACER=n CONFIG_STACK_TRACER=n to remove the overhead of virtual CPU accounting, syscall auditing and the ftrace mcount tracers. 64kB pages were enabled to minimise TLB misses. POWER6: +8.2% POWER7: +7.0% Another suggestion was to use a larx to something in the L1 instead of a stcx. This was almost as fast as removing the larx on POWER6, but only 3.5% faster on POWER7. We can use this to speed up the reservation clear in our exception exit code. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-11 05:40:27 +04:00
CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_HVMODE_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
powerpc: Feature nop out reservation clear when stcx checks address The POWER architecture does not require stcx to check that it is operating on the same address as the larx. This means it is possible for an an exception handler to execute a larx, get a reservation, decide not to do the stcx and then return back with an active reservation. If the interrupted code was in the middle of a larx/stcx sequence the stcx could incorrectly succeed. All recent POWER CPUs check the address before letting the stcx succeed so we can create a CPU feature and nop it out. As Ben suggested, we can only do this in our syscall path because there is a remote possibility some kernel code gets interrupted by an exception that ends up operating on the same cacheline. Thanks to Paul Mackerras and Derek Williams for the idea. To test this I used a very simple null syscall (actually getppid) testcase at http://ozlabs.org/~anton/junkcode/null_syscall.c I tested against 2.6.35-git10 with the following changes against the pseries_defconfig: CONFIG_VIRT_CPU_ACCOUNTING=n CONFIG_AUDIT=n CONFIG_PPC_4K_PAGES=n CONFIG_PPC_64K_PAGES=y CONFIG_FORCE_MAX_ZONEORDER=9 CONFIG_PPC_SUBPAGE_PROT=n CONFIG_FUNCTION_TRACER=n CONFIG_FUNCTION_GRAPH_TRACER=n CONFIG_IRQSOFF_TRACER=n CONFIG_STACK_TRACER=n to remove the overhead of virtual CPU accounting, syscall auditing and the ftrace mcount tracers. 64kB pages were enabled to minimise TLB misses. POWER6: +8.2% POWER7: +7.0% Another suggestion was to use a larx to something in the L1 instead of a stcx. This was almost as fast as removing the larx on POWER6, but only 3.5% faster on POWER7. We can use this to speed up the reservation clear in our exception exit code. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-11 05:40:27 +04:00
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_PAUSE_ZERO | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
CPU_FTR_UNALIGNED_LD_STD)
#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_PURR | CPU_FTR_REAL_LE)
#define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
#define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \
CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
#ifdef __powerpc64__
#ifdef CONFIG_PPC_BOOK3E
#define CPU_FTRS_POSSIBLE (CPU_FTRS_E5500 | CPU_FTRS_A2)
#else
#define CPU_FTRS_POSSIBLE \
(CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | \
CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 | \
CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \
CPU_FTR_VSX)
#endif
#else
enum {
CPU_FTRS_POSSIBLE =
#if CLASSIC_PPC
CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 |
CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX |
CPU_FTRS_7400_NOTAU | CPU_FTRS_7400 | CPU_FTRS_7450_20 |
CPU_FTRS_7450_21 | CPU_FTRS_7450_23 | CPU_FTRS_7455_1 |
CPU_FTRS_7455_20 | CPU_FTRS_7455 | CPU_FTRS_7447_10 |
CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_82XX |
CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 |
CPU_FTRS_CLASSIC32 |
#else
CPU_FTRS_GENERIC_32 |
#endif
#ifdef CONFIG_8xx
CPU_FTRS_8XX |
#endif
#ifdef CONFIG_40x
CPU_FTRS_40X |
#endif
#ifdef CONFIG_44x
CPU_FTRS_44X | CPU_FTRS_440x6 |
#endif
#ifdef CONFIG_PPC_47x
CPU_FTRS_47X | CPU_FTR_476_DD2 |
#endif
#ifdef CONFIG_E200
CPU_FTRS_E200 |
#endif
#ifdef CONFIG_E500
CPU_FTRS_E500 | CPU_FTRS_E500_2 | CPU_FTRS_E500MC |
CPU_FTRS_E5500 |
#endif
0,
};
#endif /* __powerpc64__ */
#ifdef __powerpc64__
#ifdef CONFIG_PPC_BOOK3E
#define CPU_FTRS_ALWAYS (CPU_FTRS_E5500 & CPU_FTRS_A2)
#else
#define CPU_FTRS_ALWAYS \
(CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 & \
CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & CPU_FTRS_POWER6 & \
CPU_FTRS_POWER7 & CPU_FTRS_CELL & CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE)
#endif
#else
enum {
CPU_FTRS_ALWAYS =
#if CLASSIC_PPC
CPU_FTRS_PPC601 & CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 &
CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX &
CPU_FTRS_7400_NOTAU & CPU_FTRS_7400 & CPU_FTRS_7450_20 &
CPU_FTRS_7450_21 & CPU_FTRS_7450_23 & CPU_FTRS_7455_1 &
CPU_FTRS_7455_20 & CPU_FTRS_7455 & CPU_FTRS_7447_10 &
CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_82XX &
CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 &
CPU_FTRS_CLASSIC32 &
#else
CPU_FTRS_GENERIC_32 &
#endif
#ifdef CONFIG_8xx
CPU_FTRS_8XX &
#endif
#ifdef CONFIG_40x
CPU_FTRS_40X &
#endif
#ifdef CONFIG_44x
CPU_FTRS_44X & CPU_FTRS_440x6 &
#endif
#ifdef CONFIG_E200
CPU_FTRS_E200 &
#endif
#ifdef CONFIG_E500
CPU_FTRS_E500 & CPU_FTRS_E500_2 & CPU_FTRS_E500MC &
CPU_FTRS_E5500 &
#endif
CPU_FTRS_POSSIBLE,
};
#endif /* __powerpc64__ */
static inline int cpu_has_feature(unsigned long feature)
{
return (CPU_FTRS_ALWAYS & feature) ||
(CPU_FTRS_POSSIBLE
& cur_cpu_spec->cpu_features
& feature);
}
#ifdef CONFIG_HAVE_HW_BREAKPOINT
#define HBP_NUM 1
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_CPUTABLE_H */