#include <linux/module.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>

#ifdef CONFIG_HOTPLUG_CPU
static int smp_alt_once;

static int __init bootonly(char *str)
{
	smp_alt_once = 1;
	return 1;
}
__setup("smp-alt-boot", bootonly);
#else
#define smp_alt_once 1
#endif

static int debug_alternative;

static int __init debug_alt(char *str)
{
	debug_alternative = 1;
	return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
	noreplace_smp = 1;
	return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#ifdef CONFIG_PARAVIRT
static int noreplace_paravirt = 0;

static int __init setup_noreplace_paravirt(char *str)
{
	noreplace_paravirt = 1;
	return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif

#define DPRINTK(fmt, args...) if (debug_alternative) \
	printk(KERN_DEBUG fmt, args)
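
/*
 * The tables below are indexed by length: <vendor>_nops[n] points at an
 * n-byte NOP sequence (up to ASM_NOP_MAX bytes), assembled from the
 * single-NOP strings provided by the asm headers.
 */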

#ifdef GENERIC_NOP1
/* Use inline assembly to define this because the nops are defined
   as inline assembly strings in the include files and we cannot
   get them easily into strings. */
asm("\t.data\nintelnops: "
	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
	GENERIC_NOP7 GENERIC_NOP8);
extern unsigned char intelnops[];
static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
	NULL,
	intelnops,
	intelnops + 1,
	intelnops + 1 + 2,
	intelnops + 1 + 2 + 3,
	intelnops + 1 + 2 + 3 + 4,
	intelnops + 1 + 2 + 3 + 4 + 5,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K8_NOP1
asm("\t.data\nk8nops: "
	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
	K8_NOP7 K8_NOP8);
extern unsigned char k8nops[];
static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
	NULL,
	k8nops,
	k8nops + 1,
	k8nops + 1 + 2,
	k8nops + 1 + 2 + 3,
	k8nops + 1 + 2 + 3 + 4,
	k8nops + 1 + 2 + 3 + 4 + 5,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K7_NOP1
asm("\t.data\nk7nops: "
	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
	K7_NOP7 K7_NOP8);
extern unsigned char k7nops[];
static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
	NULL,
	k7nops,
	k7nops + 1,
	k7nops + 1 + 2,
	k7nops + 1 + 2 + 3,
	k7nops + 1 + 2 + 3 + 4,
	k7nops + 1 + 2 + 3 + 4 + 5,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif
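
/*
 * Pick the NOP table that matches the boot CPU: on 64-bit we always use
 * the K8 NOPs, on 32-bit we consult the CPU feature bits and fall back
 * to the generic Intel NOPs.
 */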

#ifdef CONFIG_X86_64

extern char __vsyscall_0;
static inline unsigned char** find_nop_table(void)
{
	return k8_nops;
}

#else /* CONFIG_X86_64 */

static struct nop {
	int cpuid;
	unsigned char **noptable;
} noptypes[] = {
	{ X86_FEATURE_K8, k8_nops },
	{ X86_FEATURE_K7, k7_nops },
	{ -1, NULL }
};

static unsigned char** find_nop_table(void)
{
	unsigned char **noptable = intel_nops;
	int i;

	for (i = 0; noptypes[i].cpuid >= 0; i++) {
		if (boot_cpu_has(noptypes[i].cpuid)) {
			noptable = noptypes[i].noptable;
			break;
		}
	}
	return noptable;
}

#endif /* CONFIG_X86_64 */
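
/*
 * Fill a range of kernel text with the most efficient NOP sequences
 * available for this CPU, in chunks of at most ASM_NOP_MAX bytes, going
 * through text_poke() so that write-protected pages can be patched too.
 */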
static void nop_out(void *insns, unsigned int len)
{
	unsigned char **noptable = find_nop_table();

	while (len > 0) {
		unsigned int noplen = len;
		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;
		text_poke(insns, noptable[noplen], noplen);
		insns += noplen;
		len -= noplen;
	}
}

extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];

/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough. Make sure you disable such features by hand. */
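
/*
 * For reference (illustrative only, see <asm/alternative.h> for the real
 * macros): alt table entries are normally emitted by the alternative()
 * macro family, roughly along the lines of
 *
 *	alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2);
 *
 * which records the original instruction, its replacement and the CPU
 * feature bit gating the replacement in a struct alt_instr.
 */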
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
	struct alt_instr *a;
	u8 *instr;
	int diff;

	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
	for (a = start; a < end; a++) {
		BUG_ON(a->replacementlen > a->instrlen);
		if (!boot_cpu_has(a->cpuid))
			continue;
		instr = a->instr;
#ifdef CONFIG_X86_64
		/* vsyscall code is not mapped yet. resolve it manually. */
		if (instr >= (u8 *)VSYSCALL_START && instr < (u8 *)VSYSCALL_END) {
			instr = __va(instr - (u8 *)VSYSCALL_START + (u8 *)__pa_symbol(&__vsyscall_0));
			DPRINTK("%s: vsyscall fixup: %p => %p\n",
				__FUNCTION__, a->instr, instr);
		}
#endif
		memcpy(instr, a->replacement, a->replacementlen);
		diff = a->instrlen - a->replacementlen;
		nop_out(instr + a->replacementlen, diff);
	}
}

#ifdef CONFIG_SMP
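
/*
 * Patch the byte at every address recorded in the __smp_locks section
 * that falls inside [text, text_end): restore the 0xf0 lock prefix for
 * SMP operation, or replace it with a one-byte NOP for UP.
 */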
static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
	u8 **ptr;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr < text)
			continue;
		if (*ptr > text_end)
			continue;
		text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
	}
}

static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
	u8 **ptr;

	if (noreplace_smp)
		return;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr < text)
			continue;
		if (*ptr > text_end)
			continue;
		nop_out(*ptr, 1);
	}
}

struct smp_alt_module {
	/* the module that owns these lock prefixes (NULL for the core kernel) */
	struct module	*mod;
	char		*name;

	/* ptrs to lock prefixes */
	u8		**locks;
	u8		**locks_end;

	/* .text segment, needed to avoid patching init code ;) */
	u8		*text;
	u8		*text_end;

	struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_SPINLOCK(smp_alt);
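
/*
 * Register a module's (or the core kernel's) lock-prefix list so the
 * sites can be re-patched later when the system switches between UP and
 * SMP operation.  With smp_alt_once we patch once right here and keep
 * no state around.
 */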
void alternatives_smp_module_add(struct module *mod, char *name,
				 void *locks, void *locks_end,
				 void *text,  void *text_end)
{
	struct smp_alt_module *smp;
	unsigned long flags;

	if (noreplace_smp)
		return;

	if (smp_alt_once) {
		if (boot_cpu_has(X86_FEATURE_UP))
			alternatives_smp_unlock(locks, locks_end,
						text, text_end);
		return;
	}

	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
	if (NULL == smp)
		return; /* we'll run the (safe but slow) SMP code then ... */

	smp->mod	= mod;
	smp->name	= name;
	smp->locks	= locks;
	smp->locks_end	= locks_end;
	smp->text	= text;
	smp->text_end	= text_end;
	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
		__FUNCTION__, smp->locks, smp->locks_end,
		smp->text, smp->text_end, smp->name);

	spin_lock_irqsave(&smp_alt, flags);
	list_add_tail(&smp->next, &smp_alt_modules);
	if (boot_cpu_has(X86_FEATURE_UP))
		alternatives_smp_unlock(smp->locks, smp->locks_end,
					smp->text, smp->text_end);
	spin_unlock_irqrestore(&smp_alt, flags);
}

void alternatives_smp_module_del(struct module *mod)
{
	struct smp_alt_module *item;
	unsigned long flags;

	if (smp_alt_once || noreplace_smp)
		return;

	spin_lock_irqsave(&smp_alt, flags);
	list_for_each_entry(item, &smp_alt_modules, next) {
		if (mod != item->mod)
			continue;
		list_del(&item->next);
		spin_unlock_irqrestore(&smp_alt, flags);
		DPRINTK("%s: %s\n", __FUNCTION__, item->name);
		kfree(item);
		return;
	}
	spin_unlock_irqrestore(&smp_alt, flags);
}
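
/*
 * Switch every registered lock-prefix site to SMP or UP form.  Called
 * for instance when CPUs are brought up or taken down at runtime.
 */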
void alternatives_smp_switch(int smp)
{
	struct smp_alt_module *mod;
	unsigned long flags;

#ifdef CONFIG_LOCKDEP
	/*
	 * A not yet fixed binutils section handling bug prevents
	 * alternatives-replacement from working reliably, so turn
	 * it off:
	 */
	printk("lockdep: not fixing up alternatives.\n");
	return;
#endif

	if (noreplace_smp || smp_alt_once)
		return;
	BUG_ON(!smp && (num_online_cpus() > 1));

	spin_lock_irqsave(&smp_alt, flags);
	if (smp) {
		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
		clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
		clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_lock(mod->locks, mod->locks_end,
					      mod->text, mod->text_end);
	} else {
		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
		set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
		set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_unlock(mod->locks, mod->locks_end,
						mod->text, mod->text_end);
	}
	spin_unlock_irqrestore(&smp_alt, flags);
}

#endif

#ifdef CONFIG_PARAVIRT
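/*
 * Paravirt patching: each recorded call site is handed to the active
 * paravirt_ops backend, which rewrites it in place and reports how many
 * bytes it used; the remainder of the site is padded with NOPs.
 */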
void apply_paravirt(struct paravirt_patch_site *start,
		    struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *p;

	if (noreplace_paravirt)
		return;

	for (p = start; p < end; p++) {
		unsigned int used;

		used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
					  p->len);

		BUG_ON(used > p->len);

		/* Pad the rest with nops */
		nop_out(p->instr + used, p->len - used);
	}
}
extern struct paravirt_patch_site __start_parainstructions[],
	__stop_parainstructions[];
#endif	/* CONFIG_PARAVIRT */
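
/*
 * Boot-time entry point: run all patching passes with interrupts
 * disabled, before any other CPUs have been started.
 */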
void __init alternative_instructions(void)
{
	unsigned long flags;

	local_irq_save(flags);
	apply_alternatives(__alt_instructions, __alt_instructions_end);

	/* switch to patch-once-at-boottime-only mode and free the
	 * tables in case we know the number of CPUs will never ever
	 * change */
#ifdef CONFIG_HOTPLUG_CPU
	if (num_possible_cpus() < 2)
		smp_alt_once = 1;
#endif

#ifdef CONFIG_SMP
	if (smp_alt_once) {
		if (1 == num_possible_cpus()) {
			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
			set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
			set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
						_text, _etext);
		}
		free_init_pages("SMP alternatives",
Revert "[PATCH] x86: __pa and __pa_symbol address space separation"
This was broken. It adds complexity, for no good reason. Rather than
separate __pa() and __pa_symbol(), we should deprecate __pa_symbol(),
and preferably __pa() too - and just use "virt_to_phys()" instead, which
is more readable and has nicer semantics.
However, right now, just undo the separation, and make __pa_symbol() be
the exact same as __pa(). That fixes the bugs this patch introduced,
and we can do the fairly obvious cleanups later.
Do the new __phys_addr() function (which is now the actual workhorse for
the unified __pa()/__pa_symbol()) as a real external function, that way
all the potential issues with compile/link-time optimizations of
constant symbol addresses go away, and we can also, if we choose to, add
more sanity-checking of the argument.
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-07 08:44:24 -07:00
				(unsigned long)__smp_locks,
				(unsigned long)__smp_locks_end);
	} else {
		alternatives_smp_module_add(NULL, "core kernel",
					    __smp_locks, __smp_locks_end,
					    _text, _etext);
		alternatives_smp_switch(0);
	}
#endif
	apply_paravirt(__parainstructions, __parainstructions_end);
	local_irq_restore(flags);
}

/*
 * Warning:
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in
 * parallel.  No thread may be preempted in the middle of these
 * instructions either.  And on the local CPU you need to protect
 * yourself against NMI or MCE handlers seeing an inconsistent
 * instruction while you patch.
 */
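/*
 * Illustrative use only (not part of this file): a single-byte patch,
 * such as planting a breakpoint the way kprobes does, could look like
 *
 *	unsigned char int3 = 0xcc;
 *	text_poke(addr, &int3, 1);
 *
 * One-byte stores avoid the multi-byte hazards described above.
 */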
void __kprobes text_poke(void *oaddr, unsigned char *opcode, int len)
{
	u8 *addr = oaddr;

	if (!pte_write(*lookup_address((unsigned long)addr))) {
		struct page *p[2] = { virt_to_page(addr), virt_to_page(addr+PAGE_SIZE) };
		addr = vmap(p, 2, VM_MAP, PAGE_KERNEL);
		if (!addr)
			return;
		addr += ((unsigned long)oaddr) % PAGE_SIZE;
	}
	memcpy(addr, opcode, len);
	sync_core();
	/* Not strictly needed, but can speed CPU recovery up. Ignore cross cacheline
	   case. */
	if (cpu_has_clflush)
		asm("clflush (%0) " :: "r" (oaddr) : "memory");
	if (addr != oaddr)
		vunmap(addr);
}