// SPDX-License-Identifier: GPL-2.0
#include <linux/static_call.h>
#include <linux/memory.h>
#include <linux/bug.h>
#include <asm/text-patching.h>

enum insn_type {
	CALL = 0, /* site call */
	NOP = 1,  /* site cond-call */
	JMP = 2,  /* tramp / site tail-call */
	RET = 3,  /* tramp / site cond-tail-call */
};

/*
 * data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax
 * The REX.W cancels the effect of any data16.
 */
static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 };
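
/*
 * Illustrative byte-by-byte reading of the above (standard x86 encoding,
 * noted here for reference): 0x66 0x66 are the two data16 prefixes, 0x48 is
 * REX.W, and 0x31 0xc0 encodes xor %rax, %rax once REX.W takes effect.
 */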

static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
{
	const void *emulate = NULL;
	int size = CALL_INSN_SIZE;
	const void *code;

	switch (type) {
	case CALL:
		code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
		if (func == &__static_call_return0) {
			emulate = code;
			code = &xor5rax;
		}
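		/*
		 * Note (an assumption about text_poke_bp() emulation semantics,
		 * not spelled out in this file): passing the generated CALL as
		 * the emulate template lets a CPU that hits the transient INT3
		 * during patching behave as if it had called
		 * __static_call_return0, which matches the xor5rax semantics
		 * of leaving 0 in %rax.
		 */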
		break;

	case NOP:
		code = x86_nops[5];
		break;

	case JMP:
		code = text_gen_insn(JMP32_INSN_OPCODE, insn, func);
		break;

	case RET:
		code = text_gen_insn(RET_INSN_OPCODE, insn, func);
		size = RET_INSN_SIZE;
		break;
	}

	if (memcmp(insn, code, size) == 0)
		return;

	if (unlikely(system_state == SYSTEM_BOOTING))
		return text_poke_early(insn, code, size);

	text_poke_bp(insn, code, size, emulate);
}

static void __static_call_validate(void *insn, bool tail, bool tramp)
{
	u8 opcode = *(u8 *)insn;

	if (tramp && memcmp(insn+5, "SCT", 3)) {
		pr_err("trampoline signature fail");
		BUG();
	}

	if (tail) {
		if (opcode == JMP32_INSN_OPCODE ||
		    opcode == RET_INSN_OPCODE)
			return;
	} else {
		if (opcode == CALL_INSN_OPCODE ||
		    !memcmp(insn, x86_nops[5], 5) ||
		    !memcmp(insn, xor5rax, 5))
			return;
	}

	/*
	 * If we ever trigger this, our text is corrupt, we'll probably not live long.
	 */
	pr_err("unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn);
	BUG();
}
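
/*
 * Background note (an assumption about where the "SCT" bytes come from, not
 * defined in this file): the trampoline-defining macros in
 * arch/x86/include/asm/static_call.h are expected to emit the three signature
 * bytes 'S' 'C' 'T' immediately after the 5-byte trampoline instruction,
 * which is what the memcmp(insn+5, "SCT", 3) check above relies on.
 */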

static inline enum insn_type __sc_insn(bool null, bool tail)
{
	/*
	 * Encode the following table without branches:
	 *
	 *	tail	null	insn
	 *	-----+-------+------
	 *	  0  |   0   |  CALL
	 *	  0  |   1   |  NOP
	 *	  1  |   0   |  JMP
	 *	  1  |   1   |  RET
	 */
	return 2*tail + null;
}
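
/*
 * Worked example of the encoding above (for illustration): a live function at
 * a regular call site gives __sc_insn(false, false) == 0 == CALL, while a
 * NULL target at the same site gives __sc_insn(true, false) == 1 == NOP,
 * i.e. the call is simply patched out.
 */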

void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
{
	mutex_lock(&text_mutex);

	if (tramp) {
		__static_call_validate(tramp, true, true);
		__static_call_transform(tramp, __sc_insn(!func, true), func);
	}

	if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
		__static_call_validate(site, tail, false);
		__static_call_transform(site, __sc_insn(!func, tail), func);
	}

	mutex_unlock(&text_mutex);
}
EXPORT_SYMBOL_GPL(arch_static_call_transform);
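
/*
 * Minimal usage sketch (illustrative only; the macros live in
 * <linux/static_call.h>, not in this file, and my_func/other_func are
 * made-up names):
 *
 *	static int my_func(int x) { return x + 1; }
 *	static int other_func(int x) { return x - 1; }
 *
 *	DEFINE_STATIC_CALL(my_call, my_func);
 *
 *	// Callers use: static_call(my_call)(42);
 *
 *	// Retargeting the call ends up here: arch_static_call_transform() is
 *	// invoked for the trampoline and, with HAVE_STATIC_CALL_INLINE, for
 *	// each inline call site.
 *	static_call_update(my_call, &other_func);
 */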