x86/asm/bitops: Use __builtin_clz{l|ll} to evaluate constant expressions
Micro-optimize the bitops code some more, similar to commits:

  fdb6649ab7 ("x86/asm/bitops: Use __builtin_ctzl() to evaluate constant expressions")
  2fcff790dc ("powerpc: Use builtin functions for fls()/__fls()/fls64()")

From a recent discussion, I noticed that x86 is lacking an optimization that appears in arch/powerpc/include/asm/bitops.h related to constant folding. If you add a BUILD_BUG_ON(__builtin_constant_p(param)) to these functions, you'll find that there were cases where the use of inline asm pessimized the compiler's ability to perform constant folding, resulting in runtime calculation of a value that could have been computed at compile time.

Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230828-x86_fls-v1-1-e6a31b9f79c3@google.com
This commit is contained in:
committed by
Ingo Molnar
parent
4accdb9895
commit
3dae5c43ba
@ -293,6 +293,9 @@ static __always_inline unsigned long variable_ffz(unsigned long word)
|
|||||||
*/
|
*/
|
||||||
static __always_inline unsigned long __fls(unsigned long word)
|
static __always_inline unsigned long __fls(unsigned long word)
|
||||||
{
|
{
|
||||||
|
if (__builtin_constant_p(word))
|
||||||
|
return BITS_PER_LONG - 1 - __builtin_clzl(word);
|
||||||
|
|
||||||
asm("bsr %1,%0"
|
asm("bsr %1,%0"
|
||||||
: "=r" (word)
|
: "=r" (word)
|
||||||
: "rm" (word));
|
: "rm" (word));
|
||||||
@ -360,6 +363,9 @@ static __always_inline int fls(unsigned int x)
|
|||||||
{
|
{
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
|
if (__builtin_constant_p(x))
|
||||||
|
return x ? 32 - __builtin_clz(x) : 0;
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
/*
|
/*
|
||||||
* AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
|
* AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
|
||||||
@ -401,6 +407,9 @@ static __always_inline int fls(unsigned int x)
|
|||||||
static __always_inline int fls64(__u64 x)
|
static __always_inline int fls64(__u64 x)
|
||||||
{
|
{
|
||||||
int bitpos = -1;
|
int bitpos = -1;
|
||||||
|
|
||||||
|
if (__builtin_constant_p(x))
|
||||||
|
return x ? 64 - __builtin_clzll(x) : 0;
|
||||||
/*
|
/*
|
||||||
* AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
|
* AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
|
||||||
* dest reg is undefined if x==0, but their CPU architect says its
|
* dest reg is undefined if x==0, but their CPU architect says its
|
||||||
|
Reference in New Issue
Block a user