[IA64] fix fls()
The ia64 version of fls() never worked as intended (the bit numbering was off by 1 and fls(0) was undefined). This patch fixes the problem by using a popcnt-based fls(), which on McKinley-derived cores is slightly faster than both ia64_fls() and generic_fls(). The resulting code, however, is bigger (7-8 bundles instead of about 3 bundles). Also switch ia64_popcnt() to __builtin_popcountl() for GCC v3.4 or newer since the compiler can predicate that and schedule it better. Thanks to Simon Derr and Matt Mackall for tracking down this bug. Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
This commit is contained in:
parent
d8470b7c13
commit
821376bf15
@ -314,8 +314,8 @@ __ffs (unsigned long x)
|
||||
#ifdef __KERNEL__
|
||||
|
||||
/*
|
||||
* find_last_zero_bit - find the last zero bit in a 64 bit quantity
|
||||
* @x: The value to search
|
||||
* Return bit number of last (most-significant) bit set. Undefined
|
||||
* for x==0. Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3).
|
||||
*/
|
||||
static inline unsigned long
|
||||
ia64_fls (unsigned long x)
|
||||
@ -327,10 +327,23 @@ ia64_fls (unsigned long x)
|
||||
return exp - 0xffff;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the last (most significant) bit set. Returns 0 for x==0 and
|
||||
* bits are numbered from 1..32 (e.g., fls(9) == 4).
|
||||
*/
|
||||
static inline int
|
||||
fls (int x)
|
||||
fls (int t)
|
||||
{
|
||||
return ia64_fls((unsigned int) x);
|
||||
unsigned long x = t & 0xffffffffu;
|
||||
|
||||
if (!x)
|
||||
return 0;
|
||||
x |= x >> 1;
|
||||
x |= x >> 2;
|
||||
x |= x >> 4;
|
||||
x |= x >> 8;
|
||||
x |= x >> 16;
|
||||
return ia64_popcnt(x);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -133,13 +133,17 @@ register unsigned long ia64_r13 asm ("r13") __attribute_used__;
|
||||
ia64_intri_res; \
|
||||
})
|
||||
|
||||
#define ia64_popcnt(x) \
|
||||
({ \
|
||||
#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
|
||||
# define ia64_popcnt(x) __builtin_popcountl(x)
|
||||
#else
|
||||
# define ia64_popcnt(x) \
|
||||
({ \
|
||||
__u64 ia64_intri_res; \
|
||||
asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \
|
||||
\
|
||||
ia64_intri_res; \
|
||||
})
|
||||
})
|
||||
#endif
|
||||
|
||||
#define ia64_getf_exp(x) \
|
||||
({ \
|
||||
|
Loading…
Reference in New Issue
Block a user