linux/arch/x86/lib/hweight.S
Borislav Petkov f5967101e9 x86/hweight: Get rid of the special calling convention
People complained about ARCH_HWEIGHT_CFLAGS and how it throws a wrench
into kcov, LTO and similar experimentation.

Add asm versions of __sw_hweight{32,64}() which explicitly save and
restore the registers they clobber. This gets rid of the special calling
convention; these are the functions we fall back to on !X86_FEATURE_POPCNT
CPUs.
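
For reference, a minimal C sketch of the parallel-sum popcount the new asm
performs (the function name is illustrative; this is not code from this
commit):

    static unsigned int sw_hweight32_sketch(unsigned int w)
    {
            /* count bits in each 2-bit field */
            unsigned int res = w - ((w >> 1) & 0x55555555);

            /* fold into 4-bit, then 8-bit counts */
            res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
            res = (res + (res >> 4)) & 0x0f0f0f0f;

            /* sum the four byte counts into the top byte */
            return (res * 0x01010101) >> 24;
    }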

We still need to hardcode the POPCNT opcode and its register operands
because some old gas versions we support do not know about POPCNT.
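
Hardcoding here means spelling out the raw opcode bytes instead of the
mnemonic, along these lines (a sketch; the macro names are illustrative,
not quoted from this commit):

    /* popcnt %edi, %eax */
    #define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7"

    /* popcnt %rdi, %rax */
    #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"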

Btw, remove redundant REX prefix from 32-bit POPCNT because alternatives
can do padding now.
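
A hedged sketch of what a call site could then look like, assuming the
kernel's ALTERNATIVE() macro, the X86_FEATURE_POPCNT bit and the POPCNT32
string from the sketch above; constraints shown for the 64-bit calling
convention, and the function name is illustrative:

    #include <asm/alternative.h>    /* ALTERNATIVE() */
    #include <asm/cpufeatures.h>    /* X86_FEATURE_POPCNT */

    static inline unsigned int __arch_hweight32_sketch(unsigned int w)
    {
            unsigned int res;

            /*
             * Patch in POPCNT on capable CPUs; the alternatives code pads
             * for the length difference, and the asm fallback saves and
             * restores everything it clobbers, so no extra clobber list
             * or special CFLAGS are needed here.
             */
            asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
                 : "=a" (res)
                 : "D" (w));

            return res;
    }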

Suggested-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1464605787-20603-1-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-06-08 15:01:02 +02:00

#include <linux/linkage.h>
#include <asm/asm.h>

/*
 * unsigned int __sw_hweight32(unsigned int w)
 * %rdi: w
 */
ENTRY(__sw_hweight32)
#ifdef CONFIG_X86_64
        movl %edi, %eax                 # w
#endif
        __ASM_SIZE(push,) %__ASM_REG(dx)
        movl %eax, %edx                 # w -> t
        shrl %edx                       # t >>= 1
        andl $0x55555555, %edx          # t &= 0x55555555
        subl %edx, %eax                 # w -= t

        movl %eax, %edx                 # w -> t
        shrl $2, %eax                   # w_tmp >>= 2
        andl $0x33333333, %edx          # t &= 0x33333333
        andl $0x33333333, %eax          # w_tmp &= 0x33333333
        addl %edx, %eax                 # w = w_tmp + t

        movl %eax, %edx                 # w -> t
        shrl $4, %edx                   # t >>= 4
        addl %edx, %eax                 # w_tmp += t
        andl $0x0f0f0f0f, %eax          # w_tmp &= 0x0f0f0f0f
        imull $0x01010101, %eax, %eax   # w_tmp *= 0x01010101
        shrl $24, %eax                  # w = w_tmp >> 24

        __ASM_SIZE(pop,) %__ASM_REG(dx)
        ret
ENDPROC(__sw_hweight32)
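
/*
 * unsigned long __sw_hweight64(__u64 w)
 * %rdi: w (on 32-bit, w is passed in the %eax/%edx pair instead)
 */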
ENTRY(__sw_hweight64)
#ifdef CONFIG_X86_64
        pushq   %rdx

        movq    %rdi, %rdx              # w -> t
        movabsq $0x5555555555555555, %rax
        shrq    %rdx                    # t >>= 1
        andq    %rdx, %rax              # t &= 0x5555555555555555
        movabsq $0x3333333333333333, %rdx
        subq    %rax, %rdi              # w -= t

        movq    %rdi, %rax              # w -> t
        shrq    $2, %rdi                # w_tmp >>= 2
        andq    %rdx, %rax              # t &= 0x3333333333333333
        andq    %rdi, %rdx              # w_tmp &= 0x3333333333333333
        addq    %rdx, %rax              # w = w_tmp + t

        movq    %rax, %rdx              # w -> t
        shrq    $4, %rdx                # t >>= 4
        addq    %rdx, %rax              # w_tmp += t
        movabsq $0x0f0f0f0f0f0f0f0f, %rdx
        andq    %rdx, %rax              # w_tmp &= 0x0f0f0f0f0f0f0f0f
        movabsq $0x0101010101010101, %rdx
        imulq   %rdx, %rax              # w_tmp *= 0x0101010101010101
        shrq    $56, %rax               # w = w_tmp >> 56

        popq    %rdx
        ret
#else /* CONFIG_X86_32 */
        /* We're getting a u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
        pushl   %ecx

        call    __sw_hweight32
        movl    %eax, %ecx              # stash away result
        movl    %edx, %eax              # second part of input
        call    __sw_hweight32
        addl    %ecx, %eax              # result

        popl    %ecx
        ret
#endif
ENDPROC(__sw_hweight64)