f334f5668b
Building a kernel with CONFIG_CC_OPTIMIZE_FOR_SIZE leads to __ilog2_u32() being duplicated 50 times and __ilog2_u64() 3 times in vmlinux on a tiny powerpc32 config.

__ilog2_u32() is only 2 instructions, so it is not worth keeping out of line; force inlining. Although the u64 version is a bit bigger, there is still a small benefit in keeping it inlined. On a 64-bit config there is a real benefit.

With this change the size of vmlinux text is reduced by 1 kbyte, which is approx 50% more than the size of the removed functions.

Before the patch there is for instance:

c00d2a94 <__ilog2_u32>:
c00d2a94: 7c 63 00 34 cntlzw r3,r3
c00d2a98: 20 63 00 1f subfic r3,r3,31
c00d2a9c: 4e 80 00 20 blr

c00d36d8 <__order_base_2>:
c00d36d8: 28 03 00 01 cmplwi r3,1
c00d36dc: 40 81 00 2c ble c00d3708 <__order_base_2+0x30>
c00d36e0: 94 21 ff f0 stwu r1,-16(r1)
c00d36e4: 7c 08 02 a6 mflr r0
c00d36e8: 38 63 ff ff addi r3,r3,-1
c00d36ec: 90 01 00 14 stw r0,20(r1)
c00d36f0: 4b ff f3 a5 bl c00d2a94 <__ilog2_u32>
c00d36f4: 80 01 00 14 lwz r0,20(r1)
c00d36f8: 38 63 00 01 addi r3,r3,1
c00d36fc: 7c 08 03 a6 mtlr r0
c00d3700: 38 21 00 10 addi r1,r1,16
c00d3704: 4e 80 00 20 blr
c00d3708: 38 60 00 00 li r3,0
c00d370c: 4e 80 00 20 blr

With the patch it has become:

c00d356c <__order_base_2>:
c00d356c: 28 03 00 01 cmplwi r3,1
c00d3570: 40 81 00 14 ble c00d3584 <__order_base_2+0x18>
c00d3574: 38 63 ff ff addi r3,r3,-1
c00d3578: 7c 63 00 34 cntlzw r3,r3
c00d357c: 20 63 00 20 subfic r3,r3,32
c00d3580: 4e 80 00 20 blr
c00d3584: 38 60 00 00 li r3,0
c00d3588: 4e 80 00 20 blr

There is no longer any need for __order_base_2() to set up a stack frame and save/restore the caller address, and the following 'add 1' is merged into the subtract.

Another typical use of it:

c080ff28 <hugepagesz_setup>:
...
c080fff8: 7f c3 f3 78 mr r3,r30
c080fffc: 4b 8f 81 f1 bl c01081ec <__ilog2_u32>
c0810000: 38 63 ff f2 addi r3,r3,-14
...

Becomes

c080ff1c <hugepagesz_setup>:
...
c080ffec: 7f c3 00 34 cntlzw r3,r30
c080fff0: 20 63 00 11 subfic r3,r3,17
...

Here there is no need to move the r30 argument to r3 and then subtract 14 from the result. The code works directly on r30 and merges the 'sub 14' with the 'sub from 31'.

Link: https://lkml.kernel.org/r/803a2ac3d923ebcfd0dd40f5886b05cae7bb0aba.1644243860.git.christophe.leroy@csgroup.eu
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
259 lines
6.2 KiB
C
259 lines
6.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
/* Integer base 2 logarithm calculation
|
|
*
|
|
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
*/
|
|
|
|
#ifndef _LINUX_LOG2_H
|
|
#define _LINUX_LOG2_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/bitops.h>
|
|
|
|
/*
 * non-constant log of base 2 calculators
 * - the arch may override these in asm/bitops.h if they can be implemented
 *   more efficiently than using fls() and fls64()
 * - the arch is not required to handle n==0 if implementing the fallback
 */
|
|
#ifndef CONFIG_ARCH_HAS_ILOG2_U32
|
|
static __always_inline __attribute__((const))
|
|
int __ilog2_u32(u32 n)
|
|
{
|
|
return fls(n) - 1;
|
|
}
|
|
#endif
|
|
|
|
#ifndef CONFIG_ARCH_HAS_ILOG2_U64
|
|
static __always_inline __attribute__((const))
|
|
int __ilog2_u64(u64 n)
|
|
{
|
|
return fls64(n) - 1;
|
|
}
|
|
#endif
|
|
|
|
/**
 * is_power_of_2() - check if a value is a power of two
 * @n: the value to check
 *
 * Determine whether some value is a power of two, where zero is
 * *not* considered a power of two.
 *
 * Return: true if @n is a power of 2, otherwise false.
 */
static inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
	/* A power of two has a single bit set, which n & (n - 1) clears. */
	return n != 0 && (n & (n - 1)) == 0;
}
|
|
|
|
/**
 * __roundup_pow_of_two() - round up to nearest power of two
 * @n: value to round up
 *
 * Run-time helper behind roundup_pow_of_two().  @n must not be 0: the
 * result is documented as undefined in that case.
 *
 * Return: 1UL shifted left by fls_long(@n - 1).
 */
static inline __attribute__((const))
unsigned long __roundup_pow_of_two(unsigned long n)
{
	return 1UL << fls_long(n - 1);
}
|
|
|
|
/**
 * __rounddown_pow_of_two() - round down to nearest power of two
 * @n: value to round down
 *
 * Run-time helper behind rounddown_pow_of_two().  @n must not be 0: the
 * result is documented as undefined in that case.
 *
 * Return: 1UL shifted left by (fls_long(@n) - 1).
 */
static inline __attribute__((const))
unsigned long __rounddown_pow_of_two(unsigned long n)
{
	return 1UL << (fls_long(n) - 1);
}
|
|
|
|
/**
 * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value
 * @n: parameter
 *
 * Use this where sparse expects a true constant expression, e.g. for array
 * indices.
 *
 * The expansion is a single chain of conditionals that tests bit 63 down to
 * bit 2, so the whole thing stays a constant expression.  Constants below 2
 * evaluate to 0; an @n that is not a compile-time constant evaluates to -1.
 */
#define const_ilog2(n)				\
(						\
	__builtin_constant_p(n) ? (		\
		(n) < 2 ? 0 :			\
		(n) & (1ULL << 63) ? 63 :	\
		(n) & (1ULL << 62) ? 62 :	\
		(n) & (1ULL << 61) ? 61 :	\
		(n) & (1ULL << 60) ? 60 :	\
		(n) & (1ULL << 59) ? 59 :	\
		(n) & (1ULL << 58) ? 58 :	\
		(n) & (1ULL << 57) ? 57 :	\
		(n) & (1ULL << 56) ? 56 :	\
		(n) & (1ULL << 55) ? 55 :	\
		(n) & (1ULL << 54) ? 54 :	\
		(n) & (1ULL << 53) ? 53 :	\
		(n) & (1ULL << 52) ? 52 :	\
		(n) & (1ULL << 51) ? 51 :	\
		(n) & (1ULL << 50) ? 50 :	\
		(n) & (1ULL << 49) ? 49 :	\
		(n) & (1ULL << 48) ? 48 :	\
		(n) & (1ULL << 47) ? 47 :	\
		(n) & (1ULL << 46) ? 46 :	\
		(n) & (1ULL << 45) ? 45 :	\
		(n) & (1ULL << 44) ? 44 :	\
		(n) & (1ULL << 43) ? 43 :	\
		(n) & (1ULL << 42) ? 42 :	\
		(n) & (1ULL << 41) ? 41 :	\
		(n) & (1ULL << 40) ? 40 :	\
		(n) & (1ULL << 39) ? 39 :	\
		(n) & (1ULL << 38) ? 38 :	\
		(n) & (1ULL << 37) ? 37 :	\
		(n) & (1ULL << 36) ? 36 :	\
		(n) & (1ULL << 35) ? 35 :	\
		(n) & (1ULL << 34) ? 34 :	\
		(n) & (1ULL << 33) ? 33 :	\
		(n) & (1ULL << 32) ? 32 :	\
		(n) & (1ULL << 31) ? 31 :	\
		(n) & (1ULL << 30) ? 30 :	\
		(n) & (1ULL << 29) ? 29 :	\
		(n) & (1ULL << 28) ? 28 :	\
		(n) & (1ULL << 27) ? 27 :	\
		(n) & (1ULL << 26) ? 26 :	\
		(n) & (1ULL << 25) ? 25 :	\
		(n) & (1ULL << 24) ? 24 :	\
		(n) & (1ULL << 23) ? 23 :	\
		(n) & (1ULL << 22) ? 22 :	\
		(n) & (1ULL << 21) ? 21 :	\
		(n) & (1ULL << 20) ? 20 :	\
		(n) & (1ULL << 19) ? 19 :	\
		(n) & (1ULL << 18) ? 18 :	\
		(n) & (1ULL << 17) ? 17 :	\
		(n) & (1ULL << 16) ? 16 :	\
		(n) & (1ULL << 15) ? 15 :	\
		(n) & (1ULL << 14) ? 14 :	\
		(n) & (1ULL << 13) ? 13 :	\
		(n) & (1ULL << 12) ? 12 :	\
		(n) & (1ULL << 11) ? 11 :	\
		(n) & (1ULL << 10) ? 10 :	\
		(n) & (1ULL <<  9) ?  9 :	\
		(n) & (1ULL <<  8) ?  8 :	\
		(n) & (1ULL <<  7) ?  7 :	\
		(n) & (1ULL <<  6) ?  6 :	\
		(n) & (1ULL <<  5) ?  5 :	\
		(n) & (1ULL <<  4) ?  4 :	\
		(n) & (1ULL <<  3) ?  3 :	\
		(n) & (1ULL <<  2) ?  2 :	\
		1) :				\
	-1)
|
|
|
|
/**
 * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value
 * @n: parameter
 *
 * constant-capable log of base 2 calculation
 * - this can be used to initialise global variables from constant data
 * - a compile-time constant @n is folded with __builtin_clzll(); constants
 *   below 2 evaluate to 0
 * - otherwise selects the appropriately-sized optimised version depending
 *   on sizeof(n): __ilog2_u32() up to 4 bytes, __ilog2_u64() above that
 */
#define ilog2(n) \
( \
	__builtin_constant_p(n) ?	\
	((n) < 2 ? 0 :			\
	 63 - __builtin_clzll(n)) :	\
	(sizeof(n) <= 4) ?		\
	__ilog2_u32(n) :		\
	__ilog2_u64(n)			\
 )
|
|
|
|
/**
 * roundup_pow_of_two - round the given value up to nearest power of two
 * @n: parameter
 *
 * round the given value up to the nearest power of two
 * - the result is undefined when n == 0
 * - this can be used to initialise global variables from constant data
 * - a compile-time constant @n is folded through ilog2(); any other @n is
 *   handed to __roundup_pow_of_two()
 * - a constant 1 is special-cased: ilog2(0) evaluates to 0 for constants,
 *   so the general formula would round 1 up to 2
 */
#define roundup_pow_of_two(n)			\
(						\
	__builtin_constant_p(n) ? (		\
		((n) == 1) ? 1 :		\
		(1UL << (ilog2((n) - 1) + 1))	\
				   ) :		\
	__roundup_pow_of_two(n)			\
 )
|
|
|
|
/**
 * rounddown_pow_of_two - round the given value down to nearest power of two
 * @n: parameter
 *
 * round the given value down to the nearest power of two
 * - the result is undefined when n == 0
 * - this can be used to initialise global variables from constant data
 * - a compile-time constant @n is folded through ilog2(); any other @n is
 *   handed to __rounddown_pow_of_two()
 */
#define rounddown_pow_of_two(n)			\
(						\
	__builtin_constant_p(n) ? (		\
		(1UL << ilog2(n))) :		\
	__rounddown_pow_of_two(n)		\
 )
|
|
|
|
/*
 * Run-time helper behind order_base_2(): evaluates to 0 for n <= 1 and to
 * ilog2(n - 1) + 1 for anything larger.
 */
static inline __attribute__((const))
int __order_base_2(unsigned long n)
{
	return n > 1 ? ilog2(n - 1) + 1 : 0;
}
|
|
|
|
/**
 * order_base_2 - calculate the (rounded up) base 2 order of the argument
 * @n: parameter
 *
 * A compile-time constant @n is folded through ilog2(); any other @n is
 * handed to __order_base_2().
 *
 * The first few values calculated by this routine:
 *  ob2(0) = 0
 *  ob2(1) = 0
 *  ob2(2) = 1
 *  ob2(3) = 2
 *  ob2(4) = 2
 *  ob2(5) = 3
 *  ... and so on.
 */
#define order_base_2(n)				\
(						\
	__builtin_constant_p(n) ? (		\
		((n) == 0 || (n) == 1) ? 0 :	\
		ilog2((n) - 1) + 1) :		\
	__order_base_2(n)			\
)
|
|
|
|
/*
 * Run-time helper behind bits_per(): 1 for n < 2, otherwise order_base_2(n),
 * plus one when n is an exact power of two.
 */
static inline __attribute__((const))
int __bits_per(unsigned long n)
{
	if (n < 2)
		return 1;
	/* 2^k has order k but needs k + 1 bits to be represented. */
	if (is_power_of_2(n))
		return order_base_2(n) + 1;
	return order_base_2(n);
}
|
|
|
|
/**
 * bits_per - calculate the number of bits required for the argument
 * @n: parameter
 *
 * This is constant-capable and can be used for compile time
 * initializations, e.g. bitfields.
 *
 * A compile-time constant @n is folded through ilog2(); any other @n is
 * handed to __bits_per().
 *
 * The first few values calculated by this routine:
 * bf(0) = 1
 * bf(1) = 1
 * bf(2) = 2
 * bf(3) = 2
 * bf(4) = 3
 * ... and so on.
 */
#define bits_per(n)				\
(						\
	__builtin_constant_p(n) ? (		\
		((n) == 0 || (n) == 1)		\
			? 1 : ilog2(n) + 1	\
	) :					\
	__bits_per(n)				\
)
|
|
#endif /* _LINUX_LOG2_H */
|