__div64_const32(): improve the generic C version
Let's rework that code to avoid large immediate values and convert some
64-bit variables to 32-bit ones when possible. This allows gcc to produce
smaller and better code. This even produces optimal code on RISC-V.

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
This commit is contained in:
parent
3940ba8eea
commit
602828c1aa
@@ -178,7 +178,8 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
|
|||||||
uint32_t m_hi = m >> 32;
|
uint32_t m_hi = m >> 32;
|
||||||
uint32_t n_lo = n;
|
uint32_t n_lo = n;
|
||||||
uint32_t n_hi = n >> 32;
|
uint32_t n_hi = n >> 32;
|
||||||
uint64_t res, tmp;
|
uint64_t res;
|
||||||
|
uint32_t res_lo, res_hi, tmp;
|
||||||
|
|
||||||
if (!bias) {
|
if (!bias) {
|
||||||
res = ((uint64_t)m_lo * n_lo) >> 32;
|
res = ((uint64_t)m_lo * n_lo) >> 32;
|
||||||
@@ -187,8 +188,9 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
|
|||||||
res = (m + (uint64_t)m_lo * n_lo) >> 32;
|
res = (m + (uint64_t)m_lo * n_lo) >> 32;
|
||||||
} else {
|
} else {
|
||||||
res = m + (uint64_t)m_lo * n_lo;
|
res = m + (uint64_t)m_lo * n_lo;
|
||||||
tmp = (res < m) ? (1ULL << 32) : 0;
|
res_lo = res >> 32;
|
||||||
res = (res >> 32) + tmp;
|
res_hi = (res_lo < m_hi);
|
||||||
|
res = res_lo | ((uint64_t)res_hi << 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
|
if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
|
||||||
@@ -197,10 +199,12 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
|
|||||||
res += (uint64_t)m_hi * n_lo;
|
res += (uint64_t)m_hi * n_lo;
|
||||||
res >>= 32;
|
res >>= 32;
|
||||||
} else {
|
} else {
|
||||||
tmp = res += (uint64_t)m_lo * n_hi;
|
res += (uint64_t)m_lo * n_hi;
|
||||||
|
tmp = res >> 32;
|
||||||
res += (uint64_t)m_hi * n_lo;
|
res += (uint64_t)m_hi * n_lo;
|
||||||
tmp = (res < tmp) ? (1ULL << 32) : 0;
|
res_lo = res >> 32;
|
||||||
res = (res >> 32) + tmp;
|
res_hi = (res_lo < tmp);
|
||||||
|
res = res_lo | ((uint64_t)res_hi << 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
res += (uint64_t)m_hi * n_hi;
|
res += (uint64_t)m_hi * n_hi;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user