ppc-linux-objdump -d vmlinux | grep -e "<csum_partial>" -e "<__csum_partial>" With gcc9 I get: c0017ef8 <__csum_partial>: c00182fc: 4b ff fb fd bl c0017ef8 <__csum_partial> c0018478: 4b ff fa 80 b c0017ef8 <__csum_partial> c03e8458: 4b c2 fa a0 b c0017ef8 <__csum_partial> c03e8518: 4b c2 f9 e1 bl c0017ef8 <__csum_partial> c03ef410: 4b c2 8a e9 bl c0017ef8 <__csum_partial> c03f0b24: 4b c2 73 d5 bl c0017ef8 <__csum_partial> c04279a4: 4b bf 05 55 bl c0017ef8 <__csum_partial> c0429820: 4b be e6 d9 bl c0017ef8 <__csum_partial> c0429944: 4b be e5 b5 bl c0017ef8 <__csum_partial> c042b478: 4b be ca 81 bl c0017ef8 <__csum_partial> c042b554: 4b be c9 a5 bl c0017ef8 <__csum_partial> c045f15c: 4b bb 8d 9d bl c0017ef8 <__csum_partial> c0492190: 4b b8 5d 69 bl c0017ef8 <__csum_partial> c0492310: 4b b8 5b e9 bl c0017ef8 <__csum_partial> c0495594: 4b b8 29 65 bl c0017ef8 <__csum_partial> c049c420: 4b b7 ba d9 bl c0017ef8 <__csum_partial> c049c870: 4b b7 b6 89 bl c0017ef8 <__csum_partial> c049c930: 4b b7 b5 c9 bl c0017ef8 <__csum_partial> c04a9ca0: 4b b6 e2 59 bl c0017ef8 <__csum_partial> c04bdde4: 4b b5 a1 15 bl c0017ef8 <__csum_partial> c04be480: 4b b5 9a 79 bl c0017ef8 <__csum_partial> c04be710: 4b b5 97 e9 bl c0017ef8 <__csum_partial> c04c969c: 4b b4 e8 5d bl c0017ef8 <__csum_partial> c04ca2fc: 4b b4 db fd bl c0017ef8 <__csum_partial> c04cf5bc: 4b b4 89 3d bl c0017ef8 <__csum_partial> c04d0440: 4b b4 7a b9 bl c0017ef8 <__csum_partial> With gcc10 I get: c0018d08 <__csum_partial>: c0019020 <csum_partial>: c0019020: 4b ff fc e8 b c0018d08 <__csum_partial> c001914c: 4b ff fe d4 b c0019020 <csum_partial> c0019250: 4b ff fd d1 bl c0019020 <csum_partial> c03e404c <csum_partial>: c03e404c: 4b c3 4c bc b c0018d08 <__csum_partial> c03e4050: 4b ff ff fc b c03e404c <csum_partial> c03e40fc: 4b ff ff 51 bl c03e404c <csum_partial> c03e6680: 4b ff d9 cd bl c03e404c <csum_partial> c03e68c4: 4b ff d7 89 bl c03e404c <csum_partial> c03e7934: 4b ff c7 19 bl c03e404c <csum_partial> c03e7bf8: 4b ff 
c4 55 bl c03e404c <csum_partial> c03eb148: 4b ff 8f 05 bl c03e404c <csum_partial> c03ecf68: 4b c2 bd a1 bl c0018d08 <__csum_partial> c04275b8 <csum_partial>: c04275b8: 4b bf 17 50 b c0018d08 <__csum_partial> c0427884: 4b ff fd 35 bl c04275b8 <csum_partial> c0427b18: 4b ff fa a1 bl c04275b8 <csum_partial> c0427bd8: 4b ff f9 e1 bl c04275b8 <csum_partial> c0427cd4: 4b ff f8 e5 bl c04275b8 <csum_partial> c0427e34: 4b ff f7 85 bl c04275b8 <csum_partial> c045a1c0: 4b bb eb 49 bl c0018d08 <__csum_partial> c0489464 <csum_partial>: c0489464: 4b b8 f8 a4 b c0018d08 <__csum_partial> c04896b0: 4b ff fd b5 bl c0489464 <csum_partial> c048982c: 4b ff fc 39 bl c0489464 <csum_partial> c0490158: 4b b8 8b b1 bl c0018d08 <__csum_partial> c0492f0c <csum_partial>: c0492f0c: 4b b8 5d fc b c0018d08 <__csum_partial> c049326c: 4b ff fc a1 bl c0492f0c <csum_partial> c049333c: 4b ff fb d1 bl c0492f0c <csum_partial> c0493b18: 4b ff f3 f5 bl c0492f0c <csum_partial> c0493f50: 4b ff ef bd bl c0492f0c <csum_partial> c0493ffc: 4b ff ef 11 bl c0492f0c <csum_partial> c04a0f78: 4b b7 7d 91 bl c0018d08 <__csum_partial> c04b3e3c: 4b b6 4e cd bl c0018d08 <__csum_partial> c04b40d0 <csum_partial>: c04b40d0: 4b b6 4c 38 b c0018d08 <__csum_partial> c04b4448: 4b ff fc 89 bl c04b40d0 <csum_partial> c04b46f4: 4b ff f9 dd bl c04b40d0 <csum_partial> c04bf448: 4b b5 98 c0 b c0018d08 <__csum_partial> c04c5264: 4b b5 3a a5 bl c0018d08 <__csum_partial> c04c61e4: 4b b5 2b 25 bl c0018d08 <__csum_partial> gcc10 defines multiple versions of csum_partial() which are just an unconditional branch to __csum_partial(). To enforce inlining of that branch to __csum_partial(), mark csum_partial() as __always_inline. 
With this patch with gcc10: c0018d08 <__csum_partial>: c0019148: 4b ff fb c0 b c0018d08 <__csum_partial> c001924c: 4b ff fa bd bl c0018d08 <__csum_partial> c03e40ec: 4b c3 4c 1d bl c0018d08 <__csum_partial> c03e4120: 4b c3 4b e8 b c0018d08 <__csum_partial> c03eb004: 4b c2 dd 05 bl c0018d08 <__csum_partial> c03ecef4: 4b c2 be 15 bl c0018d08 <__csum_partial> c0427558: 4b bf 17 b1 bl c0018d08 <__csum_partial> c04286e4: 4b bf 06 25 bl c0018d08 <__csum_partial> c0428cd8: 4b bf 00 31 bl c0018d08 <__csum_partial> c0428d84: 4b be ff 85 bl c0018d08 <__csum_partial> c045a17c: 4b bb eb 8d bl c0018d08 <__csum_partial> c0489450: 4b b8 f8 b9 bl c0018d08 <__csum_partial> c0491860: 4b b8 74 a9 bl c0018d08 <__csum_partial> c0492eec: 4b b8 5e 1d bl c0018d08 <__csum_partial> c04a0eac: 4b b7 7e 5d bl c0018d08 <__csum_partial> c04b3e34: 4b b6 4e d5 bl c0018d08 <__csum_partial> c04b426c: 4b b6 4a 9d bl c0018d08 <__csum_partial> c04b463c: 4b b6 46 cd bl c0018d08 <__csum_partial> c04c004c: 4b b5 8c bd bl c0018d08 <__csum_partial> c04c0368: 4b b5 89 a1 bl c0018d08 <__csum_partial> c04c5254: 4b b5 3a b5 bl c0018d08 <__csum_partial> c04c60d4: 4b b5 2c 35 bl c0018d08 <__csum_partial> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/a1d31f84ddb0926813b17fcd5cc7f3fa7b4deac2.1602759123.git.christophe.leroy@csgroup.eu
215 lines
5.6 KiB
C
215 lines
5.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
#ifndef _ASM_POWERPC_CHECKSUM_H
|
|
#define _ASM_POWERPC_CHECKSUM_H
|
|
#ifdef __KERNEL__
|
|
|
|
/*
|
|
*/
|
|
|
|
#include <linux/bitops.h>
|
|
#include <linux/in6.h>
|
|
/*
|
|
* Computes the checksum of a memory block at src, length len,
|
|
* and adds in "sum" (32-bit), while copying the block to dst.
|
|
* If an access exception occurs on src or dst, it stores -EFAULT
|
|
* to *src_err or *dst_err respectively (if that pointer is not
|
|
* NULL), and, for an error on src, zeroes the rest of dst.
|
|
*
|
|
* Like csum_partial, this must be called with even lengths,
|
|
* except for the last fragment.
|
|
*/
|
|
extern __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
|
|
|
|
#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
|
|
extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
|
|
int len);
|
|
#define HAVE_CSUM_COPY_USER
|
|
extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
|
|
int len);
|
|
|
|
#define _HAVE_ARCH_CSUM_AND_COPY
/* The "nocheck" variant simply forwards to csum_partial_copy_generic(). */
#define csum_partial_copy_nocheck(src, dst, len)	\
	csum_partial_copy_generic((src), (dst), (len))
|
|
|
|
|
|
/*
|
|
* turns a 32-bit partial checksum (e.g. from csum_partial) into a
|
|
* 1's complement 16-bit checksum.
|
|
*/
|
|
static inline __sum16 csum_fold(__wsum sum)
|
|
{
|
|
unsigned int tmp;
|
|
|
|
/* swap the two 16-bit halves of sum */
|
|
__asm__("rlwinm %0,%1,16,0,31" : "=r" (tmp) : "r" (sum));
|
|
/* if there is a carry from adding the two 16-bit halves,
|
|
it will carry from the lower half into the upper half,
|
|
giving us the correct sum in the upper half. */
|
|
return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
|
|
}
|
|
|
|
/*
 * Fold a 64-bit accumulator into 32 bits.
 *
 * x is added to itself rotated by 32 bits, so the upper word of the
 * result is low + high plus the carry out of the lower word.  That
 * upper word is returned.
 */
static inline u32 from64to32(u64 x)
{
	u64 folded = x + ror64(x, 32);

	return folded >> 32;
}
|
|
|
|
static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
|
|
__u8 proto, __wsum sum)
|
|
{
|
|
#ifdef __powerpc64__
|
|
u64 s = (__force u32)sum;
|
|
|
|
s += (__force u32)saddr;
|
|
s += (__force u32)daddr;
|
|
#ifdef __BIG_ENDIAN__
|
|
s += proto + len;
|
|
#else
|
|
s += (proto + len) << 8;
|
|
#endif
|
|
return (__force __wsum) from64to32(s);
|
|
#else
|
|
__asm__("\n\
|
|
addc %0,%0,%1 \n\
|
|
adde %0,%0,%2 \n\
|
|
adde %0,%0,%3 \n\
|
|
addze %0,%0 \n\
|
|
"
|
|
: "=r" (sum)
|
|
: "r" (daddr), "r"(saddr), "r"(proto + len), "0"(sum));
|
|
return sum;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* computes the checksum of the TCP/UDP pseudo-header
|
|
* returns a 16-bit checksum, already complemented
|
|
*/
|
|
static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
|
|
__u8 proto, __wsum sum)
|
|
{
|
|
return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
|
|
}
|
|
|
|
#define HAVE_ARCH_CSUM_ADD
|
|
static inline __wsum csum_add(__wsum csum, __wsum addend)
|
|
{
|
|
#ifdef __powerpc64__
|
|
u64 res = (__force u64)csum;
|
|
#endif
|
|
if (__builtin_constant_p(csum) && csum == 0)
|
|
return addend;
|
|
if (__builtin_constant_p(addend) && addend == 0)
|
|
return csum;
|
|
|
|
#ifdef __powerpc64__
|
|
res += (__force u64)addend;
|
|
return (__force __wsum)((u32)res + (res >> 32));
|
|
#else
|
|
asm("addc %0,%0,%1;"
|
|
"addze %0,%0;"
|
|
: "+r" (csum) : "r" (addend) : "xer");
|
|
return csum;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* This is a version of ip_compute_csum() optimized for IP headers,
|
|
* which always checksum on 4 octet boundaries. ihl is the number
|
|
* of 32-bit words and is always >= 5.
|
|
*/
|
|
static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
|
|
{
|
|
const u32 *ptr = (const u32 *)iph + 1;
|
|
#ifdef __powerpc64__
|
|
unsigned int i;
|
|
u64 s = *(const u32 *)iph;
|
|
|
|
for (i = 0; i < ihl - 1; i++, ptr++)
|
|
s += *ptr;
|
|
return (__force __wsum)from64to32(s);
|
|
#else
|
|
__wsum sum, tmp;
|
|
|
|
asm("mtctr %3;"
|
|
"addc %0,%4,%5;"
|
|
"1: lwzu %1, 4(%2);"
|
|
"adde %0,%0,%1;"
|
|
"bdnz 1b;"
|
|
"addze %0,%0;"
|
|
: "=r" (sum), "=r" (tmp), "+b" (ptr)
|
|
: "r" (ihl - 2), "r" (*(const u32 *)iph), "r" (*ptr)
|
|
: "ctr", "xer", "memory");
|
|
|
|
return sum;
|
|
#endif
|
|
}
|
|
|
|
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
|
|
{
|
|
return csum_fold(ip_fast_csum_nofold(iph, ihl));
|
|
}
|
|
|
|
/*
|
|
* computes the checksum of a memory block at buff, length len,
|
|
* and adds in "sum" (32-bit)
|
|
*
|
|
* returns a 32-bit number suitable for feeding into itself
|
|
* or csum_tcpudp_magic
|
|
*
|
|
* this function must be called with even lengths, except
|
|
* for the last fragment, which may be odd
|
|
*
|
|
* it's best to have buff aligned on a 32-bit boundary
|
|
*/
|
|
__wsum __csum_partial(const void *buff, int len, __wsum sum);
|
|
|
|
static __always_inline __wsum csum_partial(const void *buff, int len, __wsum sum)
|
|
{
|
|
if (__builtin_constant_p(len) && len <= 16 && (len & 1) == 0) {
|
|
if (len == 2)
|
|
sum = csum_add(sum, (__force __wsum)*(const u16 *)buff);
|
|
if (len >= 4)
|
|
sum = csum_add(sum, (__force __wsum)*(const u32 *)buff);
|
|
if (len == 6)
|
|
sum = csum_add(sum, (__force __wsum)
|
|
*(const u16 *)(buff + 4));
|
|
if (len >= 8)
|
|
sum = csum_add(sum, (__force __wsum)
|
|
*(const u32 *)(buff + 4));
|
|
if (len == 10)
|
|
sum = csum_add(sum, (__force __wsum)
|
|
*(const u16 *)(buff + 8));
|
|
if (len >= 12)
|
|
sum = csum_add(sum, (__force __wsum)
|
|
*(const u32 *)(buff + 8));
|
|
if (len == 14)
|
|
sum = csum_add(sum, (__force __wsum)
|
|
*(const u16 *)(buff + 12));
|
|
if (len >= 16)
|
|
sum = csum_add(sum, (__force __wsum)
|
|
*(const u32 *)(buff + 12));
|
|
} else if (__builtin_constant_p(len) && (len & 3) == 0) {
|
|
sum = csum_add(sum, ip_fast_csum_nofold(buff, len >> 2));
|
|
} else {
|
|
sum = __csum_partial(buff, len, sum);
|
|
}
|
|
return sum;
|
|
}
|
|
|
|
/*
|
|
* this routine is used for miscellaneous IP-like checksums, mainly
|
|
* in icmp.c
|
|
*/
|
|
static inline __sum16 ip_compute_csum(const void *buff, int len)
|
|
{
|
|
return csum_fold(csum_partial(buff, len, 0));
|
|
}
|
|
|
|
#define _HAVE_ARCH_IPV6_CSUM
|
|
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
|
|
const struct in6_addr *daddr,
|
|
__u32 len, __u8 proto, __wsum sum);
|
|
|
|
#endif /* __KERNEL__ */
|
|
#endif
|