powerpc: inline ip_fast_csum()
In several architectures, ip_fast_csum() is inlined. There are functions like ip_send_check() which do nothing much more than calling ip_fast_csum(). Inlining ip_fast_csum() allows the compiler to optimise better. Suggested-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> [scottwood: whitespace and cast fixes] Signed-off-by: Scott Wood <oss@buserror.net>
This commit is contained in:
parent
03bc8b0fc8
commit
37e08cad8f
@ -9,16 +9,9 @@
|
|||||||
* 2 of the License, or (at your option) any later version.
|
* 2 of the License, or (at your option) any later version.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
|
||||||
* This is a version of ip_compute_csum() optimized for IP headers,
|
|
||||||
* which always checksum on 4 octet boundaries. ihl is the number
|
|
||||||
* of 32-bit words and is always >= 5.
|
|
||||||
*/
|
|
||||||
#ifdef CONFIG_GENERIC_CSUM
|
#ifdef CONFIG_GENERIC_CSUM
|
||||||
#include <asm-generic/checksum.h>
|
#include <asm-generic/checksum.h>
|
||||||
#else
|
#else
|
||||||
extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* computes the checksum of a memory block at buff, length len,
|
* computes the checksum of a memory block at buff, length len,
|
||||||
* and adds in "sum" (32-bit)
|
* and adds in "sum" (32-bit)
|
||||||
@ -137,6 +130,44 @@ static inline __wsum csum_add(__wsum csum, __wsum addend)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is a version of ip_compute_csum() optimized for IP headers,
|
||||||
|
* which always checksum on 4 octet boundaries. ihl is the number
|
||||||
|
* of 32-bit words and is always >= 5.
|
||||||
|
*/
|
||||||
|
static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
|
||||||
|
{
|
||||||
|
const u32 *ptr = (const u32 *)iph + 1;
|
||||||
|
#ifdef __powerpc64__
|
||||||
|
unsigned int i;
|
||||||
|
u64 s = *(const u32 *)iph;
|
||||||
|
|
||||||
|
for (i = 0; i < ihl - 1; i++, ptr++)
|
||||||
|
s += *ptr;
|
||||||
|
s += (s >> 32);
|
||||||
|
return (__force __wsum)s;
|
||||||
|
#else
|
||||||
|
__wsum sum, tmp;
|
||||||
|
|
||||||
|
asm("mtctr %3;"
|
||||||
|
"addc %0,%4,%5;"
|
||||||
|
"1: lwzu %1, 4(%2);"
|
||||||
|
"adde %0,%0,%1;"
|
||||||
|
"bdnz 1b;"
|
||||||
|
"addze %0,%0;"
|
||||||
|
: "=r" (sum), "=r" (tmp), "+b" (ptr)
|
||||||
|
: "r" (ihl - 2), "r" (*(const u32 *)iph), "r" (*ptr)
|
||||||
|
: "ctr", "xer", "memory");
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
|
||||||
|
{
|
||||||
|
return csum_fold(ip_fast_csum_nofold(iph, ihl));
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
#endif /* __KERNEL__ */
|
#endif /* __KERNEL__ */
|
||||||
#endif
|
#endif
|
||||||
|
@ -19,27 +19,6 @@
|
|||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
/*
|
|
||||||
* ip_fast_csum(buf, len) -- Optimized for IP header
|
|
||||||
* len is in words and is always >= 5.
|
|
||||||
*/
|
|
||||||
_GLOBAL(ip_fast_csum)
|
|
||||||
lwz r0,0(r3)
|
|
||||||
lwzu r5,4(r3)
|
|
||||||
addic. r4,r4,-2
|
|
||||||
addc r0,r0,r5
|
|
||||||
mtctr r4
|
|
||||||
blelr-
|
|
||||||
1: lwzu r4,4(r3)
|
|
||||||
adde r0,r0,r4
|
|
||||||
bdnz 1b
|
|
||||||
addze r0,r0 /* add in final carry */
|
|
||||||
rlwinm r3,r0,16,0,31 /* fold two halves together */
|
|
||||||
add r3,r0,r3
|
|
||||||
not r3,r3
|
|
||||||
srwi r3,r3,16
|
|
||||||
blr
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* computes the checksum of a memory block at buff, length len,
|
* computes the checksum of a memory block at buff, length len,
|
||||||
* and adds in "sum" (32-bit)
|
* and adds in "sum" (32-bit)
|
||||||
|
@ -17,33 +17,6 @@
|
|||||||
#include <asm/errno.h>
|
#include <asm/errno.h>
|
||||||
#include <asm/ppc_asm.h>
|
#include <asm/ppc_asm.h>
|
||||||
|
|
||||||
/*
|
|
||||||
* ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
|
|
||||||
* len is in words and is always >= 5.
|
|
||||||
*
|
|
||||||
* In practice len == 5, but this is not guaranteed. So this code does not
|
|
||||||
* attempt to use doubleword instructions.
|
|
||||||
*/
|
|
||||||
_GLOBAL(ip_fast_csum)
|
|
||||||
lwz r0,0(r3)
|
|
||||||
lwzu r5,4(r3)
|
|
||||||
addic. r4,r4,-2
|
|
||||||
addc r0,r0,r5
|
|
||||||
mtctr r4
|
|
||||||
blelr-
|
|
||||||
1: lwzu r4,4(r3)
|
|
||||||
adde r0,r0,r4
|
|
||||||
bdnz 1b
|
|
||||||
addze r0,r0 /* add in final carry */
|
|
||||||
rldicl r4,r0,32,0 /* fold two 32-bit halves together */
|
|
||||||
add r0,r0,r4
|
|
||||||
srdi r0,r0,32
|
|
||||||
rlwinm r3,r0,16,0,31 /* fold two halves together */
|
|
||||||
add r3,r0,r3
|
|
||||||
not r3,r3
|
|
||||||
srwi r3,r3,16
|
|
||||||
blr
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Computes the checksum of a memory block at buff, length len,
|
* Computes the checksum of a memory block at buff, length len,
|
||||||
* and adds in "sum" (32-bit).
|
* and adds in "sum" (32-bit).
|
||||||
|
@ -19,7 +19,6 @@ EXPORT_SYMBOL(strncmp);
|
|||||||
#ifndef CONFIG_GENERIC_CSUM
|
#ifndef CONFIG_GENERIC_CSUM
|
||||||
EXPORT_SYMBOL(csum_partial);
|
EXPORT_SYMBOL(csum_partial);
|
||||||
EXPORT_SYMBOL(csum_partial_copy_generic);
|
EXPORT_SYMBOL(csum_partial_copy_generic);
|
||||||
EXPORT_SYMBOL(ip_fast_csum);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
EXPORT_SYMBOL(__copy_tofrom_user);
|
EXPORT_SYMBOL(__copy_tofrom_user);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user