2005-04-16 15:20:36 -07:00
/*
 * arch/x86_64/lib/csum-partial.c
 *
 * This file contains network checksum routines that are better done
 * in an architecture-specific manner due to speed.
 */
# include <linux/compiler.h>
# include <linux/module.h>
# include <asm/checksum.h>
/*
 * from32to16 - fold a 32-bit partial sum into 16 bits.
 * @a: 32-bit value whose upper and lower 16-bit halves are added together
 *
 * Adds the two halves of @a and wraps a carry out of bit 15 back into
 * bit 0 (end-around carry). Returns the 16-bit result.
 */
static inline unsigned short from32to16(unsigned a)
{
	/* 16 bits + 16 bits: the intermediate sum needs at most 17 bits */
	unsigned sum = (a >> 16) + (a & 0xffff);

	/* add the carry back in; this cannot carry again (max 0xfffe + 1) */
	sum = (sum & 0xffff) + (sum >> 16);
	return (unsigned short)sum;
}
/*
 * Do a 64-bit checksum on an arbitrary memory area.
 * Returns a 32-bit checksum.
 *
 * This isn't as time critical as it used to be because many NICs
 * do hardware checksumming these days.
 *
 * Things tried and found to not make it faster:
 * Manual prefetching.
 * Unrolling to a 128-byte inner loop.
 * Using interleaving with more registers to break the carry chains.
 */
2006-12-07 02:14:07 +01:00
/*
 * do_csum - sum the bytes of a buffer for the network checksum.
 * @buff: start of the data; any alignment is accepted
 * @len:  number of bytes to sum
 *
 * Leading bytes are consumed one, two and four at a time (when enough
 * data remains) so that the bulk can be read as 64-bit words, 64 bytes
 * per iteration in a single add-with-carry chain.  The trailing 4, 2 and
 * 1 bytes are added last and the 64-bit accumulator is folded to 32 bits
 * with add32_with_carry().
 *
 * If @buff started on an odd address the sum is additionally folded to
 * 16 bits and its two bytes are swapped before it is returned.
 *
 * Returns 0 when @len is 0.
 */
static unsigned do_csum(const unsigned char *buff, unsigned len)
{
	unsigned odd, count;
	unsigned long result = 0;

	if (unlikely(len == 0))
		return result;

	/* odd start address: take the first byte on its own, shifted up */
	odd = 1 & (unsigned long) buff;
	if (unlikely(odd)) {
		result = *buff << 8;
		len--;
		buff++;
	}
	count = len >> 1;		/* nr of 16-bit words.. */
	if (count) {
		if (2 & (unsigned long) buff) {
			result += *(const unsigned short *) buff;
			count--;
			len -= 2;
			buff += 2;
		}
		count >>= 1;		/* nr of 32-bit words.. */
		if (count) {
			unsigned long zero;
			unsigned count64;

			if (4 & (unsigned long) buff) {
				result += *(const unsigned int *) buff;
				count--;
				len -= 4;
				buff += 4;
			}
			count >>= 1;	/* nr of 64-bit words.. */

			/* main loop using 64-byte blocks */
			zero = 0;
			count64 = count >> 3;
			while (count64) {
				/*
				 * The buffer is only reachable through the
				 * register operand "src", so the "memory"
				 * clobber is what tells the compiler that
				 * this asm reads memory.
				 */
				__asm__("addq 0*8(%[src]),%[res]\n\t"
					"adcq 1*8(%[src]),%[res]\n\t"
					"adcq 2*8(%[src]),%[res]\n\t"
					"adcq 3*8(%[src]),%[res]\n\t"
					"adcq 4*8(%[src]),%[res]\n\t"
					"adcq 5*8(%[src]),%[res]\n\t"
					"adcq 6*8(%[src]),%[res]\n\t"
					"adcq 7*8(%[src]),%[res]\n\t"
					"adcq %[zero],%[res]"
					: [res] "=r" (result)
					: [src] "r" (buff), [zero] "r" (zero),
					  "[res]" (result)
					: "memory");
				buff += 64;
				count64--;
			}

			/* last up to 7 8-byte blocks */
			count %= 8;
			while (count) {
				/* add one 64-bit word, then the carry it produced */
				__asm__("addq %1,%0\n\t"
					"adcq %2,%0\n"
					: "=r" (result)
					: "m" (*(const unsigned long *) buff),
					  "r" (zero), "0" (result));
				--count;
				buff += 8;
			}
			/* fold the upper half into the lower half */
			result = add32_with_carry(result >> 32,
						  result & 0xffffffff);

			if (len & 4) {
				result += *(const unsigned int *) buff;
				buff += 4;
			}
		}
		if (len & 2) {
			result += *(const unsigned short *) buff;
			buff += 2;
		}
	}
	if (len & 1)
		result += *buff;
	result = add32_with_carry(result >> 32, result & 0xffffffff);
	if (unlikely(odd)) {
		/* odd start: fold to 16 bits and swap the two bytes */
		result = from32to16(result);
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
	}
	return result;
}
/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * returns a 32-bit number suitable for feeding into itself
 * or csum_tcpudp_magic
 *
 * this function must be called with even lengths, except
 * for the last fragment, which may be odd
 *
 * it's best to have buff aligned on a 64-bit boundary
 */
2006-11-14 21:20:08 -08:00
__wsum csum_partial(const void *buff, int len, __wsum sum)
{
	/* checksum of this block on its own */
	unsigned block_sum = do_csum(buff, len);

	/* combine with the caller's running sum, keeping the carry */
	return (__force __wsum) add32_with_carry(block_sum, (__force u32) sum);
}
/*
 * this routine is used for miscellaneous IP-like checksums, mainly
 * in icmp.c
 */
2006-11-14 21:20:08 -08:00
__sum16 ip_compute_csum ( const void * buff , int len )
2005-04-16 15:20:36 -07:00
{
return csum_fold ( csum_partial ( buff , len , 0 ) ) ;
}
2006-06-26 13:59:44 +02:00
EXPORT_SYMBOL ( ip_compute_csum ) ;
2005-04-16 15:20:36 -07:00