2009-06-12 22:01:00 +08:00
/*
 * arch/score/lib/csum_partial.S
 *
 * Score Processor version.
 *
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see the file COPYING, or write
 * to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
# include < l i n u x / l i n k a g e . h >
/*
 * ADDC(acc, val) - add val into acc with end-around carry.
 *
 * After the add, the addend is compared (unsigned) with the new total:
 * when val <= acc the add did not wrap and the increment is skipped,
 * otherwise the lost carry is put back by adding 1 to acc.
 *
 * The expansion defines the numeric local label "9" and leaves the
 * condition flags as set by cmp.c, so callers must not rely on flags
 * computed before the macro.
 */
#define ADDC(acc, val)			\
	add	acc, acc, val;		\
	cmp.c	val, acc;		\
	bleu	9f;			\
	addi	acc, 0x1;		\
9:
/*
 * CSUM_BIGCHUNK(src, offset, sum) - add the eight 32-bit words located at
 * [src + offset + 0x00] .. [src + offset + 0x1c] into sum.
 *
 * The words are fetched four at a time into r8-r11 and accumulated with
 * ADDC, so r8-r11 and the condition flags are overwritten.  src itself is
 * not modified; the caller advances it.
 *
 * The last body line deliberately carries no line continuation, so the
 * macro ends here no matter what follows it in the file.
 */
#define CSUM_BIGCHUNK(src, offset, sum)		\
	lw	r8, [src, offset + 0x00];	\
	lw	r9, [src, offset + 0x04];	\
	lw	r10, [src, offset + 0x08];	\
	lw	r11, [src, offset + 0x0c];	\
	ADDC(sum, r8);				\
	ADDC(sum, r9);				\
	ADDC(sum, r10);				\
	ADDC(sum, r11);				\
	lw	r8, [src, offset + 0x10];	\
	lw	r9, [src, offset + 0x14];	\
	lw	r10, [src, offset + 0x18];	\
	lw	r11, [src, offset + 0x1c];	\
	ADDC(sum, r8);				\
	ADDC(sum, r9);				\
	ADDC(sum, r10);				\
	ADDC(sum, r11);
/*
 * Register aliases used by the checksum code in this file.
 */
#define src	r4	/* buffer pointer, advanced as data is consumed */
#define dest	r5	/* never referenced by name here; r5 is used directly as the byte count */
#define sum	r27	/* running 32-bit checksum accumulator */

	.text
/*
 * small_csumcpy - sum the remaining few bytes, fold to 16 bits and return.
 *
 * Entered from csum_partial, either straight away when the length compares
 * (signed) below 8, or by jump once only the trailing bytes are left.
 * The source alignment is unknown on entry.
 *
 * In:   src = next byte to sum
 *       r10 = bytes left (copied into r5 first thing)
 *       r25 = 1 when csum_partial already consumed a leading odd byte,
 *             otherwise 0 (it is then recomputed here as src & 1)
 *       sum = running 32-bit sum
 *       r6  = partial checksum supplied by the caller, added after folding
 * Out:  r4  = result; control leaves through "br r3"
 * Uses: r5, r8, r9, r25, r26, sum and the condition flags
 */
small_csumcpy:
	mv	r5, r10			/* r5 = bytes left */
	ldi	r9, 0
	cmpi.c	r25, 1			/* odd start already handled by csum_partial? */
	beq	pass_small_set_t7	/* yes: flags stay "equal", so the next beq is taken as well */
	andri.c	r25, src, 1		/* no: r25 = src & 1 */

pass_small_set_t7:
	beq	aligned			/* src is halfword aligned, skip the single leading byte */
	cmpi.c	r5, 0
	beq	fold			/* nothing to sum */
	lbu	r9, [src]
	slli	r9, r9, 8		/* little endian: move the odd-address byte to the upper byte lane */
	ADDC(sum, r9)
	addi	src, 1
	subi.c	r5, 1

	/* src is halfword aligned from here on; r5 is tested bit by bit */
aligned:
	andri.c	r8, r5, 4		/* length bit 2 set: one full word to sum */
	beq	len_less_4bytes

	andri.c	r8, src, 3		/* word aligned: one lw; otherwise two lhu */
	beq	four_byte_aligned
	lhu	r9, [src]
	addi	src, 2
	ADDC(sum, r9)
	lhu	r9, [src]
	addi	src, 2
	ADDC(sum, r9)
	b	len_less_4bytes

four_byte_aligned:			/* a word to go and src is word aligned */
	lw	r9, [src]
	addi	src, 4
	ADDC(sum, r9)

len_less_4bytes:			/* length bit 1 set: one halfword */
	andri.c	r8, r5, 2
	beq	len_less_2bytes
	lhu	r9, [src]
	addi	src, 2
	ADDC(sum, r9)

len_less_2bytes:			/* length bit 0 set: one last byte */
	andri.c	r8, r5, 1
	beq	fold			/* no byte left, go fold */
	lbu	r9, [src]

fold_ADDC:
	ADDC(sum, r9)

fold:
	/*
	 * Fold 32 -> 16 bits: adding (sum << 16) leaves low16 + high16 in the
	 * upper halfword; a wrap of that add (r26 > new sum) is the carry to
	 * add back after shifting the result down.  srli does not carry the
	 * ".c" suffix, so the flags from cmp.c are still the ones bleu tests.
	 */
	slli	r26, sum, 16
	add	sum, sum, r26
	cmp.c	r26, sum
	srli	sum, sum, 16
	bleu	1f			/* r26 <= sum: no carry */
	addi	sum, 1			/* r26 >  sum: add the carry */
1:
	/* buffer started on an odd address (r25 != 0): swap the two result bytes */
	cmpi.c	r25, 0
	beq	1f
	slli	r26, sum, 8
	srli	sum, sum, 8
	or	sum, sum, r26
	andi	sum, 0xffff
1:
	/* NOTE(review): the meaning of .set optimize / .set volatile is not visible in this file */
	.set	optimize
	ADDC(sum, r6)			/* add the partial checksum that was passed in */
	mv	r4, sum
	br	r3
	.set	volatile
/*
 * csum_partial - accumulate a checksum over a byte buffer.
 *
 * Presumably the kernel's csum_partial(buff, len, sum) entry point; the C
 * prototype is not visible in this file.
 *
 * In:   r4 (src) = buffer address
 *       r5       = length in bytes
 *       r6       = partial checksum to add in (applied in small_csumcpy)
 * Out:  r4       = result
 * Uses: r5, r8-r11, r25, r26, sum (r27) and the condition flags
 *
 * Lengths that compare (signed) below 8 go straight to small_csumcpy.
 * Otherwise src is brought to 2- and then 4-byte alignment.  Buffers still
 * holding at least 56 bytes are aligned further, up to 32 bytes, and summed
 * in 128/64/32-byte chunks.  Remaining whole words are summed one at a
 * time, and the last 0-3 bytes plus the final fold are left to
 * small_csumcpy.
 */
	.align	5
ENTRY(csum_partial)
	ldi	sum, 0
	ldi	r25, 0
	mv	r10, r5			/* small_csumcpy takes its byte count from r10 */
	cmpi.c	r5, 0x8
	blt	small_csumcpy		/* < 8 (signed) bytes to sum */
	cmpi.c	r5, 0x0			/* NOTE(review): len >= 8 here, so this test looks unreachable */
	beq	out
	andri.c	r25, src, 0x1		/* odd buffer? r25 is read again when folding */
	beq	word_align

hword_align:				/* 1 byte */
	lbu	r8, [src]
	subi	r5, 0x1
	slli	r8, r8, 8		/* same byte-lane shift as the odd-byte case in small_csumcpy */
	ADDC(sum, r8)
	addi	src, 0x1

word_align:				/* 2 bytes */
	andri.c	r8, src, 0x2		/* already 4-byte aligned? */
	beq	dword_align
	lhu	r8, [src]
	subi	r5, 0x2
	ADDC(sum, r8)
	addi	src, 0x2

dword_align:				/* 4 bytes */
	mv	r26, r5			/* word-loop input when len < 56; overwritten otherwise */
	ldi	r8, 56
	cmp.c	r8, r5
	bgtu	do_end_words		/* 56 > len (unsigned): skip the chunked path */
	andri.c	r26, src, 0x4
	beq	qword_align
	lw	r8, [src]
	subi	r5, 0x4
	ADDC(sum, r8)
	addi	src, 0x4

qword_align:				/* 8 bytes */
	andri.c	r26, src, 0x8
	beq	oword_align
	lw	r8, [src, 0x0]
	lw	r9, [src, 0x4]
	subi	r5, 0x8			/* len -= 0x8 */
	ADDC(sum, r8)
	ADDC(sum, r9)
	addi	src, 0x8

oword_align:				/* 16 bytes */
	andri.c	r26, src, 0x10
	beq	begin_movement
	lw	r10, [src, 0x08]
	lw	r11, [src, 0x0c]
	lw	r8, [src, 0x00]
	lw	r9, [src, 0x04]
	ADDC(sum, r10)
	ADDC(sum, r11)
	ADDC(sum, r8)
	ADDC(sum, r9)
	subi	r5, 0x10
	addi	src, 0x10

begin_movement:
	srli.c	r26, r5, 0x7		/* r26 = len / 128; zero means len < 128 */
	beq	1f

	/* r26 = number of 128-byte blocks, computed by the srli.c above */
move_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum)
	CSUM_BIGCHUNK(src, 0x20, sum)
	CSUM_BIGCHUNK(src, 0x40, sum)
	CSUM_BIGCHUNK(src, 0x60, sum)
	subi.c	r26, 0x01		/* one block done; flags feed the bne below */
	addi	src, 0x80
	bne	move_128bytes

1:	/* fewer than 128 bytes left in the chunked part: one 64-byte block? */
	andri.c	r10, r5, 0x40
	beq	1f

move_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum)
	CSUM_BIGCHUNK(src, 0x20, sum)
	addi	src, 0x40

1:	/* fewer than 64 bytes left in the chunked part */
	andri	r26, r5, 0x1c		/* bytes in trailing whole words (0x1c = 28) */
	andri.c	r10, r5, 0x20
	beq	do_end_words		/* decided by the andri.c just above */

move_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum)
	andri	r26, r5, 0x1c
	addri	src, src, 0x20

do_end_words:
	/*
	 * r26 is either the whole remaining length (set in dword_align when
	 * len < 56) or len & 0x1c (chunked path); r26 >> 2 is the word count.
	 */
	cmpi.c	r26, 0x0
	beq	maybe_end_cruft		/* no whole word left */
	srli	r26, r26, 0x2

end_words:
	lw	r8, [src]
	subi.c	r26, 0x1		/* unit is 4 bytes */
	ADDC(sum, r8)			/* overwrites the flags, hence the cmpi.c below */
	addi	src, 0x4
	cmpi.c	r26, 0x0
	bne	end_words		/* r26 != 0 */

maybe_end_cruft:			/* fewer than 4 bytes left */
	andri	r10, r5, 0x3

small_memcpy:
	mv	r5, r10
	j	small_csumcpy

out:
	mv	r4, sum
	br	r3
END(csum_partial)