2005-09-26 16:04:21 +10:00
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */
# include < l i n u x / s y s . h >
# include < a s m / p r o c e s s o r . h >
# include < a s m / e r r n o . h >
# include < a s m / p p c _ a s m . h >
/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * csum_partial(r3=buff, r4=len, r5=sum)
 *
 * Returns the sum folded to 32 bits in r3.
 *
 * Register use:
 *   r0       64-bit running sum.  Every addition is an adde, so the
 *            carry out of one add is fed into the next; the last carry
 *            is folded in by the addze at .Lcsum_finish.
 *   r3       current read pointer
 *   r4       bytes still to be summed
 *   r6,r7,r9-r12
 *            scratch
 *   r14-r16  additional scratch for the unrolled loop; they are saved
 *            in a stack frame before the loop and restored after it.
 *
 * The carry bit is live from the initial addic to the final addze, so
 * any instruction added in between must not modify XER[CA].
 */
_GLOBAL(csum_partial)
	addic	r0,r5,0			/* r0 = sum, clear carry */

	srdi.	r6,r4,3			/* less than 8 bytes? */
	beq	.Lcsum_tail_word

	/*
	 * If only halfword aligned, align to a double word. Since odd
	 * aligned addresses should be rare and they would require more
	 * work to calculate the correct checksum, we ignore that case
	 * and take the potential slowdown of unaligned loads.
	 */
	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
	beq	.Lcsum_aligned

	li	r7,4
	sub	r6,r7,r6		/* halfwords up to the next doubleword */
	mtctr	r6

1:
	lhz	r6,0(r3)		/* align to doubleword */
	subi	r4,r4,2
	addi	r3,r3,2
	adde	r0,r0,r6
	bdnz	1b

.Lcsum_aligned:
	/*
	 * We unroll the loop such that each iteration is 64 bytes with an
	 * entry and exit limb of 64 bytes, meaning a minimum size of
	 * 128 bytes.
	 */
	srdi.	r6,r4,7
	beq	.Lcsum_tail_doublewords		/* len < 128 */

	srdi	r6,r4,6
	subi	r6,r6,1			/* the exit limb handles the last 64 bytes */
	mtctr	r6

	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	/* Entry limb: preload the first 32 bytes of the first block. */
	ld	r6,0(r3)
	ld	r9,8(r3)

	ld	r10,16(r3)
	ld	r11,24(r3)

	/*
	 * On POWER6 and POWER7 back to back addes take 2 cycles because of
	 * the XER dependency. This means the fastest this loop can go is
	 * 16 cycles per iteration. The scheduling of the loop below has
	 * been shown to hit this on both POWER6 and POWER7.
	 */
	.align 5
2:
	adde	r0,r0,r6
	ld	r12,32(r3)
	ld	r14,40(r3)

	adde	r0,r0,r9
	ld	r15,48(r3)
	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10

	adde	r0,r0,r11

	adde	r0,r0,r12

	adde	r0,r0,r14

	adde	r0,r0,r15
	ld	r6,0(r3)		/* preload the next block */
	ld	r9,8(r3)

	adde	r0,r0,r16
	ld	r10,16(r3)
	ld	r11,24(r3)
	bdnz	2b

	/* Exit limb: sum the final 64-byte block without preloading more. */
	adde	r0,r0,r6
	ld	r12,32(r3)
	ld	r14,40(r3)

	adde	r0,r0,r9
	ld	r15,48(r3)
	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
	adde	r0,r0,r11
	adde	r0,r0,r12
	adde	r0,r0,r14
	adde	r0,r0,r15
	adde	r0,r0,r16

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE

	andi.	r4,r4,63		/* bytes left after the 64-byte blocks */

.Lcsum_tail_doublewords:		/* Up to 127 bytes to go */
	srdi.	r6,r4,3
	beq	.Lcsum_tail_word

	mtctr	r6
3:
	ld	r6,0(r3)
	addi	r3,r3,8
	adde	r0,r0,r6
	bdnz	3b

	andi.	r4,r4,7

.Lcsum_tail_word:			/* Up to 7 bytes to go */
	srdi.	r6,r4,2
	beq	.Lcsum_tail_halfword

	lwz	r6,0(r3)
	addi	r3,r3,4
	adde	r0,r0,r6
	subi	r4,r4,4

.Lcsum_tail_halfword:			/* Up to 3 bytes to go */
	srdi.	r6,r4,1
	beq	.Lcsum_tail_byte

	lhz	r6,0(r3)
	addi	r3,r3,2
	adde	r0,r0,r6
	subi	r4,r4,2

.Lcsum_tail_byte:			/* Up to 1 byte to go */
	andi.	r6,r4,1
	beq	.Lcsum_finish

	lbz	r6,0(r3)
	/*
	 * The odd byte stands for a 16-bit word whose second byte is zero.
	 * Read big-endian that makes it the high byte, so it is shifted up;
	 * read little-endian it is the low byte and is added as it is.
	 */
#ifdef __BIG_ENDIAN__
	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
	adde	r0,r0,r9
#else
	adde	r0,r0,r6
#endif

.Lcsum_finish:
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
	add	r3,r4,r0		/* upper word = hi + lo + carry from the low word */
	srdi	r3,r3,32
	blr
2005-09-26 16:04:21 +10:00
2010-08-02 20:09:52 +00:00
2013-10-01 17:11:35 +10:00
/*
 * Exception-table annotations.
 *
 * Each of srcnr/source/dstnr/dest is written directly in front of a load
 * or store, e.g. "source; ld r6,0(r3)".  It places a local label at the
 * current location (the instruction that follows on the same line) and
 * emits one __ex_table entry made of that label's address and the address
 * of a fixup label:
 *
 *   srcnr   -> .Lsrc_error_nr
 *   source  -> .Lsrc_error
 *   dstnr   -> .Ldest_error_nr
 *   dest    -> .Ldest_error
 *
 * The four macros differ only in the fixup label, so they share
 * csum_ex_entry below.
 */
	.macro csum_ex_entry fixup
100:
	.section __ex_table,"a"
	.align 3
	.llong 100b,\fixup
	.previous
	.endm

	.macro srcnr
	csum_ex_entry .Lsrc_error_nr
	.endm

	.macro source
	csum_ex_entry .Lsrc_error
	.endm

	.macro dstnr
	csum_ex_entry .Ldest_error_nr
	.endm

	.macro dest
	csum_ex_entry .Ldest_error
	.endm
2005-09-26 16:04:21 +10:00
/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively. The caller must take any action
 * required in this case (zeroing memory, recalculating partial checksum etc).
 *
 * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
 *
 * Returns the sum folded to 32 bits in r3.  The fault paths at the end of
 * the function do not write r3, so the return value carries no meaning
 * when a fault was reported.  A NULL src_err/dst_err pointer is accepted:
 * the fault path then returns without storing anything.
 *
 * Register use:
 *   r0       64-bit running sum, accumulated with adde; the last carry
 *            is folded in by the addze at .Lcopy_finish
 *   r3       current source pointer
 *   r4       current destination pointer
 *   r5       bytes still to be processed
 *   r7,r8    src_err/dst_err; only read on the fault paths, so they must
 *            stay intact until then (the alignment code uses r9, not r7,
 *            as its scratch register for this reason)
 *   r6,r9-r12
 *            scratch
 *   r14-r16  additional scratch for the unrolled loop; saved in a stack
 *            frame before the loop and restored after it
 *
 * Accesses made while that stack frame exists are tagged "source"/"dest",
 * whose fixups restore r14-r16 and pop the frame first.  All other
 * accesses are tagged "srcnr"/"dstnr" ("nr" = nothing to restore).
 *
 * The carry bit is live from the initial addic to the final addze, so
 * any instruction added in between must not modify XER[CA].
 */
_GLOBAL(csum_partial_copy_generic)
	addic	r0,r6,0			/* r0 = sum, clear carry */

	srdi.	r6,r5,3			/* less than 8 bytes? */
	beq	.Lcopy_tail_word

	/*
	 * If only halfword aligned, align to a double word. Since odd
	 * aligned addresses should be rare and they would require more
	 * work to calculate the correct checksum, we ignore that case
	 * and take the potential slowdown of unaligned loads.
	 *
	 * If the source and destination are relatively unaligned we only
	 * align the source. This keeps things simple.
	 */
	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
	beq	.Lcopy_aligned

	li	r9,4
	sub	r6,r9,r6		/* halfwords up to the next doubleword */
	mtctr	r6

1:
srcnr;	lhz	r6,0(r3)		/* align to doubleword */
	subi	r5,r5,2
	addi	r3,r3,2
	adde	r0,r0,r6
dstnr;	sth	r6,0(r4)
	addi	r4,r4,2
	bdnz	1b

.Lcopy_aligned:
	/*
	 * We unroll the loop such that each iteration is 64 bytes with an
	 * entry and exit limb of 64 bytes, meaning a minimum size of
	 * 128 bytes.
	 */
	srdi.	r6,r5,7
	beq	.Lcopy_tail_doublewords		/* len < 128 */

	srdi	r6,r5,6
	subi	r6,r6,1			/* the exit limb handles the last 64 bytes */
	mtctr	r6

	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	/* Entry limb: preload the first 32 bytes of the first block. */
source;	ld	r6,0(r3)
source;	ld	r9,8(r3)

source;	ld	r10,16(r3)
source;	ld	r11,24(r3)

	/*
	 * On POWER6 and POWER7 back to back addes take 2 cycles because of
	 * the XER dependency. This means the fastest this loop can go is
	 * 16 cycles per iteration. The scheduling of the loop below has
	 * been shown to hit this on both POWER6 and POWER7.
	 */
	.align 5
2:
	adde	r0,r0,r6
source;	ld	r12,32(r3)
source;	ld	r14,40(r3)

	adde	r0,r0,r9
source;	ld	r15,48(r3)
source;	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
dest;	std	r6,0(r4)
dest;	std	r9,8(r4)

	adde	r0,r0,r11
dest;	std	r10,16(r4)
dest;	std	r11,24(r4)

	adde	r0,r0,r12
dest;	std	r12,32(r4)
dest;	std	r14,40(r4)

	adde	r0,r0,r14
dest;	std	r15,48(r4)
dest;	std	r16,56(r4)
	addi	r4,r4,64

	adde	r0,r0,r15
source;	ld	r6,0(r3)		/* preload the next block */
source;	ld	r9,8(r3)

	adde	r0,r0,r16
source;	ld	r10,16(r3)
source;	ld	r11,24(r3)
	bdnz	2b

	/* Exit limb: sum and store the final 64-byte block. */
	adde	r0,r0,r6
source;	ld	r12,32(r3)
source;	ld	r14,40(r3)

	adde	r0,r0,r9
source;	ld	r15,48(r3)
source;	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
dest;	std	r6,0(r4)
dest;	std	r9,8(r4)

	adde	r0,r0,r11
dest;	std	r10,16(r4)
dest;	std	r11,24(r4)

	adde	r0,r0,r12
dest;	std	r12,32(r4)
dest;	std	r14,40(r4)

	adde	r0,r0,r14
dest;	std	r15,48(r4)
dest;	std	r16,56(r4)
	addi	r4,r4,64

	adde	r0,r0,r15
	adde	r0,r0,r16

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE

	andi.	r5,r5,63		/* bytes left after the 64-byte blocks */

.Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
	srdi.	r6,r5,3
	beq	.Lcopy_tail_word

	mtctr	r6
3:
srcnr;	ld	r6,0(r3)
	addi	r3,r3,8
	adde	r0,r0,r6
dstnr;	std	r6,0(r4)
	addi	r4,r4,8
	bdnz	3b

	andi.	r5,r5,7

.Lcopy_tail_word:			/* Up to 7 bytes to go */
	srdi.	r6,r5,2
	beq	.Lcopy_tail_halfword

srcnr;	lwz	r6,0(r3)
	addi	r3,r3,4
	adde	r0,r0,r6
dstnr;	stw	r6,0(r4)
	addi	r4,r4,4
	subi	r5,r5,4

.Lcopy_tail_halfword:			/* Up to 3 bytes to go */
	srdi.	r6,r5,1
	beq	.Lcopy_tail_byte

srcnr;	lhz	r6,0(r3)
	addi	r3,r3,2
	adde	r0,r0,r6
dstnr;	sth	r6,0(r4)
	addi	r4,r4,2
	subi	r5,r5,2

.Lcopy_tail_byte:			/* Up to 1 byte to go */
	andi.	r6,r5,1
	beq	.Lcopy_finish

srcnr;	lbz	r6,0(r3)
	/*
	 * The odd byte stands for a 16-bit word whose second byte is zero.
	 * Read big-endian that makes it the high byte, so it is shifted up;
	 * read little-endian it is the low byte and is added as it is.
	 * The byte stored to dst is the unshifted one in either case.
	 */
#ifdef __BIG_ENDIAN__
	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
	adde	r0,r0,r9
#else
	adde	r0,r0,r6
#endif
dstnr;	stb	r6,0(r4)

.Lcopy_finish:
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
	add	r3,r4,r0		/* upper word = hi + lo + carry from the low word */
	srdi	r3,r3,32
	blr

	/*
	 * Fixup for a faulting "source" load: the stack frame is still
	 * live, so restore r14-r16 and pop it before reporting the error.
	 */
.Lsrc_error:
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE
	/* Fixup for a faulting "srcnr" load: nothing to restore. */
.Lsrc_error_nr:
	cmpdi	0,r7,0
	beqlr				/* src_err == NULL: nothing to store */
	li	r6,-EFAULT
	stw	r6,0(r7)
	blr

	/*
	 * Fixup for a faulting "dest" store: the stack frame is still
	 * live, so restore r14-r16 and pop it before reporting the error.
	 */
.Ldest_error:
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE
	/* Fixup for a faulting "dstnr" store: nothing to restore. */
.Ldest_error_nr:
	cmpdi	0,r8,0
	beqlr				/* dst_err == NULL: nothing to store */
	li	r6,-EFAULT
	stw	r6,0(r8)
	blr