2019-05-27 08:55:01 +02:00
/* SPDX-License-Identifier: GPL-2.0-or-later */
2005-09-26 16:04:21 +10:00
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
2005-09-26 16:04:21 +10:00
/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * __csum_partial(r3=buff, r4=len, r5=sum)
 *
 * Register use inside the routine:
 *   r0        64-bit running sum, seeded from r5; every addition is an
 *             adde so the carry of one add feeds the next, and the last
 *             carry is folded in with addze at .Lcsum_finish
 *   r3        read pointer, advanced as data is consumed; on return it
 *             holds the result (the two 32-bit halves of r0 added
 *             together, shifted down into the low 32 bits)
 *   r4        number of bytes still to be summed
 *   r6, r7    scratch (loop counts, alignment computation, loaded data)
 *   r9-r12,
 *   r14-r16   doublewords loaded by the unrolled loop; r14-r16 are saved
 *             in a stack frame before that loop and restored after it
 */
_GLOBAL(__csum_partial)
	addic	r0,r5,0			/* clear carry */

	srdi.	r6,r4,3			/* less than 8 bytes? */
	beq	.Lcsum_tail_word

	/*
	 * If only halfword aligned, align to a double word. Since odd
	 * aligned addresses should be rare and they would require more
	 * work to calculate the correct checksum, we ignore that case
	 * and take the potential slowdown of unaligned loads.
	 */
	rldicl.	r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
	beq	.Lcsum_aligned

	li	r7,4
	sub	r6,r7,r6		/* 4 - r6 halfwords to sum first */
	mtctr	r6

1:
	lhz	r6,0(r3)		/* align to doubleword */
	subi	r4,r4,2
	addi	r3,r3,2
	adde	r0,r0,r6
	bdnz	1b

.Lcsum_aligned:
	/*
	 * We unroll the loop such that each iteration is 64 bytes with an
	 * entry and exit limb of 64 bytes, meaning a minimum size of
	 * 128 bytes.
	 */
	srdi.	r6,r4,7
	beq	.Lcsum_tail_doublewords	/* len < 128 */

	srdi	r6,r4,6
	subi	r6,r6,1			/* the exit limb handles the last 64 bytes */
	mtctr	r6

	/* r14-r16 are used as extra data registers below: save them. */
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	/* Entry limb: preload the first four doublewords. */
	ld	r6,0(r3)
	ld	r9,8(r3)

	ld	r10,16(r3)
	ld	r11,24(r3)

	/*
	 * On POWER6 and POWER7 back to back adde instructions take 2 cycles
	 * because of the XER dependency. This means the fastest this loop can
	 * go is 16 cycles per iteration. The scheduling of the loop below has
	 * been shown to hit this on both POWER6 and POWER7.
	 */
	.align 5
2:
	adde	r0,r0,r6
	ld	r12,32(r3)
	ld	r14,40(r3)

	adde	r0,r0,r9
	ld	r15,48(r3)
	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10

	adde	r0,r0,r11

	adde	r0,r0,r12

	adde	r0,r0,r14

	adde	r0,r0,r15
	ld	r6,0(r3)		/* preload for the next 64-byte chunk */
	ld	r9,8(r3)

	adde	r0,r0,r16
	ld	r10,16(r3)
	ld	r11,24(r3)
	bdnz	2b

	/* Exit limb: same as the loop body but without the preloads. */
	adde	r0,r0,r6
	ld	r12,32(r3)
	ld	r14,40(r3)

	adde	r0,r0,r9
	ld	r15,48(r3)
	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
	adde	r0,r0,r11
	adde	r0,r0,r12
	adde	r0,r0,r14
	adde	r0,r0,r15
	adde	r0,r0,r16

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE

	andi.	r4,r4,63		/* bytes left after the 64-byte chunks */

.Lcsum_tail_doublewords:		/* Up to 127 bytes to go */
	srdi.	r6,r4,3
	beq	.Lcsum_tail_word

	mtctr	r6
3:
	ld	r6,0(r3)
	addi	r3,r3,8
	adde	r0,r0,r6
	bdnz	3b

	andi.	r4,r4,7

.Lcsum_tail_word:			/* Up to 7 bytes to go */
	srdi.	r6,r4,2
	beq	.Lcsum_tail_halfword

	lwz	r6,0(r3)
	addi	r3,r3,4
	adde	r0,r0,r6
	subi	r4,r4,4

.Lcsum_tail_halfword:			/* Up to 3 bytes to go */
	srdi.	r6,r4,1
	beq	.Lcsum_tail_byte

	lhz	r6,0(r3)
	addi	r3,r3,2
	adde	r0,r0,r6
	subi	r4,r4,2

.Lcsum_tail_byte:			/* Up to 1 byte to go */
	andi.	r6,r4,1
	beq	.Lcsum_finish

	lbz	r6,0(r3)
#ifdef __BIG_ENDIAN__
	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
	adde	r0,r0,r9
#else
	adde	r0,r0,r6
#endif

.Lcsum_finish:
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
	add	r3,r4,r0
	srdi	r3,r3,32
	blr
EXPORT_SYMBOL(__csum_partial)
2005-09-26 16:04:21 +10:00
2010-08-02 20:09:52 +00:00
2013-10-01 17:11:35 +10:00
/*
 * Fault-annotation macros used by csum_partial_copy_generic.
 *
 * Each macro is written on the same line as, and immediately before, a
 * load or store ("source; ld r6,0(r3)").  It drops a numeric local label
 * at that instruction and emits an EX_TABLE entry pairing the label with
 * a fixup label (presumably so a fault at that instruction resumes at the
 * fixup -- see EX_TABLE in the kernel headers).
 *
 *   srcnr / dstnr  fixup is .Lerror_nr: used for accesses made while no
 *                  stack frame is active ("nr" = nothing to restore)
 *   source / dest  fixup is .Lerror: used inside the unrolled loop, where
 *                  r14-r16 are saved in a stack frame that .Lerror unwinds
 *
 * The source and destination variants use different label numbers but
 * otherwise expand identically.
 */
	.macro srcnr
100:
	EX_TABLE(100b,.Lerror_nr)
	.endm

	.macro source
150:
	EX_TABLE(150b,.Lerror)
	.endm

	.macro dstnr
200:
	EX_TABLE(200b,.Lerror_nr)
	.endm

	.macro dest
250:
	EX_TABLE(250b,.Lerror)
	.endm
2005-09-26 16:04:21 +10:00
/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in 0xffffffff (32-bit), while copying the block to dst.
 * If an access exception occurs, it returns 0.
 *
 * csum_partial_copy_generic(r3=src, r4=dst, r5=len)
 *
 * Register use inside the routine:
 *   r0        64-bit running sum, seeded with -1; every addition is an
 *             adde so the carry of one add feeds the next, and the last
 *             carry is folded in with addze at .Lcopy_finish
 *   r3        source pointer, advanced as data is read; on return it
 *             holds the result (the two 32-bit halves of r0 added
 *             together, shifted down into the low 32 bits), or 0 when a
 *             load or store faulted
 *   r4        destination pointer, advanced as data is written
 *   r5        number of bytes still to be processed
 *   r6, r9    scratch (loop counts, alignment computation, data)
 *   r9-r12,
 *   r14-r16   doublewords in flight in the unrolled loop; r14-r16 are
 *             saved in a stack frame before that loop and restored after
 *
 * Every load and store that touches src or dst is prefixed with one of
 * the srcnr/source/dstnr/dest macros so that a fault lands on .Lerror
 * (stack frame active) or .Lerror_nr (no stack frame).
 */
_GLOBAL(csum_partial_copy_generic)
	li	r6,-1
	addic	r0,r6,0			/* clear carry */

	srdi.	r6,r5,3			/* less than 8 bytes? */
	beq	.Lcopy_tail_word

	/*
	 * If only halfword aligned, align to a double word. Since odd
	 * aligned addresses should be rare and they would require more
	 * work to calculate the correct checksum, we ignore that case
	 * and take the potential slowdown of unaligned loads.
	 *
	 * If the source and destination are relatively unaligned we only
	 * align the source. This keeps things simple.
	 */
	rldicl.	r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
	beq	.Lcopy_aligned

	li	r9,4
	sub	r6,r9,r6		/* 4 - r6 halfwords to copy first */
	mtctr	r6

1:
srcnr;	lhz	r6,0(r3)		/* align to doubleword */
	subi	r5,r5,2
	addi	r3,r3,2
	adde	r0,r0,r6
dstnr;	sth	r6,0(r4)
	addi	r4,r4,2
	bdnz	1b

.Lcopy_aligned:
	/*
	 * We unroll the loop such that each iteration is 64 bytes with an
	 * entry and exit limb of 64 bytes, meaning a minimum size of
	 * 128 bytes.
	 */
	srdi.	r6,r5,7
	beq	.Lcopy_tail_doublewords	/* len < 128 */

	srdi	r6,r5,6
	subi	r6,r6,1			/* the exit limb handles the last 64 bytes */
	mtctr	r6

	/* r14-r16 are used as extra data registers below: save them. */
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	/* Entry limb: preload the first four doublewords. */
source;	ld	r6,0(r3)
source;	ld	r9,8(r3)

source;	ld	r10,16(r3)
source;	ld	r11,24(r3)

	/*
	 * On POWER6 and POWER7 back to back adde instructions take 2 cycles
	 * because of the XER dependency. This means the fastest this loop can
	 * go is 16 cycles per iteration. The scheduling of the loop below has
	 * been shown to hit this on both POWER6 and POWER7.
	 */
	.align 5
2:
	adde	r0,r0,r6
source;	ld	r12,32(r3)
source;	ld	r14,40(r3)

	adde	r0,r0,r9
source;	ld	r15,48(r3)
source;	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
dest;	std	r6,0(r4)
dest;	std	r9,8(r4)

	adde	r0,r0,r11
dest;	std	r10,16(r4)
dest;	std	r11,24(r4)

	adde	r0,r0,r12
dest;	std	r12,32(r4)
dest;	std	r14,40(r4)

	adde	r0,r0,r14
dest;	std	r15,48(r4)
dest;	std	r16,56(r4)
	addi	r4,r4,64

	adde	r0,r0,r15
source;	ld	r6,0(r3)		/* preload for the next 64-byte chunk */
source;	ld	r9,8(r3)

	adde	r0,r0,r16
source;	ld	r10,16(r3)
source;	ld	r11,24(r3)
	bdnz	2b

	/* Exit limb: same as the loop body but without the preloads. */
	adde	r0,r0,r6
source;	ld	r12,32(r3)
source;	ld	r14,40(r3)

	adde	r0,r0,r9
source;	ld	r15,48(r3)
source;	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
dest;	std	r6,0(r4)
dest;	std	r9,8(r4)

	adde	r0,r0,r11
dest;	std	r10,16(r4)
dest;	std	r11,24(r4)

	adde	r0,r0,r12
dest;	std	r12,32(r4)
dest;	std	r14,40(r4)

	adde	r0,r0,r14
dest;	std	r15,48(r4)
dest;	std	r16,56(r4)
	addi	r4,r4,64

	adde	r0,r0,r15
	adde	r0,r0,r16

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE

	andi.	r5,r5,63		/* bytes left after the 64-byte chunks */

.Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
	srdi.	r6,r5,3
	beq	.Lcopy_tail_word

	mtctr	r6
3:
srcnr;	ld	r6,0(r3)
	addi	r3,r3,8
	adde	r0,r0,r6
dstnr;	std	r6,0(r4)
	addi	r4,r4,8
	bdnz	3b

	andi.	r5,r5,7

.Lcopy_tail_word:			/* Up to 7 bytes to go */
	srdi.	r6,r5,2
	beq	.Lcopy_tail_halfword

srcnr;	lwz	r6,0(r3)
	addi	r3,r3,4
	adde	r0,r0,r6
dstnr;	stw	r6,0(r4)
	addi	r4,r4,4
	subi	r5,r5,4

.Lcopy_tail_halfword:			/* Up to 3 bytes to go */
	srdi.	r6,r5,1
	beq	.Lcopy_tail_byte

srcnr;	lhz	r6,0(r3)
	addi	r3,r3,2
	adde	r0,r0,r6
dstnr;	sth	r6,0(r4)
	addi	r4,r4,2
	subi	r5,r5,2

.Lcopy_tail_byte:			/* Up to 1 byte to go */
	andi.	r6,r5,1
	beq	.Lcopy_finish

srcnr;	lbz	r6,0(r3)
#ifdef __BIG_ENDIAN__
	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
	adde	r0,r0,r9
#else
	adde	r0,r0,r6
#endif
dstnr;	stb	r6,0(r4)

.Lcopy_finish:
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
	add	r3,r4,r0
	srdi	r3,r3,32
	blr

	/* Fault inside the unrolled loop: undo the stack frame first. */
.Lerror:
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE
	/* Fault with no stack frame active (also the tail of .Lerror). */
.Lerror_nr:
	li	r3,0			/* report the failure as a 0 checksum */
	blr

EXPORT_SYMBOL(csum_partial_copy_generic)
powerpc: Implement csum_ipv6_magic in assembly
The generic csum_ipv6_magic() generates a pretty bad result
00000000 <csum_ipv6_magic>: (PPC32)
0: 81 23 00 00 lwz r9,0(r3)
4: 81 03 00 04 lwz r8,4(r3)
8: 7c e7 4a 14 add r7,r7,r9
c: 7d 29 38 10 subfc r9,r9,r7
10: 7d 4a 51 10 subfe r10,r10,r10
14: 7d 27 42 14 add r9,r7,r8
18: 7d 2a 48 50 subf r9,r10,r9
1c: 80 e3 00 08 lwz r7,8(r3)
20: 7d 08 48 10 subfc r8,r8,r9
24: 7d 4a 51 10 subfe r10,r10,r10
28: 7d 29 3a 14 add r9,r9,r7
2c: 81 03 00 0c lwz r8,12(r3)
30: 7d 2a 48 50 subf r9,r10,r9
34: 7c e7 48 10 subfc r7,r7,r9
38: 7d 4a 51 10 subfe r10,r10,r10
3c: 7d 29 42 14 add r9,r9,r8
40: 7d 2a 48 50 subf r9,r10,r9
44: 80 e4 00 00 lwz r7,0(r4)
48: 7d 08 48 10 subfc r8,r8,r9
4c: 7d 4a 51 10 subfe r10,r10,r10
50: 7d 29 3a 14 add r9,r9,r7
54: 7d 2a 48 50 subf r9,r10,r9
58: 81 04 00 04 lwz r8,4(r4)
5c: 7c e7 48 10 subfc r7,r7,r9
60: 7d 4a 51 10 subfe r10,r10,r10
64: 7d 29 42 14 add r9,r9,r8
68: 7d 2a 48 50 subf r9,r10,r9
6c: 80 e4 00 08 lwz r7,8(r4)
70: 7d 08 48 10 subfc r8,r8,r9
74: 7d 4a 51 10 subfe r10,r10,r10
78: 7d 29 3a 14 add r9,r9,r7
7c: 7d 2a 48 50 subf r9,r10,r9
80: 81 04 00 0c lwz r8,12(r4)
84: 7c e7 48 10 subfc r7,r7,r9
88: 7d 4a 51 10 subfe r10,r10,r10
8c: 7d 29 42 14 add r9,r9,r8
90: 7d 2a 48 50 subf r9,r10,r9
94: 7d 08 48 10 subfc r8,r8,r9
98: 7d 4a 51 10 subfe r10,r10,r10
9c: 7d 29 2a 14 add r9,r9,r5
a0: 7d 2a 48 50 subf r9,r10,r9
a4: 7c a5 48 10 subfc r5,r5,r9
a8: 7c 63 19 10 subfe r3,r3,r3
ac: 7d 29 32 14 add r9,r9,r6
b0: 7d 23 48 50 subf r9,r3,r9
b4: 7c c6 48 10 subfc r6,r6,r9
b8: 7c 63 19 10 subfe r3,r3,r3
bc: 7c 63 48 50 subf r3,r3,r9
c0: 54 6a 80 3e rotlwi r10,r3,16
c4: 7c 63 52 14 add r3,r3,r10
c8: 7c 63 18 f8 not r3,r3
cc: 54 63 84 3e rlwinm r3,r3,16,16,31
d0: 4e 80 00 20 blr
0000000000000000 <.csum_ipv6_magic>: (PPC64)
0: 81 23 00 00 lwz r9,0(r3)
4: 80 03 00 04 lwz r0,4(r3)
8: 81 63 00 08 lwz r11,8(r3)
c: 7c e7 4a 14 add r7,r7,r9
10: 7f 89 38 40 cmplw cr7,r9,r7
14: 7d 47 02 14 add r10,r7,r0
18: 7d 30 10 26 mfocrf r9,1
1c: 55 29 f7 fe rlwinm r9,r9,30,31,31
20: 7d 4a 4a 14 add r10,r10,r9
24: 7f 80 50 40 cmplw cr7,r0,r10
28: 7d 2a 5a 14 add r9,r10,r11
2c: 80 03 00 0c lwz r0,12(r3)
30: 81 44 00 00 lwz r10,0(r4)
34: 7d 10 10 26 mfocrf r8,1
38: 55 08 f7 fe rlwinm r8,r8,30,31,31
3c: 7d 29 42 14 add r9,r9,r8
40: 81 04 00 04 lwz r8,4(r4)
44: 7f 8b 48 40 cmplw cr7,r11,r9
48: 7d 29 02 14 add r9,r9,r0
4c: 7d 70 10 26 mfocrf r11,1
50: 55 6b f7 fe rlwinm r11,r11,30,31,31
54: 7d 29 5a 14 add r9,r9,r11
58: 7f 80 48 40 cmplw cr7,r0,r9
5c: 7d 29 52 14 add r9,r9,r10
60: 7c 10 10 26 mfocrf r0,1
64: 54 00 f7 fe rlwinm r0,r0,30,31,31
68: 7d 69 02 14 add r11,r9,r0
6c: 7f 8a 58 40 cmplw cr7,r10,r11
70: 7c 0b 42 14 add r0,r11,r8
74: 81 44 00 08 lwz r10,8(r4)
78: 7c f0 10 26 mfocrf r7,1
7c: 54 e7 f7 fe rlwinm r7,r7,30,31,31
80: 7c 00 3a 14 add r0,r0,r7
84: 7f 88 00 40 cmplw cr7,r8,r0
88: 7d 20 52 14 add r9,r0,r10
8c: 80 04 00 0c lwz r0,12(r4)
90: 7d 70 10 26 mfocrf r11,1
94: 55 6b f7 fe rlwinm r11,r11,30,31,31
98: 7d 29 5a 14 add r9,r9,r11
9c: 7f 8a 48 40 cmplw cr7,r10,r9
a0: 7d 29 02 14 add r9,r9,r0
a4: 7d 70 10 26 mfocrf r11,1
a8: 55 6b f7 fe rlwinm r11,r11,30,31,31
ac: 7d 29 5a 14 add r9,r9,r11
b0: 7f 80 48 40 cmplw cr7,r0,r9
b4: 7d 29 2a 14 add r9,r9,r5
b8: 7c 10 10 26 mfocrf r0,1
bc: 54 00 f7 fe rlwinm r0,r0,30,31,31
c0: 7d 29 02 14 add r9,r9,r0
c4: 7f 85 48 40 cmplw cr7,r5,r9
c8: 7c 09 32 14 add r0,r9,r6
cc: 7d 50 10 26 mfocrf r10,1
d0: 55 4a f7 fe rlwinm r10,r10,30,31,31
d4: 7c 00 52 14 add r0,r0,r10
d8: 7f 80 30 40 cmplw cr7,r0,r6
dc: 7d 30 10 26 mfocrf r9,1
e0: 55 29 ef fe rlwinm r9,r9,29,31,31
e4: 7c 09 02 14 add r0,r9,r0
e8: 54 03 80 3e rotlwi r3,r0,16
ec: 7c 03 02 14 add r0,r3,r0
f0: 7c 03 00 f8 not r3,r0
f4: 78 63 84 22 rldicl r3,r3,48,48
f8: 4e 80 00 20 blr
This patch implements it in assembly for both PPC32 and PPC64
Link: https://github.com/linuxppc/linux/issues/9
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-05-24 11:33:18 +00:00
/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 *
 * The code reads two doublewords from the address in r3 and two from the
 * address in r4 (saddr and daddr), and adds r5, r6 and r7 (len, proto and
 * sum, taking the arguments in declaration order) into the same total.
 *
 * r0 accumulates the 64-bit sum with the carry chained through
 * addc/adde/addze.  The total is then folded from 64 to 32 bits and from
 * 32 to 16 bits, complemented, and returned in the low 16 bits of r3.
 * r8-r11 hold the loaded address doublewords.
 */
_GLOBAL(csum_ipv6_magic)
	ld	r8, 0(r3)
	ld	r9, 8(r3)
	add	r5, r5, r6		/* r5 = len + proto */
	addc	r0, r8, r9		/* starts the carry chain */
	ld	r10, 0(r4)
	ld	r11, 8(r4)
#ifdef CONFIG_CPU_LITTLE_ENDIAN
	/*
	 * len + proto is rotated left by one byte on little-endian builds
	 * (presumably to line it up with the byte order of the loaded
	 * address words -- confirm against the C implementation).
	 */
	rotldi	r5, r5, 8
#endif
	adde	r0, r0, r10
	add	r5, r5, r7		/* r5 += sum (plain add, carry untouched) */
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0
	rotldi	r3, r0, 32		/* fold two 32 bit halves together */
	add	r3, r0, r3
	srdi	r0, r3, 32
	rotlwi	r3, r0, 16		/* fold two 16 bit halves together */
	add	r3, r0, r3
	not	r3, r3
	rlwinm	r3, r3, 16, 16, 31	/* result = upper halfword, moved to bits 16-31 */
	blr
EXPORT_SYMBOL(csum_ipv6_magic)