/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
# include < a s m / p r o c e s s o r . h >
# include < a s m / c a c h e . h >
# include < a s m / e r r n o . h >
# include < a s m / p p c _ a s m . h >
/*
 * COPY_16_BYTES
 *
 * Copy four consecutive words from 4(r4)..16(r4) to 4(r6)..16(r6).
 * All four loads are issued before the four stores.  The last load and
 * the last store use the update forms (lwzu/stwu), so r4 and r6 are both
 * advanced by 16 when the macro completes.
 *
 * Scratch registers: r7, r8, r9, r10.
 */
#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)
/*
 * COPY_16_BYTES_WITHEX(n)
 *
 * Same instruction sequence as COPY_16_BYTES, but every load and store
 * carries its own numeric label so that it can be named in an __ex_table
 * entry:
 *
 *	8<n>0 .. 8<n>3	the four loads  (lwz, lwz, lwz, lwzu)
 *	8<n>4 .. 8<n>7	the four stores (stw, stw, stw, stwu)
 *
 * The matching table entries and fixup stubs are generated by
 * COPY_16_BYTES_EXCODE(n) with the same value of n.
 *
 * Scratch registers: r7, r8, r9, r10.  r4 and r6 advance by 16.
 */
#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)
/*
 * COPY_16_BYTES_EXCODE(n)
 *
 * Fixup stubs and __ex_table entries for the labels emitted by
 * COPY_16_BYTES_WITHEX(n).
 *
 *	9<n>0	target for the four load labels  (8<n>0 .. 8<n>3);
 *		subtracts 16*n from r5 and branches forward to 104.
 *	9<n>1	target for the four store labels (8<n>4 .. 8<n>7);
 *		subtracts 16*n from r5 and branches forward to 105.
 *
 * Each __ex_table entry is a pair of words: the address of the labelled
 * instruction followed by the address of its fixup stub.  The macro
 * switches back to .text before it ends.
 *
 * The user of this macro must provide the local labels 104 and 105 after
 * the expansion.
 */
#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
.section __ex_table,"a";			\
	.align	2;				\
	.long	8 ## n ## 0b,9 ## n ## 0b;	\
	.long	8 ## n ## 1b,9 ## n ## 0b;	\
	.long	8 ## n ## 2b,9 ## n ## 0b;	\
	.long	8 ## n ## 3b,9 ## n ## 0b;	\
	.long	8 ## n ## 4b,9 ## n ## 1b;	\
	.long	8 ## n ## 5b,9 ## n ## 1b;	\
	.long	8 ## n ## 6b,9 ## n ## 1b;	\
	.long	8 ## n ## 7b,9 ## n ## 1b;	\
	.text
/*
 * Start of the code section.  The two .stabs N_SO records carry the
 * source directory and file name; both refer to local label 0, which is
 * defined immediately after them.
 */
	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"copy_32.S",N_SO,0,0,0f
0:

/*
 * Cache-line geometry used by the routines below, derived from the
 * L1_CACHE_* constants (presumably supplied by <asm/cache.h>):
 *
 *	CACHELINE_BYTES		size of one cache line in bytes
 *	LG_CACHELINE_BYTES	shift count corresponding to that size
 *	CACHELINE_MASK		mask selecting the byte offset within a line
 */
CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 *
 * In:	r3 = destination address
 *	r4 = number of bytes to clear
 * Out:	r3 is not modified.
 * Uses: r0, r4-r9, ctr, cr0.
 *
 * r6 is kept 4 bytes behind the next word to store so that the
 * update-form store "stwu r4,4(r6)" both writes and advances.
 */
_GLOBAL(cacheable_memzero)
	mr	r5,r4			/* r5 = byte count */
	li	r4,0			/* r4 = value to store (zero) */
	addi	r6,r3,-4		/* r6 = dest - 4 */
	cmplwi	0,r5,4
	blt	7f			/* fewer than 4 bytes: bytes only */
	stwu	r4,4(r6)		/* first word, at dest itself */
	beqlr				/* exactly 4 bytes: done */
	andi.	r0,r6,3			/* r0 = dest & 3 */
	add	r5,r0,r5		/* move r6 back to a word boundary */
	subf	r6,r0,r6		/* and grow the count to match */
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES /* r7 = offset of r6 in its line */
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1		/* total number of complete cachelines */
	ble	2f			/* none: plain word loop */
	xori	r0,r7,CACHELINE_MASK & ~3 /* bytes from r6 to last word of line */
	srwi.	r0,r0,2			/* ... as a word count */
	beq	3f			/* already at the last word of a line */
	mtctr	r0
4:	stwu	r4,4(r6)		/* zero words up to the line boundary */
	bdnz	4b
3:	mtctr	r9
	li	r7,4			/* dcbz operates on the line at r6 + 4 */
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES /* bytes left after the last full line */
	addi	r5,r5,4			/* offset the bdz pre-decrement below */
2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f			/* one word fewer than r5 / 4 */
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3			/* trailing bytes */
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3			/* r6 = address of next byte, minus 1 */
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

/*
 * memset: fill a block of memory with a byte value.
 *
 * In:	r3 = destination address
 *	r4 = fill value (only the low byte is used)
 *	r5 = number of bytes
 * Out:	r3 is not modified.
 * Uses: r0, r4-r6, ctr, cr0.
 *
 * The fill byte is first replicated into all four bytes of r4 so that
 * whole words can be stored.  r6 is kept 4 bytes behind the next word
 * to store so that "stwu r4,4(r6)" both writes and advances.
 */
_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23		/* low byte -> low halfword */
	rlwimi	r4,r4,16,0,15		/* low halfword -> whole word */
	addi	r6,r3,-4		/* r6 = dest - 4 */
	cmplwi	0,r5,4
	blt	7f			/* fewer than 4 bytes: bytes only */
	stwu	r4,4(r6)		/* first word, at dest itself */
	beqlr				/* exactly 4 bytes: done */
	andi.	r0,r6,3			/* r0 = dest & 3 */
	add	r5,r0,r5		/* move r6 back to a word boundary */
	subf	r6,r0,r6		/* and grow the count to match */
	srwi	r0,r5,2
	mtctr	r0
	bdz	6f			/* first word is already stored */
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3			/* trailing bytes */
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3			/* r6 = address of next byte, minus 1 */
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * In:	r3 = destination address
 *	r4 = source address
 *	r5 = number of bytes
 * Out:	r3 is not modified.
 * Uses: r0, r4-r11, ctr, cr0, cr1.
 *
 * r4 and r6 are biased by -4 so that the update-form word accesses
 * "lwzu/stwu rX,4(rY)" both transfer and advance.
 */
_GLOBAL(cacheable_memcpy)
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7			/* cr0.lt = src < dst + n */
	cmplw	1,r3,r8			/* cr1.lt = dst < src + n */
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	memcpy			/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f			/* dest already line-aligned */

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5		/* r5 = bytes left after alignment */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	srwi.	r0,r0,2			/* words up to the line boundary */
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES /* r5 = bytes beyond those lines */
	li	r11,4			/* dcbz operates on the line at r6 + 4 */
	mtctr	r0
	beq	63f
53:
	dcbz	r11,r6			/* zero the dest line, then fill it */
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2			/* remaining whole words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3			/* remaining bytes */
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	blr

/*
 * memmove: copy r5 bytes from r4 to r3.
 *
 * If the destination is above the source (unsigned compare) the copy is
 * done from the end by backwards_memcpy; otherwise execution falls
 * through into memcpy.
 */
_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

/*
 * memcpy: forward copy.
 *
 * In:	r3 = destination address
 *	r4 = source address
 *	r5 = number of bytes
 * Out:	r3 is not modified.
 * Uses: r0, r4-r8, ctr, cr0.
 *
 * r4 and r6 are biased by -4 so that the update-form word accesses
 * advance the pointers as they transfer.  The main loop moves 8 bytes
 * per iteration once the destination is word aligned.
 */
_GLOBAL(memcpy)
	srwi.	r7,r5,3			/* r7 = number of 8-byte chunks */
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7			/* bytes left after the 8-byte loop */
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)		/* one more whole word */
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3			/* switch to a -1 bias for lbzu/stbu */
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4			/* r0 = bytes needed to align dest */
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5		/* r5 = bytes left after alignment */
	srwi.	r7,r5,3			/* recompute the 8-byte chunk count */
	beq	2b
	mtctr	r7
	b	1b
/*
 * backwards_memcpy: copy r5 bytes from r4 to r3, starting at the end of
 * both buffers and working towards the start.
 *
 * In:	r3 = destination address
 *	r4 = source address
 *	r5 = number of bytes
 * Out:	r3 is not modified.
 * Uses: r0, r4-r8, ctr, cr0.
 *
 * r4 and r6 point one past the last byte still to be copied; the
 * update-form accesses with negative displacements move them down.
 */
_GLOBAL(backwards_memcpy)
	srwi.	r7,r5,3			/* r7 = r5 >> 3 (8-byte chunks) */
	add	r4,r4,r5		/* r4 = end of source */
	add	r6,r3,r5		/* r6 = end of destination */
	beq	2f			/* fewer than 8 bytes in total */
	andi.	r0,r6,3			/* r0 = bytes above a word boundary */
	mtctr	r7
	bne	5f			/* dest end not word aligned */
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7			/* bytes left after the 8-byte loop */
2:	cmplwi	r5,4
	blt	3f
	lwzu	r0,-4(r4)		/* one more whole word */
	addi	r5,r5,-4
	stwu	r0,-4(r6)
3:	cmpwi	r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0			/* copy bytes until dest is aligned */
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	sub	r5,r5,r0		/* r5 = bytes left after alignment */
	srwi.	r7,r5,3			/* recompute the 8-byte chunk count */
	beq	2b
	mtctr	r7
	b	1b
/*
 * __copy_tofrom_user: copy with fault recovery.
 *
 * In:	r3 = destination address
 *	r4 = source address
 *	r5 = number of bytes
 * Out:	r3 = number of bytes NOT copied (0 when the whole copy succeeded).
 * Uses: r0, r3-r11, ctr, cr0.
 *
 * Every load and store that may fault carries a numeric label that is
 * listed in __ex_table together with a fixup label.  The fixup code
 * works out how many bytes remain as r5 + (ctr << r3) and records in r9
 * whether the faulting access was a read (0) or a write (1).  After a
 * read fault the copy is retried one byte at a time and whatever still
 * cannot be read is cleared in the destination.
 *
 * r4 and r6 are biased by -4 so that the update-form word accesses
 * advance the pointers as they transfer.
 *
 * Note: the numeric labels 111, 112 and 114 are defined twice in this
 * function (prefetch set-up and fixup code).  Each "b"/"f" reference
 * resolves to the nearest definition in the stated direction.
 */
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5		/* r5 = bytes left after alignment */
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	.section __ex_table,"a"
	.align	2
	.long	70b,100f
	.long	71b,101f
	.long	72b,102f
	.long	73b,103f
	.text

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4			/* dcbz operates on the line at r6 + 4 */
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4			/* r3 = prefetch offset from r4 */
	cmpwi	r0,1
	li	r7,0			/* r7 = lines held back for the last pass */
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0		/* lines to copy in this pass */
	mr	r0,r7			/* r0 = lines left for the next pass */
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	.section __ex_table,"a"
	.align	2
	.long	54b,105f
	.text
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0			/* any held-back lines left? */
	li	r3,4
	li	r7,0
	bne	114b			/* yes: run one more pass over them */

63:	srwi.	r0,r5,2			/* remaining whole words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3			/* remaining bytes */
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0			/* success: nothing left uncopied */
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5		/* r5 was not yet reduced in this phase */
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8		/* held-back lines + lines left in ctr */
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f			/* write fault: just return the count */
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3			/* r3 = bytes still not copied */
	srwi.	r0,r3,2
	li	r9,0
	mtctr	r0
	beq	113f
112:	stwu	r9,4(r6)
	bdnz	112b
113:	andi.	r0,r3,3
	mtctr	r0
	beq	120f
114:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	114b
120:	blr

	.section __ex_table,"a"
	.align	2
	.long	30b,108b
	.long	31b,109b
	.long	40b,110b
	.long	41b,111b
	.long	130b,132b
	.long	131b,120b
	.long	112b,120b
	.long	114b,120b
	.text