/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>
2018-08-03 20:13:04 +10:00
# ifndef S E L F T E S T _ C A S E
/* 0 == don't use VMX, 1 == use VMX */
# define S E L F T E S T _ C A S E 0
# endif
2013-09-23 12:04:35 +10:00
# ifdef _ _ B I G _ E N D I A N _ _
# define L V S ( V R T ,R A ,R B ) l v s l V R T ,R A ,R B
# define V P E R M ( V R T ,V R A ,V R B ,V R C ) v p e r m V R T ,V R A ,V R B ,V R C
# else
# define L V S ( V R T ,R A ,R B ) l v s r V R T ,R A ,R B
# define V P E R M ( V R T ,V R A ,V R B ,V R C ) v p e r m V R T ,V R B ,V R A ,V R C
# endif
2018-08-03 20:13:04 +10:00
_ GLOBAL( m e m c p y _ p o w e r7 )
2012-05-30 20:22:09 +00:00
cmpldi r5 ,1 6
cmpldi c r1 ,r5 ,4 0 9 6
2014-02-14 19:21:03 +01:00
std r3 ,- S T A C K F R A M E S I Z E + S T K _ R E G ( R 3 1 ) ( r1 )
2012-05-30 20:22:09 +00:00
blt . L s h o r t _ c o p y
2018-08-03 20:13:04 +10:00
# ifdef C O N F I G _ A L T I V E C
test_ f e a t u r e = S E L F T E S T _ C A S E
BEGIN_ F T R _ S E C T I O N
bgt c r1 , . L v m x _ c o p y
END_ F T R _ S E C T I O N _ I F S E T ( C P U _ F T R _ A L T I V E C )
2012-05-30 20:22:09 +00:00
# endif
.Lnonvmx_copy :
/* Get the source 8B aligned */
neg r6 ,r4
mtocrf 0 x01 ,r6
clrldi r6 ,r6 ,( 6 4 - 3 )
bf c r7 * 4 + 3 ,1 f
lbz r0 ,0 ( r4 )
addi r4 ,r4 ,1
stb r0 ,0 ( r3 )
addi r3 ,r3 ,1
1 : bf c r7 * 4 + 2 ,2 f
lhz r0 ,0 ( r4 )
addi r4 ,r4 ,2
sth r0 ,0 ( r3 )
addi r3 ,r3 ,2
2 : bf c r7 * 4 + 1 ,3 f
lwz r0 ,0 ( r4 )
addi r4 ,r4 ,4
stw r0 ,0 ( r3 )
addi r3 ,r3 ,4
3 : sub r5 ,r5 ,r6
cmpldi r5 ,1 2 8
blt 5 f
mflr r0
stdu r1 ,- S T A C K F R A M E S I Z E ( r1 )
2012-06-25 13:33:10 +00:00
std r14 ,S T K _ R E G ( R 1 4 ) ( r1 )
std r15 ,S T K _ R E G ( R 1 5 ) ( r1 )
std r16 ,S T K _ R E G ( R 1 6 ) ( r1 )
std r17 ,S T K _ R E G ( R 1 7 ) ( r1 )
std r18 ,S T K _ R E G ( R 1 8 ) ( r1 )
std r19 ,S T K _ R E G ( R 1 9 ) ( r1 )
std r20 ,S T K _ R E G ( R 2 0 ) ( r1 )
std r21 ,S T K _ R E G ( R 2 1 ) ( r1 )
std r22 ,S T K _ R E G ( R 2 2 ) ( r1 )
2012-05-30 20:22:09 +00:00
std r0 ,S T A C K F R A M E S I Z E + 1 6 ( r1 )
srdi r6 ,r5 ,7
mtctr r6
/* Now do cacheline (128B) sized loads and stores. */
.align 5
4 :
ld r0 ,0 ( r4 )
ld r6 ,8 ( r4 )
ld r7 ,1 6 ( r4 )
ld r8 ,2 4 ( r4 )
ld r9 ,3 2 ( r4 )
ld r10 ,4 0 ( r4 )
ld r11 ,4 8 ( r4 )
ld r12 ,5 6 ( r4 )
ld r14 ,6 4 ( r4 )
ld r15 ,7 2 ( r4 )
ld r16 ,8 0 ( r4 )
ld r17 ,8 8 ( r4 )
ld r18 ,9 6 ( r4 )
ld r19 ,1 0 4 ( r4 )
ld r20 ,1 1 2 ( r4 )
ld r21 ,1 2 0 ( r4 )
addi r4 ,r4 ,1 2 8
std r0 ,0 ( r3 )
std r6 ,8 ( r3 )
std r7 ,1 6 ( r3 )
std r8 ,2 4 ( r3 )
std r9 ,3 2 ( r3 )
std r10 ,4 0 ( r3 )
std r11 ,4 8 ( r3 )
std r12 ,5 6 ( r3 )
std r14 ,6 4 ( r3 )
std r15 ,7 2 ( r3 )
std r16 ,8 0 ( r3 )
std r17 ,8 8 ( r3 )
std r18 ,9 6 ( r3 )
std r19 ,1 0 4 ( r3 )
std r20 ,1 1 2 ( r3 )
std r21 ,1 2 0 ( r3 )
addi r3 ,r3 ,1 2 8
bdnz 4 b
clrldi r5 ,r5 ,( 6 4 - 7 )
2012-06-25 13:33:10 +00:00
ld r14 ,S T K _ R E G ( R 1 4 ) ( r1 )
ld r15 ,S T K _ R E G ( R 1 5 ) ( r1 )
ld r16 ,S T K _ R E G ( R 1 6 ) ( r1 )
ld r17 ,S T K _ R E G ( R 1 7 ) ( r1 )
ld r18 ,S T K _ R E G ( R 1 8 ) ( r1 )
ld r19 ,S T K _ R E G ( R 1 9 ) ( r1 )
ld r20 ,S T K _ R E G ( R 2 0 ) ( r1 )
ld r21 ,S T K _ R E G ( R 2 1 ) ( r1 )
ld r22 ,S T K _ R E G ( R 2 2 ) ( r1 )
2012-05-30 20:22:09 +00:00
addi r1 ,r1 ,S T A C K F R A M E S I Z E
/* Up to 127B to go */
5 : srdi r6 ,r5 ,4
mtocrf 0 x01 ,r6
6 : bf c r7 * 4 + 1 ,7 f
ld r0 ,0 ( r4 )
ld r6 ,8 ( r4 )
ld r7 ,1 6 ( r4 )
ld r8 ,2 4 ( r4 )
ld r9 ,3 2 ( r4 )
ld r10 ,4 0 ( r4 )
ld r11 ,4 8 ( r4 )
ld r12 ,5 6 ( r4 )
addi r4 ,r4 ,6 4
std r0 ,0 ( r3 )
std r6 ,8 ( r3 )
std r7 ,1 6 ( r3 )
std r8 ,2 4 ( r3 )
std r9 ,3 2 ( r3 )
std r10 ,4 0 ( r3 )
std r11 ,4 8 ( r3 )
std r12 ,5 6 ( r3 )
addi r3 ,r3 ,6 4
/* Up to 63B to go */
7 : bf c r7 * 4 + 2 ,8 f
ld r0 ,0 ( r4 )
ld r6 ,8 ( r4 )
ld r7 ,1 6 ( r4 )
ld r8 ,2 4 ( r4 )
addi r4 ,r4 ,3 2
std r0 ,0 ( r3 )
std r6 ,8 ( r3 )
std r7 ,1 6 ( r3 )
std r8 ,2 4 ( r3 )
addi r3 ,r3 ,3 2
/* Up to 31B to go */
8 : bf c r7 * 4 + 3 ,9 f
ld r0 ,0 ( r4 )
ld r6 ,8 ( r4 )
addi r4 ,r4 ,1 6
std r0 ,0 ( r3 )
std r6 ,8 ( r3 )
addi r3 ,r3 ,1 6
9 : clrldi r5 ,r5 ,( 6 4 - 4 )
/* Up to 15B to go */
.Lshort_copy :
mtocrf 0 x01 ,r5
bf c r7 * 4 + 0 ,1 2 f
lwz r0 ,0 ( r4 ) / * L e s s c h a n c e o f a r e j e c t w i t h w o r d o p s * /
lwz r6 ,4 ( r4 )
addi r4 ,r4 ,8
stw r0 ,0 ( r3 )
stw r6 ,4 ( r3 )
addi r3 ,r3 ,8
12 : bf c r7 * 4 + 1 ,1 3 f
lwz r0 ,0 ( r4 )
addi r4 ,r4 ,4
stw r0 ,0 ( r3 )
addi r3 ,r3 ,4
13 : bf c r7 * 4 + 2 ,1 4 f
lhz r0 ,0 ( r4 )
addi r4 ,r4 ,2
sth r0 ,0 ( r3 )
addi r3 ,r3 ,2
14 : bf c r7 * 4 + 3 ,1 5 f
lbz r0 ,0 ( r4 )
stb r0 ,0 ( r3 )
2014-02-14 19:21:03 +01:00
15 : ld r3 ,- S T A C K F R A M E S I Z E + S T K _ R E G ( R 3 1 ) ( r1 )
2012-05-30 20:22:09 +00:00
blr
.Lunwind_stack_nonvmx_copy :
addi r1 ,r1 ,S T A C K F R A M E S I Z E
b . L n o n v m x _ c o p y
.Lvmx_copy :
2018-08-03 20:13:04 +10:00
# ifdef C O N F I G _ A L T I V E C
2012-05-30 20:22:09 +00:00
mflr r0
2014-02-14 19:21:03 +01:00
std r4 ,- S T A C K F R A M E S I Z E + S T K _ R E G ( R 3 0 ) ( r1 )
std r5 ,- S T A C K F R A M E S I Z E + S T K _ R E G ( R 2 9 ) ( r1 )
2012-05-30 20:22:09 +00:00
std r0 ,1 6 ( r1 )
stdu r1 ,- S T A C K F R A M E S I Z E ( r1 )
2018-06-07 09:57:53 +08:00
bl e n t e r _ v m x _ o p s
2012-08-07 17:51:41 +00:00
cmpwi c r1 ,r3 ,0
2012-05-30 20:22:09 +00:00
ld r0 ,S T A C K F R A M E S I Z E + 1 6 ( r1 )
2014-02-14 19:21:03 +01:00
ld r3 ,S T K _ R E G ( R 3 1 ) ( r1 )
ld r4 ,S T K _ R E G ( R 3 0 ) ( r1 )
ld r5 ,S T K _ R E G ( R 2 9 ) ( r1 )
2012-05-30 20:22:09 +00:00
mtlr r0
/ *
* We p r e f e t c h b o t h t h e s o u r c e a n d d e s t i n a t i o n u s i n g e n h a n c e d t o u c h
* instructions. W e u s e a s t r e a m I D o f 0 f o r t h e l o a d s i d e a n d
* 1 for t h e s t o r e s i d e .
* /
clrrdi r6 ,r4 ,7
clrrdi r9 ,r3 ,7
ori r9 ,r9 ,1 / * s t r e a m =1 * /
srdi r7 ,r5 ,7 / * l e n g t h i n c a c h e l i n e s , c a p p e d a t 0 x3 F F * /
2012-10-01 14:59:13 +00:00
cmpldi r7 ,0 x3 F F
ble 1 f
2012-05-30 20:22:09 +00:00
li r7 ,0 x3 F F
1 : lis r0 ,0 x0 E 0 0 / * d e p t h =7 * /
sldi r7 ,r7 ,7
or r7 ,r7 ,r0
ori r10 ,r7 ,1 / * s t r e a m =1 * /
lis r8 ,0 x80 0 0 / * G O =1 * /
clrldi r8 ,r8 ,3 2
2017-08-05 19:55:11 +02:00
dcbt 0 ,r6 ,0 b01 0 0 0
dcbt 0 ,r7 ,0 b01 0 1 0
dcbtst 0 ,r9 ,0 b01 0 0 0
dcbtst 0 ,r10 ,0 b01 0 1 0
2012-05-30 20:22:09 +00:00
eieio
2017-08-05 19:55:11 +02:00
dcbt 0 ,r8 ,0 b01 0 1 0 / * G O * /
2012-05-30 20:22:09 +00:00
2012-08-07 17:51:41 +00:00
beq c r1 ,. L u n w i n d _ s t a c k _ n o n v m x _ c o p y
2012-05-30 20:22:09 +00:00
/ *
* If s o u r c e a n d d e s t i n a t i o n a r e n o t r e l a t i v e l y a l i g n e d w e u s e a
* slower p e r m u t e l o o p .
* /
xor r6 ,r4 ,r3
rldicl. r6 ,r6 ,0 ,( 6 4 - 4 )
bne . L v m x _ u n a l i g n e d _ c o p y
/* Get the destination 16B aligned */
neg r6 ,r3
mtocrf 0 x01 ,r6
clrldi r6 ,r6 ,( 6 4 - 4 )
bf c r7 * 4 + 3 ,1 f
lbz r0 ,0 ( r4 )
addi r4 ,r4 ,1
stb r0 ,0 ( r3 )
addi r3 ,r3 ,1
1 : bf c r7 * 4 + 2 ,2 f
lhz r0 ,0 ( r4 )
addi r4 ,r4 ,2
sth r0 ,0 ( r3 )
addi r3 ,r3 ,2
2 : bf c r7 * 4 + 1 ,3 f
lwz r0 ,0 ( r4 )
addi r4 ,r4 ,4
stw r0 ,0 ( r3 )
addi r3 ,r3 ,4
3 : bf c r7 * 4 + 0 ,4 f
ld r0 ,0 ( r4 )
addi r4 ,r4 ,8
std r0 ,0 ( r3 )
addi r3 ,r3 ,8
4 : sub r5 ,r5 ,r6
/* Get the desination 128B aligned */
neg r6 ,r3
srdi r7 ,r6 ,4
mtocrf 0 x01 ,r7
clrldi r6 ,r6 ,( 6 4 - 7 )
li r9 ,1 6
li r10 ,3 2
li r11 ,4 8
bf c r7 * 4 + 3 ,5 f
2017-08-05 19:55:11 +02:00
lvx v1 ,0 ,r4
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,1 6
2017-08-05 19:55:11 +02:00
stvx v1 ,0 ,r3
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,1 6
5 : bf c r7 * 4 + 2 ,6 f
2017-08-05 19:55:11 +02:00
lvx v1 ,0 ,r4
2015-02-10 09:51:22 +11:00
lvx v0 ,r4 ,r9
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,3 2
2017-08-05 19:55:11 +02:00
stvx v1 ,0 ,r3
2015-02-10 09:51:22 +11:00
stvx v0 ,r3 ,r9
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,3 2
6 : bf c r7 * 4 + 1 ,7 f
2017-08-05 19:55:11 +02:00
lvx v3 ,0 ,r4
2015-02-10 09:51:22 +11:00
lvx v2 ,r4 ,r9
lvx v1 ,r4 ,r10
lvx v0 ,r4 ,r11
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,6 4
2017-08-05 19:55:11 +02:00
stvx v3 ,0 ,r3
2015-02-10 09:51:22 +11:00
stvx v2 ,r3 ,r9
stvx v1 ,r3 ,r10
stvx v0 ,r3 ,r11
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,6 4
7 : sub r5 ,r5 ,r6
srdi r6 ,r5 ,7
2012-06-25 13:33:10 +00:00
std r14 ,S T K _ R E G ( R 1 4 ) ( r1 )
std r15 ,S T K _ R E G ( R 1 5 ) ( r1 )
std r16 ,S T K _ R E G ( R 1 6 ) ( r1 )
2012-05-30 20:22:09 +00:00
li r12 ,6 4
li r14 ,8 0
li r15 ,9 6
li r16 ,1 1 2
mtctr r6
/ *
* Now d o c a c h e l i n e s i z e d l o a d s a n d s t o r e s . B y t h i s s t a g e t h e
* cacheline s t o r e s a r e a l s o c a c h e l i n e a l i g n e d .
* /
.align 5
8 :
2017-08-05 19:55:11 +02:00
lvx v7 ,0 ,r4
2015-02-10 09:51:22 +11:00
lvx v6 ,r4 ,r9
lvx v5 ,r4 ,r10
lvx v4 ,r4 ,r11
lvx v3 ,r4 ,r12
lvx v2 ,r4 ,r14
lvx v1 ,r4 ,r15
lvx v0 ,r4 ,r16
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,1 2 8
2017-08-05 19:55:11 +02:00
stvx v7 ,0 ,r3
2015-02-10 09:51:22 +11:00
stvx v6 ,r3 ,r9
stvx v5 ,r3 ,r10
stvx v4 ,r3 ,r11
stvx v3 ,r3 ,r12
stvx v2 ,r3 ,r14
stvx v1 ,r3 ,r15
stvx v0 ,r3 ,r16
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,1 2 8
bdnz 8 b
2012-06-25 13:33:10 +00:00
ld r14 ,S T K _ R E G ( R 1 4 ) ( r1 )
ld r15 ,S T K _ R E G ( R 1 5 ) ( r1 )
ld r16 ,S T K _ R E G ( R 1 6 ) ( r1 )
2012-05-30 20:22:09 +00:00
/* Up to 127B to go */
clrldi r5 ,r5 ,( 6 4 - 7 )
srdi r6 ,r5 ,4
mtocrf 0 x01 ,r6
bf c r7 * 4 + 1 ,9 f
2017-08-05 19:55:11 +02:00
lvx v3 ,0 ,r4
2015-02-10 09:51:22 +11:00
lvx v2 ,r4 ,r9
lvx v1 ,r4 ,r10
lvx v0 ,r4 ,r11
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,6 4
2017-08-05 19:55:11 +02:00
stvx v3 ,0 ,r3
2015-02-10 09:51:22 +11:00
stvx v2 ,r3 ,r9
stvx v1 ,r3 ,r10
stvx v0 ,r3 ,r11
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,6 4
9 : bf c r7 * 4 + 2 ,1 0 f
2017-08-05 19:55:11 +02:00
lvx v1 ,0 ,r4
2015-02-10 09:51:22 +11:00
lvx v0 ,r4 ,r9
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,3 2
2017-08-05 19:55:11 +02:00
stvx v1 ,0 ,r3
2015-02-10 09:51:22 +11:00
stvx v0 ,r3 ,r9
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,3 2
10 : bf c r7 * 4 + 3 ,1 1 f
2017-08-05 19:55:11 +02:00
lvx v1 ,0 ,r4
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,1 6
2017-08-05 19:55:11 +02:00
stvx v1 ,0 ,r3
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,1 6
/* Up to 15B to go */
11 : clrldi r5 ,r5 ,( 6 4 - 4 )
mtocrf 0 x01 ,r5
bf c r7 * 4 + 0 ,1 2 f
ld r0 ,0 ( r4 )
addi r4 ,r4 ,8
std r0 ,0 ( r3 )
addi r3 ,r3 ,8
12 : bf c r7 * 4 + 1 ,1 3 f
lwz r0 ,0 ( r4 )
addi r4 ,r4 ,4
stw r0 ,0 ( r3 )
addi r3 ,r3 ,4
13 : bf c r7 * 4 + 2 ,1 4 f
lhz r0 ,0 ( r4 )
addi r4 ,r4 ,2
sth r0 ,0 ( r3 )
addi r3 ,r3 ,2
14 : bf c r7 * 4 + 3 ,1 5 f
lbz r0 ,0 ( r4 )
stb r0 ,0 ( r3 )
15 : addi r1 ,r1 ,S T A C K F R A M E S I Z E
2014-02-14 19:21:03 +01:00
ld r3 ,- S T A C K F R A M E S I Z E + S T K _ R E G ( R 3 1 ) ( r1 )
2018-06-07 09:57:53 +08:00
b e x i t _ v m x _ o p s / * t a i l c a l l o p t i m i s e * /
2012-05-30 20:22:09 +00:00
.Lvmx_unaligned_copy :
/* Get the destination 16B aligned */
neg r6 ,r3
mtocrf 0 x01 ,r6
clrldi r6 ,r6 ,( 6 4 - 4 )
bf c r7 * 4 + 3 ,1 f
lbz r0 ,0 ( r4 )
addi r4 ,r4 ,1
stb r0 ,0 ( r3 )
addi r3 ,r3 ,1
1 : bf c r7 * 4 + 2 ,2 f
lhz r0 ,0 ( r4 )
addi r4 ,r4 ,2
sth r0 ,0 ( r3 )
addi r3 ,r3 ,2
2 : bf c r7 * 4 + 1 ,3 f
lwz r0 ,0 ( r4 )
addi r4 ,r4 ,4
stw r0 ,0 ( r3 )
addi r3 ,r3 ,4
3 : bf c r7 * 4 + 0 ,4 f
lwz r0 ,0 ( r4 ) / * L e s s c h a n c e o f a r e j e c t w i t h w o r d o p s * /
lwz r7 ,4 ( r4 )
addi r4 ,r4 ,8
stw r0 ,0 ( r3 )
stw r7 ,4 ( r3 )
addi r3 ,r3 ,8
4 : sub r5 ,r5 ,r6
/* Get the desination 128B aligned */
neg r6 ,r3
srdi r7 ,r6 ,4
mtocrf 0 x01 ,r7
clrldi r6 ,r6 ,( 6 4 - 7 )
li r9 ,1 6
li r10 ,3 2
li r11 ,4 8
2015-02-10 09:51:22 +11:00
LVS( v16 ,0 ,r4 ) / * S e t u p p e r m u t e c o n t r o l v e c t o r * /
lvx v0 ,0 ,r4
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,1 6
bf c r7 * 4 + 3 ,5 f
2017-08-05 19:55:11 +02:00
lvx v1 ,0 ,r4
2015-02-10 09:51:22 +11:00
VPERM( v8 ,v0 ,v1 ,v16 )
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,1 6
2017-08-05 19:55:11 +02:00
stvx v8 ,0 ,r3
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,1 6
2015-02-10 09:51:22 +11:00
vor v0 ,v1 ,v1
2012-05-30 20:22:09 +00:00
5 : bf c r7 * 4 + 2 ,6 f
2017-08-05 19:55:11 +02:00
lvx v1 ,0 ,r4
2015-02-10 09:51:22 +11:00
VPERM( v8 ,v0 ,v1 ,v16 )
lvx v0 ,r4 ,r9
VPERM( v9 ,v1 ,v0 ,v16 )
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,3 2
2017-08-05 19:55:11 +02:00
stvx v8 ,0 ,r3
2015-02-10 09:51:22 +11:00
stvx v9 ,r3 ,r9
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,3 2
6 : bf c r7 * 4 + 1 ,7 f
2017-08-05 19:55:11 +02:00
lvx v3 ,0 ,r4
2015-02-10 09:51:22 +11:00
VPERM( v8 ,v0 ,v3 ,v16 )
lvx v2 ,r4 ,r9
VPERM( v9 ,v3 ,v2 ,v16 )
lvx v1 ,r4 ,r10
VPERM( v10 ,v2 ,v1 ,v16 )
lvx v0 ,r4 ,r11
VPERM( v11 ,v1 ,v0 ,v16 )
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,6 4
2017-08-05 19:55:11 +02:00
stvx v8 ,0 ,r3
2015-02-10 09:51:22 +11:00
stvx v9 ,r3 ,r9
stvx v10 ,r3 ,r10
stvx v11 ,r3 ,r11
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,6 4
7 : sub r5 ,r5 ,r6
srdi r6 ,r5 ,7
2012-06-25 13:33:10 +00:00
std r14 ,S T K _ R E G ( R 1 4 ) ( r1 )
std r15 ,S T K _ R E G ( R 1 5 ) ( r1 )
std r16 ,S T K _ R E G ( R 1 6 ) ( r1 )
2012-05-30 20:22:09 +00:00
li r12 ,6 4
li r14 ,8 0
li r15 ,9 6
li r16 ,1 1 2
mtctr r6
/ *
* Now d o c a c h e l i n e s i z e d l o a d s a n d s t o r e s . B y t h i s s t a g e t h e
* cacheline s t o r e s a r e a l s o c a c h e l i n e a l i g n e d .
* /
.align 5
8 :
2017-08-05 19:55:11 +02:00
lvx v7 ,0 ,r4
2015-02-10 09:51:22 +11:00
VPERM( v8 ,v0 ,v7 ,v16 )
lvx v6 ,r4 ,r9
VPERM( v9 ,v7 ,v6 ,v16 )
lvx v5 ,r4 ,r10
VPERM( v10 ,v6 ,v5 ,v16 )
lvx v4 ,r4 ,r11
VPERM( v11 ,v5 ,v4 ,v16 )
lvx v3 ,r4 ,r12
VPERM( v12 ,v4 ,v3 ,v16 )
lvx v2 ,r4 ,r14
VPERM( v13 ,v3 ,v2 ,v16 )
lvx v1 ,r4 ,r15
VPERM( v14 ,v2 ,v1 ,v16 )
lvx v0 ,r4 ,r16
VPERM( v15 ,v1 ,v0 ,v16 )
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,1 2 8
2017-08-05 19:55:11 +02:00
stvx v8 ,0 ,r3
2015-02-10 09:51:22 +11:00
stvx v9 ,r3 ,r9
stvx v10 ,r3 ,r10
stvx v11 ,r3 ,r11
stvx v12 ,r3 ,r12
stvx v13 ,r3 ,r14
stvx v14 ,r3 ,r15
stvx v15 ,r3 ,r16
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,1 2 8
bdnz 8 b
2012-06-25 13:33:10 +00:00
ld r14 ,S T K _ R E G ( R 1 4 ) ( r1 )
ld r15 ,S T K _ R E G ( R 1 5 ) ( r1 )
ld r16 ,S T K _ R E G ( R 1 6 ) ( r1 )
2012-05-30 20:22:09 +00:00
/* Up to 127B to go */
clrldi r5 ,r5 ,( 6 4 - 7 )
srdi r6 ,r5 ,4
mtocrf 0 x01 ,r6
bf c r7 * 4 + 1 ,9 f
2017-08-05 19:55:11 +02:00
lvx v3 ,0 ,r4
2015-02-10 09:51:22 +11:00
VPERM( v8 ,v0 ,v3 ,v16 )
lvx v2 ,r4 ,r9
VPERM( v9 ,v3 ,v2 ,v16 )
lvx v1 ,r4 ,r10
VPERM( v10 ,v2 ,v1 ,v16 )
lvx v0 ,r4 ,r11
VPERM( v11 ,v1 ,v0 ,v16 )
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,6 4
2017-08-05 19:55:11 +02:00
stvx v8 ,0 ,r3
2015-02-10 09:51:22 +11:00
stvx v9 ,r3 ,r9
stvx v10 ,r3 ,r10
stvx v11 ,r3 ,r11
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,6 4
9 : bf c r7 * 4 + 2 ,1 0 f
2017-08-05 19:55:11 +02:00
lvx v1 ,0 ,r4
2015-02-10 09:51:22 +11:00
VPERM( v8 ,v0 ,v1 ,v16 )
lvx v0 ,r4 ,r9
VPERM( v9 ,v1 ,v0 ,v16 )
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,3 2
2017-08-05 19:55:11 +02:00
stvx v8 ,0 ,r3
2015-02-10 09:51:22 +11:00
stvx v9 ,r3 ,r9
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,3 2
10 : bf c r7 * 4 + 3 ,1 1 f
2017-08-05 19:55:11 +02:00
lvx v1 ,0 ,r4
2015-02-10 09:51:22 +11:00
VPERM( v8 ,v0 ,v1 ,v16 )
2012-05-30 20:22:09 +00:00
addi r4 ,r4 ,1 6
2017-08-05 19:55:11 +02:00
stvx v8 ,0 ,r3
2012-05-30 20:22:09 +00:00
addi r3 ,r3 ,1 6
/* Up to 15B to go */
11 : clrldi r5 ,r5 ,( 6 4 - 4 )
addi r4 ,r4 ,- 1 6 / * U n w i n d t h e + 1 6 l o a d o f f s e t * /
mtocrf 0 x01 ,r5
bf c r7 * 4 + 0 ,1 2 f
lwz r0 ,0 ( r4 ) / * L e s s c h a n c e o f a r e j e c t w i t h w o r d o p s * /
lwz r6 ,4 ( r4 )
addi r4 ,r4 ,8
stw r0 ,0 ( r3 )
stw r6 ,4 ( r3 )
addi r3 ,r3 ,8
12 : bf c r7 * 4 + 1 ,1 3 f
lwz r0 ,0 ( r4 )
addi r4 ,r4 ,4
stw r0 ,0 ( r3 )
addi r3 ,r3 ,4
13 : bf c r7 * 4 + 2 ,1 4 f
lhz r0 ,0 ( r4 )
addi r4 ,r4 ,2
sth r0 ,0 ( r3 )
addi r3 ,r3 ,2
14 : bf c r7 * 4 + 3 ,1 5 f
lbz r0 ,0 ( r4 )
stb r0 ,0 ( r3 )
15 : addi r1 ,r1 ,S T A C K F R A M E S I Z E
2014-02-14 19:21:03 +01:00
ld r3 ,- S T A C K F R A M E S I Z E + S T K _ R E G ( R 3 1 ) ( r1 )
2018-06-07 09:57:53 +08:00
b e x i t _ v m x _ o p s / * t a i l c a l l o p t i m i s e * /
2014-09-26 19:45:34 +02:00
# endif / * C O N F I G _ A L T I V E C * /