/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */
#include <arch/chip.h>

/*
 * This file shares the implementation of the userspace memcpy and
 * the kernel's memcpy, copy_to_user and copy_from_user.
 */

#include <linux/linkage.h>

/* Values passed in r29 to select memcpy_common's behavior (see below). */
#define IS_MEMCPY	   0
#define IS_COPY_FROM_USER  1
#define IS_COPY_TO_USER   -1
.section .text .memcpy_common , " ax"
.align 64
/ * Use t h i s t o p r e f a c e e a c h b u n d l e t h a t c a n c a u s e a n e x c e p t i o n s o
* the k e r n e l c a n c l e a n u p p r o p e r l y . T h e s p e c i a l c l e a n u p c o d e s h o u l d
* not u s e t h e s e , s i n c e i t k n o w s w h a t i t i s d o i n g .
* /
# define E X \
.pushsection _ _ ex_ t a b l e , " a " ; \
2013-08-09 15:38:43 -04:00
.align 4 ; \
2010-05-28 23:09:12 -04:00
.word 9 f, m e m c p y _ c o m m o n _ f i x u p ; \
.popsection ; \
9
/* raw_copy_from_user takes the kernel target address in r0,
 * the user source in r1, and the bytes to copy in r2.
 * It returns the number of uncopiable bytes (hopefully zero) in r0.
 */
ENTRY(raw_copy_from_user)
.type raw_copy_from_user, @function
	FEEDBACK_ENTER_EXPLICIT(raw_copy_from_user, \
	  .text.memcpy_common, \
	  .Lend_memcpy_common - raw_copy_from_user)
	/* Tag the request in r29 and tail into the shared implementation. */
	{ movei r29, IS_COPY_FROM_USER; j memcpy_common }
	.size raw_copy_from_user, . - raw_copy_from_user
/* raw_copy_to_user takes the user target address in r0,
 * the kernel source in r1, and the bytes to copy in r2.
 * It returns the number of uncopiable bytes (hopefully zero) in r0.
 */
ENTRY(raw_copy_to_user)
.type raw_copy_to_user, @function
	FEEDBACK_REENTER(raw_copy_from_user)
	/* Tag the request in r29 and tail into the shared implementation. */
	{ movei r29, IS_COPY_TO_USER; j memcpy_common }
	.size raw_copy_to_user, . - raw_copy_to_user
ENTRY(memcpy)
.type memcpy, @function
	FEEDBACK_REENTER(raw_copy_from_user)
	{ movei r29, IS_MEMCPY }
	.size memcpy, . - memcpy
	/* Fall through into memcpy_common below. */

	.type memcpy_common, @function
memcpy_common:
	/* On entry, r29 holds one of the IS_* macro values from above. */

	/* r0 is the dest, r1 is the source, r2 is the size. */

	/* Save aside original dest so we can return it at the end. */
	{ sw sp, lr; move r23, r0; or r4, r0, r1 }

	/* Check for an empty size. */
	{ bz r2, .Ldone; andi r4, r4, 3 }

	/* Save aside original values in case of a fault. */
	{ move r24, r1; move r25, r2 }
	move r27, lr

	/* Check for an unaligned source or dest. */
	{ bnz r4, .Lcopy_unaligned_maybe_many; addli r4, r2, -256 }

.Lcheck_aligned_copy_size:
	/* If we are copying < 256 bytes, branch to simple case. */
	{ blzt r4, .Lcopy_8_check; slti_u r8, r2, 8 }

	/* Copying >= 256 bytes, so jump to complex prefetching loop. */
	{ andi r6, r1, 63; j .Lcopy_many }

/*
 *
 * Aligned 4 byte at a time copy loop
 *
 */

.Lcopy_8_loop:
	/* Copy two words at a time to hide load latency. */
EX:	{ lw r3, r1; addi r1, r1, 4; slti_u r8, r2, 16 }
EX:	{ lw r4, r1; addi r1, r1, 4 }
EX:	{ sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
EX:	{ sw r0, r4; addi r0, r0, 4; addi r2, r2, -4 }
.Lcopy_8_check:
	{ bzt r8, .Lcopy_8_loop; slti_u r4, r2, 4 }

	/* Copy odd leftover word, if any. */
	{ bnzt r4, .Lcheck_odd_stragglers }
EX:	{ lw r3, r1; addi r1, r1, 4 }
EX:	{ sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }

.Lcheck_odd_stragglers:
	{ bnz r2, .Lcopy_unaligned_few }

.Ldone:
	/* For memcpy return original dest address, else zero. */
	{ mz r0, r29, r23; jrp lr }
/*
 *
 * Prefetching multiple cache line copy handler (for large transfers).
 *
 */

	/* Copy words until r1 is cache-line-aligned. */
.Lalign_loop:
EX:	{ lw r3, r1; addi r1, r1, 4 }
	{ andi r6, r1, 63 }
EX:	{ sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
.Lcopy_many:
	{ bnzt r6, .Lalign_loop; addi r9, r0, 63 }

	{ addi r3, r1, 60; andi r9, r9, -64 }

	/* No need to prefetch dst, we'll just do the wh64
	 * right before we copy a line.
	 */
EX:	{ lw r5, r3; addi r3, r3, 64; movei r4, 1 }
	/* Intentionally stall for a few cycles to leave L2 cache alone. */
	{ bnzt zero, .; move r27, lr }
EX:	{ lw r6, r3; addi r3, r3, 64 }
	/* Intentionally stall for a few cycles to leave L2 cache alone. */
	{ bnzt zero, . }
EX:	{ lw r7, r3; addi r3, r3, 64 }
	/* Intentionally stall for a few cycles to leave L2 cache alone. */
	{ bz zero, .Lbig_loop2 }

	/* On entry to this loop:
	 * - r0 points to the start of dst line 0
	 * - r1 points to start of src line 0
	 * - r2 >= (256 - 60), only the first time the loop trips.
	 * - r3 contains r1 + 128 + 60    [pointer to end of source line 2]
	 *   This is our prefetch address.  When we get near the end
	 *   rather than prefetching off the end this is changed to point
	 *   to some "safe" recently loaded address.
	 * - r5 contains *(r1 + 60)       [i.e. last word of source line 0]
	 * - r6 contains *(r1 + 64 + 60)  [i.e. last word of source line 1]
	 * - r9 contains ((r0 + 63) & -64)
	 *     [start of next dst cache line.]
	 */

.Lbig_loop:
	{ jal .Lcopy_line2; add r15, r1, r2 }

.Lbig_loop2:
	/* Copy line 0, first stalling until r5 is ready. */
EX:	{ move r12, r5; lw r16, r1 }
	{ bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
	/* Prefetch several lines ahead. */
EX:	{ lw r5, r3; addi r3, r3, 64 }
	{ jal .Lcopy_line }

	/* Copy line 1, first stalling until r6 is ready. */
EX:	{ move r12, r6; lw r16, r1 }
	{ bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
	/* Prefetch several lines ahead. */
EX:	{ lw r6, r3; addi r3, r3, 64 }
	{ jal .Lcopy_line }

	/* Copy line 2, first stalling until r7 is ready. */
EX:	{ move r12, r7; lw r16, r1 }
	{ bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
	/* Prefetch several lines ahead. */
EX:	{ lw r7, r3; addi r3, r3, 64 }
	/* Use up a caches-busy cycle by jumping back to the top of the
	 * loop.  Might as well get it out of the way now.
	 */
	{ j .Lbig_loop }
2010-05-28 23:09:12 -04:00
/ * On e n t r y :
* - r0 p o i n t s t o t h e d e s t i n a t i o n l i n e .
* - r1 p o i n t s t o t h e s o u r c e l i n e .
2010-10-14 16:39:42 -04:00
* - r3 i s t h e n e x t p r e f e t c h a d d r e s s .
2010-05-28 23:09:12 -04:00
* - r9 h o l d s t h e l a s t a d d r e s s u s e d f o r w h64 .
* - r1 2 = W O R D _ 1 5
2010-10-14 16:39:42 -04:00
* - r1 6 = W O R D _ 0 .
* - r1 7 = = r1 + 1 6 .
* - r2 7 h o l d s s a v e d l r t o r e s t o r e .
2010-05-28 23:09:12 -04:00
*
* On e x i t :
* - r0 i s i n c r e m e n t e d b y 6 4 .
* - r1 i s i n c r e m e n t e d b y 6 4 , u n l e s s t h a t w o u l d p o i n t t o a w o r d
2010-10-14 16:39:42 -04:00
* beyond t h e e n d o f t h e s o u r c e a r r a y , i n w h i c h c a s e i t i s r e d i r e c t e d
* to p o i n t t o a n a r b i t r a r y w o r d a l r e a d y i n t h e c a c h e .
2010-05-28 23:09:12 -04:00
* - r2 i s d e c r e m e n t e d b y 6 4 .
2010-10-14 16:39:42 -04:00
* - r3 i s u n c h a n g e d , u n l e s s i t p o i n t s t o a w o r d b e y o n d t h e
* end o f t h e s o u r c e a r r a y , i n w h i c h c a s e i t i s r e d i r e c t e d
* to p o i n t t o a n a r b i t r a r y w o r d a l r e a d y i n t h e c a c h e .
* Redirecting i s O K s i n c e i f w e a r e t h a t c l o s e t o t h e e n d
* of t h e a r r a y w e w i l l n o t c o m e b a c k t o t h i s s u b r o u t i n e
* and u s e t h e c o n t e n t s o f t h e p r e f e t c h e d a d d r e s s .
2010-05-28 23:09:12 -04:00
* - r4 i s n o n z e r o i f f r2 > = 6 4 .
2010-10-14 16:39:42 -04:00
* - r9 i s i n c r e m e n t e d b y 6 4 , u n l e s s i t p o i n t s b e y o n d t h e
* end o f t h e l a s t f u l l d e s t i n a t i o n c a c h e l i n e , i n w h i c h
* case i t i s r e d i r e c t e d t o a " s a f e a d d r e s s " t h a t c a n b e
* clobbered ( s p - 6 4 )
2010-05-28 23:09:12 -04:00
* - lr c o n t a i n s t h e v a l u e i n r27 .
* /
/* r26 unused */
.Lcopy_line :
2010-10-14 16:39:42 -04:00
/ * TODO : when r3 g o e s p a s t t h e e n d , w e w o u l d l i k e t o r e d i r e c t i t
* to p r e f e t c h t h e l a s t p a r t i a l c a c h e l i n e ( i f a n y ) j u s t o n c e , f o r t h e
* benefit o f t h e f i n a l c l e a n u p l o o p . B u t w e d o n ' t w a n t t o
* prefetch t h a t l i n e m o r e t h a n o n c e , o r s u b s e q u e n t p r e f e t c h e s
* will g o i n t o t h e R T F . B u t t h e n . L b i g _ l o o p s h o u l d u n c o n d i t i o n a l l y
* branch t o t o p o f l o o p t o e x e c u t e f i n a l p r e f e t c h , a n d i t s
* nop s h o u l d b e c o m e a c o n d i t i o n a l b r a n c h .
* /
/ * We n e e d t w o n o n - m e m o r y c y c l e s h e r e t o c o v e r t h e r e s o u r c e s
* used b y t h e l o a d s i n i t i a t e d b y t h e c a l l e r .
* /
{ add r15 , r1 , r2 }
2010-05-28 23:09:12 -04:00
.Lcopy_line2 :
2010-10-14 16:39:42 -04:00
{ slt_ u r13 , r3 , r15 ; addi r17, r1, 16 }
2010-05-28 23:09:12 -04:00
2010-10-14 16:39:42 -04:00
/* NOTE: this will stall for one cycle as L1 is busy. */
2010-05-28 23:09:12 -04:00
2010-10-14 16:39:42 -04:00
/* Fill second L1D line. */
2010-05-28 23:09:12 -04:00
EX : { lw r17 , r17 ; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
2010-10-14 16:39:42 -04:00
/* Prepare destination line for writing. */
2010-05-28 23:09:12 -04:00
EX : { wh6 4 r9 ; addi r9, r9, 64 }
2010-10-14 16:39:42 -04:00
/* Load seven words that are L1D hits to cover wh64 L2 usage. */
2010-05-28 23:09:12 -04:00
2010-10-14 16:39:42 -04:00
/ * Load t h e t h r e e r e m a i n i n g w o r d s f r o m t h e l a s t L 1 D l i n e , w h i c h
* we k n o w h a s a l r e a d y f i l l e d t h e L 1 D .
* /
2010-05-28 23:09:12 -04:00
EX : { lw r4 , r1 ; addi r1, r1, 4; addi r20, r1, 16 } /* r4 = WORD_12 */
EX : { lw r8 , r1 ; addi r1, r1, 4; slt_u r13, r20, r15 }/* r8 = WORD_13 */
EX : { lw r11 , r1 ; addi r1, r1, -52; mvz r20, r13, r1 } /* r11 = WORD_14 */
2010-10-14 16:39:42 -04:00
/ * Load t h e t h r e e r e m a i n i n g w o r d s f r o m t h e f i r s t L 1 D l i n e , f i r s t
* stalling u n t i l i t h a s f i l l e d b y " l o o k i n g a t " r16 .
* /
2010-05-28 23:09:12 -04:00
EX : { lw r13 , r1 ; addi r1, r1, 4; move zero, r16 } /* r13 = WORD_1 */
EX : { lw r14 , r1 ; addi r1, r1, 4 } /* r14 = WORD_2 */
EX : { lw r15 , r1 ; addi r1, r1, 8; addi r10, r0, 60 } /* r15 = WORD_3 */
2010-10-14 16:39:42 -04:00
/ * Load s e c o n d w o r d f r o m t h e s e c o n d L 1 D l i n e , f i r s t
* stalling u n t i l i t h a s f i l l e d b y " l o o k i n g a t " r17 .
* /
2010-05-28 23:09:12 -04:00
EX : { lw r19 , r1 ; addi r1, r1, 4; move zero, r17 } /* r19 = WORD_5 */
2010-10-14 16:39:42 -04:00
/ * Store l a s t w o r d t o t h e d e s t i n a t i o n l i n e , p o t e n t i a l l y d i r t y i n g i t
* for t h e f i r s t t i m e , w h i c h k e e p s t h e L 2 b u s y f o r t w o c y c l e s .
* /
2010-05-28 23:09:12 -04:00
EX : { sw r10 , r12 } / * s t o r e ( W O R D _ 1 5 ) * /
2010-10-14 16:39:42 -04:00
/* Use two L1D hits to cover the sw L2 access above. */
2010-05-28 23:09:12 -04:00
EX : { lw r10 , r1 ; addi r1, r1, 4 } /* r10 = WORD_6 */
EX : { lw r12 , r1 ; addi r1, r1, 4 } /* r12 = WORD_7 */
2010-10-14 16:39:42 -04:00
/* Fill third L1D line. */
2010-05-28 23:09:12 -04:00
EX : { lw r18 , r1 ; addi r1, r1, 4 } /* r18 = WORD_8 */
2010-10-14 16:39:42 -04:00
/* Store first L1D line. */
2010-05-28 23:09:12 -04:00
EX : { sw r0 , r16 ; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
EX : { sw r0 , r13 ; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
EX : { sw r0 , r14 ; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
EX : { sw r0 , r15 ; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
2010-10-14 16:39:42 -04:00
/* Store second L1D line. */
2010-05-28 23:09:12 -04:00
EX : { sw r0 , r17 ; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */
EX : { sw r0 , r19 ; addi r0, r0, 4 } /* store(WORD_5) */
EX : { sw r0 , r10 ; addi r0, r0, 4 } /* store(WORD_6) */
EX : { sw r0 , r12 ; addi r0, r0, 4 } /* store(WORD_7) */
EX : { lw r13 , r1 ; addi r1, r1, 4; move zero, r18 } /* r13 = WORD_9 */
EX : { lw r14 , r1 ; addi r1, r1, 4 } /* r14 = WORD_10 */
EX : { lw r15 , r1 ; move r1, r20 } /* r15 = WORD_11 */
2010-10-14 16:39:42 -04:00
/* Store third L1D line. */
2010-05-28 23:09:12 -04:00
EX : { sw r0 , r18 ; addi r0, r0, 4 } /* store(WORD_8) */
EX : { sw r0 , r13 ; addi r0, r0, 4 } /* store(WORD_9) */
EX : { sw r0 , r14 ; addi r0, r0, 4 } /* store(WORD_10) */
EX : { sw r0 , r15 ; addi r0, r0, 4 } /* store(WORD_11) */
2010-10-14 16:39:42 -04:00
/* Store rest of fourth L1D line. */
2010-05-28 23:09:12 -04:00
EX : { sw r0 , r4 ; addi r0, r0, 4 } /* store(WORD_12) */
2010-10-14 16:39:42 -04:00
{
2010-05-28 23:09:12 -04:00
EX : sw r0 , r8 / * s t o r e ( W O R D _ 1 3 ) * /
2010-10-14 16:39:42 -04:00
addi r0 , r0 , 4
2010-05-28 23:09:12 -04:00
/* Will r2 be > 64 after we subtract 64 below? */
2010-10-14 16:39:42 -04:00
shri r4 , r2 , 7
}
{
2010-05-28 23:09:12 -04:00
EX : sw r0 , r11 / * s t o r e ( W O R D _ 1 4 ) * /
2010-10-14 16:39:42 -04:00
addi r0 , r0 , 8
/* Record 64 bytes successfully copied. */
addi r2 , r2 , - 6 4
}
2010-05-28 23:09:12 -04:00
{ jrp l r ; move lr, r27 }
2010-10-14 16:39:42 -04:00
/ * Convey t o t h e b a c k t r a c e l i b r a r y t h a t t h e s t a c k f r a m e i s s i z e
2010-05-28 23:09:12 -04:00
* zero, a n d t h e r e a l r e t u r n a d d r e s s i s o n t h e s t a c k r a t h e r t h a n
* in ' l r ' .
* /
{ info 8 }
.align 64
.Lcopy_unaligned_maybe_many :
/* Skip the setup overhead if we aren't copying many bytes. */
{ slti_ u r8 , r2 , 2 0 ; sub r4, zero, r0 }
{ bnzt r8 , . L c o p y _ u n a l i g n e d _ f e w ; andi r4, r4, 3 }
{ bz r4 , . L d e s t _ i s _ w o r d _ a l i g n e d ; add r18, r1, r2 }
/ *
*
* unaligned 4 b y t e a t a t i m e c o p y h a n d l e r .
*
* /
/* Copy single bytes until r0 == 0 mod 4, so we can store words. */
.Lalign_dest_loop :
EX : { lb_ u r3 , r1 ; addi r1, r1, 1; addi r4, r4, -1 }
EX : { sb r0 , r3 ; addi r0, r0, 1; addi r2, r2, -1 }
{ bnzt r4 , . L a l i g n _ d e s t _ l o o p ; andi r3, r1, 3 }
/* If source and dest are now *both* aligned, do an aligned copy. */
{ bz r3 , . L c h e c k _ a l i g n e d _ c o p y _ s i z e ; addli r4, r2, -256 }
.Ldest_is_word_aligned :
EX : { andi r8 , r0 , 6 3 ; lwadd_na r6, r1, 4}
{ slti_ u r9 , r2 , 6 4 ; bz r8, .Ldest_is_L2_line_aligned }
/ * This c o p i e s u n a l i g n e d w o r d s u n t i l e i t h e r t h e r e a r e f e w e r
* than 4 b y t e s l e f t t o c o p y , o r u n t i l t h e d e s t i n a t i o n p o i n t e r
* is c a c h e - a l i g n e d , w h i c h e v e r c o m e s f i r s t .
*
* On e n t r y :
* - r0 i s t h e n e x t s t o r e a d d r e s s .
* - r1 p o i n t s 4 b y t e s p a s t t h e l o a d a d d r e s s c o r r e s p o n d i n g t o r0 .
* - r2 > = 4
* - r6 i s t h e n e x t a l i g n e d w o r d l o a d e d .
* /
.Lcopy_unaligned_src_words :
EX : { lwadd_ n a r7 , r1 , 4 ; slti_u r8, r2, 4 + 4 }
/* stall */
{ dword_ a l i g n r6 , r7 , r1 ; slti_u r9, r2, 64 + 4 }
EX : { swadd r0 , r6 , 4 ; addi r2, r2, -4 }
{ bnz r8 , . L c l e a n u p _ u n a l i g n e d _ w o r d s ; andi r8, r0, 63 }
{ bnzt r8 , . L c o p y _ u n a l i g n e d _ s r c _ w o r d s ; move r6, r7 }
/ * On e n t r y :
* - r0 i s t h e n e x t s t o r e a d d r e s s .
* - r1 p o i n t s 4 b y t e s p a s t t h e l o a d a d d r e s s c o r r e s p o n d i n g t o r0 .
* - r2 > = 4 ( # o f b y t e s l e f t t o s t o r e ) .
* - r6 i s t h e n e x t a l i g n e d s r c w o r d v a l u e .
* - r9 = ( r2 < 6 4 U ) .
* - r1 8 p o i n t s o n e b y t e p a s t t h e e n d o f s o u r c e m e m o r y .
* /
.Ldest_is_L2_line_aligned :
{
/* Not a full cache line remains. */
bnz r9 , . L c l e a n u p _ u n a l i g n e d _ w o r d s
move r7 , r6
}
/* r2 >= 64 */
/* Kick off two prefetches, but don't go past the end. */
{ addi r3 , r1 , 6 3 - 4 ; addi r8, r1, 64 + 63 - 4 }
{ prefetch r3 ; move r3, r8; slt_u r8, r8, r18 }
{ mvz r3 , r8 , r1 ; addi r8, r3, 64 }
{ prefetch r3 ; move r3, r8; slt_u r8, r8, r18 }
{ mvz r3 , r8 , r1 ; movei r17, 0 }
.Lcopy_unaligned_line :
/* Prefetch another line. */
{ prefetch r3 ; addi r15, r1, 60; addi r3, r3, 64 }
/* Fire off a load of the last word we are about to copy. */
EX : { lw_ n a r15 , r15 ; slt_u r8, r3, r18 }
EX : { mvz r3 , r8 , r1 ; wh64 r0 }
/ * This l o o p r u n s t w i c e .
*
* On e n t r y :
* - r1 7 i s e v e n b e f o r e t h e f i r s t i t e r a t i o n , a n d o d d b e f o r e
* the s e c o n d . I t i s i n c r e m e n t e d i n s i d e t h e l o o p . E n c o u n t e r i n g
* an e v e n v a l u e a t t h e e n d o f t h e l o o p m a k e s i t s t o p .
* /
.Lcopy_half_an_unaligned_line :
EX : {
/ * Stall u n t i l t h e l a s t b y t e i s r e a d y . I n t h e s t e a d y s t a t e t h i s
* guarantees a l l w o r d s t o l o a d b e l o w w i l l b e i n t h e L 2 c a c h e , w h i c h
* avoids s h u n t i n g t h e l o a d s t o t h e R T F .
* /
move z e r o , r15
lwadd_ n a r7 , r1 , 1 6
}
EX : { lwadd_ n a r11 , r1 , 1 2 }
EX : { lwadd_ n a r14 , r1 , - 2 4 }
EX : { lwadd_ n a r8 , r1 , 4 }
EX : { lwadd_ n a r9 , r1 , 4 }
EX : {
lwadd_ n a r10 , r1 , 8
/* r16 = (r2 < 64), after we subtract 32 from r2 below. */
slti_ u r16 , r2 , 6 4 + 3 2
}
EX : { lwadd_ n a r12 , r1 , 4 ; addi r17, r17, 1 }
EX : { lwadd_ n a r13 , r1 , 8 ; dword_align r6, r7, r1 }
EX : { swadd r0 , r6 , 4 ; dword_align r7, r8, r1 }
EX : { swadd r0 , r7 , 4 ; dword_align r8, r9, r1 }
EX : { swadd r0 , r8 , 4 ; dword_align r9, r10, r1 }
EX : { swadd r0 , r9 , 4 ; dword_align r10, r11, r1 }
EX : { swadd r0 , r10 , 4 ; dword_align r11, r12, r1 }
EX : { swadd r0 , r11 , 4 ; dword_align r12, r13, r1 }
EX : { swadd r0 , r12 , 4 ; dword_align r13, r14, r1 }
EX : { swadd r0 , r13 , 4 ; addi r2, r2, -32 }
{ move r6 , r14 ; bbst r17, .Lcopy_half_an_unaligned_line }
{ bzt r16 , . L c o p y _ u n a l i g n e d _ l i n e ; move r7, r6 }
/ * On e n t r y :
* - r0 i s t h e n e x t s t o r e a d d r e s s .
* - r1 p o i n t s 4 b y t e s p a s t t h e l o a d a d d r e s s c o r r e s p o n d i n g t o r0 .
* - r2 > = 0 ( # o f b y t e s l e f t t o s t o r e ) .
* - r7 i s t h e n e x t a l i g n e d s r c w o r d v a l u e .
* /
.Lcleanup_unaligned_words :
/* Handle any trailing bytes. */
{ bz r2 , . L c o p y _ u n a l i g n e d _ d o n e ; slti_u r8, r2, 4 }
{ bzt r8 , . L c o p y _ u n a l i g n e d _ s r c _ w o r d s ; move r6, r7 }
/* Move r1 back to the point where it corresponds to r0. */
{ addi r1 , r1 , - 4 }
/* Fall through */
/ *
*
* 1 byte a t a t i m e c o p y h a n d l e r .
*
* /
.Lcopy_unaligned_few :
EX : { lb_ u r3 , r1 ; addi r1, r1, 1 }
EX : { sb r0 , r3 ; addi r0, r0, 1; addi r2, r2, -1 }
{ bnzt r2 , . L c o p y _ u n a l i g n e d _ f e w }
.Lcopy_unaligned_done :
/* For memcpy return original dest address, else zero. */
{ mz r0 , r29 , r23 ; jrp lr }
.Lend_memcpy_common :
.size memcpy_ c o m m o n , . L e n d _ m e m c p y _ c o m m o n - m e m c p y _ c o m m o n
.section .fixup , " ax"
memcpy_common_fixup :
.type memcpy_ c o m m o n _ f i x u p , @function
/ * Skip a n y b y t e s w e a l r e a d y s u c c e s s f u l l y c o p i e d .
* r2 ( n u m r e m a i n i n g ) i s c o r r e c t , b u t r0 ( d s t ) a n d r1 ( s r c )
* may n o t b e q u i t e r i g h t b e c a u s e o f u n r o l l i n g a n d p r e f e t c h i n g .
* So w e n e e d t o r e c o m p u t e t h e i r v a l u e s a s t h e a d d r e s s j u s t
* after t h e l a s t b y t e w e a r e s u r e w a s s u c c e s s f u l l y l o a d e d a n d
* then s t o r e d .
* /
/* Determine how many bytes we successfully copied. */
{ sub r3 , r25 , r2 }
/* Add this to the original r0 and r1 to get their new values. */
{ add r0 , r23 , r3 ; add r1, r24, r3 }
{ bzt r29 , m e m c p y _ f i x u p _ l o o p }
{ blzt r29 , c o p y _ t o _ u s e r _ f i x u p _ l o o p }
copy_from_user_fixup_loop :
/* Try copying the rest one byte at a time, expecting a load fault. */
.Lcfu : { lb_ u r3 , r1 ; addi r1, r1, 1 }
{ sb r0 , r3 ; addi r0, r0, 1; addi r2, r2, -1 }
{ bnzt r2 , c o p y _ f r o m _ u s e r _ f i x u p _ l o o p }
.Lcopy_from_user_fixup_zero_remainder :
2017-03-21 14:27:36 -04:00
move l r , r27
2010-05-28 23:09:12 -04:00
{ move r0 , r2 ; jrp lr }
copy_to_user_fixup_loop :
/* Try copying the rest one byte at a time, expecting a store fault. */
{ lb_ u r3 , r1 ; addi r1, r1, 1 }
.Lctu : { sb r0 , r3 ; addi r0, r0, 1; addi r2, r2, -1 }
{ bnzt r2 , c o p y _ t o _ u s e r _ f i x u p _ l o o p }
.Lcopy_to_user_fixup_done :
move l r , r27
{ move r0 , r2 ; jrp lr }
memcpy_fixup_loop :
/ * Try c o p y i n g t h e r e s t o n e b y t e a t a t i m e . W e e x p e c t a d i s a s t r o u s
* fault t o h a p p e n s i n c e w e a r e i n f i x u p c o d e , b u t l e t i t h a p p e n .
* /
{ lb_ u r3 , r1 ; addi r1, r1, 1 }
{ sb r0 , r3 ; addi r0, r0, 1; addi r2, r2, -1 }
{ bnzt r2 , m e m c p y _ f i x u p _ l o o p }
/ * This s h o u l d b e u n r e a c h a b l e , w e s h o u l d h a v e f a u l t e d a g a i n .
* But b e p a r a n o i d a n d h a n d l e i t i n c a s e s o m e i n t e r r u p t c h a n g e d
* the T L B o r s o m e t h i n g .
* /
move l r , r27
{ move r0 , r23 ; jrp lr }
.size memcpy_ c o m m o n _ f i x u p , . - m e m c p y _ c o m m o n _ f i x u p
.section _ _ ex_ t a b l e ," a "
2013-08-09 15:38:43 -04:00
.align 4
2010-05-28 23:09:12 -04:00
.word .Lcfu , .Lcopy_from_user_fixup_zero_remainder
.word .Lctu , .Lcopy_to_user_fixup_done