2006-03-06 03:41:56 +03:00
/* NGmemcpy.S: Niagara optimized memcpy.
 *
 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
 */
#ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/asi.h>
#include <asm/thread_info.h>

/* Kernel build.
 *
 * GLOBAL_SPARE is the global register the shift-and-mask block copy
 * loops use to hold the left-shift amount.
 *
 * RESTORE_ASI(TMP) reloads %asi from the byte at [%g6 + TI_CURRENT_DS],
 * clobbering TMP (presumably %g6 is the current thread_info pointer;
 * confirm against the kernel register conventions).
 */
#define GLOBAL_SPARE	%g7
#define RESTORE_ASI(TMP)	\
	ldub	[%g6 + TI_CURRENT_DS], TMP;  \
	wr	TMP, 0x0, %asi;
#else
/* Non-kernel build: a different spare global, and %asi is simply set
 * to ASI_PNF when the block copy is finished.
 */
#define GLOBAL_SPARE	%g5
#define RESTORE_ASI(TMP)	\
	wr	%g0, ASI_PNF, %asi
#endif

/* Size of the stack frame allocated by the "save" at function entry. */
#ifdef __sparc_v9__
#define SAVE_AMOUNT	128
#else
#define SAVE_AMOUNT	64
#endif

/* ASI written to %asi before the block copy loops; STORE_INIT stores
 * through %asi.
 */
#ifndef STORE_ASI
#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
#endif

/* EX_LD/EX_ST wrap every load/store that may fault.  The second
 * argument names a fault handler; the defaults below ignore it and
 * emit the bare instruction.  A file that includes this one may
 * predefine them.
 */
#ifndef EX_LD
#define EX_LD(x,y)	x
#endif

#ifndef EX_ST
#define EX_ST(x,y)	x
#endif

/* LOAD(type, addr, dest): plain load, or (MEMCPY_DEBUG) the alternate
 * space form of the same load using ASI 0x80.
 */
#ifndef LOAD
#ifndef MEMCPY_DEBUG
#define LOAD(type,addr,dest)	type [addr], dest
#else
#define LOAD(type,addr,dest)	type##a [addr] 0x80, dest
#endif
#endif

/* LOAD_TWIN: one ldda naming only dest0; dest1 is not referenced by
 * the expansion (the callers always pass an even/odd register pair).
 */
#ifndef LOAD_TWIN
#define LOAD_TWIN(addr_reg,dest0,dest1)	\
	ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0
#endif

#ifndef STORE
#define STORE(type,src,addr)	type src, [addr]
#endif

/* STORE_INIT: 8-byte store through %asi, or an ordinary stx when
 * SIMULATE_NIAGARA_ON_NON_NIAGARA is defined.
 */
#ifndef STORE_INIT
#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
#define STORE_INIT(src,addr)	stxa src, [addr] %asi
#else
#define STORE_INIT(src,addr)	stx src, [addr + 0x00]
#endif
#endif

/* Name of the routine, optional entry preamble, and the condition
 * code set used by the length-based branches.
 */
#ifndef FUNC_NAME
#define FUNC_NAME	NGmemcpy
#endif

#ifndef PREAMBLE
#define PREAMBLE
#endif

#ifndef XCC
#define XCC xcc
#endif

	.register	%g2,#scratch
	.register	%g3,#scratch

	.text
#ifndef EX_RETVAL
#define EX_RETVAL(x)	x

/* Fault handlers named by the second argument of EX_LD()/EX_ST().
 *
 * Each NG_ret_* stub computes the value spelled out by its label into
 * %i0 (presumably the number of bytes left uncopied when the access
 * faulted; confirm against the EX_LD/EX_ST definitions of the
 * including file) and branches to __restore_asi with that add in the
 * delay slot.
 *
 * __restore_asi sets %asi to ASI_AIUS and returns to the caller of the
 * copy routine.  The "restore" must sit in the delay slot of "ret":
 * with "wr" in the delay slot instead, the restore is never executed
 * and the caller resumes in this routine's register window.
 */
__restore_asi:
	wr	%g0, ASI_AIUS, %asi
	ret
	 restore

/* Used by the byte-wise destination alignment loop, where %i4 has
 * already been decremented for the byte in flight: %i2 + %i4 + 1.
 * (%i5 holds nothing meaningful on that path.)
 */
ENTRY(NG_ret_i2_plus_i4_plus_1)
	add	%i4, 1, %i4
	ba,pt	%xcc, __restore_asi
	 add	%i2, %i4, %i0
ENDPROC(NG_ret_i2_plus_i4_plus_1)

/* %i2 + %g1, minus the part of the current 64-byte block that has
 * already been stored.
 */
ENTRY(NG_ret_i2_plus_g1)
	ba,pt	%xcc, __restore_asi
	 add	%i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1)
ENTRY(NG_ret_i2_plus_g1_minus_8)
	sub	%g1, 8, %g1
	ba,pt	%xcc, __restore_asi
	 add	%i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_8)
ENTRY(NG_ret_i2_plus_g1_minus_16)
	sub	%g1, 16, %g1
	ba,pt	%xcc, __restore_asi
	 add	%i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_16)
ENTRY(NG_ret_i2_plus_g1_minus_24)
	sub	%g1, 24, %g1
	ba,pt	%xcc, __restore_asi
	 add	%i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_24)
ENTRY(NG_ret_i2_plus_g1_minus_32)
	sub	%g1, 32, %g1
	ba,pt	%xcc, __restore_asi
	 add	%i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_32)
ENTRY(NG_ret_i2_plus_g1_minus_40)
	sub	%g1, 40, %g1
	ba,pt	%xcc, __restore_asi
	 add	%i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_40)
ENTRY(NG_ret_i2_plus_g1_minus_48)
	sub	%g1, 48, %g1
	ba,pt	%xcc, __restore_asi
	 add	%i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_48)
ENTRY(NG_ret_i2_plus_g1_minus_56)
	sub	%g1, 56, %g1
	ba,pt	%xcc, __restore_asi
	 add	%i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_56)

/* %i2 + %i4 variants. */
ENTRY(NG_ret_i2_plus_i4)
	ba,pt	%xcc, __restore_asi
	 add	%i2, %i4, %i0
ENDPROC(NG_ret_i2_plus_i4)
ENTRY(NG_ret_i2_plus_i4_minus_8)
	sub	%i4, 8, %i4
	ba,pt	%xcc, __restore_asi
	 add	%i2, %i4, %i0
ENDPROC(NG_ret_i2_plus_i4_minus_8)

/* %i2 plus a constant. */
ENTRY(NG_ret_i2_plus_8)
	ba,pt	%xcc, __restore_asi
	 add	%i2, 8, %i0
ENDPROC(NG_ret_i2_plus_8)
ENTRY(NG_ret_i2_plus_4)
	ba,pt	%xcc, __restore_asi
	 add	%i2, 4, %i0
ENDPROC(NG_ret_i2_plus_4)
ENTRY(NG_ret_i2_plus_1)
	ba,pt	%xcc, __restore_asi
	 add	%i2, 1, %i0
ENDPROC(NG_ret_i2_plus_1)
ENTRY(NG_ret_i2_plus_g1_plus_1)
	add	%g1, 1, %g1
	ba,pt	%xcc, __restore_asi
	 add	%i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_plus_1)
ENTRY(NG_ret_i2)
	ba,pt	%xcc, __restore_asi
	 mov	%i2, %i0
ENDPROC(NG_ret_i2)

/* (%i2 & 7) + %i4 */
ENTRY(NG_ret_i2_and_7_plus_i4)
	and	%i2, 7, %i2
	ba,pt	%xcc, __restore_asi
	 add	%i2, %i4, %i0
ENDPROC(NG_ret_i2_and_7_plus_i4)
#endif
2006-03-06 03:41:56 +03:00
/* FUNC_NAME (NGmemcpy unless overridden): copy %i2 bytes from %i1 to %i0.
 *
 * In:	%i0 = dst, %i1 = src, %i2 = len
 * Out:	EX_RETVAL(%i0) is handed back in the caller's %o0.
 *
 * A new register window is taken with "save"; if any bit of len from
 * bit 31 upward is set the routine traps (tne 5).  Every load/store
 * that may fault is wrapped in EX_LD()/EX_ST(); the second argument
 * names the handler whose label spells out how the remaining length is
 * reconstructed from the registers live at that point.
 *
 * Strategy by length:
 *   len == 0         -> return at 85
 *   0 < len < 16     -> 80: word loop if dst|src|len is 4-byte aligned,
 *                       otherwise the byte loop at 90
 *   16 <= len < 128  -> 70: 8/16-byte moves, with a shift-and-merge
 *                       loop when src and dst disagree modulo 8
 *   len >= 128       -> 64-byte block copy with STORE_INIT stores
 */
	.align		64

	.globl	FUNC_NAME
	.type	FUNC_NAME,#function
FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
	PREAMBLE
	save		%sp, -SAVE_AMOUNT, %sp
	srlx		%i2, 31, %g2
	cmp		%g2, 0
	tne		%xcc, 5
	mov		%i0, %o0
	cmp		%i2, 0
	be,pn		%XCC, 85f
	 or		%o0, %i1, %i3
	cmp		%i2, 16
	blu,a,pn	%XCC, 80f
	 or		%i3, %i2, %i3

	/* 2 blocks (128 bytes) is the minimum we can do the block
	 * copy with.  We need to ensure that we'll iterate at least
	 * once in the block copy loop.  At worst we'll need to align
	 * the destination to a 64-byte boundary which can chew up
	 * to (64 - 1) bytes from the length before we perform the
	 * block copy loop.
	 */
	cmp		%i2, (2 * 64)
	blu,pt		%XCC, 70f
	 andcc		%i3, 0x7, %g0

	/* %o0:	dst
	 * %i1:	src
	 * %i2:	len  (known to be >= 128)
	 *
	 * The block copy loops will use %i4/%i5,%g2/%g3 as
	 * temporaries while copying the data.
	 */

	LOAD(prefetch, %i1, #one_read)
	wr		%g0, STORE_ASI, %asi

	/* Align destination on 64-byte boundary.  */
	andcc		%o0, (64 - 1), %i4
	be,pt		%XCC, 2f
	 sub		%i4, 64, %i4
	sub		%g0, %i4, %i4	! bytes to align dst
	sub		%i2, %i4, %i2
1:	subcc		%i4, 1, %i4
	EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
	EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
	add		%i1, 1, %i1
	bne,pt		%XCC, 1b
	add		%o0, 1, %o0

	/* If the source is on a 16-byte boundary we can do
	 * the direct block copy loop.  If it is 8-byte aligned
	 * we can do the 16-byte loads offset by -8 bytes and the
	 * init stores offset by one register.
	 *
	 * If the source is not even 8-byte aligned, we need to do
	 * shifting and masking (basically integer faligndata).
	 *
	 * The careful bit with init stores is that if we store
	 * to any part of the cache line we have to store the whole
	 * cacheline else we can end up with corrupt L2 cache line
	 * contents.  Since the loop works on 64-bytes of 64-byte
	 * aligned store data at a time, this is easy to ensure.
	 */
2:
	andcc		%i1, (16 - 1), %i4
	andn		%i2, (64 - 1), %g1	! block copy loop iterator
	be,pt		%XCC, 50f
	 sub		%i2, %g1, %i2		! final sub-block copy bytes

	cmp		%i4, 8
	be,pt		%XCC, 10f
	 sub		%i1, %i4, %i1

	/* Neither 8-byte nor 16-byte aligned, shift and mask.
	 * GLOBAL_SPARE = left shift in bits, %i5 = 64 - that shift.
	 * The condition codes of the "cmp %i4, 8" above are still
	 * live for the bg below: offsets above 8 use loop 9.
	 */
	and		%i4, 0x7, GLOBAL_SPARE
	sll		GLOBAL_SPARE, 3, GLOBAL_SPARE
	mov		64, %i5
	EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
	sub		%i5, GLOBAL_SPARE, %i5
	mov		16, %o4
	mov		32, %o5
	mov		48, %o7
	mov		64, %i3

	bg,pn		%XCC, 9f
	 nop

	/* Shift WORD1 and WORD2 left by POST_SHIFT and fill their low
	 * bits from the word that follows each (WORD2, WORD3) shifted
	 * right by PRE_SHIFT.  TMP is clobbered; WORD3 is only read.
	 */
#define MIX_THREE_WORDS(WORD1, WORD2, WORD3, PRE_SHIFT, POST_SHIFT, TMP) \
	sllx		WORD1, POST_SHIFT, WORD1; \
	srlx		WORD2, PRE_SHIFT, TMP; \
	sllx		WORD2, POST_SHIFT, WORD2; \
	or		WORD1, TMP, WORD1; \
	srlx		WORD3, PRE_SHIFT, TMP; \
	or		WORD2, TMP, WORD2;

8:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
	MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
	LOAD(prefetch, %i1 + %i3, #one_read)

	EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
	EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)

	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
	MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)

	EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
	EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)

	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
	MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)

	EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
	EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)

	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
	add		%i1, 64, %i1
	MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)

	EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
	EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)

	subcc		%g1, 64, %g1
	bne,pt		%XCC, 8b
	 add		%o0, 64, %o0

	ba,pt		%XCC, 60f
	 add		%i1, %i4, %i1		! undo the src round-down

9:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
	MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
	LOAD(prefetch, %i1 + %i3, #one_read)

	EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
	EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)

	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
	MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)

	EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
	EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)

	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
	MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)

	EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
	EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)

	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
	add		%i1, 64, %i1
	MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)

	EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
	EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)

	subcc		%g1, 64, %g1
	bne,pt		%XCC, 9b
	 add		%o0, 64, %o0

	ba,pt		%XCC, 60f
	 add		%i1, %i4, %i1		! undo the src round-down

10:	/* Destination is 64-byte aligned, source was only 8-byte
	 * aligned but it has been subtracted by 8 and we perform
	 * one twin load ahead, then add 8 back into source when
	 * we finish the loop.
	 */
	EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
	mov	16, %o7
	mov	32, %g2
	mov	48, %g3
	mov	64, %o1
1:	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
	LOAD(prefetch, %i1 + %o1, #one_read)
	EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1)	! initializes cache line
	EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
	EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
	EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
	EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
	EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
	EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
	add		%i1, 64, %i1
	EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
	EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
	subcc		%g1, 64, %g1
	bne,pt		%XCC, 1b
	 add		%o0, 64, %o0

	ba,pt		%XCC, 60f
	 add		%i1, 0x8, %i1

50:	/* Destination is 64-byte aligned, and source is 16-byte
	 * aligned.
	 */
	mov	16, %o7
	mov	32, %g2
	mov	48, %g3
	mov	64, %o1
1:	EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
	LOAD(prefetch, %i1 + %o1, #one_read)
	EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1)	! initializes cache line
	EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
	EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
	EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
	add	%i1, 64, %i1
	EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
	EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
	EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
	EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
	subcc	%g1, 64, %g1
	bne,pt	%XCC, 1b
	 add	%o0, 64, %o0
	/* fall through */

60:
	membar		#Sync

	/* %i2 contains any final bytes still needed to be copied
	 * over. If anything is left, we copy it one byte at a time.
	 */
	RESTORE_ASI(%i3)
	brz,pt		%i2, 85f
	 sub		%o0, %i1, %i3
	ba,a,pt		%XCC, 90f
	 nop		/* keep a DCTI out of the word after the annulled ba */

	.align		64
70:	/* 16 <= len < 128; condition codes are from (dst | src) & 7 */
	bne,pn		%XCC, 75f
	 sub		%o0, %i1, %i3		! %i3 = dst - src

72:	/* src and dst both 8-byte aligned: 16 bytes per iteration.
	 * %i4 is only decremented after the last faultable access so
	 * that the handlers named below see %i4 still counting the
	 * 16 bytes in flight (second store: minus the 8 already done).
	 */
	andn		%i2, 0xf, %i4
	and		%i2, 0xf, %i2
1:	EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
	add		%i1, 0x08, %i1
	EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
	sub		%i1, 0x08, %i1
	EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
	add		%i1, 0x8, %i1
	EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
	subcc		%i4, 0x10, %i4
	bgu,pt		%XCC, 1b
	 add		%i1, 0x8, %i1
73:	andcc		%i2, 0x8, %g0
	be,pt		%XCC, 1f
	 nop
	sub		%i2, 0x8, %i2
	EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
	EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
	add		%i1, 0x8, %i1
1:	andcc		%i2, 0x4, %g0
	be,pt		%XCC, 1f
	 nop
	sub		%i2, 0x4, %i2
	EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
	EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
	add		%i1, 0x4, %i1
1:	cmp		%i2, 0
	be,pt		%XCC, 85f
	 nop
	ba,pt		%xcc, 90f
	 nop

75:	/* dst or src not 8-byte aligned: byte-copy until dst is. */
	andcc		%o0, 0x7, %g1
	sub		%g1, 0x8, %g1
	be,pn		%icc, 2f
	 sub		%g0, %g1, %g1		! %g1 = 8 - (dst & 7)
	sub		%i2, %g1, %i2

1:	subcc		%g1, 1, %g1
	EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
	EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
	bgu,pt		%icc, 1b
	 add		%i1, 1, %i1

2:	add		%i1, %i3, %o0
	andcc		%i1, 0x7, %g1
	bne,pt		%icc, 8f
	 sll		%g1, 3, %g1		! src misalignment in bits

	cmp		%i2, 16
	bgeu,pt		%icc, 72b
	 nop
	ba,a,pt		%xcc, 73b

8:	/* src still misaligned: read aligned 8-byte words and merge
	 * neighbours.  %g1 = left shift, %i3 = 64 - %g1, %g2 = carry.
	 */
	mov		64, %i3
	andn		%i1, 0x7, %i1
	EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
	sub		%i3, %g1, %i3
	andn		%i2, 0x7, %i4
	sllx		%g2, %g1, %g2
1:	add		%i1, 0x8, %i1
	EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
	srlx		%g3, %i3, %i5
	or		%i5, %g2, %i5
	EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
	/* Decrement only after the store so that a faulting store is
	 * reported with the current 8 bytes still counted in %i4.
	 */
	subcc		%i4, 0x8, %i4
	add		%o0, 0x8, %o0
	bgu,pt		%icc, 1b
	 sllx		%g3, %g1, %g2

	srl		%g1, 3, %g1
	andcc		%i2, 0x7, %i2
	be,pn		%icc, 85f
	 add		%i1, %g1, %i1
	ba,pt		%xcc, 90f
	 sub		%o0, %i1, %i3

	.align		64
80:	/* 0 < len < 16; %i3 = dst | src | len */
	andcc		%i3, 0x3, %g0
	bne,pn		%XCC, 90f
	 sub		%o0, %i1, %i3

1:
	subcc		%i2, 4, %i2
	EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
	EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
	bgu,pt		%XCC, 1b
	 add		%i1, 4, %i1

85:	ret
	 restore	EX_RETVAL(%i0), %g0, %o0

	.align		32
90:	/* Byte-at-a-time tail; %i3 = dst - src */
	subcc		%i2, 1, %i2
	EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
	EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
	bgu,pt		%XCC, 90b
	 add		%i1, 1, %i1
	ret
	 restore	EX_RETVAL(%i0), %g0, %o0

	.size		FUNC_NAME, .-FUNC_NAME