/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */
#include <linux/linkage.h>

/*
 * Endian-dependent helpers for the unaligned-source paths of memcpy.
 *
 * In those paths the destination is word aligned but the source is not, so
 * every destination word is assembled from the bytes carried over from the
 * previous source word plus part of the freshly loaded one:
 *
 *   SHIFT_1   - position the part of the new word that completes the
 *               current destination word
 *   SHIFT_2   - position the rest of the new word as the next carry-over
 *   MERGE_1/2 - build the initial 3-byte carry-over (source off by 1) from
 *               the leading halfword (MERGE_1) and the leading byte (MERGE_2)
 *   EXTRACT_1 - pick the 16 bits of the final carry-over that are stored
 *               with sth
 *   EXTRACT_2 - pick the last 8 bits of the final carry-over that are stored
 *               with stb
 *
 * Some variants deliberately ignore IMM (or expand to nothing): the operand
 * is kept so that both endian flavours share the same call sites.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif

/*
 * Bulk-copy primitives for the fully aligned path.
 *
 *   LOADX/STOREX - one post-incrementing load/store: 8 bytes per access when
 *                  CONFIG_ARC_HAS_LL64 is set (ldd/std), 4 bytes otherwise
 *   ZOLSHFT      - log2 of the bytes moved per iteration of the aligned loop
 *                  (4 accesses: 4 * 8 = 32 = 1 << 5, or 4 * 4 = 16 = 1 << 4)
 *   ZOLAND       - mask giving the byte remainder left after that loop
 *                  (must stay equal to (1 << ZOLSHFT) - 1)
 */
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif
/*
 * memcpy - copy r2 bytes from [r1] to [r0]
 *
 * In:   r0 = destination, r1 = source, r2 = size in bytes
 * Out:  r0 = destination (left untouched; r3 is the running store pointer)
 * Uses: r1-r9 and lp_count in every build, flags; the aligned bulk loop
 *       additionally names r10 and, with CONFIG_ARC_HAS_LL64, presumably
 *       the odd partners of the ldd/std register pairs (up to r11) -
 *       NOTE(review): confirm pair semantics against the ISA manual.
 *
 * Strategy:
 *   - size == 0        : return at once
 *   - size <= 8        : plain byte loop
 *   - otherwise        : byte-copy until the destination is 32-bit aligned,
 *                        then dispatch on the source alignment (r1 & 3):
 *       CASE 0 (0) : both aligned, unrolled word/double-word loop
 *       CASE 1 (1) : 3 carry-over bytes, shift-and-merge word loop
 *       CASE 2 (2) : 2 carry-over bytes, shift-and-merge word loop
 *       CASE 3 (3) : 1 carry-over byte,  shift-and-merge word loop
 *     each case ends with a byte loop for the tail.
 *
 * All loops are zero-overhead loops (lpnz): the loop body runs lp_count
 * times and is skipped entirely when the Z flag is set, which is why every
 * lp_count computation below uses the flag-setting (.f) form.
 */
ENTRY_CFI(memcpy)
	mov.f	0, r2			; only sets flags: Z <=> size == 0
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0			; (delay slot) do not clobber ret val

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2		; (delay slot) byte count for .Lsmallchunk

	;; Byte-copy until the destination pointer is 32-bit aligned.
	;; Z is set (loop skipped) when it is already aligned.
	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4		; lp_count = 4 - (dst & 3)
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to Dwords, unfold x4
	;; This lsr.f sits in the delay slot of the bnz.d above, so it also
	;; runs on the unaligned path, where flags and lp_count are recomputed.
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	;; Tail left by the unrolled loop: up to 31 bytes with LL64,
	;; up to 15 bytes otherwise.
	and.f	lp_count, r2, ZOLAND
.Lsmallchunk:
	;; Also entered directly for size <= 8 with lp_count = size.
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	;; r4 = src & 3 (1, 2 or 3). One compare feeds both branches below:
	;; the plain sub in the delay slot does not touch the flags.
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1		; (delay slot) runs for all three cases

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]		; (delay slot) first byte, cases 1 and 3

;;; CASE 1: The source is unaligned, off by 1
	;; Hence I need to read 1 byte for a 16bit alignment
	;; and 2bytes to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	;; r5 = the 3 bytes read so far, laid out as the carry-over word
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5		; carry-over + 1 new byte -> dst word
	SHIFT_2	(r5, r6, 8)		; remaining 3 bytes become the carry-over

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07	; Last 8bytes
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	ldh.ab	r5, [r1, 2]
	sub	r2, r2, 1		; together with the delay-slot sub: size -= 2

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only move the carry-over to the top half when the word loop will
	;; run; the matching lsr.nz after the loop relies on the same flags
	;; (nothing in between sets them).
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5		; carry-over + 2 new bytes -> dst word
	SHIFT_2	(r5, r6, 16)		; remaining 2 bytes become the carry-over

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 16
#endif
	sth.ab	r5, [r3, 2]		; flush the 2 carry-over bytes

	and.f	lp_count, r2, 0x07	; Last 8bytes
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, I need to read 1byte for achieve the 32bit alignment
	;; (that byte is already in r5, loaded in the bhi.d delay slot)

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Same flag-guarded pre-shift as in CASE 2 (ne and nz test the same
	;; condition).
	asl.ne	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5		; carry-over + 3 new bytes -> dst word
	SHIFT_2	(r5, r6, 24)		; remaining byte becomes the carry-over

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 24
#endif
	stb.ab	r5, [r3, 1]		; flush the single carry-over byte

	and.f	lp_count, r2, 0x07	; Last 8bytes
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_3:
	j	[blink]

END_CFI(memcpy)