2005-04-16 15:20:36 -07:00
/ *
* linux/ a r c h / a r m / l i b / l i b1 f u n c s . S : O p t i m i z e d A R M d i v i s i o n r o u t i n e s
*
2009-09-14 03:25:28 -04:00
* Author : Nicolas P i t r e < n i c o @fluxnic.net>
2005-04-16 15:20:36 -07:00
* - contributed t o g c c - 3 . 4 o n S e p 3 0 , 2 0 0 3
* - adapted f o r t h e L i n u x k e r n e l o n O c t 2 , 2 0 0 3
* /
/ * Copyright 1 9 9 5 , 1 9 9 6 , 1 9 9 8 , 1 9 9 9 , 2 0 0 0 , 2 0 0 3 F r e e S o f t w a r e F o u n d a t i o n , I n c .
This f i l e i s f r e e s o f t w a r e ; you can redistribute it and/or modify it
under t h e t e r m s o f t h e G N U G e n e r a l P u b l i c L i c e n s e a s p u b l i s h e d b y t h e
Free S o f t w a r e F o u n d a t i o n ; either version 2, or (at your option) any
later v e r s i o n .
In a d d i t i o n t o t h e p e r m i s s i o n s i n t h e G N U G e n e r a l P u b l i c L i c e n s e , t h e
Free S o f t w a r e F o u n d a t i o n g i v e s y o u u n l i m i t e d p e r m i s s i o n t o l i n k t h e
compiled v e r s i o n o f t h i s f i l e i n t o c o m b i n a t i o n s w i t h o t h e r p r o g r a m s ,
and t o d i s t r i b u t e t h o s e c o m b i n a t i o n s w i t h o u t a n y r e s t r i c t i o n c o m i n g
from t h e u s e o f t h i s f i l e . ( T h e G e n e r a l P u b l i c L i c e n s e r e s t r i c t i o n s
do a p p l y i n o t h e r r e s p e c t s ; for example, they cover modification of
the f i l e , a n d d i s t r i b u t i o n w h e n n o t l i n k e d i n t o a c o m b i n e
executable. )
This f i l e i s d i s t r i b u t e d i n t h e h o p e t h a t i t w i l l b e u s e f u l , b u t
WITHOUT A N Y W A R R A N T Y ; without even the implied warranty of
MERCHANTABILITY o r F I T N E S S F O R A P A R T I C U L A R P U R P O S E . S e e t h e G N U
General P u b l i c L i c e n s e f o r m o r e d e t a i l s .
You s h o u l d h a v e r e c e i v e d a c o p y o f t h e G N U G e n e r a l P u b l i c L i c e n s e
along w i t h t h i s p r o g r a m ; see the file COPYING. If not, write to
the F r e e S o f t w a r e F o u n d a t i o n , 5 9 T e m p l e P l a c e - S u i t e 3 3 0 ,
Boston, M A 0 2 1 1 1 - 1 3 0 7 , U S A . * /
# include < l i n u x / l i n k a g e . h >
# include < a s m / a s s e m b l e r . h >
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Core of the unsigned division: a shift-and-subtract loop unrolled so
 * that each pass produces up to four quotient bits.  All four arguments
 * are registers.
 *
 *   dividend - value being divided; overwritten while the loop runs
 *   divisor  - value to divide by; shifted in place, so clobbered
 *   result   - receives the quotient
 *   curbit   - scratch; the quotient bit matching the current divisor shift
 *
 * The unrolled steps still subtract (divisor >> n) when (curbit >> n) has
 * already dropped to zero, so the value left in dividend must not be
 * relied upon as the remainder.  The condition flags are clobbered.
 *
 * NOTE(review): the callers visible in this file expand the macro only
 * after checking that the divisor is nonzero, is not a power of two and
 * is below the dividend.  The ARMv5+ setup appears to depend on
 * dividend >= divisor so that the clz difference is not negative.
 */
.macro ARM_DIV_BODY dividend, d i v i s o r , r e s u l t , c u r b i t
# if _ _ L I N U X _ A R M _ A R C H _ _ > = 5
@ ARMv5 and later: the difference of the leading-zero counts is the
@ left shift that lines the top set bit of the divisor up with the
@ top set bit of the dividend.  curbit becomes 1 shifted by the same
@ amount and the quotient accumulator starts at zero.
clz \ c u r b i t , \ d i v i s o r
clz \ r e s u l t , \ d i v i d e n d
sub \ r e s u l t , \ c u r b i t , \ r e s u l t
mov \ c u r b i t , #1
mov \ d i v i s o r , \ d i v i s o r , l s l \ r e s u l t
mov \ c u r b i t , \ c u r b i t , l s l \ r e s u l t
mov \ r e s u l t , #0
# else
@ Initially shift the divisor left 3 bits if possible,
@ set curbit accordingly. This allows for curbit to be located
@ at the left end of each 4-bit nibble in the division loop
@ to save one loop in most cases.
tst \ d i v i s o r , #0xe0000000
moveq \ d i v i s o r , \ d i v i s o r , l s l #3
moveq \ c u r b i t , #8
movne \ c u r b i t , #1
@ Unless the divisor is very big, shift it up in multiples of
@ four bits, since this is the amount of unwinding in the main
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
1 : cmp \ d i v i s o r , #0x10000000
cmplo \ d i v i s o r , \ d i v i d e n d
movlo \ d i v i s o r , \ d i v i s o r , l s l #4
movlo \ c u r b i t , \ c u r b i t , l s l #4
blo 1 b
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
1 : cmp \ d i v i s o r , #0x80000000
cmplo \ d i v i s o r , \ d i v i d e n d
movlo \ d i v i s o r , \ d i v i s o r , l s l #1
movlo \ c u r b i t , \ c u r b i t , l s l #1
blo 1 b
mov \ r e s u l t , #0
# endif
@ Division loop: four compare/subtract steps per pass, trying the
@ divisor at shifts 0, 1, 2 and 3 and setting the matching quotient
@ bit (curbit at the same shift) whenever the subtraction is taken.
1 : cmp \ d i v i d e n d , \ d i v i s o r
subhs \ d i v i d e n d , \ d i v i d e n d , \ d i v i s o r
orrhs \ r e s u l t , \ r e s u l t , \ c u r b i t
cmp \ d i v i d e n d , \ d i v i s o r , l s r #1
subhs \ d i v i d e n d , \ d i v i d e n d , \ d i v i s o r , l s r #1
orrhs \ r e s u l t , \ r e s u l t , \ c u r b i t , l s r #1
cmp \ d i v i d e n d , \ d i v i s o r , l s r #2
subhs \ d i v i d e n d , \ d i v i d e n d , \ d i v i s o r , l s r #2
orrhs \ r e s u l t , \ r e s u l t , \ c u r b i t , l s r #2
cmp \ d i v i d e n d , \ d i v i s o r , l s r #3
subhs \ d i v i d e n d , \ d i v i d e n d , \ d i v i s o r , l s r #3
orrhs \ r e s u l t , \ r e s u l t , \ c u r b i t , l s r #3
@ Leave when the dividend has reached zero.  Otherwise move curbit
@ down one nibble with the flags updated: a zero curbit sets Z, which
@ suppresses both the divisor shift and the branch back.
cmp \ d i v i d e n d , #0 @ Early termination?
movnes \ c u r b i t , \ c u r b i t , l s r #4 @ No, any more bits to do?
movne \ d i v i s o r , \ d i v i s o r , l s r #4
bne 1 b
.endm
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Store in "order" the bit index of the set bit of a power-of-two
 * divisor, i.e. the right-shift count that divides by it.  Both
 * arguments are registers.
 *
 * ARMv5 and later: order = 31 - clz(divisor); divisor is left untouched.
 * Older cores: a binary search in steps of 16, 8 and 4 bits that shifts
 * divisor down in place (so divisor is clobbered) and leaves a value
 * below 16 for the final step.  The condition flags are clobbered.
 *
 * The final step is only exact for powers of two: 1, 2 and 4 map to
 * 0, 1 and 2 through "lsr #1", and anything above 4 is taken to be 8.
 * The callers visible in this file expand the macro only after the
 * power-of-two test has succeeded.
 */
.macro ARM_DIV2_ORDER divisor, o r d e r
# if _ _ L I N U X _ A R M _ A R C H _ _ > = 5
clz \ o r d e r , \ d i v i s o r
rsb \ o r d e r , \ o r d e r , #31
# else
@ Bit 16 or above set: drop the low 16 bits and start the count at 16,
@ otherwise start the count at 0.
cmp \ d i v i s o r , #( 1 < < 1 6 )
movhs \ d i v i s o r , \ d i v i s o r , l s r #16
movhs \ o r d e r , #16
movlo \ o r d e r , #0
@ Same test for the next 8 bits ...
cmp \ d i v i s o r , #( 1 < < 8 )
movhs \ d i v i s o r , \ d i v i s o r , l s r #8
addhs \ o r d e r , \ o r d e r , #8
@ ... and for the next 4 bits.
cmp \ d i v i s o r , #( 1 < < 4 )
movhs \ d i v i s o r , \ d i v i s o r , l s r #4
addhs \ o r d e r , \ o r d e r , #4
@ What remains is below 16: above 4 adds 3, otherwise add the
@ remaining value shifted right by one.
cmp \ d i v i s o r , #( 1 < < 2 )
addhi \ o r d e r , \ o r d e r , #3
addls \ o r d e r , \ o r d e r , \ d i v i s o r , l s r #1
# endif
.endm
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Core of the unsigned modulo: the divisor is shifted left until it
 * lines up with the dividend, then it is compared and subtracted at
 * every shift position back down to zero.  All arguments are registers.
 *
 *   dividend - value being reduced; holds the remainder on exit
 *   divisor  - shifted in place, so clobbered
 *   order    - scratch; number of positions the divisor was shifted
 *              left, used afterwards as the countdown of steps
 *   spare    - scratch; only written on the ARMv5+ path
 *
 * The condition flags are clobbered.
 *
 * NOTE(review): the callers visible in this file expand the macro only
 * when the divisor is nonzero, is not a power of two and is below the
 * dividend.  The ARMv5+ setup appears to depend on dividend >= divisor
 * so that the clz difference is not negative.
 */
.macro ARM_MOD_BODY dividend, d i v i s o r , o r d e r , s p a r e
# if _ _ L I N U X _ A R M _ A R C H _ _ > = 5
@ order = clz(divisor) - clz(dividend): the left shift that lines the
@ top set bit of the divisor up with the top set bit of the dividend.
clz \ o r d e r , \ d i v i s o r
clz \ s p a r e , \ d i v i d e n d
sub \ o r d e r , \ o r d e r , \ s p a r e
mov \ d i v i s o r , \ d i v i s o r , l s l \ o r d e r
# else
mov \ o r d e r , #0
@ Unless the divisor is very big, shift it up in multiples of
@ four bits, since this is the amount of unwinding in the main
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
1 : cmp \ d i v i s o r , #0x10000000
cmplo \ d i v i s o r , \ d i v i d e n d
movlo \ d i v i s o r , \ d i v i s o r , l s l #4
addlo \ o r d e r , \ o r d e r , #4
blo 1 b
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
1 : cmp \ d i v i s o r , #0x80000000
cmplo \ d i v i s o r , \ d i v i d e n d
movlo \ d i v i s o r , \ d i v i s o r , l s l #1
addlo \ o r d e r , \ o r d e r , #1
blo 1 b
# endif
@ Perform all needed subtractions to keep only the remainder.
@ Do comparisons in batches of 4 first.
@ A shift of "order" positions needs order + 1 compare/subtract steps,
@ so a full batch of four is possible only while order - 3 >= 0.
subs \ o r d e r , \ o r d e r , #3 @ yes, 3 is intended here
blt 2 f
1 : cmp \ d i v i d e n d , \ d i v i s o r
subhs \ d i v i d e n d , \ d i v i d e n d , \ d i v i s o r
cmp \ d i v i d e n d , \ d i v i s o r , l s r #1
subhs \ d i v i d e n d , \ d i v i d e n d , \ d i v i s o r , l s r #1
cmp \ d i v i d e n d , \ d i v i s o r , l s r #2
subhs \ d i v i d e n d , \ d i v i d e n d , \ d i v i s o r , l s r #2
cmp \ d i v i d e n d , \ d i v i s o r , l s r #3
subhs \ d i v i d e n d , \ d i v i d e n d , \ d i v i s o r , l s r #3
@ Go round again only while the dividend is still at least 1 and
@ the countdown stays non-negative after taking off this batch.
cmp \ d i v i d e n d , #1
mov \ d i v i s o r , \ d i v i s o r , l s r #4
subges \ o r d e r , \ o r d e r , #4
bge 1 b
@ Finished if the low two bits of the countdown are clear (no partial
@ batch is left) or if the dividend is already zero.
tst \ o r d e r , #3
teqne \ d i v i d e n d , #0
beq 5 f
@ Either 1, 2 or 3 comparison/subtractions are left.
@ The countdown is now -1, -2 or -3, meaning 3, 2 or 1 steps remain;
@ comparing it with -2 selects the entry point into the tail below.
2 : cmn \ o r d e r , #2
blt 4 f
beq 3 f
cmp \ d i v i d e n d , \ d i v i s o r
subhs \ d i v i d e n d , \ d i v i d e n d , \ d i v i s o r
mov \ d i v i s o r , \ d i v i s o r , l s r #1
3 : cmp \ d i v i d e n d , \ d i v i s o r
subhs \ d i v i d e n d , \ d i v i d e n d , \ d i v i s o r
mov \ d i v i s o r , \ d i v i s o r , l s r #1
4 : cmp \ d i v i d e n d , \ d i v i s o r
subhs \ d i v i d e n d , \ d i v i d e n d , \ d i v i s o r
5 :
.endm
/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Uses: r2, r3 and the condition flags as scratch; r1 may be modified
 *       by the macros expanded below.
 *
 * A zero divisor is handed over to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)

	subs	r2, r1, #1			@ r2 = divisor - 1, reused as a mask
	moveq	pc, lr				@ divisor == 1: r0 is the answer
	bcc	Ldiv0				@ borrow means divisor == 0
	cmp	r0, r1
	bls	.Ludiv_not_above		@ dividend <= divisor
	tst	r1, r2
	beq	.Ludiv_pow2			@ one bit set: shift instead

	@ General case: r0 = dividend, r1 = divisor, quotient built in r2,
	@ r3 is scratch.
	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	mov	pc, lr

	@ The flags still describe the dividend/divisor comparison:
	@ equal operands give 1, a smaller dividend gives 0.
.Ludiv_not_above:
	moveq	r0, #1
	movne	r0, #0
	mov	pc, lr

	@ Power-of-two divisor: r2 becomes the shift count.
.Ludiv_pow2:
	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	mov	pc, lr

ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
2005-04-16 15:20:36 -07:00
/*
 * __umodsi3: unsigned 32-bit modulo.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder
 * Uses: r2, r3 and the condition flags as scratch; r1 may be modified
 *       by ARM_MOD_BODY.
 *
 * A zero divisor is handed over to Ldiv0.  The conditional sequence at
 * the top settles every easy case without a branch; its instruction
 * order matters because each step consumes the flags of the previous.
 */
ENTRY(__umodsi3)

	subs	r2, r1, #1			@ r2 = divisor - 1 (mask); Z set if divisor == 1
	bcc	Ldiv0				@ borrow means divisor == 0
	cmpne	r0, r1				@ divisor != 1: dividend against divisor
	moveq	r0, #0				@ divisor == 1 or dividend == divisor
	tsthi	r1, r2				@ dividend > divisor: single-bit divisor?
	andeq	r0, r0, r2			@ if so, keep the bits below that one
	movls	pc, lr				@ every case above, or dividend < divisor

	@ General case: r0 = dividend (left holding the remainder),
	@ r1 = divisor, r2 and r3 are scratch.
	ARM_MOD_BODY r0, r1, r2, r3

	mov	pc, lr

ENDPROC(__umodsi3)
2005-04-16 15:20:36 -07:00
/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Uses: r1 (replaced by its magnitude), r2, r3, ip and the condition
 *       flags as scratch.
 *
 * The work is done on magnitudes; bit 31 of ip (dividend eor divisor)
 * holds the sign to give the quotient.  A zero divisor is handed over
 * to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)

	eor	ip, r0, r1			@ bit 31 = sign of the quotient
	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ r1 = magnitude of the divisor
	subs	r2, r1, #1			@ r2 = magnitude - 1, reused as a mask
	beq	.Lidiv_unit			@ divisor is 1 or -1
	movs	r3, r0
	rsbmi	r3, r0, #0			@ r3 = magnitude of the dividend
	cmp	r3, r1
	bls	.Lidiv_not_above		@ dividend magnitude <= divisor magnitude
	tst	r1, r2
	beq	.Lidiv_pow2			@ one bit set: shift instead

	@ General case: r3 = dividend, r1 = divisor, quotient built in r0,
	@ r2 is scratch.
	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ negative result wanted
	mov	pc, lr

	@ Divisor of 1 or -1.  r0 is still the untouched dividend, so
	@ ip eor r0 has the sign bit of the original divisor: negate the
	@ dividend when that divisor was negative.
.Lidiv_unit:
	teq	ip, r0
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ Flags come from the magnitude comparison: a smaller dividend
	@ gives 0, equal magnitudes give 1 or -1 depending on the sign in ip.
.Lidiv_not_above:
	movlo	r0, #0
	moveq	r0, ip, asr #31			@ 0 or -1
	orreq	r0, r0, #1			@ 1 or -1
	mov	pc, lr

	@ Power-of-two divisor: r2 becomes the shift count, applied to the
	@ dividend magnitude before the sign is restored.
.Lidiv_pow2:
	ARM_DIV2_ORDER r1, r2

	mov	r0, r3, lsr r2
	cmp	ip, #0
	rsbmi	r0, r0, #0
	mov	pc, lr

ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
2005-04-16 15:20:36 -07:00
/*
 * __modsi3: signed 32-bit modulo.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder, carrying the sign of the dividend
 * Uses: r1 (replaced by its magnitude), r2, r3, ip and the condition
 *       flags as scratch.
 *
 * The work is done on magnitudes; ip keeps the original dividend so
 * that its sign can be put back on the result.  A zero divisor is
 * handed over to Ldiv0.
 */
ENTRY(__modsi3)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ r1 = magnitude of the divisor
	movs	ip, r0				@ ip = original dividend (sign source)
	rsbmi	r0, r0, #0			@ r0 = magnitude of the dividend
	subs	r2, r1, #1			@ r2 = divisor - 1 (mask); Z set if divisor == 1
	cmpne	r0, r1				@ divisor != 1: dividend against divisor
	moveq	r0, #0				@ divisor == 1 or equal magnitudes
	tsthi	r1, r2				@ dividend > divisor: single-bit divisor?
	andeq	r0, r0, r2			@ if so, keep the bits below that one
	bls	.Limod_set_sign			@ every case above, or dividend < divisor

	@ General case: r0 = dividend (left holding the remainder),
	@ r1 = divisor, r2 and r3 are scratch.
	ARM_MOD_BODY r0, r1, r2, r3

.Limod_set_sign:
	cmp	ip, #0
	rsbmi	r0, r0, #0			@ negative dividend: negate remainder
	mov	pc, lr

ENDPROC(__modsi3)
2006-01-14 16:18:29 +00:00
# ifdef C O N F I G _ A E A B I
/*
 * __aeabi_uidivmod: unsigned division returning quotient and remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient (as returned by __aeabi_uidiv)
 *       r1 = dividend - quotient * divisor
 * Uses: r2, r3 as scratch, plus whatever __aeabi_uidiv uses; ip and lr
 *       are saved and restored around the call.
 */
ENTRY(__aeabi_uidivmod)

	stmdb	sp!, {r0, r1, ip, lr}		@ keep both operands across the call
	bl	__aeabi_uidiv			@ r0 = quotient
	ldmia	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	mov	pc, lr

ENDPROC(__aeabi_uidivmod)
2006-01-14 16:18:29 +00:00
/*
 * __aeabi_idivmod: signed division returning quotient and remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient (as returned by __aeabi_idiv)
 *       r1 = dividend - quotient * divisor
 * Uses: r2, r3 as scratch, plus whatever __aeabi_idiv uses; ip and lr
 *       are saved and restored around the call.
 */
ENTRY(__aeabi_idivmod)

	stmdb	sp!, {r0, r1, ip, lr}		@ keep both operands across the call
	bl	__aeabi_idiv			@ r0 = quotient
	ldmia	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	mov	pc, lr

ENDPROC(__aeabi_idivmod)
2006-01-14 16:18:29 +00:00
# endif
2005-04-16 15:20:36 -07:00
/*
 * Ldiv0: common landing point for a zero divisor.
 *
 * Branched to (not called) from the division entry points, so lr still
 * holds the return address of the original caller.  Calls __div0 and
 * then returns 0 in r0 to that caller.
 *
 * NOTE(review): 8 bytes of stack are taken although only lr is stored;
 * presumably this keeps sp 8-byte aligned across the call - confirm.
 */
Ldiv0:

	str	lr, [sp, #-8]!			@ save the return address
	bl	__div0
	mov	r0, #0				@ About as wrong as it could be.
	ldr	pc, [sp], #8			@ release the slot and return
2005-04-16 15:20:36 -07:00