2005-04-16 15:20:36 -07:00
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:	Nicolas Pitre
 *  Created:	Oct 5, 2003
 *  Copyright:	Monta Vista Software, Inc.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */
# include < l i n u x / l i n k a g e . h >
2011-10-06 20:53:14 +01:00
# include < a s m / u n w i n d . h >
2005-04-16 15:20:36 -07:00
/*
 * Symbolic names for the two 64-bit register pairs used below:
 * xh/xl are the high/low words held in r0-r1, yh/yl the high/low words
 * held in r2-r3.  Which register of a pair carries the high word depends
 * on whether __ARMEB__ (big-endian build) is defined.
 */
#ifndef __ARMEB__
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#else
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#endif
/*
 * __do_div64 - divide a 64-bit unsigned dividend by a 32-bit divisor.
 *
 * The register interface is deliberately non-standard so that the code
 * can be as tight as possible.  It is intended to be reached only through
 * do_div() from include/asm/div64.h.
 *
 * On entry:
 *	xh-xl	= dividend (destroyed)
 *	r4	= divisor (never written by this routine)
 *
 * On return:
 *	yh-yl	= quotient
 *	xh	= remainder
 *
 * Also clobbered: xl, ip
 *
 * A zero divisor falls into Ldiv0_64, which calls __div0 and then returns
 * with quotient and remainder all forced to zero.
 */
ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Dispatch the cheap cases before doing any real work.
	subs	ip, r4, #1		@ ip = divisor - 1
	bls	.Ldiv64_by_0_or_1	@ divisor is 0 or 1
	tst	ip, r4			@ divisor & (divisor - 1)
	beq	.Ldiv64_pow2		@ zero: divisor is a power of 2

	@ The upper quotient word is non-zero only when xh >= divisor.
	@ (mov leaves the flags from cmp alone either way round.)
	mov	yh, #0
	cmp	xh, r4
	blo	.Ldiv64_lower

	@ Line the divisor up with the top set bit of xh.
	@ yl receives the shifted divisor (r4 keeps the original value),
	@ ip receives the matching quotient bit.

#if __LINUX_ARM_ARCH__ >= 5

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip		@ shift count; >= 0 since xh >= r4
	mov	ip, #1
	mov	ip, ip, lsl yl		@ ip = quotient bit for that shift
	mov	yl, r4, lsl yl		@ yl = aligned divisor

#else

	mov	ip, #1
	mov	yl, r4
.Ldiv64_align:
	cmp	yl, #0x80000000		@ stop once the top bit is set...
	cmpcc	yl, xh			@ ...or once yl has reached xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	.Ldiv64_align

#endif

	@ Shift-and-subtract loop producing the upper quotient bits.
	@ It ends when ip runs out of bits, or early as soon as the
	@ subtraction leaves xh equal to zero.
.Ldiv64_upper_loop:
	cmp	xh, yl
	orrcs	yh, yh, ip		@ xh >= yl: record this quotient bit
	subcss	xh, xh, yl		@ ...and subtract; Z set if xh is now 0
	movnes	ip, ip, lsr #1		@ advance bit unless xh just hit 0
	mov	yl, yl, lsr #1
	bne	.Ldiv64_upper_loop

	@ Decide whether the lower quotient word needs computing at all:
	@ if xh == 0 and xl < divisor, the quotient low word is 0 and xl
	@ already is the remainder.
.Ldiv64_lower:
	mov	yl, #0
	cmp	xh, #0
	cmpeq	xl, r4
	movlo	xh, xl
	movlo	pc, lr

	@ Loop producing the lower quotient bits.  Instead of moving the
	@ divisor, the remainder bits are shifted left through xl into xh,
	@ and the bit carried out of xh is taken into account as well.
	mov	ip, #0x80000000
.Ldiv64_lower_loop:
	movs	xl, xl, lsl #1
	adcs	xh, xh, xh		@ xh = (xh << 1) | bit out of xl
	beq	.Ldiv64_top_zero
	cmpcc	xh, r4			@ compare only if nothing was carried out
.Ldiv64_lower_bit:
	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	.Ldiv64_lower_loop
	mov	pc, lr

	@ xh came out as zero.  With carry set the real value has a 33rd
	@ bit, so it is not actually zero: go back and finish this
	@ iteration.  Otherwise, when xl is zero too, nothing is left to
	@ divide and we can return.
.Ldiv64_top_zero:
	bcs	.Ldiv64_lower_bit
	cmp	xl, #0
	moveq	pc, lr

	@ Remainder bits are still pending in xl.  Skip ahead to the first
	@ set one, moving the quotient bit position along with it.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ xh is known to be zero, use it as scratch
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

.Ldiv64_norm_loop:
	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	.Ldiv64_norm_loop

#endif

	@ The remainder is now exactly 1.  Comparing it against the divisor
	@ is pointless because the divisor cannot be below 3 on this path.
	@ Step to the next bit position and rejoin the loop for another
	@ shift, or return if no bit position is left.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	.Ldiv64_lower_loop
	mov	pc, lr

	@ Divisor is a power of 2: find its order in ip, then the quotient
	@ and remainder are obtained with plain shifts.
.Ldiv64_pow2:

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31		@ ip = 31 - clz(divisor)

#else

	@ Narrow the single set bit down 16, 8, then 4 positions at a time,
	@ accumulating the order in ip.
	mov	ip, #0
	mov	yl, r4
	cmp	r4, #(1 << 16)
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	@ yl is now 1, 2, 4 or 8.
	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3		@ yl == 8
	addls	ip, ip, yl, lsr #1	@ yl == 1, 2 or 4 -> add 0, 1 or 2

#endif

	mov	yh, xh, lsr ip		@ quotient high word
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32		@ ip = 32 - order
	@ Merge the bits leaving xh into the quotient low word.  The THUMB
	@ variant shifts xh in place first; xh is rewritten just below.
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip		@ remainder = low 'order' bits of xl
	mov	xh, xh, lsr ip
	mov	pc, lr

	@ Flags still come from the initial subs: eq means the divisor is 1,
	@ so the quotient is the dividend and the remainder is 0.  When the
	@ divisor is 0 none of these execute and control falls through.
.Ldiv64_by_0_or_1:
	moveq	yh, xh
	moveq	yl, xl
	moveq	xh, #0
	moveq	pc, lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by zero: report it through __div0, keeping lr on the
	@ stack across the call (sp is moved by 8 bytes).
	str	lr, [sp, #-8]!
	bl	__div0

	@ There is no meaningful answer, so hand back all zeroes.
	mov	xh, #0
	mov	yh, #0
	mov	yl, #0
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(__do_div64)