2005-04-16 15:20:36 -07:00
/* linux/arch/sparc/lib/memset.S: Sparc optimized memset, bzero and clear_user code
 * Copyright (C) 1991,1996 Free Software Foundation
 * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 *
 * Returns 0, if ok, and number of bytes not yet set if exception
 * occurs and we were called as clear_user.
 */
#include <asm/ptrace.h>

/*
 * Work around cpp -rob
 *
 * The '#'-prefixed section flags are wrapped in object-like macros so that
 * they can be used inside the function-like macros below, where a bare '#'
 * would be taken by cpp as the stringify operator.
 */
#define ALLOC #alloc
#define EXECINSTR #execinstr
/*
 * EX(x,y,a,b): emit the single instruction "x,y" with a fault fixup.
 *
 * Because the macro arguments are split at commas, an invocation such as
 *	EX(stb %g3, [%o0], sub %o1, 0)
 * expands to the store "stb %g3,[%o0]" at local label 98 plus:
 *   - a stub in .fixup (local label 99) that branches to local label 30
 *     and, in the branch delay slot, executes "a, b, %o0"
 *     (here "sub %o1, 0, %o0") to leave a value in %o0;
 *   - an __ex_table entry pairing the address of the instruction (98b)
 *     with the address of its stub (99b).
 * Assembly then resumes in .text, 4-byte aligned.
 */
#define EX(x,y,a,b) 				\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba 30f;					\
	 a, b, %o0;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4
/*
 * EXT(start,end,handler): emit one four-word __ex_table record
 * "start, 0, end, handler" and switch back to .text, 4-byte aligned.
 * No instruction is emitted into .text by this macro.
 * NOTE(review): presumably the zero second word marks the record as a
 * range [start, end) rather than a single instruction - confirm against
 * the exception-table lookup code, which is not part of this file.
 */
#define EXT(start,end,handler) 			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	start, 0, end, handler;		\
	.text;					\
	.align	4
/* Please don't change these macros, unless you change the logic
 * in the .fixup section below as well.
 * Store 64 bytes at (BASE + OFFSET) using value SOURCE.
 *
 * Expands to exactly eight std (store doubleword) instructions at
 * ascending offsets 0x00..0x38; SOURCE names the first register of the
 * pair that std writes out.
 */
#define ZERO_BIG_BLOCK(base, offset, source)    \
	std	source, [base + offset + 0x00]; \
	std	source, [base + offset + 0x08]; \
	std	source, [base + offset + 0x10]; \
	std	source, [base + offset + 0x18]; \
	std	source, [base + offset + 0x20]; \
	std	source, [base + offset + 0x28]; \
	std	source, [base + offset + 0x30]; \
	std	source, [base + offset + 0x38];
/*
 * Store eight doublewords below BASE: the first std writes at
 * (BASE - OFFSET - 0x38) and the last at (BASE - OFFSET - 0x00).
 *
 * Expands to exactly eight 4-byte instructions and nothing else.  The
 * code that uses this macro jumps into the middle of the expansion with a
 * computed address, so the instruction count and order must not change.
 */
#define ZERO_LAST_BLOCKS(base, offset, source)	\
	std	source, [base - offset - 0x38]; \
	std	source, [base - offset - 0x30]; \
	std	source, [base - offset - 0x28]; \
	std	source, [base - offset - 0x20]; \
	std	source, [base - offset - 0x18]; \
	std	source, [base - offset - 0x10]; \
	std	source, [base - offset - 0x08]; \
	std	source, [base - offset - 0x00];
/* Code section; __bzero_begin is an exported label placed just ahead of the routines. */
	.text
	.align	4

	.globl	__bzero_begin
__bzero_begin:
2009-12-10 23:32:10 -08:00
/* Export the __bzero entry point (the label itself is defined further down). */
.globl __bzero
2005-04-16 15:20:36 -07:00
/*
 * memset and __bzero share one body; __bzero enters with a zero fill value.
 *
 *   memset:  %o0 = destination, %o1 = fill byte, %o2 = byte count
 *   __bzero: %o0 = destination, %o1 = byte count
 *
 * Inside the shared body %o0 is the write cursor, %o1 the remaining byte
 * count, %g3 the fill byte replicated into all four bytes of a word, and
 * %g2 a copy of %g3 so that %g2/%g3 form the register pair written by std.
 * Both entry points return through "retl; clr %o0", i.e. with %o0 = 0.
 *
 * NOTE(review): memset therefore returns 0 rather than the destination
 * pointer that C callers of memset() normally expect - confirm that no
 * caller relies on the return value.
 *
 * Instructions indented by one extra space sit in a branch delay slot.
 * Several branches deliberately test condition codes that were set in the
 * delay slot of the previous branch, so instruction order must be kept.
 */
	.globl	memset
	.globl	__memset_start, __memset_end
__memset_start:
memset:
	and	%o1, 0xff, %g3
	sll	%g3, 8, %g2
	or	%g3, %g2, %g3
	sll	%g3, 16, %g2
	or	%g3, %g2, %g3		/* %g3 = fill byte in all four byte lanes */
	b	1f
	 mov	%o2, %o1		/* count moves to %o1, matching __bzero */

	/*
	 * Destination is not word aligned (%o2 = %o0 & 3, non-zero).
	 * Store 1, 2 or 3 leading bytes; the first store is in a delay slot
	 * and therefore always executes.
	 */
3:
	cmp	%o2, 3
	be	2f
	 EX(stb	%g3, [%o0], sub %o1, 0)

	cmp	%o2, 2
	be	2f
	 EX(stb	%g3, [%o0 + 0x01], sub %o1, 1)

	EX(stb	%g3, [%o0 + 0x02], sub %o1, 2)
2:
	sub	%o2, 4, %o2		/* %o2 = -(number of bytes just stored) */
	add	%o1, %o2, %o1		/* remaining count shrinks accordingly */
	b	4f
	 sub	%o0, %o2, %o0		/* cursor advances to the word boundary */

__bzero:
	mov	%g0, %g3		/* fill value is zero */
1:
	cmp	%o1, 7
	bleu	7f			/* 7 bytes or fewer: short path */
	 andcc	%o0, 3, %o2		/* %o2 = offset within a word, sets cc */

	bne	3b			/* tests the andcc above, not the cmp */
4:
	 andcc	%o0, 4, %g0		/* is the cursor doubleword aligned? */

	be	2f
	 mov	%g3, %g2		/* complete the %g2/%g3 pair for std */

	/* Word aligned but not doubleword aligned: store one word first. */
	EX(st	%g3, [%o0], sub %o1, 0)
	sub	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%o1, 0xffffff80, %o3	! Now everything is 8 aligned and o1 is len to run
	be	9f			/* no whole 128-byte chunk to store */
	 andcc	%o1, 0x78, %o2		/* %o2 = bytes left in 8-byte units, sets cc for 9: */

	/* Main loop: 128 bytes per iteration, %o3 counts the bytes still to go. */
10:
	ZERO_BIG_BLOCK(%o0, 0x00, %g2)
	subcc	%o3, 128, %o3
	ZERO_BIG_BLOCK(%o0, 0x40, %g2)
11:
	EXT(10b, 11b, 20f)
	bne	10b
	 add	%o0, 128, %o0

	orcc	%o2, %g0, %g0		/* re-derive cc from %o2 for the test at 9: */
9:
	be	13f			/* no 8-byte units left */
	 andcc	%o1, 7, %o1		/* %o1 = trailing 0-7 bytes, sets cc for 13: */

	/*
	 * Computed jump into the std run below.  Each std is 4 bytes long and
	 * stores 8 bytes, so backing up %o2 / 2 bytes from label 13 executes
	 * exactly %o2 / 8 stores.  The delay slot moves the cursor past the
	 * region those stores fill.
	 */
	srl	%o2, 1, %o3
	set	13f, %o4
	sub	%o4, %o3, %o4
	jmp	%o4
	 add	%o0, %o2, %o0

12:
	ZERO_LAST_BLOCKS(%o0, 0x48, %g2)
	ZERO_LAST_BLOCKS(%o0, 0x08, %g2)

	/* Tail: up to 7 bytes, stored as an optional word, halfword and byte. */
13:
	be	8f			/* nothing left to store */
	 andcc	%o1, 4, %g0

	be	1f
	 andcc	%o1, 2, %g0

	EX(st	%g3, [%o0], and %o1, 7)
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%o1, 1, %g0

	EX(sth	%g3, [%o0], and %o1, 3)
	add	%o0, 2, %o0
1:
	/* Annulled branch: the byte store runs only when bit 0 of %o1 is set. */
	bne,a	8f
	 EX(stb	%g3, [%o0], and %o1, 1)
8:
	retl
	 clr	%o0

	/*
	 * Short path (count <= 7).  cc still reflects "%o0 & 3" from the delay
	 * slot of the bleu above: a word-aligned destination reuses the tail
	 * code at 13, anything else is stored one byte at a time.
	 */
7:
	be	13b
	 orcc	%o1, 0, %g0		/* cc = (count == 0) for the next branch */

	be	0f			/* zero-length request */
8:
	 add	%o0, 1, %o0
	subcc	%o1, 1, %o1
[SPARC32]: Fix bug in sparc optimized memset.
Sparc optimized memset (arch/sparc/lib/memset.S) does not fill last
byte of the memory area, if area size is less than 8 bytes and start
address is not word (4-bytes) aligned.
Here is the code chunk where the bug is located:
/* %o0 - memory address, %o1 - size, %g3 - value */
8:
add %o0, 1, %o0
subcc %o1, 1, %o1
bne,a 8b
stb %g3, [%o0 - 1]
This code should write a byte on every loop iteration, but on the last
iteration the delay-slot instruction stb is not executed because the
branch instruction sets the "annul" bit.
Patch replaces bne,a by bne instruction.
Error can be reproduced by simple kernel module:
--------------------
#include <linux/module.h>
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <string.h>
static void do_memset(void **p, int size)
{
memset(p, 0x00, size);
}
static int __init memset_test_init(void)
{
char fooc[8];
int *fooi;
memset(fooc, 0xba, sizeof(fooc));
do_memset((void**)(fooc + 3), 1);
fooi = (int*) fooc;
printk("%08X %08X\n", fooi[0], fooi[1]);
return -1;
}
static void __exit memset_test_cleanup(void)
{
return;
}
module_init(memset_test_init);
module_exit(memset_test_cleanup);
MODULE_LICENSE("GPL");
EXPORT_NO_SYMBOLS;
--------------------
Signed-off-by: Alexander Shmelev <ashmelev@task.sun.mcst.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
2007-07-24 13:41:44 -07:00
/* Plain (non-annulling) branch, so the store in its delay slot also runs on the final iteration. */
	bne	8b
2005-04-16 15:20:36 -07:00
/*
 * Delay slot of the byte loop's closing branch: store the byte just
 * stepped over.  %o1 has already been decremented for this byte, so the
 * fixup value on a fault is %o1 + 1.
 */
	 EX(stb	%g3, [%o0 - 1], add %o1, 1)
0:
	retl
	 clr	%o0			/* return value 0 */
__memset_end:
/*
 * Out-of-line fixup code.  Each stub computes a value into %o0 and then
 * branches to the common tail at local label 30.
 *
 * 20: handler named by the EXT() record covering the 128-byte store loop.
 *     It computes
 *         %o0 = %o3 + (%o1 & 0x7f) - 8 * %g2
 *     and, when %g2 is above 8, first subtracts 9 from %g2 and adds 64 to
 *     %o3.
 *     NOTE(review): presumably %g2 arrives holding the index of the
 *     faulting instruction within the covered range (0-7 first std group,
 *     8 the subcc, 9-16 second group); that is set up by code outside
 *     this file - confirm.
 *
 * 21: computes %o0 = (8 - %g2) * 8 + (%o1 & 7).  No line visible in this
 *     file refers to this label.
 */
	.section .fixup,#alloc,#execinstr
	.align	4
20:
	cmp	%g2, 8
	bleu	1f
	 and	%o1, 0x7f, %o1
	sub	%g2, 9, %g2
	add	%o3, 64, %o3
1:
	sll	%g2, 3, %g2
	add	%o3, %o1, %o0
	b 30f
	 sub	%o0, %g2, %o0
21:
	mov	8, %o0
	and	%o1, 7, %o1
	sub	%o0, %g2, %o0
	sll	%o0, 3, %o0
	b 30f
	 add	%o0, %o1, %o0
/*
 * Common fixup tail, reached from every fixup stub with the stub's result
 * already in %o0.  A new register window is opened, so the incoming
 * %o0-%o7 become %i0-%i7.  lookup_fault is called with the old %o5, %o7
 * and %o4 as its three arguments; "ret; restore" then returns to the
 * original caller, whose %o0 is this window's %i0, i.e. the value the
 * stub computed.
 */
30:
/* %o4 is faulting address, %o5 is %pc where fault occurred */
	save	%sp, -104, %sp
	mov	%i5, %o0
	mov	%i7, %o1
	call	lookup_fault
	 mov	%i4, %o2
	ret
	 restore

	.globl __bzero_end
__bzero_end: