2005-04-17 02:20:36 +04:00
/* Generate assembler source containing symbol information
*
* Copyright 2002 by Kai Germaschewski
*
* This software may be used and distributed according to the terms
* of the GNU General Public License , incorporated herein by reference .
*
* Usage : nm - n vmlinux | scripts / kallsyms [ - - all - symbols ] > symbols . S
*
* ChangeLog :
*
* ( 25 / Aug / 2004 ) Paulo Marques < pmarques @ grupopie . com >
* Changed the compression method from stem compression to " table lookup "
* compression
*
* Table compression uses all the unused char codes on the symbols and
* maps these to the most used substrings ( tokens ) . For instance , it might
* map char code 0xF7 to represent " write_ " and then in every symbol where
* " write_ " appears it can be replaced by 0xF7 , saving 5 bytes .
* The used codes themselves are also placed in the table so that the
* decompression can work without "special cases".
* Applied to kernel symbols , this usually produces a compression ratio
* of about 50 % .
*
*/
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <ctype.h>
2007-07-17 15:03:51 +04:00
/* Size of the buffer write_src() uses when expanding a token. */
#define KSYM_NAME_LEN		128

/*
 * One symbol read from the map.
 *
 * addr - address parsed from the input line
 * len  - number of bytes currently stored in sym (shrinks as the
 *        symbol is compressed)
 * sym  - heap buffer: the nm type character followed by the symbol
 *        name; after compression it holds token codes instead
 */
struct sym_entry {
	unsigned long long addr;
	unsigned int len;
	unsigned char *sym;
};

/* growable array of all symbols read so far, its capacity and fill level */
static struct sym_entry *table;
static unsigned int table_size, table_cnt;

/* addresses of the section marker symbols, recorded by read_symbol() */
static unsigned long long _text, _stext, _etext, _sinittext, _einittext;

/* command line options */
static int all_symbols = 0;
static char symbol_prefix_char = '\0';

/*
 * occurrence count of every two-byte sequence, indexed by
 * first byte + (second byte << 8)
 */
static int token_profit[0x10000];

/* the table that holds the result of the compression */
static unsigned char best_table[256][2];
static unsigned char best_table_len[256];
2005-09-07 02:16:31 +04:00
/* Print the command line synopsis on stderr and terminate with status 1. */
static void usage(void)
{
	static const char synopsis[] =
		"Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n";

	fputs(synopsis, stderr);
	exit(1);
}
/*
 * This ignores the intensely annoying "mapping symbols" found
 * in ARM ELF files: $a, $t and $d.
 *
 * Returns 1 when str is "$a", "$t" or "$d", optionally followed by a
 * '.'-introduced suffix, and 0 otherwise.
 */
static inline int is_arm_mapping_symbol(const char *str)
{
	if (str[0] != '$')
		return 0;

	/*
	 * strchr() treats the terminating NUL of "atd" as part of the
	 * string, so a bare "$" has to be rejected before it is called;
	 * otherwise str[2] would be read past the end of the name.
	 */
	if (str[1] == '\0' || strchr("atd", str[1]) == NULL)
		return 0;

	return str[2] == '\0' || str[2] == '.';
}
2005-09-07 02:16:31 +04:00
static int read_symbol ( FILE * in , struct sym_entry * s )
2005-04-17 02:20:36 +04:00
{
char str [ 500 ] ;
2005-09-07 02:16:31 +04:00
char * sym , stype ;
2005-04-17 02:20:36 +04:00
int rc ;
2005-09-07 02:16:31 +04:00
rc = fscanf ( in , " %llx %c %499s \n " , & s - > addr , & stype , str ) ;
2005-04-17 02:20:36 +04:00
if ( rc ! = 3 ) {
if ( rc ! = EOF ) {
/* skip line */
fgets ( str , 500 , in ) ;
}
return - 1 ;
}
2005-05-01 19:59:06 +04:00
sym = str ;
/* skip prefix char */
if ( symbol_prefix_char & & str [ 0 ] = = symbol_prefix_char )
sym + + ;
2005-04-17 02:20:36 +04:00
/* Ignore most absolute/undefined (?) symbols. */
2006-12-07 04:14:04 +03:00
if ( strcmp ( sym , " _text " ) = = 0 )
_text = s - > addr ;
else if ( strcmp ( sym , " _stext " ) = = 0 )
2005-04-17 02:20:36 +04:00
_stext = s - > addr ;
2005-05-01 19:59:06 +04:00
else if ( strcmp ( sym , " _etext " ) = = 0 )
2005-04-17 02:20:36 +04:00
_etext = s - > addr ;
2005-05-01 19:59:06 +04:00
else if ( strcmp ( sym , " _sinittext " ) = = 0 )
2005-04-17 02:20:36 +04:00
_sinittext = s - > addr ;
2005-05-01 19:59:06 +04:00
else if ( strcmp ( sym , " _einittext " ) = = 0 )
2005-04-17 02:20:36 +04:00
_einittext = s - > addr ;
2005-09-07 02:16:31 +04:00
else if ( toupper ( stype ) = = ' A ' )
2005-04-17 02:20:36 +04:00
{
/* Keep these useful absolute symbols */
2005-05-01 19:59:06 +04:00
if ( strcmp ( sym , " __kernel_syscall_via_break " ) & &
strcmp ( sym , " __kernel_syscall_via_epc " ) & &
strcmp ( sym , " __kernel_sigtramp " ) & &
strcmp ( sym , " __gp " ) )
2005-04-17 02:20:36 +04:00
return - 1 ;
}
2005-09-07 02:16:31 +04:00
else if ( toupper ( stype ) = = ' U ' | |
2005-05-01 19:59:06 +04:00
is_arm_mapping_symbol ( sym ) )
2005-04-17 02:20:36 +04:00
return - 1 ;
2005-09-07 02:16:41 +04:00
/* exclude also MIPS ELF local symbols ($L123 instead of .L123) */
else if ( str [ 0 ] = = ' $ ' )
return - 1 ;
2005-04-17 02:20:36 +04:00
/* include the type field in the symbol name, so that it gets
* compressed together */
s - > len = strlen ( str ) + 1 ;
2005-09-07 02:16:31 +04:00
s - > sym = malloc ( s - > len + 1 ) ;
2006-03-25 14:07:46 +03:00
if ( ! s - > sym ) {
fprintf ( stderr , " kallsyms failure: "
" unable to allocate required amount of memory \n " ) ;
exit ( EXIT_FAILURE ) ;
}
2005-09-07 02:16:31 +04:00
strcpy ( ( char * ) s - > sym + 1 , str ) ;
s - > sym [ 0 ] = stype ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
2005-09-07 02:16:31 +04:00
static int symbol_valid ( struct sym_entry * s )
2005-04-17 02:20:36 +04:00
{
/* Symbols which vary between passes. Passes 1 and 2 must have
* identical symbol lists . The kallsyms_ * symbols below are only added
* after pass 1 , they would be included in pass 2 when - - all - symbols is
* specified so exclude them to get a stable symbol list .
*/
static char * special_symbols [ ] = {
" kallsyms_addresses " ,
" kallsyms_num_syms " ,
" kallsyms_names " ,
" kallsyms_markers " ,
" kallsyms_token_table " ,
" kallsyms_token_index " ,
/* Exclude linker generated symbols which vary between passes */
" _SDA_BASE_ " , /* ppc */
" _SDA2_BASE_ " , /* ppc */
NULL } ;
int i ;
2005-05-01 19:59:06 +04:00
int offset = 1 ;
/* skip prefix char */
if ( symbol_prefix_char & & * ( s - > sym + 1 ) = = symbol_prefix_char )
offset + + ;
2005-04-17 02:20:36 +04:00
/* if --all-symbols is not specified, then symbols outside the text
* and inittext sections are discarded */
if ( ! all_symbols ) {
if ( ( s - > addr < _stext | | s - > addr > _etext )
2008-02-06 12:36:26 +03:00
& & ( s - > addr < _sinittext | | s - > addr > _einittext ) )
2005-04-17 02:20:36 +04:00
return 0 ;
/* Corner case. Discard any symbols with the same value as
2008-02-06 12:36:26 +03:00
* _etext _einittext ; they can move between pass 1 and 2 when
* the kallsyms data are added . If these symbols move then
* they may get dropped in pass 2 , which breaks the kallsyms
* rules .
2005-04-17 02:20:36 +04:00
*/
2008-02-06 12:36:26 +03:00
if ( ( s - > addr = = _etext & &
strcmp ( ( char * ) s - > sym + offset , " _etext " ) ) | |
( s - > addr = = _einittext & &
strcmp ( ( char * ) s - > sym + offset , " _einittext " ) ) )
2005-04-17 02:20:36 +04:00
return 0 ;
}
/* Exclude symbols which vary between passes. */
2005-09-07 02:16:31 +04:00
if ( strstr ( ( char * ) s - > sym + offset , " _compiled. " ) )
2005-04-17 02:20:36 +04:00
return 0 ;
for ( i = 0 ; special_symbols [ i ] ; i + + )
2005-09-07 02:16:31 +04:00
if ( strcmp ( ( char * ) s - > sym + offset , special_symbols [ i ] ) = = 0 )
2005-04-17 02:20:36 +04:00
return 0 ;
return 1 ;
}
2005-09-07 02:16:31 +04:00
static void read_map ( FILE * in )
2005-04-17 02:20:36 +04:00
{
while ( ! feof ( in ) ) {
2005-09-07 02:16:31 +04:00
if ( table_cnt > = table_size ) {
table_size + = 10000 ;
table = realloc ( table , sizeof ( * table ) * table_size ) ;
2005-04-17 02:20:36 +04:00
if ( ! table ) {
fprintf ( stderr , " out of memory \n " ) ;
exit ( 1 ) ;
}
}
2005-09-07 02:16:31 +04:00
if ( read_symbol ( in , & table [ table_cnt ] ) = = 0 )
table_cnt + + ;
2005-04-17 02:20:36 +04:00
}
}
static void output_label ( char * label )
{
2005-05-01 19:59:06 +04:00
if ( symbol_prefix_char )
printf ( " .globl %c%s \n " , symbol_prefix_char , label ) ;
else
printf ( " .globl %s \n " , label ) ;
2005-04-17 02:20:36 +04:00
printf ( " \t ALGN \n " ) ;
2005-05-01 19:59:06 +04:00
if ( symbol_prefix_char )
printf ( " %c%s: \n " , symbol_prefix_char , label ) ;
else
printf ( " %s: \n " , label ) ;
2005-04-17 02:20:36 +04:00
}
/*
 * Expand len bytes of compressed data into result and NUL-terminate it.
 * Entries of the best table may themselves still consist of token codes,
 * which is why the expansion recurses.
 *
 * Returns the number of characters written, not counting the terminator.
 * The caller must supply a result buffer that is large enough.
 */
static int expand_symbol(unsigned char *data, int len, char *result)
{
	char *out = result;

	for (; len; len--, data++) {
		int code = *data;

		if (best_table_len[code] == 1 && best_table[code][0] == code) {
			/* the code stands for itself: plain character */
			*out++ = code;
		} else {
			/* the code is a token: expand what it stands for */
			out += expand_symbol(best_table[code],
					     best_table_len[code], out);
		}
	}
	*out = 0;

	return out - result;
}
2005-09-07 02:16:31 +04:00
static void write_src ( void )
2005-04-17 02:20:36 +04:00
{
2005-09-07 02:16:31 +04:00
unsigned int i , k , off ;
2005-04-17 02:20:36 +04:00
unsigned int best_idx [ 256 ] ;
unsigned int * markers ;
2007-07-17 15:03:51 +04:00
char buf [ KSYM_NAME_LEN ] ;
2005-04-17 02:20:36 +04:00
printf ( " #include <asm/types.h> \n " ) ;
printf ( " #if BITS_PER_LONG == 64 \n " ) ;
printf ( " #define PTR .quad \n " ) ;
printf ( " #define ALGN .align 8 \n " ) ;
printf ( " #else \n " ) ;
printf ( " #define PTR .long \n " ) ;
printf ( " #define ALGN .align 4 \n " ) ;
printf ( " #endif \n " ) ;
2006-12-08 13:35:57 +03:00
printf ( " \t .section .rodata, \" a \" \n " ) ;
2005-04-17 02:20:36 +04:00
2006-12-07 04:14:04 +03:00
/* Provide proper symbols relocatability by their '_text'
* relativeness . The symbol names cannot be used to construct
* normal symbol references as the list of symbols contains
* symbols that are declared static and are private to their
* . o files . This prevents . tmp_kallsyms . o or any other
* object from referencing them .
*/
2005-04-17 02:20:36 +04:00
output_label ( " kallsyms_addresses " ) ;
2005-09-07 02:16:31 +04:00
for ( i = 0 ; i < table_cnt ; i + + ) {
2006-12-07 04:14:04 +03:00
if ( toupper ( table [ i ] . sym [ 0 ] ) ! = ' A ' ) {
2006-12-07 04:14:10 +03:00
if ( _text < = table [ i ] . addr )
printf ( " \t PTR \t _text + %#llx \n " ,
table [ i ] . addr - _text ) ;
else
printf ( " \t PTR \t _text - %#llx \n " ,
_text - table [ i ] . addr ) ;
2006-12-07 04:14:04 +03:00
} else {
printf ( " \t PTR \t %#llx \n " , table [ i ] . addr ) ;
}
2005-04-17 02:20:36 +04:00
}
printf ( " \n " ) ;
output_label ( " kallsyms_num_syms " ) ;
2005-09-07 02:16:31 +04:00
printf ( " \t PTR \t %d \n " , table_cnt ) ;
2005-04-17 02:20:36 +04:00
printf ( " \n " ) ;
/* table of offset markers, that give the offset in the compressed stream
* every 256 symbols */
2006-03-25 14:07:46 +03:00
markers = malloc ( sizeof ( unsigned int ) * ( ( table_cnt + 255 ) / 256 ) ) ;
if ( ! markers ) {
fprintf ( stderr , " kallsyms failure: "
" unable to allocate required memory \n " ) ;
exit ( EXIT_FAILURE ) ;
}
2005-04-17 02:20:36 +04:00
output_label ( " kallsyms_names " ) ;
off = 0 ;
2005-09-07 02:16:31 +04:00
for ( i = 0 ; i < table_cnt ; i + + ) {
if ( ( i & 0xFF ) = = 0 )
markers [ i > > 8 ] = off ;
2005-04-17 02:20:36 +04:00
printf ( " \t .byte 0x%02x " , table [ i ] . len ) ;
for ( k = 0 ; k < table [ i ] . len ; k + + )
printf ( " , 0x%02x " , table [ i ] . sym [ k ] ) ;
printf ( " \n " ) ;
off + = table [ i ] . len + 1 ;
}
printf ( " \n " ) ;
output_label ( " kallsyms_markers " ) ;
2005-09-07 02:16:31 +04:00
for ( i = 0 ; i < ( ( table_cnt + 255 ) > > 8 ) ; i + + )
2005-04-17 02:20:36 +04:00
printf ( " \t PTR \t %d \n " , markers [ i ] ) ;
printf ( " \n " ) ;
free ( markers ) ;
output_label ( " kallsyms_token_table " ) ;
off = 0 ;
for ( i = 0 ; i < 256 ; i + + ) {
best_idx [ i ] = off ;
2005-09-07 02:16:31 +04:00
expand_symbol ( best_table [ i ] , best_table_len [ i ] , buf ) ;
2005-04-17 02:20:36 +04:00
printf ( " \t .asciz \t \" %s \" \n " , buf ) ;
off + = strlen ( buf ) + 1 ;
}
printf ( " \n " ) ;
output_label ( " kallsyms_token_index " ) ;
for ( i = 0 ; i < 256 ; i + + )
printf ( " \t .short \t %d \n " , best_idx [ i ] ) ;
printf ( " \n " ) ;
}
/* table lookup compression functions */
/* count all the possible tokens in a symbol */
static void learn_symbol ( unsigned char * symbol , int len )
{
int i ;
for ( i = 0 ; i < len - 1 ; i + + )
2005-09-07 02:16:31 +04:00
token_profit [ symbol [ i ] + ( symbol [ i + 1 ] < < 8 ) ] + + ;
2005-04-17 02:20:36 +04:00
}
/* decrease the count for all the possible tokens in a symbol */
static void forget_symbol ( unsigned char * symbol , int len )
{
int i ;
for ( i = 0 ; i < len - 1 ; i + + )
2005-09-07 02:16:31 +04:00
token_profit [ symbol [ i ] + ( symbol [ i + 1 ] < < 8 ) ] - - ;
2005-04-17 02:20:36 +04:00
}
2005-09-07 02:16:31 +04:00
/* remove all the invalid symbols from the table and do the initial token count */
2005-04-17 02:20:36 +04:00
static void build_initial_tok_table ( void )
{
2005-09-07 02:16:31 +04:00
unsigned int i , pos ;
2005-04-17 02:20:36 +04:00
2005-09-07 02:16:31 +04:00
pos = 0 ;
for ( i = 0 ; i < table_cnt ; i + + ) {
2005-04-17 02:20:36 +04:00
if ( symbol_valid ( & table [ i ] ) ) {
2005-09-07 02:16:31 +04:00
if ( pos ! = i )
table [ pos ] = table [ i ] ;
learn_symbol ( table [ pos ] . sym , table [ pos ] . len ) ;
pos + + ;
2005-04-17 02:20:36 +04:00
}
}
2005-09-07 02:16:31 +04:00
table_cnt = pos ;
2005-04-17 02:20:36 +04:00
}
2007-06-20 21:09:00 +04:00
/*
 * Find the first occurrence of the two-byte token in the first len bytes
 * of str.
 *
 * Returns a pointer to the first byte of the match inside str, or NULL
 * when the token does not occur (always the case for len < 2). The
 * result is typed, as it always points into the unsigned char buffer.
 */
static unsigned char *find_token(unsigned char *str, int len,
				 const unsigned char *token)
{
	int i;

	for (i = 0; i + 1 < len; i++) {
		if (str[i] == token[0] && str[i + 1] == token[1])
			return &str[i];
	}
	return NULL;
}
2005-04-17 02:20:36 +04:00
/* replace a given token in all the valid symbols. Use the sampled symbols
* to update the counts */
2005-09-07 02:16:31 +04:00
static void compress_symbols ( unsigned char * str , int idx )
2005-04-17 02:20:36 +04:00
{
2005-09-07 02:16:31 +04:00
unsigned int i , len , size ;
unsigned char * p1 , * p2 ;
2005-04-17 02:20:36 +04:00
2005-09-07 02:16:31 +04:00
for ( i = 0 ; i < table_cnt ; i + + ) {
2005-04-17 02:20:36 +04:00
len = table [ i ] . len ;
2005-09-07 02:16:31 +04:00
p1 = table [ i ] . sym ;
/* find the token on the symbol */
2007-06-20 21:09:00 +04:00
p2 = find_token ( p1 , len , str ) ;
2005-09-07 02:16:31 +04:00
if ( ! p2 ) continue ;
/* decrease the counts for this symbol's tokens */
forget_symbol ( table [ i ] . sym , len ) ;
size = len ;
2005-04-17 02:20:36 +04:00
do {
2005-09-07 02:16:31 +04:00
* p2 = idx ;
p2 + + ;
size - = ( p2 - p1 ) ;
memmove ( p2 , p2 + 1 , size ) ;
p1 = p2 ;
len - - ;
if ( size < 2 ) break ;
2005-04-17 02:20:36 +04:00
/* find the token on the symbol */
2007-06-20 21:09:00 +04:00
p2 = find_token ( p1 , size , str ) ;
2005-04-17 02:20:36 +04:00
2005-09-07 02:16:31 +04:00
} while ( p2 ) ;
2005-04-17 02:20:36 +04:00
2005-09-07 02:16:31 +04:00
table [ i ] . len = len ;
2005-04-17 02:20:36 +04:00
2005-09-07 02:16:31 +04:00
/* increase the counts for this symbol's new tokens */
learn_symbol ( table [ i ] . sym , len ) ;
2005-04-17 02:20:36 +04:00
}
}
/* search the token with the maximum profit */
2005-09-07 02:16:31 +04:00
static int find_best_token ( void )
2005-04-17 02:20:36 +04:00
{
2005-09-07 02:16:31 +04:00
int i , best , bestprofit ;
2005-04-17 02:20:36 +04:00
bestprofit = - 10000 ;
2005-09-07 02:16:31 +04:00
best = 0 ;
2005-04-17 02:20:36 +04:00
2005-09-07 02:16:31 +04:00
for ( i = 0 ; i < 0x10000 ; i + + ) {
if ( token_profit [ i ] > bestprofit ) {
best = i ;
bestprofit = token_profit [ i ] ;
2005-04-17 02:20:36 +04:00
}
}
return best ;
}
/* this is the core of the algorithm: calculate the "best" table */
static void optimize_result ( void )
{
2005-09-07 02:16:31 +04:00
int i , best ;
2005-04-17 02:20:36 +04:00
/* using the '\0' symbol last allows compress_symbols to use standard
* fast string functions */
for ( i = 255 ; i > = 0 ; i - - ) {
/* if this table slot is empty (it is not used by an actual
* original char code */
if ( ! best_table_len [ i ] ) {
/* find the token with the breates profit value */
best = find_best_token ( ) ;
/* place it in the "best" table */
2005-09-07 02:16:31 +04:00
best_table_len [ i ] = 2 ;
best_table [ i ] [ 0 ] = best & 0xFF ;
best_table [ i ] [ 1 ] = ( best > > 8 ) & 0xFF ;
2005-04-17 02:20:36 +04:00
/* replace this token in all the valid symbols */
2005-09-07 02:16:31 +04:00
compress_symbols ( best_table [ i ] , i ) ;
2005-04-17 02:20:36 +04:00
}
}
}
/* start by placing the symbols that are actually used on the table */
static void insert_real_symbols_in_table ( void )
{
2005-09-07 02:16:31 +04:00
unsigned int i , j , c ;
2005-04-17 02:20:36 +04:00
memset ( best_table , 0 , sizeof ( best_table ) ) ;
memset ( best_table_len , 0 , sizeof ( best_table_len ) ) ;
2005-09-07 02:16:31 +04:00
for ( i = 0 ; i < table_cnt ; i + + ) {
for ( j = 0 ; j < table [ i ] . len ; j + + ) {
c = table [ i ] . sym [ j ] ;
best_table [ c ] [ 0 ] = c ;
best_table_len [ c ] = 1 ;
2005-04-17 02:20:36 +04:00
}
}
}
static void optimize_token_table ( void )
{
build_initial_tok_table ( ) ;
insert_real_symbols_in_table ( ) ;
2005-05-01 19:59:06 +04:00
/* When valid symbol is not registered, exit to error */
2005-09-07 02:16:31 +04:00
if ( ! table_cnt ) {
2005-05-01 19:59:06 +04:00
fprintf ( stderr , " No valid symbol. \n " ) ;
exit ( 1 ) ;
}
2005-04-17 02:20:36 +04:00
optimize_result ( ) ;
}
2005-09-07 02:16:31 +04:00
int main ( int argc , char * * argv )
2005-04-17 02:20:36 +04:00
{
2005-05-01 19:59:06 +04:00
if ( argc > = 2 ) {
int i ;
for ( i = 1 ; i < argc ; i + + ) {
if ( strcmp ( argv [ i ] , " --all-symbols " ) = = 0 )
all_symbols = 1 ;
else if ( strncmp ( argv [ i ] , " --symbol-prefix= " , 16 ) = = 0 ) {
char * p = & argv [ i ] [ 16 ] ;
/* skip quote */
if ( ( * p = = ' " ' & & * ( p + 2 ) = = ' " ' ) | | ( * p = = ' \' ' & & * ( p + 2 ) = = ' \' ' ) )
p + + ;
symbol_prefix_char = * p ;
} else
usage ( ) ;
}
} else if ( argc ! = 1 )
2005-04-17 02:20:36 +04:00
usage ( ) ;
read_map ( stdin ) ;
optimize_token_table ( ) ;
write_src ( ) ;
return 0 ;
}