/* 2011-01-12 17:01:22 -08:00 */
/*
 * Branch/Call/Jump (BCJ) filter decoders
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
# include "xz_private.h"
/*
 * The rest of the file is inside this ifdef. It makes things a little more
 * convenient when building without support for any BCJ filters.
 */
# ifdef XZ_DEC_BCJ
/*
 * State of one BCJ filter decoder instance. It is allocated by
 * xz_dec_bcj_create() and (re)initialized by xz_dec_bcj_reset().
 */
struct xz_dec_bcj {
	/*
	 * Which BCJ filter is in use. The enumerator values are compared
	 * directly against the filter ID passed to xz_dec_bcj_reset().
	 */
	enum {
		BCJ_X86 = 4,        /* x86 or x86-64 */
		BCJ_POWERPC = 5,    /* Big endian only */
		BCJ_IA64 = 6,       /* Big or little endian */
		BCJ_ARM = 7,        /* Little endian only */
		BCJ_ARMTHUMB = 8,   /* Little endian only */
		BCJ_SPARC = 9       /* Big or little endian */
	} type;

	/*
	 * Last return value of the next filter in the chain. It has to
	 * survive across calls because the next filter must not be called
	 * again once it has returned XZ_STREAM_END.
	 */
	enum xz_ret ret;

	/* True when the decoder operates in single-call mode. */
	bool single_call;

	/*
	 * Position relative to the beginning of the uncompressed data
	 * (in a single .xz Block). Only the lowest 32 bits matter, so
	 * uint64_t isn't needed even with big files.
	 */
	uint32_t pos;

	/* x86 filter state carried from one bcj_x86() call to the next */
	uint32_t x86_prev_mask;

	/* Scratch copies of the output fields of struct xz_buf */
	uint8_t *out;
	size_t out_pos;
	size_t out_size;

	struct {
		/* Number of already filtered bytes at the start of buf */
		size_t filtered;

		/* Total number of bytes currently stored in buf */
		size_t size;

		/*
		 * Holds a mix of filtered and unfiltered data. It must be
		 * at least Alignment + 2 * Look-ahead bytes:
		 *
		 * Type         Alignment   Look-ahead
		 * x86              1           4
		 * PowerPC          4           0
		 * IA-64           16           0
		 * ARM              4           0
		 * ARM-Thumb        2           2
		 * SPARC            4           0
		 */
		uint8_t buf[16];
	} temp;
};
# ifdef XZ_DEC_X86
/*
 * This is used to test the most significant byte of a memory address
 * in an x86 instruction.
 *
 * Returns non-zero when b is 0x00 or 0xFF, and zero for every other value.
 */
static inline int bcj_x86_test_msbyte(uint8_t b)
{
	return b == 0x00 || b == 0xFF;
}
static size_t bcj_x86 ( struct xz_dec_bcj * s , uint8_t * buf , size_t size )
{
static const bool mask_to_allowed_status [ 8 ]
= { true , true , true , false , true , false , false , false } ;
static const uint8_t mask_to_bit_num [ 8 ] = { 0 , 1 , 2 , 2 , 3 , 3 , 3 , 3 } ;
size_t i ;
size_t prev_pos = ( size_t ) - 1 ;
uint32_t prev_mask = s - > x86_prev_mask ;
uint32_t src ;
uint32_t dest ;
uint32_t j ;
uint8_t b ;
if ( size < = 4 )
return 0 ;
size - = 4 ;
for ( i = 0 ; i < size ; + + i ) {
if ( ( buf [ i ] & 0xFE ) ! = 0xE8 )
continue ;
prev_pos = i - prev_pos ;
if ( prev_pos > 3 ) {
prev_mask = 0 ;
} else {
prev_mask = ( prev_mask < < ( prev_pos - 1 ) ) & 7 ;
if ( prev_mask ! = 0 ) {
b = buf [ i + 4 - mask_to_bit_num [ prev_mask ] ] ;
if ( ! mask_to_allowed_status [ prev_mask ]
| | bcj_x86_test_msbyte ( b ) ) {
prev_pos = i ;
prev_mask = ( prev_mask < < 1 ) | 1 ;
continue ;
}
}
}
prev_pos = i ;
if ( bcj_x86_test_msbyte ( buf [ i + 4 ] ) ) {
src = get_unaligned_le32 ( buf + i + 1 ) ;
while ( true ) {
dest = src - ( s - > pos + ( uint32_t ) i + 5 ) ;
if ( prev_mask = = 0 )
break ;
j = mask_to_bit_num [ prev_mask ] * 8 ;
b = ( uint8_t ) ( dest > > ( 24 - j ) ) ;
if ( ! bcj_x86_test_msbyte ( b ) )
break ;
src = dest ^ ( ( ( uint32_t ) 1 < < ( 32 - j ) ) - 1 ) ;
}
dest & = 0x01FFFFFF ;
dest | = ( uint32_t ) 0 - ( dest & 0x01000000 ) ;
put_unaligned_le32 ( dest , buf + i + 1 ) ;
i + = 4 ;
} else {
prev_mask = ( prev_mask < < 1 ) | 1 ;
}
}
prev_pos = i - prev_pos ;
s - > x86_prev_mask = prev_pos > 3 ? 0 : prev_mask < < ( prev_pos - 1 ) ;
return i ;
}
# endif
# ifdef XZ_DEC_POWERPC
static size_t bcj_powerpc ( struct xz_dec_bcj * s , uint8_t * buf , size_t size )
{
size_t i ;
uint32_t instr ;
for ( i = 0 ; i + 4 < = size ; i + = 4 ) {
instr = get_unaligned_be32 ( buf + i ) ;
if ( ( instr & 0xFC000003 ) = = 0x48000001 ) {
instr & = 0x03FFFFFC ;
instr - = s - > pos + ( uint32_t ) i ;
instr & = 0x03FFFFFC ;
instr | = 0x48000001 ;
put_unaligned_be32 ( instr , buf + i ) ;
}
}
return i ;
}
# endif
# ifdef XZ_DEC_IA64
static size_t bcj_ia64 ( struct xz_dec_bcj * s , uint8_t * buf , size_t size )
{
static const uint8_t branch_table [ 32 ] = {
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
4 , 4 , 6 , 6 , 0 , 0 , 7 , 7 ,
4 , 4 , 0 , 0 , 4 , 4 , 0 , 0
} ;
/*
* The local variables take a little bit stack space , but it ' s less
* than what LZMA2 decoder takes , so it doesn ' t make sense to reduce
* stack usage here without doing that for the LZMA2 decoder too .
*/
/* Loop counters */
size_t i ;
size_t j ;
/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
uint32_t slot ;
/* Bitwise offset of the instruction indicated by slot */
uint32_t bit_pos ;
/* bit_pos split into byte and bit parts */
uint32_t byte_pos ;
uint32_t bit_res ;
/* Address part of an instruction */
uint32_t addr ;
/* Mask used to detect which instructions to convert */
uint32_t mask ;
/* 41-bit instruction stored somewhere in the lowest 48 bits */
uint64_t instr ;
/* Instruction normalized with bit_res for easier manipulation */
uint64_t norm ;
for ( i = 0 ; i + 16 < = size ; i + = 16 ) {
mask = branch_table [ buf [ i ] & 0x1F ] ;
for ( slot = 0 , bit_pos = 5 ; slot < 3 ; + + slot , bit_pos + = 41 ) {
if ( ( ( mask > > slot ) & 1 ) = = 0 )
continue ;
byte_pos = bit_pos > > 3 ;
bit_res = bit_pos & 7 ;
instr = 0 ;
for ( j = 0 ; j < 6 ; + + j )
instr | = ( uint64_t ) ( buf [ i + j + byte_pos ] )
< < ( 8 * j ) ;
norm = instr > > bit_res ;
if ( ( ( norm > > 37 ) & 0x0F ) = = 0x05
& & ( ( norm > > 9 ) & 0x07 ) = = 0 ) {
addr = ( norm > > 13 ) & 0x0FFFFF ;
addr | = ( ( uint32_t ) ( norm > > 36 ) & 1 ) < < 20 ;
addr < < = 4 ;
addr - = s - > pos + ( uint32_t ) i ;
addr > > = 4 ;
norm & = ~ ( ( uint64_t ) 0x8FFFFF < < 13 ) ;
norm | = ( uint64_t ) ( addr & 0x0FFFFF ) < < 13 ;
norm | = ( uint64_t ) ( addr & 0x100000 )
< < ( 36 - 20 ) ;
instr & = ( 1 < < bit_res ) - 1 ;
instr | = norm < < bit_res ;
for ( j = 0 ; j < 6 ; j + + )
buf [ i + j + byte_pos ]
= ( uint8_t ) ( instr > > ( 8 * j ) ) ;
}
}
}
return i ;
}
# endif
# ifdef XZ_DEC_ARM
static size_t bcj_arm ( struct xz_dec_bcj * s , uint8_t * buf , size_t size )
{
size_t i ;
uint32_t addr ;
for ( i = 0 ; i + 4 < = size ; i + = 4 ) {
if ( buf [ i + 3 ] = = 0xEB ) {
addr = ( uint32_t ) buf [ i ] | ( ( uint32_t ) buf [ i + 1 ] < < 8 )
| ( ( uint32_t ) buf [ i + 2 ] < < 16 ) ;
addr < < = 2 ;
addr - = s - > pos + ( uint32_t ) i + 8 ;
addr > > = 2 ;
buf [ i ] = ( uint8_t ) addr ;
buf [ i + 1 ] = ( uint8_t ) ( addr > > 8 ) ;
buf [ i + 2 ] = ( uint8_t ) ( addr > > 16 ) ;
}
}
return i ;
}
# endif
# ifdef XZ_DEC_ARMTHUMB
static size_t bcj_armthumb ( struct xz_dec_bcj * s , uint8_t * buf , size_t size )
{
size_t i ;
uint32_t addr ;
for ( i = 0 ; i + 4 < = size ; i + = 2 ) {
if ( ( buf [ i + 1 ] & 0xF8 ) = = 0xF0
& & ( buf [ i + 3 ] & 0xF8 ) = = 0xF8 ) {
addr = ( ( ( uint32_t ) buf [ i + 1 ] & 0x07 ) < < 19 )
| ( ( uint32_t ) buf [ i ] < < 11 )
| ( ( ( uint32_t ) buf [ i + 3 ] & 0x07 ) < < 8 )
| ( uint32_t ) buf [ i + 2 ] ;
addr < < = 1 ;
addr - = s - > pos + ( uint32_t ) i + 4 ;
addr > > = 1 ;
buf [ i + 1 ] = ( uint8_t ) ( 0xF0 | ( ( addr > > 19 ) & 0x07 ) ) ;
buf [ i ] = ( uint8_t ) ( addr > > 11 ) ;
buf [ i + 3 ] = ( uint8_t ) ( 0xF8 | ( ( addr > > 8 ) & 0x07 ) ) ;
buf [ i + 2 ] = ( uint8_t ) addr ;
i + = 2 ;
}
}
return i ;
}
# endif
# ifdef XZ_DEC_SPARC
static size_t bcj_sparc ( struct xz_dec_bcj * s , uint8_t * buf , size_t size )
{
size_t i ;
uint32_t instr ;
for ( i = 0 ; i + 4 < = size ; i + = 4 ) {
instr = get_unaligned_be32 ( buf + i ) ;
if ( ( instr > > 22 ) = = 0x100 | | ( instr > > 22 ) = = 0x1FF ) {
instr < < = 2 ;
instr - = s - > pos + ( uint32_t ) i ;
instr > > = 2 ;
instr = ( ( uint32_t ) 0x40000000 - ( instr & 0x400000 ) )
| 0x40000000 | ( instr & 0x3FFFFF ) ;
put_unaligned_be32 ( instr , buf + i ) ;
}
}
return i ;
}
# endif
/*
* Apply the selected BCJ filter . Update * pos and s - > pos to match the amount
* of data that got filtered .
*
* NOTE : This is implemented as a switch statement to avoid using function
* pointers , which could be problematic in the kernel boot code , which must
* avoid pointers to static data ( at least on x86 ) .
*/
static void bcj_apply ( struct xz_dec_bcj * s ,
uint8_t * buf , size_t * pos , size_t size )
{
size_t filtered ;
buf + = * pos ;
size - = * pos ;
switch ( s - > type ) {
# ifdef XZ_DEC_X86
case BCJ_X86 :
filtered = bcj_x86 ( s , buf , size ) ;
break ;
# endif
# ifdef XZ_DEC_POWERPC
case BCJ_POWERPC :
filtered = bcj_powerpc ( s , buf , size ) ;
break ;
# endif
# ifdef XZ_DEC_IA64
case BCJ_IA64 :
filtered = bcj_ia64 ( s , buf , size ) ;
break ;
# endif
# ifdef XZ_DEC_ARM
case BCJ_ARM :
filtered = bcj_arm ( s , buf , size ) ;
break ;
# endif
# ifdef XZ_DEC_ARMTHUMB
case BCJ_ARMTHUMB :
filtered = bcj_armthumb ( s , buf , size ) ;
break ;
# endif
# ifdef XZ_DEC_SPARC
case BCJ_SPARC :
filtered = bcj_sparc ( s , buf , size ) ;
break ;
# endif
default :
/* Never reached but silence compiler warnings. */
filtered = 0 ;
break ;
}
* pos + = filtered ;
s - > pos + = filtered ;
}
/*
* Flush pending filtered data from temp to the output buffer .
* Move the remaining mixture of possibly filtered and unfiltered
* data to the beginning of temp .
*/
static void bcj_flush ( struct xz_dec_bcj * s , struct xz_buf * b )
{
size_t copy_size ;
copy_size = min_t ( size_t , s - > temp . filtered , b - > out_size - b - > out_pos ) ;
memcpy ( b - > out + b - > out_pos , s - > temp . buf , copy_size ) ;
b - > out_pos + = copy_size ;
s - > temp . filtered - = copy_size ;
s - > temp . size - = copy_size ;
memmove ( s - > temp . buf , s - > temp . buf + copy_size , s - > temp . size ) ;
}
/*
* The BCJ filter functions are primitive in sense that they process the
* data in chunks of 1 - 16 bytes . To hide this issue , this function does
* some buffering .
*/
XZ_EXTERN enum xz_ret xz_dec_bcj_run ( struct xz_dec_bcj * s ,
struct xz_dec_lzma2 * lzma2 ,
struct xz_buf * b )
{
size_t out_start ;
/*
* Flush pending already filtered data to the output buffer . Return
* immediatelly if we couldn ' t flush everything , or if the next
* filter in the chain had already returned XZ_STREAM_END .
*/
if ( s - > temp . filtered > 0 ) {
bcj_flush ( s , b ) ;
if ( s - > temp . filtered > 0 )
return XZ_OK ;
if ( s - > ret = = XZ_STREAM_END )
return XZ_STREAM_END ;
}
/*
* If we have more output space than what is currently pending in
* temp , copy the unfiltered data from temp to the output buffer
* and try to fill the output buffer by decoding more data from the
* next filter in the chain . Apply the BCJ filter on the new data
* in the output buffer . If everything cannot be filtered , copy it
* to temp and rewind the output buffer position accordingly .
2011-09-21 17:30:50 +03:00
*
* This needs to be always run when temp . size = = 0 to handle a special
* case where the output buffer is full and the next filter has no
* more output coming but hasn ' t returned XZ_STREAM_END yet .
2011-01-12 17:01:22 -08:00
*/
2011-09-21 17:30:50 +03:00
if ( s - > temp . size < b - > out_size - b - > out_pos | | s - > temp . size = = 0 ) {
2011-01-12 17:01:22 -08:00
out_start = b - > out_pos ;
memcpy ( b - > out + b - > out_pos , s - > temp . buf , s - > temp . size ) ;
b - > out_pos + = s - > temp . size ;
s - > ret = xz_dec_lzma2_run ( lzma2 , b ) ;
if ( s - > ret ! = XZ_STREAM_END
& & ( s - > ret ! = XZ_OK | | s - > single_call ) )
return s - > ret ;
bcj_apply ( s , b - > out , & out_start , b - > out_pos ) ;
/*
* As an exception , if the next filter returned XZ_STREAM_END ,
* we can do that too , since the last few bytes that remain
* unfiltered are meant to remain unfiltered .
*/
if ( s - > ret = = XZ_STREAM_END )
return XZ_STREAM_END ;
s - > temp . size = b - > out_pos - out_start ;
b - > out_pos - = s - > temp . size ;
memcpy ( s - > temp . buf , b - > out + b - > out_pos , s - > temp . size ) ;
2011-09-21 17:30:50 +03:00
/*
* If there wasn ' t enough input to the next filter to fill
* the output buffer with unfiltered data , there ' s no point
* to try decoding more data to temp .
*/
if ( b - > out_pos + s - > temp . size < b - > out_size )
return XZ_OK ;
2011-01-12 17:01:22 -08:00
}
/*
2011-09-21 17:30:50 +03:00
* We have unfiltered data in temp . If the output buffer isn ' t full
* yet , try to fill the temp buffer by decoding more data from the
* next filter . Apply the BCJ filter on temp . Then we hopefully can
* fill the actual output buffer by copying filtered data from temp .
* A mix of filtered and unfiltered data may be left in temp ; it will
* be taken care on the next call to this function .
2011-01-12 17:01:22 -08:00
*/
2011-09-21 17:30:50 +03:00
if ( b - > out_pos < b - > out_size ) {
2011-01-12 17:01:22 -08:00
/* Make b->out{,_pos,_size} temporarily point to s->temp. */
s - > out = b - > out ;
s - > out_pos = b - > out_pos ;
s - > out_size = b - > out_size ;
b - > out = s - > temp . buf ;
b - > out_pos = s - > temp . size ;
b - > out_size = sizeof ( s - > temp . buf ) ;
s - > ret = xz_dec_lzma2_run ( lzma2 , b ) ;
s - > temp . size = b - > out_pos ;
b - > out = s - > out ;
b - > out_pos = s - > out_pos ;
b - > out_size = s - > out_size ;
if ( s - > ret ! = XZ_OK & & s - > ret ! = XZ_STREAM_END )
return s - > ret ;
bcj_apply ( s , s - > temp . buf , & s - > temp . filtered , s - > temp . size ) ;
/*
* If the next filter returned XZ_STREAM_END , we mark that
* everything is filtered , since the last unfiltered bytes
* of the stream are meant to be left as is .
*/
if ( s - > ret = = XZ_STREAM_END )
s - > temp . filtered = s - > temp . size ;
bcj_flush ( s , b ) ;
if ( s - > temp . filtered > 0 )
return XZ_OK ;
}
return s - > ret ;
}
XZ_EXTERN struct xz_dec_bcj * xz_dec_bcj_create ( bool single_call )
{
struct xz_dec_bcj * s = kmalloc ( sizeof ( * s ) , GFP_KERNEL ) ;
if ( s ! = NULL )
s - > single_call = single_call ;
return s ;
}
XZ_EXTERN enum xz_ret xz_dec_bcj_reset ( struct xz_dec_bcj * s , uint8_t id )
{
switch ( id ) {
# ifdef XZ_DEC_X86
case BCJ_X86 :
# endif
# ifdef XZ_DEC_POWERPC
case BCJ_POWERPC :
# endif
# ifdef XZ_DEC_IA64
case BCJ_IA64 :
# endif
# ifdef XZ_DEC_ARM
case BCJ_ARM :
# endif
# ifdef XZ_DEC_ARMTHUMB
case BCJ_ARMTHUMB :
# endif
# ifdef XZ_DEC_SPARC
case BCJ_SPARC :
# endif
break ;
default :
/* Unsupported Filter ID */
return XZ_OPTIONS_ERROR ;
}
s - > type = id ;
s - > ret = XZ_OK ;
s - > pos = 0 ;
s - > x86_prev_mask = 0 ;
s - > temp . filtered = 0 ;
s - > temp . size = 0 ;
return XZ_OK ;
}
# endif