2008-07-10 13:31:43 +04:00
/*
 * Copyright (C) Matthieu Suiche 2008
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the author nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
# include "replace.h"
# include "lzxpress.h"
2010-11-26 23:45:13 +03:00
# include "../lib/util/byteorder.h"
2008-07-10 13:31:43 +04:00
2021-03-23 19:33:34 +03:00
/*
 * Leave the enclosing function with -1 unless at least __needed bytes
 * are available in a buffer of __size bytes, starting at __index.
 *
 * The test is done in two steps so that __index + __needed is never
 * computed (it could wrap around): __size - __index is only evaluated
 * once __index is known to be smaller than __size.
 *
 * Every parameter is parenthesised, so arbitrary expressions may be
 * passed. __size and __index are expanded twice and must therefore be
 * free of side effects.
 *
 * Because this expands to `return -1`, it can only be used inside
 * functions whose return type can carry -1 as an error value.
 */
#define __CHECK_BYTES(__size, __index, __needed) do { \
	if (unlikely((__index) >= (__size))) { \
		return -1; \
	} else { \
		uint32_t __avail = (__size) - (__index); \
		if (unlikely((__needed) > __avail)) { \
			return -1; \
		} \
	} \
} while(0)
2022-11-17 06:15:00 +03:00
/*
 * Output-side state of lzxpress_compress().
 */
struct write_context {
	/* destination buffer for the compressed stream */
	uint8_t *compressed;
	/* offset in compressed[] of the next byte to be written */
	uint32_t compressed_pos;
	/* capacity of compressed[] in bytes */
	uint32_t max_compressed_size;
	/* literal/match indicator bits collected so far */
	uint32_t indic;
	/* how many bits of indic are in use (flushed when it reaches 32) */
	uint32_t indic_bit;
	/* offset in compressed[] reserved for the current indicator word */
	uint32_t indic_pos;
	/*
	 * offset in compressed[] of a byte whose high nibble is still
	 * free for a match length, or 0 when there is no such byte
	 */
	uint32_t nibble_index;
};
2022-03-07 09:38:08 +03:00
/*
 * Bounds checks used by lzxpress_compress(). They expand in place and so
 * depend on that function's local names: uncompressed_size and
 * uncompressed_pos for the input side, and the write context `wc` for
 * the output side. When fewer than __needed bytes are available the
 * enclosing function returns -1.
 */
#define CHECK_INPUT_BYTES(__needed) \
	__CHECK_BYTES(uncompressed_size, uncompressed_pos, (__needed))
#define CHECK_OUTPUT_BYTES(__needed) \
	__CHECK_BYTES(wc.max_compressed_size, wc.compressed_pos, (__needed))
2022-03-07 09:38:08 +03:00
2008-07-10 13:31:43 +04:00
/**
 * Compress a buffer with the algorithm in [MS-XCA] 2.3 "Plain LZ77
 * Compression".
 *
 * It avoids Huffman encoding by including literal bytes inline when a
 * match is not found. The stream consists of 32-bit little-endian
 * indicator words, each describing the (up to) 32 items that follow it.
 * Indicator bits are used from the most significant bit downwards: 0
 * flags a literal byte, 1 flags a match. Unused bits of the last
 * indicator word are set to 1.
 *
 * A match of `len` bytes at distance `offset` is encoded as:
 *
 *   - a uint16 holding ((offset - 1) << 3) | MIN(len - 3, 7);
 *   - if len - 3 >= 7, a nibble holding MIN(len - 10, 15). The first
 *     such nibble occupies the low half of a fresh byte; the next match
 *     needing a nibble uses the high half of that same byte;
 *   - if len - 10 >= 15, a byte holding MIN(len - 25, 255);
 *   - if len - 25 >= 255, a uint16 holding len - 3.
 *
 * Matches are at least 3 bytes long and reach back at most 0x2000 bytes.
 *
 * @param uncompressed         data to compress
 * @param uncompressed_size    number of bytes in uncompressed
 * @param compressed           buffer receiving the compressed stream
 * @param max_compressed_size  capacity of compressed, in bytes
 *
 * @return the number of bytes written to compressed, 0 if
 *         uncompressed_size is 0, or -1 if the compressed stream does
 *         not fit into max_compressed_size bytes.
 */
ssize_t lzxpress_compress(const uint8_t *uncompressed,
			  uint32_t uncompressed_size,
			  uint8_t *compressed,
			  uint32_t max_compressed_size)
{
	uint32_t uncompressed_pos = 0;
	struct write_context wc = {
		.indic = 0,
		.indic_pos = 0,
		.indic_bit = 0,
		.nibble_index = 0,
		.compressed = compressed,
		.compressed_pos = 0,
		.max_compressed_size = max_compressed_size
	};

	if (!uncompressed_size) {
		return 0;
	}

	/*
	 * Reserve the first indicator word (wc.indic_pos is 0). Its real
	 * value is written once 32 items have been emitted, or at the end.
	 */
	CHECK_OUTPUT_BYTES(sizeof(uint32_t));
	PUSH_LE_U32(wc.compressed, wc.compressed_pos, 0);
	wc.compressed_pos += sizeof(uint32_t);

	while ((uncompressed_pos < uncompressed_size) &&
	       (wc.compressed_pos < wc.max_compressed_size)) {
		bool found = false;
		/* starting at 2 means only matches of 3+ bytes are accepted */
		uint32_t best_len = 2;
		uint32_t best_offset = 0;
		uint32_t offset;

		/* we can look back over what was already consumed, up to 0x2000 */
		const uint32_t max_offset = MIN(0x2000, uncompressed_pos);
		/* maximum len we can encode into metadata */
		const uint32_t max_len = MIN(0xFFFF + 3,
					     uncompressed_size - uncompressed_pos);

		/* search for the longest match in the window for the lookahead buffer */
		for (offset = 1; offset <= max_offset; offset++) {
			uint32_t len;

			for (len = 0;
			     (len < max_len) &&
			     (uncompressed[uncompressed_pos + len] ==
			      uncompressed[uncompressed_pos + len - offset]);
			     len++) {
				/* counting matching bytes only */
			}

			/*
			 * Only a strictly longer match replaces the best
			 * one found so far, so on a tie the nearest
			 * offset wins.
			 */
			if (len > best_len) {
				found = true;
				best_len = len;
				best_offset = offset;
				if (best_len == max_len) {
					/* We're not going to do better than this */
					break;
				}
			}
		}

		if (!found) {
			/*
			 * This is going to be a literal byte, which we
			 * flag by leaving a zero bit in an indicator field
			 * somewhere earlier in the stream.
			 */
			CHECK_INPUT_BYTES(sizeof(uint8_t));
			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			wc.compressed[wc.compressed_pos++] =
				uncompressed[uncompressed_pos++];

			wc.indic <<= 1;
			wc.indic_bit += 1;

			if (wc.indic_bit == 32) {
				/* flush the full word, then reserve the next */
				PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic);
				wc.indic_bit = 0;
				CHECK_OUTPUT_BYTES(sizeof(uint32_t));
				wc.indic_pos = wc.compressed_pos;
				wc.compressed_pos += sizeof(uint32_t);
			}
		} else {
			uint32_t match_len = best_len;
			uint16_t metadata;

			/* both values are stored with a bias */
			match_len -= 3;
			best_offset -= 1;

			/* Classical meta-data */
			CHECK_OUTPUT_BYTES(sizeof(uint16_t));
			metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7));
			PUSH_LE_U16(wc.compressed, wc.compressed_pos, metadata);
			wc.compressed_pos += sizeof(uint16_t);

			if (match_len >= 7) {
				match_len -= 7;

				if (!wc.nibble_index) {
					/* start a new byte, using its low half */
					wc.nibble_index = wc.compressed_pos;

					CHECK_OUTPUT_BYTES(sizeof(uint8_t));
					wc.compressed[wc.nibble_index] = MIN(match_len, 15);
					wc.compressed_pos += sizeof(uint8_t);
				} else {
					/* fill the high half left over earlier */
					wc.compressed[wc.nibble_index] |= MIN(match_len, 15) << 4;
					wc.nibble_index = 0;
				}

				if (match_len >= 15) {
					match_len -= 15;

					CHECK_OUTPUT_BYTES(sizeof(uint8_t));
					wc.compressed[wc.compressed_pos] = MIN(match_len, 255);
					wc.compressed_pos += sizeof(uint8_t);

					if (match_len >= 255) {
						/* Additional match_len */

						/* back to (best_len - 3) */
						match_len += 7 + 15;

						if (match_len < (1 << 16)) {
							CHECK_OUTPUT_BYTES(sizeof(uint16_t));
							PUSH_LE_U16(wc.compressed, wc.compressed_pos, match_len);
							wc.compressed_pos += sizeof(uint16_t);
						} else {
							/*
							 * A zero uint16 followed by a
							 * uint32 length. With max_len
							 * capped at 0xFFFF + 3 above,
							 * match_len cannot get this
							 * large at present.
							 */
							CHECK_OUTPUT_BYTES(sizeof(uint16_t) +
									   sizeof(uint32_t));
							PUSH_LE_U16(wc.compressed, wc.compressed_pos, 0);
							wc.compressed_pos += sizeof(uint16_t);

							PUSH_LE_U32(wc.compressed, wc.compressed_pos, match_len);
							wc.compressed_pos += sizeof(uint32_t);
						}
					}
				}
			}

			/* flag the match with a one bit */
			wc.indic = (wc.indic << 1) | 1;
			wc.indic_bit += 1;

			if (wc.indic_bit == 32) {
				/* flush the full word, then reserve the next */
				PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic);
				wc.indic_bit = 0;
				CHECK_OUTPUT_BYTES(sizeof(uint32_t));
				wc.indic_pos = wc.compressed_pos;
				wc.compressed_pos += sizeof(uint32_t);
			}

			uncompressed_pos += best_len;
		}
	}

	if (uncompressed_pos < uncompressed_size) {
		/*
		 * The loop stopped because the output buffer became
		 * exactly full while input was still pending. What has been
		 * written does not represent the whole input, so report
		 * it like every other out-of-space condition rather than
		 * returning a truncated stream as a success.
		 */
		return -1;
	}

	/*
	 * Left-align the bits gathered for the last, partial indicator
	 * word and set all the unused low bits to 1. The shift is skipped
	 * when no bits are pending, as shifting by 32 is undefined.
	 */
	if (wc.indic_bit != 0) {
		wc.indic <<= 32 - wc.indic_bit;
	}
	wc.indic |= UINT32_MAX >> wc.indic_bit;
	PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic);

	return wc.compressed_pos;
}
2008-07-10 13:31:43 +04:00
/**
 * Decompress a plain LZ77 stream, following [MS-XCA] 2.4 "Plain LZ77
 * Decompression Algorithm Details".
 *
 * Decoding stops when the whole input has been consumed or when the
 * output buffer is full, whichever happens first.
 *
 * @param input            compressed stream
 * @param input_size       number of bytes in input
 * @param output           buffer receiving the decompressed data
 * @param max_output_size  capacity of output, in bytes
 *
 * @return the number of bytes written to output, 0 if input_size is 0,
 *         or -1 if an item would read past the end of input, write
 *         past the end of output, refer to data before the start of
 *         output, or carries an invalid length.
 */
ssize_t lzxpress_decompress(const uint8_t *input,
			    uint32_t input_size,
			    uint8_t *output,
			    uint32_t max_output_size)
{
	uint32_t output_index = 0;
	uint32_t input_index = 0;
	/* current indicator word and the number of its bits still unused */
	uint32_t flags = 0;
	uint32_t flags_left = 0;
	/* input offset of a byte whose high nibble is still unread, or 0 */
	uint32_t half_byte_pos = 0;

	if (input_size == 0) {
		return 0;
	}

	/* From here on the bounds checks refer to this function's locals. */
#undef CHECK_INPUT_BYTES
#define CHECK_INPUT_BYTES(__needed) \
	__CHECK_BYTES(input_size, input_index, __needed)
#undef CHECK_OUTPUT_BYTES
#define CHECK_OUTPUT_BYTES(__needed) \
	__CHECK_BYTES(max_output_size, output_index, __needed)

	do {
		uint32_t match_len;
		uint32_t distance;

		if (flags_left == 0) {
			/* fetch the next indicator word */
			CHECK_INPUT_BYTES(sizeof(uint32_t));
			flags = PULL_LE_U32(input, input_index);
			input_index += sizeof(uint32_t);
			if (input_index == input_size) {
				/*
				 * The compressor left room for indicator
				 * flags for data that doesn't exist.
				 */
				break;
			}
			flags_left = 32;
		}
		flags_left--;

		/*
		 * Bits are taken from the top of the indicator word
		 * downwards; a clear bit announces a literal byte.
		 */
		if (((flags >> flags_left) & 1) == 0) {
			CHECK_INPUT_BYTES(sizeof(uint8_t));
			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			output[output_index] = input[input_index];
			input_index++;
			output_index++;
			continue;
		}

		/*
		 * A match: the uint16 holds the distance (minus one) in
		 * its upper 13 bits and the start of the length in its
		 * lower 3 bits.
		 */
		CHECK_INPUT_BYTES(sizeof(uint16_t));
		match_len = PULL_LE_U16(input, input_index);
		input_index += sizeof(uint16_t);
		distance = (match_len >> 3) + 1;
		match_len &= 7;

		if (match_len == 7) {
			/* the length continues in a nibble */
			if (half_byte_pos == 0) {
				/* low half of a new byte; remember the high half */
				CHECK_INPUT_BYTES(sizeof(uint8_t));
				half_byte_pos = input_index;
				match_len = input[input_index] & 0xf;
				input_index++;
			} else {
				/* high half of the byte remembered earlier */
				match_len = input[half_byte_pos] >> 4;
				half_byte_pos = 0;
			}

			if (match_len == 15) {
				/* ... and continues in a whole byte */
				CHECK_INPUT_BYTES(sizeof(uint8_t));
				match_len = input[input_index];
				input_index++;

				if (match_len == 255) {
					/*
					 * The total (still less 3) follows as a
					 * uint16, or as a uint32 when that
					 * uint16 is zero.
					 */
					CHECK_INPUT_BYTES(sizeof(uint16_t));
					match_len = PULL_LE_U16(input, input_index);
					input_index += sizeof(uint16_t);
					if (match_len == 0) {
						CHECK_INPUT_BYTES(sizeof(uint32_t));
						match_len = PULL_LE_U32(input, input_index);
						input_index += sizeof(uint32_t);
					}
					if (match_len < (15 + 7)) {
						return -1;
					}
					/* cancel the two additions below */
					match_len -= (15 + 7);
				}
				match_len += 15;
			}
			match_len += 7;
		}
		match_len += 3;

		/* only a wrapped-around 32 bit length can end up as zero */
		if (match_len == 0) {
			return -1;
		}

		/*
		 * output_index only grows while copying, so checking once
		 * that the match starts inside the output is enough.
		 */
		if (distance > output_index) {
			return -1;
		}

		/* byte by byte, because source and destination may overlap */
		for (; match_len != 0; match_len--) {
			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			output[output_index] = output[output_index - distance];
			output_index++;
		}
	} while ((output_index < max_output_size) && (input_index < (input_size)));

	return output_index;
}