2011-01-12 17:01:22 -08:00
/*
* LZMA2 definitions
*
* Authors : Lasse Collin < lasse . collin @ tukaani . org >
2020-08-11 18:34:50 -07:00
* Igor Pavlov < https : //7-zip.org/>
2011-01-12 17:01:22 -08:00
*
* This file has been put into the public domain .
* You can do whatever you want with this file .
*/
# ifndef XZ_LZMA2_H
# define XZ_LZMA2_H
/* Range coder constants */
# define RC_SHIFT_BITS 8
# define RC_TOP_BITS 24
# define RC_TOP_VALUE (1 << RC_TOP_BITS)
# define RC_BIT_MODEL_TOTAL_BITS 11
# define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
# define RC_MOVE_BITS 5
/*
* Maximum number of position states . A position state is the lowest pb
* number of bits of the current uncompressed offset . In some places there
* are different sets of probabilities for different position states .
*/
# define POS_STATES_MAX (1 << 4)
/*
* This enum is used to track which LZMA symbols have occurred most recently
* and in which order . This information is used to predict the next symbol .
*
* Symbols :
* - Literal : One 8 - bit byte
* - Match : Repeat a chunk of data at some distance
* - Long repeat : Multi - byte match at a recently seen distance
* - Short repeat : One - byte repeat at a recently seen distance
*
* The symbol names are in from STATE_oldest_older_previous . REP means
* either short or long repeated match , and NONLIT means any non - literal .
*/
enum lzma_state {
STATE_LIT_LIT ,
STATE_MATCH_LIT_LIT ,
STATE_REP_LIT_LIT ,
STATE_SHORTREP_LIT_LIT ,
STATE_MATCH_LIT ,
STATE_REP_LIT ,
STATE_SHORTREP_LIT ,
STATE_LIT_MATCH ,
STATE_LIT_LONGREP ,
STATE_LIT_SHORTREP ,
STATE_NONLIT_MATCH ,
STATE_NONLIT_REP
} ;
/* Total number of states */
# define STATES 12
/* The lowest 7 states indicate that the previous state was a literal. */
# define LIT_STATES 7
/* Indicate that the latest symbol was a literal. */
static inline void lzma_state_literal ( enum lzma_state * state )
{
if ( * state < = STATE_SHORTREP_LIT_LIT )
* state = STATE_LIT_LIT ;
else if ( * state < = STATE_LIT_SHORTREP )
* state - = 3 ;
else
* state - = 6 ;
}
/* Indicate that the latest symbol was a match. */
static inline void lzma_state_match ( enum lzma_state * state )
{
* state = * state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH ;
}
/* Indicate that the latest state was a long repeated match. */
static inline void lzma_state_long_rep ( enum lzma_state * state )
{
* state = * state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP ;
}
/* Indicate that the latest symbol was a short match. */
static inline void lzma_state_short_rep ( enum lzma_state * state )
{
* state = * state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP ;
}
/* Test if the previous symbol was a literal. */
static inline bool lzma_state_is_literal ( enum lzma_state state )
{
return state < LIT_STATES ;
}
/* Each literal coder is divided in three sections:
* - 0x001 - 0x0FF : Without match byte
* - 0x101 - 0x1FF : With match byte ; match bit is 0
* - 0x201 - 0x2FF : With match byte ; match bit is 1
*
* Match byte is used when the previous LZMA symbol was something else than
* a literal ( that is , it was some kind of match ) .
*/
# define LITERAL_CODER_SIZE 0x300
/* Maximum number of literal coders */
# define LITERAL_CODERS_MAX (1 << 4)
/* Minimum length of a match is two bytes. */
# define MATCH_LEN_MIN 2
/* Match length is encoded with 4, 5, or 10 bits.
*
* Length Bits
* 2 - 9 4 = Choice = 0 + 3 bits
* 10 - 17 5 = Choice = 1 + Choice2 = 0 + 3 bits
* 18 - 273 10 = Choice = 1 + Choice2 = 1 + 8 bits
*/
# define LEN_LOW_BITS 3
# define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
# define LEN_MID_BITS 3
# define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
# define LEN_HIGH_BITS 8
# define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
# define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
/*
* Maximum length of a match is 273 which is a result of the encoding
* described above .
*/
# define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
/*
* Different sets of probabilities are used for match distances that have
* very short match length : Lengths of 2 , 3 , and 4 bytes have a separate
* set of probabilities for each length . The matches with longer length
* use a shared set of probabilities .
*/
# define DIST_STATES 4
/*
* Get the index of the appropriate probability array for decoding
* the distance slot .
*/
static inline uint32_t lzma_get_dist_state ( uint32_t len )
{
return len < DIST_STATES + MATCH_LEN_MIN
? len - MATCH_LEN_MIN : DIST_STATES - 1 ;
}
/*
* The highest two bits of a 32 - bit match distance are encoded using six bits .
* This six - bit value is called a distance slot . This way encoding a 32 - bit
* value takes 6 - 36 bits , larger values taking more bits .
*/
# define DIST_SLOT_BITS 6
# define DIST_SLOTS (1 << DIST_SLOT_BITS)
/* Match distances up to 127 are fully encoded using probabilities. Since
* the highest two bits ( distance slot ) are always encoded using six bits ,
* the distances 0 - 3 don ' t need any additional bits to encode , since the
* distance slot itself is the same as the actual distance . DIST_MODEL_START
* indicates the first distance slot where at least one additional bit is
* needed .
*/
# define DIST_MODEL_START 4
/*
* Match distances greater than 127 are encoded in three pieces :
* - distance slot : the highest two bits
* - direct bits : 2 - 26 bits below the highest two bits
* - alignment bits : four lowest bits
*
* Direct bits don ' t use any probabilities .
*
* The distance slot value of 14 is for distances 128 - 191.
*/
# define DIST_MODEL_END 14
/* Distance slots that indicate a distance <= 127. */
# define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
# define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
/*
* For match distances greater than 127 , only the highest two bits and the
* lowest four bits ( alignment ) is encoded using probabilities .
*/
# define ALIGN_BITS 4
# define ALIGN_SIZE (1 << ALIGN_BITS)
# define ALIGN_MASK (ALIGN_SIZE - 1)
/* Total number of all probability variables */
# define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
/*
* LZMA remembers the four most recent match distances . Reusing these
* distances tends to take less space than re - encoding the actual
* distance value .
*/
# define REPS 4
# endif