2010-09-11 01:34:05 +04:00
/*
 * set.c - base62, golomb and set-string routines
 *
 * Copyright (C) 2010  Alexey Tourbin <at@altlinux.org>
 *
 * License: GPLv2+ or LGPL, see RPM COPYING
 */
# ifdef SELF_TEST
# undef NDEBUG
# include <stdio.h>
# endif
# include <string.h>
# include <stdlib.h>
# include <assert.h>
/*
 * Base62 routines - encode bits with alnum characters.
 *
 * This is a base64-based base62 implementation.  Values 0..61 are encoded
 * with '0'..'9', 'a'..'z', and 'A'..'Z'.  However, 'Z' is special: it will
 * also encode 62 and 63.  To achieve this, 'Z' will occupy two high bits in
 * the next character.  Thus 'Z' can be interpreted as an escape character
 * (which indicates that the next character must be handled specially).
 * Note that setting high bits to "00", "01" or "10" cannot contribute
 * to another 'Z' (which would require high bits set to "11").  This is
 * how multiple escapes can be effectively avoided.
 */
2011-01-02 06:39:32 +03:00
// Estimate base62 buffer size (in bytes) required to encode a given
// number of bits.
static inline
int encode_base62_size(int bitc)
{
    // Budget one character per four bits; the remaining bits can make
    // a character, too.  And the string should be null-terminated.
    return (bitc >> 2) + 2;
}
2011-01-02 06:39:32 +03:00
// Main base62 encoding routine: pack bitv into base62 string.
//
// bitc   - number of bits in bitv
// bitv   - one bit per element (only the lowest bit of each element is used)
// base62 - output buffer, at least encode_base62_size(bitc) bytes
//
// Returns the length of the resulting null-terminated string.
static
int encode_base62(int bitc, const char *bitv, char *base62)
{
    // digit values 0..61; 'Z' (61) doubles as the escape character
    static const char digits[] =
        "0123456789"
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    char *base62_start = base62;
    int bits2 = 0; // number of high bits set
    int bits6 = 0; // number of regular bits set
    int num6b = 0; // pending 6-bit number
    while (bitc-- > 0) {
        // bits are packed least significant first
        num6b |= (*bitv++ & 1) << bits6;
        bits6++;
        if (bits6 + bits2 < 6)
            continue;
        switch (num6b) {
        case 61:
            // escape; extra "00...." high bits (in the next character)
            *base62++ = digits[61];
            bits2 = 2;
            num6b = 0;
            break;
        case 62:
            // escape; extra "01...." high bits
            *base62++ = digits[61];
            bits2 = 2;
            num6b = 16;
            break;
        case 63:
            // escape; extra "10...." high bits
            *base62++ = digits[61];
            bits2 = 2;
            num6b = 32;
            break;
        default:
            assert(num6b >= 0 && num6b < 61);
            *base62++ = digits[num6b];
            bits2 = 0;
            num6b = 0;
            break;
        }
        bits6 = 0;
    }
    // flush the pending (incomplete or escaped) character
    if (bits6 + bits2) {
        assert(num6b < 61);
        *base62++ = digits[num6b];
    }
    *base62 = '\0';
    return (int) (base62 - base62_start);
}
2011-01-02 06:39:32 +03:00
// Estimate how many bits will result from decoding a base62 string
// (an upper bound, suitable for sizing the bitv[] buffer).
static inline
int decode_base62_size(const char *base62)
{
    int len = strlen(base62);
    // Each character will fill at most 6 bits.
    return len * 6;
}
2011-01-03 02:06:07 +03:00
// This table maps alnum characters to their numeric values (0..61).
// Two sentinels are used by the decoders: 0xff marks the terminating
// '\0' byte, 0xee marks any other non-alphanumeric byte.
// Later designators deliberately override the [0 ... 255] default.
static
const int char_to_num[256] = {
    [0 ... 255] = 0xee,
    [0] = 0xff,
// NB: macro arguments are intentionally NOT parenthesized.  The "b"
// argument is spliced textually, so that C26('a', 'a' + 10) expands
// to ['a'] = 'a' - 'a' + 10, which is 10.
#define C1(c, b) [c] = c - b
#define C2(c, b) C1(c, b), C1(c + 1, b)
#define C5(c, b) C1(c, b), C2(c + 1, b), C2(c + 3, b)
#define C10(c, b) C5(c, b), C5(c + 5, b)
    C10('0', '0'),
#define C26(c, b) C1(c, b), C5(c + 1, b), C10(c + 6, b), C10(c + 16, b)
    C26('a', 'a' + 10),
    C26('A', 'A' + 36),
};
// Main base62 decoding routine: unpack base62 string into bitmap.
2010-09-11 01:34:05 +04:00
static
2011-01-02 06:39:32 +03:00
int decode_base62 ( const char * base62 , char * bitv )
{
char * bitv_start = bitv ;
2011-01-03 00:00:58 +03:00
inline
2011-01-02 06:39:32 +03:00
void put6bits ( int c )
{
* bitv + + = ( c > > 0 ) & 1 ;
* bitv + + = ( c > > 1 ) & 1 ;
* bitv + + = ( c > > 2 ) & 1 ;
* bitv + + = ( c > > 3 ) & 1 ;
* bitv + + = ( c > > 4 ) & 1 ;
* bitv + + = ( c > > 5 ) & 1 ;
}
2011-01-03 00:00:58 +03:00
inline
2011-01-02 06:39:32 +03:00
void put4bits ( int c )
{
* bitv + + = ( c > > 0 ) & 1 ;
* bitv + + = ( c > > 1 ) & 1 ;
* bitv + + = ( c > > 2 ) & 1 ;
* bitv + + = ( c > > 3 ) & 1 ;
}
2011-01-03 02:57:02 +03:00
// ----8<----
2011-05-24 19:55:31 +04:00
while ( 1 ) {
int c = ( unsigned char ) * base62 + + ;
2011-01-03 02:06:07 +03:00
int num6b = char_to_num [ c ] ;
2011-05-24 19:55:31 +04:00
while ( num6b < 61 ) {
2011-01-02 06:39:32 +03:00
put6bits ( num6b ) ;
2011-05-24 19:55:31 +04:00
c = ( unsigned char ) * base62 + + ;
num6b = char_to_num [ c ] ;
2011-01-03 00:00:58 +03:00
}
2011-05-24 19:55:31 +04:00
if ( num6b = = 0xff )
break ;
if ( num6b = = 0xee )
return - 1 ;
2011-01-03 00:00:58 +03:00
assert ( num6b = = 61 ) ;
2011-01-03 02:06:07 +03:00
c = ( unsigned char ) * base62 + + ;
int num4b = char_to_num [ c ] ;
2011-05-24 19:55:31 +04:00
if ( num4b = = 0xff )
return - 2 ;
if ( num4b = = 0xee )
2011-01-03 00:00:58 +03:00
return - 3 ;
switch ( num4b & ( 16 + 32 ) ) {
case 0 :
break ;
case 16 :
num6b = 62 ;
num4b & = ~ 16 ;
break ;
case 32 :
num6b = 63 ;
num4b & = ~ 32 ;
break ;
default :
return - 4 ;
2010-11-16 16:51:07 +03:00
}
2011-01-03 00:00:58 +03:00
put6bits ( num6b ) ;
put4bits ( num4b ) ;
2010-09-11 01:34:05 +04:00
}
2011-01-03 02:57:02 +03:00
// ---->8----
2011-01-02 06:39:32 +03:00
return bitv - bitv_start ;
2010-09-11 01:34:05 +04:00
}
# ifdef SELF_TEST
2011-01-03 09:24:15 +03:00
// Self-test: encode a fixed bit pattern into base62, decode it back
// and check that the round trip preserves the bits.
static
void test_base62(void)
{
    const char rnd_bitv[] = {
        1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
        1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1,
        0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0,
        0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0,
        // trigger some 'Z'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    };
    const int rnd_bitc = sizeof rnd_bitv;
    // encode
    char base62[encode_base62_size(rnd_bitc)];
    int len = encode_base62(rnd_bitc, rnd_bitv, base62);
    assert(len > 0);
    assert(len == (int) strlen(base62));
    fprintf(stderr, "len=%d base62=%s\n", len, base62);
    // The length cannot be shorter than 6 bits per symbol.
    assert(len >= rnd_bitc / 6);
    // Neither too long: each second character must fill at least 4 bits.
    assert(len <= rnd_bitc / 2 / 4 + rnd_bitc / 2 / 6 + 1);
    // decode
    char bitv[decode_base62_size(base62)];
    int bitc = decode_base62(base62, bitv);
    fprintf(stderr, "rnd_bitc=%d bitc=%d\n", rnd_bitc, bitc);
    assert(bitc >= rnd_bitc);
    // Decoded bits must match.
    int i;
    for (i = 0; i < rnd_bitc; i++)
        assert(rnd_bitv[i] == bitv[i]);
    // The remaining bits must be zero bits.
    for (i = rnd_bitc; i < bitc; i++)
        assert(bitv[i] == 0);
    fprintf(stderr, "%s: base62 test OK\n", __FILE__);
}
# endif
/*
 * Golomb-Rice routines - compress integer values into bits.
 *
 * The idea is as follows.  Input values are assumed to be small integers.
 * Each value is split into two parts: an integer resulting from its higher
 * bits and an integer resulting from its lower bits (with the number of lower
 * bits specified by the Mshift parameter).  The first integer is then stored
 * in unary coding (which is a variable-length sequence of '0' followed by a
 * terminating '1'); the second part is stored in normal binary coding (using
 * Mshift bits).
 *
 * The method is justified by the fact that, since most of the values are
 * small, their first parts will be short (typically 1..3 bits).  In particular,
 * the method is known to be optimal for uniformly distributed hash values,
 * after the values are sorted and delta-encoded.  See e.g.
 * Putze, F.; Sanders, P.; Singler, J. (2007),
 * "Cache-, Hash- and Space-Efficient Bloom Filters",
 * http://algo2.iti.uni-karlsruhe.de/singler/publications/cacheefficientbloomfilters-wea2007.pdf
 */
2011-01-02 06:39:32 +03:00
// Calculate Mshift parameter for encoding.
//
// c   - number of values to be encoded
// bpp - number of significant bits per value
//
// Returns Mshift clamped to the 7..31 range.  The result must be less
// than bpp (checked with assert), so bpp is expected to be at least 8.
static
int encode_golomb_Mshift(int c, int bpp)
{
    // floor(log2(c)), computed on an unsigned value so that a negative c
    // cannot spin forever on an arithmetic right shift; c <= 0 yields 0
    unsigned n = c > 0 ? (unsigned) c : 0;
    int log2c = 0;
    while (n >>= 1)
        log2c++;
    // XXX Slightly better Mshift estimations are probably possible.
    // Recheck "Compression and coding algorithms" by Moffat & Turpin.
    int Mshift = bpp - log2c - 1;
    // Adjust out-of-range values.
    if (Mshift < 7)
        Mshift = 7;
    if (Mshift > 31)
        Mshift = 31;
    assert(Mshift < bpp);
    return Mshift;
}
2011-01-02 06:39:32 +03:00
// Estimate how many bits can be filled up when encoding c values
// with the given Mshift (used to size the bitv[] buffer).
static inline
int encode_golomb_size(int c, int Mshift)
{
    // XXX No precise estimation.  However, we do not expect unary-encoded bits
    // to take more than binary-encoded Mshift bits.
    return (Mshift << 1) * c + 16;
}
2011-01-02 06:39:32 +03:00
// Main golomb encoding routine: package integers into bits.
//
// c      - number of values in v[]
// v      - values to encode
// Mshift - number of low bits stored in plain binary
// bitv   - output, one bit per element; the caller sizes it
//          (see encode_golomb_size)
//
// Returns the number of bits written.
static
int encode_golomb(int c, const unsigned *v, int Mshift, char *bitv)
{
    char *bitv_start = bitv;
    // unsigned shift: Mshift may be as large as 31
    const unsigned mask = (1u << Mshift) - 1;
    while (c > 0) {
        c--;
        unsigned v0 = *v++;
        int i;
        // first part: variable-length sequence (q zeroes, then a one)
        unsigned q = v0 >> Mshift;
        for (i = 0; i < (int) q; i++)
            *bitv++ = 0;
        *bitv++ = 1;
        // second part: lower Mshift bits, least significant first
        unsigned r = v0 & mask;
        for (i = 0; i < Mshift; i++)
            *bitv++ = (r >> i) & 1;
    }
    return (int) (bitv - bitv_start);
}
2011-01-02 06:39:32 +03:00
// Estimate how many values will emerge from decoding bitc bits
// (an upper bound, suitable for sizing the v[] buffer).
static inline
int decode_golomb_size(int bitc, int Mshift)
{
    // Each (Mshift + 1) bits can make a value.
    // The remaining bits cannot make a value, though.
    return bitc / (Mshift + 1);
}
2011-01-02 06:39:32 +03:00
// Main golomb decoding routine: unpackage bits into values.
//
// bitc   - number of bits in bitv[]
// bitv   - input, one bit per element
// Mshift - number of low bits stored in plain binary
// v      - output values; the caller sizes it (see decode_golomb_size)
//
// Returns the number of values decoded, or -10 if the input ends
// in the middle of a value.
static
int decode_golomb(int bitc, const char *bitv, int Mshift, unsigned *v)
{
    unsigned *v_start = v;
    // next value
    while (bitc > 0) {
        // first part: count zero bits up to the terminating one
        unsigned q = 0;
        char bit = 0;
        while (bitc > 0) {
            bitc--;
            bit = *bitv++;
            if (bit == 0)
                q++;
            else
                break;
        }
        // trailing zero bits in the input are okay
        if (bitc == 0 && bit == 0)
            break;
        // otherwise, incomplete value is not okay
        if (bitc < Mshift)
            return -10;
        // second part: Mshift bits, least significant first
        unsigned r = 0;
        int i;
        for (i = 0; i < Mshift; i++) {
            bitc--;
            if (*bitv++)
                r |= (1u << i);
        }
        // the value
        *v++ = (q << Mshift) | r;
    }
    return (int) (v - v_start);
}
2011-01-03 02:57:02 +03:00
// Combined base62+golomb decoding routine, no need for bitv[].
static
int decode_base62_golomb ( const char * base62 , int Mshift , unsigned * v )
{
unsigned * v_start = v ;
unsigned q = 0 ;
unsigned r = 0 ;
int rfill = 0 ;
enum { ST_VLEN , ST_MBITS } state = ST_VLEN ;
inline
void putNbits ( unsigned c , int n )
{
if ( state = = ST_VLEN )
goto vlen ;
r | = ( c < < rfill ) ;
rfill + = n ;
int left = rfill - Mshift ;
if ( left < 0 )
return ;
r & = ( 1 < < Mshift ) - 1 ;
* v + + = ( q < < Mshift ) | r ;
q = 0 ;
state = ST_VLEN ;
if ( left = = 0 )
return ;
c > > = n - left ;
n = left ;
vlen :
do {
n - - ;
if ( c & 1 ) {
r = ( c > > 1 ) ;
rfill = n ;
state = ST_MBITS ;
return ;
}
q + + ;
c > > = 1 ;
}
while ( n > 0 ) ;
}
inline
void put6bits ( unsigned c )
{
putNbits ( c , 6 ) ;
}
inline
void put4bits ( unsigned c )
{
putNbits ( c , 4 ) ;
}
// ----8<----
2011-05-24 19:55:31 +04:00
while ( 1 ) {
int c = ( unsigned char ) * base62 + + ;
2011-01-03 02:57:02 +03:00
int num6b = char_to_num [ c ] ;
2011-05-24 19:55:31 +04:00
while ( num6b < 61 ) {
2011-01-03 02:57:02 +03:00
put6bits ( num6b ) ;
2011-05-24 19:55:31 +04:00
c = ( unsigned char ) * base62 + + ;
num6b = char_to_num [ c ] ;
2011-01-03 02:57:02 +03:00
}
2011-05-24 19:55:31 +04:00
if ( num6b = = 0xff )
break ;
if ( num6b = = 0xee )
return - 1 ;
2011-01-03 02:57:02 +03:00
assert ( num6b = = 61 ) ;
c = ( unsigned char ) * base62 + + ;
int num4b = char_to_num [ c ] ;
2011-05-24 19:55:31 +04:00
if ( num4b = = 0xff )
return - 2 ;
if ( num4b = = 0xee )
2011-01-03 02:57:02 +03:00
return - 3 ;
switch ( num4b & ( 16 + 32 ) ) {
case 0 :
break ;
case 16 :
num6b = 62 ;
num4b & = ~ 16 ;
break ;
case 32 :
num6b = 63 ;
num4b & = ~ 32 ;
break ;
default :
return - 4 ;
}
put6bits ( num6b ) ;
put4bits ( num4b ) ;
}
// ---->8----
if ( state ! = ST_VLEN )
return - 10 ;
return v - v_start ;
}
2010-09-11 01:34:05 +04:00
# ifdef SELF_TEST
2011-01-03 02:57:02 +03:00
// Self-test: golomb-encode a small array, decode it back and check
// that the values match and that the encoding actually saves space.
static
void test_golomb(void)
{
    const unsigned rnd_v[] = {
        // do re mi fa sol la si
        1, 2, 3, 4, 5, 6, 7,
        // koshka sela na taksi
        7, 6, 5, 4, 3, 2, 1,
    };
    const int rnd_c = sizeof rnd_v / sizeof *rnd_v;
    int bpp = 10;
    int Mshift = encode_golomb_Mshift(rnd_c, bpp);
    fprintf(stderr, "rnd_c=%d bpp=%d Mshift=%d\n", rnd_c, bpp, Mshift);
    assert(Mshift > 0);
    assert(Mshift < bpp);
    // encode
    int alloc_bitc = encode_golomb_size(rnd_c, Mshift);
    assert(alloc_bitc > rnd_c);
    char bitv[alloc_bitc];
    int bitc = encode_golomb(rnd_c, rnd_v, Mshift, bitv);
    fprintf(stderr, "alloc_bitc=%d bitc=%d\n", alloc_bitc, bitc);
    assert(bitc > rnd_c);
    assert(bitc <= alloc_bitc);
    // decode
    int alloc_c = decode_golomb_size(bitc, Mshift);
    assert(alloc_c >= rnd_c);
    unsigned v[alloc_c];
    int c = decode_golomb(bitc, bitv, Mshift, v);
    fprintf(stderr, "rnd_c=%d alloc_c=%d c=%d\n", rnd_c, alloc_c, c);
    assert(alloc_c >= c);
    // Decoded values must match.
    assert(rnd_c == c);
    int i;
    for (i = 0; i < c; i++)
        assert(rnd_v[i] == v[i]);
    // At the end of the day, did it save your money?
    int golomb_bpp = bitc / c;
    fprintf(stderr, "bpp=%d golomb_bpp=%d\n", bpp, golomb_bpp);
    assert(golomb_bpp < bpp);
    fprintf(stderr, "%s: golomb test OK\n", __FILE__);
}
2011-01-03 02:57:02 +03:00
// Self-test: the combined base62+golomb decoder must produce the same
// values as the two separate decoding stages.
static
void test_base62_golomb(void)
{
    // test combined base62+golomb decoder
    const char str[] = "set:hdf7q2P5VZwtLGr9TKxhrEM1";
    // skip the "set:" prefix and the two parameter characters
    const char *base62 = str + 4 + 2;
    int Mshift = 10;
    char bitv[256];
    int bitc = decode_base62(base62, bitv);
    assert(bitc > 0);
    unsigned v1[32], v2[32];
    int c1 = decode_golomb(bitc, bitv, Mshift, v1);
    assert(c1 > 0);
    int c2 = decode_base62_golomb(base62, Mshift, v2);
    assert(c2 > 0);
    assert(c1 == c2);
    int i;
    for (i = 0; i < c1; i++)
        assert(v1[i] == v2[i]);
    fprintf(stderr, "%s: base62_golomb test OK\n", __FILE__);
}
2010-09-11 01:34:05 +04:00
# endif
/*
 * Delta encoding routines - replace an increasing sequence of integer values
 * by the sequence of their differences.
 */
// Replace v[1..c-1] in place with differences between consecutive
// values; v[0] is kept as is.  Requires c > 0.
static
void encode_delta(int c, unsigned *v)
{
    assert(c > 0);
    unsigned *v_end = v + c;
    // v0 tracks the original (pre-delta) value of the previous element
    unsigned v0 = *v++;
    while (v < v_end) {
        *v -= v0;
        v0 += *v++;
    }
}
// Inverse of encode_delta: turn differences back into running sums,
// in place.  v[0] is kept as is.  Requires c > 0.
static
void decode_delta(int c, unsigned *v)
{
    assert(c > 0);
    unsigned *v_end = v + c;
    // v0 holds the already restored previous value
    unsigned v0 = *v++;
    while (v < v_end) {
        *v += v0;
        v0 = *v++;
    }
}
# ifdef SELF_TEST
2011-01-03 09:24:15 +03:00
// Self-test: delta encoding round trip; the element past the
// processed range must stay untouched.
static
void test_delta(void)
{
    unsigned v[] = {
        1, 3, 7, 0
    };
    int c = 3;
    encode_delta(c, v);
    assert(v[0] == 1);
    assert(v[1] == 2);
    assert(v[2] == 4);
    assert(v[3] == 0);
    decode_delta(c, v);
    assert(v[0] == 1);
    assert(v[1] == 3);
    assert(v[2] == 7);
    assert(v[3] == 0);
    fprintf(stderr, "%s: delta test OK\n", __FILE__);
}
# endif
/*
 * Auxiliary routines.
 */
// Apply a bit mask to each of the c values in v[], in place.
static
void maskv(int c, unsigned *v, unsigned mask)
{
    unsigned *v_end = v + c;
    while (v < v_end)
        *v++ &= mask;
}
// qsort comparator for unsigned values, ascending order.
static
int sortv_cmp(const void *arg1, const void *arg2)
{
    unsigned v1 = *(const unsigned *) arg1;
    unsigned v2 = *(const unsigned *) arg2;
    if (v1 > v2)
        return 1;
    if (v1 < v2)
        return -1;
    return 0;
}

// Sort the c values in v[] in ascending order, in place.
static
void sortv(int c, unsigned *v)
{
    qsort(v, c, sizeof *v, sortv_cmp);
}
// Collapse runs of equal adjacent values in v[] (in place), keeping one
// value per run.  Returns the new number of values.
static
int uniqv(int c, unsigned *v)
{
    int i, j;
    for (i = 0, j = 0; i < c; i++) {
        // skip to the last element of the current run
        while (i + 1 < c && v[i] == v[i + 1])
            i++;
        v[j++] = v[i];
    }
    assert(j <= c);
    return j;
}
# ifdef SELF_TEST
// Self-test: mask, sort and uniq a small array.
static
void test_aux(void)
{
    unsigned v[] = { 2, 3, 1, 2, 7, 6, 5 };
    int c = sizeof v / sizeof *v;
    maskv(c, v, 4 - 1);
    sortv(c, v);
    c = uniqv(c, v);
    assert(c == 3);
    assert(v[0] == 1);
    assert(v[1] == 2);
    assert(v[2] == 3);
    fprintf(stderr, "%s: aux test OK\n", __FILE__);
}
# endif
/*
 * Higher-level set-string routines - serialize integers into a set-string.
 *
 * A set-string looks like this: "set:bMxyz..."
 *
 * The "set:" prefix marks set-versions in rpm (to distinguish them from
 * regular rpm versions).  It is assumed to be stripped here.
 *
 * The next two characters (denoted 'b' and 'M') encode two small integers
 * in the range 7..32 using 'a'..'z'.  The first character encodes bpp.
 * Valid bpp range is 10..32.  The second character encodes Mshift.  Valid
 * Mshift range is 7..31.  Also, valid Mshift must be less than bpp.
 *
 * The rest ("xyz...") is a variable-length sequence of alnum characters.
 * It encodes a (sorted) set of (non-negative) integer values, as follows:
 * integers are delta-encoded, golomb-compressed and base62-serialized.
 */
// Estimate the buffer size (in bytes, including the terminating NUL)
// required for encode_set to serialize c values of bpp bits each.
static
int encode_set_size(int c, int bpp)
{
    int Mshift = encode_golomb_Mshift(c, bpp);
    int bitc = encode_golomb_size(c, Mshift);
    // two leading characters are special
    return 2 + encode_base62_size(bitc);
}
// Serialize c sorted values into a set-string (without the "set:" prefix).
//
// c      - number of values; must be positive (encode_delta asserts this)
// v      - values in increasing order; delta-encoded in place, so the
//          array is modified (restored only in SELF_TEST builds)
// bpp    - bits per value, 10..32
// base62 - output buffer of at least encode_set_size(c, bpp) bytes
//
// Returns the length of the resulting string, or a negative value on error.
static
int encode_set(int c, unsigned *v, int bpp, char *base62)
{
    // XXX v is non-const due to encode_delta
    // Validate bpp before anything else: encode_golomb_Mshift asserts
    // Mshift < bpp, which cannot hold for a too small bpp.
    if (bpp < 10 || bpp > 32)
        return -1;
    int Mshift = encode_golomb_Mshift(c, bpp);
    // golomb parameter
    if (Mshift < 7 || Mshift > 31)
        return -2;
    int bitc = encode_golomb_size(c, Mshift);
    char bitv[bitc];
    // bpp and Mshift are encoded with 'a'..'z', starting from 7
    *base62++ = bpp - 7 + 'a';
    *base62++ = Mshift - 7 + 'a';
    // delta
    encode_delta(c, v);
    // golomb
    bitc = encode_golomb(c, v, Mshift, bitv);
#ifdef SELF_TEST
    decode_delta(c, v);
#endif
    if (bitc < 0)
        return -3;
    // base62
    int len = encode_base62(bitc, bitv, base62);
    if (len < 0)
        return -4;
    return 2 + len;
}
// Parse and validate the two leading parameter characters of a set-string
// (with the "set:" prefix already stripped).
//
// On success stores bpp and Mshift through pbpp/pMshift and returns 0.
// On failure the outputs are left untouched and a negative value is returned:
//   -1  bpp out of the 10..32 range
//   -2  Mshift out of the 7..31 range
//   -3  Mshift is not less than bpp
//   -4  nothing follows the two parameter characters
static
int decode_set_init(const char *str, int *pbpp, int *pMshift)
{
    // 7..32 values encoded with 'a'..'z'
    int bpp = *str++ + 7 - 'a';
    if (bpp < 10 || bpp > 32)
        return -1;
    // golomb parameter
    int Mshift = *str++ + 7 - 'a';
    if (Mshift < 7 || Mshift > 31)
        return -2;
    if (Mshift >= bpp)
        return -3;
    // no empty sets for now
    if (*str == '\0')
        return -4;
    *pbpp = bpp;
    *pMshift = Mshift;
    return 0;
}
// Estimate how many values decode_set can produce for the given set-string
// (an upper bound, suitable for sizing the v[] buffer).
static inline
int decode_set_size(const char *str, int Mshift)
{
    // skip the two parameter characters
    const char *base62 = str + 2;
    int bitc = decode_base62_size(base62);
    return decode_golomb_size(bitc, Mshift);
}
// Decode a set-string (with the "set:" prefix stripped) into values.
//
// str    - set-string; its two leading parameter characters are skipped
// Mshift - golomb parameter, as obtained from decode_set_init
// v      - output buffer of at least decode_set_size(str, Mshift) elements
//
// Returns the number of values, or a negative value on error.
static
int decode_set(const char *str, int Mshift, unsigned *v)
{
    const char *base62 = str + 2;
    // separate base62+golomb stages, for reference
    if (0) {
        // base62
        char bitv[decode_base62_size(base62)];
        int bitc = decode_base62(base62, bitv);
        if (bitc < 0)
            return bitc;
        // golomb
        int c = decode_golomb(bitc, bitv, Mshift, v);
        if (c <= 0)
            return c;
        // delta
        decode_delta(c, v);
        return c;
    }
    // combined base62+golomb stage
    int c = decode_base62_golomb(base62, Mshift, v);
    // Nothing to delta-decode on error or when no values emerged
    // (e.g. the payload consists of zero bits only); decode_delta
    // requires c > 0.
    if (c <= 0)
        return c;
    // delta
    decode_delta(c, v);
    return c;
}
2011-01-02 06:39:32 +03:00
// Special decode_set version with LRU caching.
//
// Decoded sets are kept in a per-thread singly linked list keyed by the
// set-string.  A hit moves the entry to the front.  On a miss with a full
// cache the last entry is dropped and the new one is inserted at the
// "pivot" position instead of the front.
//
// Arguments and return value are the same as for decode_set.
static
int cache_decode_set(const char *str, int Mshift, unsigned *v)
{
    const int cache_size = 192;
    const int pivot_size = 172;
    unsigned *v_start = v, *v_end;
    struct cache_ent {
        struct cache_ent *next;
        char *str;              // the key (copy of the set-string)
        unsigned hash;          // cheap pre-check before strcmp
        int c;                  // number of values
        unsigned *v;            // decoded values, or NULL
        unsigned short *dv;     // 16-bit deltas, or NULL
    };
    static __thread
    struct cache_ent *cache;
    // lookup in the cache
    struct cache_ent *cur = cache, *prev = NULL;
    struct cache_ent *pivot_cur = NULL, *pivot_prev = NULL;
    unsigned hash = str[0] | (str[2] << 8) | (str[3] << 16);
    int count = 0;
    while (cur) {
        if (hash == cur->hash && strcmp(str, cur->str) == 0) {
            // hit, move to front
            if (cur != cache) {
                prev->next = cur->next;
                cur->next = cache;
                cache = cur;
            }
            // stored as values
            if (cur->v) {
                memcpy(v, cur->v, cur->c * sizeof(*cur->v));
                return cur->c;
            }
            // stored as short deltas
            unsigned short *dv = cur->dv;
            unsigned short *dv_end = dv + cur->c;
            while (dv < dv_end)
                *v++ = *dv++;
            v = v_start;
            decode_delta(cur->c, v);
            return cur->c;
        }
        count++;
        // stop at the last entry, so that cur/prev can be used for truncation
        if (cur->next == NULL)
            break;
        prev = cur;
        cur = cur->next;
        // remember the insertion point for a full cache
        if (count == pivot_size) {
            pivot_cur = cur;
            pivot_prev = prev;
        }
    }
    // miss, decode
    int c = decode_base62_golomb(str + 2, Mshift, v);
    if (c <= 0)
        return c;
    v_end = v_start + c;
    // truncate: drop the last entry
    if (count >= cache_size) {
        free(cur);
        prev->next = NULL;
    }
    // check delta: can all values (still delta-encoded here) fit in 16 bits?
    int delta = 1;
    while (v < v_end) {
        if (*v++ > 65535) {
            delta = 0;
            break;
        }
    }
    v = v_start;
    // new entry: header, then values or deltas, then the key string
    cur = malloc(sizeof(*cur) + strlen(str) + 1 +
            c * (delta ? sizeof *cur->dv : sizeof *cur->v));
    if (cur == NULL) {
        // cannot cache, but the result is still good
        decode_delta(c, v);
        return c;
    }
    cur->c = c;
    if (delta) {
        cur->v = NULL;
        unsigned short *dv = cur->dv = (unsigned short *)(cur + 1);
        while (v < v_end)
            *dv++ = *v++;
        v = v_start;
        decode_delta(c, v);
        // dv now points just past the stored deltas
        cur->str = (char *) dv;
    }
    else {
        cur->dv = NULL;
        cur->v = (unsigned *)(cur + 1);
        decode_delta(c, v);
        memcpy(cur->v, v, c * sizeof(*v));
        cur->str = (char *)(cur->v + c);
    }
    strcpy(cur->str, str);
    cur->hash = hash;
    // pivotal insertion!
    if (count >= cache_size) {
        cur->next = pivot_cur;
        pivot_prev->next = cur;
    }
    // early bird, push to front
    else {
        cur->next = cache;
        cache = cur;
    }
    return c;
}
2010-09-11 01:34:05 +04:00
// Reduce the values in v[] to their bpp low bits, then sort them and
// remove duplicates.  Returns the new number of values.
static
int downsample_set(int c, unsigned *v, int bpp)
{
    // Unsigned shift; a shift by the full width (bpp >= 32) is undefined,
    // so keep all bits in that case.
    unsigned mask = bpp >= 32 ? ~0u : (1u << bpp) - 1;
    maskv(c, v, mask);
    sortv(c, v);
    return uniqv(c, v);
}
# ifdef SELF_TEST
// Self-test: encode a set into a set-string and decode it back, both
// with the plain and with the caching decoder.
static
void test_set(void)
{
    unsigned rnd_v[] = {
        0x020a, 0x07e5, 0x3305, 0x35f5,
        0x4980, 0x4c4f, 0x74ef, 0x7739,
        0x82ae, 0x8415, 0xa3e7, 0xb07e,
        0xb584, 0xb89f, 0xbb40, 0xf39e,
    };
    int rnd_c = sizeof rnd_v / sizeof *rnd_v;
    // encode
    int bpp = 16;
    char base62[encode_set_size(rnd_c, bpp)];
    int len = encode_set(rnd_c, rnd_v, bpp, base62);
    assert(len > 0);
    fprintf(stderr, "len=%d set=%s\n", len, base62);
    // decode
    int Mshift = bpp;
    int rc = decode_set_init(base62, &bpp, &Mshift);
    assert(rc == 0);
    assert(bpp == 16);
    assert(Mshift < bpp);
    int c = decode_set_size(base62, Mshift);
    assert(c >= rnd_c);
    unsigned v[c];
    c = decode_set(base62, Mshift, v);
    // Decoded values must match.
    assert(c == rnd_c);
    int i;
    for (i = 0; i < c; i++)
        assert(v[i] == rnd_v[i]);
    // Cached version.
    c = cache_decode_set(base62, Mshift, v);
    assert(c == rnd_c);
    for (i = 0; i < c; i++)
        assert(v[i] == rnd_v[i]);
    fprintf(stderr, "%s: set test OK\n", __FILE__);
}
# endif
/*
 * API routines start here.
 */
# include "set.h"
2011-01-02 06:39:32 +03:00
// main API routine
2010-09-11 01:34:05 +04:00
int rpmsetcmp ( const char * str1 , const char * str2 )
{
if ( strncmp ( str1 , " set: " , 4 ) = = 0 )
str1 + = 4 ;
if ( strncmp ( str2 , " set: " , 4 ) = = 0 )
str2 + = 4 ;
2011-01-02 06:39:32 +03:00
// initialize decoding
int bpp1 , Mshift1 ;
int bpp2 , Mshift2 ;
2010-09-11 01:34:05 +04:00
if ( decode_set_init ( str1 , & bpp1 , & Mshift1 ) < 0 )
return - 3 ;
if ( decode_set_init ( str2 , & bpp2 , & Mshift2 ) < 0 )
return - 4 ;
2011-01-02 06:39:32 +03:00
// make room for hash values
set.c: optimize array access in rpmsetcmp
callgrind results for "apt-cache unmet", 4.0.4-alt100.6:
2,198,298,537 PROGRAM TOTALS
1,115,738,267 lib/set.c:decode_set
484,035,006 lib/set.c:rpmsetcmp
143,078,002 ???:strcmp
79,477,321 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
callgrind results for "apt-cache unmet", this commit:
1,755,431,664 PROGRAM TOTALS
764,189,271 lib/set.c:decode_base62_golomb
404,493,494 lib/set.c:rpmsetcmp
143,076,968 ???:strcmp
70,833,953 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
2011-01-03 03:46:15 +03:00
unsigned v1buf [ decode_set_size ( str1 , Mshift1 ) ] , * v1 = v1buf ;
unsigned v2buf [ decode_set_size ( str2 , Mshift2 ) ] , * v2 = v2buf ;
2011-01-02 06:39:32 +03:00
// decode hash values
// str1 comes on behalf of provides, decode with caching
int c1 = cache_decode_set ( str1 , Mshift1 , v1 ) ;
2010-09-11 01:34:05 +04:00
if ( c1 < 0 )
return - 3 ;
2011-01-02 06:39:32 +03:00
int c2 = decode_set ( str2 , Mshift2 , v2 ) ;
2010-09-13 18:54:45 +04:00
if ( c2 < 0 )
2010-09-11 01:34:05 +04:00
return - 4 ;
2011-01-02 06:39:32 +03:00
// adjust for comparison
2010-09-11 01:34:05 +04:00
if ( bpp1 > bpp2 ) {
bpp1 = bpp2 ;
c1 = downsample_set ( c1 , v1 , bpp1 ) ;
}
if ( bpp2 > bpp1 ) {
bpp2 = bpp1 ;
c2 = downsample_set ( c2 , v2 , bpp2 ) ;
}
2011-01-02 06:39:32 +03:00
// compare
int ge = 1 ;
int le = 1 ;
set.c: optimize array access in rpmsetcmp
callgrind results for "apt-cache unmet", 4.0.4-alt100.6:
2,198,298,537 PROGRAM TOTALS
1,115,738,267 lib/set.c:decode_set
484,035,006 lib/set.c:rpmsetcmp
143,078,002 ???:strcmp
79,477,321 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
callgrind results for "apt-cache unmet", this commit:
1,755,431,664 PROGRAM TOTALS
764,189,271 lib/set.c:decode_base62_golomb
404,493,494 lib/set.c:rpmsetcmp
143,076,968 ???:strcmp
70,833,953 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
2011-01-03 03:46:15 +03:00
unsigned * v1end = v1 + c1 ;
unsigned * v2end = v2 + c2 ;
while ( v1 < v1end & & v2 < v2end ) {
if ( * v1 < * v2 ) {
2010-09-11 01:34:05 +04:00
le = 0 ;
set.c: optimize array access in rpmsetcmp
callgrind results for "apt-cache unmet", 4.0.4-alt100.6:
2,198,298,537 PROGRAM TOTALS
1,115,738,267 lib/set.c:decode_set
484,035,006 lib/set.c:rpmsetcmp
143,078,002 ???:strcmp
79,477,321 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
callgrind results for "apt-cache unmet", this commit:
1,755,431,664 PROGRAM TOTALS
764,189,271 lib/set.c:decode_base62_golomb
404,493,494 lib/set.c:rpmsetcmp
143,076,968 ???:strcmp
70,833,953 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
2011-01-03 03:46:15 +03:00
v1 + + ;
2011-01-02 06:39:32 +03:00
}
set.c: optimize array access in rpmsetcmp
callgrind results for "apt-cache unmet", 4.0.4-alt100.6:
2,198,298,537 PROGRAM TOTALS
1,115,738,267 lib/set.c:decode_set
484,035,006 lib/set.c:rpmsetcmp
143,078,002 ???:strcmp
79,477,321 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
callgrind results for "apt-cache unmet", this commit:
1,755,431,664 PROGRAM TOTALS
764,189,271 lib/set.c:decode_base62_golomb
404,493,494 lib/set.c:rpmsetcmp
143,076,968 ???:strcmp
70,833,953 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
2011-01-03 03:46:15 +03:00
else if ( * v1 > * v2 ) {
2010-09-11 01:34:05 +04:00
ge = 0 ;
set.c: optimize array access in rpmsetcmp
callgrind results for "apt-cache unmet", 4.0.4-alt100.6:
2,198,298,537 PROGRAM TOTALS
1,115,738,267 lib/set.c:decode_set
484,035,006 lib/set.c:rpmsetcmp
143,078,002 ???:strcmp
79,477,321 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
callgrind results for "apt-cache unmet", this commit:
1,755,431,664 PROGRAM TOTALS
764,189,271 lib/set.c:decode_base62_golomb
404,493,494 lib/set.c:rpmsetcmp
143,076,968 ???:strcmp
70,833,953 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
2011-01-03 03:46:15 +03:00
v2 + + ;
2011-01-02 06:39:32 +03:00
}
else {
set.c: optimize array access in rpmsetcmp
callgrind results for "apt-cache unmet", 4.0.4-alt100.6:
2,198,298,537 PROGRAM TOTALS
1,115,738,267 lib/set.c:decode_set
484,035,006 lib/set.c:rpmsetcmp
143,078,002 ???:strcmp
79,477,321 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
callgrind results for "apt-cache unmet", this commit:
1,755,431,664 PROGRAM TOTALS
764,189,271 lib/set.c:decode_base62_golomb
404,493,494 lib/set.c:rpmsetcmp
143,076,968 ???:strcmp
70,833,953 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
2011-01-03 03:46:15 +03:00
v1 + + ;
v2 + + ;
2010-09-11 01:34:05 +04:00
}
set.c: optimize array access in rpmsetcmp
callgrind results for "apt-cache unmet", 4.0.4-alt100.6:
2,198,298,537 PROGRAM TOTALS
1,115,738,267 lib/set.c:decode_set
484,035,006 lib/set.c:rpmsetcmp
143,078,002 ???:strcmp
79,477,321 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
callgrind results for "apt-cache unmet", this commit:
1,755,431,664 PROGRAM TOTALS
764,189,271 lib/set.c:decode_base62_golomb
404,493,494 lib/set.c:rpmsetcmp
143,076,968 ???:strcmp
70,833,953 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
2011-01-03 03:46:15 +03:00
}
2011-01-02 06:39:32 +03:00
// return
set.c: optimize array access in rpmsetcmp
callgrind results for "apt-cache unmet", 4.0.4-alt100.6:
2,198,298,537 PROGRAM TOTALS
1,115,738,267 lib/set.c:decode_set
484,035,006 lib/set.c:rpmsetcmp
143,078,002 ???:strcmp
79,477,321 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
callgrind results for "apt-cache unmet", this commit:
1,755,431,664 PROGRAM TOTALS
764,189,271 lib/set.c:decode_base62_golomb
404,493,494 lib/set.c:rpmsetcmp
143,076,968 ???:strcmp
70,833,953 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
2011-01-03 03:46:15 +03:00
if ( v1 < v1end )
2010-09-11 01:34:05 +04:00
le = 0 ;
set.c: optimize array access in rpmsetcmp
callgrind results for "apt-cache unmet", 4.0.4-alt100.6:
2,198,298,537 PROGRAM TOTALS
1,115,738,267 lib/set.c:decode_set
484,035,006 lib/set.c:rpmsetcmp
143,078,002 ???:strcmp
79,477,321 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
callgrind results for "apt-cache unmet", this commit:
1,755,431,664 PROGRAM TOTALS
764,189,271 lib/set.c:decode_base62_golomb
404,493,494 lib/set.c:rpmsetcmp
143,076,968 ???:strcmp
70,833,953 ???:strlen
61,780,572 ???:0x0000000000033080'2
54,466,947 ???:memcpy
31,161,399 ???:strcpy
24,438,336 ???:pkgCache::DepIterator::AllTargets()
2011-01-03 03:46:15 +03:00
if ( v2 < v2end )
2010-09-11 01:34:05 +04:00
ge = 0 ;
if ( le & & ge )
return 0 ;
if ( ge )
return 1 ;
if ( le )
return - 1 ;
return - 2 ;
}
/*
 * Simple API for creating set-versions.
 */
# include "system.h"
# include "rpmlib.h"
2011-01-02 06:39:32 +03:00
// Internally, "struct set" is just a bag of strings and their hash values.
2010-09-11 01:34:05 +04:00
/* One entry of a set: a symbol string and its hash value. */
struct sv {
    const char *s;	/* symbol string; set_add() stores an xstrdup() copy */
    unsigned v;		/* hash of s; 0 until set_fini() computes it */
};

/* Accumulator for a set-version under construction. */
struct set {
    int c;		/* number of entries currently stored in sv[] */
    struct sv *sv;	/* entry array; NULL until the first set_add() */
};
struct set * set_new ( )
{
2011-01-02 06:39:32 +03:00
struct set * set = xmalloc ( sizeof * set ) ;
2010-09-11 01:34:05 +04:00
set - > c = 0 ;
set - > sv = NULL ;
return set ;
}
void set_add ( struct set * set , const char * sym )
{
const int delta = 1024 ;
if ( ( set - > c & ( delta - 1 ) ) = = 0 )
set - > sv = xrealloc ( set - > sv , sizeof ( * set - > sv ) * ( set - > c + delta ) ) ;
set - > sv [ set - > c ] . s = xstrdup ( sym ) ;
set - > sv [ set - > c ] . v = 0 ;
set - > c + + ;
}
2010-10-05 13:00:31 +04:00
struct set * set_free ( struct set * set )
2010-09-11 01:34:05 +04:00
{
2011-01-02 06:39:32 +03:00
if ( set ) {
int i ;
for ( i = 0 ; i < set - > c ; i + + )
set - > sv [ i ] . s = _free ( set - > sv [ i ] . s ) ;
set - > sv = _free ( set - > sv ) ;
2010-11-16 16:19:26 +03:00
}
2011-01-02 06:39:32 +03:00
return NULL ;
2010-11-16 16:19:26 +03:00
}
2011-01-02 06:39:32 +03:00
// This routine does the whole job.
2010-09-11 01:34:05 +04:00
const char * set_fini ( struct set * set , int bpp )
{
if ( set - > c < 1 )
return NULL ;
if ( bpp < 10 )
return NULL ;
if ( bpp > 32 )
return NULL ;
2011-01-02 06:39:32 +03:00
unsigned mask = ( bpp < 32 ) ? ( 1u < < bpp ) - 1 : ~ 0u ;
// Jenkins' one-at-a-time hash
unsigned int hash ( const char * str )
{
unsigned int hash = 0x9e3779b9 ;
const unsigned char * p = ( const unsigned char * ) str ;
while ( * p ) {
hash + = * p + + ;
hash + = ( hash < < 10 ) ;
hash ^ = ( hash > > 6 ) ;
}
hash + = ( hash < < 3 ) ;
hash ^ = ( hash > > 11 ) ;
hash + = ( hash < < 15 ) ;
return hash ;
}
// hash sv strings
int i ;
2010-09-11 01:34:05 +04:00
for ( i = 0 ; i < set - > c ; i + + )
2011-01-02 06:39:32 +03:00
set - > sv [ i ] . v = hash ( set - > sv [ i ] . s ) & mask ;
// sort by hash value
int cmp ( const void * arg1 , const void * arg2 )
{
struct sv * sv1 = ( struct sv * ) arg1 ;
struct sv * sv2 = ( struct sv * ) arg2 ;
if ( sv1 - > v > sv2 - > v )
return 1 ;
if ( sv2 - > v > sv1 - > v )
return - 1 ;
return 0 ;
}
qsort ( set - > sv , set - > c , sizeof * set - > sv , cmp ) ;
// warn on hash collisions
2010-09-11 01:34:05 +04:00
for ( i = 0 ; i < set - > c - 1 ; i + + ) {
if ( set - > sv [ i ] . v ! = set - > sv [ i + 1 ] . v )
continue ;
2010-09-17 11:41:04 +04:00
if ( strcmp ( set - > sv [ i ] . s , set - > sv [ i + 1 ] . s ) = = 0 )
2010-09-11 01:34:05 +04:00
continue ;
fprintf ( stderr , " warning: hash collision: %s %s \n " ,
set - > sv [ i ] . s , set - > sv [ i + 1 ] . s ) ;
}
2011-01-02 06:39:32 +03:00
// encode
unsigned v [ set - > c ] ;
2010-09-11 01:34:05 +04:00
for ( i = 0 ; i < set - > c ; i + + )
v [ i ] = set - > sv [ i ] . v ;
2011-01-02 06:39:32 +03:00
int c = uniqv ( set - > c , v ) ;
char base62 [ encode_set_size ( c , bpp ) ] ;
int len = encode_set ( c , v , bpp , base62 ) ;
2010-09-11 01:34:05 +04:00
if ( len < 0 )
return NULL ;
return xstrdup ( base62 ) ;
}
# ifdef SELF_TEST
/*
 * Self-test for the set API: builds sets with set_new()/set_add(),
 * encodes them with set_fini() and checks every rpmsetcmp() outcome
 * (1: first is a superset, 0: equal, -1: first is a subset,
 * -2: neither contains the other).
 */
static
void test_api(void)
{
    struct set *set1 = set_new();
    set_add(set1, " mama ");
    set_add(set1, " myla ");
    set_add(set1, " ramu ");
    const char *str10 = set_fini(set1, 16);
    // set_fini() may return NULL; never hand that to "%s" or rpmsetcmp()
    assert(str10);
    fprintf(stderr, " set10=%s \n ", str10);

    int cmp;
    struct set *set2 = set_new();
    set_add(set2, " myla ");
    set_add(set2, " mama ");
    const char *str20 = set_fini(set2, 16);
    assert(str20);
    fprintf(stderr, " set20=%s \n ", str20);
    // set1 has everything set2 has, plus one more symbol
    cmp = rpmsetcmp(str10, str20);
    assert(cmp == 1);

    set_add(set2, " ramu ");
    const char *str21 = set_fini(set2, 16);
    assert(str21);
    fprintf(stderr, " set21=%s \n ", str21);
    // same symbols, added in a different order
    cmp = rpmsetcmp(str10, str21);
    assert(cmp == 0);

    set_add(set2, " baba ");
    const char *str22 = set_fini(set2, 16);
    assert(str22);
    // set2 now has a symbol that set1 lacks
    cmp = rpmsetcmp(str10, str22);
    assert(cmp == -1);

    set_add(set1, " deda ");
    const char *str11 = set_fini(set1, 16);
    assert(str11);
    // each set has a symbol the other one lacks
    cmp = rpmsetcmp(str11, str22);
    assert(cmp == -2);

    set1 = set_free(set1);
    set2 = set_free(set2);
    str10 = _free(str10);
    str11 = _free(str11);
    str20 = _free(str20);
    str21 = _free(str21);
    str22 = _free(str22);
    fprintf(stderr, " %s: api test OK \n ", __FILE__);
}
# endif
# ifdef SELF_TEST
2011-01-02 06:39:32 +03:00
/*
 * Self-test driver (built only with SELF_TEST defined): runs the test
 * routines in order and returns 0 once all of them have returned.
 */
int main(void)
{
    test_base62();
    test_golomb();
    test_base62_golomb();
    test_delta();
    test_aux();
    test_set();
    test_api();
    return 0;
}
# endif
2011-01-02 06:39:32 +03:00
// ex: set ts=8 sts=4 sw=4 noet: