set.c: improved rpmsetcmp main loop performance
The loop is logically impeccable, but its main condition (v1 < v1end && v2 < v2end) is somewhat redundant: in two of the three cases, only one pointer gets advanced. To save instructions, the conditions are now handled within the cases. The loop is now a while (1) loop, a disguised form of goto. Also note that, when comparing Requires against Provides, the Requires is usually sparse: P: a b c d e f g h i j k l ... R: a c h j ... This means that a nested loop which skips intermediate Provides elements towards the next Requires element may improve performance. while (v1 < v1end && *v1 < *v2) v1++; However, note that the first condition (v1 < v1end) is also somewhat redundant. This kind of boundary checking can be partially omitted if the loop gets unrolled. There is a better technique, however, called the barrier: *v1end must contain the biggest element possible, so that the trailing *v1 is never smaller than any of *v2. The nested loop then becomes as simple as while (*v1 < *v2) v1++; callgrind annotations, 4.0.4-alt100.27: 1,899,657,916 PROGRAM TOTALS 694,132,522 decode_base62_golomb 583,376,772 rpmsetcmp 106,225,572 __GI_strcmp 102,459,314 __GI_strlen ... callgrind annotations, this commit (rebuilt in hasher): 1,526,256,208 PROGRAM TOTALS 470,195,400 decode_base62_golomb 434,006,244 rpmsetcmp 106,137,949 __GI_strcmp 102,459,314 __GI_strlen ... Note that rpmsetcmp also absorbs cache_decode_set and decode_delta; the loop is now about twice as fast.
This commit is contained in:
parent
2651bb3246
commit
3ff35a310c
33
lib/set.c
33
lib/set.c
@ -1092,8 +1092,9 @@ int rpmsetcmp(const char *str1, const char *str2)
|
||||
if (decode_set_init(str2, &bpp2, &Mshift2) < 0)
|
||||
return -4;
|
||||
// make room for hash values
|
||||
unsigned v1buf[decode_set_size(str1, Mshift1)], *v1 = v1buf;
|
||||
unsigned v2buf[decode_set_size(str2, Mshift2)], *v2 = v2buf;
|
||||
// str1 comes on behalf of provides, allocate a barrier
|
||||
unsigned v1buf[decode_set_size(str1, Mshift1) + 1], *v1 = v1buf;
|
||||
unsigned v2buf[decode_set_size(str2, Mshift2) + 0], *v2 = v2buf;
|
||||
// decode hash values
|
||||
// str1 comes on behalf of provides, decode with caching
|
||||
int c1 = cache_decode_set(str1, Mshift1, v1);
|
||||
@ -1116,18 +1117,32 @@ int rpmsetcmp(const char *str1, const char *str2)
|
||||
int le = 1;
|
||||
unsigned *v1end = v1 + c1;
|
||||
unsigned *v2end = v2 + c2;
|
||||
while (v1 < v1end && v2 < v2end) {
|
||||
if (*v1 < *v2) {
|
||||
*v1end = ~0u;
|
||||
unsigned v2val = *v2;
|
||||
while (1) {
|
||||
if (*v1 < v2val) {
|
||||
le = 0;
|
||||
v1++;
|
||||
while (*v1 < v2val)
|
||||
v1++;
|
||||
if (v1 == v1end)
|
||||
break;
|
||||
}
|
||||
else if (*v1 > *v2) {
|
||||
ge = 0;
|
||||
v2++;
|
||||
}
|
||||
else {
|
||||
if (*v1 == v2val) {
|
||||
v1++;
|
||||
v2++;
|
||||
if (v1 == v1end)
|
||||
break;
|
||||
if (v2 == v2end)
|
||||
break;
|
||||
v2val = *v2;
|
||||
}
|
||||
else {
|
||||
ge = 0;
|
||||
v2++;
|
||||
if (v2 == v2end)
|
||||
break;
|
||||
v2val = *v2;
|
||||
}
|
||||
}
|
||||
// return
|
||||
|
Loading…
x
Reference in New Issue
Block a user