1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-22 13:34:15 +03:00

lib/compression: script to test 3 byte hash

Compression uses a 3 byte hash to remember LZ77 matches in a 14-bit table.
This script runs the hash over all 16M combinations, then again over
all ASCII combinations, counting collisions to find hot-spots.

If you think you have a better hash, you are probably right, but you
should try it here -- alter h() -- before committing to it. This one is
literally the first one I thought of.

Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Reviewed-by: Joseph Sutton <josephsutton@catalyst.net.nz>
This commit is contained in:
Douglas Bagnall 2022-11-22 08:35:14 +13:00 committed by Joseph Sutton
parent dadecede54
commit 7804570a37
2 changed files with 50 additions and 0 deletions

View File

@ -0,0 +1,49 @@
#!/usr/bin/python3
"""Print statistics about a certain three byte hash.
USAGE: three_byte_hash
"""
import sys
# This script takes no arguments: anything on the command line gets the
# usage text.  An explicit request for help is a success (exit status 0);
# any other argument is a usage error (exit status 1).
wants_help = '--help' in sys.argv or '-h' in sys.argv
if wants_help or len(sys.argv) > 1:
    print(__doc__)
    # Use sys.exit() rather than the bare exit() helper, which is added by
    # the site module for interactive sessions and is not defined when
    # Python is started with -S.
    sys.exit(0 if wants_help else 1)
from statistics import mean, pstdev, median
def h(*args, bits=12):
    """Hash three byte values down to a `bits`-bit bucket index.

    The first three positional arguments are the bytes to hash; any
    further positional arguments are ignored.  The result is an integer
    in range(1 << bits).
    """
    first = args[0]
    # The second and third bytes are each XORed with a fixed constant
    # before being mixed in.
    second = args[1] ^ 0x2e
    third = args[2] ^ 0x55

    # Build the hash up one XOR term at a time.
    mixed = (first + second) << 8
    # third - first can be negative; masking keeps the shifted term a
    # non-negative 16-bit quantity.
    mixed ^= ((third - first) & 0xffff) << 5
    mixed ^= third + second
    mixed ^= 0xcab + first

    # Keep only the low `bits` bits.
    low_bits_mask = (1 << bits) - 1
    return mixed & low_bits_mask
def count(fn, bits, filter=None):
    """Hash every 3-byte combination with `fn` and print bucket statistics.

    fn is called as fn(a, b, c, bits=bits) and must return a bucket index
    in range(1 << bits).

    bits is the log2 of the number of buckets.

    filter, if given, is a predicate on a single byte value; only
    combinations in which all three bytes pass it are hashed.  It is
    assumed to be a pure function of its argument, so it is evaluated
    once per byte value rather than once per combination.

    Nothing is returned; the report is printed to stdout.
    """
    counts = [0] * (1 << bits)

    # Decide which byte values take part before entering the hot loop, so
    # a restrictive filter shrinks the loop instead of being re-run (and
    # mostly rejecting) on all 2**24 combinations.
    if filter:
        allowed = [v for v in range(256) if filter(v)]
    else:
        allowed = range(256)

    # The first byte varies fastest and the third slowest.
    for c in allowed:
        for b in allowed:
            for a in allowed:
                counts[fn(a, b, c, bits=bits)] += 1

    # The load to expect per bucket is inputs / buckets.  Use the number
    # of inputs actually hashed: with a filter that is far fewer than
    # 2**24, and a fixed 2**24 would overstate the expectation.
    hashed = sum(counts)
    print(f" {bits} bits; {len(counts)} buckets, "
          f"expected {hashed / len(counts)}")
    print(f"median {median(counts)}")
    print(f"mean {mean(counts)}")
    print(f"min {min(counts)}")
    print(f"max {max(counts)}")
    print(f"stddev {pstdev(counts)}")
# Byte values accepted by the ASCII-filtered runs: 32..126 plus CR, LF,
# TAB and NUL.  The set is the same for every table size, so it is built
# once here.
letters = set(range(32, 127)) | set(b'\r\n\t\0')

# For each table size, report first on all 3-byte inputs and then on
# inputs made only of the byte values in `letters`.
for b in (12, 13, 14):
    count(h, b)
    print("With ASCII filter")
    count(h, b, filter=letters.__contains__)

View File

@ -116,6 +116,7 @@ EXCLUDE_USAGE = {
'python/samba/tests/krb5/claims_tests.py',
'python/samba/tests/krb5/lockout_tests.py',
'python/samba/tests/krb5/group_tests.py',
'lib/compression/tests/scripts/three-byte-hash',
}
EXCLUDE_HELP = {