mirror of
https://github.com/samba-team/samba.git
synced 2024-12-22 13:34:15 +03:00
lib/compression: script to test 3 byte hash
Compression uses a 3 byte hash to remember LZ77 matches in a 14-bit table. This script runs the hash over all 16M combinations, then again over all ASCII combinations, counting collisions to find hot-spots. If you think you have a better hash, you are probably right, but you should try it here -- alter h() -- before committing to it. This one is literally the first one I thought of. Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz> Reviewed-by: Joseph Sutton <josephsutton@catalyst.net.nz>
This commit is contained in:
parent
dadecede54
commit
7804570a37
49
lib/compression/tests/scripts/three-byte-hash
Executable file
49
lib/compression/tests/scripts/three-byte-hash
Executable file
@ -0,0 +1,49 @@
|
||||
#!/usr/bin/python3
|
||||
"""Print statistics about a certain three byte hash.
|
||||
|
||||
USAGE: three_byte_hash
|
||||
"""
|
||||
import sys
|
||||
|
||||
# This script takes no arguments.  Asking for help prints the usage text
# and exits successfully; any other argument prints the same text but
# exits with a failure status.
help_requested = '--help' in sys.argv or '-h' in sys.argv
if help_requested or len(sys.argv) > 1:
    print(__doc__)
    # Use sys.exit() rather than the exit() helper installed by the site
    # module: that helper is intended for interactive sessions and does
    # not exist when the interpreter is started with -S.
    sys.exit(not help_requested)
|
||||
|
||||
|
||||
from statistics import mean, pstdev, median
|
||||
|
||||
|
||||
def h(*args, bits=12):
    """Hash three byte values into an integer of at most `bits` bits.

    Only the first three positional arguments are read; any further
    positional arguments are ignored.  The result is the low `bits` bits
    of the mixed value, so it always lies in range(1 << bits).
    """
    first = args[0]
    second = args[1] ^ 0x2e
    third = args[2] ^ 0x55

    # Fold the four terms together one at a time.
    mixed = (first + second) << 8
    # The 16-bit mask keeps a negative difference from producing a
    # negative term.
    mixed ^= ((third - first) & 0xffff) << 5
    mixed ^= third + second
    mixed ^= 0xcab + first

    mask = (1 << bits) - 1
    return mixed & mask
|
||||
|
||||
|
||||
def count(fn, bits, filter=None):
    """Count hash bucket usage over three byte inputs and print statistics.

    Each combination of three byte values (a, b, c) is hashed with
    fn(a, b, c, bits=bits) and the result is used as an index into a
    table of 1 << bits counters.  A summary of the table (expected count
    per bucket, median, mean, min, max and population standard
    deviation) is then printed to stdout.

    fn     -- the hash function; it must return an index that is valid
              for a table of 1 << bits entries.
    bits   -- the hash width in bits, which determines the table size.
    filter -- optional predicate taking one byte value.  When given, only
              combinations in which all three bytes pass are hashed.  It
              is evaluated once for each of the 256 byte values, so it
              should be a pure function of its argument.

    Returns None.
    """
    counts = [0] * (1 << bits)

    # Decide which byte values are allowed up front, so the loops below
    # only visit combinations that will actually be hashed.
    if filter:
        alphabet = [x for x in range(256) if filter(x)]
    else:
        alphabet = range(256)

    for c in alphabet:
        for b in alphabet:
            for a in alphabet:
                counts[fn(a, b, c, bits=bits)] += 1

    # The expected bucket load depends on how many combinations were
    # hashed, which is fewer than 2 ** 24 when a filter is in use.
    n_hashed = len(alphabet) ** 3

    print(f" {bits} bits; {len(counts)} buckets, "
          f"expected {n_hashed / len(counts)}")
    print(f"median {median(counts)}")
    print(f"mean {mean(counts)}")
    print(f"min {min(counts)}")
    print(f"max {max(counts)}")
    print(f"stddev {pstdev(counts)}")
|
||||
|
||||
|
||||
# Byte values treated as text: printable ASCII plus CR, LF, tab and NUL.
letters = set(range(32, 127)) | set(b'\r\n\t\0')

# NOTE(review): the filtered pass is taken to be part of the loop body,
# since it uses the loop variable b -- confirm against the upstream script.
for b in (12, 13, 14):
    # First every three byte combination, then only the text-like ones.
    count(h, b)

    print("With ASCII filter")
    count(h, b, filter=letters.__contains__)
|
@ -116,6 +116,7 @@ EXCLUDE_USAGE = {
|
||||
'python/samba/tests/krb5/claims_tests.py',
|
||||
'python/samba/tests/krb5/lockout_tests.py',
|
||||
'python/samba/tests/krb5/group_tests.py',
|
||||
'lib/compression/tests/scripts/three-byte-hash',
|
||||
}
|
||||
|
||||
EXCLUDE_HELP = {
|
||||
|
Loading…
Reference in New Issue
Block a user