mirror of
https://github.com/samba-team/samba.git
synced 2025-01-11 05:18:09 +03:00
dadecede54
Huffman tree re-quantisation and perhaps other code paths are only triggered by pathological data like this. Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz> Reviewed-by: Joseph Sutton <josephsutton@catalyst.net.nz>
186 lines
4.1 KiB
Python
Executable File
186 lines
4.1 KiB
Python
Executable File
#!/usr/bin/python3
|
|
"""Generate a few strings with unbalanced distributions to test the
|
|
regeneration of the Huffman tree when it gets too deep.
|
|
|
|
USAGE: make-test-vectors DIR
|
|
|
|
This will fill up DIR with test files.
|
|
"""
|
|
import sys
|
|
import random
|
|
from collections import defaultdict
|
|
|
|
|
|
if '--help' in sys.argv or '-h' in sys.argv or len(sys.argv) != 2:
|
|
print(__doc__)
|
|
exit(len(sys.argv) != 2)
|
|
|
|
|
|
DIR = sys.argv[1]
|
|
|
|
SIZE = (1 << 17) + (23) # two and a bit blocks
|
|
SIZE_NAME = "128k+"
|
|
# SIZE = (1 << 16)
|
|
# SIZE_NAME = "64"
|
|
|
|
|
|
random.seed(1)
|
|
|
|
|
|
def squares(n):
|
|
array = []
|
|
for i in range(n):
|
|
a = random.random()
|
|
b = random.random()
|
|
array.append(int(a * b * 256))
|
|
return bytes(array)
|
|
|
|
|
|
def skewed_choices(n):
|
|
b = list(range(256))
|
|
array = random.choices(b, weights=b, k=n)
|
|
return bytes(array)
|
|
|
|
|
|
def fib_shuffle(n):
|
|
array = []
|
|
a, b = 1, 1
|
|
for i in range(100):
|
|
array.extend([i] * a)
|
|
a, b = a + b, a
|
|
if len(array) > 1000000:
|
|
break
|
|
random.shuffle(array)
|
|
return bytes(array[:n])
|
|
|
|
|
|
def exp_shuffle(n):
|
|
array = []
|
|
for i in range(256):
|
|
array.extend([i] * int(1.04 ** i))
|
|
if len(array) > 1000000:
|
|
break
|
|
random.shuffle(array)
|
|
return bytes(array[:n])
|
|
|
|
|
|
def and_rand(n):
|
|
array = []
|
|
for i in range(n):
|
|
a = random.randrange(256)
|
|
b = random.randrange(256)
|
|
array.append(a & b)
|
|
return bytes(array)
|
|
|
|
|
|
def betavar(n, a, b):
|
|
array = []
|
|
for i in range(n):
|
|
x = random.betavariate(a, b)
|
|
array.append(int(x * 255.999999999999))
|
|
return bytes(array)
|
|
|
|
|
|
def repeated_alphabet(n):
|
|
a = b'abcdefghijklmnopqrstuvwxyz'
|
|
na = n // len(a) + 1
|
|
s = a * na
|
|
return s[:n]
|
|
|
|
|
|
def decayed_alphabet(n):
|
|
s = list(repeated_alphabet(n))
|
|
for i in range(256):
|
|
j = random.randrange(n)
|
|
s[j] = i
|
|
|
|
return bytes(s)
|
|
|
|
|
|
def trigram_model(n):
|
|
with open(__file__, 'rb') as f:
|
|
data = f.read()
|
|
lut = defaultdict(list)
|
|
for a, b, c in zip(data, data[1:], data[2:]):
|
|
k = bytes([a, b])
|
|
lut[k].append(c)
|
|
|
|
k = random.choice(list(lut.keys()))
|
|
s = []
|
|
p = k[1]
|
|
for i in range(n + 10):
|
|
c = random.choice(lut[k])
|
|
s.append(c)
|
|
k = bytes([p, c])
|
|
p = c
|
|
|
|
return bytes(s[10:])
|
|
|
|
|
|
def trigram_sum_model(n):
|
|
with open(__file__, 'rb') as f:
|
|
data = f.read()
|
|
lut = [[random.randrange(256)] for i in range(512)]
|
|
for a, b, c in zip(data, data[1:], data[2:]):
|
|
lut[a + b].append(c)
|
|
|
|
s = []
|
|
i = random.randrange(len(data) - 1)
|
|
a = data[i]
|
|
b = data[i + 1]
|
|
|
|
for i in range(n + 10):
|
|
x = lut[a + b]
|
|
c = random.choice(x)
|
|
s.append(c)
|
|
a = b
|
|
b = c
|
|
|
|
return bytes(s[10:])
|
|
|
|
|
|
def the_classics():
|
|
# this used to be main()
|
|
sq = squares(SIZE)
|
|
ch = skewed_choices(SIZE)
|
|
fs = fib_shuffle(SIZE)
|
|
es = exp_shuffle(SIZE)
|
|
ar = and_rand(SIZE)
|
|
bv1 = betavar(SIZE, 0.1, 1.5)
|
|
bv2 = betavar(SIZE, 0.5, 2.0)
|
|
bv3 = betavar(SIZE, 0.05, 0.05)
|
|
|
|
print("n sq ch fs es")
|
|
for i in range(256):
|
|
print(f"{i:3} {sq.count(i):5} {ch.count(i):5} "
|
|
f"{fs.count(i):5} {es.count(i):5}"
|
|
f"{ar.count(i):5} {bv1.count(i):5}"
|
|
f"{bv2.count(i):5} {bv3.count(i):5}"
|
|
)
|
|
|
|
for series, fn in ((sq, "square_series"),
|
|
(ch, "skewed_choices"),
|
|
(fs, "fib_shuffle"),
|
|
(es, "exp_shuffle"),
|
|
(ar, "and_rand"),
|
|
(bv1, "beta-variate1"),
|
|
(bv2, "beta-variate2"),
|
|
(bv3, "beta-variate3"),
|
|
):
|
|
with open(f"{DIR}/{fn}-{SIZE_NAME}", "wb") as f:
|
|
f.write(series)
|
|
|
|
|
|
def main():
|
|
if True:
|
|
the_classics()
|
|
for series, fn in ((decayed_alphabet(SIZE), "decayed_alphabet"),
|
|
(trigram_model(SIZE), "trigram"),
|
|
(trigram_sum_model(SIZE), "trigram_sum"),
|
|
):
|
|
with open(f"{DIR}/{fn}_{SIZE_NAME}", "wb") as f:
|
|
f.write(series)
|
|
|
|
|
|
main()
|