Add tool for comparing srclists

This commit is contained in:
Ivan A. Melnikov 2018-06-04 16:23:37 +04:00
parent 0b0d4a1f20
commit 31ddf2eda8
3 changed files with 141 additions and 0 deletions

4
.gitignore vendored
View File

@ -1,2 +1,6 @@
.*
*.pyc
*.log
srclists/srclist*
srclists/report*

24
srclists/get_stats.sh Executable file
View File

@ -0,0 +1,24 @@
#!/bin/bash
# Download two srclists, compare them with stats.py and keep a compressed
# report named after the current date.
#
# Usage: get_stats.sh [NEW_SRCLIST_URL [BASE_SRCLIST_URL]]
#
# Must be run from the directory that contains stats.py; the downloaded
# srclists and the report are written to the current directory.
set -eu
# Without pipefail the status of "curl | xz" is the status of xz alone, so a
# failed download could go unnoticed.
set -o pipefail

NEW_SRCLIST=${1:-http://10.64.0.6/mipsel/Sisyphus/mipsel/base/srclist.classic.xz}
BASE_SRCLIST=${2:-http://10.64.0.6/sisyphus/Sisyphus/x86_64/base/srclist.classic.xz}
REPORT_FILENAME="report-$(date +'%Y-%m-%d').txt"

# -f makes curl exit non-zero on HTTP errors instead of piping the error
# page into xz.
echo "Retrieving the new srclist ($NEW_SRCLIST)"
curl -f "$NEW_SRCLIST" | xz -d > srclist_new
echo "Retrieving the base srclist ($BASE_SRCLIST)"
curl -f "$BASE_SRCLIST" | xz -d > srclist_base

echo "Comparing:"
python ./stats.py srclist_base srclist_new > "$REPORT_FILENAME"
# Show the first 11 lines of the report on the terminal.
head -n 11 "$REPORT_FILENAME"
# -f: overwrite the report of an earlier run on the same day; plain xz would
# refuse and abort the script under "set -e".
xz -f "$REPORT_FILENAME"
echo "Full report is written to $REPORT_FILENAME.xz"

113
srclists/stats.py Normal file
View File

@ -0,0 +1,113 @@
from __future__ import print_function
import rpm
import sys
def nevr(header, default_epoch=None):
    """Return the ``(name, epoch, version, release)`` tuple of *header*.

    *header* is indexed with the ``rpm.RPMTAG_*`` constants.  A falsy epoch
    value (missing or zero) is replaced with *default_epoch*.
    """
    name = header[rpm.RPMTAG_NAME]
    epoch = header[rpm.RPMTAG_EPOCH]
    if not epoch:
        epoch = default_epoch
    version = header[rpm.RPMTAG_VERSION]
    release = header[rpm.RPMTAG_RELEASE]
    return (name, epoch, version, release)
# Human-readable explanation of every report category.  The keys are the
# labels passed to print_totals() and print_names().
COLOR_LEGEND = {
    "RED": "totally missing",
    "GREEN": "same NEVR in both repositories",
    "EXTRA": "present in new repo only (distro-specific or not yet deleted)",
    "ORANGE": "EV is different (we need the latest version)",
    "YELLOW": "EV is the same, release is smaller (we need the latest build)",
    "SLATE": "EV is the same, release is larger (we need to push the fixes)",
}
def print_totals(label, name_set):
    """Print one summary line for *label*: its count and legend text.

    *label* must be a key of COLOR_LEGEND; *name_set* is any sized
    collection whose length is reported.
    """
    count = len(name_set)
    description = COLOR_LEGEND[label]
    summary = "%10s:%6s // %s" % (label, count, description)
    print(summary)
def format_version(versions, name):
    """Render the EVR stored for *name* as ``[epoch:]version-release``.

    *versions* maps a name to an ``(epoch, version, release)`` tuple.
    Returns the string ``'MISSING'`` when *name* is not in the mapping.
    The ``epoch:`` prefix is omitted when the epoch is falsy.
    """
    try:
        epoch, version, release = versions[name]
    except KeyError:
        return 'MISSING'
    if epoch:
        prefix = str(epoch) + ':'
    else:
        prefix = ''
    return '%s%s-%s' % (prefix, version, release)
def print_names(label, names, base_labels, new_labels):
    """Print the section of the report that lists every name in *names*.

    The section starts with a header built from *label* and its
    COLOR_LEGEND text.  Names are printed in sorted order, each followed by
    its version in the new repository, an arrow, and its version in the base
    repository (``MISSING`` when a repository lacks the name).
    """
    header = "\n\n%s (%s):" % (label, COLOR_LEGEND[label])
    print(header)
    for pkg_name in sorted(names):
        new_version = format_version(new_labels, pkg_name)
        base_version = format_version(base_labels, pkg_name)
        print("\t%s\t%s -> %s" % (pkg_name, new_version, base_version))
def evr_cmp(name, evr1, evr2):
    """Compare two ``(epoch, version, release)`` tuples via rpm.labelCompare.

    *name* is used only to build the error message.  Returns whatever
    ``rpm.labelCompare`` returns for the two stringified labels; callers in
    this file only look at its sign.

    Raises ValueError (mentioning *name*, both EVRs and the original error)
    when the comparison itself fails.
    """
    # rpm module reads epoch as int, requires string for comparison
    label1 = tuple(str(x) for x in evr1)
    label2 = tuple(str(x) for x in evr2)
    try:
        return rpm.labelCompare(label1, label2)
    except Exception as ex:
        # Only ordinary errors are wrapped: catching BaseException here would
        # also turn KeyboardInterrupt/SystemExit into a ValueError.
        raise ValueError(
            "Failed to compare EVRs for %s: %s %s: %s: %s"
            % (name, evr1, evr2, type(ex).__name__, str(ex)))
def main(base_arch_path, new_arch_path):
    """Compare two srclists and print a categorised report to stdout.

    Both arguments are paths readable by ``rpm.readHeaderListFromFile``.
    Every source package name is put into one of the COLOR_LEGEND
    categories, a totals table is printed, and then the names of each
    category are listed.

    Returns None, so ``sys.exit(main(...))`` exits with status 0.
    """
    base_list = rpm.readHeaderListFromFile(base_arch_path)
    new_list = rpm.readHeaderListFromFile(new_arch_path)

    print("Base repo size:", len(base_list))
    print("New repo size:", len(new_list))

    base_nevrs = frozenset(nevr(h) for h in base_list)
    new_nevrs = frozenset(nevr(h) for h in new_list)

    # name -> (epoch, version) and name -> (epoch, version, release).
    # If a repository holds several entries with the same name, only one of
    # them survives in these dictionaries.
    base_versions = {n: (e, v) for n, e, v, r in base_nevrs}
    new_versions = {n: (e, v) for n, e, v, r in new_nevrs}
    base_labels = {n: (e, v, r) for n, e, v, r in base_nevrs}
    new_labels = {n: (e, v, r) for n, e, v, r in new_nevrs}

    # RED: names present in the base repo but absent from the new one
    base_names = frozenset(t[0] for t in base_nevrs)
    new_names = frozenset(t[0] for t in new_nevrs)
    red_names = base_names - new_names

    # GREEN: identical NEVR in both repositories
    green_nevrs = base_nevrs & new_nevrs
    green_names = frozenset(t[0] for t in green_nevrs)

    # EXTRA: names present in the new repo only
    extra_names = new_names - base_names

    # OTHER: same name, different EVR
    other_names = new_names - green_names - extra_names
    orange_names = frozenset(n for n in other_names
                             if base_versions[n] != new_versions[n])
    # YELLOW means the release in the new repo is *smaller* than in the base
    # repo, i.e. the base EVR compares greater than the new one.  The
    # original "< 0" test selected the opposite case and thereby swapped
    # YELLOW and SLATE relative to COLOR_LEGEND.
    yellow_names = frozenset(
        n for n in other_names
        if base_versions[n] == new_versions[n]
        and evr_cmp(n, base_labels[n], new_labels[n]) > 0)
    # SLATE: everything left, i.e. same EV and the new release is not smaller
    slate_names = other_names - yellow_names - orange_names

    print("Statistics:")
    print_totals('RED', red_names)
    print_totals('ORANGE', orange_names)
    print_totals('YELLOW', yellow_names)
    print_totals('SLATE', slate_names)
    print_totals('GREEN', green_names)
    print_totals('EXTRA', extra_names)

    print_names('RED', red_names, base_labels, new_labels)
    print_names('ORANGE', orange_names, base_labels, new_labels)
    print_names('YELLOW', yellow_names, base_labels, new_labels)
    print_names('SLATE', slate_names, base_labels, new_labels)
    print_names('GREEN', green_names, base_labels, new_labels)
    print_names('EXTRA', extra_names, base_labels, new_labels)
if __name__ == '__main__':
    # Exactly two positional arguments are required; report misuse with a
    # usage line instead of the TypeError traceback main(*argv) would give.
    if len(sys.argv) != 3:
        sys.exit("Usage: %s BASE_SRCLIST NEW_SRCLIST" % sys.argv[0])
    sys.exit(main(sys.argv[1], sys.argv[2]))