repos-cmp/repos_cmp/repos.py
2023-07-24 17:46:24 +04:00

531 lines
18 KiB
Python

# This module does not use str: use bytes everywhere.
import collections
import graphlib
import itertools
import logging
import re
import rpm
from repos_cmp import lists
from repos_cmp import rpm_ffi
LOG = logging.getLogger(__name__)
class Dependency(collections.namedtuple(
        'Dependency', ['name', 'flags', 'version'])):
    """One RPM dependency as a (name, flags, version) triple.

    `name` and `version` are bytes (this module does not use str);
    `version` is None when the dependency carries no version, and
    `flags` is the integer sense-flags value taken from the header.
    """

    # For every dependency kind: the header tags that hold the parallel
    # lists of names, flags and versions.
    HEADER_TRIPLETS = {
        'require': (rpm.RPMTAG_REQUIRENAME,
                    rpm.RPMTAG_REQUIREFLAGS,
                    rpm.RPMTAG_REQUIREVERSION),
        'provide': (rpm.RPMTAG_PROVIDENAME,
                    rpm.RPMTAG_PROVIDEFLAGS,
                    rpm.RPMTAG_PROVIDEVERSION)
    }

    # Union of the comparison bits of the sense flags.
    RPMSENSE_COMPARE = (rpm.RPMSENSE_EQUAL
                        | rpm.RPMSENSE_GREATER
                        | rpm.RPMSENSE_LESS)

    @classmethod
    def from_header(cls, header, kind):
        """Yield the dependencies of the given kind stored in an rpm header.

        Arguments:
            header: the rpm header to read from
            kind: a key of HEADER_TRIPLETS ('require' or 'provide')

        Names starting with b'rpmlib(' are skipped.  For 'provide', every
        file name of the package and the package's own name (versioned
        as '%{EVR}:%{DISTTAG}') are yielded as additional provides.
        """
        name_tag, flags_tag, version_tag = cls.HEADER_TRIPLETS[kind]
        rows = zip(header[name_tag], header[flags_tag], header[version_tag])
        for dep_name, dep_flags, dep_version in rows:
            if dep_name.startswith(b'rpmlib('):
                continue
            # an empty version is normalized to None
            yield cls(dep_name, dep_flags, dep_version or None)
        if kind != 'provide':
            return
        for filename in header[rpm.RPMTAG_FILENAMES]:
            yield cls(filename, 0, None)
        own_version = header.format('%{EVR}:%{DISTTAG}').encode('utf-8')
        yield cls(header[rpm.RPMTAG_NAME], rpm.RPMSENSE_EQUAL, own_version)

    @classmethod
    def wrap(cls, other):
        """Return `other` as a Dependency.

        An instance of this class is returned unchanged; bytes become an
        unversioned dependency with that name.  Anything else raises
        ValueError.
        """
        if isinstance(other, cls):
            return other
        if not isinstance(other, bytes):
            raise ValueError("Don't know how to make %s from %s"
                             % (cls, repr(other)))
        return cls(other, 0, None)

    def is_provide_for(self, other):
        """Tell whether this dependency, as a provide, satisfies `other`."""
        if self.name != other.name:
            return False
        unversioned = self.version is None or other.version is None
        if unversioned:
            # same name and at least one side has no version constraint
            return True
        overlap = rpm_ffi.ranges_overlap(
            self.name, self.version, self.flags,
            other.name, other.version, other.flags)
        return bool(overlap)

    def is_setversion(self):
        """Truthy when the version is a b'set:' version."""
        version = self.version
        return version and version.startswith(b'set:')

    def pretty_str(self):
        """Return a short human-readable str form of the dependency."""
        shown_name = self.name.decode()
        if not (self.flags or self.version):
            return '{}[ANY]'.format(shown_name)
        if self.is_setversion():
            return '{}[set:<>]'.format(shown_name)
        shown_version = self.version.decode() if self.version else None
        return '{}[{} {}]'.format(shown_name, self.flags, shown_version)
class Source:
    """A source package: identity fields plus its build requirements.

    Attributes:
        name, epoch, version, release: values as passed to the constructor
        requires: set of Dependency objects (filled by from_header)
        bin_names: set, created empty
    """

    def __init__(self, name, epoch, version, release):
        self.name, self.epoch = name, epoch
        self.version, self.release = version, release
        self.requires = set()
        self.bin_names = set()

    @classmethod
    def from_header(cls, header):
        """Build a Source from the rpm header of an SRPM."""
        identity_tags = (rpm.RPMTAG_NAME, rpm.RPMTAG_EPOCH,
                         rpm.RPMTAG_VERSION, rpm.RPMTAG_RELEASE)
        source = cls(*[header[tag] for tag in identity_tags])
        for dep in Dependency.from_header(header, 'require'):
            source.requires.add(dep)
        return source

    @property
    def source_rpm(self):
        """File name of the source package, as bytes."""
        parts = (self.name, self.version, self.release)
        return b'%s-%s-%s.src.rpm' % parts

    def __repr__(self):
        fields = (self.name, self.epoch, self.version, self.release)
        return 'Source[{} {} {} {}]'.format(*fields)
class Binary:
    """A binary package: identity fields, its source and its dependencies.

    Attributes:
        name, epoch, version, release: values as passed to the constructor
        source_rpm: file name of the source package it was built from
        source_name: `source_rpm` with its last two '-'-separated
            fields cut off
        requires, provides: sets of Dependency objects (filled by
            from_header)
    """

    def __init__(self, name, epoch, version, release, source_rpm):
        self.name, self.epoch = name, epoch
        self.version, self.release = version, release
        self.source_rpm = source_rpm
        # "<name>-<version>-<release>.src.rpm" -> "<name>"
        name_part = source_rpm.rsplit(b'-', 2)
        self.source_name = name_part[0]
        self.requires = set()
        self.provides = set()

    @classmethod
    def from_header(cls, header):
        """Build a Binary from the rpm header of a binary package."""
        tags = (rpm.RPMTAG_NAME, rpm.RPMTAG_EPOCH, rpm.RPMTAG_VERSION,
                rpm.RPMTAG_RELEASE, rpm.RPMTAG_SOURCERPM)
        package = cls(*[header[tag] for tag in tags])
        for dep in Dependency.from_header(header, 'require'):
            package.requires.add(dep)
        for dep in Dependency.from_header(header, 'provide'):
            package.provides.add(dep)
        return package

    def __repr__(self):
        fields = (self.name, self.epoch, self.version, self.release)
        return 'Binary[{} {} {} {}]'.format(*fields)
class Repository:
    """One package repository: its sources, binaries and a provides index.

    Attributes:
        name: repository name, as given by the caller
        sources: dict mapping source package name -> Source
        binaries: dict mapping binary package name -> Binary
        bits: word size of the repository (load() sets 32 or 64)
        reverse_prov: dict mapping a provided name -> list of
            (provide, binary) pairs; rebuilt by update_indexes()
    """

    def __init__(self, repo_name, sources, binaries, bits):
        self.name = repo_name
        self.sources = sources
        self.binaries = binaries
        self.bits = bits
        self.reverse_prov = {}  # name -> [(provide, binary)]
        self.update_indexes()

    def copy(self, new_name):
        """Return a new Repository with shallow copies of both package dicts.

        The package objects themselves are shared with the original.
        """
        return Repository(new_name,
                          dict(self.sources),
                          dict(self.binaries),
                          self.bits)

    def update_indexes(self):
        """Rebuild `reverse_prov` from the current set of binaries."""
        rprov = collections.defaultdict(list)
        for binary in self.binaries.values():
            for provide in binary.provides:
                rprov[provide.name].append((provide, binary))
        self.reverse_prov = dict(rprov)

    @classmethod
    def load(cls, repo_name, path, arch, components=('classic',)):
        """Load a repository from package lists found under `path`.

        Only the first header seen for every source or binary name is
        kept; duplicate binaries are reported with a warning.
        """
        src_list, bin_list = lists.read_pkglist_heders_for_repo(
            path, arch, components)
        # xxx: not the very best heuristics
        bits = 64 if any('64' in a for a in arch) else 32

        sources = {}
        for header in src_list:
            name = header[rpm.RPMTAG_NAME]
            if name not in sources:
                sources[name] = Source.from_header(header)

        binaries = {}
        for header in bin_list:
            name = header[rpm.RPMTAG_NAME]
            found = binaries.get(name)
            if found:
                # The query format used to end with a stray ')', which
                # leaked into the log message.
                LOG.warning('Duplicate binaries: %s %s', found,
                            header.format('%{NAME}-%{EVR}:%{DISTTAG}'))
            else:
                binaries[name] = Binary.from_header(header)

        return cls(repo_name, sources, binaries, bits=bits)

    @classmethod
    def load_from_config(cls, repo_name, config):
        """Load the repository described by config['repos'][repo_name]."""
        r = config['repos'][repo_name]
        return cls.load(repo_name, r['path'], r['arch'])

    def binaries_from(self, *source_names):
        """Yield the binary packages built from any of the given sources."""
        sources = set(source_names)
        for binary in self.binaries.values():
            if binary.source_name in sources:
                yield binary

    def providers(self, dependency):
        """Yield (provide, binary) pairs that satisfy `dependency`."""
        for item in self.reverse_prov.get(dependency.name, ()):
            if item[0].is_provide_for(dependency):
                yield item

    def _unmets(self, packages):
        """Return [(package, dependency)] for requires nobody here provides."""
        return [(pkg, dep)
                for pkg in packages
                for dep in pkg.requires
                if not any(self.providers(dep))]

    def unmets(self):
        """Unmet requirements of binary packages, as (binary, dependency)."""
        return self._unmets(self.binaries.values())

    def build_unmets(self):
        """Unmet requirements of source packages, as (source, dependency)."""
        return self._unmets(self.sources.values())

    def delete_sources(self, *source_names):
        """Remove the given sources and every binary built from them.

        Raises KeyError when a name is not among `self.sources`.
        """
        # collect the binaries first: deleting while iterating
        # self.binaries would break the iteration
        bin_names = [b.name for b in self.binaries_from(*source_names)]
        for source in source_names:
            del self.sources[source]
        for name in bin_names:
            del self.binaries[name]
        self.update_indexes()

    def chroot_for(self, requires):
        """Pick binaries that transitively satisfy `requires`.

        Returns a pair (unmets, chroot_binaries): the dependencies no
        package in this repository provides, and the chosen binaries in
        the order they were selected.
        """
        # reversed so that pop() handles the requires in their given order
        stack = list(reversed(list(requires)))
        chroot_provides = collections.defaultdict(set)
        chroot_binaries = []
        unmets = []

        while stack:
            dep = stack.pop()
            # already satisfied by something that was selected earlier?
            if any(p.is_provide_for(dep) for p in chroot_provides[dep.name]):
                continue
            LOG.debug("looking up %s", dep)
            providers = set(item[1] for item in self.providers(dep))
            if not providers:
                unmets.append(dep)
                continue
            if len(providers) > 1:
                LOG.warning('Ambiguous provide: %s (%s)', dep, providers)

            # that's almost random if ambiguous:
            p = providers.pop()
            LOG.debug(" installing %s", p)
            chroot_binaries.append(p)
            for prov in p.provides:
                chroot_provides[prov.name].add(prov)
            stack.extend(p.requires)

        return unmets, chroot_binaries
# Substrings of dependency names that mark platform-specific requirements;
# a binary requirement whose name contains any of them is skipped when
# build reports are computed.
_SPECIAL_DEPS = (
    # ABI parts:
    b'(GCC_', b'(CXXABI_', b'(GLIBC_',
    # elf loaders:
    b'/lib64/ld', b'/lib/ld', b'ld-linux')
# Dependency names matching these patterns are handled like set-versions
# in build reports: they are not reported when a provider comes from a
# source that is the same in both repositories.
# NOTE(review): judging by the names these match GHC hashed dependencies
# and GHC shared library names -- confirm against real package data.
_GHC_HASHDEP = re.compile(rb'ghc[0-9\.]+\(.+-[^-]+-[^-]+\)')
_GHC_HASHLIB = re.compile(rb'^lib.*-ghc[0-9.]+.so')
def _from_64bit_dep(dep):
    """Translate a 64-bit flavoured dependency name to its 32-bit form.

    A trailing b'()(64bit)' or b'(64bit)' marker is cut off; otherwise
    b'/lib64/' path components become b'/lib/'.  Flags and version are
    kept.  A dependency with nothing to translate is returned as is.
    """
    name = dep.name
    # the longer marker goes first: it also ends with the shorter one
    for marker in (b'()(64bit)', b'(64bit)'):
        if name.endswith(marker):
            return Dependency(name[:-len(marker)], dep.flags, dep.version)
    if b'/lib64/' in name:
        return Dependency(name.replace(b'/lib64/', b'/lib/'),
                          dep.flags, dep.version)
    return dep
def have_same_source(repoA, repoB, source_name):
    """Tell whether both repositories carry the same build of a source.

    The result is truthy only when `source_name` is present in both
    repositories with equal epoch, version and release.  When the source
    is missing from a repository, the falsy lookup result is returned.
    """
    first = repoA.sources.get(source_name)
    if not first:
        return first
    second = repoB.sources.get(source_name)
    if not second:
        return second
    if first.epoch != second.epoch:
        return False
    if first.version != second.version:
        return False
    return first.release == second.release
# Report "kind" used for build requirements of a source; the kind of a
# binary requirement is the name of the requiring binary instead.
# NOTE(review): the '0000-' prefix presumably makes build requirements
# sort before binary names in formatted reports -- confirm.
_BUILDREQ = '0000-BR'
def _missing_requires(from_repo, to_repo, requires, kind, ignore):
    """Yield (kind, dep, provider) for requires missing from to_repo.

    A dependency is missing when nothing in `to_repo` provides it.  For
    each such dependency every provider found in `from_repo` is yielded,
    except the ones built from a source listed in `ignore`.
    """
    for dep in requires:
        if any(to_repo.providers(dep)):
            continue
        for _provide, binary in from_repo.providers(dep):
            if binary.source_name in ignore:
                continue
            yield kind, dep, binary
def _raw_build_report(from_repo, to_repo, source_name, ignore=()):
    """Collect what `to_repo` lacks to build and install one source.

    Arguments:
        from_repo: repository the source is taken from (must be 64-bit)
        to_repo: repository that is checked for missing dependencies
        source_name: name of the source package in `from_repo`
        ignore: source names whose binaries are never reported

    Returns a set of tuples (kind, dep, provider); kind is _BUILDREQ for
    build requirements and the requiring binary's name otherwise.
    """
    # XXX: assumes from_repo is x86_64
    assert from_repo.bits == 64
    need_translation = to_repo.bits != 64

    # build requirements of the source itself
    found = set(_missing_requires(
        from_repo, to_repo, from_repo.sources[source_name].requires,
        _BUILDREQ, ignore))

    # requirements of the binaries built from the source
    for binary in from_repo.binaries_from(source_name):
        for dep in binary.requires:
            # platform-specific stuff is not interesting
            if any(marker in dep.name for marker in _SPECIAL_DEPS):
                continue
            # look it up in to_repo, in 32-bit spelling when needed
            wanted = _from_64bit_dep(dep) if need_translation else dep
            if any(to_repo.providers(wanted)):
                continue
            candidates = [prov for _d, prov in from_repo.providers(dep)]
            # dependencies between sub-packages of the same source
            if any(c.source_name == source_name for c in candidates):
                continue
            # set-versions and the like may differ between platforms:
            # trust them when the providing source is the same on both sides
            platform_dependent = (dep.is_setversion()
                                  or _GHC_HASHLIB.match(dep.name)
                                  or _GHC_HASHDEP.fullmatch(dep.name))
            if platform_dependent and any(
                    have_same_source(from_repo, to_repo, c.source_name)
                    for c in candidates):
                continue
            # a genuinely missing dependency: report all its providers
            found.update((binary.name.decode(), dep, c)
                         for c in candidates
                         if c.source_name not in ignore)
    return found
def format_triplet_report(report, title=None):
    """Render a report as text, grouped by the provider's source package.

    Arguments:
        report: iterable over triplets (kind, dep, binary)
        title: the title of report, if any

    Every source package gets a line of its own, followed by one line
    per (kind, provider name) pair listing the dependencies involved.
    """
    lines = ['\n== %s ==\n' % title] if title else []

    grouped = {}
    for triplet in report:
        grouped.setdefault(triplet[2].source_rpm, []).append(triplet)

    for srpm in sorted(grouped):
        lines.append(srpm.decode())
        deps_by_key = {}
        for kind, dep, provider in grouped[srpm]:
            key = (kind, provider.name.decode())
            deps_by_key.setdefault(key, []).append(dep.pretty_str())
        for key in sorted(deps_by_key):
            kind, provider_name = key
            lines.append('\t {} {} {}'.format(
                kind, provider_name, ' '.join(deps_by_key[key])))
    return '\n'.join(lines)
def build_report(from_repo, to_repo, source_name):
    """Return the formatted build report for one source, titled by its name."""
    triplets = _raw_build_report(from_repo, to_repo, source_name)
    title = source_name.decode()
    return format_triplet_report(triplets, title)
def _src_name(source):
    """Reduce an SRPM file name to the source package name.

    Anything that does not end with b'.src.rpm' is taken to be a source
    name already and is returned unchanged.
    """
    if not source.endswith(b'.src.rpm'):
        return source
    # drop the trailing "-<version>-<release>.src.rpm" part
    pieces = source.rsplit(b'-', 2)
    return pieces[0]
def recursive_build_report(from_repo, to_repo, *source_names,
                           ignore=(), ignore_sort=()):
    """Build report for the given sources and everything they pull in.

    Arguments:
        from_repo: repository the sources are taken from
        to_repo: repository that is checked for missing dependencies
        source_names: source names or SRPM file names, as bytes
        ignore: source names whose binaries are left out of the reports
        ignore_sort: pairs (dependency source, source) that are left out
            of the build-order graph

    Returns the text of the report: one section per visited source, then
    a summary in build order, then the cycles that had to be broken and
    the "special" packages of `to_repo`, if there are any.
    """
    reports = []
    build_source_deps = {}
    pkg_source_deps = {}
    requested_source_names = set(_src_name(s) for s in source_names)
    stack = list(requested_source_names)
    seen = set(stack)

    # Walk the dependency closure: every source that provides something
    # missing for an already visited source gets visited as well.
    while stack:
        cur = stack.pop()
        report = _raw_build_report(from_repo, to_repo, cur, ignore)
        # NOTE(review): `cur` is a source name, yet it is looked up among
        # the *binaries* of to_repo, so b'NONE' is shown whenever no binary
        # is named exactly like the source -- confirm this is intended.
        cur_in_to = to_repo.binaries.get(cur)
        cur_source_srpm = cur_in_to.source_rpm if cur_in_to else b'NONE'
        reports.append('\n== %s ==\n%s has %s\n' % (
            cur.decode(), to_repo.name, cur_source_srpm.decode()))
        reports.append(format_triplet_report(report))

        # find all sources in the report
        bld_deps = set(p.source_name for k, _d, p in report if k == _BUILDREQ)
        pkg_deps = set(p.source_name for k, _d, p in report if k != _BUILDREQ)
        # a source needed for both purposes counts as a build dependency
        pkg_deps -= bld_deps

        build_source_deps[cur] = sorted(bld_deps)
        pkg_source_deps[cur] = sorted(pkg_deps)

        unseen = bld_deps.union(pkg_deps) - seen
        seen.update(unseen)
        stack.extend(unseen)

    # expand the build requires with the pkg reqs of dependencies:
    full_req = {}
    for source in build_source_deps:
        _u, bins = from_repo.chroot_for(from_repo.sources[source].requires)
        dep_sources = set()
        for b in bins:
            # only the sources visited above take part in the ordering
            if b.source_name not in build_source_deps:
                continue
            if (b.source_name, source) in ignore_sort:
                continue
            dep_sources.add(b.source_name)
        full_req[source] = dep_sources

    # Sort topologically; whenever a cycle is found, remember it, drop
    # one of its edges and try again.
    cycles = []
    while True:
        try:
            order = list(graphlib.TopologicalSorter(full_req).static_order())
        except graphlib.CycleError as ex:
            LOG.warning("Cycle detected: %s", ex)
            # args[1] is the list of nodes that form the cycle
            cycle = ex.args[1]
            cycles.append(cycle)
            # break the cycle and retry
            full_req[cycle[1]].remove(cycle[0])
        else:
            break

    reports.append('\n== SUMMARY ==\n')
    for source in order:
        # the first column is 'N' for sources that were not requested
        # explicitly and empty for the requested ones
        reports.append('{}\t{:50} {} :: {}'.format(
            '' if source in requested_source_names else 'N',
            from_repo.sources[source].source_rpm.decode(),
            b' '.join(build_source_deps[source]).decode(),
            b' '.join(pkg_source_deps[source]).decode()))

    if cycles:
        reports.append('\nCycles:')
        reports.extend('\t%s' % ' '.join(source.decode() for source in c)
                       for c in cycles)

    # sources whose release in to_repo contains b'.mips' or b'.rv64'
    special = set()
    for source_name in order:
        source = to_repo.sources.get(source_name)
        if not source:
            continue
        if b'.mips' in source.release or b'.rv64' in source.release:
            special.add(source.source_rpm)

    if special:
        # NOTE(review): "followng" in the message below is a typo in the
        # emitted text; left untouched here because it is program output.
        reports.append('\nThe followng packages are special:')
        reports.extend('\t%s' % source.decode()
                       for source in sorted(special))

    return '\n'.join(reports)
def requires_build_report(from_repo, to_repo, *requires,
                          ignore=(), ignore_sort=()):
    """Report what is needed to satisfy the given requires in to_repo.

    Arguments:
        from_repo: repository the providers are taken from
        to_repo: repository that is checked for missing dependencies
        requires: Dependency objects or bare dependency names (bytes)
        ignore: source names whose binaries are left out of the reports
        ignore_sort: passed on to recursive_build_report()

    Returns the text of the report: a 'Requested' section listing the
    providers of the requires missing from `to_repo`, followed by the
    recursive build report for the sources of those providers.
    """
    wanted = [Dependency.wrap(r) for r in requires]
    # _missing_requires() takes (..., requires, kind, ignore).  `ignore`
    # used to be passed as the kind and `ignore_sort` as the ignore list,
    # so the requested ignore list was never applied here.
    missing_reqs = list(_missing_requires(
        from_repo, to_repo, wanted, 'requested', ignore))
    reports = [format_triplet_report(missing_reqs, 'Requested')]
    source_names = set(p.source_name for _k, _d, p in missing_reqs)
    reports.append(recursive_build_report(
        from_repo, to_repo, *source_names,
        ignore=ignore, ignore_sort=ignore_sort))
    return '\n'.join(reports)
def _unmets(repo, old_repo=None):
    """Iterate over the unmet dependencies of a repository as triplets.

    Yields ('unmet', dep, binary) for binary packages first, then
    ('BUILD', dep, source) for source packages.  When `old_repo` is
    given, pairs that are unmet there as well are left out.
    """
    def only_new(kind, current, known):
        # turn (package, dep) pairs into report triplets, skipping known ones
        for pair in current:
            if pair in known:
                continue
            package, dep = pair
            yield kind, dep, package

    known_runtime = set(old_repo.unmets()) if old_repo else set()
    runtime = only_new('unmet', repo.unmets(), known_runtime)
    known_build = set(old_repo.build_unmets()) if old_repo else set()
    build = only_new('BUILD', repo.build_unmets(), known_build)
    return itertools.chain(runtime, build)
def unmets_report(repo):
    """Return a formatted report of all unmet dependencies of `repo`."""
    triplets = _unmets(repo)
    return format_triplet_report(triplets)
def who_cares(repo, *source_names):
    '''What new unmets will deletion of the packages produce?

    The sources are deleted from a copy of `repo`; the report lists the
    unmet dependencies of the copy that `repo` itself does not have.
    '''
    trimmed = repo.copy('next_' + repo.name)
    trimmed.delete_sources(*source_names)
    new_unmets = _unmets(trimmed, repo)
    return format_triplet_report(new_unmets)
# Extra sources whose names start with one of these prefixes are never
# proposed for removal by extras_to_remove().
_GOOD_EXTRAS_PREFIXES = (b'kernel-', b'u-boot', b'riscv', b'fu540', b'opensbi')
def extras_to_remove(base_repo, repo, ignore=()):
    """Print the extra sources of `repo` that can be removed safely.

    Arguments:
        base_repo: the reference repository
        repo: the repository to look for extra sources in
        ignore: source names that are never considered

    Extras are the sources of `repo` that `base_repo` does not have.
    Sources matching _GOOD_EXTRAS_PREFIXES or with b'jpp' in the release
    are left alone.  Candidates whose removal would produce new unmet
    dependencies are dropped from the list, repeatedly, until removal of
    the remaining ones breaks nothing.  The resulting names are printed
    in sorted order, one per line; nothing is returned.
    """
    extras = set(repo.sources) - set(base_repo.sources) - set(ignore)
    # filter out certain packages that must be left alone
    filtered_extras = set(
        name for name in extras
        if (not name.startswith(_GOOD_EXTRAS_PREFIXES)
            and b'jpp' not in repo.sources[name].release))

    while True:
        LOG.info('Checking %d filtered extras', len(filtered_extras))
        next_repo = repo.copy('removal_test_' + repo.name)
        next_repo.delete_sources(*filtered_extras)
        # The binaries of the current candidates are the same for every
        # unmet below: collect them once instead of rescanning all the
        # binaries of the repository for each new unmet.
        candidates = list(repo.binaries_from(*filtered_extras))
        leave_alone = set()
        for _kind, req, _pkg in _unmets(next_repo, repo):
            for b in candidates:
                if b.source_name in leave_alone:
                    continue
                if any(p.is_provide_for(req) for p in b.provides):
                    leave_alone.add(b.source_name)
        if not leave_alone:
            break
        # somebody still needs these: keep them and check again
        filtered_extras -= leave_alone

    for name in sorted(filtered_extras):
        print(name.decode())
if __name__ == '__main__':
    # Script entry point: sets up the configuration and loads three
    # repositories into module-level names.
    from repos_cmp.utils import interactive_setup
    # NOTE(review): `pager` is not used below; presumably it is imported
    # for convenience in an interactive session -- confirm.
    from pydoc import pager  # noqa
    CONFIG = interactive_setup()
    riscv64 = Repository.load_from_config('sisyphus_riscv64', CONFIG)
    x86_64 = Repository.load_from_config('sisyphus', CONFIG)
    # set explicitly, overriding whatever Repository.load() guessed
    x86_64.bits = 64
    mipsel = Repository.load_from_config('sisyphus_mipsel', CONFIG)