repos: Introduce BuildReporter

Instead of a global cache, we now have an object
that encapsulates it, together with the parameters
that affect reports. This seems to be a simpler
and more robust model.

missing_rating and recursive_build_report
now use it, although the refactoring is not yet
complete.
This commit is contained in:
Ivan A. Melnikov 2023-12-05 14:03:36 +04:00
parent 51aa8967d0
commit 09b4700064

View File

@ -17,6 +17,12 @@ from repos_cmp import rpm_ffi
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
def _src_name(source):
if source.endswith(b'.src.rpm'):
return source.rsplit(b'-', 2)[0]
return source
class Dependency(collections.namedtuple( class Dependency(collections.namedtuple(
'Dependency', ['name', 'flags', 'version'])): 'Dependency', ['name', 'flags', 'version'])):
@ -416,15 +422,6 @@ def _from_64bit_dep(dep):
return Dependency(new_name, dep.flags, dep.version) return Dependency(new_name, dep.flags, dep.version)
def have_same_source(repoA, repoB, source_name):
sourceA = repoA.sources.get(source_name)
sourceB = repoB.sources.get(source_name)
return (sourceA and sourceB
and sourceA.epoch == sourceB.epoch
and sourceA.version == sourceB.version
and sourceA.release == sourceB.release)
_BUILDREQ = '0000-BR' _BUILDREQ = '0000-BR'
_BUILD_UNMET = '0001-UN' _BUILD_UNMET = '0001-UN'
@ -433,41 +430,65 @@ def _is_build_kind(kind):
return kind in (_BUILDREQ, _BUILD_UNMET) return kind in (_BUILDREQ, _BUILD_UNMET)
__BUILD_REPORT_CACHE = {} class BuildReporter:
def __init__(self, from_repo, to_repo, ignore=(), prefer=(), caching=True):
self.from_repo = from_repo
self.to_repo = to_repo
self.ignore = ignore
self.prefer = prefer
self._cache = {} if caching else None
def build_report(from_repo, to_repo, source_name, ignore=(), prefer=()): def report(self, source_name):
if self._cache:
cached_result = self._cache.get(source_name)
if cached_result:
return cached_result
report = self._raw_report(source_name)
if self._cache:
self.cache[source_name] = report
return report
def _same_source(self, source_name):
sourceA = self.from_repo.sources.get(source_name)
sourceB = self.to_repo.sources.get(source_name)
return (sourceA and sourceB
and sourceA.epoch == sourceB.epoch
and sourceA.version == sourceB.version
and sourceA.release == sourceB.release)
def _raw_report(self, source_name):
"""Build report for one source, by name """Build report for one source, by name
Returns an iterable over tuples (kind, dep, provider). Returns an iterable over tuples (kind, dep, provider).
""" """
cache_key = (from_repo.name, to_repo.name, source_name, LOG.debug("Building build report for %s", source_name)
frozenset(ignore), frozenset(prefer))
cached_result = __BUILD_REPORT_CACHE.get(cache_key)
if cached_result is not None:
if cached_result.is_valid([from_repo, to_repo]):
return cached_result
LOG.debug("Building build report: %s", cache_key)
# XXX: assumes from_repo is x86_64 # XXX: assumes from_repo is x86_64
assert from_repo.bits == 64 assert self.from_repo.bits == 64
translate = (to_repo.bits != 64) translate = (self.to_repo.bits != 64)
from_source = self.from_repo.sources[source_name]
# find what's missing for the build chroot in to_repo # find what's missing for the build chroot in to_repo
chroot = to_repo.chroot_for( chroot = self.to_repo.chroot_for(
from_repo.sources[source_name].requires, prefer=prefer) from_source.requires, prefer=self.prefer)
# distinguish unmets from build requirements that are missing # distinguish unmets from build requirements that are missing
buildreqs = {dep for dep in from_repo.sources[source_name].requires} buildreqs = {dep for dep in from_source.requires}
missing_reqs = ((_BUILDREQ, dep) for dep in buildreqs missing_reqs = ((_BUILDREQ, dep) for dep in buildreqs
if not chroot.is_provided(dep)) if not chroot.is_provided(dep))
chroot_unmets = ((_BUILD_UNMET, dep) for dep in chroot.unmets() chroot_unmets = ((_BUILD_UNMET, dep) for dep in chroot.unmets()
if dep not in buildreqs) if dep not in buildreqs)
# add requirements for the binary packages this source package # add requirements for the binary packages this source package produces
# produces in the from_repo; if needed, try to translate from 64 to 32 bits # in the from_repo; if needed, try to translate from 64 to 32 bits
binreqs = ((b.name.decode(), (_from_64bit_dep(dep) if translate else dep)) binreqs = (
for b in from_repo.binaries_from(source_name) (b.name.decode(), (_from_64bit_dep(dep) if translate else dep))
for b in self.from_repo.binaries_from(source_name)
for dep in b.requires) for dep in b.requires)
result = set() result = set()
@ -476,83 +497,63 @@ def build_report(from_repo, to_repo, source_name, ignore=(), prefer=()):
if any(x in dep.name for x in _SPECIAL_DEPS): if any(x in dep.name for x in _SPECIAL_DEPS):
continue continue
# skip dependencies already present in to_repo # skip dependencies already present in to_repo
if any(to_repo.providers(dep)): if any(self.to_repo.providers(dep)):
continue continue
# skip inter-sub-package dependencies # skip inter-sub-package dependencies
if any(p.source_name == source_name if any(p.source_name == source_name
for _d, p in from_repo.providers(dep)): for _d, p in self.from_repo.providers(dep)):
continue continue
# set-versions may be platform-dependent. # set-versions may be platform-dependent.
# double-check that if have the same source # double-check that if have the same source
if (dep.is_setversion() if (dep.is_setversion()
or _GHC_HASHLIB.match(dep.name) or _GHC_HASHLIB.match(dep.name)
or _GHC_HASHDEP.fullmatch(dep.name)): or _GHC_HASHDEP.fullmatch(dep.name)):
if any(have_same_source(from_repo, to_repo, p.source_name) if any(self._same_source(p.source_name)
for _d, p in from_repo.providers(dep)): for _d, p in self.from_repo.providers(dep)):
continue continue
# ok, it's true missing dependency # ok, it's true missing dependency
# let's look up providers # let's look up providers
providers = set(provider providers = set(provider
for _dep, provider in from_repo.providers(dep) for _dep, provider in self.from_repo.providers(dep)
if provider.source_name not in ignore) if provider.source_name not in self.ignore)
if not providers: if not providers:
LOG.warning("No providers for %s", dep) LOG.warning("No providers for %s", dep)
elif len(providers) > 1: elif len(providers) > 1:
LOG.warning("%d provider(s) for %s: %s", LOG.warning("%d provider(s) for %s: %s",
len(providers), dep, list(providers)) len(providers), dep, list(providers))
preferred = [p for p in providers preferred = [p for p in providers
if p.source_name in prefer] if p.source_name in self.prefer]
if preferred and len(preferred) < len(providers): if preferred and len(preferred) < len(providers):
LOG.info("Using preferred providers: %s", preferred) LOG.info("Using preferred providers: %s", preferred)
providers = preferred providers = preferred
result.update((kind, dep, provider) for provider in providers) result.update((kind, dep, provider) for provider in providers)
the_report = TripletReport([from_repo, to_repo], result) return TripletReport([self.from_repo, self.to_repo], result)
__BUILD_REPORT_CACHE[cache_key] = the_report
return the_report
def recursive_closure(self, source_names, stop_names=()):
seen = list(source_names)
queue = list(source_names)
stop_names = set(stop_names)
def _src_name(source):
if source.endswith(b'.src.rpm'):
return source.rsplit(b'-', 2)[0]
return source
class RecursiveBuildReport:
def __init__(self, from_repo, to_repo, source_names,
stop_names=(), prefer=()):
self.from_repo = from_repo
self.to_repo = to_repo
self.stop_names = frozenset(stop_names)
self.prefer = prefer
self.requested_names = [_src_name(s) for s in source_names]
self.reported_names = self._closure()
def _build_report(self, name):
return build_report(self.from_repo, self.to_repo, name,
prefer=self.prefer)
def _closure(self):
seen = list(self.requested_names)
queue = list(self.requested_names)
while queue: while queue:
cur = queue.pop() cur = queue.pop()
missing = self._build_report(cur).sources_set() missing = self.report(cur).sources_set()
unseen = missing.difference(seen) unseen = missing.difference(seen)
seen.extend(unseen) seen.extend(unseen)
# don't recur into packages from stop_names # don't recur into packages from stop_names
queue.extend(unseen - self.stop_names) queue.extend(unseen - stop_names)
return seen return seen
def format_build_reports(self, to=None): def format_reports(self, source_names, stop_names=(), to=None):
stop_names = set(stop_names)
result = [] result = []
for name in self.reported_names: for name in source_names:
result.append(f"\n== {name.decode()} ==") result.append(f"\n== {name.decode()} ==")
in_to = self.to_repo.sources.get(name) in_to = self.to_repo.sources.get(name)
srpm = in_to.source_rpm.decode() if in_to else 'NONE' srpm = in_to.source_rpm.decode() if in_to else 'NONE'
result.append(f"{self.to_repo.name} has {srpm}\n") result.append(f"{self.to_repo.name} has {srpm}\n")
if name in self.stop_names: if name in stop_names:
result.append('not going deeper (stop_names)') result.append('not going deeper (stop_names)')
else: else:
result.append(self._build_report(name).basic_format().strip()) result.append(self._build_report(name).basic_format().strip())
@ -561,16 +562,17 @@ class RecursiveBuildReport:
def missing_rating(from_repo, to_repo): def missing_rating(from_repo, to_repo):
result = collections.defaultdict(set) result = collections.defaultdict(set)
reporter = BuildReporter(from_repo, to_repo)
for name in from_repo.sources: for name in from_repo.sources:
if name in to_repo.sources or b'gost' in name: if name in to_repo.sources or b'gost' in name:
continue continue
try: try:
rbr = RecursiveBuildReport(from_repo, to_repo, [name]) rbr = reporter.recursive_closure([name])
except Exception: except Exception:
LOG.error("Failed to build recursive build report" LOG.error("Failed to build recursive build report"
"for %s", name.decode(), exc_info=True) "for %s", name.decode(), exc_info=True)
else: else:
for other_name in rbr.reported_names: for other_name in rbr:
if other_name != name: if other_name != name:
result[other_name].add(name) result[other_name].add(name)
return sorted(result.items(), key=lambda x: (len(x[1]), x[0])) return sorted(result.items(), key=lambda x: (len(x[1]), x[0]))
@ -592,12 +594,14 @@ def recursive_build_report(from_repo, to_repo, *source_names,
build_source_deps = {} build_source_deps = {}
pkg_source_deps = {} pkg_source_deps = {}
requested_source_names = set(_src_name(s) for s in source_names) requested_source_names = set(_src_name(s) for s in source_names)
reporter = BuildReporter(from_repo, to_repo, ignore=ignore,
prefer=requested_source_names)
stack = list(requested_source_names) stack = list(requested_source_names)
seen = set(stack) seen = set(stack)
while stack: while stack:
cur = stack.pop() cur = stack.pop()
report = build_report(from_repo, to_repo, cur, ignore, prefer=seen) report = reporter.report(cur)
cur_in_to = to_repo.sources.get(cur) cur_in_to = to_repo.sources.get(cur)
cur_source_srpm = cur_in_to.source_rpm if cur_in_to else b'NONE' cur_source_srpm = cur_in_to.source_rpm if cur_in_to else b'NONE'
reports.append('\n== %s ==\n%s has %s\n' % ( reports.append('\n== %s ==\n%s has %s\n' % (