repos: Introduce BuildReporter

Instead of a global cache, we now have an object
which encapsulates it, together with the parameters
that affect reports. This seems to be a simpler
and more robust model.

missing_rating and recursive_build_report
now use it, although the refactoring is not
yet complete.
This commit is contained in:
Ivan A. Melnikov 2023-12-05 14:03:36 +04:00
parent 51aa8967d0
commit 09b4700064

View File

@ -17,6 +17,12 @@ from repos_cmp import rpm_ffi
LOG = logging.getLogger(__name__)
def _src_name(source):
if source.endswith(b'.src.rpm'):
return source.rsplit(b'-', 2)[0]
return source
class Dependency(collections.namedtuple(
'Dependency', ['name', 'flags', 'version'])):
@ -416,15 +422,6 @@ def _from_64bit_dep(dep):
return Dependency(new_name, dep.flags, dep.version)
def have_same_source(repoA, repoB, source_name):
    """Check that both repos carry *source_name* with an identical EVR.

    Falsy when the source is missing from either repo or when the
    epoch/version/release triplets differ.
    """
    ours = repoA.sources.get(source_name)
    theirs = repoB.sources.get(source_name)
    return (ours and theirs
            and (ours.epoch, ours.version, ours.release)
            == (theirs.epoch, theirs.version, theirs.release))
_BUILDREQ = '0000-BR'
_BUILD_UNMET = '0001-UN'
@ -433,126 +430,130 @@ def _is_build_kind(kind):
return kind in (_BUILDREQ, _BUILD_UNMET)
__BUILD_REPORT_CACHE = {}
class BuildReporter:
def build_report(from_repo, to_repo, source_name, ignore=(), prefer=()):
    """Build report for one source, by name

    Returns an iterable over tuples (kind, dep, provider).

    :param from_repo: the 64-bit repository the source comes from
    :param to_repo: the target repository to check the build against
    :param source_name: name of the source package to report on
    :param ignore: source names whose packages must not be suggested
        as providers
    :param prefer: source names whose packages win when several
        providers match a dependency
    """
    # NOTE(review): indentation reconstructed from a whitespace-stripped
    # diff -- verify nesting against the original file.
    # Reports are memoized per (repos, source, options) combination; a
    # cached entry is reused only while both repos are still valid for it.
    cache_key = (from_repo.name, to_repo.name, source_name,
                 frozenset(ignore), frozenset(prefer))
    cached_result = __BUILD_REPORT_CACHE.get(cache_key)
    if cached_result is not None:
        if cached_result.is_valid([from_repo, to_repo]):
            return cached_result
    LOG.debug("Building build report: %s", cache_key)
    # XXX: assumes from_repo is x86_64
    assert from_repo.bits == 64
    # 32-bit targets need their dependency names translated from 64-bit
    translate = (to_repo.bits != 64)
    # find what's missing for the build chroot in to_repo
    chroot = to_repo.chroot_for(
        from_repo.sources[source_name].requires, prefer=prefer)
    # distinguish unmets from build requirements that are missing
    buildreqs = {dep for dep in from_repo.sources[source_name].requires}
    missing_reqs = ((_BUILDREQ, dep) for dep in buildreqs
                    if not chroot.is_provided(dep))
    chroot_unmets = ((_BUILD_UNMET, dep) for dep in chroot.unmets()
                     if dep not in buildreqs)
    # add requirements for the binary packages this source package
    # produces in the from_repo; if needed, try to translate from 64 to 32 bits
    binreqs = ((b.name.decode(), (_from_64bit_dep(dep) if translate else dep))
               for b in from_repo.binaries_from(source_name)
               for dep in b.requires)
    result = set()
    for kind, dep in itertools.chain(missing_reqs, chroot_unmets, binreqs):
        # skip some platform-specific stuff
        if any(x in dep.name for x in _SPECIAL_DEPS):
            continue
        # skip dependencies already present in to_repo
        if any(to_repo.providers(dep)):
            continue
        # skip inter-sub-package dependencies
        if any(p.source_name == source_name
               for _d, p in from_repo.providers(dep)):
            continue
        # set-versions may be platform-dependent.
        # double-check that if have the same source
        if (dep.is_setversion()
                or _GHC_HASHLIB.match(dep.name)
                or _GHC_HASHDEP.fullmatch(dep.name)):
            if any(have_same_source(from_repo, to_repo, p.source_name)
                   for _d, p in from_repo.providers(dep)):
                continue
        # ok, it's true missing dependency
        # let's look up providers
        providers = set(provider
                        for _dep, provider in from_repo.providers(dep)
                        if provider.source_name not in ignore)
        if not providers:
            LOG.warning("No providers for %s", dep)
        elif len(providers) > 1:
            LOG.warning("%d provider(s) for %s: %s",
                        len(providers), dep, list(providers))
            # narrow down to the preferred providers, but only when that
            # still leaves at least one candidate
            preferred = [p for p in providers
                         if p.source_name in prefer]
            if preferred and len(preferred) < len(providers):
                LOG.info("Using preferred providers: %s", preferred)
                providers = preferred
        result.update((kind, dep, provider) for provider in providers)
    the_report = TripletReport([from_repo, to_repo], result)
    __BUILD_REPORT_CACHE[cache_key] = the_report
    return the_report
def _src_name(source):
if source.endswith(b'.src.rpm'):
return source.rsplit(b'-', 2)[0]
return source
# NOTE(review): this span is a whitespace-stripped diff fragment -- it
# contains BOTH the removed and the added constructor signatures,
# interleaved.  Do not read it as straight-line code.
class RecursiveBuildReport:
# removed: old per-request constructor took the source names up front
def __init__(self, from_repo, to_repo, source_names,
stop_names=(), prefer=()):
# added: reusable-reporter constructor with optional result caching
def __init__(self, from_repo, to_repo, ignore=(), prefer=(), caching=True):
self.from_repo = from_repo
self.to_repo = to_repo
self.stop_names = frozenset(stop_names)  # removed: stop_names move to the call sites
self.ignore = ignore  # added: source names never suggested as providers
self.prefer = prefer
self.requested_names = [_src_name(s) for s in source_names]  # removed
self.reported_names = self._closure()  # removed
self._cache = {} if caching else None  # added: None disables caching
def _build_report(self, name):
    """Compute the build report for *name* with this object's repos.

    Thin delegation wrapper; all the heavy lifting happens in the
    module-level build_report().
    """
    return build_report(
        self.from_repo, self.to_repo, name, prefer=self.prefer)
def report(self, source_name):
    """Return the (possibly cached) build report for *source_name*.

    When caching is enabled (``self._cache`` is a dict) the computed
    report is memoized per source name; when disabled
    (``self._cache`` is None) every call recomputes.
    """
    # Fixed: the old code tested ``if self._cache:`` -- an empty cache
    # dict is falsy, so the cache was never consulted nor populated.
    if self._cache is not None:
        cached_result = self._cache.get(source_name)
        if cached_result:
            return cached_result
    report = self._raw_report(source_name)
    if self._cache is not None:
        # Fixed: was ``self.cache[...]`` (AttributeError at runtime).
        self._cache[source_name] = report
    return report
def _same_source(self, source_name):
sourceA = self.from_repo.sources.get(source_name)
sourceB = self.to_repo.sources.get(source_name)
return (sourceA and sourceB
and sourceA.epoch == sourceB.epoch
and sourceA.version == sourceB.version
and sourceA.release == sourceB.release)
def _raw_report(self, source_name):
    """Build report for one source, by name

    Returns an iterable over tuples (kind, dep, provider).

    Uncached workhorse behind report(); uses the ignore/prefer lists
    stored on the instance.
    """
    # NOTE(review): indentation reconstructed from a whitespace-stripped
    # diff -- verify nesting against the original file.
    LOG.debug("Building build report for %s", source_name)
    # XXX: assumes from_repo is x86_64
    assert self.from_repo.bits == 64
    # 32-bit targets need their dependency names translated from 64-bit
    translate = (self.to_repo.bits != 64)
    from_source = self.from_repo.sources[source_name]
    # find what's missing for the build chroot in to_repo
    chroot = self.to_repo.chroot_for(
        from_source.requires, prefer=self.prefer)
    # distinguish unmets from build requirements that are missing
    buildreqs = {dep for dep in from_source.requires}
    missing_reqs = ((_BUILDREQ, dep) for dep in buildreqs
                    if not chroot.is_provided(dep))
    chroot_unmets = ((_BUILD_UNMET, dep) for dep in chroot.unmets()
                     if dep not in buildreqs)
    # add requirements for the binary packages this source package produces
    # in the from_repo; if needed, try to translate from 64 to 32 bits
    binreqs = (
        (b.name.decode(), (_from_64bit_dep(dep) if translate else dep))
        for b in self.from_repo.binaries_from(source_name)
        for dep in b.requires)
    result = set()
    for kind, dep in itertools.chain(missing_reqs, chroot_unmets, binreqs):
        # skip some platform-specific stuff
        if any(x in dep.name for x in _SPECIAL_DEPS):
            continue
        # skip dependencies already present in to_repo
        if any(self.to_repo.providers(dep)):
            continue
        # skip inter-sub-package dependencies
        if any(p.source_name == source_name
               for _d, p in self.from_repo.providers(dep)):
            continue
        # set-versions may be platform-dependent.
        # double-check that if have the same source
        if (dep.is_setversion()
                or _GHC_HASHLIB.match(dep.name)
                or _GHC_HASHDEP.fullmatch(dep.name)):
            if any(self._same_source(p.source_name)
                   for _d, p in self.from_repo.providers(dep)):
                continue
        # ok, it's true missing dependency
        # let's look up providers
        providers = set(provider
                        for _dep, provider in self.from_repo.providers(dep)
                        if provider.source_name not in self.ignore)
        if not providers:
            LOG.warning("No providers for %s", dep)
        elif len(providers) > 1:
            LOG.warning("%d provider(s) for %s: %s",
                        len(providers), dep, list(providers))
            # narrow down to the preferred providers, but only when that
            # still leaves at least one candidate
            preferred = [p for p in providers
                         if p.source_name in self.prefer]
            if preferred and len(preferred) < len(providers):
                LOG.info("Using preferred providers: %s", preferred)
                providers = preferred
        result.update((kind, dep, provider) for provider in providers)
    return TripletReport([self.from_repo, self.to_repo], result)
# NOTE(review): diff fragment -- the added recursive_closure() and the
# removed _closure() are interleaved below; they share the loop body.
def recursive_closure(self, source_names, stop_names=()):
seen = list(source_names)
queue = list(source_names)
stop_names = set(stop_names)
# removed variant: read the requested names stored on the instance
def _closure(self):
seen = list(self.requested_names)
queue = list(self.requested_names)
while queue:
cur = queue.pop()
missing = self._build_report(cur).sources_set()  # removed
missing = self.report(cur).sources_set()  # added: go through the report cache
unseen = missing.difference(seen)
seen.extend(unseen)
# don't recur into packages from stop_names
queue.extend(unseen - self.stop_names)  # removed
queue.extend(unseen - stop_names)  # added: stop_names is now a per-call argument
return seen
# NOTE(review): diff fragment -- the removed format_build_reports() and
# the added format_reports() share the body below.
def format_build_reports(self, to=None):  # removed signature
def format_reports(self, source_names, stop_names=(), to=None):  # added signature
stop_names = set(stop_names)
result = []
for name in self.reported_names:  # removed: names came from the instance
for name in source_names:  # added: names are now a parameter
result.append(f"\n== {name.decode()} ==")
in_to = self.to_repo.sources.get(name)
srpm = in_to.source_rpm.decode() if in_to else 'NONE'
result.append(f"{self.to_repo.name} has {srpm}\n")
if name in self.stop_names:  # removed
if name in stop_names:  # added
result.append('not going deeper (stop_names)')
else:
result.append(self._build_report(name).basic_format().strip())
@ -561,16 +562,17 @@ class RecursiveBuildReport:
def missing_rating(from_repo, to_repo):
    """Rate sources missing from *to_repo* by their dependants.

    For every source present in from_repo but absent from to_repo
    (gost-related sources are skipped), compute the recursive build
    closure and record, for each other source in that closure, which
    missing sources dragged it in.

    Returns a list of (source_name, set_of_dependants) pairs sorted by
    the number of dependants, then by name.
    """
    result = collections.defaultdict(set)
    # One reporter (and thus one shared report cache) serves all the
    # per-source closures computed below.
    reporter = BuildReporter(from_repo, to_repo)
    for name in from_repo.sources:
        if name in to_repo.sources or b'gost' in name:
            continue
        try:
            closure = reporter.recursive_closure([name])
        except Exception:
            # Best-effort: one broken source must not abort the rating.
            # Fixed: the two message parts used to concatenate without a
            # separating space ("...build reportfor %s").
            LOG.error("Failed to build recursive build report"
                      " for %s", name.decode(), exc_info=True)
        else:
            for other_name in closure:
                if other_name != name:
                    result[other_name].add(name)
    return sorted(result.items(), key=lambda x: (len(x[1]), x[0]))
@ -592,12 +594,14 @@ def recursive_build_report(from_repo, to_repo, *source_names,
build_source_deps = {}
pkg_source_deps = {}
requested_source_names = set(_src_name(s) for s in source_names)
reporter = BuildReporter(from_repo, to_repo, ignore=ignore,
prefer=requested_source_names)
stack = list(requested_source_names)
seen = set(stack)
while stack:
cur = stack.pop()
report = build_report(from_repo, to_repo, cur, ignore, prefer=seen)
report = reporter.report(cur)
cur_in_to = to_repo.sources.get(cur)
cur_source_srpm = cur_in_to.source_rpm if cur_in_to else b'NONE'
reports.append('\n== %s ==\n%s has %s\n' % (