lists: Parallelize read_pkglist_heders_for_repo
Use the multiprocessing module to read the headers for each repo in parallel. Usually, reading the data for a repository means loading 6 lists, so the gain is considerable.
parent bbf24803fa
commit 8388c0627b
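The shape of the change, distilled: instead of parsing each pkglist file as it is discovered, the loop now only collects file paths, and a worker pool parses both batches concurrently at the end. A minimal self-contained sketch of that pattern follows; parse_one and load_all are hypothetical stand-ins for illustration, not the module's real code:

    import multiprocessing

    def parse_one(path):
        # stand-in for the real per-file parser (read_pkglist_headers_rpm)
        return [path.upper()]

    def load_all(src_lists, bin_lists):
        with multiprocessing.Pool() as p:
            # submit both batches before blocking on either, so all
            # files are parsed concurrently by the pool workers
            src_res = p.map_async(parse_one, src_lists)
            bin_res = p.map_async(parse_one, bin_lists)
            # .get() must run inside the with block: Pool.__exit__
            # calls terminate(), which would kill unfinished workers
            return sum(src_res.get(), []), sum(bin_res.get(), [])

    if __name__ == '__main__':
        print(load_all(['a.src', 'b.src'], ['a.bin']))

Note that map_async is used twice rather than two blocking map calls: a plain p.map over the source lists would have to finish completely before the binary lists were even submitted.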
@@ -1,6 +1,7 @@
 import collections
 import logging
+import multiprocessing
 import os
 import shutil
 import subprocess
@@ -84,9 +85,11 @@ def read_pkglist_headers_rpm(path):


 def read_pkglist_heders_for_repo(repo_path, arches, components=None):
-    bin_headers = []
-    src_headers = []
+    bin_lists = []
+    src_lists = []
     seen = set()

     # collect the files
     for arch in arches:
         basedir = os.path.join(repo_path, arch, 'base')
         for pkglist in os.listdir(basedir):
@@ -103,9 +106,12 @@ def read_pkglist_heders_for_repo(repo_path, arches, components=None):
                 LOG.info('Ignoring %s/%s', basedir, pkglist)
                 continue
             seen.add(what)
-            (src_headers if parts[0] == 'srclist' else bin_headers).extend(
-                read_pkglist_headers_rpm(os.path.join(basedir, pkglist)))
-    return src_headers, bin_headers
+            (src_lists if parts[0] == 'srclist' else bin_lists).append(
+                os.path.join(basedir, pkglist))
+    with multiprocessing.Pool() as p:
+        src_res = p.map_async(read_pkglist_headers_rpm, src_lists)
+        bin_res = p.map_async(read_pkglist_headers_rpm, bin_lists)
+        return sum(src_res.get(), []), sum(bin_res.get(), [])


 def _read_pkglist_rpm(path):
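A side note on the flattening in the new return statement: sum(lists, []) builds a fresh list for each addition, re-copying everything accumulated so far, so its cost grows with the number of lists times the total number of headers. With the usual six pkglists per repository that is negligible, but itertools.chain.from_iterable is the linear-time alternative for larger inputs (a sketch, not part of the commit):

    import itertools

    def flatten(lists):
        # linear-time equivalent of sum(lists, [])
        return list(itertools.chain.from_iterable(lists))

    assert flatten([[1, 2], [3]]) == sum([[1, 2], [3]], [])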