image-forge/build.py

902 lines
28 KiB
Python
Executable File

#!/usr/bin/python3
import argparse
import functools
import json
import re
import subprocess
import textwrap
from dataclasses import dataclass
from graphlib import TopologicalSorter
from pathlib import Path
import requests
import tomli
import yaml
from jinja2 import Template
# Root directory of the image tree: one sub-directory per organization, each
# holding one sub-directory per image (this layout is what parse_args() walks).
ORG_DIR = Path("org")
# Value of the --package-versions command-line option (image canonical name ->
# package version); assigned in main() and left as None when the option is absent.
PKG_VERSIONS: dict | None = None
@dataclass
class Image:
    """Description of a single image directory under ``ORG_DIR``.

    Attributes:
        canonical_name: ``"<organization>/<image>"`` path relative to ``ORG_DIR``.
        path: directory of the image (``ORG_DIR / canonical_name``).
        base_name: ``canonical_name`` with the leading ``"<organization>/"`` removed.
        is_versioned: value of the ``is_versioned`` key of ``info.yaml``;
            ``None`` when the image has no ``info.yaml``.
        source_packages: value of the ``source_packages`` key of ``info.yaml``;
            ``None`` when the image has no ``info.yaml``.
        version_template: optional ``version_template`` key of ``info.yaml``;
            ``None`` when the key or the file is absent.

    Raises:
        RuntimeError: ``info.yaml`` exists but lacks a mandatory key, or the
            image is versioned and lists no source packages.
    """

    canonical_name: str
    is_versioned: bool | None
    version_template: str | None
    source_packages: list[str] | None

    def __init__(self, canonical_name: str):
        self.canonical_name = canonical_name
        self.path = ORG_DIR / canonical_name
        self.base_name = re.sub("^[^/]+/", "", canonical_name)

        # Every declared field gets a value up front, so the dataclass-generated
        # __repr__/__eq__ work even for images that have no info.yaml.
        self.is_versioned = None
        self.source_packages = None
        self.version_template = None

        info_file = self.path / "info.yaml"
        if not info_file.exists():
            return

        # An empty info.yaml parses to None; treat it as an empty mapping so
        # the missing-key error below is reported instead of a TypeError.
        info: dict = yaml.safe_load(info_file.read_text()) or {}
        for key in ("is_versioned", "source_packages"):
            if key not in info:
                raise RuntimeError(
                    f"info.yaml for {self.canonical_name} doesn't contain {key!r} key"
                )

        self.is_versioned = info["is_versioned"]
        self.source_packages = info["source_packages"]
        if self.is_versioned and not self.source_packages:
            raise RuntimeError(
                f"source_packages for {self.canonical_name} doesn't contain any values"
            )

        self.version_template = info.get("version_template")
class Tasks:
    """Per-branch task lists loaded from an optional TOML file.

    The parsed document is expected to map a branch name to a table whose keys
    are task identifiers and whose values are lists of image canonical names;
    an empty list means the task applies to every image.
    """

    def __init__(self, tasks):
        """Load the TOML file at path ``tasks``; ``None`` means "no tasks"."""
        if tasks is None:
            self._tasks = None
        else:
            self._tasks = tomli.loads(Path(tasks).read_text())

    def __str__(self):
        return f"{self._tasks}"

    def get(self, branch, image: "Image"):
        """Return the task identifiers of ``branch`` that apply to ``image``.

        Always returns a list: it is empty when no tasks file was given, when
        the branch has no entry, or when none of the branch tasks match.
        """
        if self._tasks is None:
            return []
        branch_tasks = self._tasks.get(branch)
        if not branch_tasks:
            # Unknown branch (or an empty table): nothing to add.
            return []
        return [
            task
            for task, images in branch_tasks.items()
            if not images or image.canonical_name in images
        ]
def api_get_source_package_version(branch: str, package_name: str) -> str:
    """Return the version of source package ``package_name`` in ``branch``.

    Queries the ``package_versions_from_tasks`` endpoint of rdb.altlinux.org
    and returns the ``version`` field of the first entry of ``versions``.

    Raises:
        RuntimeError: the service answered with a non-200 status, or the
            response carries no versions for the package.
    """
    api_url = "https://rdb.altlinux.org/api/site/package_versions_from_tasks"
    params = {"branch": branch, "name": package_name}
    # Without a timeout requests waits forever on an unresponsive server.
    response = requests.get(api_url, params, timeout=60)
    if response.status_code != 200:
        print(response)
        raise RuntimeError(
            f"failed to retrieve source package version: source package {package_name!r}, branch {branch!r} "
        )
    versions = response.json().get("versions")
    if not versions:
        raise RuntimeError(
            f"no versions found for source package {package_name!r} in branch {branch!r}"
        )
    return versions[0]["version"]
class Tags:
    """Compute the list of tags an image gets in a given branch.

    Tags come either from an explicit TOML file (``image -> branch -> list``)
    or, when no file is given, from the version of the image's first source
    package (versioned images) or from the branch name (all other images).
    """

    def __init__(self, tags_file: str | None, latest: str):
        self._tags = (
            None if tags_file is None else tomli.loads(Path(tags_file).read_text())
        )
        self._latest = latest

    def tags(self, branch: str, image: Image):
        """Return the tags of ``image`` in ``branch``; the first one is primary.

        ``"latest"`` is appended when ``branch`` is the configured latest branch.
        """
        if self._tags is not None:
            # Copy so that appending "latest" never touches the loaded data.
            result = self._tags[image.canonical_name][branch].copy()
        elif image.is_versioned and image.source_packages:
            result = [self._version_tag(branch, image)]
        else:
            result = [branch]

        if branch == self._latest:
            result.append("latest")
        return result

    def _version_tag(self, branch: str, image: Image):
        """Derive the tag of a versioned image from its first source package."""
        package_name = image.source_packages[0]

        # The package name may hold jinja statements that depend on the branch.
        if re.search("{%.*%}", package_name):
            package_name = Template(package_name).render(branch=branch).strip()
            print(f"{package_name=}")

        # ... and/or a jinja expression filled from --package-versions.
        if re.search("{{.*}}", package_name):
            if PKG_VERSIONS is None:
                raise RuntimeError(
                    f"--package-versions option is not specified, required for {image.canonical_name!r}"
                )
            if image.canonical_name not in PKG_VERSIONS:
                raise RuntimeError(
                    f"--package-versions option does not contain version for image {image.canonical_name!r}"
                )
            requested = PKG_VERSIONS[image.canonical_name]
            if not requested:
                raise RuntimeError(
                    f"invalid version for image {image.canonical_name!r}: {requested!r}"
                )
            package_name = Template(package_name).render(version=requested)

        version = api_get_source_package_version(branch, package_name)
        if image.version_template is None:
            return version
        return Template(image.version_template).render(version=version).strip()
class Distroless:
def __init__(self, distrolessfile, renderer):
dd = tomli.loads(distrolessfile.read_text())
self.raw_from = dd["from"]
self.renderer = renderer
self.from_ = renderer(dd["from"])
self.file_lists = dd.get("file-lists", [])
self.files = dd.get("files", [])
self.library_files = dd.get("library-files", [])
self.packages = dd.get("packages", [])
self.library_packages = dd.get("library-packages", [])
self.exclude_regexes = dd.get("exclude-regexes", [])
self.builder_install_packages = dd.get("builder-install-packages", [])
for file in dd.get("full-files", []):
self.builder_install_packages.append(file)
self.files.append(file)
self.library_files.append(file)
self.timezone = dd.get("timezone")
if self.timezone:
self.files.append("/etc/localtime")
self.copy = dd.get("copy", {})
self.config_options = []
for option in ["cmd", "entrypoint", "user"]:
if value := dd.get(option):
if isinstance(value, list):
value = json.dumps(value)
self.config_options.append(f"--{option}={value}")
if value := dd.get("workdir"):
self.config_options.append(f"--workingdir={value}")
elif value := dd.get("workingdir"):
self.config_options.append(f"--workingdir={value}")
def render_arch_branch(self, arch, branch):
def if_arches(arches, value, default=""):
if arch in arches or not arches:
return value
else:
return default
def if_branches(branches, value, default=""):
if branch in branches or not branches:
return value
else:
return default
def if_arches_branches(arches, branches, value, default=""):
if arches and arch not in arches or branches and branch not in branches:
return default
else:
return value
renderer = functools.partial(
self.renderer,
if_arches=if_arches,
if_branches=if_branches,
if_arches_branches=if_arches_branches,
)
def filter_map(values):
if isinstance(values, dict):
return {k: r for k, v in values.items() if (r := renderer(v)) != ""}
else:
return [r for v in values if (r := renderer(v)) != ""]
self.builder_install_packages = filter_map(self.builder_install_packages)
self.config_options = filter_map(self.config_options)
self.copy = filter_map(self.copy)
self.file_lists = filter_map(self.file_lists)
self.files = filter_map(self.files)
self.library_files = filter_map(self.library_files)
self.library_packages = filter_map(self.library_packages)
self.packages = filter_map(self.packages)
class DockerBuilder:
    """Render Dockerfiles and build, tag and push the images of one organization.

    An instance is bound to a single organization directory and a single
    branch. Commands are executed through run(), which only echoes them when
    ``dry_run`` is set.
    """

    def make_image_re(self):
        """Return the regex source matching ``[registry/][organization/]name[:tag]``."""
        registry = r"(?P<registry>[\w.:]+)"
        organization = r"(?P<organization>\w+)"
        name = r"(?P<name>[-.\w]+)"
        tag = r"(?P<tag>[\w.]+)"
        # The optional parts are wrapped in non-capturing groups.
        return rf"(?:{registry}/)?(?:{organization}/)?{name}(?::{tag})?"

    def make_dockerfile_from_re(self):
        """Return the regex source matching a whole ``FROM <image>`` line."""
        image_re = self.make_image_re()
        return rf"^\s*FROM\s+{image_re}$"

    def __init__(
        self,
        registry,
        branch,
        organization,
        overwrite_organization,
        latest,
        dry_run,
        images_info,
        tasks: "Tasks",
        tags: "Tags",
    ):
        self.image_re = re.compile(self.make_image_re())
        self.dockerfile_from_re = re.compile(self.make_dockerfile_from_re())
        self.org_dir = ORG_DIR
        self.images_dir = ORG_DIR / organization
        self.registry = registry
        self.branch = branch
        self.organization = organization
        # The organization used in rendered image names; defaults to the
        # on-disk organization.
        self.overwrite_organization = overwrite_organization or organization
        self.latest = latest
        self.dry_run = dry_run
        self.images_info = images_info
        self.tasks = tasks
        self.tags = tags
        # canonical name -> Distroless, filled by load_distrolesses().
        self.distrolesses = {}

    def forall_images(consume_result):
        """Decorator factory: call the method once per entry of ``images_dir``.

        The decorated method receives the keyword arguments ``image``,
        ``dockerfile``, ``dockerfile_template`` and ``distrolessfile``. With
        ``consume_result=True`` the calls are executed eagerly and their
        results discarded; otherwise a generator of the results is returned.
        """

        def forall_images_decorator(f):
            @functools.wraps(f)
            def wrapped(self, *args, **kwargs):
                # Sorted so that processing order does not depend on the
                # file system's directory listing order.
                for image_path in sorted(self.images_dir.iterdir()):
                    # Drop the first path component (the ORG_DIR part).
                    image = Image("/".join(image_path.parts[1:]))
                    local_kwargs = {
                        "image": image,
                        "dockerfile": image_path / "Dockerfile",
                        "dockerfile_template": image_path / "Dockerfile.template",
                        "distrolessfile": image_path / "distroless.toml",
                    }
                    yield f(self, *args, **(kwargs | local_kwargs))

            @functools.wraps(f)
            def consumer(*args, **kwargs):
                for _ in wrapped(*args, **kwargs):
                    pass

            return consumer if consume_result else wrapped

        return forall_images_decorator

    @forall_images(consume_result=True)
    def remove_dockerfiles(self, **kwargs):
        """Delete the rendered Dockerfile of every image, if present."""
        if kwargs["dockerfile"].exists():
            kwargs["dockerfile"].unlink()

    def render_template(
        self,
        template: str,
        organization: str,
        install_packages=None,
        **kwargs,
    ) -> str:
        """Render a jinja ``template`` with the builder's standard variables.

        Provides ``alt_image``, ``branch``, ``install_packages``,
        ``organization`` and ``registry`` (with a trailing slash, or empty);
        extra keyword arguments are passed through to the template.
        """
        if self.registry:
            registry = self.registry.rstrip("/") + "/"
            alt_image = "alt/alt"
        else:
            registry = ""
            alt_image = "alt"
        return Template(template).render(
            alt_image=alt_image,
            branch=self.branch,
            install_packages=install_packages,
            organization=organization,
            registry=registry,
            **kwargs,
        )

    @forall_images(consume_result=True)
    def render_dockerfiles(self, **kwargs):
        """Render ``Dockerfile.template`` of every image into ``Dockerfile``."""

        def install_packages(*names):
            # Template helper: expands to a RUN instruction that optionally
            # adds the task repositories, installs `names` and cleans apt caches.
            tasks = self.tasks.get(self.branch, kwargs["image"])
            linux32 = '$([ "$(rpm --eval %_host_cpu)" = i586 ] && echo linux32)'
            if tasks:
                apt_repo = "\\\n apt-get install apt-repo -y && \\"
                for task in tasks:
                    apt_repo += f"\n {linux32} apt-repo add {task} && \\"
                apt_repo += "\n apt-get update && \\"
            else:
                apt_repo = "\\"
            update_command = f"""RUN apt-get update && {apt_repo}"""
            install_command = f"""
                {linux32} apt-get install -y {' '.join(names)} && \\
                rm -f /var/cache/apt/archives/*.rpm \\
                /var/cache/apt/*.bin \\
                /var/lib/apt/lists/*.*
            """
            install_command = textwrap.dedent(install_command).rstrip("\n")
            install_command = textwrap.indent(install_command, " " * 4)
            return update_command + install_command

        dockerfile_template = kwargs["dockerfile_template"]
        if dockerfile_template.exists():
            rendered = self.render_template(
                dockerfile_template.read_text(),
                self.overwrite_organization,
                install_packages,
            )
            kwargs["dockerfile"].write_text(rendered + "\n")

    @forall_images(consume_result=True)
    def load_distrolesses(self, **kwargs):
        """Parse every existing ``distroless.toml`` into ``self.distrolesses``."""
        renderer = functools.partial(
            self.render_template,
            organization=self.overwrite_organization,
        )
        distrolessfile = kwargs["distrolessfile"]
        # "<organization>/<image>" taken from the file's two parent directories.
        canonical_name = "/".join(distrolessfile.parts[-3:-1])
        if distrolessfile.exists():
            self.distrolesses[canonical_name] = Distroless(distrolessfile, renderer)

    @forall_images(consume_result=False)
    def get_requires(self, **kwargs):
        """Yield ``(canonical_name, required_images)`` for every image.

        Requirements are the base images named on the ``FROM`` lines of
        ``Dockerfile.template`` or, for distroless images, the distroless
        builder plus the ``from`` image; ``scratch`` is ignored. Distroless
        images must have been loaded with load_distrolesses() beforehand.
        """
        requires = set()
        dockerfile_template = kwargs["dockerfile_template"]
        distrolessfile = kwargs["distrolessfile"]
        canonical_name = kwargs["image"].canonical_name

        def add_requirement(match):
            if match is None:
                return
            from_image = match.groupdict()
            if from_image["name"] != "scratch":
                requires.add(f"{from_image['organization']}/{from_image['name']}")

        if dockerfile_template.exists():
            for line in dockerfile_template.read_text().splitlines():
                if not re.match(r"\s*FROM", line):
                    continue
                # Dependencies are resolved against the on-disk organization,
                # not the overwritten one.
                line = self.render_template(line, self.organization)
                add_requirement(self.dockerfile_from_re.match(line))
        elif distrolessfile.exists():
            requires.add("alt/distroless-builder")
            raw_from = self.distrolesses[canonical_name].raw_from
            from_ = self.render_template(raw_from, self.organization)
            add_requirement(self.image_re.match(from_))

        return (canonical_name, requires)

    def get_build_order(self):
        """Return the images in dependency order (requirements first)."""
        requires = dict(self.get_requires())
        ts = TopologicalSorter(requires)
        return (Image(i) for i in ts.static_order())

    def render_full_tag(self, image: "Image", tag: str):
        """Return ``[registry/]organization/base_name[:tag]`` for ``image``."""
        if self.registry:
            registry = self.registry.rstrip("/") + "/"
        else:
            registry = ""
        if tag:
            tag = f":{tag}"
        return f"{registry}{self.overwrite_organization}/{image.base_name}{tag}"

    def run(self, cmd, *args, **kwargs):
        """Run ``cmd`` via subprocess.run (``check=True`` unless overridden).

        In dry-run mode the command is prefixed with ``echo`` instead of
        being executed.
        """
        kwargs.setdefault("check", True)
        pre_cmd = ["echo"] if self.dry_run else []
        subprocess.run(pre_cmd + cmd, *args, **kwargs)

    def _start_build(self, image: "Image", arches):
        """Common build prologue.

        Returns ``None`` when the image is skipped for this branch, otherwise
        ``(build_arches, tags, manifest)`` after removing any image or
        manifest that already carries the primary tag.
        """
        if self.images_info.skip_branch(image.canonical_name, self.branch):
            return None

        # Sorted for a reproducible build order and --platform value.
        build_arches = sorted(
            set(arches) - set(self.images_info.skip_arches(image.canonical_name))
        )
        tags = self.tags.tags(self.branch, image)
        manifest = self.render_full_tag(image, tags[0])

        # Report the architectures actually built, i.e. after skip-arches.
        print(f"Building image {manifest} for {build_arches} arches")

        # Best-effort cleanup: failures (nothing to remove) are ignored.
        for rm_cmd in (
            ["podman", "image", "rm", "--force", manifest],
            ["podman", "manifest", "rm", manifest],
        ):
            self.run(
                rm_cmd,
                check=False,
                stderr=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
            )
        return build_arches, tags, manifest

    def _tag_aliases(self, image: "Image", manifest, tags):
        """Tag ``manifest`` with every tag after the primary one."""
        for tag in tags[1:]:
            other_manifest = self.render_full_tag(image, tag)
            self.run(["podman", "tag", manifest, other_manifest])

    def _distroless_build_arch(self, image: "Image", arch, manifest):
        """Build the distroless image for one ``arch`` and add it to ``manifest``."""
        distroless_builder = self.render_full_tag(
            Image("alt/distroless-builder"), self.branch
        )
        distroless = self.distrolesses[image.canonical_name]
        distroless.render_arch_branch(arch, self.branch)
        builder = f"distroless-builder-{arch}"
        new = f"distroless-new-{arch}"
        run = functools.partial(self.run, cwd=image.path)

        # Remove containers possibly left over from an earlier run.
        run(
            ["buildah", "rm", builder, new],
            check=False,
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
        )
        run(
            ["buildah", "from", "--arch", arch, "--name", builder, distroless_builder]
        )
        run(["buildah", "from", "--arch", arch, "--name", new, distroless.from_])

        if packages := distroless.builder_install_packages:
            tasks = self.tasks.get(self.branch, image)
            if tasks:
                if arch == "386":
                    apt_repo_add = ["linux32", "apt-repo", "add"]
                else:
                    apt_repo_add = ["apt-repo", "add"]
                for task in tasks:
                    run(["buildah", "run", builder] + apt_repo_add + [task])
                run(["buildah", "run", builder, "apt-get", "update"])
            run(["buildah", "run", builder, "apt-get", "reinstall", "-y"] + packages)

        if timezone := distroless.timezone:
            run(
                [
                    "buildah",
                    "run",
                    builder,
                    "ln",
                    "-s",
                    f"/usr/share/zoneinfo/{timezone}",
                    "/etc/localtime",
                ]
            )

        # Options of the "add" step of distroless-builder.py.
        options = []
        if distroless.files:
            options += ["-f"] + distroless.files
        if distroless.library_files:
            options += ["--library-files"] + distroless.library_files
        if file_lists := distroless.file_lists:
            options += ["-l"]
            options += [f"file-lists/{f}" for f in file_lists]
            for file_list in file_lists:
                run(
                    [
                        "buildah",
                        "copy",
                        builder,
                        f"./{file_list}",
                        f"file-lists/{file_list}",
                    ]
                )
        if distroless.packages:
            options += ["-p"] + distroless.packages
        if distroless.library_packages:
            options += ["--library-packages"] + distroless.library_packages
        run(
            ["buildah", "run", builder, "./distroless-builder.py", "add", "--clean"]
            + options
        )

        exclude_regexes_options = []
        if distroless.exclude_regexes:
            exclude_regexes_options = ["-r"] + distroless.exclude_regexes
        run(
            ["buildah", "run", builder, "./distroless-builder.py", "tar"]
            + exclude_regexes_options
        )

        # Unpack the tarball produced in the builder into the new image.
        run(
            [
                "buildah",
                "add",
                "--from",
                builder,
                new,
                "/usr/src/distroless/distroless.tar",
                "/",
            ]
        )
        for local_file, image_file in distroless.copy.items():
            run(["buildah", "copy", new, f"./{local_file}", image_file])

        run(["buildah", "config"] + distroless.config_options + [new])
        run(["buildah", "commit", "--rm", "--manifest", manifest, new])
        run(
            ["buildah", "rm", builder],
            check=False,
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
        )

    def distroless_build(self, image: "Image", arches):
        """Build a distroless image with buildah, one architecture at a time."""
        started = self._start_build(image, arches)
        if started is None:
            return
        build_arches, tags, manifest = started

        for arch in build_arches:
            self._distroless_build_arch(image, arch, manifest)

        self._tag_aliases(image, manifest, tags)

    def podman_build(self, image: "Image", arches):
        """Build a Dockerfile-based image for all architectures with podman."""
        started = self._start_build(image, arches)
        if started is None:
            return
        build_arches, tags, manifest = started

        platforms = ",".join(f"linux/{a}" for a in build_arches)
        build_cmd = [
            "podman",
            "build",
            "--rm",
            "--force-rm",
            f"--manifest={manifest}",
            f"--platform={platforms}",
        ]
        if PKG_VERSIONS is not None and image.canonical_name in PKG_VERSIONS:
            build_cmd.append(
                f"--build-arg=PKG_VERSION={PKG_VERSIONS[image.canonical_name]}"
            )
        # The build context is always the last argument.
        build_cmd.append(".")
        self.run(build_cmd, cwd=image.path)

        self._tag_aliases(image, manifest, tags)

    def podman_push(self, image: "Image", sign=None):
        """Push the manifest of every tag of ``image``; sign it when ``sign`` is given."""
        if self.images_info.skip_branch(image.canonical_name, self.branch):
            return

        tags = self.tags.tags(self.branch, image)
        manifests = [self.render_full_tag(image, t) for t in tags]

        for manifest in manifests:
            print(f"Push manifest {manifest}")
            cmd = [
                "podman",
                "manifest",
                "push",
                manifest,
                f"docker://{manifest}",
            ]
            if sign is not None:
                cmd.append(f"--sign-by={sign}")
            self.run(cmd)
class ImagesInfo:
    """Per-image build restrictions read from ``images-info.toml``.

    The file is optional; without it no image has any restriction. Each
    image entry may list ``skip-arches`` and ``skip-branches``.
    """

    def __init__(self):
        images_info = Path("images-info.toml")
        self._info = (
            tomli.loads(images_info.read_text()) if images_info.exists() else {}
        )

    def _listed(self, canonical_name, key):
        # Value of `key` for the image, or an empty list when either the
        # image or the key is not present.
        return self._info.get(canonical_name, {}).get(key, [])

    def skip_arch(self, canonical_name, arch):
        """Tell whether ``arch`` is listed in the image's ``skip-arches``."""
        return arch in self._listed(canonical_name, "skip-arches")

    def skip_arches(self, canonical_name):
        """Return the image's ``skip-arches`` list (empty when unset)."""
        return self._listed(canonical_name, "skip-arches")

    def skip_branch(self, canonical_name, branch):
        """Tell whether ``branch`` is listed in the image's ``skip-branches``."""
        return branch in self._listed(canonical_name, "skip-branches")

    def skip_branches(self, canonical_name):
        """Return the image's ``skip-branches`` list (empty when unset)."""
        return self._listed(canonical_name, "skip-branches")
def parse_args():
    """Parse the command line and return the resulting namespace.

    The valid organization and image names are discovered from the
    sub-directories of ``ORG_DIR``. After parsing, ``stages``, ``arches``,
    ``branches`` and ``images`` are converted to sets with the matching
    ``--skip-*`` values removed.
    """
    stages = ["build", "remove_dockerfiles", "render_dockerfiles", "push"]
    arches = ["amd64", "386", "arm64", "loong64", "riscv64"]
    branches = ["p11", "p10", "sisyphus", "c10f1", "c10f2"]
    # Only directories are organizations; a stray regular file in ORG_DIR
    # would otherwise make the nested iterdir() below fail.
    organizations = [o for o in ORG_DIR.iterdir() if o.is_dir()]
    images = [f"{o.name}/{i.name}" for o in organizations for i in o.iterdir()]
    organizations = [o.name for o in organizations]

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # Exactly one of --images / --organizations must be given; the other one
    # keeps its "everything" default.
    images_group = parser.add_mutually_exclusive_group(required=True)
    images_group.add_argument(
        "-i",
        "--images",
        nargs="+",
        default=images,
        choices=images,
        metavar="IMAGE_NAME",
        help="list of images to build",
    )
    images_group.add_argument(
        "-o",
        "--organizations",
        nargs="+",
        default=organizations,
        choices=organizations,
        help="build all images from these organizations",
    )
    parser.add_argument(
        "--skip-images",
        nargs="+",
        default=[],
        choices=images,
        metavar="IMAGE_NAME",
        help="list of images to skip",
    )
    parser.add_argument(
        "-r",
        "--registry",
        default="gitea.basealt.ru",
    )
    parser.add_argument(
        "--overwrite-organization",
    )
    parser.add_argument(
        "-l",
        "--latest",
        default="p10",
        help="tag images in this branch as latest",
    )
    parser.add_argument(
        "--tasks",
        type=Tasks,
        default=Tasks(None),
        help="use tasks from TASKS file",
    )
    parser.add_argument(
        "--tags",
        help="use tags from TAGS file",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="print instead of running docker commands",
    )
    parser.add_argument(
        "--sign",
    )
    parser.add_argument(
        "-a",
        "--arches",
        nargs="+",
        default=arches,
        choices=arches,
        help="build images for these architectures",
    )
    parser.add_argument(
        "--skip-arches",
        nargs="+",
        default=[],
        choices=arches,
        help="list of architectures to skip",
    )
    parser.add_argument(
        "-b",
        "--branches",
        nargs="+",
        default=branches,
        choices=branches,
        help="build images for these branches",
    )
    parser.add_argument(
        "--skip-branches",
        nargs="+",
        default=[],
        choices=branches,
        help="list of branches to skip",
    )
    parser.add_argument(
        "--stages",
        nargs="+",
        default=stages,
        choices=stages,
        help="list of stages to go through",
    )
    parser.add_argument(
        "--skip-stages",
        nargs="+",
        default=[],
        choices=stages,
        help="list of stages to skip",
    )
    parser.add_argument(
        "--package-versions",
        type=json.loads,
        help="json string where key is image name, value is the package version",
    )

    args = parser.parse_args()
    args.stages = set(args.stages) - set(args.skip_stages)
    args.arches = set(args.arches) - set(args.skip_arches)
    args.branches = set(args.branches) - set(args.skip_branches)
    args.images = set(args.images) - set(args.skip_images)
    return args
def main():
    """Run the selected stages for every requested organization and branch."""
    global PKG_VERSIONS

    args = parse_args()
    PKG_VERSIONS = args.package_versions

    images_info = ImagesInfo()
    tags = Tags(args.tags, args.latest)

    # The stage selection is the same for every builder, so evaluate it once.
    remove_stage = "remove_dockerfiles" in args.stages
    render_stage = "render_dockerfiles" in args.stages
    build_stage = "build" in args.stages
    push_stage = "push" in args.stages

    for organization in args.organizations:
        for branch in args.branches:
            builder = DockerBuilder(
                args.registry,
                branch,
                organization,
                args.overwrite_organization,
                args.latest,
                args.dry_run,
                images_info,
                args.tasks,
                tags,
            )
            if remove_stage:
                builder.remove_dockerfiles()
            if render_stage:
                builder.render_dockerfiles()
            # Distroless descriptions are needed to compute the build order.
            builder.load_distrolesses()

            for image in builder.get_build_order():
                name = image.canonical_name
                if name not in args.images:
                    continue

                if build_stage:
                    # Images with a distroless.toml are assembled with
                    # buildah; all the others are built from a Dockerfile.
                    if name in builder.distrolesses:
                        builder.distroless_build(image, args.arches)
                    else:
                        builder.podman_build(image, args.arches)

                if push_stage:
                    builder.podman_push(image, args.sign)


if __name__ == "__main__":
    main()
# vim: colorcolumn=89