ovirt-imageio/test/client_test.py

1152 lines
34 KiB
Python
Raw Permalink Normal View History

# SPDX-FileCopyrightText: Red Hat, Inc.
# SPDX-License-Identifier: GPL-2.0-or-later
import os
client: Add info() and measure() helpers Before uploading images, we need to know: - The virtual size, used to set the provisioned_size of the target disk - The required size, used to set the initial_size of the target disk when uploading sparse image to block storage. Both are available using "qemu-img info" and "qemu-img measure" using the special "json:" URI[1], but this is too complex for users, so we provide now an easy to use interface to get this info. Richard Jones suggested a neat trick[2], exposing the image using nbd server, and using NBD URL with qemu-img. However this requires nddkit tar plugin, which is not available yet in RHEL 8.2, and require support for upload from URL, which we don't have yet. Kevin Wolf explained how to enable format probing using the "json:" URI by not specifying the top level "driver"[3]. Add new public client functions, supporting both regular images and images inside tar files: - info(filename, [member]): returns info about an image using qemu-img info. When member is specified, return info about file named member in the tar file filename. - measure(filename, dst_fmt, [member]): measure required size for convert image to dst_fmt. When member is specified, return info about file named member in the tar file filename. 
Here is an example usage: >>> client.info("fedora-32.ova", member="fedora-32.qcow2") {'cluster-size': 65536, 'dirty-flag': False, 'filename': 'nbd+unix://?socket=/tmp/imageio-98a5jum4/sock', 'format': 'qcow2', 'format-specific': {'data': {'compat': '1.1', 'corrupt': False, 'lazy-refcounts': False, 'refcount-bits': 16}, 'type': 'qcow2'}, 'member-offset': 1536, 'member-size': 676659200, 'virtual-size': 6442450944} >>> client.measure("fedora-32.ova", "qcow2", member="fedora-32.qcow2") {'fully-allocated': 6443696128, 'member-offset': 1536, 'member-size': 676659200, 'required': 1381302272} >>> client.info("fedora-32.qcow2") {'cluster-size': 65536, 'dirty-flag': False, 'filename': 'nbd+unix://?socket=/tmp/imageio-kel0a0oj/sock', 'format': 'qcow2', 'format-specific': {'data': {'compat': '1.1', 'corrupt': False, 'lazy-refcounts': False, 'refcount-bits': 16}, 'type': 'qcow2'}, 'virtual-size': 6442450944} >>> client.measure("fedora-32.qcow2", "qcow2") {'fully-allocated': 6443696128, 'required': 1381302272} [1] https://bugzilla.redhat.com/1849981#c3 [2] https://lists.nongnu.org/archive/html/qemu-discuss/2020-06/msg00074.html [3] https://lists.nongnu.org/archive/html/qemu-discuss/2020-06/msg00094.html Change-Id: I9bbc846ada839e60ad46719d901bba96a796d507 Bug-Url: https://bugzilla.redhat.com/1849981 Signed-off-by: Nir Soffer <nsoffer@redhat.com>
2020-06-27 18:16:58 +03:00
import tarfile
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest
from ovirt_imageio import client
checksum: Optimize zero hashing Use block based hashing algorithm: H( H(block 1) + H(block 2) + ... + H(block N) ) This is basically creating a hash list[1], and using the root hash as the result. The algorithm is similar to eD2k hash algorithm[2], but we support any algorithm and block size. The default algorithm is blake2b[3] and block size is 4 MiB. blake2b is as fast as sha-1, but secure at least as sha-3. When we don't have extents information, for example when using preallocated image, or sparse image on storage that does not report sparseness information (e.g. NFS < 4.2, GlusterFS with sharding) we detect zero blocks and optimize hashing. Checksum calculation time is limited by storage read throughput. If we have extent information, we can compute the hash for zero blocks without reading anything from storage, speeding up the calculation dramatically. When hashing zero blocks, instead of hashing entire block (4 MiB) we hash a precomputed digest bytes (32 bytes). This is up to 131072 times faster. 
Since the checksum depends on the block size, the response includes now also the block size: $ curl -k https://localhost:54322/images/nbd/checksum | jq { "checksum": "061bbe365935437440f7372204b85acc4bfb76fe3fc20347a20b788bf445c131", "algorithm": "blake2b", "block_size": 4194304 } To compare the checksum to a local file, you must use the same algorithm and block_size: >> from ovirt_imageio import client >> client.checksum("disk.img", block_size=4194304, algorithm="blake2b") "061bbe365935437440f7372204b85acc4bfb76fe3fc20347a20b788bf445c131" To compare to a pre-computed checksum, the caller can specify the block_size using q query parameter: $ curl -k https://localhost:54322/images/nbd/checksum?block_size=2097152 | jq { "checksum": "777b9c2f6598d503d43c14a39b31cdd8aee9f48b475f2af0f4c668e33297016c", "algorithm": "blake2b", "block_size": 2097152 } Here are initial results: tool fedora-32[4] full-6g[5] empty-6g[6] empty-100g[7] empty-1t[8] ---------------------------------------------------------------------------- checksum 2.84 3.04 0.03 0.06 0.28 b2sum[9] 8.29 8.42 8.48 161.00 1648.64 [1] https://en.wikipedia.org/wiki/Hash_list [2] https://en.wikipedia.org/wiki/Ed2k_URI_scheme#eD2k_hash_algorithm [3] https://blake2.net/ [4] virt-builder fedora-32 -o fedora-32.raw --root-password=password:root [5] dd if=/zero/zero bs=8M count=768 of=full-6g.raw [6] truncate -s 6g empty-6g.raw [7] truncate -s 100g empty-100g.raw [8] truncate -s 1t empty-1t.raw [9] b2sum --length 256 {path} Change-Id: I0661daf6a36e3eee57ef54128782e2a9aa11943e Signed-off-by: Nir Soffer <nsoffer@redhat.com>
2020-08-12 03:07:43 +03:00
from ovirt_imageio._internal import blkhash
from ovirt_imageio._internal import config
from ovirt_imageio._internal import ipv6
from ovirt_imageio._internal import nbd
from ovirt_imageio._internal import nbdutil
from ovirt_imageio._internal import qemu_img
from ovirt_imageio._internal import qemu_nbd
from ovirt_imageio._internal import server
from ovirt_imageio._internal.extent import ZeroExtent, DirtyExtent
from . import testutil
from . import ci
log = logging.getLogger("test")
CLUSTER_SIZE = 64 * 1024
IMAGE_SIZE = 3 * CLUSTER_SIZE
@pytest.fixture(scope="module")
def srv():
    """Module-scoped imageio server used as the transfer endpoint."""
    cfg = config.load(["test/conf/daemon.conf"])
    s = server.Server(cfg)
    s.start()
    # Hand the running server to the tests; stop it after the last test
    # in this module has finished.
    yield s
    s.stop()
def prepare_transfer(srv, url, sparse=True, size=IMAGE_SIZE):
    """
    Install a read/write transfer ticket for url on the server and
    return the HTTPS URL clients use to access it.
    """
    ticket = testutil.create_ticket(
        url=url, size=size, sparse=sparse, ops=["read", "write"])
    srv.auth.add(ticket)
    host, port = srv.remote_service.address
    # IPv6 addresses must be bracket-quoted inside a URL.
    quoted_host = ipv6.quote_address(host)
    return "https://{}:{}/images/{}".format(
        quoted_host, port, ticket["uuid"])
class FakeProgress:
    """Test double for a progress object: records every reported chunk."""

    def __init__(self):
        # Set by client.upload() to the total transfer size.
        self.size = None
        # One entry per update() call, in call order.
        self.updates = []

    def update(self, n):
        self.updates.append(n)
# TODO:
# - verify that upload optimized the upload using unix socket. Need a way to
# enable only OPTIONS on the remote server.
# - verify that upload falls back to HTTPS if server does not support unix
#   socket. We don't have a way to disable unix socket currently.
# - verify that upload falls back to HTTPS if server supports unix socket but
#   is not the local host. Probably not feasible for these tests, unless we
#   can start a daemon on another host.
# - Test negative flows
@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_upload_empty_sparse(tmpdir, srv, fmt):
    """Uploading an empty sparse image must zero a non-empty destination."""
    # Empty source image.
    src = str(tmpdir.join("src"))
    qemu_img.create(src, fmt, size=IMAGE_SIZE)
    # Destination full of non-zero data, so the upload must write zeroes.
    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.write(b"a" * IMAGE_SIZE)
    url = prepare_transfer(srv, "file://" + dst)
    client.upload(src, url, srv.config.tls.ca_file)
    # We cannot compare allocation of raw and qcow2 images since qemu-img
    # reports holes only in qcow2 images.
    strict = fmt == "raw"
    qemu_img.compare(src, dst, format1=fmt, format2="raw", strict=strict)
@pytest.mark.parametrize("fmt", [
    pytest.param(
        "raw",
        marks=pytest.mark.xfail(
            ci.is_ovirt(),
            reason="Broken in oVirt CI for unknown reason")
    ),
    "qcow2"
])
def test_upload_hole_at_start_sparse(tmpdir, srv, fmt):
    """Upload an image with a hole at the start and data at the end."""
    size = 3 * 1024**2
    # Source: only the last MiB contains data, the start is a hole.
    src = str(tmpdir.join("src"))
    qemu_img.create(src, fmt, size=size)
    with qemu_nbd.open(src, fmt) as c:
        c.write(size - 1024**2, b"b" * 1024**2)
        c.flush()
        log.debug("src extents: %s", list(nbdutil.extents(c)))
    # Destination is full of data, so the hole must be zeroed on upload.
    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.write(b"a" * size)
    url = prepare_transfer(srv, "file://" + dst, size=size)
    client.upload(src, url, srv.config.tls.ca_file)
    with qemu_nbd.open(dst, "raw", read_only=True) as c:
        log.debug("dst extents: %s", list(nbdutil.extents(c)))
    # Allocation can be compared strictly only when both images are raw;
    # qemu-img reports holes only in qcow2 images.
    qemu_img.compare(src, dst, format1=fmt, format2="raw", strict=fmt == "raw")
@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_upload_hole_at_middle_sparse(tmpdir, srv, fmt):
    """Upload an image with data at both ends and a hole in the middle."""
    size = 3 * 1024**2
    src = str(tmpdir.join("src"))
    qemu_img.create(src, fmt, size=size)
    # Write the first and last MiB, leaving a 1 MiB hole in the middle.
    with qemu_nbd.open(src, fmt) as c:
        c.write(0, b"b" * 1024**2)
        c.write(size - 1024**2, b"b" * 1024**2)
        c.flush()
    # Non-zero destination forces the upload to actually zero the hole.
    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.write(b"a" * size)
    url = prepare_transfer(srv, "file://" + dst, size=size)
    client.upload(src, url, srv.config.tls.ca_file)
    strict = fmt == "raw"
    qemu_img.compare(src, dst, format1=fmt, format2="raw", strict=strict)
@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_upload_hole_at_end_sparse(tmpdir, srv, fmt):
    """Upload an image with data at the start and a hole at the end."""
    size = 3 * 1024**2
    src = str(tmpdir.join("src"))
    qemu_img.create(src, fmt, size=size)
    # Only the first MiB holds data; the rest of the image is a hole.
    with qemu_nbd.open(src, fmt) as c:
        c.write(0, b"b" * 1024**2)
        c.flush()
    # Non-zero destination forces the upload to actually zero the hole.
    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.write(b"a" * size)
    url = prepare_transfer(srv, "file://" + dst, size=size)
    client.upload(src, url, srv.config.tls.ca_file)
    strict = fmt == "raw"
    qemu_img.compare(src, dst, format1=fmt, format2="raw", strict=strict)
@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_upload_full_sparse(tmpdir, srv, fmt):
    """Upload a fully allocated image over a non-empty destination."""
    src = str(tmpdir.join("src"))
    qemu_img.create(src, fmt, size=IMAGE_SIZE)
    # Fill the entire source with data - no holes at all.
    with qemu_nbd.open(src, fmt) as c:
        c.write(0, b"b" * IMAGE_SIZE)
        c.flush()
    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.write(b"a" * IMAGE_SIZE)
    url = prepare_transfer(srv, "file://" + dst)
    client.upload(src, url, srv.config.tls.ca_file)
    # With fully allocated data a strict comparison is possible.
    qemu_img.compare(src, dst, strict=True)
@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_upload_preallocated(tmpdir, srv, fmt):
    """Upload to a non-sparse ticket must keep the target fully allocated."""
    src = str(tmpdir.join("src"))
    qemu_img.create(src, fmt, size=IMAGE_SIZE)
    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.write(b"a" * IMAGE_SIZE)
    url = prepare_transfer(srv, "file://" + dst, sparse=False)
    client.upload(src, url, srv.config.tls.ca_file)
    qemu_img.compare(src, dst)
    # The ticket is not sparse, so zeroing must not punch holes; the
    # destination must remain fully allocated on disk.
    assert os.stat(dst).st_blocks * 512 == IMAGE_SIZE
@pytest.mark.parametrize("fmt,compressed", [
    ("raw", False),
    ("qcow2", False),
    ("qcow2", True),
])
def test_upload_from_ova(tmpdir, srv, fmt, compressed):
    """Upload a disk stored as a member inside an OVA (tar) file."""
    offset = CLUSTER_SIZE
    data = b"I can eat glass and it doesn't hurt me."

    # Create raw disk with some data.
    tmp = str(tmpdir.join("tmp"))
    with open(tmp, "wb") as f:
        f.truncate(IMAGE_SIZE)
        f.seek(offset)
        f.write(data)

    # Create source disk.
    src = str(tmpdir.join("src"))
    qemu_img.convert(tmp, src, "raw", fmt, compressed=compressed)

    # Create OVA package.
    ova = str(tmpdir.join("src.ova"))
    with tarfile.open(ova, "w") as tar:
        tar.add(src, arcname=os.path.basename(src))

    # Prepare destination file.
    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.truncate(IMAGE_SIZE)

    # Test uploading src from ova, selecting it by member name.
    url = prepare_transfer(srv, "file://" + dst)
    client.upload(
        ova,
        url,
        srv.config.tls.ca_file,
        member=os.path.basename(src))

    qemu_img.compare(src, dst)
@pytest.mark.parametrize("disk_is_zero", [True, False])
@pytest.mark.parametrize("src_fmt", ["raw", "qcow2"])
@pytest.mark.parametrize("dst_fmt", ["raw", "qcow2"])
def test_upload_nbd(srv, nbd_server, tmpdir, src_fmt, dst_fmt, disk_is_zero):
    """Upload via an NBD backed ticket, covering all extent kinds."""
    size = 5 * CLUSTER_SIZE

    # Create qcow2 src image with all kinds of extents.
    src = str(tmpdir.join("src." + src_fmt))
    qemu_img.create(src, src_fmt, size=size)
    with qemu_nbd.open(src, src_fmt) as c:
        # Cluster 0: data.
        c.write(0 * CLUSTER_SIZE, b"A" * CLUSTER_SIZE)
        # Cluster 1: data, zero.
        c.write(1 * CLUSTER_SIZE, b"\0" * CLUSTER_SIZE)
        # Cluster 2: qcow2: zero cluster, raw: hole.
        c.zero(2 * CLUSTER_SIZE, CLUSTER_SIZE, punch_hole=True)
        # Cluster 3: qcow2: zero cluster, raw: allocated.
        c.zero(3 * CLUSTER_SIZE, CLUSTER_SIZE, punch_hole=False)
        # Cluster 4: unallocated.
        c.flush()
        log.debug("src extents: %s", list(nbdutil.extents(c)))

    # Create empty target image.
    dst = str(tmpdir.join("dst." + dst_fmt))
    qemu_img.create(dst, dst_fmt, size=size)

    # Upload image through an NBD server exporting the destination.
    nbd_server.image = dst
    nbd_server.fmt = dst_fmt
    nbd_server.start()
    url = prepare_transfer(srv, nbd_server.sock.url(), size=size)
    client.upload(
        src,
        url,
        srv.config.tls.ca_file,
        disk_is_zero=disk_is_zero)
    nbd_server.stop()

    with qemu_nbd.open(dst, dst_fmt, read_only=True) as c:
        log.debug("dst extents: %s", list(nbdutil.extents(c)))

    # Compare image content. We cannot compare allocation since we skip zeroes
    # and sparsify the destination image.
    qemu_img.compare(src, dst, format1=src_fmt, format2=dst_fmt, strict=False)
@pytest.mark.parametrize("disk_is_zero", [True, False])
@pytest.mark.parametrize("base_fmt", ["raw", "qcow2"])
def test_upload_nbd_shallow(srv, nbd_server, tmpdir, base_fmt, disk_is_zero):
    """Upload a qcow2 chain layer by layer using backing_chain=False."""
    size = 10 * 1024**2

    # Create base image with some data in first 3 clusters.
    src_base = str(tmpdir.join("src_base." + base_fmt))
    qemu_img.create(src_base, base_fmt, size=size)
    with qemu_nbd.open(src_base, base_fmt) as c:
        c.write(0 * CLUSTER_SIZE, b"a" * CLUSTER_SIZE)
        c.write(1 * CLUSTER_SIZE, b"b" * CLUSTER_SIZE)
        c.write(2 * CLUSTER_SIZE, b"c" * CLUSTER_SIZE)
        c.flush()
        log.debug("src_base extents: %s", list(nbdutil.extents(c)))

    # Create src image with some data in second cluster and zero in third
    # cluster.
    src_top = str(tmpdir.join("src_top.qcow2"))
    qemu_img.create(
        src_top, "qcow2", backing_file=src_base, backing_format=base_fmt)
    with qemu_nbd.open(src_top, "qcow2") as c:
        c.write(1 * CLUSTER_SIZE, b"B" * CLUSTER_SIZE)
        c.zero(2 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()

    with qemu_nbd.open(
            src_top, "qcow2", read_only=True, backing_chain=False) as c:
        log.debug("src_top extents: %s", list(nbdutil.extents(c)))

    # Create empty destination base image.
    dst_base = str(tmpdir.join("dst_base." + base_fmt))
    qemu_img.create(dst_base, base_fmt, size=size)

    # Create empty destination top image.
    dst_top = str(tmpdir.join("dst_top.qcow2"))
    qemu_img.create(
        dst_top, "qcow2", backing_file=dst_base, backing_format=base_fmt)

    # Upload base image.
    nbd_server.image = dst_base
    nbd_server.fmt = base_fmt
    nbd_server.start()
    url = prepare_transfer(srv, nbd_server.sock.url(), size=size)
    client.upload(
        src_base,
        url,
        srv.config.tls.ca_file,
        disk_is_zero=disk_is_zero,
        backing_chain=False)
    nbd_server.stop()

    with qemu_nbd.open(dst_base, base_fmt, read_only=True) as c:
        log.debug("dst_base extents: %s", list(nbdutil.extents(c)))

    # Compare image content - must match.
    qemu_img.compare(
        src_base, dst_base, format1=base_fmt, format2=base_fmt, strict=False)

    # Comparing allocation is not possible with raw format since qemu-nbd does
    # not report allocation for raw images, so we treat unallocated areas as
    # zero area instead of a hole.
    if base_fmt == "qcow2":
        qemu_img.compare(
            src_base, dst_base, format1=base_fmt, format2=base_fmt,
            strict=True)

    # Upload top image.
    nbd_server.image = dst_top
    nbd_server.fmt = "qcow2"
    nbd_server.start()
    url = prepare_transfer(srv, nbd_server.sock.url(), size=size)
    client.upload(
        src_top,
        url,
        srv.config.tls.ca_file,
        disk_is_zero=disk_is_zero,
        backing_chain=False)
    nbd_server.stop()

    with qemu_nbd.open(
            dst_top, "qcow2", read_only=True, backing_chain=False) as c:
        log.debug("dst_top extents: %s", list(nbdutil.extents(c)))

    # Test image content - must match.
    qemu_img.compare(
        src_top, dst_top, format1="qcow2", format2="qcow2", strict=False)

    # Compare allocation for qcow2 chain - nice to have.
    if base_fmt == "qcow2":
        qemu_img.compare(
            src_top, dst_top, format1="qcow2", format2="qcow2", strict=True)
@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_download_raw(tmpdir, srv, fmt):
    """Download a raw image, converting it on-the-fly to fmt."""
    # Sparse raw source with a little data in the middle.
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.truncate(IMAGE_SIZE)
        f.seek(IMAGE_SIZE // 2)
        f.write(b"data")
    dst = str(tmpdir.join("dst"))
    url = prepare_transfer(srv, "file://" + src)
    # When we download raw data, we can convert it on-the-fly to other
    # formats.
    client.download(url, dst, srv.config.tls.ca_file, fmt=fmt)
    # file backend does not support extents, so downloaded data is always
    # fully allocated.
    qemu_img.compare(src, dst, format1="raw", format2=fmt)
def test_download_qcow2_as_raw(tmpdir, srv):
    """Download a qcow2 bytestream unchanged by requesting fmt="raw"."""
    src = str(tmpdir.join("src.qcow2"))
    qemu_img.create(src, "qcow2", size=IMAGE_SIZE)

    # Allocate one cluster in the middle of the image.
    with qemu_nbd.open(src, "qcow2") as c:
        c.write(CLUSTER_SIZE, b"a" * CLUSTER_SIZE)
        c.flush()

    actual_size = os.path.getsize(src)
    url = prepare_transfer(srv, "file://" + src, size=actual_size)
    dst = str(tmpdir.join("dst.qcow2"))

    # When downloading qcow2 image using the nbd backend, we get raw data and
    # we can convert it to any format we want. However when downloading using
    # the file backend, we get qcow2 bytestream and we cannot convert it.
    #
    # To store the qcow2 bytestream, we must use fmt="raw". This instructs
    # qemu-nbd on the client side to treat the data as raw bytes, storing them
    # without any change on the local file.
    #
    # This is basically like:
    #
    #   qemu-img convert -f raw -O raw src.qcow2 dst.qcow2
    #
    client.download(url, dst, srv.config.tls.ca_file, fmt="raw")

    # The result should be identical qcow2 image content. Allocation may
    # differ but for this test we get identical allocation.
    qemu_img.compare(src, dst, format1="qcow2", format2="qcow2", strict=True)
@pytest.mark.parametrize("base_fmt", ["raw", "qcow2"])
def test_download_shallow(srv, nbd_server, tmpdir, base_fmt):
    """Download a qcow2 chain layer by layer, rebasing the top layer."""
    size = 10 * 1024**2

    # Create source base image with some data in first clusters.
    src_base = str(tmpdir.join("src_base." + base_fmt))
    qemu_img.create(src_base, base_fmt, size=size)
    with qemu_nbd.open(src_base, base_fmt) as c:
        c.write(0 * CLUSTER_SIZE, b"a" * CLUSTER_SIZE)
        c.write(1 * CLUSTER_SIZE, b"b" * CLUSTER_SIZE)
        c.write(2 * CLUSTER_SIZE, b"c" * CLUSTER_SIZE)
        c.flush()
        log.debug("src_base extents: %s", list(nbdutil.extents(c)))

    # Create source top image with some data in second cluster and zero in the
    # third cluster.
    src_top = str(tmpdir.join("src_top.qcow2"))
    qemu_img.create(
        src_top, "qcow2", backing_file=src_base, backing_format=base_fmt)
    with qemu_nbd.open(src_top, "qcow2") as c:
        c.write(1 * CLUSTER_SIZE, b"B" * CLUSTER_SIZE)
        c.zero(2 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()

    with qemu_nbd.open(
            src_top, "qcow2", read_only=True, backing_chain=False) as c:
        log.debug("src_top extents: %s", list(nbdutil.extents(c)))

    dst_base = str(tmpdir.join("dst_base." + base_fmt))
    dst_top = str(tmpdir.join("dst_top.qcow2"))

    # Download base image.
    nbd_server.image = src_base
    nbd_server.fmt = base_fmt
    nbd_server.start()
    url = prepare_transfer(srv, nbd_server.sock.url(), size=size)
    client.download(
        url,
        dst_base,
        srv.config.tls.ca_file,
        fmt=base_fmt)
    nbd_server.stop()

    with qemu_nbd.open(dst_base, base_fmt, read_only=True) as c:
        log.debug("dst_base extents: %s", list(nbdutil.extents(c)))

    # Compare image content - must match.
    qemu_img.compare(
        src_base, dst_base, format1=base_fmt, format2=base_fmt, strict=False)

    # And allocation - nice to have.
    if base_fmt == "qcow2" or qemu_nbd.version() >= (6, 0, 0):
        qemu_img.compare(
            src_base, dst_base, format1=base_fmt, format2=base_fmt,
            strict=True)

    # Download top image, rebasing it on top of the downloaded base.
    nbd_server.image = src_top
    nbd_server.fmt = "qcow2"
    nbd_server.backing_chain = False
    nbd_server.start()
    url = prepare_transfer(srv, nbd_server.sock.url(), size=size)
    client.download(
        url,
        dst_top,
        srv.config.tls.ca_file,
        backing_file=dst_base,
        backing_format=base_fmt)
    nbd_server.stop()

    with qemu_nbd.open(
            dst_top, "qcow2", read_only=True, backing_chain=False) as c:
        log.debug("dst_top extents: %s", list(nbdutil.extents(c)))

    # Compare both image content - must match.
    qemu_img.compare(
        src_top, dst_top, format1="qcow2", format2="qcow2", strict=False)

    # And allocation - nice to have.
    qemu_img.compare(
        src_top, dst_top, format1="qcow2", format2="qcow2", strict=True)
def test_upload_proxy_url(tmpdir, srv):
    """Upload falls back to proxy_url when transfer_url is unreachable."""
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.truncate(IMAGE_SIZE)
    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.truncate(IMAGE_SIZE)
    # If transfer_url is not accessible, proxy_url is used.
    transfer_url = "https://no.server:54322/images/no-ticket"
    proxy_url = prepare_transfer(srv, "file://" + dst)
    client.upload(
        src, transfer_url, srv.config.tls.ca_file, proxy_url=proxy_url)
    qemu_img.compare(src, dst, format1="raw", format2="raw", strict=True)
def test_upload_proxy_url_unused(tmpdir, srv):
    """A working transfer_url wins; proxy_url is never contacted."""
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.truncate(IMAGE_SIZE)
    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.truncate(IMAGE_SIZE)
    # If transfer_url is accessible, proxy_url is not used - the upload
    # succeeds even though the proxy host does not exist.
    transfer_url = prepare_transfer(srv, "file://" + dst)
    proxy_url = "https://no.proxy:54322/images/no-ticket"
    client.upload(
        src, transfer_url, srv.config.tls.ca_file, proxy_url=proxy_url)
    qemu_img.compare(src, dst, format1="raw", format2="raw", strict=True)
def test_download_proxy_url(tmpdir, srv):
    """Download falls back to proxy_url when transfer_url is unreachable."""
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.truncate(IMAGE_SIZE)
    dst = str(tmpdir.join("dst"))
    # If transfer_url is not accessible, proxy_url is used.
    transfer_url = "https://no.server:54322/images/no-ticket"
    proxy_url = prepare_transfer(srv, "file://" + src)
    client.download(transfer_url, dst, srv.config.tls.ca_file, fmt="raw",
                    proxy_url=proxy_url)
    qemu_img.compare(src, dst, format1="raw", format2="raw")
def test_download_proxy_url_unused(tmpdir, srv):
    """A working transfer_url wins; proxy_url is never contacted."""
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.truncate(IMAGE_SIZE)
    dst = str(tmpdir.join("dst"))
    # If transfer_url is accessible, proxy_url is not used - the download
    # succeeds even though the proxy host does not exist.
    transfer_url = prepare_transfer(srv, "file://" + src)
    proxy_url = "https://no.proxy:54322/images/no-ticket"
    client.download(transfer_url, dst, srv.config.tls.ca_file, fmt="raw",
                    proxy_url=proxy_url)
    qemu_img.compare(src, dst, format1="raw", format2="raw")
def test_progress(tmpdir, srv):
    """upload() reports the total size and every transferred chunk."""
    # Source: two 4k data extents separated and followed by zeroes.
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.write(b"b" * 4096)
        f.seek(IMAGE_SIZE // 2)
        f.write(b"b" * 4096)
        f.truncate(IMAGE_SIZE)
    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.truncate(IMAGE_SIZE)
    url = prepare_transfer(srv, "file://" + dst, sparse=True)
    progress = FakeProgress()
    client.upload(
        src, url, srv.config.tls.ca_file, progress=progress)
    assert progress.size == IMAGE_SIZE
    # Note: when using multiple connections order of updates is not
    # predictable, so we compare the set of update sizes (duplicates
    # collapse in the set; we only check which sizes appeared).
    assert set(progress.updates) == {
        # First write.
        4096,
        # First zero.
        IMAGE_SIZE // 2 - 4096,
        # Second write.
        4096,
        # Second zero.
        IMAGE_SIZE // 2 - 4096,
    }
def test_progress_callback(tmpdir, srv):
    """upload() accepts a plain callable as the progress argument."""
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.truncate(IMAGE_SIZE)
    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.truncate(IMAGE_SIZE)
    url = prepare_transfer(srv, "file://" + dst, size=IMAGE_SIZE, sparse=True)
    updates = []
    client.upload(
        src, url, srv.config.tls.ca_file, progress=updates.append)
    # The empty image is reported as a single zero extent.
    assert updates == [IMAGE_SIZE]
client: Add info() and measure() helpers Before uploading images, we need to know: - The virtual size, used to set the provisioned_size of the target disk - The required size, used to set the initial_size of the target disk when uploading sparse image to block storage. Both are available using "qemu-img info" and "qemu-img measure" using the special "json:" URI[1], but this is too complex for users, so we provide now an easy to use interface to get this info. Richard Jones suggested a neat trick[2], exposing the image using nbd server, and using NBD URL with qemu-img. However this requires nddkit tar plugin, which is not available yet in RHEL 8.2, and require support for upload from URL, which we don't have yet. Kevin Wolf explained how to enable format probing using the "json:" URI by not specifying the top level "driver"[3]. Add new public client functions, supporting both regular images and images inside tar files: - info(filename, [member]): returns info about an image using qemu-img info. When member is specified, return info about file named member in the tar file filename. - measure(filename, dst_fmt, [member]): measure required size for convert image to dst_fmt. When member is specified, return info about file named member in the tar file filename. 
Here is an example usage: >>> client.info("fedora-32.ova", member="fedora-32.qcow2") {'cluster-size': 65536, 'dirty-flag': False, 'filename': 'nbd+unix://?socket=/tmp/imageio-98a5jum4/sock', 'format': 'qcow2', 'format-specific': {'data': {'compat': '1.1', 'corrupt': False, 'lazy-refcounts': False, 'refcount-bits': 16}, 'type': 'qcow2'}, 'member-offset': 1536, 'member-size': 676659200, 'virtual-size': 6442450944} >>> client.measure("fedora-32.ova", "qcow2", member="fedora-32.qcow2") {'fully-allocated': 6443696128, 'member-offset': 1536, 'member-size': 676659200, 'required': 1381302272} >>> client.info("fedora-32.qcow2") {'cluster-size': 65536, 'dirty-flag': False, 'filename': 'nbd+unix://?socket=/tmp/imageio-kel0a0oj/sock', 'format': 'qcow2', 'format-specific': {'data': {'compat': '1.1', 'corrupt': False, 'lazy-refcounts': False, 'refcount-bits': 16}, 'type': 'qcow2'}, 'virtual-size': 6442450944} >>> client.measure("fedora-32.qcow2", "qcow2") {'fully-allocated': 6443696128, 'required': 1381302272} [1] https://bugzilla.redhat.com/1849981#c3 [2] https://lists.nongnu.org/archive/html/qemu-discuss/2020-06/msg00074.html [3] https://lists.nongnu.org/archive/html/qemu-discuss/2020-06/msg00094.html Change-Id: I9bbc846ada839e60ad46719d901bba96a796d507 Bug-Url: https://bugzilla.redhat.com/1849981 Signed-off-by: Nir Soffer <nsoffer@redhat.com>
2020-06-27 18:16:58 +03:00
@pytest.mark.parametrize("fmt, compressed", [
    ("raw", False),
    ("qcow2", False),
    ("qcow2", True),
])
def test_info(tmpdir, fmt, compressed):
    """client.info() reports format and virtual size, adding member
    offset/size when inspecting an image inside a tar (ova) file."""
    # Create temporary file with some data.
    size = 2 * 1024**2
    tmp = str(tmpdir.join("tmp"))
    with open(tmp, "wb") as f:
        f.truncate(size)
        f.write(b"x" * CLUSTER_SIZE)

    # Create test image from temporary file.
    img = str(tmpdir.join("img"))
    qemu_img.convert(tmp, img, "raw", fmt, compressed=compressed)
    img_info = client.info(img)

    # Check image info.
    assert img_info["format"] == fmt
    assert img_info["virtual-size"] == size

    # We don't add member info if member was not specified.
    assert "member-offset" not in img_info
    assert "member-size" not in img_info

    # Create ova with test image.
    member = os.path.basename(img)
    ova = str(tmpdir.join("ova"))
    with tarfile.open(ova, "w") as tar:
        tar.add(img, arcname=member)

    # Get info for the member from the ova.
    ova_info = client.info(ova, member=member)

    # Image info from ova should be the same.
    assert ova_info["format"] == fmt
    assert ova_info["virtual-size"] == size

    # If member was specified, we report also the offset and size.
    with tarfile.open(ova) as tar:
        member_info = tar.getmember(member)
        assert ova_info["member-offset"] == member_info.offset_data
        assert ova_info["member-size"] == member_info.size
@pytest.mark.parametrize("fmt, compressed", [
    ("raw", False),
    ("qcow2", False),
    ("qcow2", True),
])
def test_measure_to_raw(tmpdir, fmt, compressed):
    """measure() to raw requires exactly the virtual size."""
    size = 2 * 1024**2
    # Temporary raw file with data in the first cluster.
    tmp = str(tmpdir.join("tmp"))
    with open(tmp, "wb") as f:
        f.truncate(size)
        f.write(b"x" * CLUSTER_SIZE)
    # Convert it to the source format under test.
    img = str(tmpdir.join("img"))
    qemu_img.convert(tmp, img, "raw", fmt, compressed=compressed)
    # Converting any source to raw requires the full virtual size.
    measure = client.measure(img, "raw")
    assert measure["required"] == size
@pytest.mark.parametrize("fmt, compressed", [
    ("raw", False),
    ("qcow2", False),
    ("qcow2", True),
])
def test_measure_to_qcow2(tmpdir, fmt, compressed):
    """measure() to qcow2 reports the expected required size."""
    size = 2 * 1024**2
    # Temporary raw file with data in the first cluster.
    tmp = str(tmpdir.join("tmp"))
    with open(tmp, "wb") as f:
        f.truncate(size)
        f.write(b"x" * CLUSTER_SIZE)
    # Convert it to the source format under test.
    img = str(tmpdir.join("img"))
    qemu_img.convert(tmp, img, "raw", fmt, compressed=compressed)
    # Expected value observed with qemu-img measure: data plus qcow2
    # metadata overhead.
    measure = client.measure(img, "qcow2")
    assert measure["required"] == 393216
@pytest.mark.parametrize("compressed", [False, True])
@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_measure_from_ova(tmpdir, compressed, fmt):
    """measure() of an image inside a tar matches measuring it directly."""
    # Create temporary file with some data.
    size = 2 * 1024**2
    tmp = str(tmpdir.join("tmp"))
    with open(tmp, "wb") as f:
        f.truncate(size)
        f.write(b"x" * CLUSTER_SIZE)

    # Create test image from temporary file.
    img = str(tmpdir.join("img"))
    qemu_img.convert(tmp, img, "raw", "qcow2", compressed=compressed)

    # Measure the image.
    img_measure = client.measure(img, fmt)

    # We don't add member info if member was not specified.
    assert "member-offset" not in img_measure
    assert "member-size" not in img_measure

    # Add test image to ova.
    member = os.path.basename(img)
    ova = str(tmpdir.join("ova"))
    with tarfile.open(ova, "w") as tar:
        tar.add(img, arcname=member)

    # Measure the image from the ova.
    ova_measure = client.measure(ova, fmt, member=member)

    # Measurement from ova should be same.
    assert ova_measure["required"] == img_measure["required"]
    assert ova_measure["fully-allocated"] == img_measure["fully-allocated"]

    # If member was specified, we report also the offset and size.
    with tarfile.open(ova) as tar:
        member_info = tar.getmember(member)
        assert ova_measure["member-offset"] == member_info.offset_data
        assert ova_measure["member-size"] == member_info.size
daemon: Compute checksum for remote images Users like to verify uploaded images checksums. This cannot be done using standard tools like shasum since it does not understand sparseness or qcow2 format, but we can support this using using the nbd backend. Add /images/{ticket-id}/checksum resource handler, computing a checksum of the guest visible data. Here is an example usage: $ time curl -k https://localhost:54322/images/nbd/checksum {"checksum": "2df96976518821e000fcf92fae0bfc6a7fb5b2d2", "algorithm": "sha1"} real 0m7.737s user 0m0.009s sys 0m0.009s Comparing to raw image with same content: $ time sha1sum /var/tmp/disk.raw 2df96976518821e000fcf92fae0bfc6a7fb5b2d2 /var/tmp/disk.raw real 0m6.912s user 0m5.900s sys 0m0.991s It possible to use any of the algorithms supported by python. To query the supported values use: $ curl -k https://localhost:54322/images/nbd/checksum/algorithms {"algorithms": ["blake2b", "blake2s", "md5", "sha1", "sha224", "sha256", "sha384", "sha3_224", "sha3_256", "sha3_384", "sha3_512", "sha512", "shake_128", "shake_256"]} And use any of the returned values: $ time curl -k https://localhost:54322/images/nbd/checksum?algorithm=sha256 {"checksum": "c4e2cd5e5d77a5890d888a5b25eed2813ab3678b8c5e47dc6d219bf44b3f41d7", "algorithm": "sha256"} real 0m14.834s user 0m0.011s sys 0m0.004s To allow comparing checksum with local images in any image format, add client.checksum(). 
Here are few examples, using identical images in different formats: $ python >>> from ovirt_imageio import client >>> client.checksum("/var/tmp/disk.raw") '2df96976518821e000fcf92fae0bfc6a7fb5b2d2' >>> client.checksum("/var/tmp/disk.qcow2") '2df96976518821e000fcf92fae0bfc6a7fb5b2d2' >>> client.checksum("/var/tmp/compressed.qcow2") '2df96976518821e000fcf92fae0bfc6a7fb5b2d2' Like info() and measure() it also works for images inside ova file: >>> client.checksum("/var/tmp/vm.ova", member="disk.qcow2") '2df96976518821e000fcf92fae0bfc6a7fb5b2d2' Computing checksums will not work for uploaded raw disks which were not aligned to storage minimum alignment (4k or 128m). In this case oVirt extends the disk size and the disk checksum will not match the original file checksum. Change-Id: I2a0e2e235d2767cc094e6d107f15f1a8f1695123 Bug-Url: https://bugzilla.redhat.com/1787906 Signed-off-by: Nir Soffer <nsoffer@redhat.com>
2020-07-10 16:21:10 +03:00
@pytest.mark.parametrize("fmt, compressed", [
    ("raw", False),
    ("qcow2", False),
    ("qcow2", True),
])
def test_checksum(tmpdir, fmt, compressed):
    """Converted image has the same checksum as its raw source file."""
    block_size = 1024**2
    virtual_size = 2 * 1024**2
    # Prepare a raw source file with one cluster of data; the rest of the
    # file is a hole.
    src = str(tmpdir.join("tmp"))
    with open(src, "wb") as f:
        f.truncate(virtual_size)
        f.write(b"x" * CLUSTER_SIZE)
    # Convert the source file to the image format under test.
    img = str(tmpdir.join("img"))
    qemu_img.convert(src, img, "raw", fmt, compressed=compressed)
    # The checksum depends only on guest-visible data, so it must not
    # change with the image format or compression.
    expected = blkhash.checksum(src, block_size=block_size)
    assert client.checksum(img, block_size=block_size) == expected
daemon: Compute checksum for remote images Users like to verify uploaded images checksums. This cannot be done using standard tools like shasum since it does not understand sparseness or qcow2 format, but we can support this using using the nbd backend. Add /images/{ticket-id}/checksum resource handler, computing a checksum of the guest visible data. Here is an example usage: $ time curl -k https://localhost:54322/images/nbd/checksum {"checksum": "2df96976518821e000fcf92fae0bfc6a7fb5b2d2", "algorithm": "sha1"} real 0m7.737s user 0m0.009s sys 0m0.009s Comparing to raw image with same content: $ time sha1sum /var/tmp/disk.raw 2df96976518821e000fcf92fae0bfc6a7fb5b2d2 /var/tmp/disk.raw real 0m6.912s user 0m5.900s sys 0m0.991s It possible to use any of the algorithms supported by python. To query the supported values use: $ curl -k https://localhost:54322/images/nbd/checksum/algorithms {"algorithms": ["blake2b", "blake2s", "md5", "sha1", "sha224", "sha256", "sha384", "sha3_224", "sha3_256", "sha3_384", "sha3_512", "sha512", "shake_128", "shake_256"]} And use any of the returned values: $ time curl -k https://localhost:54322/images/nbd/checksum?algorithm=sha256 {"checksum": "c4e2cd5e5d77a5890d888a5b25eed2813ab3678b8c5e47dc6d219bf44b3f41d7", "algorithm": "sha256"} real 0m14.834s user 0m0.011s sys 0m0.004s To allow comparing checksum with local images in any image format, add client.checksum(). 
Here are few examples, using identical images in different formats: $ python >>> from ovirt_imageio import client >>> client.checksum("/var/tmp/disk.raw") '2df96976518821e000fcf92fae0bfc6a7fb5b2d2' >>> client.checksum("/var/tmp/disk.qcow2") '2df96976518821e000fcf92fae0bfc6a7fb5b2d2' >>> client.checksum("/var/tmp/compressed.qcow2") '2df96976518821e000fcf92fae0bfc6a7fb5b2d2' Like info() and measure() it also works for images inside ova file: >>> client.checksum("/var/tmp/vm.ova", member="disk.qcow2") '2df96976518821e000fcf92fae0bfc6a7fb5b2d2' Computing checksums will not work for uploaded raw disks which were not aligned to storage minimum alignment (4k or 128m). In this case oVirt extends the disk size and the disk checksum will not match the original file checksum. Change-Id: I2a0e2e235d2767cc094e6d107f15f1a8f1695123 Bug-Url: https://bugzilla.redhat.com/1787906 Signed-off-by: Nir Soffer <nsoffer@redhat.com>
2020-07-10 16:21:10 +03:00
@pytest.mark.parametrize("fmt, compressed", [
    ("raw", False),
    ("qcow2", False),
    ("qcow2", True),
])
def test_checksum_from_ova(tmpdir, fmt, compressed):
    """Checksum of an image embedded in a tar (OVA) matches its raw source."""
    block_size = 1024**2
    virtual_size = 2 * 1024**2
    # Prepare a raw source file with one cluster of data; the rest of the
    # file is a hole.
    src = str(tmpdir.join("tmp"))
    with open(src, "wb") as f:
        f.truncate(virtual_size)
        f.write(b"x" * CLUSTER_SIZE)
    # Convert the source file to the image format under test.
    img = str(tmpdir.join("img"))
    qemu_img.convert(src, img, "raw", fmt, compressed=compressed)
    # Pack the converted image into a tar archive (OVA layout).
    member = os.path.basename(img)
    ova = str(tmpdir.join("ova"))
    with tarfile.open(ova, "w") as tar:
        tar.add(img, arcname=member)
    expected = blkhash.checksum(src, block_size=block_size)
    actual = client.checksum(ova, member=member, block_size=block_size)
    assert actual == expected
daemon: Compute checksum for remote images Users like to verify uploaded images checksums. This cannot be done using standard tools like shasum since it does not understand sparseness or qcow2 format, but we can support this using using the nbd backend. Add /images/{ticket-id}/checksum resource handler, computing a checksum of the guest visible data. Here is an example usage: $ time curl -k https://localhost:54322/images/nbd/checksum {"checksum": "2df96976518821e000fcf92fae0bfc6a7fb5b2d2", "algorithm": "sha1"} real 0m7.737s user 0m0.009s sys 0m0.009s Comparing to raw image with same content: $ time sha1sum /var/tmp/disk.raw 2df96976518821e000fcf92fae0bfc6a7fb5b2d2 /var/tmp/disk.raw real 0m6.912s user 0m5.900s sys 0m0.991s It possible to use any of the algorithms supported by python. To query the supported values use: $ curl -k https://localhost:54322/images/nbd/checksum/algorithms {"algorithms": ["blake2b", "blake2s", "md5", "sha1", "sha224", "sha256", "sha384", "sha3_224", "sha3_256", "sha3_384", "sha3_512", "sha512", "shake_128", "shake_256"]} And use any of the returned values: $ time curl -k https://localhost:54322/images/nbd/checksum?algorithm=sha256 {"checksum": "c4e2cd5e5d77a5890d888a5b25eed2813ab3678b8c5e47dc6d219bf44b3f41d7", "algorithm": "sha256"} real 0m14.834s user 0m0.011s sys 0m0.004s To allow comparing checksum with local images in any image format, add client.checksum(). 
Here are few examples, using identical images in different formats: $ python >>> from ovirt_imageio import client >>> client.checksum("/var/tmp/disk.raw") '2df96976518821e000fcf92fae0bfc6a7fb5b2d2' >>> client.checksum("/var/tmp/disk.qcow2") '2df96976518821e000fcf92fae0bfc6a7fb5b2d2' >>> client.checksum("/var/tmp/compressed.qcow2") '2df96976518821e000fcf92fae0bfc6a7fb5b2d2' Like info() and measure() it also works for images inside ova file: >>> client.checksum("/var/tmp/vm.ova", member="disk.qcow2") '2df96976518821e000fcf92fae0bfc6a7fb5b2d2' Computing checksums will not work for uploaded raw disks which were not aligned to storage minimum alignment (4k or 128m). In this case oVirt extends the disk size and the disk checksum will not match the original file checksum. Change-Id: I2a0e2e235d2767cc094e6d107f15f1a8f1695123 Bug-Url: https://bugzilla.redhat.com/1787906 Signed-off-by: Nir Soffer <nsoffer@redhat.com>
2020-07-10 16:21:10 +03:00
checksum: Optimize zero hashing Use block based hashing algorithm: H( H(block 1) + H(block 2) + ... + H(block N) ) This is basically creating a hash list[1], and using the root hash as the result. The algorithm is similar to eD2k hash algorithm[2], but we support any algorithm and block size. The default algorithm is blake2b[3] and block size is 4 MiB. blake2b is as fast as sha-1, but secure at least as sha-3. When we don't have extents information, for example when using preallocated image, or sparse image on storage that does not report sparseness information (e.g. NFS < 4.2, GlusterFS with sharding) we detect zero blocks and optimize hashing. Checksum calculation time is limited by storage read throughput. If we have extent information, we can compute the hash for zero blocks without reading anything from storage, speeding up the calculation dramatically. When hashing zero blocks, instead of hashing entire block (4 MiB) we hash a precomputed digest bytes (32 bytes). This is up to 131072 times faster. 
Since the checksum depends on the block size, the response includes now also the block size: $ curl -k https://localhost:54322/images/nbd/checksum | jq { "checksum": "061bbe365935437440f7372204b85acc4bfb76fe3fc20347a20b788bf445c131", "algorithm": "blake2b", "block_size": 4194304 } To compare the checksum to a local file, you must use the same algorithm and block_size: >> from ovirt_imageio import client >> client.checksum("disk.img", block_size=4194304, algorithm="blake2b") "061bbe365935437440f7372204b85acc4bfb76fe3fc20347a20b788bf445c131" To compare to a pre-computed checksum, the caller can specify the block_size using q query parameter: $ curl -k https://localhost:54322/images/nbd/checksum?block_size=2097152 | jq { "checksum": "777b9c2f6598d503d43c14a39b31cdd8aee9f48b475f2af0f4c668e33297016c", "algorithm": "blake2b", "block_size": 2097152 } Here are initial results: tool fedora-32[4] full-6g[5] empty-6g[6] empty-100g[7] empty-1t[8] ---------------------------------------------------------------------------- checksum 2.84 3.04 0.03 0.06 0.28 b2sum[9] 8.29 8.42 8.48 161.00 1648.64 [1] https://en.wikipedia.org/wiki/Hash_list [2] https://en.wikipedia.org/wiki/Ed2k_URI_scheme#eD2k_hash_algorithm [3] https://blake2.net/ [4] virt-builder fedora-32 -o fedora-32.raw --root-password=password:root [5] dd if=/zero/zero bs=8M count=768 of=full-6g.raw [6] truncate -s 6g empty-6g.raw [7] truncate -s 100g empty-100g.raw [8] truncate -s 1t empty-1t.raw [9] b2sum --length 256 {path} Change-Id: I0661daf6a36e3eee57ef54128782e2a9aa11943e Signed-off-by: Nir Soffer <nsoffer@redhat.com>
2020-08-12 03:07:43 +03:00
@pytest.mark.parametrize("algorithm,digest_size", [
    ("blake2b", 32),
    ("sha1", None),
])
def test_checksum_algorithm(tmpdir, algorithm, digest_size):
    """client.checksum() agrees with blkhash for non-default algorithms."""
    block_size = 1024**2
    # An empty 2 MiB raw image is enough to exercise the hashing path.
    img = str(tmpdir.join("img"))
    qemu_img.create(img, "raw", size="2m")
    expected = blkhash.checksum(
        img,
        block_size=block_size,
        algorithm=algorithm,
        digest_size=digest_size)
    actual = client.checksum(img, block_size=block_size, algorithm=algorithm)
    assert actual == expected
def test_zero_extents_raw(tmpdir):
    """Extents of a raw image: data, zeroed, and untouched areas."""
    size = 10 * 1024**2
    # Build an image with a data cluster, an explicitly zeroed cluster,
    # another data cluster, and an untouched tail.
    image = str(tmpdir.join("image.raw"))
    qemu_img.create(image, "raw", size=size)
    with qemu_nbd.open(image, "raw") as c:
        c.write(0 * CLUSTER_SIZE, b"A" * CLUSTER_SIZE)
        c.zero(1 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.write(2 * CLUSTER_SIZE, b"B" * CLUSTER_SIZE)
        c.flush()
    found = list(client.extents(image))
    # (cluster index, length, zero) for each reported extent. Unallocated
    # area in raw image is not reported as a hole, so hole is always False.
    layout = [
        (0, CLUSTER_SIZE, False),
        (1, CLUSTER_SIZE, True),
        (2, CLUSTER_SIZE, False),
        (3, size - 3 * CLUSTER_SIZE, True),
    ]
    expected = [
        ZeroExtent(
            start=n * CLUSTER_SIZE,
            length=length,
            zero=zero,
            hole=False)
        for n, length, zero in layout
    ]
    assert found == expected
def test_zero_extents_qcow2(tmpdir):
    """Extents of a qcow2 image with a backing file: data, zero and holes."""
    size = 10 * 1024**2
    # Base layer: one data cluster and one zeroed cluster.
    base = str(tmpdir.join("base.qcow2"))
    qemu_img.create(base, "qcow2", size=size)
    with qemu_nbd.open(base, "qcow2") as c:
        c.write(0 * CLUSTER_SIZE, b"A" * CLUSTER_SIZE)
        c.zero(1 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()
    # Top layer: one data cluster and one zeroed cluster at other offsets.
    top = str(tmpdir.join("top.qcow2"))
    qemu_img.create(
        top, "qcow2", backing_file=base, backing_format="qcow2")
    with qemu_nbd.open(top, "qcow2") as c:
        c.write(3 * CLUSTER_SIZE, b"B" * CLUSTER_SIZE)
        c.zero(4 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()
    found = list(client.extents(top))
    # (cluster index, length, zero, hole) for each reported extent.
    layout = [
        (0, CLUSTER_SIZE, False, False),           # data in base
        (1, CLUSTER_SIZE, True, False),            # zeroed in base
        (2, CLUSTER_SIZE, True, True),             # never allocated in base
        (3, CLUSTER_SIZE, False, False),           # data in top
        (4, CLUSTER_SIZE, True, False),            # zeroed in top
        (5, size - 5 * CLUSTER_SIZE, True, True),  # unallocated tail
    ]
    expected = [
        ZeroExtent(
            start=n * CLUSTER_SIZE,
            length=length,
            zero=zero,
            hole=hole)
        for n, length, zero, hole in layout
    ]
    assert found == expected
def test_zero_extents_from_ova(tmpdir):
    """client.extents() works for an image stored inside an OVA (tar) file."""
    size = 10 * 1024**2
    # Create image with data, zero and hole clusters.
    disk = str(tmpdir.join("disk.qcow2"))
    qemu_img.create(disk, "qcow2", size=size)
    with qemu_nbd.open(disk, "qcow2") as c:
        c.write(0 * CLUSTER_SIZE, b"A" * CLUSTER_SIZE)
        c.zero(1 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()
    # Create OVA with this image.
    ova = str(tmpdir.join("vm.ova"))
    with tarfile.open(ova, "w") as tar:
        tar.add(disk, arcname=os.path.basename(disk))
    extents = list(client.extents(ova, member="disk.qcow2"))
    assert extents == [
        # First cluster holds data.
        ZeroExtent(
            start=0 * CLUSTER_SIZE,
            length=CLUSTER_SIZE,
            zero=False,
            hole=False),
        # Broken since qemu-nbd 6.0.0.
        ZeroExtent(
            start=1 * CLUSTER_SIZE,
            length=CLUSTER_SIZE,
            zero=True,
            hole=False),
        # Unallocated tail is reported as a hole.
        ZeroExtent(
            start=2 * CLUSTER_SIZE,
            length=size - 2 * CLUSTER_SIZE,
            zero=True,
            hole=True),
    ]
def test_dirty_extents(tmpdir):
    """Dirty extents reported via a dirty bitmap on a qcow2 backing chain.

    Both layers carry a bitmap named "b0"; writes after bitmap creation
    mark clusters dirty only in the layer that received the write.
    """
    size = 1024**2
    # Create base image with empty dirty bitmap.
    base = str(tmpdir.join("base.qcow2"))
    qemu_img.create(base, "qcow2", size=size)
    qemu_img.bitmap_add(base, "b0")
    # Write data, modifying the dirty bitmap.
    with qemu_nbd.open(base, "qcow2") as c:
        c.write(0 * CLUSTER_SIZE, b"A" * CLUSTER_SIZE)
        c.zero(1 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()
    # Create top image with empty dirty bitmap.
    top = str(tmpdir.join("top.qcow2"))
    qemu_img.create(top, "qcow2", backing_file=base, backing_format="qcow2")
    qemu_img.bitmap_add(top, "b0")
    # Write data, modifying the dirty bitmap.
    with qemu_nbd.open(top, "qcow2") as c:
        c.write(3 * CLUSTER_SIZE, b"B" * CLUSTER_SIZE)
        c.zero(4 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()
    dirty_extents = list(client.extents(base, bitmap="b0"))
    expected = [
        # First cluster is dirty data.
        DirtyExtent(
            start=0 * CLUSTER_SIZE,
            length=1 * CLUSTER_SIZE,
            dirty=True,
            zero=False),
        # Second cluster is dirty zero.
        DirtyExtent(
            start=1 * CLUSTER_SIZE,
            length=1 * CLUSTER_SIZE,
            dirty=True,
            zero=True),
        # Third cluster is clean zero.
        DirtyExtent(
            start=2 * CLUSTER_SIZE,
            length=size - 2 * CLUSTER_SIZE,
            dirty=False,
            zero=True),
    ]
    log.debug("base image dirty extents: %s", dirty_extents)
    assert dirty_extents == expected
    dirty_extents = list(client.extents(top, bitmap="b0"))
    # Note: qemu-nbd reports dirty extents only for the top image, but zero
    # extents are read from the base image.
    expected = [
        # First cluster is clean data, read from base image.
        DirtyExtent(
            start=0 * CLUSTER_SIZE,
            length=1 * CLUSTER_SIZE,
            dirty=False,
            zero=False),
        # Second and third clusters are read from base image. Because they are
        # both clean zero, they are merged.
        DirtyExtent(
            start=1 * CLUSTER_SIZE,
            length=2 * CLUSTER_SIZE,
            dirty=False,
            zero=True),
        # Fourth cluster is a data extent modified in top image.
        DirtyExtent(
            start=3 * CLUSTER_SIZE,
            length=1 * CLUSTER_SIZE,
            dirty=True,
            zero=False),
        # Fifth cluster is a zero extent modified in top image.
        DirtyExtent(
            start=4 * CLUSTER_SIZE,
            length=1 * CLUSTER_SIZE,
            dirty=True,
            zero=True),
        # The rest is clean zero extent.
        DirtyExtent(
            start=5 * CLUSTER_SIZE,
            length=size - 5 * CLUSTER_SIZE,
            dirty=False,
            zero=True),
    ]
    log.debug("top image dirty extents: %s", dirty_extents)
    assert dirty_extents == expected
@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_stress(srv, nbd_server, tmpdir, fmt):
    """Repeated upload/download round-trips succeed.

    Regression test: this used to fail randomly when the executor closed
    the destination backend before it was cloned by the workers.
    """
    size = 10 * 1024**2
    ca_file = srv.config.tls.ca_file
    # Create empty source and destination images.
    src = str(tmpdir.join("src." + fmt))
    dst = str(tmpdir.join("dst." + fmt))
    qemu_img.create(src, fmt, size=size)
    qemu_img.create(dst, fmt, size=size)
    # Serve the destination image over NBD and transfer repeatedly.
    nbd_server.image = dst
    nbd_server.fmt = fmt
    nbd_server.start()
    url = prepare_transfer(srv, nbd_server.sock.url(), size=size)
    for _ in range(20):
        client.upload(src, url, ca_file)
        client.download(url, src, ca_file, fmt=fmt)
    nbd_server.stop()
def test_concurrent_downloads(srv, tmpdir):
    """Serve 10 concurrent transfers, assuming 4 connections per client.

    https://bugzilla.redhat.com/2066113
    """
    size = 10 * 1024**2
    ca_file = srv.config.tls.ca_file
    transfers = []
    try:
        # Prepare 10 independent transfers, each served by its own
        # qemu-nbd server over a unix socket.
        for i in range(10):
            src = str(tmpdir.join(f"{i:02d}.src.qcow2"))
            qemu_img.create(src, "qcow2", size=size)
            dst = str(tmpdir.join(f"{i:02d}.dst.qcow2"))
            qemu_img.create(dst, "qcow2", size=size)
            sock = str(tmpdir.join(f"{i:02d}.sock"))
            server = qemu_nbd.Server(src, "qcow2", nbd.UnixAddress(sock))
            url = prepare_transfer(srv, server.sock.url(), size=size)
            server.start()
            transfers.append((server, url, dst))
        # Run all downloads in parallel and propagate any failure.
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [
                executor.submit(client.download, url, dst, ca_file)
                for _, url, dst in transfers
            ]
            for future in as_completed(futures):
                future.result()
    finally:
        for server, _, _ in transfers:
            server.stop()