# SPDX-FileCopyrightText: Red Hat, Inc.
# SPDX-License-Identifier: GPL-2.0-or-later
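
"""
Tests for the ovirt_imageio client module: upload(), download(), info(),
measure() and checksum().
"""
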
import os
import tarfile
import logging

from concurrent.futures import ThreadPoolExecutor, as_completed

import pytest

from ovirt_imageio import client
from ovirt_imageio._internal import blkhash
from ovirt_imageio._internal import config
from ovirt_imageio._internal import ipv6
from ovirt_imageio._internal import nbd
from ovirt_imageio._internal import nbdutil
from ovirt_imageio._internal import qemu_img
from ovirt_imageio._internal import qemu_nbd
from ovirt_imageio._internal import server

from ovirt_imageio._internal.extent import ZeroExtent, DirtyExtent

from . import testutil
from . import ci

log = logging.getLogger("test")

CLUSTER_SIZE = 64 * 1024
IMAGE_SIZE = 3 * CLUSTER_SIZE


@pytest.fixture(scope="module")
def srv():
    cfg = config.load(["test/conf/daemon.conf"])
    s = server.Server(cfg)
    s.start()
    yield s
    s.stop()


def prepare_transfer(srv, url, sparse=True, size=IMAGE_SIZE):
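    """
    Add a ticket for url to the test server and return the transfer URL
    ("https://host:port/images/{ticket-uuid}") for accessing it.
    """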
    ticket = testutil.create_ticket(
        url=url,
        size=size,
        sparse=sparse,
        ops=["read", "write"])

    srv.auth.add(ticket)

    host, port = srv.remote_service.address
    host = ipv6.quote_address(host)
    return "https://{}:{}/images/{}".format(host, port, ticket["uuid"])


class FakeProgress:
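    """
    Collect progress updates from client.upload() for verification.

    upload() sets size and calls update() with the number of bytes
    transferred after every write or zero request.
    """
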
    def __init__(self):
        self.size = None
        self.updates = []

    def update(self, n):
        self.updates.append(n)


# TODO:
# - verify that upload optimizes the upload using the unix socket. Need a way
#   to enable only OPTIONS on the remote server.
# - verify that upload falls back to HTTPS if the server does not support the
#   unix socket. We don't have a way to disable the unix socket currently.
# - verify that upload falls back to HTTPS if the server supports the unix
#   socket but is not the local host. Probably not feasible for these tests,
#   unless we can start a daemon on another host.
# - Test negative flows


@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_upload_empty_sparse(tmpdir, srv, fmt):
    src = str(tmpdir.join("src"))
    qemu_img.create(src, fmt, size=IMAGE_SIZE)

    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.write(b"a" * IMAGE_SIZE)

    url = prepare_transfer(srv, "file://" + dst)

    client.upload(src, url, srv.config.tls.ca_file)

    # We cannot compare allocation of raw and qcow2 images since qemu-img
    # reports holes only in qcow2 images.
    qemu_img.compare(src, dst, format1=fmt, format2="raw", strict=fmt == "raw")


@pytest.mark.parametrize("fmt", [
    pytest.param(
        "raw",
        marks=pytest.mark.xfail(
            ci.is_ovirt(),
            reason="Broken in oVirt CI for unknown reason")
    ),
    "qcow2"
])
def test_upload_hole_at_start_sparse(tmpdir, srv, fmt):
    size = 3 * 1024**2

    src = str(tmpdir.join("src"))
    qemu_img.create(src, fmt, size=size)
    with qemu_nbd.open(src, fmt) as c:
        c.write(size - 1024**2, b"b" * 1024**2)
        c.flush()
        log.debug("src extents: %s", list(nbdutil.extents(c)))

    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.write(b"a" * size)

    url = prepare_transfer(srv, "file://" + dst, size=size)

    client.upload(src, url, srv.config.tls.ca_file)

    with qemu_nbd.open(dst, "raw", read_only=True) as c:
        log.debug("dst extents: %s", list(nbdutil.extents(c)))

    qemu_img.compare(src, dst, format1=fmt, format2="raw", strict=fmt == "raw")


@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_upload_hole_at_middle_sparse(tmpdir, srv, fmt):
    size = 3 * 1024**2

    src = str(tmpdir.join("src"))
    qemu_img.create(src, fmt, size=size)
    with qemu_nbd.open(src, fmt) as c:
        c.write(0, b"b" * 1024**2)
        c.write(size - 1024**2, b"b" * 1024**2)
        c.flush()

    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.write(b"a" * size)

    url = prepare_transfer(srv, "file://" + dst, size=size)

    client.upload(src, url, srv.config.tls.ca_file)

    qemu_img.compare(src, dst, format1=fmt, format2="raw", strict=fmt == "raw")


@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_upload_hole_at_end_sparse(tmpdir, srv, fmt):
    size = 3 * 1024**2

    src = str(tmpdir.join("src"))
    qemu_img.create(src, fmt, size=size)
    with qemu_nbd.open(src, fmt) as c:
        c.write(0, b"b" * 1024**2)
        c.flush()

    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.write(b"a" * size)

    url = prepare_transfer(srv, "file://" + dst, size=size)

    client.upload(src, url, srv.config.tls.ca_file)

    qemu_img.compare(src, dst, format1=fmt, format2="raw", strict=fmt == "raw")


@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_upload_full_sparse(tmpdir, srv, fmt):
    src = str(tmpdir.join("src"))
    qemu_img.create(src, fmt, size=IMAGE_SIZE)
    with qemu_nbd.open(src, fmt) as c:
        c.write(0, b"b" * IMAGE_SIZE)
        c.flush()

    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.write(b"a" * IMAGE_SIZE)

    url = prepare_transfer(srv, "file://" + dst)

    client.upload(src, url, srv.config.tls.ca_file)

    qemu_img.compare(src, dst, strict=True)


@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_upload_preallocated(tmpdir, srv, fmt):
    src = str(tmpdir.join("src"))
    qemu_img.create(src, fmt, size=IMAGE_SIZE)

    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.write(b"a" * IMAGE_SIZE)

    url = prepare_transfer(srv, "file://" + dst, sparse=False)

    client.upload(src, url, srv.config.tls.ca_file)

    qemu_img.compare(src, dst)
    assert os.stat(dst).st_blocks * 512 == IMAGE_SIZE


@pytest.mark.parametrize("fmt,compressed", [
    ("raw", False),
    ("qcow2", False),
    ("qcow2", True),
])
def test_upload_from_ova(tmpdir, srv, fmt, compressed):
    offset = CLUSTER_SIZE
    data = b"I can eat glass and it doesn't hurt me."

    # Create raw disk with some data.
    tmp = str(tmpdir.join("tmp"))
    with open(tmp, "wb") as f:
        f.truncate(IMAGE_SIZE)
        f.seek(offset)
        f.write(data)

    # Create source disk.
    src = str(tmpdir.join("src"))
    qemu_img.convert(tmp, src, "raw", fmt, compressed=compressed)

    # Create OVA package.
    ova = str(tmpdir.join("src.ova"))
    with tarfile.open(ova, "w") as tar:
        tar.add(src, arcname=os.path.basename(src))

    # Prepare destination file.
    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.truncate(IMAGE_SIZE)

    # Test uploading src from ova.
    url = prepare_transfer(srv, "file://" + dst)
    client.upload(
        ova,
        url,
        srv.config.tls.ca_file,
        member=os.path.basename(src))

    qemu_img.compare(src, dst)


@pytest.mark.parametrize("disk_is_zero", [True, False])
@pytest.mark.parametrize("src_fmt", ["raw", "qcow2"])
@pytest.mark.parametrize("dst_fmt", ["raw", "qcow2"])
def test_upload_nbd(srv, nbd_server, tmpdir, src_fmt, dst_fmt, disk_is_zero):
    size = 5 * CLUSTER_SIZE

    # Create qcow2 src image with all kinds of extents.
    src = str(tmpdir.join("src." + src_fmt))
    qemu_img.create(src, src_fmt, size=size)
    with qemu_nbd.open(src, src_fmt) as c:
        # Cluster 0: data.
        c.write(0 * CLUSTER_SIZE, b"A" * CLUSTER_SIZE)
        # Cluster 1: data, zero.
        c.write(1 * CLUSTER_SIZE, b"\0" * CLUSTER_SIZE)
        # Cluster 2: qcow2: zero cluster, raw: hole.
        c.zero(2 * CLUSTER_SIZE, CLUSTER_SIZE, punch_hole=True)
        # Cluster 3: qcow2: zero cluster, raw: allocated.
        c.zero(3 * CLUSTER_SIZE, CLUSTER_SIZE, punch_hole=False)
        # Cluster 4: unallocated.
        c.flush()
        log.debug("src extents: %s", list(nbdutil.extents(c)))

    # Create empty target image.
    dst = str(tmpdir.join("dst." + dst_fmt))
    qemu_img.create(dst, dst_fmt, size=size)

    # Upload image.
    nbd_server.image = dst
    nbd_server.fmt = dst_fmt
    nbd_server.start()
    url = prepare_transfer(srv, nbd_server.sock.url(), size=size)
    client.upload(
        src,
        url,
        srv.config.tls.ca_file,
        disk_is_zero=disk_is_zero)
    nbd_server.stop()

    with qemu_nbd.open(dst, dst_fmt, read_only=True) as c:
        log.debug("dst extents: %s", list(nbdutil.extents(c)))

    # Compare image content. We cannot compare allocation since we skip zeroes
    # and sparsify the destination image.
    qemu_img.compare(src, dst, format1=src_fmt, format2=dst_fmt, strict=False)


@pytest.mark.parametrize("disk_is_zero", [True, False])
@pytest.mark.parametrize("base_fmt", ["raw", "qcow2"])
def test_upload_nbd_shallow(srv, nbd_server, tmpdir, base_fmt, disk_is_zero):
    size = 10 * 1024**2

    # Create base image with some data in first 3 clusters.
    src_base = str(tmpdir.join("src_base." + base_fmt))
    qemu_img.create(src_base, base_fmt, size=size)
    with qemu_nbd.open(src_base, base_fmt) as c:
        c.write(0 * CLUSTER_SIZE, b"a" * CLUSTER_SIZE)
        c.write(1 * CLUSTER_SIZE, b"b" * CLUSTER_SIZE)
        c.write(2 * CLUSTER_SIZE, b"c" * CLUSTER_SIZE)
        c.flush()
        log.debug("src_base extents: %s", list(nbdutil.extents(c)))

    # Create src image with some data in second cluster and zero in third
    # cluster.
    src_top = str(tmpdir.join("src_top.qcow2"))
    qemu_img.create(
        src_top, "qcow2", backing_file=src_base, backing_format=base_fmt)
    with qemu_nbd.open(src_top, "qcow2") as c:
        c.write(1 * CLUSTER_SIZE, b"B" * CLUSTER_SIZE)
        c.zero(2 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()

    with qemu_nbd.open(
            src_top, "qcow2", read_only=True, backing_chain=False) as c:
        log.debug("src_top extents: %s", list(nbdutil.extents(c)))

    # Create empty destination base image.
    dst_base = str(tmpdir.join("dst_base." + base_fmt))
    qemu_img.create(dst_base, base_fmt, size=size)

    # Create empty destination top image.
    dst_top = str(tmpdir.join("dst_top.qcow2"))
    qemu_img.create(
        dst_top, "qcow2", backing_file=dst_base, backing_format=base_fmt)

    # Upload base image.
    nbd_server.image = dst_base
    nbd_server.fmt = base_fmt
    nbd_server.start()
    url = prepare_transfer(srv, nbd_server.sock.url(), size=size)
    client.upload(
        src_base,
        url,
        srv.config.tls.ca_file,
        disk_is_zero=disk_is_zero,
        backing_chain=False)
    nbd_server.stop()

    with qemu_nbd.open(dst_base, base_fmt, read_only=True) as c:
        log.debug("dst_base extents: %s", list(nbdutil.extents(c)))

    # Compare image content - must match.
    qemu_img.compare(
        src_base, dst_base, format1=base_fmt, format2=base_fmt, strict=False)

    # Comparing allocation is not possible with raw format since qemu-nbd
    # does not report allocation for raw images, so we treat unallocated
    # areas as zero areas instead of holes.
    if base_fmt == "qcow2":
        qemu_img.compare(
            src_base, dst_base, format1=base_fmt, format2=base_fmt,
            strict=True)

    # Upload top image.
    nbd_server.image = dst_top
    nbd_server.fmt = "qcow2"
    nbd_server.start()
    url = prepare_transfer(srv, nbd_server.sock.url(), size=size)
    client.upload(
        src_top,
        url,
        srv.config.tls.ca_file,
        disk_is_zero=disk_is_zero,
        backing_chain=False)
    nbd_server.stop()

    with qemu_nbd.open(
            dst_top, "qcow2", read_only=True, backing_chain=False) as c:
        log.debug("dst_top extents: %s", list(nbdutil.extents(c)))

    # Test image content - must match.
    qemu_img.compare(
        src_top, dst_top, format1="qcow2", format2="qcow2", strict=False)

    # Compare allocation for qcow2 chain - nice to have.
    if base_fmt == "qcow2":
        qemu_img.compare(
            src_top, dst_top, format1="qcow2", format2="qcow2", strict=True)


@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_download_raw(tmpdir, srv, fmt):
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.truncate(IMAGE_SIZE)
        f.seek(IMAGE_SIZE // 2)
        f.write(b"data")

    url = prepare_transfer(srv, "file://" + src)
    dst = str(tmpdir.join("dst"))

    # When we download raw data, we can convert it on-the-fly to another
    # format.
    client.download(url, dst, srv.config.tls.ca_file, fmt=fmt)

    # The file backend does not support extents, so downloaded data is always
    # fully allocated.
    qemu_img.compare(src, dst, format1="raw", format2=fmt)


def test_download_qcow2_as_raw(tmpdir, srv):
    src = str(tmpdir.join("src.qcow2"))
    qemu_img.create(src, "qcow2", size=IMAGE_SIZE)

    # Allocate one cluster in the middle of the image.
    with qemu_nbd.open(src, "qcow2") as c:
        c.write(CLUSTER_SIZE, b"a" * CLUSTER_SIZE)
        c.flush()

    actual_size = os.path.getsize(src)
    url = prepare_transfer(srv, "file://" + src, size=actual_size)
    dst = str(tmpdir.join("dst.qcow2"))

    # When downloading a qcow2 image using the nbd backend, we get raw data
    # and we can convert it to any format we want. However, when downloading
    # using the file backend, we get a qcow2 bytestream and we cannot convert
    # it.
    #
    # To store the qcow2 bytestream, we must use fmt="raw". This instructs
    # qemu-nbd on the client side to treat the data as raw bytes, storing them
    # without any change on the local file.
    #
    # This is basically like:
    #
    #   qemu-img convert -f raw -O raw src.qcow2 dst.qcow2
    #
    client.download(url, dst, srv.config.tls.ca_file, fmt="raw")

    # The result should be identical qcow2 image content. Allocation may
    # differ, but for this test we get identical allocation.
    qemu_img.compare(src, dst, format1="qcow2", format2="qcow2", strict=True)


@pytest.mark.parametrize("base_fmt", ["raw", "qcow2"])
def test_download_shallow(srv, nbd_server, tmpdir, base_fmt):
    size = 10 * 1024**2

    # Create source base image with some data in first clusters.
    src_base = str(tmpdir.join("src_base." + base_fmt))
    qemu_img.create(src_base, base_fmt, size=size)
    with qemu_nbd.open(src_base, base_fmt) as c:
        c.write(0 * CLUSTER_SIZE, b"a" * CLUSTER_SIZE)
        c.write(1 * CLUSTER_SIZE, b"b" * CLUSTER_SIZE)
        c.write(2 * CLUSTER_SIZE, b"c" * CLUSTER_SIZE)
        c.flush()
        log.debug("src_base extents: %s", list(nbdutil.extents(c)))

    # Create source top image with some data in second cluster and zero in
    # the third cluster.
    src_top = str(tmpdir.join("src_top.qcow2"))
    qemu_img.create(
        src_top, "qcow2", backing_file=src_base, backing_format=base_fmt)
    with qemu_nbd.open(src_top, "qcow2") as c:
        c.write(1 * CLUSTER_SIZE, b"B" * CLUSTER_SIZE)
        c.zero(2 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()

    with qemu_nbd.open(
            src_top, "qcow2", read_only=True, backing_chain=False) as c:
        log.debug("src_top extents: %s", list(nbdutil.extents(c)))

    dst_base = str(tmpdir.join("dst_base." + base_fmt))
    dst_top = str(tmpdir.join("dst_top.qcow2"))

    # Download base image.
    nbd_server.image = src_base
    nbd_server.fmt = base_fmt
    nbd_server.start()
    url = prepare_transfer(srv, nbd_server.sock.url(), size=size)
    client.download(
        url,
        dst_base,
        srv.config.tls.ca_file,
        fmt=base_fmt)
    nbd_server.stop()

    with qemu_nbd.open(dst_base, base_fmt, read_only=True) as c:
        log.debug("dst_base extents: %s", list(nbdutil.extents(c)))

    # Compare image content - must match.
    qemu_img.compare(
        src_base, dst_base, format1=base_fmt, format2=base_fmt, strict=False)

    # And allocation - nice to have.
    if base_fmt == "qcow2" or qemu_nbd.version() >= (6, 0, 0):
        qemu_img.compare(
            src_base, dst_base, format1=base_fmt, format2=base_fmt,
            strict=True)

    # Download top image.
    nbd_server.image = src_top
    nbd_server.fmt = "qcow2"
    nbd_server.backing_chain = False
    nbd_server.start()
    url = prepare_transfer(srv, nbd_server.sock.url(), size=size)
    client.download(
        url,
        dst_top,
        srv.config.tls.ca_file,
        backing_file=dst_base,
        backing_format=base_fmt)
    nbd_server.stop()

    with qemu_nbd.open(
            dst_top, "qcow2", read_only=True, backing_chain=False) as c:
        log.debug("dst_top extents: %s", list(nbdutil.extents(c)))

    # Compare image content - must match.
    qemu_img.compare(
        src_top, dst_top, format1="qcow2", format2="qcow2", strict=False)

    # And allocation - nice to have.
    qemu_img.compare(
        src_top, dst_top, format1="qcow2", format2="qcow2", strict=True)


def test_upload_proxy_url(tmpdir, srv):
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.truncate(IMAGE_SIZE)

    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.truncate(IMAGE_SIZE)

    # If transfer_url is not accessible, proxy_url is used.
    transfer_url = "https://no.server:54322/images/no-ticket"
    proxy_url = prepare_transfer(srv, "file://" + dst)

    client.upload(src, transfer_url, srv.config.tls.ca_file,
                  proxy_url=proxy_url)

    qemu_img.compare(src, dst, format1="raw", format2="raw", strict=True)


def test_upload_proxy_url_unused(tmpdir, srv):
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.truncate(IMAGE_SIZE)

    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.truncate(IMAGE_SIZE)

    # If transfer_url is accessible, proxy_url is not used.
    transfer_url = prepare_transfer(srv, "file://" + dst)
    proxy_url = "https://no.proxy:54322/images/no-ticket"

    client.upload(src, transfer_url, srv.config.tls.ca_file,
                  proxy_url=proxy_url)

    qemu_img.compare(src, dst, format1="raw", format2="raw", strict=True)


def test_download_proxy_url(tmpdir, srv):
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.truncate(IMAGE_SIZE)

    dst = str(tmpdir.join("dst"))

    # If transfer_url is not accessible, proxy_url is used.
    transfer_url = "https://no.server:54322/images/no-ticket"
    proxy_url = prepare_transfer(srv, "file://" + src)

    client.download(transfer_url, dst, srv.config.tls.ca_file, fmt="raw",
                    proxy_url=proxy_url)

    qemu_img.compare(src, dst, format1="raw", format2="raw")


def test_download_proxy_url_unused(tmpdir, srv):
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.truncate(IMAGE_SIZE)

    dst = str(tmpdir.join("dst"))

    # If transfer_url is accessible, proxy_url is not used.
    transfer_url = prepare_transfer(srv, "file://" + src)
    proxy_url = "https://no.proxy:54322/images/no-ticket"

    client.download(transfer_url, dst, srv.config.tls.ca_file, fmt="raw",
                    proxy_url=proxy_url)

    qemu_img.compare(src, dst, format1="raw", format2="raw")


def test_progress(tmpdir, srv):
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.write(b"b" * 4096)
        f.seek(IMAGE_SIZE // 2)
        f.write(b"b" * 4096)
        f.truncate(IMAGE_SIZE)

    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.truncate(IMAGE_SIZE)

    url = prepare_transfer(srv, "file://" + dst, sparse=True)

    progress = FakeProgress()
    client.upload(
        src, url, srv.config.tls.ca_file, progress=progress)

    assert progress.size == IMAGE_SIZE

    # Note: when using multiple connections order of updates is not
    # predictable.
    assert set(progress.updates) == {
        # First write.
        4096,
        # First zero.
        IMAGE_SIZE // 2 - 4096,
        # Second write.
        4096,
        # Second zero.
        IMAGE_SIZE // 2 - 4096,
    }


def test_progress_callback(tmpdir, srv):
    src = str(tmpdir.join("src"))
    with open(src, "wb") as f:
        f.truncate(IMAGE_SIZE)

    dst = str(tmpdir.join("dst"))
    with open(dst, "wb") as f:
        f.truncate(IMAGE_SIZE)

    url = prepare_transfer(srv, "file://" + dst, size=IMAGE_SIZE, sparse=True)

    progress = []
    client.upload(
        src,
        url,
        srv.config.tls.ca_file,
        progress=progress.append)

    assert progress == [IMAGE_SIZE]


@pytest.mark.parametrize("fmt,compressed", [
    ("raw", False),
    ("qcow2", False),
    ("qcow2", True),
])
def test_info(tmpdir, fmt, compressed):
    # Create temporary file with some data.
    size = 2 * 1024**2
    tmp = str(tmpdir.join("tmp"))
    with open(tmp, "wb") as f:
        f.truncate(size)
        f.write(b"x" * CLUSTER_SIZE)

    # Create test image from temporary file.
    img = str(tmpdir.join("img"))
    qemu_img.convert(tmp, img, "raw", fmt, compressed=compressed)

    img_info = client.info(img)

    # Check image info.
    assert img_info["format"] == fmt
    assert img_info["virtual-size"] == size

    # We don't add member info if member was not specified.
    assert "member-offset" not in img_info
    assert "member-size" not in img_info

    # Create ova with test image.
    member = os.path.basename(img)
    ova = str(tmpdir.join("ova"))
    with tarfile.open(ova, "w") as tar:
        tar.add(img, arcname=member)

    # Get info for the member from the ova.
    ova_info = client.info(ova, member=member)

    # Image info from ova should be the same.
    assert ova_info["format"] == fmt
    assert ova_info["virtual-size"] == size

    # If member was specified, we also report the offset and size.
    with tarfile.open(ova) as tar:
        member_info = tar.getmember(member)
    assert ova_info["member-offset"] == member_info.offset_data
    assert ova_info["member-size"] == member_info.size
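

# A rough sketch (not used by the tests) of the "json:" URI trick that
# client.info() is documented to use: the tar member is exposed through the
# raw driver with offset and size taken from the tar header, and the top
# level "driver" is left out so qemu-img probes the image format. This is
# illustrative only, not the client implementation.
def _info_member_sketch(tar_path, member):
    # Local imports keep the sketch self-contained.
    import json
    import subprocess

    with tarfile.open(tar_path) as tar:
        member_info = tar.getmember(member)

    uri = "json:" + json.dumps({
        "file": {
            "driver": "raw",
            "offset": member_info.offset_data,
            "size": member_info.size,
            "file": {"driver": "file", "filename": tar_path},
        },
    })
    out = subprocess.check_output(
        ["qemu-img", "info", "--output", "json", uri])
    return json.loads(out)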


@pytest.mark.parametrize("fmt,compressed", [
    ("raw", False),
    ("qcow2", False),
    ("qcow2", True),
])
def test_measure_to_raw(tmpdir, fmt, compressed):
    # Create temporary file with some data.
    size = 2 * 1024**2
    tmp = str(tmpdir.join("tmp"))
    with open(tmp, "wb") as f:
        f.truncate(size)
        f.write(b"x" * CLUSTER_SIZE)

    # Create test image from temporary file.
    img = str(tmpdir.join("img"))
    qemu_img.convert(tmp, img, "raw", fmt, compressed=compressed)

    measure = client.measure(img, "raw")
    assert measure["required"] == size


@pytest.mark.parametrize("fmt,compressed", [
    ("raw", False),
    ("qcow2", False),
    ("qcow2", True),
])
def test_measure_to_qcow2(tmpdir, fmt, compressed):
    # Create temporary file with some data.
    size = 2 * 1024**2
    tmp = str(tmpdir.join("tmp"))
    with open(tmp, "wb") as f:
        f.truncate(size)
        f.write(b"x" * CLUSTER_SIZE)

    # Create test image from temporary file.
    img = str(tmpdir.join("img"))
    qemu_img.convert(tmp, img, "raw", fmt, compressed=compressed)

    measure = client.measure(img, "qcow2")
    assert measure["required"] == 393216
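

# A rough sketch (not used by the tests) of what client.measure() is
# documented to do for a plain image file: run "qemu-img measure" and report
# the size required for converting the image to dst_fmt (the "required" and
# "fully-allocated" keys). For tar members the client is documented to use
# the same "json:" URI trick as info(). Illustrative only, not the client
# implementation.
def _measure_sketch(filename, dst_fmt):
    # Local imports keep the sketch self-contained.
    import json
    import subprocess

    out = subprocess.check_output(
        ["qemu-img", "measure", "-O", dst_fmt, "--output", "json", filename])
    return json.loads(out)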


@pytest.mark.parametrize("compressed", [False, True])
@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_measure_from_ova(tmpdir, compressed, fmt):
    # Create temporary file with some data.
    size = 2 * 1024**2
    tmp = str(tmpdir.join("tmp"))
    with open(tmp, "wb") as f:
        f.truncate(size)
        f.write(b"x" * CLUSTER_SIZE)

    # Create test image from temporary file.
    img = str(tmpdir.join("img"))
    qemu_img.convert(tmp, img, "raw", "qcow2", compressed=compressed)

    # Measure the image.
    img_measure = client.measure(img, fmt)

    # We don't add member info if member was not specified.
    assert "member-offset" not in img_measure
    assert "member-size" not in img_measure

    # Add test image to ova.
    member = os.path.basename(img)
    ova = str(tmpdir.join("ova"))
    with tarfile.open(ova, "w") as tar:
        tar.add(img, arcname=member)

    # Measure the image from the ova.
    ova_measure = client.measure(ova, fmt, member=member)

    # Measurement from ova should be the same.
    assert ova_measure["required"] == img_measure["required"]
    assert ova_measure["fully-allocated"] == img_measure["fully-allocated"]

    # If member was specified, we also report the offset and size.
    with tarfile.open(ova) as tar:
        member_info = tar.getmember(member)
    assert ova_measure["member-offset"] == member_info.offset_data
    assert ova_measure["member-size"] == member_info.size


@pytest.mark.parametrize("fmt,compressed", [
    ("raw", False),
    ("qcow2", False),
    ("qcow2", True),
])
def test_checksum(tmpdir, fmt, compressed):
    # Create temporary file with some data.
    size = 2 * 1024**2
    tmp = str(tmpdir.join("tmp"))
    with open(tmp, "wb") as f:
        f.truncate(size)
        f.write(b"x" * CLUSTER_SIZE)

    # Create test image from temporary file.
    img = str(tmpdir.join("img"))
    qemu_img.convert(tmp, img, "raw", fmt, compressed=compressed)
    expected = blkhash.checksum(tmp, block_size=1024**2)
    actual = client.checksum(img, block_size=1024**2)
    assert actual == expected
@pytest.mark.parametrize("fmt,compressed", [
    ("raw", False),
    ("qcow2", False),
    ("qcow2", True),
])
def test_checksum_from_ova(tmpdir, fmt, compressed):
    # Create temporary file with some data.
    size = 2 * 1024**2
    tmp = str(tmpdir.join("tmp"))
    with open(tmp, "wb") as f:
        f.truncate(size)
        f.write(b"x" * CLUSTER_SIZE)

    # Create test image from temporary file.
    img = str(tmpdir.join("img"))
    qemu_img.convert(tmp, img, "raw", fmt, compressed=compressed)

    # Add test image to ova.
    member = os.path.basename(img)
    ova = str(tmpdir.join("ova"))
    with tarfile.open(ova, "w") as tar:
        tar.add(img, arcname=member)
    expected = blkhash.checksum(tmp, block_size=1024**2)
    actual = client.checksum(ova, member=member, block_size=1024**2)
    assert actual == expected
@pytest.mark.parametrize("algorithm,digest_size", [
    ("blake2b", 32),
    ("sha1", None),
])
def test_checksum_algorithm(tmpdir, algorithm, digest_size):
    img = str(tmpdir.join("img"))
    qemu_img.create(img, "raw", size="2m")
    expected = blkhash.checksum(
        img, block_size=1024**2, algorithm=algorithm, digest_size=digest_size)
    actual = client.checksum(img, block_size=1024**2, algorithm=algorithm)
    assert actual == expected
2020-10-16 22:58:05 +03:00
def test_zero_extents_raw(tmpdir):
    size = 10 * 1024**2

    # Create image with some data, zero and holes.
    image = str(tmpdir.join("image.raw"))
    qemu_img.create(image, "raw", size=size)
    with qemu_nbd.open(image, "raw") as c:
        c.write(0 * CLUSTER_SIZE, b"A" * CLUSTER_SIZE)
        c.zero(1 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.write(2 * CLUSTER_SIZE, b"B" * CLUSTER_SIZE)
        c.flush()

    extents = list(client.extents(image))
nbd: Use qemu:allocation-depth meta context
Since qemu 5.2.0 qemu-nbd provides the --allocation-depth option, adding
a new meta context, "qemu:allocation-depth"[1]. This context provides
reliable information about extent allocation, unlike the
"base:allocation" NBD_STATE_HOLE bit, which is optional.
Allocation depth reports a 32 bit value for every extent:
- 0: Extent does not exist. If the image has a backing chain, the
content is determined by the backing chain.
- 1: Extent is part of the top layer in the backing chain
- N > 1: Extent is part of layer N.
We don't care about the specific depth, only about non-existing extents,
which we want to report as "hole" in the /extents response. When copying
a single qcow2 image, we *must* skip these holes and leave them as
unallocated areas in the target image.
Extents now have an EXTENT_BACKING bit, marking non-existing extents
(depth=0) that expose information from the backing chain. This bit is
used to report a hole instead of the NBD_STATE_HOLE bit. See [3] for
more info on this.
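For example, a consumer of client.extents() can use the resulting hole
flag to copy only existing data. This is a minimal sketch; src_con and
dst are assumed connection objects exposing read(offset, length),
write(offset, data) and zero(offset, length), and copy_existing() is an
illustrative name, not part of the client API:
from ovirt_imageio import client

def copy_existing(path, src_con, dst):
    for extent in client.extents(path):
        if extent.hole:
            # depth == 0: the extent does not exist in this image;
            # leave it unallocated in the target.
            continue
        if extent.zero:
            dst.zero(extent.start, extent.length)
        else:
            data = src_con.read(extent.start, extent.length)
            dst.write(extent.start, data)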
A bad side effect of this change is that we again cannot report holes
in raw images, since a raw image always reports depth=1. However this
is not a practical issue for users.
With this change, the tests broken with qemu-6.0.0 are fixed, and silent
data corruption caused by a change in qemu behavior[2] is avoided.
Our backup helper module uses legacy APIs, so "qemu:allocation-depth" is
not available yet when testing backup flows.
[1] https://github.com/qemu/qemu/blob/master/docs/interop/nbd.txt
[2] https://bugzilla.redhat.com/1968693
[3] https://lists.nongnu.org/archive/html/qemu-block/2021-06/msg00756.html
Change-Id: Ibc3d285d967516533a6e925d6ca7299750c85b09
Bug-Url: https://bugzilla.redhat.com/1971182
Signed-off-by: Nir Soffer <nsoffer@redhat.com>
2021-06-12 20:32:02 +03:00
    # Unallocated area in raw image is not reported as a hole.
    assert extents == [
        ZeroExtent(
            start=0 * CLUSTER_SIZE,
            length=CLUSTER_SIZE,
            zero=False,
            hole=False),
        ZeroExtent(
            start=1 * CLUSTER_SIZE,
            length=CLUSTER_SIZE,
            zero=True,
            hole=False),
        ZeroExtent(
            start=2 * CLUSTER_SIZE,
            length=CLUSTER_SIZE,
            zero=False,
            hole=False),
        ZeroExtent(
            start=3 * CLUSTER_SIZE,
            length=size - 3 * CLUSTER_SIZE,
            zero=True,
            hole=False),
    ]
def test_zero_extents_qcow2(tmpdir):
    size = 10 * 1024**2

    # Create base image with one data and one zero cluster.
    base = str(tmpdir.join("base.qcow2"))
    qemu_img.create(base, "qcow2", size=size)
    with qemu_nbd.open(base, "qcow2") as c:
        c.write(0 * CLUSTER_SIZE, b"A" * CLUSTER_SIZE)
        c.zero(1 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()

    # Create top image with one data and one zero cluster.
    top = str(tmpdir.join("top.qcow2"))
    qemu_img.create(
        top, "qcow2", backing_file=base, backing_format="qcow2")
    with qemu_nbd.open(top, "qcow2") as c:
        c.write(3 * CLUSTER_SIZE, b"B" * CLUSTER_SIZE)
        c.zero(4 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()

    extents = list(client.extents(top))
    assert extents == [
        # Extents from base...
        ZeroExtent(
            start=0 * CLUSTER_SIZE,
            length=CLUSTER_SIZE,
            zero=False,
            hole=False),
        ZeroExtent(
            start=1 * CLUSTER_SIZE,
            length=CLUSTER_SIZE,
            zero=True,
            hole=False),
        ZeroExtent(
            start=2 * CLUSTER_SIZE,
            length=CLUSTER_SIZE,
            zero=True,
            hole=True),
        # Extents from top...
        ZeroExtent(
            start=3 * CLUSTER_SIZE,
            length=CLUSTER_SIZE,
            zero=False,
            hole=False),
        ZeroExtent(
            start=4 * CLUSTER_SIZE,
            length=CLUSTER_SIZE,
            zero=True,
            hole=False),
        # Rest of unallocated data...
        ZeroExtent(
            start=5 * CLUSTER_SIZE,
            length=size - 5 * CLUSTER_SIZE,
            zero=True,
            hole=True),
    ]
def test_zero_extents_from_ova(tmpdir):
    size = 10 * 1024**2

    # Create image with data, zero and hole clusters.
    disk = str(tmpdir.join("disk.qcow2"))
    qemu_img.create(disk, "qcow2", size=size)
    with qemu_nbd.open(disk, "qcow2") as c:
        c.write(0 * CLUSTER_SIZE, b"A" * CLUSTER_SIZE)
        c.zero(1 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()

    # Create OVA with this image.
    ova = str(tmpdir.join("vm.ova"))
    with tarfile.open(ova, "w") as tar:
        tar.add(disk, arcname=os.path.basename(disk))

    extents = list(client.extents(ova, member="disk.qcow2"))
    assert extents == [
        ZeroExtent(
            start=0 * CLUSTER_SIZE,
            length=CLUSTER_SIZE,
            zero=False,
            hole=False),
        # Broken since qemu-nbd 6.0.0.
        ZeroExtent(
            start=1 * CLUSTER_SIZE,
            length=CLUSTER_SIZE,
            zero=True,
            hole=False),
        ZeroExtent(
            start=2 * CLUSTER_SIZE,
            length=size - 2 * CLUSTER_SIZE,
            zero=True,
            hole=True),
    ]
2020-10-17 02:17:18 +03:00
def test_dirty_extents(tmpdir):
    size = 1024**2

    # Create base image with empty dirty bitmap.
    base = str(tmpdir.join("base.qcow2"))
    qemu_img.create(base, "qcow2", size=size)
    qemu_img.bitmap_add(base, "b0")

    # Write data, modifying the dirty bitmap.
    with qemu_nbd.open(base, "qcow2") as c:
        c.write(0 * CLUSTER_SIZE, b"A" * CLUSTER_SIZE)
        c.zero(1 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()

    # Create top image with empty dirty bitmap.
    top = str(tmpdir.join("top.qcow2"))
    qemu_img.create(top, "qcow2", backing_file=base, backing_format="qcow2")
    qemu_img.bitmap_add(top, "b0")

    # Write data, modifying the dirty bitmap.
    with qemu_nbd.open(top, "qcow2") as c:
        c.write(3 * CLUSTER_SIZE, b"B" * CLUSTER_SIZE)
        c.zero(4 * CLUSTER_SIZE, CLUSTER_SIZE)
        c.flush()

    dirty_extents = list(client.extents(base, bitmap="b0"))

    expected = [
        # First cluster is dirty data.
        DirtyExtent(
            start=0 * CLUSTER_SIZE,
            length=1 * CLUSTER_SIZE,
            dirty=True,
            zero=False),
        # Second cluster is dirty zero.
        DirtyExtent(
            start=1 * CLUSTER_SIZE,
            length=1 * CLUSTER_SIZE,
            dirty=True,
            zero=True),
        # Third cluster is clean zero.
        DirtyExtent(
            start=2 * CLUSTER_SIZE,
            length=size - 2 * CLUSTER_SIZE,
            dirty=False,
            zero=True),
    ]

    log.debug("base image dirty extents: %s", dirty_extents)
    assert dirty_extents == expected

    dirty_extents = list(client.extents(top, bitmap="b0"))

    # Note: qemu-nbd reports dirty extents only for the top image, but zero
    # extents are read from the base image.
    expected = [
        # First cluster is clean data, read from the base image.
        DirtyExtent(
            start=0 * CLUSTER_SIZE,
            length=1 * CLUSTER_SIZE,
            dirty=False,
            zero=False),
        # Second and third clusters are read from the base image. Because
        # they are both clean zero, they are merged.
        DirtyExtent(
            start=1 * CLUSTER_SIZE,
            length=2 * CLUSTER_SIZE,
            dirty=False,
            zero=True),
        # Fourth cluster is a data extent modified in the top image.
        DirtyExtent(
            start=3 * CLUSTER_SIZE,
            length=1 * CLUSTER_SIZE,
            dirty=True,
            zero=False),
        # Fifth cluster is a zero extent modified in the top image.
        DirtyExtent(
            start=4 * CLUSTER_SIZE,
            length=1 * CLUSTER_SIZE,
            dirty=True,
            zero=True),
        # The rest is a clean zero extent.
        DirtyExtent(
            start=5 * CLUSTER_SIZE,
            length=size - 5 * CLUSTER_SIZE,
            dirty=False,
            zero=True),
    ]

    log.debug("top image dirty extents: %s", dirty_extents)
    assert dirty_extents == expected
2022-01-19 01:44:37 +02:00
@pytest.mark.parametrize("fmt", ["raw", "qcow2"])
def test_stress(srv, nbd_server, tmpdir, fmt):
    size = 10 * 1024**2

    # Create empty source and destination images.
    src = str(tmpdir.join("src." + fmt))
    qemu_img.create(src, fmt, size=size)
    dst = str(tmpdir.join("dst." + fmt))
    qemu_img.create(dst, fmt, size=size)

    # Upload and download the image multiple times. This used to fail
    # randomly when the executor closed the destination backend before it
    # was cloned by the workers.
    nbd_server.image = dst
    nbd_server.fmt = fmt
    nbd_server.start()
    url = prepare_transfer(srv, nbd_server.sock.url(), size=size)

    for i in range(20):
        client.upload(src, url, srv.config.tls.ca_file)
        client.download(url, src, srv.config.tls.ca_file, fmt=fmt)

    nbd_server.stop()
2022-03-21 01:18:07 +02:00
def test_concurrent_downloads(srv, tmpdir):
    # Test that we can serve 10 concurrent transfers, assuming 4
    # connections per client.
    # https://bugzilla.redhat.com/2066113
    size = 10 * 1024**2

    def download(url, dst):
        client.download(url, dst, srv.config.tls.ca_file)

    downloads = []
    try:
        for i in range(10):
            src = str(tmpdir.join(f"{i:02d}.src.qcow2"))
            qemu_img.create(src, "qcow2", size=size)

            dst = str(tmpdir.join(f"{i:02d}.dst.qcow2"))
            qemu_img.create(dst, "qcow2", size=size)

            sock = str(tmpdir.join(f"{i:02d}.sock"))
            nbd_server = qemu_nbd.Server(src, "qcow2", nbd.UnixAddress(sock))
            url = prepare_transfer(srv, nbd_server.sock.url(), size=size)
            nbd_server.start()
            downloads.append((nbd_server, url, dst))

        with ThreadPoolExecutor(max_workers=10) as executor:
            tasks = [executor.submit(download, url, dst)
                     for _, url, dst in downloads]
            for t in as_completed(tasks):
                t.result()
    finally:
        for nbd_server, _, _ in downloads:
            nbd_server.stop()