ovirt-imageio/test/blkhash_test.py
Albert Esteve fc6e58d9e6 reuse: addheader test/*.py
Add SPDX header to python files with
the 'py' extension in the test directory.

Signed-off-by: Albert Esteve <aesteve@redhat.com>
2022-10-18 13:04:20 +02:00

112 lines
3.3 KiB
Python

# SPDX-FileCopyrightText: Red Hat, Inc.
# SPDX-License-Identifier: GPL-2.0-or-later
import hashlib
import pytest
from functools import partial
from ovirt_imageio._internal import blkhash
# Reference hash constructor for these tests: 32-byte BLAKE2b digests.
# The tests below assume this matches the per-block digest used by
# blkhash.Hash — confirmed by the equivalence checks in test_hasher_data.
default_hash = partial(hashlib.blake2b, digest_size=32)
def test_algorithm_basic():
    """The block hash is a hash over the digests of fixed-size blocks."""
    expected = (
        "7934079f80b53142d738d2bb7efaedf696a3d34d76a7865a24130bc7b4a7acfe"
    )
    root = default_hash()
    for index in range(10):
        # Each block starts with a distinct 4-byte marker, padded with
        # zeroes up to the block size.
        data = (b"%02d\n" % index).ljust(blkhash.BLOCK_SIZE, b"\0")
        root.update(default_hash(data).digest())
    assert root.hexdigest() == expected
def test_algorithm_zero_optimization():
    """Reusing a precomputed zero-block digest must not change the result."""
    zero_block = b"\0" * blkhash.BLOCK_SIZE

    # Full computation: digest every block, as when we have no extent
    # information and do not use zero detection.
    full = default_hash()
    for _ in range(10):
        full.update(default_hash(zero_block).digest())

    # Optimized computation: digest the zero block once and reuse it, as
    # when extents are known or zero blocks were detected.
    cached_digest = default_hash(zero_block).digest()
    optimized = default_hash()
    for _ in range(10):
        optimized.update(cached_digest)

    # Both strategies must yield the same checksum.
    assert full.hexdigest() == optimized.hexdigest()
def test_hasher_data():
    """blkhash.Hash over raw data matches the manual block-digest scheme."""
    blocks = [
        (b"%02d\n" % i).ljust(blkhash.BLOCK_SIZE, b"\0") for i in range(10)
    ]

    # Hash the raw blocks with the library hasher.
    hasher = blkhash.Hash()
    for block in blocks:
        hasher.update(block)

    # Reproduce the algorithm by hand: hash the per-block digests.
    reference = default_hash()
    for block in blocks:
        reference.update(default_hash(block).digest())

    assert hasher.hexdigest() == reference.hexdigest()
def test_hasher_zero():
    """zero(n) must be equivalent to update() with n zero bytes."""
    zeroes = b"\0" * blkhash.BLOCK_SIZE

    # Feed two blocks of actual zero bytes.
    with_data = blkhash.Hash()
    with_data.update(zeroes)
    with_data.update(zeroes)

    # Report the same two blocks as zero ranges without touching data.
    with_zero = blkhash.Hash()
    with_zero.zero(len(zeroes))
    with_zero.zero(len(zeroes))

    assert with_data.hexdigest() == with_zero.hexdigest()
@pytest.mark.parametrize("size,algorithm,digest_size,checksum", [
    # Files aligned to block size.
    (4 * 1024**2, "blake2b", 32,
     "f426bb2cf1e1901fe4e87423950944ecfed6d9d18a09e6e802aa4912e1c9b2d6"),
    (4 * 1024**2, "sha1", None,
     "3ed03b375b6658d99b63ced1867a95aeef080b79"),
    # Files not aligned to block size.
    (3 * 1024**2, "blake2b", 32,
     "42f3b76772a6d3dcffae2a24697721687975e2c60ddfd4ba7831ea9ce772ca71"),
    (3 * 1024**2, "sha1", None,
     "6cba43b908381be45a55ab9b4361f8370b928354"),
    (5 * 1024**2, "blake2b", 32,
     "0da53b583fc1fbbac7edea14454c79f84a8107613e614f2c7a47071dfdcf41a6"),
    (5 * 1024**2, "sha1", None,
     "d3936edd8e3a8ff10e8257a9f460d8da67838549"),
    # Empty file.
    (0, "blake2b", 32,
     "0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8"),
    (0, "sha1", None,
     "da39a3ee5e6b4b0d3255bfef95601890afd80709"),
])
def test_checksum(tmpdir, size, algorithm, digest_size, checksum):
    """blkhash.checksum() returns the known checksum for a sparse file."""
    path = str(tmpdir.join("file"))

    # Create the test file: 4 data bytes followed by a hole up to `size`
    # (for size 0 the truncate discards the data again).
    with open(path, "wb") as f:
        f.write(b"data")
        f.truncate(size)

    expected = {
        "algorithm": algorithm,
        "block_size": blkhash.BLOCK_SIZE,
        "checksum": checksum,
    }
    actual = blkhash.checksum(
        path,
        block_size=blkhash.BLOCK_SIZE,
        algorithm=algorithm,
        digest_size=digest_size)
    assert actual == expected