From aae596ee1813fa3ca272ea5f3125714bb6866500 Mon Sep 17 00:00:00 2001
From: Dominik Csapak
Date: Mon, 5 Aug 2024 11:24:13 +0200
Subject: [PATCH] datastore: data blob: increase compression throughput

Increase the zstd compression throughput by not using the
`zstd::stream::copy_encode` method, because it seems to use an internal
buffer size of 32 KiB [0], copies at least once more into the target
buffer and might have some additional (allocation and/or syscall)
overhead. Due to the number of wrappers and indirections it's a bit
hard to tell for sure. In any case, reduced throughput can be observed
when the target storage, the source storage and the network are all
fast enough that the operations involved in creating chunks, such as
compression, become the bottleneck.

Instead, use the lower-level `zstd_safe::compress`, which avoids (big)
allocations since we provide the target buffer.

In case of a compression error, just store the uncompressed data;
there's nothing else we can do, and saving uncompressed data is better
than saving none.

Additionally, log any such error, except for the one indicating that
the target buffer is too small.

Some benchmarks on my machine (Intel i7-12700K with DDR5-4800 memory
using an ASUS Prime Z690-A motherboard) from a tmpfs to a datastore on
tmpfs:

Type               without patches (MiB/s)  with patches (MiB/s)
.img file          ~614                     ~767
pxar one big file  ~657                     ~807
pxar small files   ~576                     ~627

On average, the new approach is faster by a factor of about 1.19.

Note that the new approach should not have a measurable negative
impact, e.g. on (peak) memory usage. That is because we always reserved
a vector of max-data-size (data length + header length) anyway, so no
new buffer had to be added; instead, the buffer that the high-level
zstd wrapper crate used internally was removed.

Signed-off-by: Dominik Csapak
---
 pbs-datastore/src/data_blob.rs | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/pbs-datastore/src/data_blob.rs b/pbs-datastore/src/data_blob.rs
index ec948bfe..40bab7f6 100644
--- a/pbs-datastore/src/data_blob.rs
+++ b/pbs-datastore/src/data_blob.rs
@@ -136,39 +136,40 @@ impl DataBlob {
 
             DataBlob { raw_data }
         } else {
-            let max_data_len = data.len() + std::mem::size_of::<DataBlobHeader>();
+            let header_len = std::mem::size_of::<DataBlobHeader>();
+            let max_data_len = data.len() + header_len;
+            let mut raw_data = vec![0; max_data_len];
             if compress {
-                let mut comp_data = Vec::with_capacity(max_data_len);
-
                 let head = DataBlobHeader {
                     magic: COMPRESSED_BLOB_MAGIC_1_0,
                     crc: [0; 4],
                 };
                 unsafe {
-                    comp_data.write_le_value(head)?;
+                    (&mut raw_data[0..header_len]).write_le_value(head)?;
                 }
 
-                zstd::stream::copy_encode(data, &mut comp_data, 1)?;
-
-                if comp_data.len() < max_data_len {
-                    let mut blob = DataBlob {
-                        raw_data: comp_data,
-                    };
-                    blob.set_crc(blob.compute_crc());
-                    return Ok(blob);
+                match zstd_safe::compress(&mut raw_data[header_len..], data, 1) {
+                    Ok(size) if size <= data.len() => {
+                        raw_data.truncate(header_len + size);
+                        let mut blob = DataBlob { raw_data };
+                        blob.set_crc(blob.compute_crc());
+                        return Ok(blob);
+                    }
+                    Err(err) if !zstd_error_is_target_too_small(err) => {
+                        log::warn!("zstd compression error: {err}");
+                    }
+                    _ => {}
                 }
             }
 
-            let mut raw_data = Vec::with_capacity(max_data_len);
-
             let head = DataBlobHeader {
                 magic: UNCOMPRESSED_BLOB_MAGIC_1_0,
                 crc: [0; 4],
             };
             unsafe {
-                raw_data.write_le_value(head)?;
+                (&mut raw_data[0..header_len]).write_le_value(head)?;
             }
-            raw_data.extend_from_slice(data);
+            (&mut raw_data[header_len..]).write_all(data)?;
 
             DataBlob { raw_data }
         };
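
Not part of the patch itself: below is a minimal, self-contained Rust sketch of the
buffer-reuse approach described above, assuming only the `zstd-safe` crate as a
dependency. The names `dst_too_small` and `compress_or_keep` are illustrative; the
`zstd_error_is_target_too_small` helper referenced by the diff is provided elsewhere
in the code base and may be implemented differently.

// Illustrative sketch only, not the PBS implementation: compress straight into
// a caller-provided buffer via `zstd_safe::compress` and fall back to the
// uncompressed input on any error or when compression does not save space.

// Guess whether `err` means "destination buffer too small" by comparing the
// error name reported by the library; the real check used by the patch series
// may use a different mechanism.
fn dst_too_small(err: zstd_safe::ErrorCode) -> bool {
    zstd_safe::get_error_name(err) == "Destination buffer is too small"
}

// Compress `data` into a freshly allocated buffer of the same size and return
// the compressed bytes if that is a win, or a copy of the input otherwise.
fn compress_or_keep(data: &[u8]) -> Vec<u8> {
    // Worst case we are willing to store: the input size itself. The patch
    // additionally reserves room for the blob header in the same buffer.
    let mut buf = vec![0u8; data.len()];

    match zstd_safe::compress(&mut buf[..], data, 1) {
        // Keep the compressed form only if it is actually smaller.
        Ok(size) if size < data.len() => {
            buf.truncate(size);
            buf
        }
        Ok(_) => data.to_vec(),
        Err(err) => {
            // "Destination too small" just means the data does not fit when
            // compressed at this level; anything else is worth logging.
            if !dst_too_small(err) {
                eprintln!("zstd compression error: {err}");
            }
            data.to_vec()
        }
    }
}

fn main() {
    let compressible = vec![b'a'; 64 * 1024];
    let out = compress_or_keep(&compressible);
    println!("{} -> {} bytes", compressible.len(), out.len());
}

The patch does the same thing one step further: it writes the blob header into the
front of the single pre-allocated vector and hands only the remaining tail to
`zstd_safe::compress`, which is what lets it skip both the extra copy and the 32 KiB
streaming buffer of `zstd::stream::copy_encode`.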