pxar: caching: add look-ahead cache
Add a lookahead cache and the necessary types to store the required data and keep track of directory boundaries while traversing the filesystem tree, in order to postpone the decision whether to reuse or re-encode a given regular file with unchanged metadata. Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
This commit is contained in:
parent
d0f7d86c9e
commit
6f23976247
@ -131,7 +131,7 @@ impl fmt::Display for ArchiveError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Eq, PartialEq, Hash)]
|
#[derive(Eq, PartialEq, Hash)]
|
||||||
struct HardLinkInfo {
|
pub(crate) struct HardLinkInfo {
|
||||||
st_dev: u64,
|
st_dev: u64,
|
||||||
st_ino: u64,
|
st_ino: u64,
|
||||||
}
|
}
|
||||||
|
162
pbs-client/src/pxar/look_ahead_cache.rs
Normal file
162
pbs-client/src/pxar/look_ahead_cache.rs
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
use std::collections::HashSet;
|
||||||
|
use std::ffi::CString;
|
||||||
|
use std::ops::Range;
|
||||||
|
use std::os::unix::io::OwnedFd;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use nix::sys::stat::FileStat;
|
||||||
|
|
||||||
|
use pxar::encoder::PayloadOffset;
|
||||||
|
use pxar::Metadata;
|
||||||
|
|
||||||
|
use super::create::*;
|
||||||
|
|
||||||
|
// Fallback limit for the number of cached entries holding a file descriptor,
// used by `PxarLookaheadCache::new` when no explicit size is given.
const DEFAULT_CACHE_SIZE: usize = 512;
|
||||||
|
|
||||||
|
pub(crate) struct CacheEntryData {
|
||||||
|
pub(crate) fd: OwnedFd,
|
||||||
|
pub(crate) c_file_name: CString,
|
||||||
|
pub(crate) stat: FileStat,
|
||||||
|
pub(crate) metadata: Metadata,
|
||||||
|
pub(crate) payload_offset: PayloadOffset,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) enum CacheEntry {
|
||||||
|
RegEntry(CacheEntryData),
|
||||||
|
DirEntry(CacheEntryData),
|
||||||
|
DirEnd,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) struct PxarLookaheadCache {
|
||||||
|
// Current state of the cache
|
||||||
|
enabled: bool,
|
||||||
|
// Cached entries
|
||||||
|
entries: Vec<CacheEntry>,
|
||||||
|
// Entries encountered having more than one link given by stat
|
||||||
|
hardlinks: HashSet<HardLinkInfo>,
|
||||||
|
// Payload range covered by the currently cached entries
|
||||||
|
range: Range<u64>,
|
||||||
|
// Possible held back last chunk from last flush, used for possible chunk continuation
|
||||||
|
last_chunk: Option<ReusableDynamicEntry>,
|
||||||
|
// Path when started caching
|
||||||
|
start_path: PathBuf,
|
||||||
|
// Number of entries with file descriptors
|
||||||
|
fd_entries: usize,
|
||||||
|
// Max number of entries with file descriptors
|
||||||
|
cache_size: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PxarLookaheadCache {
|
||||||
|
pub(crate) fn new(size: Option<usize>) -> Self {
|
||||||
|
Self {
|
||||||
|
enabled: false,
|
||||||
|
entries: Vec::new(),
|
||||||
|
hardlinks: HashSet::new(),
|
||||||
|
range: 0..0,
|
||||||
|
last_chunk: None,
|
||||||
|
start_path: PathBuf::new(),
|
||||||
|
fd_entries: 0,
|
||||||
|
cache_size: size.unwrap_or(DEFAULT_CACHE_SIZE),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn is_full(&self) -> bool {
|
||||||
|
self.fd_entries >= self.cache_size
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn caching_enabled(&self) -> bool {
|
||||||
|
self.enabled
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn insert(
|
||||||
|
&mut self,
|
||||||
|
fd: OwnedFd,
|
||||||
|
c_file_name: CString,
|
||||||
|
stat: FileStat,
|
||||||
|
metadata: Metadata,
|
||||||
|
payload_offset: PayloadOffset,
|
||||||
|
path: PathBuf,
|
||||||
|
) {
|
||||||
|
if !self.enabled {
|
||||||
|
self.start_path = path;
|
||||||
|
if !metadata.is_dir() {
|
||||||
|
self.start_path.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.enabled = true;
|
||||||
|
self.fd_entries += 1;
|
||||||
|
if metadata.is_dir() {
|
||||||
|
self.entries.push(CacheEntry::DirEntry(CacheEntryData {
|
||||||
|
fd,
|
||||||
|
c_file_name,
|
||||||
|
stat,
|
||||||
|
metadata,
|
||||||
|
payload_offset,
|
||||||
|
}))
|
||||||
|
} else {
|
||||||
|
self.entries.push(CacheEntry::RegEntry(CacheEntryData {
|
||||||
|
fd,
|
||||||
|
c_file_name,
|
||||||
|
stat,
|
||||||
|
metadata,
|
||||||
|
payload_offset,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn insert_dir_end(&mut self) {
|
||||||
|
self.entries.push(CacheEntry::DirEnd);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn take_and_reset(&mut self) -> (Vec<CacheEntry>, PathBuf) {
|
||||||
|
self.fd_entries = 0;
|
||||||
|
self.enabled = false;
|
||||||
|
// keep end for possible continuation if cache has been cleared because
|
||||||
|
// it was full, but further caching would be fine
|
||||||
|
self.range = self.range.end..self.range.end;
|
||||||
|
(
|
||||||
|
std::mem::take(&mut self.entries),
|
||||||
|
std::mem::take(&mut self.start_path),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn contains_hardlink(&self, info: &HardLinkInfo) -> bool {
|
||||||
|
self.hardlinks.contains(info)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn insert_hardlink(&mut self, info: HardLinkInfo) -> bool {
|
||||||
|
self.hardlinks.insert(info)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn range(&self) -> &Range<u64> {
|
||||||
|
&self.range
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn update_range(&mut self, range: Range<u64>) {
|
||||||
|
self.range = range;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn try_extend_range(&mut self, range: Range<u64>) -> bool {
|
||||||
|
if self.range.end == 0 {
|
||||||
|
// initialize first range to start and end with start of new range
|
||||||
|
self.range.start = range.start;
|
||||||
|
self.range.end = range.start;
|
||||||
|
}
|
||||||
|
|
||||||
|
// range continued, update end
|
||||||
|
if self.range.end == range.start {
|
||||||
|
self.range.end = range.end;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn take_last_chunk(&mut self) -> Option<ReusableDynamicEntry> {
|
||||||
|
self.last_chunk.take()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn update_last_chunk(&mut self, chunk: Option<ReusableDynamicEntry>) {
|
||||||
|
self.last_chunk = chunk;
|
||||||
|
}
|
||||||
|
}
|
@ -50,6 +50,7 @@
|
|||||||
pub(crate) mod create;
|
pub(crate) mod create;
|
||||||
pub(crate) mod dir_stack;
|
pub(crate) mod dir_stack;
|
||||||
pub(crate) mod extract;
|
pub(crate) mod extract;
|
||||||
|
pub(crate) mod look_ahead_cache;
|
||||||
pub(crate) mod metadata;
|
pub(crate) mod metadata;
|
||||||
pub(crate) mod tools;
|
pub(crate) mod tools;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user