5
0
mirror of git://git.proxmox.com/git/pxar.git synced 2025-01-03 09:17:38 +03:00
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
This commit is contained in:
Wolfgang Bumiller 2020-01-23 11:18:04 +01:00
commit 6cd4f635b6
19 changed files with 2300 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
/target
**/*.rs.bk
Cargo.lock
test.pxar

32
Cargo.toml Normal file
View File

@ -0,0 +1,32 @@
[package]
name = "pxar"
version = "0.1.0"
authors = ["Wolfgang Bumiller <w.bumiller@proxmox.com>"]
edition = "2018"
[[example]]
name = "apxar"
path = "examples/apxar.rs"
required-features = [ "async-example" ]
[dependencies]
bitflags = "1.2.1"
endian_trait = { version = "0.6", features = ["arrays"] }
failure = "0.1"
siphasher = "0.3"
futures = { version = "0.3.1", optional = true }
tokio = { version = "0.2.10", optional = true, default-features = false }
[features]
default = [ "futures-io", "tokio-io" ]
futures-io = [ "futures" ]
tokio-io = [ "tokio" ]
async-example = [
"futures-io",
"tokio-io",
"tokio/fs",
"tokio/rt-threaded",
"tokio/io-driver",
"tokio/macros",
]

32
examples/apxar.rs Normal file
View File

@ -0,0 +1,32 @@
use pxar::decoder::aio::Decoder;
/// Print the path of every entry of the pxar archive named by the first command line argument.
///
/// The first (at most) two entries are fetched by awaiting `Decoder::next()` directly, the
/// remaining ones through the decoder's `Stream` adapter, to show both interfaces.
#[tokio::main]
async fn main() {
    let path = std::env::args_os()
        .nth(1)
        .expect("expected a file name");

    let input = tokio::fs::File::open(path)
        .await
        .expect("failed to open file");

    let mut decoder = Decoder::from_tokio(input)
        .await
        .expect("failed to open pxar archive contents");

    // Pull up to two entries by hand.
    for _ in 0..2 {
        match decoder.next().await {
            Some(entry) => {
                println!("{:#?}", entry.expect("failed to parse entry").path());
            }
            None => break,
        }
    }

    // Use a Stream for the remaining entries:
    use futures::stream::StreamExt;

    let mut remaining = decoder.into_stream();
    while let Some(entry) = remaining.next().await {
        println!("{:#?}", entry.expect("failed to parse entry").path());
    }
}

62
examples/randaccess.rs Normal file
View File

@ -0,0 +1,62 @@
use pxar::accessor::Accessor;
/// Demonstrate random access to a pxar archive.
///
/// Opens the archive named by the first command line argument, lists the root directory,
/// looks up a few fixed names (`da`, `db`, `root1.txt`, `root2.txt`), lists the contents of
/// `da/` and finally iterates the root directory via `read_dir()`.
///
/// Every failure is reported via `expect()`, i.e. the example panics on the first error.
fn main() {
    let mut args = std::env::args_os().skip(1);
    let file = args.next().expect("expected a file name");

    // None of the accessor methods used below need `&mut self`.
    let accessor = Accessor::open(file).expect("failed to open file");
    let dir = accessor
        .open_root()
        .expect("failed to open archive root directory");

    for i in dir.decode_full().expect("failed to access root directory") {
        println!("{:#?}", i.expect("failed to parse entry").path());
    }

    // The outer `expect` handles I/O errors, the inner one a missing entry.
    let da = dir
        .lookup("da")
        .expect("error looking up da/")
        .expect("failed to lookup da/");

    // These only check that the lookup itself does not fail; whether the entry exists
    // (`Some`) or not (`None`) is ignored.
    dir.lookup("db").expect("failed to lookup db");
    dir.lookup("root1.txt").expect("failed to lookup root1.txt");
    dir.lookup("root2.txt").expect("failed to lookup root2.txt");

    println!("{:?}", da.entry());
    let da = da.enter_directory().expect("failed to enter /da directory");
    for i in da.decode_full().expect("failed to access /da directory") {
        println!(
            " ==> {:#?}",
            i.expect("failed to parse /da file entry").path()
        );
    }

    for i in dir.read_dir() {
        let i = i.expect("failed to read directory entry");
        println!("read_dir => {:?}", i.file_name());
    }
}

1
rust-toolchain Normal file
View File

@ -0,0 +1 @@
nightly

1
rustfmt.toml Normal file
View File

@ -0,0 +1 @@
edition = "2018"

443
src/accessor.rs Normal file
View File

@ -0,0 +1,443 @@
//! Random access for PXAR files.
use std::ffi::OsString;
use std::io;
use std::mem::{size_of, size_of_val, MaybeUninit};
use std::ops::Range;
use std::os::unix::ffi::{OsStrExt, OsStringExt};
use std::path::{Path, PathBuf};
use std::pin::Pin;
use std::task::{Context, Poll};
use endian_trait::Endian;
use crate::decoder::{self, DecoderImpl};
use crate::format::{self, GoodbyeItem};
use crate::poll_fn::poll_fn;
use crate::util;
use crate::Entry;
pub mod aio;
pub mod sync;
#[doc(inline)]
pub use sync::Accessor;
/// Random access read implementation.
///
/// This is the poll-based, positional counterpart of a sequential reader: the caller names
/// the absolute `offset` for every read instead of the reader keeping a cursor.
pub trait ReadAt {
    /// Try to read into `buf` starting at byte `offset` of the underlying data.
    ///
    /// Returns the number of bytes placed into `buf`; callers in this crate treat a result of
    /// `0` for a non-empty buffer as end of file.
    fn poll_read_at(
        self: Pin<&Self>,
        cx: &mut Context,
        buf: &mut [u8],
        offset: u64,
    ) -> Poll<io::Result<usize>>;
}
/// `async fn` helpers layered on top of `ReadAt::poll_read_at`.
///
/// They live on `dyn ReadAt` rather than in a `ReadAtExt` trait because a trait would force us
/// to name and hand-write every returned `Future` type; all we want here is to be able to
/// write `async fn`.
///
/// The price is that callers holding a concrete reader have to go through a `&dyn ReadAt`
/// to reach these methods.
impl<'a> dyn ReadAt + 'a {
    /// Awaitable version of `poll_read_at`.
    async fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result<usize> {
        // NOTE(review): pins `self` without proof that it is never moved; presumably fine
        // because only a shared reference is handed out - confirm for `!Unpin` readers.
        poll_fn(|cx| unsafe { Pin::new_unchecked(self).poll_read_at(cx, buf, offset) }).await
    }

    /// Fill all of `buf` with the data found at `offset`.
    ///
    /// Keeps reading until the buffer is full; a read of zero bytes before that is reported
    /// as an "unexpected EOF" error.
    async fn read_exact_at(&self, mut buf: &mut [u8], mut offset: u64) -> io::Result<()> {
        loop {
            if buf.is_empty() {
                return Ok(());
            }

            let got = self.read_at(buf, offset).await?;
            if got == 0 {
                io_bail!("unexpected EOF");
            }

            buf = &mut buf[got..];
            offset += got as u64;
        }
    }

    /// Read a `T` stored at `offset` and convert it from little endian.
    async fn read_entry_at<T: Endian>(&self, offset: u64) -> io::Result<T> {
        let mut value = MaybeUninit::<T>::uninit();
        // View the not yet initialized value as raw bytes to read into.
        let bytes = unsafe {
            std::slice::from_raw_parts_mut(value.as_mut_ptr() as *mut u8, size_of::<T>())
        };
        self.read_exact_at(bytes, offset).await?;
        // All `size_of::<T>()` bytes were written by the successful read above.
        Ok(unsafe { value.assume_init() }.from_le())
    }

    /// Read exactly `size` bytes found at `offset` into a newly allocated vector.
    async fn read_exact_data_at(&self, size: usize, offset: u64) -> io::Result<Vec<u8>> {
        let mut bytes = util::vec_new(size);
        self.read_exact_at(&mut bytes[..], offset).await?;
        Ok(bytes)
    }
}
/// The random access state machine implementation.
pub struct AccessorImpl<T> {
input: T,
size: u64,
}
impl<T: ReadAt> AccessorImpl<T> {
pub async fn new(input: T, size: u64) -> io::Result<Self> {
if size < (size_of::<GoodbyeItem>() as u64) {
io_bail!("too small to contain a pxar archive");
}
Ok(Self { input, size })
}
pub async fn open_root<'a>(&'a self) -> io::Result<DirectoryImpl<'a>> {
DirectoryImpl::open_at_end(&self.input, self.size, "/".into()).await
}
}
/// The directory random-access state machine implementation.
pub struct DirectoryImpl<'a> {
    /// Reader used for all accesses to the archive.
    input: &'a dyn ReadAt,
    /// Offset at which this directory's entry starts.
    entry_ofs: u64,
    /// Offset of this directory's goodbye record (its header).
    goodbye_ofs: u64,
    /// Number of bytes from `entry_ofs` to the end of the goodbye record.
    size: u64,
    /// The goodbye table items, without the trailing tail marker item.
    table: Box<[GoodbyeItem]>,
    /// Path of this directory; prefix for the paths of entries decoded from it.
    path: PathBuf,
}
impl<'a> DirectoryImpl<'a> {
/// Open a directory ending at the specified position.
pub(crate) async fn open_at_end(
input: &'a dyn ReadAt,
end_offset: u64,
path: PathBuf,
) -> io::Result<DirectoryImpl<'a>> {
let tail = Self::read_tail_entry(input, end_offset).await?;
if end_offset < tail.size {
io_bail!("goodbye tail size out of range");
}
let goodbye_ofs = end_offset - tail.size;
if goodbye_ofs < tail.offset {
io_bail!("goodbye offset out of range");
}
let entry_ofs = goodbye_ofs - tail.offset;
let size = end_offset - entry_ofs;
let mut this = Self {
input,
entry_ofs,
goodbye_ofs,
size,
table: Box::new([]),
path,
};
// sanity check:
if this.table_size() % (size_of::<GoodbyeItem>() as u64) != 0 {
io_bail!("invalid goodbye table size: {}", this.table_size());
}
this.table = this.load_table().await?;
Ok(this)
}
/// Load the entire goodbye table:
async fn load_table(&self) -> io::Result<Box<[GoodbyeItem]>> {
let len = self.len();
let mut data = Vec::with_capacity(self.len());
unsafe {
data.set_len(len);
let slice = std::slice::from_raw_parts_mut(
data.as_mut_ptr() as *mut u8,
len * size_of_val(&data[0]),
);
self.input.read_exact_at(slice, self.table_offset()).await?;
drop(slice);
}
Ok(data.into_boxed_slice())
}
#[inline]
fn end_offset(&self) -> u64 {
self.entry_ofs + self.size
}
/// Size in bytes of the goodbye table: the goodbye record without its header, but
/// *including* the tail marker item.
#[inline]
fn table_size(&self) -> u64 {
    let goodbye_len = self.end_offset() - self.goodbye_ofs;
    let header_len = size_of::<format::Header>() as u64;
    goodbye_len - header_len
}
/// Offset of the first goodbye table item, i.e. right behind the goodbye record's header.
#[inline]
fn table_offset(&self) -> u64 {
    let header_len = size_of::<format::Header>() as u64;
    self.goodbye_ofs + header_len
}
/// Number of goodbye table items, *excluding* the tail marker!
#[inline]
fn len(&self) -> usize {
    let item_len = size_of::<GoodbyeItem>() as u64;
    let items_with_tail = (self.table_size() / item_len) as usize;
    items_with_tail - 1
}
/// Read the goodbye tail item located directly in front of `end_offset`.
///
/// Fails if there is not enough room for an item in front of `end_offset`, if reading
/// fails, or if the item does not carry the tail marker hash.
async fn read_tail_entry(input: &'a dyn ReadAt, end_offset: u64) -> io::Result<GoodbyeItem> {
    let item_len = size_of::<GoodbyeItem>() as u64;

    let tail_offset = match end_offset.checked_sub(item_len) {
        Some(offset) => offset,
        None => io_bail!("goodbye tail does not fit"),
    };

    let tail = input.read_entry_at::<GoodbyeItem>(tail_offset).await?;

    if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER {
        io_bail!("no goodbye tail marker found");
    }

    Ok(tail)
}
/// Get a decoder for the directory contents.
///
/// The decoder covers the whole byte range of this directory. Its first entry - the
/// directory itself - is consumed here and must actually be a directory.
pub(crate) async fn decode_full(&self) -> io::Result<DecoderImpl<SeqReadAtAdapter<'a>>> {
    let whole_dir = self.entry_ofs..(self.entry_ofs + self.size);
    let (this_dir, decoder) = self.decode_one_entry(whole_dir, None).await?;

    if !this_dir.is_dir() {
        io_bail!("directory does not seem to be a directory");
    }

    Ok(decoder)
}
/// Create a sequential decoder reading `entry_range` of the archive.
///
/// The decoder's initial path is this directory's path, extended by `file_name` if given.
async fn get_decoder(
    &self,
    entry_range: Range<u64>,
    file_name: Option<&Path>,
) -> io::Result<DecoderImpl<SeqReadAtAdapter<'a>>> {
    let reader = SeqReadAtAdapter::new(self.input, entry_range);
    let path = match file_name {
        Some(file) => self.path.join(file),
        None => self.path.clone(),
    };

    DecoderImpl::new_full(reader, path).await
}
/// Decode the first entry found in `entry_range`.
///
/// Returns the entry together with the decoder positioned behind it. An empty range, i.e.
/// a decoder which yields nothing, is an error.
async fn decode_one_entry(
    &self,
    entry_range: Range<u64>,
    file_name: Option<&Path>,
) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<'a>>)> {
    let mut decoder = self.get_decoder(entry_range, file_name).await?;

    let first = decoder.next().await;
    match first {
        Some(entry) => Ok((entry?, decoder)),
        None => Err(io_format_err!(
            "unexpected EOF while decoding directory entry"
        )),
    }
}
/// Find the index of a goodbye table item with the given file name `hash`, if any.
fn lookup_hash_position(&self, hash: u64) -> Option<usize> {
    let table = &self.table;
    format::search_binary_tree_array_by(table, |item| hash.cmp(&item.hash))
}
/// Lookup a directory entry.
///
/// `path` is hashed and searched for in the goodbye table. Since different names may share
/// a hash, the file name stored in the archive is compared for every consecutive table item
/// carrying that hash until one matches.
///
/// Returns `Ok(None)` if no entry with that name exists.
pub async fn lookup(&'a self, path: &Path) -> io::Result<Option<FileEntryImpl<'a>>> {
    let hash = format::hash_filename(path.as_os_str().as_bytes());
    let first = match self.lookup_hash_position(hash) {
        Some(index) => index,
        None => return Ok(None),
    };

    // Lookup FILENAME; if it doesn't match, move on to the next item with the same hash.
    // The index has to advance, otherwise a hash collision would spin here forever.
    //
    // NOTE(review): this only scans forward, assuming `lookup_hash_position` yields the
    // first item of a run of equal hashes - confirm against its implementation.
    for index in first..self.table.len() {
        if self.table[index].hash != hash {
            break;
        }

        let cursor = self.get_cursor(index).await?;
        if cursor.file_name == path {
            return Ok(Some(cursor.get_entry().await?));
        }
    }

    Ok(None)
}
async fn get_cursor(&'a self, index: usize) -> io::Result<DirEntryImpl<'a>> {
let entry = &self.table[index];
let file_goodbye_ofs = entry.offset;
if self.goodbye_ofs < file_goodbye_ofs {
io_bail!("invalid file offset");
}
let file_ofs = self.goodbye_ofs - file_goodbye_ofs;
let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?;
Ok(DirEntryImpl {
dir: self,
file_name,
entry_range: Range {
start: entry_ofs,
end: file_ofs + entry.size,
},
})
}
/// Read the `PXAR_FILENAME` item located at `file_ofs`.
///
/// Returns the file name (without its terminating zero byte) and the offset right behind
/// the item, which is where the file's entry begins.
///
/// Fails if the item is of a different type, if the name lacks the terminating zero, is
/// empty, or is rejected by `format::check_file_name`.
async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> {
    let head: format::Header = self.input.read_entry_at(file_ofs).await?;
    if head.htype != format::PXAR_FILENAME {
        io_bail!("expected PXAR_FILENAME header, found: {:x}", head.htype);
    }

    // The name directly follows the header.
    let name_len = head.content_size() as usize;
    let name_ofs = file_ofs + (size_of_val(&head) as u64);
    let mut raw_name = self.input.read_exact_data_at(name_len, name_ofs).await?;

    if raw_name.pop() != Some(0) {
        io_bail!("invalid file name (missing terminating zero)");
    }

    if raw_name.is_empty() {
        io_bail!("invalid empty file name");
    }

    let file_name = PathBuf::from(OsString::from_vec(raw_name));
    format::check_file_name(&file_name)?;

    let entry_ofs = file_ofs + head.full_size();
    Ok((file_name, entry_ofs))
}
pub fn read_dir(&'a self) -> ReadDirImpl<'a> {
ReadDirImpl::new(self, 0)
}
}
/// A file entry retrieved from a Directory.
pub struct FileEntryImpl<'a> {
    /// The directory this entry was found in.
    parent: &'a DirectoryImpl<'a>,
    /// The decoded entry.
    entry: Entry,
    /// The decoder positioned behind the entry; only kept for directories.
    decoder: Option<DecoderImpl<SeqReadAtAdapter<'a>>>,
    /// Offset right behind this entry's data.
    end_offset: u64,
}

impl<'a> FileEntryImpl<'a> {
    /// Open this entry as a directory.
    ///
    /// Fails if the entry is not a directory or if its goodbye table cannot be loaded.
    pub async fn enter_directory(&self) -> io::Result<DirectoryImpl<'a>> {
        if !self.entry.is_dir() {
            io_bail!("enter_directory() on a non-directory");
        }

        let path = self.entry.path.clone();
        DirectoryImpl::open_at_end(self.parent.input, self.end_offset, path).await
    }

    /// Consume this handle, keeping only the decoded entry.
    #[inline]
    pub fn into_entry(self) -> Entry {
        self.entry
    }

    /// Borrow the decoded entry.
    #[inline]
    pub fn entry(&self) -> &Entry {
        &self.entry
    }
}
/// An iterator over the contents of a directory.
pub struct ReadDirImpl<'a> {
    /// The directory being listed.
    dir: &'a DirectoryImpl<'a>,
    /// Index of the goodbye table item to return next.
    at: usize,
}

impl<'a> ReadDirImpl<'a> {
    /// Start listing `dir` at goodbye table index `at`.
    pub fn new(dir: &'a DirectoryImpl<'a>, at: usize) -> Self {
        Self { dir, at }
    }

    /// Get a cursor for the next entry, or `None` once the table is exhausted.
    ///
    /// On error the position is not advanced.
    pub async fn next(&mut self) -> io::Result<Option<DirEntryImpl<'a>>> {
        if self.at == self.dir.table.len() {
            return Ok(None);
        }

        let cursor = self.dir.get_cursor(self.at).await?;
        self.at += 1;
        Ok(Some(cursor))
    }
}
/// A cursor pointing to a file in a directory.
///
/// So far only the file name has been read; the position of the actual data is remembered so
/// that the cursor can be upgraded into a `FileEntryImpl` via `get_entry()`.
pub struct DirEntryImpl<'a> {
    /// The directory containing the file.
    dir: &'a DirectoryImpl<'a>,
    /// The file's name within `dir`.
    file_name: PathBuf,
    /// Byte range of the entry, starting right behind its file name item.
    entry_range: Range<u64>,
}

impl<'a> DirEntryImpl<'a> {
    /// The name of the file this cursor points to.
    pub fn file_name(&self) -> &Path {
        &self.file_name
    }

    /// Decode the entry this cursor points to.
    pub async fn get_entry(&self) -> io::Result<FileEntryImpl<'a>> {
        let (entry, decoder) = self
            .dir
            .decode_one_entry(self.entry_range.clone(), Some(&self.file_name))
            .await?;

        Ok(FileEntryImpl {
            parent: self.dir,
            // The decoder is only of further use for directories.
            decoder: if entry.is_dir() { Some(decoder) } else { None },
            entry,
            end_offset: self.entry_range.end,
        })
    }
}
/// Presents a byte range of a `ReadAt` input as a sequential reader.
#[doc(hidden)]
pub struct SeqReadAtAdapter<'a> {
    /// The underlying positional reader.
    input: &'a dyn ReadAt,
    /// The part of the range not yet read; `start` is the current read position.
    range: Range<u64>,
}

impl<'a> SeqReadAtAdapter<'a> {
    /// Create an adapter reading `range` of `input` from start to end.
    pub fn new(input: &'a dyn ReadAt, range: Range<u64>) -> Self {
        Self { input, range }
    }

    /// Number of bytes left between the current position and the end of the range.
    #[inline]
    fn remaining(&self) -> usize {
        let Range { start, end } = self.range;
        (end - start) as usize
    }
}
impl<'a> decoder::SeqRead for SeqReadAtAdapter<'a> {
    /// Read from the current position, never going past the end of the range.
    ///
    /// The position only advances by the number of bytes actually read.
    fn poll_seq_read(
        self: Pin<&mut Self>,
        cx: &mut Context,
        buf: &mut [u8],
    ) -> Poll<io::Result<usize>> {
        let this = self.get_mut();

        // Clamp the buffer to what is left of our range.
        let limit = this.remaining().min(buf.len());
        let window = &mut buf[..limit];

        let got = ready!(unsafe {
            Pin::new_unchecked(this.input).poll_read_at(cx, window, this.range.start)
        })?;

        this.range.start += got as u64;
        Poll::Ready(Ok(got))
    }

    /// The position is always known: it is the absolute offset within the input.
    fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
        let here = self.range.start;
        Poll::Ready(Some(Ok(here)))
    }
}

3
src/accessor/aio.rs Normal file
View File

@ -0,0 +1,3 @@
//! Asynchronous `pxar` random-access handling.
//!
//! Currently neither tokio nor futures have an `AsyncFileExt` variant.

183
src/accessor/sync.rs Normal file
View File

@ -0,0 +1,183 @@
//! Blocking `pxar` random access handling.
use std::io;
use std::os::unix::fs::FileExt;
use std::path::Path;
use std::pin::Pin;
use std::task::{Context, Poll};
use crate::accessor::{self, ReadAt};
use crate::decoder::Decoder;
use crate::util::poll_result_once;
use crate::Entry;
/// Blocking `pxar` random-access decoder.
///
/// This is the blocking I/O flavor of the `pxar` accessor and does *not* work with
/// asynchronous I/O objects: every I/O operation must complete with `Poll::Ready`.
///
/// Attempting to use a `Waker` from this context *will* `panic!`
///
/// Asynchronous I/O is meant to be covered by `aio::Accessor`.
#[repr(transparent)]
pub struct Accessor<T> {
    /// The shared accessor implementation, driven synchronously by this wrapper.
    inner: accessor::AccessorImpl<T>,
}
impl<T: FileExt> Accessor<T> {
    /// Decode a `pxar` archive of `size` bytes from any input implementing `FileExt`.
    #[inline]
    pub fn from_file_and_size(input: T, size: u64) -> io::Result<Accessor<FileReader<T>>> {
        let reader = FileReader::new(input);
        Accessor::new(reader, size)
    }
}
impl Accessor<FileReader<std::fs::File>> {
    /// Decode a `pxar` archive from a regular `std::fs::File` input.
    ///
    /// The archive size is taken from the file's metadata.
    #[inline]
    pub fn from_file(input: std::fs::File) -> io::Result<Self> {
        let metadata = input.metadata()?;
        Accessor::from_file_and_size(input, metadata.len())
    }

    /// Convenience shortcut for `File::open` followed by `Accessor::from_file`.
    pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
        let file = std::fs::File::open(path.as_ref())?;
        Self::from_file(file)
    }
}
impl<T: ReadAt> Accessor<T> {
    /// Create a *blocking* random-access decoder from an input implementing our internal
    /// positional read interface.
    ///
    /// Note that the `input`'s `ReadAt` implementation must always return `Poll::Ready` and
    /// is not allowed to use the `Waker`, as this will cause a `panic!`.
    pub fn new(input: T, size: u64) -> io::Result<Self> {
        let inner = poll_result_once(accessor::AccessorImpl::new(input, size))?;
        Ok(Self { inner })
    }

    /// Open a directory handle to the root of the pxar archive.
    pub fn open_root<'a>(&'a self) -> io::Result<Directory<'a>> {
        let root = poll_result_once(self.inner.open_root())?;
        Ok(Directory::new(root))
    }
}
/// Adapter making a `FileExt` reader usable through the `ReadAt` interface.
pub struct FileReader<T> {
    /// The wrapped reader.
    inner: T,
}

impl<T: FileExt> FileReader<T> {
    /// Wrap `inner`.
    pub fn new(inner: T) -> Self {
        FileReader { inner }
    }
}
impl<T: FileExt> ReadAt for FileReader<T> {
    /// Perform a blocking `FileExt::read_at` and report its result as immediately ready.
    ///
    /// The task context is never used.
    fn poll_read_at(
        self: Pin<&Self>,
        _cx: &mut Context,
        buf: &mut [u8],
        offset: u64,
    ) -> Poll<io::Result<usize>> {
        let file = &self.get_ref().inner;
        Poll::Ready(file.read_at(buf, offset))
    }
}
/// Blocking Directory variant:
#[repr(transparent)]
pub struct Directory<'a> {
    /// The shared directory implementation, driven synchronously by this wrapper.
    inner: accessor::DirectoryImpl<'a>,
}

impl<'a> Directory<'a> {
    /// Wrap an already opened directory.
    fn new(inner: accessor::DirectoryImpl<'a>) -> Self {
        Directory { inner }
    }

    /// Get a decoder for the directory contents.
    pub fn decode_full(&self) -> io::Result<Decoder<accessor::SeqReadAtAdapter<'a>>> {
        let decoder = poll_result_once(self.inner.decode_full())?;
        Ok(Decoder::from_impl(decoder))
    }

    /// Lookup an entry in a directory.
    ///
    /// Returns `Ok(None)` if there is no entry with that name.
    pub fn lookup<P: AsRef<Path>>(&'a self, path: P) -> io::Result<Option<FileEntry<'a>>> {
        let found = poll_result_once(self.inner.lookup(path.as_ref()))?;
        Ok(found.map(|inner| FileEntry { inner }))
    }

    /// Get an iterator over the directory's contents.
    pub fn read_dir(&'a self) -> ReadDir<'a> {
        let inner = self.inner.read_dir();
        ReadDir { inner }
    }
}
/// A file entry retrieved from a `Directory` via the `lookup` method.
#[repr(transparent)]
pub struct FileEntry<'a> {
    /// The shared file entry implementation.
    inner: accessor::FileEntryImpl<'a>,
}

impl<'a> FileEntry<'a> {
    /// Open this entry as a directory; fails if it is not one.
    pub fn enter_directory(&self) -> io::Result<Directory<'a>> {
        let dir = poll_result_once(self.inner.enter_directory())?;
        Ok(Directory::new(dir))
    }

    /// Consume this handle, keeping only the decoded entry.
    #[inline]
    pub fn into_entry(self) -> Entry {
        let FileEntry { inner } = self;
        inner.into_entry()
    }

    /// Borrow the decoded entry.
    #[inline]
    pub fn entry(&self) -> &Entry {
        self.inner.entry()
    }
}
/// An iterator over the contents of a `Directory`.
#[repr(transparent)]
pub struct ReadDir<'a> {
    /// The shared iterator implementation, driven synchronously by this wrapper.
    inner: accessor::ReadDirImpl<'a>,
}

impl<'a> Iterator for ReadDir<'a> {
    type Item = io::Result<DirEntry<'a>>;

    /// Yield the next directory entry, an error, or `None` at the end of the directory.
    fn next(&mut self) -> Option<Self::Item> {
        poll_result_once(self.inner.next())
            .map(|found| found.map(|inner| DirEntry { inner }))
            .transpose()
    }
}

// Once the end of the table is reached the position no longer changes, so `None` repeats.
impl<'a> std::iter::FusedIterator for ReadDir<'a> {}
/// A directory entry.
///
/// While iterating through a directory only the file name is available at first. The
/// remaining information can be decoded afterwards via `get_entry()`.
#[repr(transparent)]
pub struct DirEntry<'a> {
    /// The shared directory entry implementation.
    inner: accessor::DirEntryImpl<'a>,
}

impl<'a> DirEntry<'a> {
    /// The entry's file name within its directory.
    pub fn file_name(&self) -> &Path {
        self.inner.file_name()
    }

    /// Decode the entry this directory entry refers to.
    pub fn get_entry(&self) -> io::Result<FileEntry<'a>> {
        let inner = poll_result_once(self.inner.get_entry())?;
        Ok(FileEntry { inner })
    }
}

13
src/bin/pxar.rs Normal file
View File

@ -0,0 +1,13 @@
use pxar::decoder::Decoder;
/// Print the path of every entry of the pxar archive named by the first command line
/// argument, using the blocking decoder. Panics on the first error.
fn main() {
    let path = std::env::args_os()
        .nth(1)
        .expect("expected a file name");
    let input = std::fs::File::open(path).expect("failed to open file");
    let decoder = Decoder::from_std(input).expect("failed to open pxar archive contents");

    for item in decoder {
        let entry = item.expect("failed to parse entry");
        println!("{:#?}", entry.path());
    }
}

553
src/decoder.rs Normal file
View File

@ -0,0 +1,553 @@
//! The `pxar` decoder state machine.
//!
//! This is the implementation used by both the synchronous and async pxar wrappers.
use std::convert::TryFrom;
use std::ffi::OsString;
use std::io;
use std::mem::{self, size_of, size_of_val, MaybeUninit};
use std::os::unix::ffi::{OsStrExt, OsStringExt};
use std::path::{Path, PathBuf};
use std::pin::Pin;
use std::task::{Context, Poll};
//use std::os::unix::fs::FileExt;
use endian_trait::Endian;
use crate::format::{self, Header};
use crate::poll_fn::poll_fn;
use crate::util::{self, io_err_other};
use crate::{Entry, EntryKind, Metadata};
pub mod aio;
pub mod sync;
#[doc(inline)]
pub use sync::Decoder;
/// Throw-away destination for data we need to skip in non-seekable inputs.
///
/// The contents are never read back, so the buffer is left uninitialized.
static mut SCRATCH_BUFFER: MaybeUninit<[u8; 4096]> = MaybeUninit::uninit();

/// Get the scratch buffer as a byte slice.
///
/// NOTE(review): every call hands out a `&'static mut` to the same static; this presumably
/// relies on callers only ever writing garbage into it - confirm this is acceptable when
/// several decoders run concurrently.
fn scratch_buffer() -> &'static mut [u8] {
    unsafe {
        let whole: &'static mut [u8; 4096] = &mut *SCRATCH_BUFFER.as_mut_ptr();
        &mut whole[..]
    }
}
/// Sequential read interface used by the decoder's state machine.
///
/// Simply iterating through an archive only needs the equivalent of `poll_read()`.
///
/// In addition there is `poll_position()`, which types supporting `Seek` or `AsyncSeek` can
/// provide. With it the starting position of each entry becomes available (accessible via
/// `Entry::offset()`), which allows jumping between entries.
pub trait SeqRead {
    /// Read the next bytes of the input into `buf`; basically an `AsyncRead` equivalent.
    fn poll_seq_read(
        self: Pin<&mut Self>,
        cx: &mut Context,
        buf: &mut [u8],
    ) -> Poll<io::Result<usize>>;

    /// Report the current position within the input, so that offsets can be noted down for
    /// later use.
    ///
    /// Readers which support neither seeking nor positional reading can keep this default
    /// implementation, which returns `None`.
    fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
        Poll::Ready(None)
    }
}
/// Allow using trait objects for generics taking a `SeqRead`:
///
/// Both methods simply forward to the referenced object.
impl<'a> SeqRead for &mut (dyn SeqRead + 'a) {
    fn poll_seq_read(
        self: Pin<&mut Self>,
        cx: &mut Context,
        buf: &mut [u8],
    ) -> Poll<io::Result<usize>> {
        // Project the pin through the reference to reach the object behind it.
        let target = unsafe { self.map_unchecked_mut(|this| &mut **this) };
        target.poll_seq_read(cx, buf)
    }

    fn poll_position(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
        let target = unsafe { self.map_unchecked_mut(|this| &mut **this) };
        target.poll_position(cx)
    }
}
/// We do not want to bother with actual polling, so we implement `async fn` variants of the
/// `SeqRead` methods on `dyn SeqRead`.
///
/// The reason why this is not an internal `SeqReadExt` trait like `AsyncReadExt` is simply that
/// we'd then need to define all the `Future` types they return manually and explicitly. Since we
/// have no use for them, all we want is the ability to use `async fn`...
///
/// The downside is that we need some `(&mut self.input as &mut dyn SeqRead)` casts in the
/// decoder's code, but that's fine.
impl<'a> dyn SeqRead + 'a {
    /// awaitable version of `poll_position`.
    async fn position(&mut self) -> Option<io::Result<u64>> {
        poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *self).poll_position(cx) }).await
    }

    /// awaitable version of `poll_seq_read`.
    async fn seq_read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *self).poll_seq_read(cx, buf) }).await
    }

    /// `read_exact` - since that's what we _actually_ want most of the time, but with EOF
    /// handling: hitting EOF before the first byte is tolerated, hitting it after a partial
    /// read is an error.
    ///
    /// NOTE(review): as written this never returns `None`. On EOF before the first byte the
    /// loop is left via `break` and `Ok(Some(()))` is returned with `buf` untouched, so
    /// callers cannot tell a clean EOF from a completed read (and `seq_read_entry_or_eof`
    /// then assumes the buffer was filled). Returning `Ok(None)` there looks like the
    /// intent, but `DecoderImpl::next_do` as written depends on the current behaviour: after
    /// the outermost GOODBYE it reads one more header, which only "succeeds" because of
    /// this. Change both together.
    async fn seq_read_exact_or_eof(&mut self, mut buf: &mut [u8]) -> io::Result<Option<()>> {
        let mut eof_ok = true;
        while !buf.is_empty() {
            match self.seq_read(buf).await? {
                0 if eof_ok => break,
                0 => io_bail!("unexpected EOF"),
                got => buf = &mut buf[got..],
            }
            eof_ok = false;
        }
        Ok(Some(()))
    }

    /// `read_exact` - since that's what we _actually_ want most of the time.
    ///
    /// Maps a `None` from `seq_read_exact_or_eof` to an error.
    async fn seq_read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        match self.seq_read_exact_or_eof(buf).await? {
            Some(()) => Ok(()),
            None => io_bail!("unexpected eof"),
        }
    }

    /// Helper to read exactly `size` bytes into a newly allocated byte vector.
    async fn seq_read_exact_data(&mut self, size: usize) -> io::Result<Vec<u8>> {
        let mut data = util::vec_new(size);
        self.seq_read_exact(&mut data[..]).await?;
        Ok(data)
    }

    /// `seq_read_entry` with EOF handling: `None` is passed on from `seq_read_exact_or_eof`.
    async fn seq_read_entry_or_eof<T: Endian>(&mut self) -> io::Result<Option<T>> {
        let mut data = MaybeUninit::<T>::uninit();
        // View the not yet initialized value as raw bytes to read into.
        let buf =
            unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<T>()) };
        if self.seq_read_exact_or_eof(buf).await?.is_none() {
            return Ok(None);
        }
        Ok(Some(unsafe { data.assume_init().from_le() }))
    }

    /// Helper to read into an `Endian`-implementing `struct`.
    ///
    /// EOF in place of the item is an error here.
    async fn seq_read_entry<T: Endian>(&mut self) -> io::Result<T> {
        self.seq_read_entry_or_eof()
            .await?
            .ok_or_else(|| io_format_err!("unexpected EOF"))
    }
}
/// The decoder state machine implementation.
///
/// The state machine is written with `async fn`, which lets us plug in synchronous as well as
/// `async` I/O objects as input.
pub struct DecoderImpl<T> {
    /// The sequential reader providing the archive data.
    input: T,
    /// Header of the item read most recently.
    current_header: Header,
    /// The entry currently being assembled; taken out when it is returned to the caller.
    entry: Entry,
    /// Byte lengths of the paths of the directories we are currently inside of.
    path_lengths: Vec<usize>,
    /// Where we are in the archive with regards to the current entry.
    state: State,
    /// Whether reaching a goodbye table is reported as an `EndOfDirectory` entry.
    with_goodbye_tables: bool,
}
/// Position of the decoder relative to the entry it handled last.
enum State {
    /// Nothing has been read yet.
    Begin,
    /// An entry was finished completely; the next item header still has to be read.
    Default,
    /// The last entry's payload has not been consumed and must be skipped first.
    InPayload,
    /// We are inside a directory and already loaded its next FILENAME or GOODBYE header.
    InDirectory,
    /// The end of the archive was reached.
    Eof,
}
/// Control flow while parsing items.
///
/// An entry is parsed by going through all of its attribute items until its end is reached.
/// For a directory, "the end" is the beginning of its contents.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ItemResult {
    /// An "attribute" item was parsed; parsing of the entry should continue.
    Attribute,

    /// An entry was finished (`SYMLINK`, `HARDLINK`, ...) or the contents of a directory were
    /// just entered (`FILENAME`, `GOODBYE`).
    ///
    /// We stop moving forward at this point.
    Entry,
}
impl<T: SeqRead> DecoderImpl<T> {
pub async fn new(input: T) -> io::Result<Self> {
Self::new_full(input, "/".into()).await
}
/// Create a decoder for `input` whose first entry gets the path `path`.
///
/// Nothing is decoded yet; only the input's current position (if it provides one) is
/// queried and recorded as the offset of the first entry.
pub(crate) async fn new_full(mut input: T, path: PathBuf) -> io::Result<Self> {
    let offset = {
        let reader: &mut dyn SeqRead = &mut input;
        reader.position().await.transpose()?
    };

    let entry = Entry {
        path,
        kind: EntryKind::EndOfDirectory,
        metadata: Metadata::default(),
        offset,
    };

    Ok(DecoderImpl {
        input,
        // Placeholder until the first header is read.
        current_header: unsafe { mem::zeroed() },
        entry,
        path_lengths: Vec::new(),
        state: State::Begin,
        with_goodbye_tables: false,
    })
}
/// Get the next file entry, recursing into directories.
///
/// Returns `None` at the end of the archive.
pub async fn next(&mut self) -> Option<io::Result<Entry>> {
    match self.next_do().await {
        Ok(Some(entry)) => Some(Ok(entry)),
        Ok(None) => None,
        Err(err) => Some(Err(err)),
    }
}
/// Advance the state machine to the next entry.
///
/// Returns `Ok(None)` once the goodbye table of the outermost directory has been passed
/// (or, for a decoder which never entered a directory, when a goodbye table is hit).
pub(crate) async fn next_do(&mut self) -> io::Result<Option<Entry>> {
    loop {
        match self.state {
            State::Eof => return Ok(None),
            State::Begin => return self.read_next_entry().await.map(Some),
            State::Default => {
                // we completely finished an entry, so now we're going "up" in the directory
                // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
                self.read_next_item().await?;
            }
            State::InPayload => {
                // We need to skip the current payload first.
                self.skip_entry().await?;
                self.read_next_item().await?;
            }
            State::InDirectory => {
                // We're at the next FILENAME or GOODBYE item.
            }
        }

        match self.current_header.htype {
            format::PXAR_FILENAME => return self.handle_file_entry().await,
            format::PXAR_GOODBYE => {
                if self.with_goodbye_tables {
                    self.entry.kind = EntryKind::EndOfDirectory;
                    let offset = (&mut self.input as &mut dyn SeqRead)
                        .position()
                        .await
                        .transpose()?;
                    self.entry.offset = offset;
                    self.state = State::InPayload;
                    return Ok(Some(self.entry.take()));
                }

                self.skip_entry().await?;

                // We are leaving the directory this goodbye table belongs to. If that was
                // the outermost one (or we never were inside of one) the archive ends
                // here and no further header may be read.
                if self.path_lengths.pop().is_none() || self.path_lengths.is_empty() {
                    self.state = State::Eof;
                    // early out:
                    return Ok(None);
                }

                // Otherwise keep going in the parent directory:
                self.state = State::Default;
                continue;
            }
            h => io_bail!(
                "expected filename or directory-goodbye pxar entry, got: {:x}",
                h
            ),
        }
    }
}
/// Handle a `PXAR_FILENAME` item whose header is already loaded.
///
/// Reads the zero terminated file name, updates the current path accordingly and decodes
/// the entry following the name.
async fn handle_file_entry(&mut self) -> io::Result<Option<Entry>> {
    let mut raw_name = self.read_entry_as_bytes().await?;

    // filenames are zero terminated!
    if raw_name.pop() != Some(0) {
        io_bail!("illegal path found (missing terminating zero)");
    }
    if raw_name.is_empty() {
        io_bail!("illegal path found (empty)");
    }

    let name = PathBuf::from(OsString::from_vec(raw_name));
    self.set_path(&name)?;

    let entry = self.read_next_entry().await?;
    Ok(Some(entry))
}
fn reset_path(&mut self) -> io::Result<()> {
let path_len = *self
.path_lengths
.last()
.ok_or_else(|| io_format_err!("internal decoder error: path underrun"))?;
let mut path = mem::replace(&mut self.entry.path, PathBuf::new())
.into_os_string()
.into_vec();
path.truncate(path_len);
self.entry.path = PathBuf::from(OsString::from_vec(path));
Ok(())
}
/// Make the current path point to `path` within the directory we are currently in.
fn set_path(&mut self, path: &Path) -> io::Result<()> {
    self.reset_path()?;
    let current = &mut self.entry.path;
    current.push(path);
    Ok(())
}
/// Decode the next entry: its `PXAR_ENTRY` item followed by all of its attribute items.
///
/// Returns `Ok(None)` if the reader reports EOF in place of the entry. For directories the
/// length of the path is remembered so that it can be restored for later siblings.
async fn read_next_entry_or_eof(&mut self) -> io::Result<Option<Entry>> {
    /// A header immediately followed by its fixed size payload, read in one go.
    #[derive(Endian)]
    #[repr(C)]
    struct WithHeader<U: Endian> {
        header: Header,
        data: U,
    }

    self.state = State::Default;
    self.entry.clear_data();

    let first: WithHeader<format::Entry> = {
        let reader: &mut dyn SeqRead = &mut self.input;
        match reader.seq_read_entry_or_eof().await? {
            Some(item) => item,
            None => return Ok(None),
        }
    };

    if first.header.htype != format::PXAR_ENTRY {
        io_bail!(
            "expected pxar entry of type 'Entry', got: {:x}",
            first.header.htype
        );
    }

    self.current_header = unsafe { mem::zeroed() };
    self.entry.metadata = Metadata {
        stat: first.data,
        ..Default::default()
    };

    // Consume items until one finishes the entry.
    loop {
        if self.read_next_item().await? == ItemResult::Entry {
            break;
        }
    }

    if self.entry.is_dir() {
        let path_len = self.entry.path.as_os_str().as_bytes().len();
        self.path_lengths.push(path_len);
    }

    Ok(Some(self.entry.take()))
}
/// Like `read_next_entry_or_eof`, but EOF in place of the entry is an error.
async fn read_next_entry(&mut self) -> io::Result<Entry> {
    match self.read_next_entry_or_eof().await? {
        Some(entry) => Ok(entry),
        None => Err(io_format_err!("unexpected EOF")),
    }
}
/// Load the next item header and process the item it introduces.
async fn read_next_item(&mut self) -> io::Result<ItemResult> {
    self.read_next_header().await?;
    let outcome = self.read_current_item().await?;
    Ok(outcome)
}
async fn read_next_header(&mut self) -> io::Result<()> {
let dest = unsafe {
std::slice::from_raw_parts_mut(
&mut self.current_header as *mut Header as *mut u8,
size_of_val(&self.current_header),
)
};
(&mut self.input as &mut dyn SeqRead)
.seq_read_exact(dest)
.await?;
Ok(())
}
/// Read the next item, the header is already loaded.
///
/// Attribute items are stored in the current entry's metadata and yield
/// `ItemResult::Attribute`. Items which determine the kind of the entry (links, devices,
/// payloads, and the `FILENAME`/`GOODBYE` items found at the start of a directory's contents)
/// set the entry kind and yield `ItemResult::Entry`.
///
/// Attributes which may only appear once per entry cause an error when repeated, as does
/// any unknown item type.
async fn read_current_item(&mut self) -> io::Result<ItemResult> {
    let outcome = match self.current_header.htype {
        format::PXAR_XATTR => {
            let xattr = self.read_xattr().await?;
            self.entry.metadata.xattrs.push(xattr);
            ItemResult::Attribute
        }
        format::PXAR_ACL_USER => {
            let user = self.read_acl_user().await?;
            self.entry.metadata.acl.users.push(user);
            ItemResult::Attribute
        }
        format::PXAR_ACL_GROUP => {
            let group = self.read_acl_group().await?;
            self.entry.metadata.acl.groups.push(group);
            ItemResult::Attribute
        }
        format::PXAR_ACL_GROUP_OBJ => {
            if self.entry.metadata.acl.group_obj.is_some() {
                io_bail!("multiple acl group object entries detected");
            }
            let group_obj = self.read_acl_group_object().await?;
            self.entry.metadata.acl.group_obj = Some(group_obj);
            ItemResult::Attribute
        }
        format::PXAR_ACL_DEFAULT => {
            if self.entry.metadata.acl.default.is_some() {
                io_bail!("multiple acl default entries detected");
            }
            let default = self.read_acl_default().await?;
            self.entry.metadata.acl.default = Some(default);
            ItemResult::Attribute
        }
        format::PXAR_ACL_DEFAULT_USER => {
            let user = self.read_acl_user().await?;
            self.entry.metadata.acl.default_users.push(user);
            ItemResult::Attribute
        }
        format::PXAR_ACL_DEFAULT_GROUP => {
            let group = self.read_acl_group().await?;
            self.entry.metadata.acl.default_groups.push(group);
            ItemResult::Attribute
        }
        format::PXAR_FCAPS => {
            if self.entry.metadata.fcaps.is_some() {
                io_bail!("multiple file capability entries detected");
            }
            let fcaps = self.read_fcaps().await?;
            self.entry.metadata.fcaps = Some(fcaps);
            ItemResult::Attribute
        }
        format::PXAR_QUOTA_PROJID => {
            if self.entry.metadata.quota_project_id.is_some() {
                io_bail!("multiple quota project id entries detected");
            }
            let project_id = self.read_quota_project_id().await?;
            self.entry.metadata.quota_project_id = Some(project_id);
            ItemResult::Attribute
        }
        format::PXAR_SYMLINK => {
            let link = self.read_symlink().await?;
            self.entry.kind = EntryKind::Symlink(link);
            ItemResult::Entry
        }
        format::PXAR_HARDLINK => {
            let link = self.read_hardlink().await?;
            self.entry.kind = EntryKind::Hardlink(link);
            ItemResult::Entry
        }
        format::PXAR_DEVICE => {
            let device = self.read_device().await?;
            self.entry.kind = EntryKind::Device(device);
            ItemResult::Entry
        }
        format::PXAR_PAYLOAD => {
            // The payload itself stays unread, only its size is noted.
            self.entry.kind = EntryKind::File {
                size: self.current_header.content_size(),
            };
            self.state = State::InPayload;
            ItemResult::Entry
        }
        format::PXAR_FILENAME | format::PXAR_GOODBYE => {
            // Only directories have contents, and this is where they begin.
            self.state = State::InDirectory;
            self.entry.kind = EntryKind::Directory;
            ItemResult::Entry
        }
        other => io_bail!("unexpected entry type: {:x}", other),
    };

    Ok(outcome)
}
//
// Local read helpers.
//
// These utilize additional information and hence are not part of the `dyn SeqRead` impl.
//
/// Read and discard the content of the current item.
///
/// The amount to discard is the current header's content size; it is consumed in pieces no
/// larger than the scratch buffer.
async fn skip_entry(&mut self) -> io::Result<()> {
    let mut remaining = self.current_header.content_size();
    let scratch = scratch_buffer();
    let chunk_size = scratch.len() as u64;

    // Consume as many full scratch buffers as fit...
    while remaining >= chunk_size {
        (&mut self.input as &mut dyn SeqRead)
            .seq_read_exact(scratch)
            .await?;
        remaining -= chunk_size;
    }

    // ...followed by the partial tail, which is now known to be smaller than the buffer.
    let tail_len = remaining as usize;
    if tail_len > 0 {
        (&mut self.input as &mut dyn SeqRead)
            .seq_read_exact(&mut scratch[..tail_len])
            .await?;
    }

    Ok(())
}
/// Read the whole content of the current item into a freshly allocated buffer.
///
/// Fails if the content size from the current header does not fit into a `usize`.
async fn read_entry_as_bytes(&mut self) -> io::Result<Vec<u8>> {
    let content_len = self.current_header.content_size();
    let byte_count = usize::try_from(content_len).map_err(io_err_other)?;
    let reader = &mut self.input as &mut dyn SeqRead;
    Ok(reader.seq_read_exact_data(byte_count).await?)
}
/// Helper to read a struct entry while checking its size.
async fn read_simple_entry<U: Endian + 'static>(
&mut self,
what: &'static str,
) -> io::Result<U> {
if self.current_header.content_size() != (size_of::<T>() as u64) {
io_bail!(
"bad {} size: {} (expected {})",
what,
self.current_header.content_size(),
size_of::<T>(),
);
}
(&mut self.input as &mut dyn SeqRead).seq_read_entry().await
}
//
// Read functions for PXAR components.
//
/// Read an extended attribute item.
///
/// The position of the first zero byte in the content becomes the attribute's name length;
/// content without any zero byte is rejected.
async fn read_xattr(&mut self) -> io::Result<format::XAttr> {
    let data = self.read_entry_as_bytes().await?;
    match data.iter().position(|&byte| byte == 0) {
        Some(name_len) => Ok(format::XAttr { data, name_len }),
        None => Err(io_format_err!("missing value separator in xattr")),
    }
}
/// Read a symlink item; its raw content becomes the link data.
async fn read_symlink(&mut self) -> io::Result<format::Symlink> {
    self.read_entry_as_bytes()
        .await
        .map(|data| format::Symlink { data })
}
/// Read a hardlink item; its raw content becomes the link data.
async fn read_hardlink(&mut self) -> io::Result<format::Hardlink> {
    self.read_entry_as_bytes()
        .await
        .map(|data| format::Hardlink { data })
}
/// Read a `format::Device` item through `read_simple_entry`, which uses "device" as the name
/// in its size-mismatch error.
async fn read_device(&mut self) -> io::Result<format::Device> {
self.read_simple_entry("device").await
}
/// Read a file capabilities item; its raw content is stored without interpretation.
async fn read_fcaps(&mut self) -> io::Result<format::FCaps> {
    self.read_entry_as_bytes()
        .await
        .map(|data| format::FCaps { data })
}
/// Read a `format::acl::User` item through `read_simple_entry` ("acl user" in error messages).
async fn read_acl_user(&mut self) -> io::Result<format::acl::User> {
self.read_simple_entry("acl user").await
}
/// Read a `format::acl::Group` item through `read_simple_entry` ("acl group" in error
/// messages).
async fn read_acl_group(&mut self) -> io::Result<format::acl::Group> {
self.read_simple_entry("acl group").await
}
/// Read a `format::acl::GroupObject` item through `read_simple_entry` ("acl group object" in
/// error messages).
async fn read_acl_group_object(&mut self) -> io::Result<format::acl::GroupObject> {
self.read_simple_entry("acl group object").await
}
/// Read a `format::acl::Default` item through `read_simple_entry` ("acl default" in error
/// messages).
async fn read_acl_default(&mut self) -> io::Result<format::acl::Default> {
self.read_simple_entry("acl default").await
}
/// Read a `format::QuotaProjectId` item through `read_simple_entry` ("quota project id" in
/// error messages).
async fn read_quota_project_id(&mut self) -> io::Result<format::QuotaProjectId> {
self.read_simple_entry("quota project id").await
}
}

169
src/decoder/aio.rs Normal file
View File

@ -0,0 +1,169 @@
//! Asynchronous `pxar` format handling.
use std::io;
use crate::decoder::{self, SeqRead};
use crate::Entry;
/// Asynchronous `pxar` decoder.
///
/// This is the `async` version of the `pxar` decoder.
///
/// It is a thin wrapper around `decoder::DecoderImpl`, to which all methods forward.
#[repr(transparent)]
pub struct Decoder<T> {
// The decoder state machine doing the actual work.
inner: decoder::DecoderImpl<T>,
}
#[cfg(feature = "futures-io")]
impl<T: futures::io::AsyncRead> Decoder<T> {
/// Decode a `pxar` archive from a `futures::io::AsyncRead` input.
///
/// The input is wrapped in a `FuturesReader` adapter, which is why the result is a
/// `Decoder<FuturesReader<T>>` rather than a `Decoder<T>`.
#[inline]
pub async fn from_futures(input: T) -> io::Result<Decoder<FuturesReader<T>>> {
Decoder::new(FuturesReader::new(input)).await
}
}
#[cfg(feature = "tokio-io")]
impl<T: tokio::io::AsyncRead> Decoder<T> {
/// Decode a `pxar` archive from a `tokio::io::AsyncRead` input.
///
/// The input is wrapped in a `TokioReader` adapter, which is why the result is a
/// `Decoder<TokioReader<T>>` rather than a `Decoder<T>`.
#[inline]
pub async fn from_tokio(input: T) -> io::Result<Decoder<TokioReader<T>>> {
Decoder::new(TokioReader::new(input)).await
}
}
impl<T: SeqRead> Decoder<T> {
    /// Create an async decoder from an input implementing our internal read interface.
    ///
    /// Fails with whatever error setting up the underlying `DecoderImpl` produces.
    pub async fn new(input: T) -> io::Result<Self> {
        let inner = decoder::DecoderImpl::new(input).await?;
        Ok(Self { inner })
    }

    /// If this is a directory entry, get the next item inside the directory.
    ///
    /// Returns `None` once the underlying decoder reports that there are no more items.
    pub async fn next(&mut self) -> Option<io::Result<Entry>> {
        let item = self.inner.next_do().await;
        item.transpose()
    }

    /// Turn this decoder into a `Stream`.
    #[cfg(feature = "futures-io")]
    pub fn into_stream(self) -> DecoderStream<T> {
        DecoderStream::new(self)
    }
}
#[cfg(feature = "futures-io")]
mod stream {
    use std::future::Future;
    use std::io;
    use std::pin::Pin;
    use std::task::{Context, Poll};

    use super::{Entry, SeqRead};

    /// A wrapper for the async decoder implementing `futures::stream::Stream`.
    ///
    /// As long as streams are poll-based this wrapper is required to turn `async fn next()` into
    /// `Stream`'s `poll_next()` interface.
    ///
    /// The pending `next()` future borrows the decoder with its lifetime erased, which makes
    /// this a self-referential type. To keep that borrow pointing at live memory the decoder is
    /// kept in its own heap allocation, so moving the `DecoderStream` between polls does not
    /// move the decoder.
    pub struct DecoderStream<T> {
        /// The in-flight `next()` call, if any.
        ///
        /// Declared before `inner` so that it is dropped first: it refers to the decoder.
        future: Option<Pin<Box<dyn Future<Output = Option<io::Result<Entry>>>>>>,

        /// The wrapped decoder, boxed to give it a stable address.
        inner: Box<super::Decoder<T>>,
    }

    impl<T> DecoderStream<T> {
        /// Wrap `inner`; nothing is read until the stream is polled.
        pub fn new(inner: super::Decoder<T>) -> Self {
            Self {
                future: None,
                inner: Box::new(inner),
            }
        }
    }

    impl<T: SeqRead> futures::stream::Stream for DecoderStream<T> {
        type Item = io::Result<Entry>;

        fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
            // Both fields are boxes, hence `Self: Unpin` and no unsafe pin projection is needed.
            let this = self.get_mut();
            loop {
                // Drive a previously started `next()` call to completion first.
                if let Some(mut fut) = this.future.take() {
                    match fut.as_mut().poll(cx) {
                        Poll::Ready(res) => return Poll::Ready(res),
                        Poll::Pending => {
                            this.future = Some(fut);
                            return Poll::Pending;
                        }
                    }
                }

                // No call in flight: start one and loop around to poll it.
                let decoder: &mut super::Decoder<T> = &mut this.inner;
                // SAFETY: the future borrows `*decoder`, which lives in a heap allocation owned
                // by `this.inner`. That allocation is neither moved nor freed while the future
                // exists: the future is stored in `this.future`, it is always consumed or
                // dropped before another `next()` call is started, and the field order drops it
                // before `inner`. Only the borrow's lifetime is changed by the transmute.
                unsafe {
                    let fut: Box<dyn Future<Output = _>> = Box::new(decoder.next());
                    // Discard the lifetime:
                    let fut: *mut (dyn Future<Output = Option<io::Result<Entry>>> + 'static) =
                        core::mem::transmute(Box::into_raw(fut));
                    let fut = Box::from_raw(fut);
                    this.future = Some(Pin::new_unchecked(fut));
                }
            }
        }
    }
}
#[cfg(feature = "futures-io")]
pub use stream::DecoderStream;
// Generates a module `$mod` containing a read adapter `$name<T>`.
//
// The adapter wraps a `T: $trait` and implements our `SeqRead` trait by forwarding
// `poll_seq_read` to the wrapped reader's `poll_read`. Both the module and the re-export of the
// adapter type carry the `#[cfg(...)]` passed to the macro.
macro_rules! async_io_impl {
(
#[cfg( $($attr:tt)+ )]
mod $mod:ident {
$(#[$docs:meta])*
$name:ident : $trait:path ;
}
) => {
#[cfg( $($attr)+ )]
mod $mod {
use std::io;
use std::pin::Pin;
use std::task::{Context, Poll};
$(#[$docs])*
pub struct $name<T> {
inner: T,
}
impl<T: $trait> $name<T> {
pub fn new(inner: T) -> Self {
Self { inner }
}
}
impl<T: $trait> crate::decoder::SeqRead for $name<T> {
fn poll_seq_read(
self: Pin<&mut Self>,
cx: &mut Context,
buf: &mut [u8],
) -> Poll<io::Result<usize>> {
// Pin projection from the adapter onto the wrapped reader.
// NOTE(review): this relies on `inner` never being moved out of a pinned adapter;
// the generated module offers no way to do so.
unsafe {
self.map_unchecked_mut(|this| &mut this.inner)
.poll_read(cx, buf)
}
}
}
}
#[cfg( $($attr)+ )]
pub use $mod::$name;
}
}
// Generates `FuturesReader`, the `SeqRead` adapter used by `Decoder::from_futures`.
async_io_impl! {
#[cfg(feature = "futures-io")]
mod fut {
/// Read adapter for `futures::io::AsyncRead`.
FuturesReader : futures::io::AsyncRead;
}
}
// Generates `TokioReader`, the `SeqRead` adapter used by `Decoder::from_tokio`.
async_io_impl! {
#[cfg(feature = "tokio-io")]
mod tok {
/// Read adapter for `tokio::io::AsyncRead`.
TokioReader : tokio::io::AsyncRead;
}
}

82
src/decoder/sync.rs Normal file
View File

@ -0,0 +1,82 @@
//! Blocking `pxar` format handling.
use std::io;
use std::pin::Pin;
use std::task::{Context, Poll};
use crate::decoder::{self, SeqRead};
use crate::util::poll_result_once;
use crate::Entry;
/// Blocking `pxar` decoder.
///
/// This is the blocking I/O version of the `pxar` decoder. This will *not* work with an
/// asynchronous I/O object. I/O must always return `Poll::Ready`.
///
/// Attempting to use a `Waker` from this context *will* `panic!`.
///
/// If you need to use asynchronous I/O, use `aio::Decoder`.
#[repr(transparent)]
pub struct Decoder<T> {
// The decoder state machine; its futures are polled exactly once by the methods of this type.
inner: decoder::DecoderImpl<T>,
}
impl<T: io::Read> Decoder<T> {
/// Decode a `pxar` archive from a regular `std::io::Read` input.
///
/// The input is wrapped in a `StandardReader` adapter, which is why the result is a
/// `Decoder<StandardReader<T>>` rather than a `Decoder<T>`.
#[inline]
pub fn from_std(input: T) -> io::Result<Decoder<StandardReader<T>>> {
Decoder::new(StandardReader::new(input))
}
}
impl<T: SeqRead> Decoder<T> {
/// Create a *blocking* decoder from an input implementing our internal read interface.
///
/// Note that the `input`'s `SeqRead` implementation must always return `Poll::Ready` and is
/// not allowed to use the `Waker`, as this will cause a `panic!`.
pub fn new(input: T) -> io::Result<Self> {
Ok(Self {
inner: poll_result_once(decoder::DecoderImpl::new(input))?,
})
}
/// Internal helper for `Accessor`. In this case we have the low-level state machine, and the
/// layer "above" the `Accessor` propagates the actual type (sync vs async).
pub(crate) fn from_impl(inner: decoder::DecoderImpl<T>) -> Self {
Self { inner }
}
/// If this is a directory entry, get the next item inside the directory.
pub fn next(&mut self) -> Option<io::Result<Entry>> {
poll_result_once(self.inner.next_do()).transpose()
}
}
/// Iterating a blocking decoder yields the same items as repeatedly calling `Decoder::next`.
impl<T: SeqRead> Iterator for Decoder<T> {
type Item = io::Result<Entry>;
fn next(&mut self) -> Option<Self::Item> {
// Forward to the inherent method of the same name.
Decoder::next(self)
}
}
/// Pxar decoder read adapter for `std::io::Read`.
///
/// Implements `SeqRead` by performing the blocking read directly inside the poll method.
pub struct StandardReader<T> {
// The wrapped blocking reader.
inner: T,
}
impl<T: io::Read> StandardReader<T> {
/// Wrap a blocking reader; nothing is read at this point.
pub fn new(inner: T) -> Self {
Self { inner }
}
}
impl<T: io::Read> SeqRead for StandardReader<T> {
/// Perform one blocking `read` on the wrapped reader.
///
/// Always returns `Poll::Ready`; the task context is ignored.
fn poll_seq_read(
self: Pin<&mut Self>,
_cx: &mut Context,
buf: &mut [u8],
) -> Poll<io::Result<usize>> {
// The pinned reference is only used to call `read` on `inner` in place; nothing is moved.
Poll::Ready(unsafe { self.get_unchecked_mut() }.inner.read(buf))
}
}

233
src/format.rs Normal file
View File

@ -0,0 +1,233 @@
//! *pxar* binary format definition
//!
//! Please note that all values are stored in little endian ordering.
//!
//! The Archive contains a list of items. Each item starts with a `Header`, followed by the
//! item data.
use std::cmp::Ordering;
use std::io;
use std::mem::size_of;
use std::path::Path;
use endian_trait::Endian;
use siphasher::sip::SipHasher24;
pub mod acl;
// Item type identifiers, as found in `Header::htype`.
pub const PXAR_ENTRY: u64 = 0x1396fabcea5bbb51;
pub const PXAR_FILENAME: u64 = 0x6dbb6ebcb3161f0b;
pub const PXAR_SYMLINK: u64 = 0x664a6fb6830e0d6c;
pub const PXAR_DEVICE: u64 = 0xac3dace369dfe643;
pub const PXAR_XATTR: u64 = 0xb8157091f80bc486;
pub const PXAR_ACL_USER: u64 = 0x297dc88b2ef12faf;
pub const PXAR_ACL_GROUP: u64 = 0x36f2acb56cb3dd0b;
pub const PXAR_ACL_GROUP_OBJ: u64 = 0x23047110441f38f3;
pub const PXAR_ACL_DEFAULT: u64 = 0xfe3eeda6823c8cd0;
pub const PXAR_ACL_DEFAULT_USER: u64 = 0xbdf03df9bd010a91;
pub const PXAR_ACL_DEFAULT_GROUP: u64 = 0xa0cb1168782d1f51;
pub const PXAR_FCAPS: u64 = 0xf7267db0afed0629;
pub const PXAR_QUOTA_PROJID: u64 = 0x161baf2d8772a72b;
/// Marks item as hardlink
/// compute_goodbye_hash(b"__PROXMOX_FORMAT_HARDLINK__");
pub const PXAR_HARDLINK: u64 = 0x2c5e06f634f65b86;
/// Marks the beginning of the payload (actual content) of regular files
pub const PXAR_PAYLOAD: u64 = 0x8b9e1d93d6dcffc9;
/// Marks item as entry of goodbye table
pub const PXAR_GOODBYE: u64 = 0xdfd35c5e8327c403;
/// The end marker used in the GOODBYE object
pub const PXAR_GOODBYE_TAIL_MARKER: u64 = 0x57446fa533702943;
/// The header every item in the archive starts with; the item data follows it.
#[derive(Debug, Endian)]
#[repr(C)]
pub struct Header {
/// The item type (see `PXAR_` constants).
pub htype: u64,
/// The size of the item, including the size of `Header`.
full_size: u64,
}
impl Header {
    /// The size of the whole item in bytes, including the header itself.
    #[inline]
    pub fn full_size(&self) -> u64 {
        self.full_size
    }

    /// The size of the item's content in bytes, i.e. the full size without the header.
    ///
    /// The full size is read from the archive and may therefore be smaller than the header
    /// itself if the archive is corrupt. Such items are reported as having no content instead
    /// of underflowing (which would panic in debug builds and yield a huge bogus size in
    /// release builds).
    #[inline]
    pub fn content_size(&self) -> u64 {
        self.full_size().saturating_sub(size_of::<Self>() as u64)
    }
}
/// Basic metadata of an archive entry: mode, flags, ownership and modification time.
///
/// NOTE(review): the unit of `mtime` and the meaning of the `flags` bits are not visible in
/// this file; confirm against the encoder before relying on them.
#[derive(Clone, Debug, Default, Endian)]
#[repr(C)]
pub struct Entry {
pub mode: u64,
pub flags: u64,
pub uid: u32,
pub gid: u32,
pub mtime: u64,
}
/// A file name, kept as raw bytes.
#[derive(Clone, Debug)]
pub struct Filename {
pub name: Vec<u8>,
}
/// The target of a symbolic link, kept as raw bytes.
#[derive(Clone, Debug)]
pub struct Symlink {
pub data: Vec<u8>,
}
/// The target of a hard link, kept as raw bytes.
#[derive(Clone, Debug)]
pub struct Hardlink {
pub data: Vec<u8>,
}
/// An extended attribute: a name and a value stored together in one buffer.
#[derive(Clone, Debug, Eq)]
#[repr(C)]
pub struct XAttr {
// The name, a single zero byte, then the value.
pub(crate) data: Vec<u8>,
// Length of the name, i.e. the index of the separating zero byte in `data`.
pub(crate) name_len: usize,
}
impl XAttr {
    /// Build an extended attribute from a name and a value.
    ///
    /// Both end up in a single buffer laid out as `name`, one zero byte, `value`.
    pub fn new<N: AsRef<[u8]>, V: AsRef<[u8]>>(name: N, value: V) -> Self {
        let (name, value) = (name.as_ref(), value.as_ref());
        let separator: &[u8] = &[0];
        Self {
            data: [name, separator, value].concat(),
            name_len: name.len(),
        }
    }

    /// The attribute name: everything in front of the separating zero byte.
    pub fn name(&self) -> &[u8] {
        let (name, _separator_and_value) = self.data.split_at(self.name_len);
        name
    }

    /// The attribute value: everything behind the separating zero byte.
    pub fn value(&self) -> &[u8] {
        let value_start = self.name_len + 1;
        &self.data[value_start..]
    }
}
/// Extended attributes are ordered by name only; the value is not taken into account.
impl Ord for XAttr {
fn cmp(&self, other: &XAttr) -> Ordering {
self.name().cmp(&other.name())
}
}
/// Delegates to `Ord::cmp`, so the partial and the total order always agree.
impl PartialOrd for XAttr {
fn partial_cmp(&self, other: &XAttr) -> Option<Ordering> {
Some(self.cmp(other))
}
}
/// Two extended attributes are equal if their names are; the value is not compared.
impl PartialEq for XAttr {
fn eq(&self, other: &XAttr) -> bool {
self.name() == other.name()
}
}
/// Major and minor number of a device node.
#[derive(Clone, Debug, Endian)]
#[repr(C)]
pub struct Device {
pub major: u64,
pub minor: u64,
}
/// File capabilities, kept as uninterpreted raw bytes.
#[derive(Clone, Debug)]
#[repr(C)]
pub struct FCaps {
pub data: Vec<u8>,
}
/// Quota project id of an entry.
#[derive(Clone, Debug, Endian)]
#[repr(C)]
pub struct QuotaProjectId {
pub projid: u64,
}
/// One item of a directory's GOODBYE object.
#[derive(Debug, Endian)]
#[repr(C)]
pub struct GoodbyeItem {
/// SipHash24 of the directory item name. The last GOODBYE item uses the special hash value
/// `PXAR_GOODBYE_TAIL_MARKER`.
pub hash: u64,
/// The offset from the start of the GOODBYE object to the start of the matching directory item
/// (point to a FILENAME). The last GOODBYE item points to the start of the matching ENTRY
/// object.
pub offset: u64,
/// The overall size of the directory item. This includes the FILENAME header. In other words,
/// `goodbye_start - offset + size` points to the end of the directory.
///
/// The last GOODBYE item repeats the size of the GOODBYE item.
pub size: u64,
}
impl GoodbyeItem {
/// Create the goodbye item for the directory item called `name`.
///
/// The hash is computed with `hash_filename`; `offset` and `size` are stored as given.
pub fn new(name: &[u8], offset: u64, size: u64) -> Self {
let hash = hash_filename(name);
Self { hash, offset, size }
}
}
/// Hash a file name with SipHash-2-4 using two fixed keys.
///
/// This is the hash stored in `GoodbyeItem::hash`.
pub fn hash_filename(name: &[u8]) -> u64 {
use std::hash::Hasher;
let mut hasher = SipHasher24::new_with_keys(0x8574442b0f1d84b3, 0x2736ed30d1c22ec1);
hasher.write(name);
hasher.finish()
}
/*
pub fn search_binary_tree_array<F, T>(table: &[T], key: &T) -> Option<usize>
where
T: Ord,
F: FnMut(&T) -> std::cmp::Ordering,
{
search_binary_tree_array_by(table, |elem| key.cmp(elem))
}
*/
/// Search an array laid out as an implicit binary tree (children of index `i` are `2 * i + 1`
/// and `2 * i + 2`), starting at the root.
///
/// `f` is called with the element at the current position: `Ordering::Equal` ends the search
/// with that index, `Ordering::Less` descends to the first child and `Ordering::Greater` to the
/// second. Returns `None` once the walk leaves the array.
pub fn search_binary_tree_array_by<F, T>(table: &[T], mut f: F) -> Option<usize>
where
    F: FnMut(&T) -> Ordering,
{
    let mut index = 0;
    while let Some(element) = table.get(index) {
        index = match f(element) {
            Ordering::Equal => return Some(index),
            Ordering::Less => 2 * index + 1,
            Ordering::Greater => 2 * index + 2,
        };
    }
    None
}
/// Check that `path` consists of exactly one normal component.
///
/// Empty paths, paths with more than one component and paths starting with a root, prefix,
/// `.` or `..` component are rejected.
pub fn path_is_legal_component(path: &Path) -> bool {
    let mut parts = path.components();
    let starts_with_normal = match parts.next() {
        Some(std::path::Component::Normal(_)) => true,
        _ => false,
    };
    starts_with_normal && parts.next().is_none()
}
pub fn check_file_name(path: &Path) -> io::Result<()> {
if !path_is_legal_component(path) {
io_bail!("invalid file name in archive: {:?}", path);
} else {
Ok(())
}
}

94
src/format/acl.rs Normal file
View File

@ -0,0 +1,94 @@
//! ACL related data
use std::cmp::Ordering;
use endian_trait::Endian;
// The three flags use the same bit values as a classic `rwx` permission triplet.
bitflags::bitflags! {
/// ACL permission bits.
#[derive(Endian)]
pub struct Permissions: u64 {
const PXAR_ACL_PERMISSION_READ = 4;
const PXAR_ACL_PERMISSION_WRITE = 2;
const PXAR_ACL_PERMISSION_EXECUTE = 1;
}
}
/// ACL entry granting `permissions` to the user with id `uid`.
#[derive(Clone, Debug, Endian, Eq)]
#[repr(C)]
pub struct User {
pub uid: u64,
pub permissions: Permissions,
//pub name: Vec<u64>, not impl for now
}
// TODO if also name is impl, sort by uid, then by name and last by permissions
/// User entries are ordered by uid first; entries with equal uids are ordered by permissions.
impl Ord for User {
    fn cmp(&self, other: &User) -> Ordering {
        self.uid
            .cmp(&other.uid)
            .then_with(|| self.permissions.cmp(&other.permissions))
    }
}
/// Delegates to `Ord::cmp`, so the partial and the total order always agree.
impl PartialOrd for User {
fn partial_cmp(&self, other: &User) -> Option<Ordering> {
Some(self.cmp(other))
}
}
/// Two user entries are equal if both the uid and the permissions match.
impl PartialEq for User {
fn eq(&self, other: &User) -> bool {
self.uid == other.uid && self.permissions == other.permissions
}
}
/// ACL entry granting `permissions` to the group with id `gid`.
#[derive(Clone, Debug, Endian, Eq)]
#[repr(C)]
pub struct Group {
pub gid: u64,
pub permissions: Permissions,
//pub name: Vec<u64>, not impl for now
}
// TODO if also name is impl, sort by gid, then by name and last by permissions
/// Group entries are ordered by gid first; entries with equal gids are ordered by permissions.
impl Ord for Group {
    fn cmp(&self, other: &Group) -> Ordering {
        self.gid
            .cmp(&other.gid)
            .then_with(|| self.permissions.cmp(&other.permissions))
    }
}
/// Delegates to `Ord::cmp`, so the partial and the total order always agree.
impl PartialOrd for Group {
fn partial_cmp(&self, other: &Group) -> Option<Ordering> {
Some(self.cmp(other))
}
}
/// Two group entries are equal if both the gid and the permissions match.
impl PartialEq for Group {
fn eq(&self, other: &Group) -> bool {
self.gid == other.gid && self.permissions == other.permissions
}
}
/// The permissions of the ACL group object entry.
#[derive(Clone, Debug, Endian)]
#[repr(C)]
pub struct GroupObject {
pub permissions: Permissions,
}
/// The default ACL: one set of permissions each for the user object, the group object, others
/// and the mask.
#[derive(Clone, Debug, Endian)]
#[repr(C)]
pub struct Default {
pub user_obj_permissions: Permissions,
pub group_obj_permissions: Permissions,
pub other_permissions: Permissions,
pub mask_permissions: Permissions,
}

218
src/lib.rs Normal file
View File

@ -0,0 +1,218 @@
//! Proxmox backup archive format handling.
//!
//! This implements a reader and writer for the proxmox archive format (.pxar).
use std::ffi::OsStr;
use std::mem;
use std::os::unix::ffi::OsStrExt;
use std::path::{Path, PathBuf};
#[macro_use]
mod macros;
pub mod format;
pub(crate) mod util;
mod poll_fn;
pub mod accessor;
pub mod decoder;
/// File metadata found in pxar archives.
///
/// This includes the usual data you'd get from `stat()` as well as ACLs, extended attributes, file
/// capabilities and more.
///
/// The `Default` value has default `stat` data, no extended attributes, an empty ACL and neither
/// file capabilities nor a quota project id.
#[derive(Clone, Debug, Default)]
pub struct Metadata {
/// Data typically found in a `stat()` call.
pub stat: format::Entry,
/// Extended attributes.
pub xattrs: Vec<format::XAttr>,
/// ACLs.
pub acl: Acl,
/// File capabilities.
pub fcaps: Option<format::FCaps>,
/// Quota project id.
pub quota_project_id: Option<format::QuotaProjectId>,
}
/// ACL entries of a pxar archive.
///
/// This contains all the various ACL entry types supported by the pxar archive format.
///
/// The `Default` value has empty lists and no group object or default entry.
#[derive(Clone, Debug, Default)]
pub struct Acl {
/// User ACL list.
pub users: Vec<format::acl::User>,
/// Group ACL list.
pub groups: Vec<format::acl::Group>,
/// Group object ACL entry.
pub group_obj: Option<format::acl::GroupObject>,
/// Default permissions.
pub default: Option<format::acl::Default>,
/// Default user permissions.
pub default_users: Vec<format::acl::User>,
/// Default group permissions.
pub default_groups: Vec<format::acl::Group>,
}
/// Pxar archive entry kind.
///
/// Identifies whether the entry is a file, symlink, directory, etc.
#[derive(Clone, Debug)]
pub enum EntryKind {
/// Symbolic links.
Symlink(format::Symlink),
/// Hard links, relative to the root of the current archive.
Hardlink(format::Hardlink),
/// Device node.
Device(format::Device),
/// Regular file. `size` is the content size of the file's payload item.
File { size: u64 },
/// Directory entry. When iterating through an archive, the contents follow next.
Directory,
/// End of a directory. This is for internal use to remember the goodbye-table of a directory
/// entry. Will not occur during normal iteration.
EndOfDirectory,
}
/// A pxar archive entry. This contains the current path, file metadata and entry type specific
/// information.
#[derive(Clone, Debug)]
pub struct Entry {
// Full path of the entry within the current pxar directory structure.
path: PathBuf,
// Metadata collected for this entry.
metadata: Metadata,
// What kind of entry this is, including kind specific data.
kind: EntryKind,
// Start offset of the entry, if the underlying I/O implementation supports seeking.
offset: Option<u64>,
}
/// General accessors.
impl Entry {
    /// Clear everything except for the path.
    fn clear_data(&mut self) {
        self.offset = None;
        self.kind = EntryKind::EndOfDirectory;
        self.metadata = Metadata::default();
    }

    /// An entry with an empty path, default metadata, no offset and the internal
    /// `EndOfDirectory` kind.
    fn internal_default() -> Self {
        Self {
            path: PathBuf::new(),
            metadata: Metadata::default(),
            kind: EntryKind::EndOfDirectory,
            offset: None,
        }
    }

    /// Move the current entry out, leaving behind a cleared entry which keeps a copy of the
    /// path.
    fn take(&mut self) -> Self {
        let successor = Self {
            path: self.path.clone(),
            ..Self::internal_default()
        };
        mem::replace(self, successor)
    }

    /// If the underlying I/O implementation supports seeking, this will be filled with the start
    /// offset of this entry, allowing one to jump back to this entry later on.
    #[inline]
    pub fn offset(&self) -> Option<u64> {
        self.offset
    }

    /// Get the full path of this file within the current pxar directory structure.
    #[inline]
    pub fn path(&self) -> &Path {
        self.path.as_path()
    }

    /// Convenience method to get just the file name portion of the current path.
    ///
    /// Yields an empty name if the path has no file name portion.
    #[inline]
    pub fn file_name(&self) -> &OsStr {
        self.path.file_name().unwrap_or_else(|| OsStr::new(""))
    }

    /// Get the file metadata.
    #[inline]
    pub fn metadata(&self) -> &Metadata {
        &self.metadata
    }

    /// Get the value of the symbolic link if it is one.
    pub fn get_symlink(&self) -> Option<&OsStr> {
        if let EntryKind::Symlink(link) = &self.kind {
            Some(OsStr::from_bytes(&link.data))
        } else {
            None
        }
    }

    /// Get the value of the hard link if it is one.
    pub fn get_hardlink(&self) -> Option<&OsStr> {
        if let EntryKind::Hardlink(link) = &self.kind {
            Some(OsStr::from_bytes(&link.data))
        } else {
            None
        }
    }

    /// Get the value of the device node if it is one.
    pub fn get_device(&self) -> Option<format::Device> {
        if let EntryKind::Device(device) = &self.kind {
            Some(device.clone())
        } else {
            None
        }
    }
}
/// Convenience helpers.
impl Entry {
    /// Check whether this is a directory.
    pub fn is_dir(&self) -> bool {
        if let EntryKind::Directory = self.kind {
            true
        } else {
            false
        }
    }

    /// Check whether this is a symbolic link.
    pub fn is_symlink(&self) -> bool {
        if let EntryKind::Symlink(_) = self.kind {
            true
        } else {
            false
        }
    }

    /// Check whether this is a hard link.
    pub fn is_hardlink(&self) -> bool {
        if let EntryKind::Hardlink(_) = self.kind {
            true
        } else {
            false
        }
    }

    /// Check whether this is a device node.
    pub fn is_device(&self) -> bool {
        if let EntryKind::Device(_) = self.kind {
            true
        } else {
            false
        }
    }

    /// Check whether this is a regular file.
    pub fn is_regular_file(&self) -> bool {
        if let EntryKind::File { .. } = self.kind {
            true
        } else {
            false
        }
    }
}

23
src/macros.rs Normal file
View File

@ -0,0 +1,23 @@
/// Like failure's `format_err` but producing a `std::io::Error`.
///
/// The arguments are passed to `format!`; the resulting error has the kind `ErrorKind::Other`.
macro_rules! io_format_err {
($($msg:tt)+) => {
::std::io::Error::new(::std::io::ErrorKind::Other, format!($($msg)+))
};
}
/// Like failure's `bail` but producing a `std::io::Error`.
///
/// Expands to `return Err(io_format_err!(...))`, so it is meant for functions returning a
/// `Result` whose error type is `std::io::Error`.
macro_rules! io_bail {
($($msg:tt)+) => {{
return Err(io_format_err!($($msg)+));
}};
}
/// Evaluate to the value inside `Poll::Ready`, or return `Poll::Pending` from the enclosing
/// function.
///
/// Our dependency on `futures` is optional, so this is provided locally.
macro_rules! ready {
($expr:expr) => {{
match $expr {
std::task::Poll::Ready(r) => r,
std::task::Poll::Pending => return std::task::Poll::Pending,
}
}};
}

39
src/poll_fn.rs Normal file
View File

@ -0,0 +1,39 @@
//! `poll_fn` reimplementation as it is otherwise the only thing we need from the futures crate.
//!
//! Our `futures` crate dependency is optional.
use std::future::Future;
use std::pin::Pin;
use std::task::{Context, Poll};
/// Future created by `poll_fn`, wrapping a poll function.
pub struct PollFn<F> {
// The poll function; reset to `None` once it has returned `Poll::Ready`.
func: Option<F>,
}
/// Create a future from a poll function.
///
/// Every poll of the returned future calls `func`; its result is the future's result.
pub fn poll_fn<F, R>(func: F) -> PollFn<F>
where
F: FnMut(&mut Context) -> Poll<R>,
{
PollFn { func: Some(func) }
}
impl<F, R> Future for PollFn<F>
where
    F: FnMut(&mut Context) -> Poll<R>,
{
    type Output = R;

    /// Call the wrapped function once and pass on its result.
    ///
    /// The function is discarded as soon as it reports `Poll::Ready`.
    ///
    /// # Panics
    ///
    /// Panics when polled again after having returned `Poll::Ready`.
    fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
        // The closure is only ever called in place and never moved out of the pinned value.
        let this = unsafe { self.get_unchecked_mut() };

        let func = match this.func.as_mut() {
            Some(func) => func,
            None => panic!("poll() after Ready"),
        };

        let outcome = func(cx);
        if outcome.is_ready() {
            this.func = None;
        }
        outcome
    }
}

115
src/util.rs Normal file
View File

@ -0,0 +1,115 @@
#![allow(dead_code)]
use std::future::Future;
use std::io;
use std::pin::Pin;
use std::task::{Context, Poll};
// from /usr/include/linux/magic.h
// and from casync util.h
// File system magic numbers.
// NOTE(review): presumably compared against the `f_type` reported by `statfs` - confirm at the
// call sites of `is_virtual_file_system`.
#[rustfmt::skip]
#[allow(clippy::unreadable_literal)]
mod consts {
pub const BINFMTFS_MAGIC : i64 = 0x42494e4d;
pub const CGROUP2_SUPER_MAGIC : i64 = 0x63677270;
pub const CGROUP_SUPER_MAGIC : i64 = 0x0027e0eb;
pub const CONFIGFS_MAGIC : i64 = 0x62656570;
pub const DEBUGFS_MAGIC : i64 = 0x64626720;
pub const DEVPTS_SUPER_MAGIC : i64 = 0x00001cd1;
pub const EFIVARFS_MAGIC : i64 = 0xde5e81e4;
pub const FUSE_CTL_SUPER_MAGIC: i64 = 0x65735543;
pub const HUGETLBFS_MAGIC : i64 = 0x958458f6;
pub const MQUEUE_MAGIC : i64 = 0x19800202;
pub const NFSD_MAGIC : i64 = 0x6e667364;
pub const PROC_SUPER_MAGIC : i64 = 0x00009fa0;
pub const PSTOREFS_MAGIC : i64 = 0x6165676C;
pub const RPCAUTH_GSSMAGIC : i64 = 0x67596969;
pub const SECURITYFS_MAGIC : i64 = 0x73636673;
pub const SELINUX_MAGIC : i64 = 0xf97cff8c;
pub const SMACK_MAGIC : i64 = 0x43415d53;
pub const RAMFS_MAGIC : i64 = 0x858458f6;
pub const TMPFS_MAGIC : i64 = 0x01021994;
pub const SYSFS_MAGIC : i64 = 0x62656572;
pub const MSDOS_SUPER_MAGIC : i64 = 0x00004d44;
pub const BTRFS_SUPER_MAGIC : i64 = 0x9123683E;
pub const FUSE_SUPER_MAGIC : i64 = 0x65735546;
pub const EXT4_SUPER_MAGIC : i64 = 0x0000EF53;
pub const XFS_SUPER_MAGIC : i64 = 0x58465342;
pub const ZFS_SUPER_MAGIC : i64 = 0x2FC12FC1;
}
/// Check whether `magic` is one of the file system magic numbers treated as virtual.
///
/// Any value not in the list below yields `false`.
pub fn is_virtual_file_system(magic: i64) -> bool {
    const VIRTUAL_FS_MAGICS: &[i64] = &[
        consts::BINFMTFS_MAGIC,
        consts::CGROUP2_SUPER_MAGIC,
        consts::CGROUP_SUPER_MAGIC,
        consts::CONFIGFS_MAGIC,
        consts::DEBUGFS_MAGIC,
        consts::DEVPTS_SUPER_MAGIC,
        consts::EFIVARFS_MAGIC,
        consts::FUSE_CTL_SUPER_MAGIC,
        consts::HUGETLBFS_MAGIC,
        consts::MQUEUE_MAGIC,
        consts::NFSD_MAGIC,
        consts::PROC_SUPER_MAGIC,
        consts::PSTOREFS_MAGIC,
        consts::RPCAUTH_GSSMAGIC,
        consts::SECURITYFS_MAGIC,
        consts::SELINUX_MAGIC,
        consts::SMACK_MAGIC,
        consts::SYSFS_MAGIC,
    ];

    VIRTUAL_FS_MAGICS.contains(&magic)
}
/// Helper function to extract file names from binary archive.
///
/// A single trailing zero byte, if present, is not included in the result.
pub fn read_os_string(buffer: &[u8]) -> std::ffi::OsString {
    use std::os::unix::ffi::OsStrExt;

    let name = match buffer.split_last() {
        Some((&0, head)) => head,
        _ => buffer,
    };
    std::ffi::OsStr::from_bytes(name).to_os_string()
}
/// Create a byte vector of length `size`.
///
/// The contents are zero-initialized. Handing out a vector whose length covers uninitialized
/// memory (`with_capacity` followed by `set_len`) is undefined behaviour as soon as the bytes
/// are read or passed to a reader which inspects its buffer.
#[inline]
pub fn vec_new(size: usize) -> Vec<u8> {
    vec![0u8; size]
}
/// Turn anything printable into an `io::Error` of kind `Other`, using its `Display` output as
/// the message.
pub fn io_err_other<E: std::fmt::Display>(err: E) -> io::Error {
    let message = format!("{}", err);
    io::Error::new(io::ErrorKind::Other, message)
}
/// Poll `fut` exactly once and return its result.
///
/// This is how futures are driven from blocking code: the future must complete on its first
/// poll, otherwise an error is returned. The waker handed to the future is built from
/// `WAKER_VTABLE`.
pub fn poll_result_once<T, R>(mut fut: T) -> io::Result<R>
where
    T: Future<Output = io::Result<R>>,
{
    let raw_waker = std::task::RawWaker::new(std::ptr::null(), &WAKER_VTABLE);
    let waker = unsafe { std::task::Waker::from_raw(raw_waker) };
    let mut cx = Context::from_waker(&waker);

    // `fut` is a local which is not moved between here and the end of the function.
    let pinned = unsafe { Pin::new_unchecked(&mut fut) };
    let outcome = pinned.poll(&mut cx);

    if let Poll::Ready(result) = outcome {
        result
    } else {
        Err(io_err_other("got Poll::Pending synchronous context"))
    }
}
// Waker vtable for `poll_result_once`: cloning the waker and both ways of waking panic, while
// dropping it does nothing.
const WAKER_VTABLE: std::task::RawWakerVTable =
std::task::RawWakerVTable::new(forbid_clone, forbid_wake, forbid_wake, ignore_drop);
// `clone` entry of `WAKER_VTABLE`: always panics.
unsafe fn forbid_clone(_: *const ()) -> std::task::RawWaker {
panic!("tried to clone waker for synchronous task");
}
// `wake` and `wake_by_ref` entry of `WAKER_VTABLE`: always panics.
unsafe fn forbid_wake(_: *const ()) {
panic!("tried to wake synchronous task");
}
// `drop` entry of `WAKER_VTABLE`: there is nothing to release, the waker's data pointer is null.
unsafe fn ignore_drop(_: *const ()) {}