From c13df75243bcbee9fad9c403312cad4a0ac40d92 Mon Sep 17 00:00:00 2001 From: Christian Ebner Date: Wed, 12 Jun 2024 15:17:12 +0200 Subject: [PATCH] accessor: adapt and restrict contents access Add checks for split variant inputs when accessing the payload contents via the accessor instance. Both cases, accessing via the safe `contents` method and via the previously unsafe `open_contents_at_range` call, are covered. Reduce possible misuse by wrapping the current plain content range into an opaque `ContentRange` type with an additional optional payload reference field to check consistency between the payload reference encoded in the metadata archive and the payload header found in the payload data archive. Because of the additional type wrapping and the payload header check, the `open_contents_at_range` is considered safe now, dropping the previously unsafe implementation. The corresponding interfaces have been adapted accordingly. Signed-off-by: Christian Ebner --- src/accessor/aio.rs | 16 ++++++----- src/accessor/mod.rs | 68 ++++++++++++++++++++++++++++++++------------ src/accessor/sync.rs | 16 ++++++----- 3 files changed, 68 insertions(+), 32 deletions(-) diff --git a/src/accessor/aio.rs b/src/accessor/aio.rs index 73b1025..eb89f8f 100644 --- a/src/accessor/aio.rs +++ b/src/accessor/aio.rs @@ -7,14 +7,13 @@ use std::future::Future; use std::io; use std::mem; -use std::ops::Range; use std::os::unix::fs::FileExt; use std::path::Path; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use crate::accessor::{self, cache::Cache, MaybeReady, ReadAt, ReadAtOperation}; +use crate::accessor::{self, cache::Cache, ContentRange, MaybeReady, ReadAt, ReadAtOperation}; use crate::decoder::aio::Decoder; use crate::format::GoodbyeItem; use crate::util; @@ -153,13 +152,16 @@ impl Accessor { /// /// This will provide a reader over an arbitrary range of the archive file, so unless this /// comes from a actual file entry data, the contents might not make much 
sense. - pub unsafe fn open_contents_at_range(&self, range: Range) -> FileContents { - FileContents { - inner: unsafe { self.inner.open_contents_at_range(range) }, + pub async fn open_contents_at_range( + &self, + range: &ContentRange, + ) -> io::Result> { + Ok(FileContents { + inner: self.inner.open_contents_at_range(range).await?, at: 0, buffer: Vec::new(), future: None, - } + }) } /// Following a hardlink. @@ -235,7 +237,7 @@ impl FileEntry { } /// For use with unsafe accessor methods. - pub fn content_range(&self) -> io::Result>> { + pub fn content_range(&self) -> io::Result> { self.inner.content_range() } diff --git a/src/accessor/mod.rs b/src/accessor/mod.rs index 92d689d..48605eb 100644 --- a/src/accessor/mod.rs +++ b/src/accessor/mod.rs @@ -17,7 +17,7 @@ use endian_trait::Endian; use crate::binary_tree_array; use crate::decoder::{self, DecoderImpl}; -use crate::format::{self, FormatVersion, GoodbyeItem}; +use crate::format::{self, FormatVersion, GoodbyeItem, PayloadRef}; use crate::util; use crate::{Entry, EntryKind, PxarVariant}; @@ -54,6 +54,16 @@ impl EntryRangeInfo { } } +/// Stores a content range to be accessed via the `Accessor` as well as the payload reference to +/// perform consistency checks on payload references for archives accessed via split variant input. +#[derive(Clone)] +pub struct ContentRange { + // Range of the content + content: Range, + // Optional payload ref + payload_ref: Option, +} + /// awaitable version of `ReadAt`. async fn read_at(input: &T, buf: &mut [u8], offset: u64) -> io::Result where @@ -335,13 +345,12 @@ impl AccessorImpl { }) } - /// Allow opening arbitrary contents from a specific range. 
- pub unsafe fn open_contents_at_range(&self, range: Range) -> FileContentsImpl { - if let Some((payload_input, _)) = &self.input.payload() { - FileContentsImpl::new(payload_input.clone(), range) - } else { - FileContentsImpl::new(self.input.archive().clone(), range) - } + /// Open contents at provided range + pub async fn open_contents_at_range( + &self, + range: &ContentRange, + ) -> io::Result> { + FileContentsImpl::new(&self.input, range).await } /// Following a hardlink breaks a couple of conventions we otherwise have, particularly we will @@ -758,7 +767,7 @@ impl FileEntryImpl { } /// For use with unsafe accessor methods. - pub fn content_range(&self) -> io::Result>> { + pub fn content_range(&self) -> io::Result> { match self.entry.kind { EntryKind::File { offset: None, .. } => { io_bail!("cannot open file, reader provided no offset") @@ -767,7 +776,10 @@ impl FileEntryImpl { size, offset: Some(offset), payload_offset: None, - } => Ok(Some(offset..(offset + size))), + } => Ok(Some(ContentRange { + content: offset..(offset + size), + payload_ref: None, + })), // Payload offset beats regular offset if some EntryKind::File { size, @@ -775,7 +787,13 @@ impl FileEntryImpl { payload_offset: Some(payload_offset), } => { let start_offset = payload_offset + size_of::() as u64; - Ok(Some(start_offset..start_offset + size)) + Ok(Some(ContentRange { + content: start_offset..start_offset + size, + payload_ref: Some(PayloadRef { + offset: payload_offset, + size, + }), + })) } _ => Ok(None), } @@ -785,11 +803,8 @@ impl FileEntryImpl { let range = self .content_range()? 
.ok_or_else(|| io_format_err!("not a file"))?; - if let Some((ref payload_input, _)) = self.input.payload() { - Ok(FileContentsImpl::new(payload_input.clone(), range)) - } else { - Ok(FileContentsImpl::new(self.input.archive().clone(), range)) - } + + FileContentsImpl::new(&self.input, &range).await } #[inline] @@ -897,8 +912,25 @@ pub(crate) struct FileContentsImpl { } impl FileContentsImpl { - pub fn new(input: T, range: Range) -> Self { - Self { input, range } + async fn new( + input: &PxarVariant)>, + range: &ContentRange, + ) -> io::Result { + let (input, range) = if let Some((payload_input, payload_range)) = input.payload() { + if let Some(payload_ref) = &range.payload_ref { + let header: format::Header = + read_entry_at(payload_input, payload_ref.offset).await?; + format::check_payload_header_and_size(&header, payload_ref.size)?; + } + if payload_range.start > range.content.start || payload_range.end < range.content.end { + io_bail!("out of range access for payload"); + } + (payload_input.clone(), range.content.clone()) + } else { + (input.archive().clone(), range.content.clone()) + }; + + Ok(Self { input, range }) } #[inline] diff --git a/src/accessor/sync.rs b/src/accessor/sync.rs index df2ed23..76e8c03 100644 --- a/src/accessor/sync.rs +++ b/src/accessor/sync.rs @@ -1,14 +1,13 @@ //! Blocking `pxar` random access handling. use std::io; -use std::ops::Range; use std::os::unix::fs::FileExt; use std::path::Path; use std::pin::Pin; use std::sync::Arc; use std::task::Context; -use crate::accessor::{self, cache::Cache, MaybeReady, ReadAt, ReadAtOperation}; +use crate::accessor::{self, cache::Cache, ContentRange, MaybeReady, ReadAt, ReadAtOperation}; use crate::decoder::Decoder; use crate::format::GoodbyeItem; use crate::util::poll_result_once; @@ -142,11 +141,14 @@ impl Accessor { /// /// This will provide a reader over an arbitrary range of the archive file, so unless this /// comes from a actual file entry data, the contents might not make much sense. 
- pub unsafe fn open_contents_at_range(&self, range: Range) -> FileContents { - FileContents { - inner: unsafe { self.inner.open_contents_at_range(range) }, + pub fn open_contents_at_range( + &self, + range: &ContentRange, + ) -> io::Result> { + Ok(FileContents { + inner: poll_result_once(self.inner.open_contents_at_range(range))?, at: 0, - } + }) } /// Following a hardlink. @@ -291,7 +293,7 @@ impl FileEntry { } /// For use with unsafe accessor methods. - pub fn content_range(&self) -> io::Result>> { + pub fn content_range(&self) -> io::Result> { self.inner.content_range() }