Merge #272
272: Split Entry into two different enums to remove unnecessary expects r=koivunej a=c410-f3r Second take on #200. `Entry` is now composed of `Bucket(...)` and `Metadata(MetadataEntry { ... })` to separate the entries that have metadata from those that do not, which avoids returning optional values derived from a single entry point. The next PR will address the newly introduced double matching while iterating over `continue_walk`, but it will require deeper logical changes. Co-authored-by: Caio <c410.f3r@gmail.com>
This commit is contained in:
commit
c6b68cc0fa
@ -6,7 +6,7 @@ use async_stream::try_stream;
|
||||
use bytes::Bytes;
|
||||
use cid::{Cid, Codec};
|
||||
use futures::stream::TryStream;
|
||||
use ipfs::unixfs::ll::walk::{self, ContinuedWalk, Walker};
|
||||
use ipfs::unixfs::ll::walk::{self, ContinuedWalk, Entry, MetadataEntry, Walker};
|
||||
use ipfs::unixfs::{ll::file::FileReadFailed, TraversalFailed};
|
||||
use ipfs::Block;
|
||||
use ipfs::{Ipfs, IpfsTypes};
|
||||
@ -159,75 +159,63 @@ fn walk<Types: IpfsTypes>(
|
||||
|
||||
visit = match walker.continue_walk(&data, &mut cache)? {
|
||||
ContinuedWalk::File(segment, item) => {
|
||||
let total_size = item.as_entry()
|
||||
.total_file_size()
|
||||
.expect("files do have total_size");
|
||||
if let Entry::Metadata(MetadataEntry::File(.., p, md, size)) = item.as_entry() {
|
||||
if segment.is_first() {
|
||||
for mut bytes in tar_helper.apply_file(p, md, size)?.iter_mut() {
|
||||
if let Some(bytes) = bytes.take() {
|
||||
yield bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if segment.is_first() {
|
||||
let path = item.as_entry().path();
|
||||
let metadata = item
|
||||
.as_entry()
|
||||
.metadata()
|
||||
.expect("files must have metadata");
|
||||
// even if the largest of files can have 256 kB blocks and about the same
|
||||
// amount of content, try to consume it in small parts not to grow the buffers
|
||||
// too much.
|
||||
|
||||
for mut bytes in tar_helper.apply_file(path, metadata, total_size)?.iter_mut() {
|
||||
if let Some(bytes) = bytes.take() {
|
||||
yield bytes;
|
||||
let mut n = 0usize;
|
||||
let slice = segment.as_ref();
|
||||
let total = slice.len();
|
||||
|
||||
while n < total {
|
||||
let next = tar_helper.buffer_file_contents(&slice[n..]);
|
||||
n += next.len();
|
||||
yield next;
|
||||
}
|
||||
|
||||
if segment.is_last() {
|
||||
if let Some(zeroes) = tar_helper.pad(size) {
|
||||
yield zeroes;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// even if the largest of files can have 256 kB blocks and about the same
|
||||
// amount of content, try to consume it in small parts not to grow the buffers
|
||||
// too much.
|
||||
|
||||
let mut n = 0usize;
|
||||
let slice = segment.as_ref();
|
||||
let total = slice.len();
|
||||
|
||||
while n < total {
|
||||
let next = tar_helper.buffer_file_contents(&slice[n..]);
|
||||
n += next.len();
|
||||
yield next;
|
||||
}
|
||||
|
||||
if segment.is_last() {
|
||||
if let Some(zeroes) = tar_helper.pad(total_size) {
|
||||
yield zeroes;
|
||||
}
|
||||
}
|
||||
|
||||
item.into_inner()
|
||||
},
|
||||
ContinuedWalk::Directory(item) => {
|
||||
|
||||
// only first instances of directories will have the metadata
|
||||
if let Some(metadata) = item.as_entry().metadata() {
|
||||
let path = item.as_entry().path();
|
||||
|
||||
if let Entry::Metadata(metadata_entry) = item.as_entry() {
|
||||
let metadata = metadata_entry.metadata();
|
||||
let path = metadata_entry.path();
|
||||
for mut bytes in tar_helper.apply_directory(path, metadata)?.iter_mut() {
|
||||
if let Some(bytes) = bytes.take() {
|
||||
yield bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
item.into_inner()
|
||||
},
|
||||
ContinuedWalk::Symlink(bytes, item) => {
|
||||
if let Entry::Metadata(metadata_entry) = item.as_entry() {
|
||||
// converting a symlink is the most tricky part
|
||||
let path = metadata_entry.path();
|
||||
let target = std::str::from_utf8(bytes).map_err(|_| GetError::NonUtf8Symlink)?;
|
||||
let target = Path::new(target);
|
||||
let metadata = metadata_entry.metadata();
|
||||
|
||||
// converting a symlink is the most tricky part
|
||||
let path = item.as_entry().path();
|
||||
let target = std::str::from_utf8(bytes).map_err(|_| GetError::NonUtf8Symlink)?;
|
||||
let target = Path::new(target);
|
||||
let metadata = item.as_entry().metadata().expect("symlink must have metadata");
|
||||
|
||||
for mut bytes in tar_helper.apply_symlink(path, target, metadata)?.iter_mut() {
|
||||
if let Some(bytes) = bytes.take() {
|
||||
yield bytes;
|
||||
for mut bytes in tar_helper.apply_symlink(path, target, metadata)?.iter_mut() {
|
||||
if let Some(bytes) = bytes.take() {
|
||||
yield bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
item.into_inner()
|
||||
},
|
||||
};
|
||||
|
@ -52,7 +52,7 @@ fn main() {
|
||||
}
|
||||
|
||||
fn walk(blocks: ShardedBlockStore, start: &Cid) -> Result<(), Error> {
|
||||
use ipfs_unixfs::walk::{ContinuedWalk, Walker};
|
||||
use ipfs_unixfs::walk::{ContinuedWalk, Entry, MetadataEntry, Walker};
|
||||
|
||||
let mut buf = Vec::new();
|
||||
let mut cache = None;
|
||||
@ -72,56 +72,45 @@ fn walk(blocks: ShardedBlockStore, start: &Cid) -> Result<(), Error> {
|
||||
// items.
|
||||
visit = match walker.continue_walk(&buf, &mut cache)? {
|
||||
ContinuedWalk::File(segment, item) => {
|
||||
let entry = item.as_entry();
|
||||
let total_size = entry.total_file_size().expect("all files have total size");
|
||||
// metadata is picked up from the root file and carried until the last block
|
||||
let metadata = entry.metadata().expect("all files have metadata");
|
||||
if let Entry::Metadata(MetadataEntry::File(.., path, md, size)) = item.as_entry() {
|
||||
if segment.is_first() {
|
||||
// this is set on the root block, no actual bytes are present for multiblock
|
||||
// files
|
||||
}
|
||||
|
||||
if segment.is_first() {
|
||||
// this is set on the root block, no actual bytes are present for multiblock
|
||||
// files
|
||||
if segment.is_last() {
|
||||
let mode = md.mode().unwrap_or(0o0644) & 0o7777;
|
||||
let (seconds, _) = md.mtime().unwrap_or((0, 0));
|
||||
println!("f {:o} {:>12} {:>16} {:?}", mode, seconds, size, path);
|
||||
}
|
||||
}
|
||||
|
||||
if segment.is_last() {
|
||||
let path = entry.path();
|
||||
let mode = metadata.mode().unwrap_or(0o0644) & 0o7777;
|
||||
let (seconds, _) = metadata.mtime().unwrap_or((0, 0));
|
||||
|
||||
println!("f {:o} {:>12} {:>16} {:?}", mode, seconds, total_size, path);
|
||||
}
|
||||
|
||||
// continue the walk
|
||||
item.into_inner()
|
||||
}
|
||||
ContinuedWalk::Directory(item) => {
|
||||
// presence of metadata can be used to determine if this is the first appearance of
|
||||
// a directory by looking at the metadata: sibling hamt shard buckets do not have
|
||||
// metadata.
|
||||
if let Some(metadata) = item.as_entry().metadata() {
|
||||
if let Entry::Metadata(metadata_entry) = item.as_entry() {
|
||||
let metadata = metadata_entry.metadata();
|
||||
let path = item.as_entry().path();
|
||||
|
||||
let mode = metadata.mode().unwrap_or(0o0755) & 0o7777;
|
||||
let (seconds, _) = metadata.mtime().unwrap_or((0, 0));
|
||||
|
||||
println!("d {:o} {:>12} {:>16} {:?}", mode, seconds, "-", path);
|
||||
}
|
||||
|
||||
item.into_inner()
|
||||
}
|
||||
ContinuedWalk::Symlink(bytes, item) => {
|
||||
let entry = item.as_entry();
|
||||
let metadata = entry.metadata().expect("symlink must have metadata");
|
||||
|
||||
let path = entry.path();
|
||||
let target = Path::new(std::str::from_utf8(bytes).unwrap());
|
||||
let mode = metadata.mode().unwrap_or(0o0755) & 0o7777;
|
||||
let (seconds, _) = metadata.mtime().unwrap_or((0, 0));
|
||||
|
||||
println!(
|
||||
"s {:o} {:>12} {:>16} {:?} -> {:?}",
|
||||
mode, seconds, "-", path, target
|
||||
);
|
||||
|
||||
if let Entry::Metadata(metadata_entry) = item.as_entry() {
|
||||
let metadata = metadata_entry.metadata();
|
||||
let path = metadata_entry.path();
|
||||
let target = Path::new(std::str::from_utf8(bytes).unwrap());
|
||||
let mode = metadata.mode().unwrap_or(0o0755) & 0o7777;
|
||||
let (seconds, _) = metadata.mtime().unwrap_or((0, 0));
|
||||
println!(
|
||||
"s {:o} {:>12} {:>16} {:?} -> {:?}",
|
||||
mode, seconds, "-", path, target
|
||||
);
|
||||
}
|
||||
item.into_inner()
|
||||
}
|
||||
};
|
||||
|
@ -10,6 +10,43 @@ use std::convert::TryFrom;
|
||||
use std::fmt;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Representation of the current item of Walker or the last observed item with metadata.
|
||||
#[derive(Debug)]
|
||||
pub enum MetadataEntry<'a> {
|
||||
/// Current item is a non-root plain Directory or a HAMTShard directory.
|
||||
Directory(&'a Cid, &'a Path, &'a Metadata),
|
||||
/// Current item is possibly a root file with a path, metadata, and a total file size.
|
||||
File(&'a Cid, &'a Path, &'a Metadata, u64),
|
||||
/// Current item is the root directory (HAMTShard or plain Directory).
|
||||
RootDirectory(&'a Cid, &'a Path, &'a Metadata),
|
||||
/// Current item is possibly a root symlink.
|
||||
Symlink(&'a Cid, &'a Path, &'a Metadata),
|
||||
}
|
||||
|
||||
impl MetadataEntry<'_> {
|
||||
/// Returns the metadata for the latest entry. It exists for initial directory entries, files,
|
||||
/// and symlinks but not for continued HamtShards.
|
||||
pub fn metadata(&self) -> &'_ Metadata {
|
||||
match self {
|
||||
Self::Directory(_, _, m)
|
||||
| Self::File(_, _, m, ..)
|
||||
| Self::RootDirectory(_, _, m)
|
||||
| Self::Symlink(_, _, m) => m,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the path for the latest entry. This is created from a UTF-8 string and, as such, is always
|
||||
/// representable on all supported platforms.
|
||||
pub fn path(&self) -> &'_ Path {
|
||||
match self {
|
||||
MetadataEntry::Directory(_, p, ..)
|
||||
| MetadataEntry::File(_, p, ..)
|
||||
| MetadataEntry::RootDirectory(_, p, ..)
|
||||
| MetadataEntry::Symlink(_, p, ..) => p,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `Walker` helps with walking a UnixFS tree, including all of the content and files. It is created with
|
||||
/// `Walker::new` and walked over each block with `Walker::continue_walk`. Use
|
||||
/// `Walker::pending_links` to obtain the next [`Cid`] to be loaded and the prefetchable links.
|
||||
@ -185,7 +222,6 @@ impl Walker {
|
||||
|
||||
// replacing this with try_fold takes as many lines as the R: Try<Ok = B> cannot be
|
||||
// deduced without specifying the Error
|
||||
|
||||
for link in links {
|
||||
pending.push(link?);
|
||||
}
|
||||
@ -355,12 +391,12 @@ impl fmt::Debug for InnerKind {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use InnerKind::*;
|
||||
match self {
|
||||
RootDirectory(cid) => write!(fmt, "RootDirectory({})", cid),
|
||||
BucketAtRoot(cid) => write!(fmt, "BucketAtRoot({})", cid),
|
||||
RootBucket(cid) => write!(fmt, "RootBucket({})", cid),
|
||||
Bucket(cid) => write!(fmt, "Bucket({})", cid),
|
||||
BucketAtRoot(cid) => write!(fmt, "BucketAtRoot({})", cid),
|
||||
Directory(cid) => write!(fmt, "Directory({})", cid),
|
||||
File(cid, _, sz) => write!(fmt, "File({}, _, {})", cid, sz),
|
||||
RootBucket(cid) => write!(fmt, "RootBucket({})", cid),
|
||||
RootDirectory(cid) => write!(fmt, "RootDirectory({})", cid),
|
||||
Symlink(cid) => write!(fmt, "Symlink({})", cid),
|
||||
}
|
||||
}
|
||||
@ -369,63 +405,34 @@ impl fmt::Debug for InnerKind {
|
||||
/// Representation of the current item of Walker or the last observed item.
|
||||
#[derive(Debug)]
|
||||
pub enum Entry<'a> {
|
||||
/// Current item is the root directory (HAMTShard or plain Directory).
|
||||
RootDirectory(&'a Cid, &'a Path, &'a Metadata),
|
||||
/// Current item is a continuation of a HAMTShard directory. Only the root HAMTShard will have
|
||||
/// file metadata.
|
||||
Bucket(&'a Cid, &'a Path),
|
||||
/// Current item is a non-root plain Directory or a HAMTShard directory.
|
||||
Directory(&'a Cid, &'a Path, &'a Metadata),
|
||||
/// Current item is possibly a root file with a path, metadata, and a total file size.
|
||||
File(&'a Cid, &'a Path, &'a Metadata, u64),
|
||||
/// Current item is possibly a root symlink.
|
||||
Symlink(&'a Cid, &'a Path, &'a Metadata),
|
||||
/// All items that have metadata.
|
||||
Metadata(MetadataEntry<'a>),
|
||||
}
|
||||
|
||||
impl<'a> Entry<'a> {
|
||||
/// Returns the Cid for the latest entry.
|
||||
pub fn cid(&self) -> &Cid {
|
||||
match self {
|
||||
Self::Bucket(cid, ..) => cid,
|
||||
Self::Metadata(MetadataEntry::Directory(cid, ..))
|
||||
| Self::Metadata(MetadataEntry::File(cid, ..))
|
||||
| Self::Metadata(MetadataEntry::RootDirectory(cid, ..))
|
||||
| Self::Metadata(MetadataEntry::Symlink(cid, ..)) => cid,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the path for the latest entry. This is created from a UTF-8 string and, as such, is always
|
||||
/// representable on all supported platforms.
|
||||
pub fn path(&self) -> &'a Path {
|
||||
use Entry::*;
|
||||
match self {
|
||||
RootDirectory(_, p, _)
|
||||
| Bucket(_, p)
|
||||
| Directory(_, p, _)
|
||||
| File(_, p, _, _)
|
||||
| Symlink(_, p, _) => p,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the metadata for the latest entry. It exists for initial directory entries, files,
|
||||
/// and symlinks but not for continued HamtShards.
|
||||
pub fn metadata(&self) -> Option<&'a Metadata> {
|
||||
use Entry::*;
|
||||
match self {
|
||||
Bucket(_, _) => None,
|
||||
RootDirectory(_, _, m) | Directory(_, _, m) | File(_, _, m, _) | Symlink(_, _, m) => {
|
||||
Some(m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the total size of the file this entry represents, or `None` if not a file.
|
||||
pub fn total_file_size(&self) -> Option<u64> {
|
||||
use Entry::*;
|
||||
match self {
|
||||
File(_, _, _, sz) => Some(*sz),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the Cid for the latest entry.
|
||||
pub fn cid(&self) -> &Cid {
|
||||
use Entry::*;
|
||||
match self {
|
||||
RootDirectory(cid, _, _)
|
||||
| Bucket(cid, _)
|
||||
| Directory(cid, _, _)
|
||||
| File(cid, _, _, _)
|
||||
| Symlink(cid, _, _) => cid,
|
||||
Self::Bucket(_, p) => p,
|
||||
Self::Metadata(MetadataEntry::Directory(_, p, ..))
|
||||
| Self::Metadata(MetadataEntry::File(_, p, ..))
|
||||
| Self::Metadata(MetadataEntry::RootDirectory(_, p, ..))
|
||||
| Self::Metadata(MetadataEntry::Symlink(_, p, ..)) => p,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -483,16 +490,23 @@ impl InnerEntry {
|
||||
}
|
||||
|
||||
pub fn as_entry(&self) -> Entry<'_> {
|
||||
use InnerKind::*;
|
||||
match &self.kind {
|
||||
RootDirectory(cid) | BucketAtRoot(cid) => {
|
||||
Entry::RootDirectory(cid, &self.path, &self.metadata)
|
||||
InnerKind::Bucket(cid) => Entry::Bucket(cid, &self.path),
|
||||
InnerKind::Directory(cid) => {
|
||||
Entry::Metadata(MetadataEntry::Directory(cid, &self.path, &self.metadata))
|
||||
}
|
||||
InnerKind::File(cid, _, sz) => {
|
||||
Entry::Metadata(MetadataEntry::File(cid, &self.path, &self.metadata, *sz))
|
||||
}
|
||||
InnerKind::RootBucket(cid) => {
|
||||
Entry::Metadata(MetadataEntry::Directory(cid, &self.path, &self.metadata))
|
||||
}
|
||||
InnerKind::RootDirectory(cid) | InnerKind::BucketAtRoot(cid) => Entry::Metadata(
|
||||
MetadataEntry::RootDirectory(cid, &self.path, &self.metadata),
|
||||
),
|
||||
InnerKind::Symlink(cid) => {
|
||||
Entry::Metadata(MetadataEntry::Symlink(cid, &self.path, &self.metadata))
|
||||
}
|
||||
RootBucket(cid) => Entry::Directory(cid, &self.path, &self.metadata),
|
||||
Bucket(cid) => Entry::Bucket(cid, &self.path),
|
||||
Directory(cid) => Entry::Directory(cid, &self.path, &self.metadata),
|
||||
File(cid, _, sz) => Entry::File(cid, &self.path, &self.metadata, *sz),
|
||||
Symlink(cid) => Entry::Symlink(cid, &self.path, &self.metadata),
|
||||
}
|
||||
}
|
||||
|
||||
@ -609,12 +623,12 @@ impl InnerEntry {
|
||||
fn as_symlink(&mut self, cid: Cid, name: &str, depth: usize, metadata: Metadata) {
|
||||
use InnerKind::*;
|
||||
match self.kind {
|
||||
RootDirectory(_)
|
||||
Bucket(_)
|
||||
| BucketAtRoot(_)
|
||||
| RootBucket(_)
|
||||
| Bucket(_)
|
||||
| Directory(_)
|
||||
| File(_, None, _)
|
||||
| RootBucket(_)
|
||||
| RootDirectory(_)
|
||||
| Symlink(_) => {
|
||||
self.kind = Symlink(cid);
|
||||
self.set_path(name, depth);
|
||||
|
Loading…
x
Reference in New Issue
Block a user