272: Split Entry into two different enums to remove unnecessary expects r=koivunej a=c410-f3r

Second take on #200 

`Entry` is now composed of `Bucket(...)` and `Metadata(MetadataEntry { ... })` to separate items that have metadata from those that do not, which avoids returning optional values derived from a single entry point.

The next PR will address the newly introduced double matching while iterating over `continue_walk`, but it will require deeper logical changes.

Co-authored-by: Caio <c410.f3r@gmail.com>
This commit is contained in:
bors[bot] 2020-07-31 12:39:16 +00:00 committed by GitHub
commit c6b68cc0fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 138 additions and 147 deletions

View File

@ -6,7 +6,7 @@ use async_stream::try_stream;
use bytes::Bytes;
use cid::{Cid, Codec};
use futures::stream::TryStream;
use ipfs::unixfs::ll::walk::{self, ContinuedWalk, Walker};
use ipfs::unixfs::ll::walk::{self, ContinuedWalk, Entry, MetadataEntry, Walker};
use ipfs::unixfs::{ll::file::FileReadFailed, TraversalFailed};
use ipfs::Block;
use ipfs::{Ipfs, IpfsTypes};
@ -159,75 +159,63 @@ fn walk<Types: IpfsTypes>(
visit = match walker.continue_walk(&data, &mut cache)? {
ContinuedWalk::File(segment, item) => {
let total_size = item.as_entry()
.total_file_size()
.expect("files do have total_size");
if let Entry::Metadata(MetadataEntry::File(.., p, md, size)) = item.as_entry() {
if segment.is_first() {
for mut bytes in tar_helper.apply_file(p, md, size)?.iter_mut() {
if let Some(bytes) = bytes.take() {
yield bytes;
}
}
}
if segment.is_first() {
let path = item.as_entry().path();
let metadata = item
.as_entry()
.metadata()
.expect("files must have metadata");
// even if the largest of files can have 256 kB blocks and about the same
// amount of content, try to consume it in small parts not to grow the buffers
// too much.
for mut bytes in tar_helper.apply_file(path, metadata, total_size)?.iter_mut() {
if let Some(bytes) = bytes.take() {
yield bytes;
let mut n = 0usize;
let slice = segment.as_ref();
let total = slice.len();
while n < total {
let next = tar_helper.buffer_file_contents(&slice[n..]);
n += next.len();
yield next;
}
if segment.is_last() {
if let Some(zeroes) = tar_helper.pad(size) {
yield zeroes;
}
}
}
// even if the largest of files can have 256 kB blocks and about the same
// amount of content, try to consume it in small parts not to grow the buffers
// too much.
let mut n = 0usize;
let slice = segment.as_ref();
let total = slice.len();
while n < total {
let next = tar_helper.buffer_file_contents(&slice[n..]);
n += next.len();
yield next;
}
if segment.is_last() {
if let Some(zeroes) = tar_helper.pad(total_size) {
yield zeroes;
}
}
item.into_inner()
},
ContinuedWalk::Directory(item) => {
// only first instances of directories will have the metadata
if let Some(metadata) = item.as_entry().metadata() {
let path = item.as_entry().path();
if let Entry::Metadata(metadata_entry) = item.as_entry() {
let metadata = metadata_entry.metadata();
let path = metadata_entry.path();
for mut bytes in tar_helper.apply_directory(path, metadata)?.iter_mut() {
if let Some(bytes) = bytes.take() {
yield bytes;
}
}
}
item.into_inner()
},
ContinuedWalk::Symlink(bytes, item) => {
if let Entry::Metadata(metadata_entry) = item.as_entry() {
// converting a symlink is the most tricky part
let path = metadata_entry.path();
let target = std::str::from_utf8(bytes).map_err(|_| GetError::NonUtf8Symlink)?;
let target = Path::new(target);
let metadata = metadata_entry.metadata();
// converting a symlink is the most tricky part
let path = item.as_entry().path();
let target = std::str::from_utf8(bytes).map_err(|_| GetError::NonUtf8Symlink)?;
let target = Path::new(target);
let metadata = item.as_entry().metadata().expect("symlink must have metadata");
for mut bytes in tar_helper.apply_symlink(path, target, metadata)?.iter_mut() {
if let Some(bytes) = bytes.take() {
yield bytes;
for mut bytes in tar_helper.apply_symlink(path, target, metadata)?.iter_mut() {
if let Some(bytes) = bytes.take() {
yield bytes;
}
}
}
item.into_inner()
},
};

View File

@ -52,7 +52,7 @@ fn main() {
}
fn walk(blocks: ShardedBlockStore, start: &Cid) -> Result<(), Error> {
use ipfs_unixfs::walk::{ContinuedWalk, Walker};
use ipfs_unixfs::walk::{ContinuedWalk, Entry, MetadataEntry, Walker};
let mut buf = Vec::new();
let mut cache = None;
@ -72,56 +72,45 @@ fn walk(blocks: ShardedBlockStore, start: &Cid) -> Result<(), Error> {
// items.
visit = match walker.continue_walk(&buf, &mut cache)? {
ContinuedWalk::File(segment, item) => {
let entry = item.as_entry();
let total_size = entry.total_file_size().expect("all files have total size");
// metadata is picked up from the root file and carried until the last block
let metadata = entry.metadata().expect("all files have metadata");
if let Entry::Metadata(MetadataEntry::File(.., path, md, size)) = item.as_entry() {
if segment.is_first() {
// this is set on the root block, no actual bytes are present for multiblock
// files
}
if segment.is_first() {
// this is set on the root block, no actual bytes are present for multiblock
// files
if segment.is_last() {
let mode = md.mode().unwrap_or(0o0644) & 0o7777;
let (seconds, _) = md.mtime().unwrap_or((0, 0));
println!("f {:o} {:>12} {:>16} {:?}", mode, seconds, size, path);
}
}
if segment.is_last() {
let path = entry.path();
let mode = metadata.mode().unwrap_or(0o0644) & 0o7777;
let (seconds, _) = metadata.mtime().unwrap_or((0, 0));
println!("f {:o} {:>12} {:>16} {:?}", mode, seconds, total_size, path);
}
// continue the walk
item.into_inner()
}
ContinuedWalk::Directory(item) => {
// presence of metadata can be used to determine if this is the first appearance of
// a directory: sibling hamt shard buckets do not have metadata.
if let Some(metadata) = item.as_entry().metadata() {
if let Entry::Metadata(metadata_entry) = item.as_entry() {
let metadata = metadata_entry.metadata();
let path = item.as_entry().path();
let mode = metadata.mode().unwrap_or(0o0755) & 0o7777;
let (seconds, _) = metadata.mtime().unwrap_or((0, 0));
println!("d {:o} {:>12} {:>16} {:?}", mode, seconds, "-", path);
}
item.into_inner()
}
ContinuedWalk::Symlink(bytes, item) => {
let entry = item.as_entry();
let metadata = entry.metadata().expect("symlink must have metadata");
let path = entry.path();
let target = Path::new(std::str::from_utf8(bytes).unwrap());
let mode = metadata.mode().unwrap_or(0o0755) & 0o7777;
let (seconds, _) = metadata.mtime().unwrap_or((0, 0));
println!(
"s {:o} {:>12} {:>16} {:?} -> {:?}",
mode, seconds, "-", path, target
);
if let Entry::Metadata(metadata_entry) = item.as_entry() {
let metadata = metadata_entry.metadata();
let path = metadata_entry.path();
let target = Path::new(std::str::from_utf8(bytes).unwrap());
let mode = metadata.mode().unwrap_or(0o0755) & 0o7777;
let (seconds, _) = metadata.mtime().unwrap_or((0, 0));
println!(
"s {:o} {:>12} {:>16} {:?} -> {:?}",
mode, seconds, "-", path, target
);
}
item.into_inner()
}
};

View File

@ -10,6 +10,43 @@ use std::convert::TryFrom;
use std::fmt;
use std::path::{Path, PathBuf};
/// Representation of the current item of Walker or the last observed item with metadata.
///
/// Every variant carries, in order, the item's [`Cid`], its path, and its [`Metadata`];
/// `File` additionally carries the total file size as its last field.
#[derive(Debug)]
pub enum MetadataEntry<'a> {
/// Current item is a non-root plain Directory or a HAMTShard directory.
Directory(&'a Cid, &'a Path, &'a Metadata),
/// Current item is possibly a root file with a path, metadata, and a total file size.
File(&'a Cid, &'a Path, &'a Metadata, u64),
/// Current item is the root directory (HAMTShard or plain Directory).
RootDirectory(&'a Cid, &'a Path, &'a Metadata),
/// Current item is possibly a root symlink.
Symlink(&'a Cid, &'a Path, &'a Metadata),
}
impl MetadataEntry<'_> {
/// Returns the metadata for the latest entry. It exists for initial directory entries, files,
/// and symlinks but not for continued HamtShards.
pub fn metadata(&self) -> &'_ Metadata {
match self {
Self::Directory(_, _, m)
| Self::File(_, _, m, ..)
| Self::RootDirectory(_, _, m)
| Self::Symlink(_, _, m) => m,
}
}
/// Returns the path for the latest entry. This is created from a UTF-8 string and, as such, is always
/// representable on all supported platforms.
pub fn path(&self) -> &'_ Path {
match self {
MetadataEntry::Directory(_, p, ..)
| MetadataEntry::File(_, p, ..)
| MetadataEntry::RootDirectory(_, p, ..)
| MetadataEntry::Symlink(_, p, ..) => p,
}
}
}
/// `Walker` helps with walking a UnixFS tree, including all of the content and files. It is created with
/// `Walker::new` and walked over each block with `Walker::continue_walk`. Use
/// `Walker::pending_links` to obtain the next [`Cid`] to be loaded and the prefetchable links.
@ -185,7 +222,6 @@ impl Walker {
// replacing this with try_fold takes as many lines as the R: Try<Ok = B> cannot be
// deduced without specifying the Error
for link in links {
pending.push(link?);
}
@ -355,12 +391,12 @@ impl fmt::Debug for InnerKind {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
use InnerKind::*;
match self {
RootDirectory(cid) => write!(fmt, "RootDirectory({})", cid),
BucketAtRoot(cid) => write!(fmt, "BucketAtRoot({})", cid),
RootBucket(cid) => write!(fmt, "RootBucket({})", cid),
Bucket(cid) => write!(fmt, "Bucket({})", cid),
BucketAtRoot(cid) => write!(fmt, "BucketAtRoot({})", cid),
Directory(cid) => write!(fmt, "Directory({})", cid),
File(cid, _, sz) => write!(fmt, "File({}, _, {})", cid, sz),
RootBucket(cid) => write!(fmt, "RootBucket({})", cid),
RootDirectory(cid) => write!(fmt, "RootDirectory({})", cid),
Symlink(cid) => write!(fmt, "Symlink({})", cid),
}
}
@ -369,63 +405,34 @@ impl fmt::Debug for InnerKind {
/// Representation of the current item of Walker or the last observed item.
#[derive(Debug)]
pub enum Entry<'a> {
/// Current item is the root directory (HAMTShard or plain Directory).
RootDirectory(&'a Cid, &'a Path, &'a Metadata),
/// Current item is a continuation of a HAMTShard directory. Only the root HAMTShard will have
/// file metadata.
Bucket(&'a Cid, &'a Path),
/// Current item is a non-root plain Directory or a HAMTShard directory.
Directory(&'a Cid, &'a Path, &'a Metadata),
/// Current item is possibly a root file with a path, metadata, and a total file size.
File(&'a Cid, &'a Path, &'a Metadata, u64),
/// Current item is possibly a root symlink.
Symlink(&'a Cid, &'a Path, &'a Metadata),
/// All items that have metadata.
Metadata(MetadataEntry<'a>),
}
impl<'a> Entry<'a> {
/// Returns the Cid for the latest entry.
pub fn cid(&self) -> &Cid {
match self {
Self::Bucket(cid, ..) => cid,
Self::Metadata(MetadataEntry::Directory(cid, ..))
| Self::Metadata(MetadataEntry::File(cid, ..))
| Self::Metadata(MetadataEntry::RootDirectory(cid, ..))
| Self::Metadata(MetadataEntry::Symlink(cid, ..)) => cid,
}
}
/// Returns the path for the latest entry. This is created from a UTF-8 string and, as such, is always
/// representable on all supported platforms.
pub fn path(&self) -> &'a Path {
use Entry::*;
match self {
RootDirectory(_, p, _)
| Bucket(_, p)
| Directory(_, p, _)
| File(_, p, _, _)
| Symlink(_, p, _) => p,
}
}
/// Returns the metadata for the latest entry. It exists for initial directory entries, files,
/// and symlinks but not for continued HamtShards.
pub fn metadata(&self) -> Option<&'a Metadata> {
use Entry::*;
match self {
Bucket(_, _) => None,
RootDirectory(_, _, m) | Directory(_, _, m) | File(_, _, m, _) | Symlink(_, _, m) => {
Some(m)
}
}
}
/// Returns the total size of the file this entry represents, or `None` if not a file.
pub fn total_file_size(&self) -> Option<u64> {
use Entry::*;
match self {
File(_, _, _, sz) => Some(*sz),
_ => None,
}
}
/// Returns the Cid for the latest entry.
pub fn cid(&self) -> &Cid {
use Entry::*;
match self {
RootDirectory(cid, _, _)
| Bucket(cid, _)
| Directory(cid, _, _)
| File(cid, _, _, _)
| Symlink(cid, _, _) => cid,
Self::Bucket(_, p) => p,
Self::Metadata(MetadataEntry::Directory(_, p, ..))
| Self::Metadata(MetadataEntry::File(_, p, ..))
| Self::Metadata(MetadataEntry::RootDirectory(_, p, ..))
| Self::Metadata(MetadataEntry::Symlink(_, p, ..)) => p,
}
}
}
@ -483,16 +490,23 @@ impl InnerEntry {
}
pub fn as_entry(&self) -> Entry<'_> {
use InnerKind::*;
match &self.kind {
RootDirectory(cid) | BucketAtRoot(cid) => {
Entry::RootDirectory(cid, &self.path, &self.metadata)
InnerKind::Bucket(cid) => Entry::Bucket(cid, &self.path),
InnerKind::Directory(cid) => {
Entry::Metadata(MetadataEntry::Directory(cid, &self.path, &self.metadata))
}
InnerKind::File(cid, _, sz) => {
Entry::Metadata(MetadataEntry::File(cid, &self.path, &self.metadata, *sz))
}
InnerKind::RootBucket(cid) => {
Entry::Metadata(MetadataEntry::Directory(cid, &self.path, &self.metadata))
}
InnerKind::RootDirectory(cid) | InnerKind::BucketAtRoot(cid) => Entry::Metadata(
MetadataEntry::RootDirectory(cid, &self.path, &self.metadata),
),
InnerKind::Symlink(cid) => {
Entry::Metadata(MetadataEntry::Symlink(cid, &self.path, &self.metadata))
}
RootBucket(cid) => Entry::Directory(cid, &self.path, &self.metadata),
Bucket(cid) => Entry::Bucket(cid, &self.path),
Directory(cid) => Entry::Directory(cid, &self.path, &self.metadata),
File(cid, _, sz) => Entry::File(cid, &self.path, &self.metadata, *sz),
Symlink(cid) => Entry::Symlink(cid, &self.path, &self.metadata),
}
}
@ -609,12 +623,12 @@ impl InnerEntry {
fn as_symlink(&mut self, cid: Cid, name: &str, depth: usize, metadata: Metadata) {
use InnerKind::*;
match self.kind {
RootDirectory(_)
Bucket(_)
| BucketAtRoot(_)
| RootBucket(_)
| Bucket(_)
| Directory(_)
| File(_, None, _)
| RootBucket(_)
| RootDirectory(_)
| Symlink(_) => {
self.kind = Symlink(cid);
self.set_path(name, depth);