refactor: remove size validation example

Joonas Koivunen 2020-06-05 15:04:55 +03:00
parent 62563ba384
commit 88f0e4ae90


@@ -382,242 +382,6 @@ mod tests {
assert_eq!(combined, b"foobar\n");
}
#[test]
fn traversal_from_blockstore_with_size_validation() {
let started_at = Instant::now();
use std::path::PathBuf;
struct FakeShardedBlockstore {
root: PathBuf,
}
impl FakeShardedBlockstore {
fn as_path(&self, key: &[u8]) -> PathBuf {
let encoded = multibase::Base::Base32Upper.encode(key);
let len = encoded.len();
// this is safe because base32 is ascii
let dir = &encoded[(len - 3)..(len - 1)];
assert_eq!(dir.len(), 2);
let mut path = self.root.clone();
path.push(dir);
path.push(encoded);
path.set_extension("data");
path
}
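// To illustrate as_path with a made-up key: if the raw key bytes encode to
// "ABCDEFG" in Base32Upper, the block would be read from "<root>/EF/ABCDEFG.data",
// i.e. the two characters before the last one name the shard directory.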
fn as_file(&self, key: &[u8]) -> std::io::Result<std::fs::File> {
// assume that we have a block store with second-to-last/2 sharding
// files in Base32Upper
let path = self.as_path(key);
//println!("{} -> {:?}", cid::Cid::try_from(key).unwrap(), path);
std::fs::OpenOptions::new().read(true).open(path)
}
}
/// Debug wrapper for a slice which is expected to have a lot of the same numbers, like a
/// dense storage for merkledag size validation, in which case T = u64.
struct RLE<'a, T: fmt::Display + PartialEq>(&'a [T]);
impl<'a, T: fmt::Display + PartialEq> fmt::Debug for RLE<'a, T> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
let total = self.0.len();
write!(fmt, "{{ total: {}, rle: [", total)?;
let mut last = None;
let mut count = 0;
for c in self.0 {
match last {
Some(x) if x == c => count += 1,
Some(x) => {
if count > 1 {
write!(fmt, "{} x {}, ", count, x)?;
} else {
write!(fmt, "{}, ", x)?;
}
last = Some(c);
count = 1;
}
None => {
last = Some(c);
count = 1;
}
}
}
if let Some(x) = last {
if count > 1 {
write!(fmt, "{} x {}, ", count, x)?;
} else {
write!(fmt, "{}, ", x)?;
}
}
write!(fmt, "] }}")
}
}
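// For example, format!("{:?}", RLE(&[0u64, 0, 0, 7])) produces
// "{ total: 4, rle: [3 x 0, 7, ] }" (the trailing comma is kept for simplicity).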
// this depends on go-ipfs 0.5 flatfs blockstore and linux-5.6.14.tar.xz imported
let blocks = FakeShardedBlockstore {
root: PathBuf::from("/home/joonas/Programs/go-ipfs/ipfs_home/blocks"),
};
#[derive(Default)]
struct TreeSizeTracker(Vec<u64>);
impl TreeSizeTracker {
/// Returns a usize "key" for this tree
fn start_tracking(&mut self, expected_size: u64) -> usize {
let key = self.0.len();
self.0.push(expected_size);
key
}
fn visit_size(&mut self, key: usize, actual_size: u64) {
assert!(
self.0[key] >= actual_size,
"tree size exhausted, failed: {} >= {}",
self.0[key],
actual_size
);
self.0[key] -= actual_size;
let mut index = key;
// this sort of works but isn't actually generational indices, which would be safer.
// the indexing still works ok as this "cleaning" routine is done only after
// completely processing the subtree.
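// e.g. budgets [0, 3, 0, 0] where the last entry just hit zero trim down to
// [0, 3]; a zero lower in the vec survives until the budgets above it are gone.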
while self.0[index] == 0 && index + 1 == self.0.len() {
self.0.pop();
if index > 0 {
index -= 1;
} else {
break;
}
}
}
fn complete(self) {
assert!(
self.0.iter().all(|&x| x == 0),
"some trees were not exhausted: {:?}",
RLE(&self.0)
);
}
}
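// A rough sketch of the contract, mirroring the loop below: start_tracking(n)
// pushes a budget of n and returns its index, visit_size(i, m) subtracts m from
// budget i and trims zeroed budgets off the tail, and complete() asserts that
// every remaining budget ended up exactly at zero.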
//let start = "QmTEn8ypAkbJXZUXCRHBorwF2jM8uTUW9yRLzrcQouSoD4"; // balanced
let start = "Qmf1crhdrQEsVUjvmnSF3Q5PHc825MaHZ5cPhtVS2eJ1p4"; // trickle
let start = cid::Cid::try_from(start).unwrap().to_bytes();
let mut hasher = sha2::Sha256::new();
let mut work_hwm = 0;
let mut buf_hwm = 0;
let mut bytes = (0, 0);
let mut work: Vec<(_, Option<u64>, _, _)> = Vec::new();
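// work stack entries: (raw block key, claimed subtree size from the parent
// PBLink, byte range to resume the walk at, tracker index for that size);
// the root entry has none of the latter three.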
work.push((start, None, None, None));
let mut state: Option<Traversal> = None;
let mut block_buffer = Vec::new();
// This is used to do "size validation" for the tree.
let mut tree_validator = TreeSizeTracker::default();
while let Some((key, size, mut range, size_index)) = work.pop() {
// println!("{:?}", RLE(tree_sizes.as_slice()));
block_buffer.clear();
blocks
.as_file(&key)
.unwrap()
.read_to_end(&mut block_buffer)
.unwrap();
buf_hwm = buf_hwm.max(block_buffer.len());
if let Some(size) = size {
// the size on PBLink is the size of the whole subtree, so it's a bit tricky to validate.
// I guess it could be given as nested items to further "scope it down"; this is
// done per tree.
//
// with generational indices the zeroes could be removed here and be done with it
tree_validator.visit_size(size_index.unwrap(), size);
}
let slice = &block_buffer[..];
bytes.0 += slice.len();
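// the first block enters the walk with FileReader::from_block; every later
// block resumes the previous Traversal via continue_walk, using the byte
// range recorded for its link.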
let reader = match state.take() {
Some(t) => t.continue_walk(slice, &range.take().unwrap()),
None => FileReader::from_block(slice),
};
let reader = reader.unwrap_or_else(|e| {
panic!("failed to start or continue from {:02x?}: {:?}", key, e)
});
let (content, traversal) = reader.content();
state = Some(traversal);
match content {
FileContent::Just(content) => {
bytes.1 += content.len();
hasher.input(content);
}
FileContent::Spread(iter) => {
let mapped = iter.map(|(link, range)| {
assert_eq!(link.Name.as_deref(), Some(""));
let hash = link.Hash.unwrap_borrowed().to_vec();
let size = link.Tsize.unwrap_or(0);
let index = tree_validator.start_tracking(size);
(hash, Some(size), Some(range), Some(index))
});
let before = work.len();
work.extend(mapped);
// not using a VecDeque does make this a bit more difficult to read, but the DFS
// order visits the first child first, so it needs to be the last entry in the
// vec when using pop
(&mut work[before..]).reverse();
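// e.g. children [c1, c2, c3] are appended and then reversed to [c3, c2, c1],
// so the next pop() returns c1, keeping the depth-first order.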
}
}
work_hwm = work_hwm.max(work.len());
}
let elapsed = started_at.elapsed();
println!("{:?}", elapsed);
println!("{:?}", bytes);
tree_validator.complete();
let result = hasher.result();
assert_eq!(
&result[..],
hex!("33763f3541711e39fa743da45ff9512d54ade61406173f3d267ba4484cec7ea3")
);
assert_eq!(work_hwm, 176);
assert_eq!(buf_hwm, 262158);
}
fn collect_bytes(blocks: &FakeBlockstore, visit: IdleFileVisit, start: &str) -> Vec<u8> {
let mut ret = Vec::new();