Incremental parsing in CLI

Reparses files in the CLI incrementally and also uses the file modification timestamp to completely skip reparsing if possible.
This commit is contained in:
Laurenz 2023-09-19 15:03:43 +02:00
parent 13758b9c97
commit bb59f0e2b2
6 changed files with 177 additions and 52 deletions

1
Cargo.lock generated
View File

@ -2760,6 +2760,7 @@ dependencies = [
"dirs",
"ecow",
"env_proxy",
"filetime",
"flate2",
"inferno",
"memmap2",

View File

@ -29,6 +29,7 @@ comemo = "0.3"
ecow = "0.1.1"
dirs = "5"
flate2 = "1"
filetime = "0.2"
inferno = "0.11.15"
memmap2 = "0.7"
notify = "6"

View File

@ -75,8 +75,7 @@ pub fn compile_once(
Status::Compiling.print(command).unwrap();
}
// Reset everything and ensure that the main file is present.
world.reset();
// Ensure that the main file is present.
world.source(world.main()).map_err(|err| err.to_string())?;
let mut tracer = Tracer::new();

View File

@ -70,6 +70,9 @@ pub fn watch(mut command: CompileCommand) -> StrResult<()> {
.map(ToOwned::to_owned)
.collect();
// Reset all dependencies.
world.reset();
// Recompile.
compile_once(&mut world, &mut command, true)?;
comemo::evict(10);

View File

@ -1,4 +1,4 @@
use std::cell::{OnceCell, RefCell, RefMut};
use std::cell::{Cell, OnceCell, RefCell, RefMut};
use std::collections::HashMap;
use std::fs;
use std::hash::Hash;
@ -6,6 +6,7 @@ use std::path::{Path, PathBuf};
use chrono::{DateTime, Datelike, Local};
use comemo::Prehashed;
use filetime::FileTime;
use same_file::Handle;
use siphasher::sip128::{Hasher128, SipHasher13};
use typst::diag::{FileError, FileResult, StrResult};
@ -37,7 +38,7 @@ pub struct SystemWorld {
/// be used in conjunction with `paths`.
hashes: RefCell<HashMap<FileId, FileResult<PathHash>>>,
/// Maps canonical path hashes to source files and buffers.
paths: RefCell<HashMap<PathHash, PathSlot>>,
slots: RefCell<HashMap<PathHash, PathSlot>>,
/// The current datetime if requested. This is stored here to ensure it is
/// always the same within one compilation. Reset between compilations.
now: OnceCell<DateTime<Local>>,
@ -78,7 +79,7 @@ impl SystemWorld {
book: Prehashed::new(searcher.book),
fonts: searcher.fonts,
hashes: RefCell::default(),
paths: RefCell::default(),
slots: RefCell::default(),
now: OnceCell::new(),
})
}
@ -100,13 +101,19 @@ impl SystemWorld {
/// Return all paths the last compilation depended on.
pub fn dependencies(&mut self) -> impl Iterator<Item = &Path> {
self.paths.get_mut().values().map(|slot| slot.system_path.as_path())
self.slots
.get_mut()
.values()
.filter(|slot| slot.accessed())
.map(|slot| slot.path.as_path())
}
/// Reset the compilation state in preparation of a new compilation.
pub fn reset(&mut self) {
self.hashes.borrow_mut().clear();
self.paths.borrow_mut().clear();
for slot in self.slots.borrow_mut().values_mut() {
slot.reset();
}
self.now.take();
}
@ -185,15 +192,8 @@ impl SystemWorld {
})
.clone()?;
Ok(RefMut::map(self.paths.borrow_mut(), |paths| {
paths.entry(hash).or_insert_with(|| PathSlot {
id,
// This will only trigger if the `or_insert_with` above also
// triggered.
system_path,
source: OnceCell::new(),
buffer: OnceCell::new(),
})
Ok(RefMut::map(self.slots.borrow_mut(), |paths| {
paths.entry(hash).or_insert_with(|| PathSlot::new(id, system_path))
}))
}
}
@ -205,28 +205,110 @@ struct PathSlot {
/// The slot's canonical file id.
id: FileId,
/// The slot's path on the system.
system_path: PathBuf,
/// The lazily loaded source file for a path hash.
source: OnceCell<FileResult<Source>>,
/// The lazily loaded buffer for a path hash.
buffer: OnceCell<FileResult<Bytes>>,
path: PathBuf,
/// The lazily loaded and incrementally updated source file.
source: SlotCell<Source>,
/// The lazily loaded raw byte buffer.
file: SlotCell<Bytes>,
}
impl PathSlot {
fn source(&self) -> FileResult<Source> {
self.source
.get_or_init(|| {
let buf = read(&self.system_path)?;
let text = decode_utf8(buf)?;
Ok(Source::new(self.id, text))
})
.clone()
/// Build a slot for the file identified by `id` that lives at `path`.
///
/// Both cells start out empty, so nothing is read from disk until the
/// source or the raw bytes are first requested.
fn new(id: FileId, path: PathBuf) -> Self {
    let source = SlotCell::new();
    let file = SlotCell::new();
    Self { id, path, source, file }
}
/// Reports whether the parsed source or the raw bytes of this file were
/// requested during the ongoing compilation.
fn accessed(&self) -> bool {
    let hits = [self.source.accessed(), self.file.accessed()];
    hits.contains(&true)
}
/// Marks the file as not yet accessed in preparation of the next
/// compilation.
fn reset(&self) {
self.source.reset();
self.file.reset();
}
/// Returns the parsed source for this file.
///
/// When the cell has to (re)load the file, the bytes are decoded as UTF-8.
/// If a previously parsed source is available, it is updated in place via
/// `Source::replace`; otherwise a fresh `Source` is created.
fn source(&self) -> FileResult<Source> {
    self.source.get_or_init(&self.path, |bytes, previous| {
        let text = decode_utf8(&bytes)?;
        Ok(match previous {
            // Reuse the old source and let it update itself to the new text.
            Some(mut source) => {
                source.replace(text);
                source
            }
            // First successful load: parse from scratch.
            None => Source::new(self.id, text.into()),
        })
    })
}
/// Retrieve the file's bytes.
fn file(&self) -> FileResult<Bytes> {
self.buffer
.get_or_init(|| read(&self.system_path).map(Bytes::from))
.clone()
self.file.get_or_init(&self.path, |data, _| Ok(data.into()))
}
}
/// Lazily processes data for a file.
///
/// A cell caches the result of loading and processing one file, remembers
/// when that load was started, and tracks whether the cell was used since
/// the last `reset`.
struct SlotCell<T> {
/// The cached processing result (success or error); `None` until the
/// first call to `get_or_init`.
data: RefCell<Option<FileResult<T>>>,
/// The time at which the last (re)load was started. Starts out as
/// `FileTime::zero()` and is compared against the file's modification
/// time to decide whether the cache is still current.
refreshed: Cell<FileTime>,
/// Whether the cell was accessed since the last `reset`.
accessed: Cell<bool>,
}
impl<T: Clone> SlotCell<T> {
    /// Creates a cell that holds no data, has never been refreshed and is
    /// not marked as accessed.
    fn new() -> Self {
        Self {
            accessed: Cell::new(false),
            refreshed: Cell::new(FileTime::zero()),
            data: RefCell::new(None),
        }
    }

    /// Reports whether the cell was used since the last `reset`.
    fn accessed(&self) -> bool {
        self.accessed.get()
    }

    /// Clears the "accessed" marker in preparation of the next compilation.
    /// The cached data and the refresh timestamp are left untouched.
    fn reset(&self) {
        self.accessed.set(false);
    }

    /// Returns the cached value or (re)loads the file at `path`.
    ///
    /// The cached value is returned as-is when the cell was already accessed
    /// since the last `reset`, or when the file on disk is judged unchanged
    /// by `current`. Otherwise the file is read and handed to `f` together
    /// with the previous successful value, if any, so that `f` can update it
    /// incrementally. The outcome, success or error, replaces the cache.
    fn get_or_init(
        &self,
        path: &Path,
        f: impl FnOnce(Vec<u8>, Option<T>) -> FileResult<T>,
    ) -> FileResult<T> {
        let mut slot = self.data.borrow_mut();

        // Every path through this function leaves the cell marked as
        // accessed, so mark it up front and remember the previous state.
        let seen_before = self.accessed.replace(true);

        if let Some(cached) = slot.as_ref() {
            // The file system is only consulted on the first access within
            // a compilation.
            if seen_before || self.current(path) {
                return cached.clone();
            }
        }

        // Take the timestamp before reading so that a modification racing
        // with the read is picked up next time.
        self.refreshed.set(FileTime::now());

        // A previous error carries nothing worth reusing.
        let previous = slot.take().and_then(Result::ok);
        let outcome = match read(path) {
            Ok(bytes) => f(bytes, previous),
            Err(err) => Err(err),
        };

        *slot = Some(outcome.clone());
        outcome
    }

    /// Whether the cell contents are still up to date with the file system.
    ///
    /// The cache counts as current only if the file's metadata can be read
    /// and its modification time is strictly older than the last refresh.
    // NOTE(review): this compares the file's mtime against the wall-clock
    // time of the last load; on file systems with coarse mtime resolution
    // an edit made shortly after a load may go unnoticed - confirm.
    fn current(&self, path: &Path) -> bool {
        match fs::metadata(path) {
            Ok(meta) => {
                FileTime::from_last_modification_time(&meta) < self.refreshed.get()
            }
            Err(_) => false,
        }
    }
}
@ -255,12 +337,7 @@ fn read(path: &Path) -> FileResult<Vec<u8>> {
}
/// Decode UTF-8 with an optional BOM.
fn decode_utf8(buf: Vec<u8>) -> FileResult<String> {
Ok(if buf.starts_with(b"\xef\xbb\xbf") {
// Remove UTF-8 BOM.
std::str::from_utf8(&buf[3..])?.into()
} else {
// Assume UTF-8.
String::from_utf8(buf)?
})
/// Interprets `buf` as UTF-8 text, skipping a leading byte order mark if one
/// is present. The returned string borrows from `buf`.
fn decode_utf8(buf: &[u8]) -> FileResult<&str> {
    // Remove UTF-8 BOM.
    let payload = match buf.strip_prefix(b"\xef\xbb\xbf") {
        Some(rest) => rest,
        None => buf,
    };
    let text = std::str::from_utf8(payload)?;
    Ok(text)
}

View File

@ -69,13 +69,47 @@ impl Source {
}
/// Fully replace the source text.
pub fn replace(&mut self, text: String) {
let inner = Arc::make_mut(&mut self.0);
inner.text = Prehashed::new(text);
inner.lines = lines(&inner.text);
let mut root = parse(&inner.text);
root.numberize(inner.id, Span::FULL).unwrap();
inner.root = Prehashed::new(root);
///
/// This performs a naive (suffix/prefix-based) diff of the old and new text
/// to produce the smallest single edit that transforms old into new and
/// then calls [`edit`](Self::edit) with it.
///
/// Returns the range in the new source that was ultimately reparsed. If the
/// text did not change at all, nothing is edited and `0..0` is returned.
pub fn replace(&mut self, new: &str) -> Range<usize> {
    let old = self.text();

    // Number of leading bytes shared by the old and the new text.
    let mut prefix = old
        .as_bytes()
        .iter()
        .zip(new.as_bytes())
        .take_while(|(x, y)| x == y)
        .count();

    // Identical texts: nothing to do.
    if prefix == old.len() && prefix == new.len() {
        return 0..0;
    }

    // The byte-wise match may end inside a multi-byte character; back off
    // until the split point is a character boundary in both texts.
    while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
        prefix -= 1;
    }

    // Number of trailing bytes shared by the remainders. Both sides are
    // reversed *before* zipping so that the comparison is aligned at the
    // ends of the texts; reversing a zip of unequal-length slices would
    // instead pair bytes by index and could report a bogus common suffix.
    let mut suffix = old[prefix..]
        .as_bytes()
        .iter()
        .rev()
        .zip(new[prefix..].as_bytes().iter().rev())
        .take_while(|(x, y)| x == y)
        .count();

    // If the suffix starts inside a multi-byte character, shrink it. Growing
    // it would claim bytes as shared that were never compared equal. This
    // terminates because a suffix of zero always ends on a boundary.
    while !old.is_char_boundary(old.len() - suffix)
        || !new.is_char_boundary(new.len() - suffix)
    {
        suffix -= 1;
    }

    let replace = prefix..old.len() - suffix;
    let with = &new[prefix..new.len() - suffix];
    self.edit(replace, with)
}
/// Edit the source file by replacing the given range.
@ -382,11 +416,21 @@ mod tests {
// tested separately.
#[track_caller]
fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
let mut source = Source::detached(prev);
let result = Source::detached(after);
source.edit(range, with);
assert_eq!(source.text(), result.text());
assert_eq!(source.0.lines, result.0.lines);
let reference = Source::detached(after);
let mut edited = Source::detached(prev);
edited.edit(range.clone(), with);
assert_eq!(edited.text(), reference.text());
assert_eq!(edited.0.lines, reference.0.lines);
let mut replaced = Source::detached(prev);
replaced.replace(&{
let mut s = prev.to_string();
s.replace_range(range, with);
s
});
assert_eq!(replaced.text(), reference.text());
assert_eq!(replaced.0.lines, reference.0.lines);
}
// Test inserting at the beginning.