Incremental parsing in CLI
Reparses files in the CLI incrementally and also uses the file modification timestamp to completely skip reparsing if possible.
This commit is contained in:
parent
13758b9c97
commit
bb59f0e2b2
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -2760,6 +2760,7 @@ dependencies = [
|
||||
"dirs",
|
||||
"ecow",
|
||||
"env_proxy",
|
||||
"filetime",
|
||||
"flate2",
|
||||
"inferno",
|
||||
"memmap2",
|
||||
|
@ -29,6 +29,7 @@ comemo = "0.3"
|
||||
ecow = "0.1.1"
|
||||
dirs = "5"
|
||||
flate2 = "1"
|
||||
filetime = "0.2"
|
||||
inferno = "0.11.15"
|
||||
memmap2 = "0.7"
|
||||
notify = "6"
|
||||
|
@ -75,8 +75,7 @@ pub fn compile_once(
|
||||
Status::Compiling.print(command).unwrap();
|
||||
}
|
||||
|
||||
// Reset everything and ensure that the main file is present.
|
||||
world.reset();
|
||||
// Ensure that the main file is present.
|
||||
world.source(world.main()).map_err(|err| err.to_string())?;
|
||||
|
||||
let mut tracer = Tracer::new();
|
||||
|
@ -70,6 +70,9 @@ pub fn watch(mut command: CompileCommand) -> StrResult<()> {
|
||||
.map(ToOwned::to_owned)
|
||||
.collect();
|
||||
|
||||
// Reset all dependencies.
|
||||
world.reset();
|
||||
|
||||
// Recompile.
|
||||
compile_once(&mut world, &mut command, true)?;
|
||||
comemo::evict(10);
|
||||
|
@ -1,4 +1,4 @@
|
||||
use std::cell::{OnceCell, RefCell, RefMut};
|
||||
use std::cell::{Cell, OnceCell, RefCell, RefMut};
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::hash::Hash;
|
||||
@ -6,6 +6,7 @@ use std::path::{Path, PathBuf};
|
||||
|
||||
use chrono::{DateTime, Datelike, Local};
|
||||
use comemo::Prehashed;
|
||||
use filetime::FileTime;
|
||||
use same_file::Handle;
|
||||
use siphasher::sip128::{Hasher128, SipHasher13};
|
||||
use typst::diag::{FileError, FileResult, StrResult};
|
||||
@ -37,7 +38,7 @@ pub struct SystemWorld {
|
||||
/// be used in conjunction with `paths`.
|
||||
hashes: RefCell<HashMap<FileId, FileResult<PathHash>>>,
|
||||
/// Maps canonical path hashes to source files and buffers.
|
||||
paths: RefCell<HashMap<PathHash, PathSlot>>,
|
||||
slots: RefCell<HashMap<PathHash, PathSlot>>,
|
||||
/// The current datetime if requested. This is stored here to ensure it is
|
||||
/// always the same within one compilation. Reset between compilations.
|
||||
now: OnceCell<DateTime<Local>>,
|
||||
@ -78,7 +79,7 @@ impl SystemWorld {
|
||||
book: Prehashed::new(searcher.book),
|
||||
fonts: searcher.fonts,
|
||||
hashes: RefCell::default(),
|
||||
paths: RefCell::default(),
|
||||
slots: RefCell::default(),
|
||||
now: OnceCell::new(),
|
||||
})
|
||||
}
|
||||
@ -100,13 +101,19 @@ impl SystemWorld {
|
||||
|
||||
/// Return all paths the last compilation depended on.
|
||||
pub fn dependencies(&mut self) -> impl Iterator<Item = &Path> {
|
||||
self.paths.get_mut().values().map(|slot| slot.system_path.as_path())
|
||||
self.slots
|
||||
.get_mut()
|
||||
.values()
|
||||
.filter(|slot| slot.accessed())
|
||||
.map(|slot| slot.path.as_path())
|
||||
}
|
||||
|
||||
/// Reset the compilation state in preparation of a new compilation.
|
||||
pub fn reset(&mut self) {
|
||||
self.hashes.borrow_mut().clear();
|
||||
self.paths.borrow_mut().clear();
|
||||
for slot in self.slots.borrow_mut().values_mut() {
|
||||
slot.reset();
|
||||
}
|
||||
self.now.take();
|
||||
}
|
||||
|
||||
@ -185,15 +192,8 @@ impl SystemWorld {
|
||||
})
|
||||
.clone()?;
|
||||
|
||||
Ok(RefMut::map(self.paths.borrow_mut(), |paths| {
|
||||
paths.entry(hash).or_insert_with(|| PathSlot {
|
||||
id,
|
||||
// This will only trigger if the `or_insert_with` above also
|
||||
// triggered.
|
||||
system_path,
|
||||
source: OnceCell::new(),
|
||||
buffer: OnceCell::new(),
|
||||
})
|
||||
Ok(RefMut::map(self.slots.borrow_mut(), |paths| {
|
||||
paths.entry(hash).or_insert_with(|| PathSlot::new(id, system_path))
|
||||
}))
|
||||
}
|
||||
}
|
||||
@ -205,28 +205,110 @@ struct PathSlot {
|
||||
/// The slot's canonical file id.
|
||||
id: FileId,
|
||||
/// The slot's path on the system.
|
||||
system_path: PathBuf,
|
||||
/// The lazily loaded source file for a path hash.
|
||||
source: OnceCell<FileResult<Source>>,
|
||||
/// The lazily loaded buffer for a path hash.
|
||||
buffer: OnceCell<FileResult<Bytes>>,
|
||||
path: PathBuf,
|
||||
/// The lazily loaded and incrementally updated source file.
|
||||
source: SlotCell<Source>,
|
||||
/// The lazily loaded raw byte buffer.
|
||||
file: SlotCell<Bytes>,
|
||||
}
|
||||
|
||||
impl PathSlot {
|
||||
fn source(&self) -> FileResult<Source> {
|
||||
self.source
|
||||
.get_or_init(|| {
|
||||
let buf = read(&self.system_path)?;
|
||||
let text = decode_utf8(buf)?;
|
||||
Ok(Source::new(self.id, text))
|
||||
})
|
||||
.clone()
|
||||
/// Create a new path slot.
|
||||
fn new(id: FileId, path: PathBuf) -> Self {
|
||||
Self {
|
||||
id,
|
||||
path,
|
||||
file: SlotCell::new(),
|
||||
source: SlotCell::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether the file was accessed in the ongoing compilation.
|
||||
fn accessed(&self) -> bool {
|
||||
self.source.accessed() || self.file.accessed()
|
||||
}
|
||||
|
||||
/// Marks the file as not yet accessed in preparation of the next
|
||||
/// compilation.
|
||||
fn reset(&self) {
|
||||
self.source.reset();
|
||||
self.file.reset();
|
||||
}
|
||||
|
||||
/// Retrieve the source for this file.
|
||||
fn source(&self) -> FileResult<Source> {
|
||||
self.source.get_or_init(&self.path, |data, prev| {
|
||||
let text = decode_utf8(&data)?;
|
||||
if let Some(mut prev) = prev {
|
||||
prev.replace(text);
|
||||
Ok(prev)
|
||||
} else {
|
||||
Ok(Source::new(self.id, text.into()))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Retrieve the file's bytes.
|
||||
fn file(&self) -> FileResult<Bytes> {
|
||||
self.buffer
|
||||
.get_or_init(|| read(&self.system_path).map(Bytes::from))
|
||||
.clone()
|
||||
self.file.get_or_init(&self.path, |data, _| Ok(data.into()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Lazily processes data for a file.
|
||||
struct SlotCell<T> {
|
||||
data: RefCell<Option<FileResult<T>>>,
|
||||
refreshed: Cell<FileTime>,
|
||||
accessed: Cell<bool>,
|
||||
}
|
||||
|
||||
impl<T: Clone> SlotCell<T> {
|
||||
/// Creates a new, empty cell.
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
data: RefCell::new(None),
|
||||
refreshed: Cell::new(FileTime::zero()),
|
||||
accessed: Cell::new(false),
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether the cell was accessed in the ongoing compilation.
|
||||
fn accessed(&self) -> bool {
|
||||
self.accessed.get()
|
||||
}
|
||||
|
||||
/// Marks the cell as not yet accessed in preparation of the next
|
||||
/// compilation.
|
||||
fn reset(&self) {
|
||||
self.accessed.set(false);
|
||||
}
|
||||
|
||||
/// Gets the contents of the cell or initialize them.
|
||||
fn get_or_init(
|
||||
&self,
|
||||
path: &Path,
|
||||
f: impl FnOnce(Vec<u8>, Option<T>) -> FileResult<T>,
|
||||
) -> FileResult<T> {
|
||||
let mut borrow = self.data.borrow_mut();
|
||||
if let Some(data) = &*borrow {
|
||||
if self.accessed.replace(true) || self.current(path) {
|
||||
return data.clone();
|
||||
}
|
||||
}
|
||||
|
||||
self.accessed.set(true);
|
||||
self.refreshed.set(FileTime::now());
|
||||
let prev = borrow.take().and_then(Result::ok);
|
||||
let value = read(path).and_then(|data| f(data, prev));
|
||||
*borrow = Some(value.clone());
|
||||
value
|
||||
}
|
||||
|
||||
/// Whether the cell contents are still up to date with the file system.
|
||||
fn current(&self, path: &Path) -> bool {
|
||||
fs::metadata(path).map_or(false, |meta| {
|
||||
let modified = FileTime::from_last_modification_time(&meta);
|
||||
modified < self.refreshed.get()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -255,12 +337,7 @@ fn read(path: &Path) -> FileResult<Vec<u8>> {
|
||||
}
|
||||
|
||||
/// Decode UTF-8 with an optional BOM.
|
||||
fn decode_utf8(buf: Vec<u8>) -> FileResult<String> {
|
||||
Ok(if buf.starts_with(b"\xef\xbb\xbf") {
|
||||
// Remove UTF-8 BOM.
|
||||
std::str::from_utf8(&buf[3..])?.into()
|
||||
} else {
|
||||
// Assume UTF-8.
|
||||
String::from_utf8(buf)?
|
||||
})
|
||||
fn decode_utf8(buf: &[u8]) -> FileResult<&str> {
|
||||
// Remove UTF-8 BOM.
|
||||
Ok(std::str::from_utf8(buf.strip_prefix(b"\xef\xbb\xbf").unwrap_or(buf))?)
|
||||
}
|
||||
|
@ -69,13 +69,47 @@ impl Source {
|
||||
}
|
||||
|
||||
/// Fully replace the source text.
|
||||
pub fn replace(&mut self, text: String) {
|
||||
let inner = Arc::make_mut(&mut self.0);
|
||||
inner.text = Prehashed::new(text);
|
||||
inner.lines = lines(&inner.text);
|
||||
let mut root = parse(&inner.text);
|
||||
root.numberize(inner.id, Span::FULL).unwrap();
|
||||
inner.root = Prehashed::new(root);
|
||||
///
|
||||
/// This performs a naive (suffix/prefix-based) diff of the old and new text
|
||||
/// to produce the smallest single edit that transforms old into new and
|
||||
/// then calls [`edit`](Self::edit) with it.
|
||||
///
|
||||
/// Returns the range in the new source that was ultimately reparsed.
|
||||
pub fn replace(&mut self, new: &str) -> Range<usize> {
|
||||
let old = self.text();
|
||||
|
||||
let mut prefix = old
|
||||
.as_bytes()
|
||||
.iter()
|
||||
.zip(new.as_bytes())
|
||||
.take_while(|(x, y)| x == y)
|
||||
.count();
|
||||
|
||||
if prefix == old.len() && prefix == new.len() {
|
||||
return 0..0;
|
||||
}
|
||||
|
||||
while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
|
||||
prefix -= 1;
|
||||
}
|
||||
|
||||
let mut suffix = old[prefix..]
|
||||
.as_bytes()
|
||||
.iter()
|
||||
.zip(new[prefix..].as_bytes())
|
||||
.rev()
|
||||
.take_while(|(x, y)| x == y)
|
||||
.count();
|
||||
|
||||
while !old.is_char_boundary(old.len() - suffix)
|
||||
|| !new.is_char_boundary(new.len() - suffix)
|
||||
{
|
||||
suffix += 1;
|
||||
}
|
||||
|
||||
let replace = prefix..old.len() - suffix;
|
||||
let with = &new[prefix..new.len() - suffix];
|
||||
self.edit(replace, with)
|
||||
}
|
||||
|
||||
/// Edit the source file by replacing the given range.
|
||||
@ -382,11 +416,21 @@ mod tests {
|
||||
// tested separately.
|
||||
#[track_caller]
|
||||
fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
|
||||
let mut source = Source::detached(prev);
|
||||
let result = Source::detached(after);
|
||||
source.edit(range, with);
|
||||
assert_eq!(source.text(), result.text());
|
||||
assert_eq!(source.0.lines, result.0.lines);
|
||||
let reference = Source::detached(after);
|
||||
|
||||
let mut edited = Source::detached(prev);
|
||||
edited.edit(range.clone(), with);
|
||||
assert_eq!(edited.text(), reference.text());
|
||||
assert_eq!(edited.0.lines, reference.0.lines);
|
||||
|
||||
let mut replaced = Source::detached(prev);
|
||||
replaced.replace(&{
|
||||
let mut s = prev.to_string();
|
||||
s.replace_range(range, with);
|
||||
s
|
||||
});
|
||||
assert_eq!(replaced.text(), reference.text());
|
||||
assert_eq!(replaced.0.lines, reference.0.lines);
|
||||
}
|
||||
|
||||
// Test inserting at the beginning.
|
||||
|
Loading…
x
Reference in New Issue
Block a user