Clean up the parser a bit

This commit is contained in:
Laurenz 2022-04-24 17:52:02 +02:00
parent 89927d7de0
commit 2791f59ce2
6 changed files with 130 additions and 117 deletions

View File

@ -154,7 +154,7 @@ impl Content {
Self::show(DecoNode::<UNDERLINE>(self))
}
/// Return a node that is spaced apart at top and bottom.
/// Add vertical spacing above and below the node.
pub fn spaced(self, above: Length, below: Length) -> Self {
if above.is_zero() && below.is_zero() {
return self;

View File

@ -8,35 +8,41 @@ use super::{
reparse_markup_elements, TokenMode,
};
/// Refresh the given green node with as little parsing as possible.
///
/// Takes the new source, the range in the old source that was replaced and the
/// length of the replacement.
///
/// Returns the range in the new source that was ultimately reparsed. If the
/// incremental attempt yields `None`, the node is replaced by a full parse of
/// `src` and the whole range `0 .. src.len()` is returned.
pub fn reparse(
green: &mut Arc<GreenNode>,
src: &str,
replaced: Range<usize>,
replacement_len: usize,
) -> Range<usize> {
Reparser { src, replaced, replacement_len }
// Try the incremental path first, operating on the node in place.
.reparse_step(Arc::make_mut(green), 0, true)
.unwrap_or_else(|| {
// Fallback: discard the old tree and parse the new source from scratch.
*green = parse(src);
0 .. src.len()
})
}
/// Allows partial refreshes of the [`Green`] node tree.
///
/// This struct holds a description of a change. Its methods can be used to try
/// and apply the change to a green tree.
pub struct Reparser<'a> {
struct Reparser<'a> {
/// The new source code, with the change applied.
src: &'a str,
/// Which range in the old source file was changed.
replace_range: Range<usize>,
/// How many characters replaced the text in `replace_range`.
replace_len: usize,
}
impl<'a> Reparser<'a> {
/// Create a new reparser.
pub fn new(src: &'a str, replace_range: Range<usize>, replace_len: usize) -> Self {
Self { src, replace_range, replace_len }
}
replaced: Range<usize>,
/// How many bytes replaced the text in `replaced`.
replacement_len: usize,
}
impl Reparser<'_> {
/// Find the innermost child that is incremental safe.
pub fn reparse(&self, green: &mut Arc<GreenNode>) -> Range<usize> {
self.reparse_step(Arc::make_mut(green), 0, true).unwrap_or_else(|| {
*green = parse(self.src);
0 .. self.src.len()
})
}
/// Try to reparse inside the given node.
fn reparse_step(
&self,
green: &mut GreenNode,
@ -64,19 +70,19 @@ impl Reparser<'_> {
match search {
SearchState::NoneFound => {
// The edit is contained within the span of the current element.
if child_span.contains(&self.replace_range.start)
&& child_span.end >= self.replace_range.end
if child_span.contains(&self.replaced.start)
&& child_span.end >= self.replaced.end
{
// In Markup mode, we want to consider a non-whitespace
// neighbor if the edit is on the node boundary.
search = if child_span.end == self.replace_range.end
search = if child_span.end == self.replaced.end
&& child_mode == TokenMode::Markup
{
SearchState::RequireNonTrivia(pos)
} else {
SearchState::Contained(pos)
};
} else if child_span.contains(&self.replace_range.start) {
} else if child_span.contains(&self.replaced.start) {
search = SearchState::Inside(pos);
} else {
// We look only for non spaces, non-semicolon and also
@ -86,7 +92,7 @@ impl Reparser<'_> {
&& child.kind() != &NodeKind::Semicolon
&& child.kind() != &NodeKind::Text('/'.into())
&& (ahead_nontrivia.is_none()
|| self.replace_range.start > child_span.end)
|| self.replaced.start > child_span.end)
{
ahead_nontrivia = Some((pos, at_start));
}
@ -94,9 +100,9 @@ impl Reparser<'_> {
}
}
SearchState::Inside(start) => {
if child_span.end == self.replace_range.end {
if child_span.end == self.replaced.end {
search = SearchState::RequireNonTrivia(start);
} else if child_span.end > self.replace_range.end {
} else if child_span.end > self.replaced.end {
search = SearchState::SpanFound(start, pos);
}
}
@ -172,7 +178,7 @@ impl Reparser<'_> {
if let Some((ahead, ahead_at_start)) = ahead_nontrivia {
let ahead_kind = green.children()[ahead.idx].kind();
if start.offset == self.replace_range.start
if start.offset == self.replaced.start
|| ahead_kind.only_at_start()
|| ahead_kind.only_in_mode() != Some(TokenMode::Markup)
{
@ -206,7 +212,7 @@ impl Reparser<'_> {
let superseded_start = superseded_idx.start;
let differential: isize =
self.replace_len as isize - self.replace_range.len() as isize;
self.replacement_len as isize - self.replaced.len() as isize;
let newborn_end = (superseded_span.end as isize + differential) as usize;
let newborn_span = superseded_span.start .. newborn_end;
@ -251,11 +257,12 @@ impl Reparser<'_> {
}
}
/// The position of a green node in terms of its string offset and index within
/// the parent node.
/// The position of a green node.
#[derive(Clone, Copy, Debug, PartialEq)]
struct GreenPos {
/// The index in the parent node.
idx: usize,
/// The byte offset in the string.
offset: usize,
}

View File

@ -7,7 +7,6 @@ mod tokens;
pub use incremental::*;
pub use parser::*;
pub use resolve::*;
pub use tokens::*;
use std::collections::HashSet;
@ -30,13 +29,13 @@ pub fn parse(src: &str) -> Arc<GreenNode> {
/// Reparse a code block.
///
/// Returns `Some` if all of the input was consumed.
pub fn reparse_code_block(
fn reparse_code_block(
prefix: &str,
src: &str,
end_pos: usize,
) -> Option<(Vec<Green>, bool, usize)> {
let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
if !p.at(&NodeKind::LeftBrace) {
if !p.at(NodeKind::LeftBrace) {
return None;
}
@ -54,13 +53,13 @@ pub fn reparse_code_block(
/// Reparse a content block.
///
/// Returns `Some` if all of the input was consumed.
pub fn reparse_content_block(
fn reparse_content_block(
prefix: &str,
src: &str,
end_pos: usize,
) -> Option<(Vec<Green>, bool, usize)> {
let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
if !p.at(&NodeKind::LeftBracket) {
if !p.at(NodeKind::LeftBracket) {
return None;
}
@ -78,7 +77,7 @@ pub fn reparse_content_block(
/// Reparse some markup elements without the topmost node.
///
/// Returns `Some` if all of the input was consumed.
pub fn reparse_markup_elements(
fn reparse_markup_elements(
prefix: &str,
src: &str,
end_pos: usize,
@ -108,7 +107,7 @@ pub fn reparse_markup_elements(
continue;
}
let recent = p.children.last().unwrap();
let recent = p.marker().before(&p).unwrap();
let recent_start = p.prev_end() - recent.len();
while offset <= recent_start as isize {
@ -275,8 +274,8 @@ fn emph(p: &mut Parser) {
fn heading(p: &mut Parser, at_start: bool) {
let marker = p.marker();
let current_start = p.current_start();
p.eat_assert(&NodeKind::Eq);
while p.eat_if(&NodeKind::Eq) {}
p.assert(NodeKind::Eq);
while p.eat_if(NodeKind::Eq) {}
if at_start && p.peek().map_or(true, |kind| kind.is_space()) {
p.eat_while(|kind| kind.is_space());
@ -292,10 +291,10 @@ fn heading(p: &mut Parser, at_start: bool) {
fn list_node(p: &mut Parser, at_start: bool) {
let marker = p.marker();
let text: EcoString = p.peek_src().into();
p.eat_assert(&NodeKind::Minus);
p.assert(NodeKind::Minus);
let column = p.column(p.prev_end());
if at_start && p.eat_if(&NodeKind::Space(0)) && !p.eof() {
if at_start && p.eat_if(NodeKind::Space(0)) && !p.eof() {
markup_indented(p, column);
marker.end(p, NodeKind::List);
} else {
@ -310,7 +309,7 @@ fn enum_node(p: &mut Parser, at_start: bool) {
p.eat();
let column = p.column(p.prev_end());
if at_start && p.eat_if(&NodeKind::Space(0)) && !p.eof() {
if at_start && p.eat_if(NodeKind::Space(0)) && !p.eof() {
markup_indented(p, column);
marker.end(p, NodeKind::Enum);
} else {
@ -379,7 +378,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
}
// Method call or field access.
if p.eat_if(&NodeKind::Dot) {
if p.eat_if(NodeKind::Dot) {
ident(p)?;
if let Some(NodeKind::LeftParen | NodeKind::LeftBracket) = p.peek_direct() {
marker.perform(p, NodeKind::MethodCall, |p| args(p, true, true))?;
@ -389,8 +388,8 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
continue;
}
let op = if p.eat_if(&NodeKind::Not) {
if p.at(&NodeKind::In) {
let op = if p.eat_if(NodeKind::Not) {
if p.at(NodeKind::In) {
BinOp::NotIn
} else {
p.expected("keyword `in`");
@ -434,9 +433,9 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
p.eat();
// Arrow means this is a closure's lone parameter.
if !atomic && p.at(&NodeKind::Arrow) {
if !atomic && p.at(NodeKind::Arrow) {
marker.end(p, NodeKind::ClosureParams);
p.eat_assert(&NodeKind::Arrow);
p.assert(NodeKind::Arrow);
marker.perform(p, NodeKind::ClosureExpr, expr)
} else {
Ok(())
@ -519,7 +518,7 @@ fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult {
let marker = p.marker();
p.start_group(Group::Paren);
let colon = p.eat_if(&NodeKind::Colon);
let colon = p.eat_if(NodeKind::Colon);
let kind = collection(p).0;
p.end_group();
@ -530,9 +529,9 @@ fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult {
}
// Arrow means this is a closure's parameter list.
if !atomic && p.at(&NodeKind::Arrow) {
if !atomic && p.at(NodeKind::Arrow) {
params(p, marker);
p.eat_assert(&NodeKind::Arrow);
p.assert(NodeKind::Arrow);
return marker.perform(p, NodeKind::ClosureExpr, expr);
}
@ -592,7 +591,7 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) {
break;
}
if p.eat_if(&NodeKind::Comma) {
if p.eat_if(NodeKind::Comma) {
can_group = false;
} else {
missing_coma = Some(p.trivia_start());
@ -615,16 +614,16 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) {
/// named pair.
fn item(p: &mut Parser) -> ParseResult<NodeKind> {
let marker = p.marker();
if p.eat_if(&NodeKind::Dots) {
if p.eat_if(NodeKind::Dots) {
marker.perform(p, NodeKind::Spread, expr)?;
return Ok(NodeKind::Spread);
}
expr(p)?;
if p.at(&NodeKind::Colon) {
if p.at(NodeKind::Colon) {
marker.perform(p, NodeKind::Named, |p| {
if let Some(NodeKind::Ident(_)) = marker.peek(p).map(|c| c.kind()) {
if let Some(NodeKind::Ident(_)) = marker.after(p).map(|c| c.kind()) {
p.eat();
expr(p)
} else {
@ -732,7 +731,7 @@ fn args(p: &mut Parser, direct: bool, brackets: bool) -> ParseResult {
}
p.perform(NodeKind::CallArgs, |p| {
if p.at(&NodeKind::LeftParen) {
if p.at(NodeKind::LeftParen) {
let marker = p.marker();
p.start_group(Group::Paren);
collection(p);
@ -764,7 +763,7 @@ fn args(p: &mut Parser, direct: bool, brackets: bool) -> ParseResult {
/// Parse a let expression.
fn let_expr(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::LetExpr, |p| {
p.eat_assert(&NodeKind::Let);
p.assert(NodeKind::Let);
let marker = p.marker();
ident(p)?;
@ -779,7 +778,7 @@ fn let_expr(p: &mut Parser) -> ParseResult {
params(p, marker);
}
if p.eat_if(&NodeKind::Eq) {
if p.eat_if(NodeKind::Eq) {
expr(p)?;
} else if has_params {
// Function definitions must have a body.
@ -798,7 +797,7 @@ fn let_expr(p: &mut Parser) -> ParseResult {
/// Parse a set expression.
fn set_expr(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::SetExpr, |p| {
p.eat_assert(&NodeKind::Set);
p.assert(NodeKind::Set);
ident(p)?;
args(p, true, false)
})
@ -807,11 +806,11 @@ fn set_expr(p: &mut Parser) -> ParseResult {
/// Parse a show expression.
fn show_expr(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::ShowExpr, |p| {
p.eat_assert(&NodeKind::Show);
p.assert(NodeKind::Show);
ident(p)?;
p.eat_expect(&NodeKind::Colon)?;
p.expect(NodeKind::Colon)?;
ident(p)?;
p.eat_expect(&NodeKind::As)?;
p.expect(NodeKind::As)?;
expr(p)
})
}
@ -819,9 +818,9 @@ fn show_expr(p: &mut Parser) -> ParseResult {
/// Parse a wrap expression.
fn wrap_expr(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::WrapExpr, |p| {
p.eat_assert(&NodeKind::Wrap);
p.assert(NodeKind::Wrap);
ident(p)?;
p.eat_expect(&NodeKind::In)?;
p.expect(NodeKind::In)?;
expr(p)
})
}
@ -829,13 +828,13 @@ fn wrap_expr(p: &mut Parser) -> ParseResult {
/// Parse an if expression.
fn if_expr(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::IfExpr, |p| {
p.eat_assert(&NodeKind::If);
p.assert(NodeKind::If);
expr(p)?;
body(p)?;
if p.eat_if(&NodeKind::Else) {
if p.at(&NodeKind::If) {
if p.eat_if(NodeKind::Else) {
if p.at(NodeKind::If) {
if_expr(p)?;
} else {
body(p)?;
@ -849,7 +848,7 @@ fn if_expr(p: &mut Parser) -> ParseResult {
/// Parse a while expression.
fn while_expr(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::WhileExpr, |p| {
p.eat_assert(&NodeKind::While);
p.assert(NodeKind::While);
expr(p)?;
body(p)
})
@ -858,9 +857,9 @@ fn while_expr(p: &mut Parser) -> ParseResult {
/// Parse a for expression.
fn for_expr(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::ForExpr, |p| {
p.eat_assert(&NodeKind::For);
p.assert(NodeKind::For);
for_pattern(p)?;
p.eat_expect(&NodeKind::In)?;
p.expect(NodeKind::In)?;
expr(p)?;
body(p)
})
@ -870,7 +869,7 @@ fn for_expr(p: &mut Parser) -> ParseResult {
fn for_pattern(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::ForPattern, |p| {
ident(p)?;
if p.eat_if(&NodeKind::Comma) {
if p.eat_if(NodeKind::Comma) {
ident(p)?;
}
Ok(())
@ -880,9 +879,9 @@ fn for_pattern(p: &mut Parser) -> ParseResult {
/// Parse an import expression.
fn import_expr(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::ImportExpr, |p| {
p.eat_assert(&NodeKind::Import);
p.assert(NodeKind::Import);
if !p.eat_if(&NodeKind::Star) {
if !p.eat_if(NodeKind::Star) {
// This is the list of identifiers scenario.
p.perform(NodeKind::ImportItems, |p| {
p.start_group(Group::Imports);
@ -900,7 +899,7 @@ fn import_expr(p: &mut Parser) -> ParseResult {
});
};
p.eat_expect(&NodeKind::From)?;
p.expect(NodeKind::From)?;
expr(p)
})
}
@ -908,7 +907,7 @@ fn import_expr(p: &mut Parser) -> ParseResult {
/// Parse an include expression.
fn include_expr(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::IncludeExpr, |p| {
p.eat_assert(&NodeKind::Include);
p.assert(NodeKind::Include);
expr(p)
})
}
@ -916,7 +915,7 @@ fn include_expr(p: &mut Parser) -> ParseResult {
/// Parse a break expression.
fn break_expr(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::BreakExpr, |p| {
p.eat_assert(&NodeKind::Break);
p.assert(NodeKind::Break);
Ok(())
})
}
@ -924,7 +923,7 @@ fn break_expr(p: &mut Parser) -> ParseResult {
/// Parse a continue expression.
fn continue_expr(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::ContinueExpr, |p| {
p.eat_assert(&NodeKind::Continue);
p.assert(NodeKind::Continue);
Ok(())
})
}
@ -932,7 +931,7 @@ fn continue_expr(p: &mut Parser) -> ParseResult {
/// Parse a return expression.
fn return_expr(p: &mut Parser) -> ParseResult {
p.perform(NodeKind::ReturnExpr, |p| {
p.eat_assert(&NodeKind::Return);
p.assert(NodeKind::Return);
if !p.eof() {
expr(p)?;
}

View File

@ -20,7 +20,7 @@ pub struct Parser<'s> {
/// The stack of open groups.
groups: Vec<GroupEntry>,
/// The children of the currently built node.
pub children: Vec<Green>,
children: Vec<Green>,
/// Whether the last group was not correctly terminated.
unterminated_group: bool,
/// Whether a group terminator was found, that did not close a group.
@ -52,13 +52,14 @@ impl<'s> Parser<'s> {
}
}
/// End the parsing process and return the last child.
/// End the parsing process and return the parsed children.
pub fn finish(self) -> Vec<Green> {
self.children
}
/// End the parsing process and return multiple children and whether the
/// last token was terminated.
/// End the parsing process and return the parsed children and whether the
/// last token was terminated if all groups were terminated correctly or
/// `None` otherwise.
pub fn consume(self) -> Option<(Vec<Green>, bool)> {
self.terminated().then(|| (self.children, self.tokens.terminated()))
}
@ -130,30 +131,14 @@ impl<'s> Parser<'s> {
}
/// Eat the current token if it is the given one.
pub fn eat_if(&mut self, t: &NodeKind) -> bool {
let at = self.at(t);
pub fn eat_if(&mut self, kind: NodeKind) -> bool {
let at = self.at(kind);
if at {
self.eat();
}
at
}
/// Eat if the current token is the given one and produce an error if not.
pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult {
let eaten = self.eat_if(t);
if !eaten {
self.expected(t.as_str());
}
if eaten { Ok(()) } else { Err(ParseError) }
}
/// Eat, debug-asserting that the token is the given one.
#[track_caller]
pub fn eat_assert(&mut self, t: &NodeKind) {
debug_assert_eq!(self.peek(), Some(t));
self.eat();
}
/// Eat tokens while the condition is true.
pub fn eat_while<F>(&mut self, mut f: F)
where
@ -164,9 +149,28 @@ impl<'s> Parser<'s> {
}
}
/// Eat if the current token is the given one and produce an error if not.
///
/// Returns `Ok(())` after eating the token when it matches `kind`. Otherwise
/// nothing is eaten, the expectation is reported through `self.expected` and
/// `Err(ParseError)` is returned.
pub fn expect(&mut self, kind: NodeKind) -> ParseResult {
let at = self.peek() == Some(&kind);
if at {
self.eat();
Ok(())
} else {
// `kind` is only borrowed for the comparison above so that its name is
// still available for the error message here.
self.expected(kind.as_str());
Err(ParseError)
}
}
/// Eat, debug-asserting that the token is the given one.
///
/// The comparison is a `debug_assert_eq!`, so it is only checked in builds
/// with debug assertions enabled; the current token is eaten unconditionally.
#[track_caller]
pub fn assert(&mut self, kind: NodeKind) {
debug_assert_eq!(self.peek(), Some(&kind));
self.eat();
}
/// Whether the current token is of the given type.
pub fn at(&self, kind: &NodeKind) -> bool {
self.peek() == Some(kind)
pub fn at(&self, kind: NodeKind) -> bool {
self.peek() == Some(&kind)
}
/// Peek at the current token without consuming it.
@ -230,11 +234,11 @@ impl<'s> Parser<'s> {
});
match kind {
Group::Brace => self.eat_assert(&NodeKind::LeftBrace),
Group::Bracket => self.eat_assert(&NodeKind::LeftBracket),
Group::Paren => self.eat_assert(&NodeKind::LeftParen),
Group::Strong => self.eat_assert(&NodeKind::Star),
Group::Emph => self.eat_assert(&NodeKind::Underscore),
Group::Brace => self.assert(NodeKind::LeftBrace),
Group::Bracket => self.assert(NodeKind::LeftBracket),
Group::Paren => self.assert(NodeKind::LeftParen),
Group::Strong => self.assert(NodeKind::Star),
Group::Emph => self.assert(NodeKind::Underscore),
Group::Expr => self.repeek(),
Group::Imports => self.repeek(),
}
@ -411,13 +415,18 @@ impl Parser<'_> {
}
}
/// A marker that indicates where a node may start.
/// Marks a location in a parser's child list.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Marker(usize);
impl Marker {
/// Peek at the child directly before the marker.
///
/// Returns `None` if the marker sits at index 0 (there is no preceding
/// child) or if the preceding index is not within the parser's child list.
pub fn before<'a>(self, p: &'a Parser) -> Option<&'a Green> {
p.children.get(self.0.checked_sub(1)?)
}
/// Peek at the child directly after the marker.
pub fn peek<'a>(self, p: &'a Parser) -> Option<&'a Green> {
pub fn after<'a>(self, p: &'a Parser) -> Option<&'a Green> {
p.children.get(self.0)
}

View File

@ -3,7 +3,7 @@ use std::sync::Arc;
use unicode_xid::UnicodeXID;
use unscanny::Scanner;
use super::{resolve_hex, resolve_raw, resolve_string};
use super::resolve::{resolve_hex, resolve_raw, resolve_string};
use crate::geom::{AngleUnit, LengthUnit};
use crate::syntax::ast::{MathNode, RawNode, Unit};
use crate::syntax::{ErrorPos, NodeKind};
@ -311,7 +311,6 @@ impl<'s> Tokens<'s> {
}
}
#[inline]
fn hash(&mut self) -> NodeKind {
if self.s.at(is_id_start) {
let read = self.s.eat_while(is_id_continue);
@ -661,13 +660,13 @@ pub fn is_ident(string: &str) -> bool {
/// Whether a character can start an identifier.
#[inline]
pub fn is_id_start(c: char) -> bool {
fn is_id_start(c: char) -> bool {
c.is_xid_start() || c == '_'
}
/// Whether a character can continue an identifier.
#[inline]
pub fn is_id_continue(c: char) -> bool {
fn is_id_continue(c: char) -> bool {
c.is_xid_continue() || c == '_' || c == '-'
}

View File

@ -10,7 +10,7 @@ use unscanny::Scanner;
use crate::diag::TypResult;
use crate::loading::{FileHash, Loader};
use crate::parse::{is_newline, parse, Reparser};
use crate::parse::{is_newline, parse, reparse};
use crate::syntax::ast::Markup;
use crate::syntax::{self, Category, GreenNode, RedNode};
use crate::util::{PathExt, StrExt};
@ -228,8 +228,7 @@ impl SourceFile {
/// Edit the source file by replacing the given range and increase the
/// revision number.
///
/// Returns the range of the section in the new source that was ultimately
/// reparsed.
/// Returns the range in the new source that was ultimately reparsed.
///
/// The method panics if the `replace` range is out of bounds.
pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> {
@ -256,7 +255,7 @@ impl SourceFile {
));
// Incrementally reparse the replaced range.
Reparser::new(&self.src, replace, with.len()).reparse(&mut self.root)
reparse(&mut self.root, &self.src, replace, with.len())
}
/// Provide highlighting categories for the given range of the source file.