From 40561e57fbbc68becac07acd54a34f94f591f277 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sun, 15 Jan 2023 12:00:13 +0100 Subject: [PATCH] Remove most fields from `SyntaxKind` enum --- library/src/lib.rs | 2 +- library/src/math/mod.rs | 6 +- src/diag.rs | 2 +- src/ide/complete.rs | 30 +- src/ide/highlight.rs | 47 +- src/ide/tooltip.rs | 16 +- src/model/eval.rs | 32 +- src/model/library.rs | 4 +- src/syntax/ast.rs | 441 ++++--- src/syntax/kind.rs | 281 +---- src/syntax/lexer.rs | 512 ++++---- src/syntax/linked.rs | 0 src/syntax/mod.rs | 9 +- src/syntax/node.rs | 320 +++-- src/syntax/parser.rs | 1311 +++++++++++++++++++-- src/syntax/parsing.rs | 1118 ------------------ src/syntax/{incremental.rs => reparse.rs} | 16 +- src/syntax/resolve.rs | 233 ---- src/syntax/source.rs | 13 +- tests/src/benches.rs | 7 +- tests/typ/math/syntax.typ | 2 +- 21 files changed, 2062 insertions(+), 2340 deletions(-) delete mode 100644 src/syntax/linked.rs delete mode 100644 src/syntax/parsing.rs rename src/syntax/{incremental.rs => reparse.rs} (98%) delete mode 100644 src/syntax/resolve.rs diff --git a/library/src/lib.rs b/library/src/lib.rs index 14b020339..24f3560fb 100644 --- a/library/src/lib.rs +++ b/library/src/lib.rs @@ -208,6 +208,6 @@ fn items() -> LangItems { math_atom: |atom| math::AtomNode(atom).pack(), math_script: |base, sub, sup| math::ScriptNode { base, sub, sup }.pack(), math_frac: |num, denom| math::FracNode { num, denom }.pack(), - math_align_point: |count| math::AlignPointNode(count).pack(), + math_align_point: || math::AlignPointNode.pack(), } } diff --git a/library/src/math/mod.rs b/library/src/math/mod.rs index 62afe573d..52cddc9f7 100644 --- a/library/src/math/mod.rs +++ b/library/src/math/mod.rs @@ -637,12 +637,12 @@ impl Texify for ScriptNode { #[func] #[capable(Texify)] #[derive(Debug, Hash)] -pub struct AlignPointNode(pub NonZeroUsize); +pub struct AlignPointNode; #[node] impl AlignPointNode { - fn construct(_: &Vm, args: &mut Args) -> SourceResult { - Ok(Self(args.expect("index")?).pack()) + fn construct(_: &Vm, _: &mut Args) -> SourceResult { + Ok(Self.pack()) } } diff --git a/src/diag.rs b/src/diag.rs index e0015fcc6..5c5d9de9e 100644 --- a/src/diag.rs +++ b/src/diag.rs @@ -50,7 +50,7 @@ pub type SourceResult = Result>>; /// An error in a source file. /// -/// This contained spans will only be detached if any of the input source files +/// The contained spans will only be detached if any of the input source files /// were detached. #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct SourceError { diff --git a/src/ide/complete.rs b/src/ide/complete.rs index bbc4115a6..9e13fc8dd 100644 --- a/src/ide/complete.rs +++ b/src/ide/complete.rs @@ -138,7 +138,7 @@ fn complete_params(ctx: &mut CompletionContext) -> bool { (SyntaxKind::Colon, _) => prev.prev_leaf(), _ => None, }; - if let SyntaxKind::Ident(param) = before_colon.kind(); + if let Some(param) = before_colon.cast::(); then { ctx.from = match ctx.leaf.kind() { SyntaxKind::Colon | SyntaxKind::Space { .. } => ctx.cursor, @@ -160,11 +160,11 @@ fn complete_params(ctx: &mut CompletionContext) -> bool { deciding.kind(), SyntaxKind::LeftParen | SyntaxKind::Comma - | SyntaxKind::Ident(_) + | SyntaxKind::Ident ); then { ctx.from = match deciding.kind() { - SyntaxKind::Ident(_) => deciding.offset(), + SyntaxKind::Ident => deciding.offset(), _ => ctx.cursor, }; @@ -192,9 +192,9 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool { // Behind half-completed symbol: "$arrow:|$". if_chain! 
{ - if matches!(ctx.leaf.kind(), SyntaxKind::Atom(s) if s == ":"); + if matches!(ctx.leaf.kind(), SyntaxKind::Atom if ctx.leaf.text() == ":"); if let Some(prev) = ctx.leaf.prev_leaf(); - if matches!(prev.kind(), SyntaxKind::Ident(_)); + if matches!(prev.kind(), SyntaxKind::Ident); then { ctx.from = prev.offset(); ctx.symbol_completions(false); @@ -205,7 +205,7 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool { // Start of a symbol: ":|". // Checking for a text node ensures that "\:" isn't completed. if ctx.before.ends_with(':') - && matches!(ctx.leaf.kind(), SyntaxKind::Text(_) | SyntaxKind::Atom(_)) + && matches!(ctx.leaf.kind(), SyntaxKind::Text | SyntaxKind::Atom) { ctx.from = ctx.cursor; ctx.symbol_completions(needs_colon); @@ -213,7 +213,7 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool { } // An existing symbol: ":arrow:". - if matches!(ctx.leaf.kind(), SyntaxKind::Symbol(_)) { + if matches!(ctx.leaf.kind(), SyntaxKind::Symbol) { // We want to complete behind the colon, therefore plus 1. let has_colon = ctx.after.starts_with(':'); ctx.from = ctx.leaf.offset() + (has_colon as usize); @@ -225,12 +225,12 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool { if_chain! { if matches!( ctx.leaf.kind(), - SyntaxKind::Text(_) | SyntaxKind::Atom(_) | SyntaxKind::Ident(_) + SyntaxKind::Text | SyntaxKind::Atom | SyntaxKind::Ident ); if let Some(prev) = ctx.leaf.prev_leaf(); - if matches!(prev.kind(), SyntaxKind::Symbol(_)) || matches!( + if matches!(prev.kind(), SyntaxKind::Symbol) || matches!( prev.kind(), - SyntaxKind::Text(s) | SyntaxKind::Atom(s) if s == ":" + SyntaxKind::Text | SyntaxKind::Atom if prev.text() == ":" ); then { // We want to complete behind the colon, therefore plus 1. @@ -252,14 +252,14 @@ fn complete_markup(ctx: &mut CompletionContext) -> bool { // Start of an interpolated identifier: "#|". // Checking for a text node ensures that "\#" isn't completed. - if ctx.before.ends_with('#') && matches!(ctx.leaf.kind(), SyntaxKind::Text(_)) { + if ctx.before.ends_with('#') && matches!(ctx.leaf.kind(), SyntaxKind::Text) { ctx.from = ctx.cursor; ctx.expr_completions(true); return true; } // An existing identifier: "#pa|". - if matches!(ctx.leaf.kind(), SyntaxKind::Ident(_)) { + if matches!(ctx.leaf.kind(), SyntaxKind::Ident) { // We want to complete behind the hashtag, therefore plus 1. ctx.from = ctx.leaf.offset() + 1; ctx.expr_completions(true); @@ -298,14 +298,14 @@ fn complete_math(ctx: &mut CompletionContext) -> bool { } // Start of an interpolated identifier: "#|". - if matches!(ctx.leaf.kind(), SyntaxKind::Atom(s) if s == "#") { + if matches!(ctx.leaf.kind(), SyntaxKind::Atom if ctx.leaf.text() == "#") { ctx.from = ctx.cursor; ctx.expr_completions(true); return true; } // Behind existing atom or identifier: "$a|$" or "$abc|$". - if matches!(ctx.leaf.kind(), SyntaxKind::Atom(_) | SyntaxKind::Ident(_)) { + if matches!(ctx.leaf.kind(), SyntaxKind::Atom | SyntaxKind::Ident) { ctx.from = ctx.leaf.offset(); ctx.math_completions(); return true; @@ -331,7 +331,7 @@ fn complete_code(ctx: &mut CompletionContext) -> bool { } // An existing identifier: "{ pa| }". 
- if matches!(ctx.leaf.kind(), SyntaxKind::Ident(_)) { + if matches!(ctx.leaf.kind(), SyntaxKind::Ident) { ctx.from = ctx.leaf.offset(); ctx.expr_completions(false); return true; diff --git a/src/ide/highlight.rs b/src/ide/highlight.rs index 321bf9a65..cc502537e 100644 --- a/src/ide/highlight.rs +++ b/src/ide/highlight.rs @@ -119,7 +119,6 @@ pub fn highlight(node: &LinkedNode) -> Option { _ => Category::Operator, }), SyntaxKind::Hat => Some(Category::MathOperator), - SyntaxKind::Amp => Some(Category::MathOperator), SyntaxKind::Dot => Some(Category::Punctuation), SyntaxKind::Eq => match node.parent_kind() { Some(SyntaxKind::Heading) => None, @@ -159,38 +158,38 @@ pub fn highlight(node: &LinkedNode) -> Option { SyntaxKind::As => Some(Category::Keyword), SyntaxKind::Markup { .. } - if node.parent_kind() == Some(&SyntaxKind::TermItem) + if node.parent_kind() == Some(SyntaxKind::TermItem) && node.next_sibling().as_ref().map(|v| v.kind()) - == Some(&SyntaxKind::Colon) => + == Some(SyntaxKind::Colon) => { Some(Category::ListTerm) } SyntaxKind::Markup { .. } => None, - SyntaxKind::Text(_) => None, + SyntaxKind::Text => None, SyntaxKind::Linebreak => Some(Category::Escape), - SyntaxKind::Escape(_) => Some(Category::Escape), - SyntaxKind::Shorthand(_) => Some(Category::Escape), - SyntaxKind::Symbol(_) => Some(Category::Escape), + SyntaxKind::Escape => Some(Category::Escape), + SyntaxKind::Shorthand => Some(Category::Escape), + SyntaxKind::Symbol => Some(Category::Escape), SyntaxKind::SmartQuote { .. } => None, SyntaxKind::Strong => Some(Category::Strong), SyntaxKind::Emph => Some(Category::Emph), - SyntaxKind::Raw(_) => Some(Category::Raw), - SyntaxKind::Link(_) => Some(Category::Link), - SyntaxKind::Label(_) => Some(Category::Label), - SyntaxKind::Ref(_) => Some(Category::Ref), + SyntaxKind::Raw { .. } => Some(Category::Raw), + SyntaxKind::Link => Some(Category::Link), + SyntaxKind::Label => Some(Category::Label), + SyntaxKind::Ref => Some(Category::Ref), SyntaxKind::Heading => Some(Category::Heading), SyntaxKind::ListItem => None, SyntaxKind::EnumItem => None, - SyntaxKind::EnumNumbering(_) => Some(Category::ListMarker), + SyntaxKind::EnumNumbering => Some(Category::ListMarker), SyntaxKind::TermItem => None, SyntaxKind::Math => None, - SyntaxKind::Atom(_) => None, + SyntaxKind::Atom => None, SyntaxKind::Script => None, SyntaxKind::Frac => None, - SyntaxKind::AlignPoint => None, + SyntaxKind::AlignPoint => Some(Category::MathOperator), - SyntaxKind::Ident(_) => match node.parent_kind() { + SyntaxKind::Ident => match node.parent_kind() { Some( SyntaxKind::Markup { .. 
} | SyntaxKind::Math @@ -202,9 +201,9 @@ pub fn highlight(node: &LinkedNode) -> Option { if node .parent() .and_then(|p| p.parent()) - .filter(|gp| gp.kind() == &SyntaxKind::Parenthesized) + .filter(|gp| gp.kind() == SyntaxKind::Parenthesized) .and_then(|gp| gp.parent()) - .map_or(false, |ggp| ggp.kind() == &SyntaxKind::FuncCall) + .map_or(false, |ggp| ggp.kind() == SyntaxKind::FuncCall) && node.next_sibling().is_none() => { Some(Category::Function) @@ -218,17 +217,17 @@ pub fn highlight(node: &LinkedNode) -> Option { Some(SyntaxKind::SetRule) => Some(Category::Function), Some(SyntaxKind::ShowRule) if node.prev_sibling().as_ref().map(|v| v.kind()) - == Some(&SyntaxKind::Show) => + == Some(SyntaxKind::Show) => { Some(Category::Function) } _ => None, }, - SyntaxKind::Bool(_) => Some(Category::Keyword), - SyntaxKind::Int(_) => Some(Category::Number), - SyntaxKind::Float(_) => Some(Category::Number), - SyntaxKind::Numeric(_, _) => Some(Category::Number), - SyntaxKind::Str(_) => Some(Category::String), + SyntaxKind::Bool => Some(Category::Keyword), + SyntaxKind::Int => Some(Category::Number), + SyntaxKind::Float => Some(Category::Number), + SyntaxKind::Numeric => Some(Category::Number), + SyntaxKind::Str => Some(Category::String), SyntaxKind::CodeBlock => None, SyntaxKind::ContentBlock => None, SyntaxKind::Parenthesized => None, @@ -259,7 +258,7 @@ pub fn highlight(node: &LinkedNode) -> Option { SyntaxKind::LoopContinue => None, SyntaxKind::FuncReturn => None, - SyntaxKind::Error(_, _) => Some(Category::Error), + SyntaxKind::Error => Some(Category::Error), } } diff --git a/src/ide/tooltip.rs b/src/ide/tooltip.rs index 62cb11c1d..8c734bbbb 100644 --- a/src/ide/tooltip.rs +++ b/src/ide/tooltip.rs @@ -18,12 +18,12 @@ pub fn tooltip(world: &dyn World, source: &Source, cursor: usize) -> Option Option { if_chain! { - if let SyntaxKind::Ident(ident) = leaf.kind(); + if let Some(ident) = leaf.cast::(); if matches!( leaf.parent_kind(), Some(SyntaxKind::FuncCall | SyntaxKind::SetRule), ); - if let Some(Value::Func(func)) = world.library().scope.get(ident); + if let Some(Value::Func(func)) = world.library().scope.get(&ident); if let Some(info) = func.info(); then { return Some(plain_docs_sentence(&info.docs)); @@ -60,8 +60,8 @@ fn named_param_tooltip(world: &dyn World, leaf: &LinkedNode) -> Option { // Hovering over the parameter name. if_chain! { if leaf.index() == 0; - if let SyntaxKind::Ident(ident) = leaf.kind(); - if let Some(param) = info.param(ident); + if let Some(ident) = leaf.cast::(); + if let Some(param) = info.param(&ident); then { return Some(plain_docs_sentence(param.docs)); } @@ -69,9 +69,9 @@ fn named_param_tooltip(world: &dyn World, leaf: &LinkedNode) -> Option { // Hovering over a string parameter value. if_chain! { - if let SyntaxKind::Str(string) = leaf.kind(); + if let Some(string) = leaf.cast::(); if let Some(param) = info.param(&named.name()); - if let Some(docs) = find_string_doc(¶m.cast, string); + if let Some(docs) = find_string_doc(¶m.cast, &string.get()); then { return Some(docs.into()); } @@ -95,8 +95,8 @@ fn find_string_doc(info: &CastInfo, string: &str) -> Option<&'static str> { fn font_family_tooltip(world: &dyn World, leaf: &LinkedNode) -> Option { if_chain! { // Ensure that we are on top of a string. - if let SyntaxKind::Str(string) = leaf.kind(); - let lower = string.to_lowercase(); + if let Some(string) = leaf.cast::(); + let lower = string.get().to_lowercase(); // Ensure that we are in the arguments to the text function. 
if let Some(parent) = leaf.parent(); diff --git a/src/model/eval.rs b/src/model/eval.rs index 789df0c7b..8e8c93c5c 100644 --- a/src/model/eval.rs +++ b/src/model/eval.rs @@ -16,8 +16,8 @@ use crate::diag::{ }; use crate::geom::{Abs, Angle, Em, Fr, Ratio}; use crate::syntax::ast::AstNode; -use crate::syntax::{ast, Source, SourceId, Span, Spanned, SyntaxKind, SyntaxNode, Unit}; -use crate::util::PathExt; +use crate::syntax::{ast, Source, SourceId, Span, Spanned, SyntaxKind, SyntaxNode}; +use crate::util::{EcoString, PathExt}; use crate::World; const MAX_ITERATIONS: usize = 10_000; @@ -389,13 +389,13 @@ impl Eval for ast::Symbol { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { - Ok((vm.items.symbol)(self.get().clone())) + Ok((vm.items.symbol)(self.get().into())) } } impl ast::Symbol { fn eval_in_math(&self, vm: &mut Vm) -> SourceResult { - Ok((vm.items.symbol)(self.get().clone() + ":op".into())) + Ok((vm.items.symbol)(EcoString::from(self.get()) + ":op".into())) } } @@ -427,8 +427,8 @@ impl Eval for ast::Raw { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { - let text = self.text().clone(); - let lang = self.lang().cloned(); + let text = self.text(); + let lang = self.lang().map(Into::into); let block = self.block(); Ok((vm.items.raw)(text, lang, block)) } @@ -446,7 +446,7 @@ impl Eval for ast::Label { type Output = Value; fn eval(&self, _: &mut Vm) -> SourceResult { - Ok(Value::Label(Label(self.get().clone()))) + Ok(Value::Label(Label(self.get().into()))) } } @@ -454,7 +454,7 @@ impl Eval for ast::Ref { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { - Ok((vm.items.ref_)(self.get().clone())) + Ok((vm.items.ref_)(self.get().into())) } } @@ -542,7 +542,7 @@ impl Eval for ast::AlignPoint { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { - Ok((vm.items.math_align_point)(self.count())) + Ok((vm.items.math_align_point)()) } } @@ -563,7 +563,7 @@ impl ast::Ident { if self.as_untyped().len() == self.len() && matches!(vm.scopes.get(&self), Ok(Value::Func(_)) | Err(_)) { - Ok((vm.items.symbol)(self.get().clone() + ":op".into())) + Ok((vm.items.symbol)(EcoString::from(self.get()) + ":op".into())) } else { Ok(self.eval(vm)?.display_in_math()) } @@ -616,11 +616,11 @@ impl Eval for ast::Numeric { fn eval(&self, _: &mut Vm) -> SourceResult { let (v, unit) = self.get(); Ok(match unit { - Unit::Length(unit) => Abs::with_unit(v, unit).into(), - Unit::Angle(unit) => Angle::with_unit(v, unit).into(), - Unit::Em => Em::new(v).into(), - Unit::Fr => Fr::new(v).into(), - Unit::Percent => Ratio::new(v / 100.0).into(), + ast::Unit::Length(unit) => Abs::with_unit(v, unit).into(), + ast::Unit::Angle(unit) => Angle::with_unit(v, unit).into(), + ast::Unit::Em => Em::new(v).into(), + ast::Unit::Fr => Fr::new(v).into(), + ast::Unit::Percent => Ratio::new(v / 100.0).into(), }) } } @@ -743,7 +743,7 @@ impl Eval for ast::Dict { map.insert(named.name().take().into(), named.expr().eval(vm)?); } ast::DictItem::Keyed(keyed) => { - map.insert(keyed.key().into(), keyed.expr().eval(vm)?); + map.insert(keyed.key().get().into(), keyed.expr().eval(vm)?); } ast::DictItem::Spread(expr) => match expr.eval(vm)? { Value::None => {} diff --git a/src/model/library.rs b/src/model/library.rs index 5360b00aa..96218bb10 100644 --- a/src/model/library.rs +++ b/src/model/library.rs @@ -74,8 +74,8 @@ pub struct LangItems { fn(base: Content, sub: Option, sup: Option) -> Content, /// A fraction in a formula: `x/2`. 
pub math_frac: fn(num: Content, denom: Content) -> Content, - /// An alignment point in a formula: `&`, `&&`. - pub math_align_point: fn(count: NonZeroUsize) -> Content, + /// An alignment point in a formula: `&`. + pub math_align_point: fn() -> Content, } impl Debug for LangItems { diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 3b3186e45..bf4b37bca 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -5,7 +5,12 @@ use std::num::NonZeroUsize; use std::ops::Deref; -use super::{RawFields, Span, SyntaxKind, SyntaxNode, Unit}; +use unscanny::Scanner; + +use super::{ + is_id_continue, is_id_start, is_newline, split_newlines, Span, SyntaxKind, SyntaxNode, +}; +use crate::geom::{AbsUnit, AngleUnit}; use crate::util::EcoString; /// A typed AST node. @@ -117,7 +122,7 @@ pub enum Expr { Script(Script), /// A fraction in a math formula: `x/2`. Frac(Frac), - /// An alignment point in a math formula: `&`, `&&`. + /// An alignment point in a math formula: `&`. AlignPoint(AlignPoint), /// An identifier: `left`. Ident(Ident), @@ -194,34 +199,34 @@ impl AstNode for Expr { fn from_untyped(node: &SyntaxNode) -> Option { match node.kind() { SyntaxKind::Linebreak => node.cast().map(Self::Linebreak), - SyntaxKind::Text(_) => node.cast().map(Self::Text), - SyntaxKind::Escape(_) => node.cast().map(Self::Escape), - SyntaxKind::Shorthand(_) => node.cast().map(Self::Shorthand), - SyntaxKind::Symbol(_) => node.cast().map(Self::Symbol), + SyntaxKind::Text => node.cast().map(Self::Text), + SyntaxKind::Escape => node.cast().map(Self::Escape), + SyntaxKind::Shorthand => node.cast().map(Self::Shorthand), + SyntaxKind::Symbol => node.cast().map(Self::Symbol), SyntaxKind::SmartQuote { .. } => node.cast().map(Self::SmartQuote), SyntaxKind::Strong => node.cast().map(Self::Strong), SyntaxKind::Emph => node.cast().map(Self::Emph), - SyntaxKind::Raw(_) => node.cast().map(Self::Raw), - SyntaxKind::Link(_) => node.cast().map(Self::Link), - SyntaxKind::Label(_) => node.cast().map(Self::Label), - SyntaxKind::Ref(_) => node.cast().map(Self::Ref), + SyntaxKind::Raw { .. 
} => node.cast().map(Self::Raw), + SyntaxKind::Link => node.cast().map(Self::Link), + SyntaxKind::Label => node.cast().map(Self::Label), + SyntaxKind::Ref => node.cast().map(Self::Ref), SyntaxKind::Heading => node.cast().map(Self::Heading), SyntaxKind::ListItem => node.cast().map(Self::List), SyntaxKind::EnumItem => node.cast().map(Self::Enum), SyntaxKind::TermItem => node.cast().map(Self::Term), SyntaxKind::Math => node.cast().map(Self::Math), - SyntaxKind::Atom(_) => node.cast().map(Self::Atom), + SyntaxKind::Atom => node.cast().map(Self::Atom), SyntaxKind::Script => node.cast().map(Self::Script), SyntaxKind::Frac => node.cast().map(Self::Frac), SyntaxKind::AlignPoint => node.cast().map(Self::AlignPoint), - SyntaxKind::Ident(_) => node.cast().map(Self::Ident), + SyntaxKind::Ident => node.cast().map(Self::Ident), SyntaxKind::None => node.cast().map(Self::None), SyntaxKind::Auto => node.cast().map(Self::Auto), - SyntaxKind::Bool(_) => node.cast().map(Self::Bool), - SyntaxKind::Int(_) => node.cast().map(Self::Int), - SyntaxKind::Float(_) => node.cast().map(Self::Float), - SyntaxKind::Numeric(_, _) => node.cast().map(Self::Numeric), - SyntaxKind::Str(_) => node.cast().map(Self::Str), + SyntaxKind::Bool => node.cast().map(Self::Bool), + SyntaxKind::Int => node.cast().map(Self::Int), + SyntaxKind::Float => node.cast().map(Self::Float), + SyntaxKind::Numeric => node.cast().map(Self::Numeric), + SyntaxKind::Str => node.cast().map(Self::Str), SyntaxKind::CodeBlock => node.cast().map(Self::Code), SyntaxKind::ContentBlock => node.cast().map(Self::Content), SyntaxKind::Parenthesized => node.cast().map(Self::Parenthesized), @@ -315,7 +320,7 @@ impl Space { /// Get the number of newlines. pub fn newlines(&self) -> usize { match self.0.kind() { - &SyntaxKind::Space { newlines } => newlines, + SyntaxKind::Space { newlines } => newlines, _ => panic!("space is of wrong kind"), } } @@ -334,10 +339,7 @@ node! { impl Text { /// Get the text. pub fn get(&self) -> &EcoString { - match self.0.kind() { - SyntaxKind::Text(v) => v, - _ => panic!("text is of wrong kind"), - } + self.0.text() } } @@ -349,15 +351,22 @@ node! { impl Escape { /// Get the escaped character. pub fn get(&self) -> char { - match self.0.kind() { - &SyntaxKind::Escape(v) => v, - _ => panic!("escape is of wrong kind"), + let mut s = Scanner::new(self.0.text()); + s.expect('\\'); + if s.eat_if("u{") { + let hex = s.eat_while(char::is_ascii_hexdigit); + u32::from_str_radix(hex, 16) + .ok() + .and_then(std::char::from_u32) + .expect("unicode escape is invalid") + } else { + s.eat().expect("escape is missing escaped character") } } } node! { - /// A shorthand for a unicode codepoint. For example, `~` for non-breaking + /// A shorthand for a unicode codepoint. For example, `~` for a non-breaking /// space or `-?` for a soft hyphen. Shorthand } @@ -365,9 +374,26 @@ node! { impl Shorthand { /// Get the shorthanded character. pub fn get(&self) -> char { - match self.0.kind() { - &SyntaxKind::Shorthand(v) => v, - _ => panic!("shorthand is of wrong kind"), + match self.0.text().as_str() { + "~" => '\u{00A0}', + "..." => '\u{2026}', + "--" => '\u{2013}', + "---" => '\u{2014}', + "-?" => '\u{00AD}', + "!=" => '≠', + "<=" => '≤', + ">=" => '≥', + "<-" => '←', + "->" => '→', + "=>" => '⇒', + ":=" => '≔', + "[|" => '⟦', + "|]" => '⟧', + "||" => '‖', + "|->" => '↦', + "<->" => '↔', + "<=>" => '⇔', + _ => panic!("shorthand is invalid"), } } } @@ -379,11 +405,8 @@ node! { impl Symbol { /// Get the symbol's notation. 
- pub fn get(&self) -> &EcoString { - match self.0.kind() { - SyntaxKind::Symbol(v) => v, - _ => panic!("symbol is of wrong kind"), - } + pub fn get(&self) -> &str { + self.0.text().trim_matches(':') } } @@ -395,10 +418,7 @@ node! { impl SmartQuote { /// Whether this is a double quote. pub fn double(&self) -> bool { - match self.0.kind() { - &SyntaxKind::SmartQuote { double } => double, - _ => panic!("smart quote is of wrong kind"), - } + self.0.text() == "\"" } } @@ -410,7 +430,7 @@ node! { impl Strong { /// The contents of the strong node. pub fn body(&self) -> Markup { - self.0.cast_first_child().expect("strong node is missing markup body") + self.0.cast_first_match().expect("strong emphasis is missing body") } } @@ -422,9 +442,7 @@ node! { impl Emph { /// The contents of the emphasis node. pub fn body(&self) -> Markup { - self.0 - .cast_first_child() - .expect("emphasis node is missing markup body") + self.0.cast_first_match().expect("emphasis is missing body") } } @@ -434,27 +452,75 @@ node! { } impl Raw { - /// The raw text. - pub fn text(&self) -> &EcoString { - &self.get().text + /// The trimmed raw text. + pub fn text(&self) -> EcoString { + let SyntaxKind::Raw { column } = self.0.kind() else { + panic!("raw node is of wrong kind"); + }; + + let mut text = self.0.text().as_str(); + let blocky = text.starts_with("```"); + text = text.trim_matches('`'); + + // Trim tag, one space at the start, and one space at the end if the + // last non-whitespace char is a backtick. + if blocky { + let mut s = Scanner::new(text); + if s.eat_if(is_id_start) { + s.eat_while(is_id_continue); + } + text = s.after(); + text = text.strip_prefix(' ').unwrap_or(text); + if text.trim_end().ends_with('`') { + text = text.strip_suffix(' ').unwrap_or(text); + } + } + + // Split into lines. + let mut lines = split_newlines(text); + + if blocky { + // Dedent based on column, but not for the first line. + for line in lines.iter_mut().skip(1) { + let offset = line + .chars() + .take(column) + .take_while(|c| c.is_whitespace()) + .map(char::len_utf8) + .sum(); + *line = &line[offset..]; + } + + let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace); + + // Trims a sequence of whitespace followed by a newline at the start. + if lines.first().map_or(false, is_whitespace) { + lines.remove(0); + } + + // Trims a newline followed by a sequence of whitespace at the end. + if lines.last().map_or(false, is_whitespace) { + lines.pop(); + } + } + + lines.join("\n").into() } /// An optional identifier specifying the language to syntax-highlight in. - pub fn lang(&self) -> Option<&EcoString> { - self.get().lang.as_ref() + pub fn lang(&self) -> Option<&str> { + let inner = self.0.text().trim_start_matches('`'); + let mut s = Scanner::new(inner); + s.eat_if(is_id_start).then(|| { + s.eat_while(is_id_continue); + s.before() + }) } /// Whether the raw text should be displayed in a separate block. pub fn block(&self) -> bool { - self.get().block - } - - /// The raw fields. - fn get(&self) -> &RawFields { - match self.0.kind() { - SyntaxKind::Raw(v) => v.as_ref(), - _ => panic!("raw is of wrong kind"), - } + let text = self.0.text(); + text.starts_with("```") && text.chars().any(is_newline) } } @@ -466,10 +532,7 @@ node! { impl Link { /// Get the URL. pub fn url(&self) -> &EcoString { - match self.0.kind() { - SyntaxKind::Link(url) => url, - _ => panic!("link is of wrong kind"), - } + self.0.text() } } @@ -480,11 +543,8 @@ node! { impl Label { /// Get the label's text. 
- pub fn get(&self) -> &EcoString { - match self.0.kind() { - SyntaxKind::Label(v) => v, - _ => panic!("label is of wrong kind"), - } + pub fn get(&self) -> &str { + self.0.text().trim_start_matches('<').trim_end_matches('>') } } @@ -495,11 +555,8 @@ node! { impl Ref { /// Get the target. - pub fn get(&self) -> &EcoString { - match self.0.kind() { - SyntaxKind::Ref(v) => v, - _ => panic!("reference is of wrong kind"), - } + pub fn get(&self) -> &str { + self.0.text().trim_start_matches('@') } } @@ -511,14 +568,14 @@ node! { impl Heading { /// The contents of the heading. pub fn body(&self) -> Markup { - self.0.cast_first_child().expect("heading is missing markup body") + self.0.cast_first_match().expect("heading is missing markup body") } /// The section depth (numer of equals signs). pub fn level(&self) -> NonZeroUsize { self.0 .children() - .filter(|n| n.kind() == &SyntaxKind::Eq) + .filter(|n| n.kind() == SyntaxKind::Eq) .count() .try_into() .expect("heading is missing equals sign") @@ -533,7 +590,7 @@ node! { impl ListItem { /// The contents of the list item. pub fn body(&self) -> Markup { - self.0.cast_first_child().expect("list item is missing body") + self.0.cast_first_match().expect("list item is missing body") } } @@ -546,14 +603,14 @@ impl EnumItem { /// The explicit numbering, if any: `23.`. pub fn number(&self) -> Option { self.0.children().find_map(|node| match node.kind() { - SyntaxKind::EnumNumbering(num) => Some(*num), + SyntaxKind::EnumNumbering => node.text().trim_end_matches('.').parse().ok(), _ => Option::None, }) } /// The contents of the list item. pub fn body(&self) -> Markup { - self.0.cast_first_child().expect("enum item is missing body") + self.0.cast_first_match().expect("enum item is missing body") } } @@ -565,13 +622,13 @@ node! { impl TermItem { /// The term described by the item. pub fn term(&self) -> Markup { - self.0.cast_first_child().expect("term list item is missing term") + self.0.cast_first_match().expect("term list item is missing term") } /// The description of the term. pub fn description(&self) -> Markup { self.0 - .cast_last_child() + .cast_last_match() .expect("term list item is missing description") } } @@ -602,10 +659,7 @@ node! { impl Atom { /// Get the atom's text. pub fn get(&self) -> &EcoString { - match self.0.kind() { - SyntaxKind::Atom(v) => v, - _ => panic!("atom is of wrong kind"), - } + self.0.text() } } @@ -617,7 +671,7 @@ node! { impl Script { /// The base of the script. pub fn base(&self) -> Expr { - self.0.cast_first_child().expect("script node is missing base") + self.0.cast_first_match().expect("script node is missing base") } /// The subscript. @@ -647,32 +701,20 @@ node! { impl Frac { /// The numerator. pub fn num(&self) -> Expr { - self.0.cast_first_child().expect("fraction is missing numerator") + self.0.cast_first_match().expect("fraction is missing numerator") } /// The denominator. pub fn denom(&self) -> Expr { - self.0.cast_last_child().expect("fraction is missing denominator") + self.0.cast_last_match().expect("fraction is missing denominator") } } node! { - /// An alignment point in a formula: `&`, `&&`. + /// An alignment point in a formula: `&`. AlignPoint } -impl AlignPoint { - /// The number of ampersands. - pub fn count(&self) -> NonZeroUsize { - self.0 - .children() - .filter(|n| n.kind() == &SyntaxKind::Amp) - .count() - .try_into() - .expect("alignment point is missing ampersand sign") - } -} - node! { /// An identifier: `it`. Ident @@ -680,18 +722,16 @@ node! { impl Ident { /// Get the identifier. 
- pub fn get(&self) -> &EcoString { - match self.0.kind() { - SyntaxKind::Ident(id) => id, - _ => panic!("identifier is of wrong kind"), - } + pub fn get(&self) -> &str { + self.0.text().trim_start_matches('#') } /// Take out the container identifier. pub fn take(self) -> EcoString { - match self.0.take() { - SyntaxKind::Ident(id) => id, - _ => panic!("identifier is of wrong kind"), + let text = self.0.into_text(); + match text.strip_prefix('#') { + Some(text) => text.into(), + Option::None => text, } } @@ -727,10 +767,7 @@ node! { impl Bool { /// Get the value. pub fn get(&self) -> bool { - match self.0.kind() { - SyntaxKind::Bool(v) => *v, - _ => panic!("boolean is of wrong kind"), - } + self.0.text() == "true" } } @@ -742,10 +779,7 @@ node! { impl Int { /// Get the value. pub fn get(&self) -> i64 { - match self.0.kind() { - SyntaxKind::Int(v) => *v, - _ => panic!("integer is of wrong kind"), - } + self.0.text().parse().expect("integer is invalid") } } @@ -757,10 +791,7 @@ node! { impl Float { /// Get the value. pub fn get(&self) -> f64 { - match self.0.kind() { - SyntaxKind::Float(v) => *v, - _ => panic!("float is of wrong kind"), - } + self.0.text().parse().expect("float is invalid") } } @@ -772,13 +803,47 @@ node! { impl Numeric { /// Get the value and unit. pub fn get(&self) -> (f64, Unit) { - match self.0.kind() { - SyntaxKind::Numeric(v, unit) => (*v, *unit), - _ => panic!("numeric is of wrong kind"), - } + let text = self.0.text(); + let count = text + .chars() + .rev() + .take_while(|c| matches!(c, 'a'..='z' | '%')) + .count(); + + let split = text.len() - count; + let value = text[..split].parse().expect("number is invalid"); + let unit = match &text[split..] { + "pt" => Unit::Length(AbsUnit::Pt), + "mm" => Unit::Length(AbsUnit::Mm), + "cm" => Unit::Length(AbsUnit::Cm), + "in" => Unit::Length(AbsUnit::In), + "deg" => Unit::Angle(AngleUnit::Deg), + "rad" => Unit::Angle(AngleUnit::Rad), + "em" => Unit::Em, + "fr" => Unit::Fr, + "%" => Unit::Percent, + _ => panic!("number has invalid suffix"), + }; + + (value, unit) } } +/// Unit of a numeric value. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum Unit { + /// An absolute length unit. + Length(AbsUnit), + /// An angular unit. + Angle(AngleUnit), + /// Font-relative: `1em` is the same as the font size. + Em, + /// Fractions: `fr`. + Fr, + /// Percentage: `%`. + Percent, +} + node! { /// A quoted string: `"..."`. Str @@ -786,11 +851,46 @@ node! { impl Str { /// Get the value. - pub fn get(&self) -> &EcoString { - match self.0.kind() { - SyntaxKind::Str(v) => v, - _ => panic!("string is of wrong kind"), + pub fn get(&self) -> EcoString { + let text = self.0.text(); + let unquoted = &text[1..text.len() - 1]; + if !unquoted.contains('\\') { + return unquoted.into(); } + + let mut out = EcoString::with_capacity(unquoted.len()); + let mut s = Scanner::new(unquoted); + + while let Some(c) = s.eat() { + if c != '\\' { + out.push(c); + continue; + } + + let start = s.locate(-1); + match s.eat() { + Some('\\') => out.push('\\'), + Some('"') => out.push('"'), + Some('n') => out.push('\n'), + Some('r') => out.push('\r'), + Some('t') => out.push('\t'), + Some('u') if s.eat_if('{') => { + let sequence = s.eat_while(char::is_ascii_hexdigit); + s.eat_if('}'); + + match u32::from_str_radix(sequence, 16) + .ok() + .and_then(std::char::from_u32) + { + Some(c) => out.push(c), + Option::None => out.push_str(s.from(start)), + } + } + _ => out.push_str(s.from(start)), + } + } + + out } } @@ -814,7 +914,7 @@ node! 
{ impl ContentBlock { /// The contained markup. pub fn body(&self) -> Markup { - self.0.cast_first_child().expect("content block is missing body") + self.0.cast_first_match().expect("content block is missing body") } } @@ -827,7 +927,7 @@ impl Parenthesized { /// The wrapped expression. pub fn expr(&self) -> Expr { self.0 - .cast_first_child() + .cast_first_match() .expect("parenthesized expression is missing expression") } } @@ -856,7 +956,7 @@ pub enum ArrayItem { impl AstNode for ArrayItem { fn from_untyped(node: &SyntaxNode) -> Option { match node.kind() { - SyntaxKind::Spread => node.cast_first_child().map(Self::Spread), + SyntaxKind::Spread => node.cast_first_match().map(Self::Spread), _ => node.cast().map(Self::Pos), } } @@ -897,7 +997,7 @@ impl AstNode for DictItem { match node.kind() { SyntaxKind::Named => node.cast().map(Self::Named), SyntaxKind::Keyed => node.cast().map(Self::Keyed), - SyntaxKind::Spread => node.cast_first_child().map(Self::Spread), + SyntaxKind::Spread => node.cast_first_match().map(Self::Spread), _ => Option::None, } } @@ -919,12 +1019,12 @@ node! { impl Named { /// The name: `thickness`. pub fn name(&self) -> Ident { - self.0.cast_first_child().expect("named pair is missing name") + self.0.cast_first_match().expect("named pair is missing name") } /// The right-hand side of the pair: `3pt`. pub fn expr(&self) -> Expr { - self.0.cast_last_child().expect("named pair is missing expression") + self.0.cast_last_match().expect("named pair is missing expression") } } @@ -935,19 +1035,16 @@ node! { impl Keyed { /// The key: `"spacy key"`. - pub fn key(&self) -> EcoString { + pub fn key(&self) -> Str { self.0 .children() - .find_map(|node| match node.kind() { - SyntaxKind::Str(key) => Some(key.clone()), - _ => Option::None, - }) + .find_map(|node| node.cast::()) .expect("keyed pair is missing key") } /// The right-hand side of the pair: `true`. pub fn expr(&self) -> Expr { - self.0.cast_last_child().expect("keyed pair is missing expression") + self.0.cast_last_match().expect("keyed pair is missing expression") } } @@ -967,7 +1064,7 @@ impl Unary { /// The expression to operate on: `x`. pub fn expr(&self) -> Expr { - self.0.cast_last_child().expect("unary operation is missing child") + self.0.cast_last_match().expect("unary operation is missing child") } } @@ -984,7 +1081,7 @@ pub enum UnOp { impl UnOp { /// Try to convert the token into a unary operation. - pub fn from_token(token: &SyntaxKind) -> Option { + pub fn from_token(token: SyntaxKind) -> Option { Some(match token { SyntaxKind::Plus => Self::Pos, SyntaxKind::Minus => Self::Neg, @@ -1036,14 +1133,14 @@ impl Binary { /// The left-hand side of the operation: `a`. pub fn lhs(&self) -> Expr { self.0 - .cast_first_child() + .cast_first_match() .expect("binary operation is missing left-hand side") } /// The right-hand side of the operation: `b`. pub fn rhs(&self) -> Expr { self.0 - .cast_last_child() + .cast_last_match() .expect("binary operation is missing right-hand side") } } @@ -1093,7 +1190,7 @@ pub enum BinOp { impl BinOp { /// Try to convert the token into a binary operation. - pub fn from_token(token: &SyntaxKind) -> Option { + pub fn from_token(token: SyntaxKind) -> Option { Some(match token { SyntaxKind::Plus => Self::Add, SyntaxKind::Minus => Self::Sub, @@ -1210,12 +1307,12 @@ node! { impl FieldAccess { /// The expression to access the field on. 
pub fn target(&self) -> Expr { - self.0.cast_first_child().expect("field access is missing object") + self.0.cast_first_match().expect("field access is missing object") } /// The name of the field. pub fn field(&self) -> Ident { - self.0.cast_last_child().expect("field access is missing name") + self.0.cast_last_match().expect("field access is missing name") } } @@ -1227,13 +1324,13 @@ node! { impl FuncCall { /// The function to call. pub fn callee(&self) -> Expr { - self.0.cast_first_child().expect("function call is missing callee") + self.0.cast_first_match().expect("function call is missing callee") } /// The arguments to the function. pub fn args(&self) -> Args { self.0 - .cast_last_child() + .cast_last_match() .expect("function call is missing argument list") } } @@ -1246,18 +1343,18 @@ node! { impl MethodCall { /// The expression to call the method on. pub fn target(&self) -> Expr { - self.0.cast_first_child().expect("method call is missing target") + self.0.cast_first_match().expect("method call is missing target") } /// The name of the method. pub fn method(&self) -> Ident { - self.0.cast_last_child().expect("method call is missing name") + self.0.cast_last_match().expect("method call is missing name") } /// The arguments to the method. pub fn args(&self) -> Args { self.0 - .cast_last_child() + .cast_last_match() .expect("method call is missing argument list") } } @@ -1289,7 +1386,7 @@ impl AstNode for Arg { fn from_untyped(node: &SyntaxNode) -> Option { match node.kind() { SyntaxKind::Named => node.cast().map(Self::Named), - SyntaxKind::Spread => node.cast_first_child().map(Self::Spread), + SyntaxKind::Spread => node.cast_first_match().map(Self::Spread), _ => node.cast().map(Self::Pos), } } @@ -1320,7 +1417,7 @@ impl Closure { pub fn params(&self) -> impl DoubleEndedIterator + '_ { self.0 .children() - .find(|x| x.kind() == &SyntaxKind::Params) + .find(|x| x.kind() == SyntaxKind::Params) .expect("closure is missing parameter list") .children() .filter_map(SyntaxNode::cast) @@ -1328,7 +1425,7 @@ impl Closure { /// The body of the closure. pub fn body(&self) -> Expr { - self.0.cast_last_child().expect("closure is missing body") + self.0.cast_last_match().expect("closure is missing body") } } @@ -1346,9 +1443,9 @@ pub enum Param { impl AstNode for Param { fn from_untyped(node: &SyntaxNode) -> Option { match node.kind() { - SyntaxKind::Ident(_) => node.cast().map(Self::Pos), + SyntaxKind::Ident => node.cast().map(Self::Pos), SyntaxKind::Named => node.cast().map(Self::Named), - SyntaxKind::Spread => node.cast_first_child().map(Self::Sink), + SyntaxKind::Spread => node.cast_first_match().map(Self::Sink), _ => Option::None, } } @@ -1370,7 +1467,7 @@ node! { impl LetBinding { /// The binding to assign to. pub fn binding(&self) -> Ident { - match self.0.cast_first_child() { + match self.0.cast_first_match() { Some(Expr::Ident(binding)) => binding, Some(Expr::Closure(closure)) => { closure.name().expect("let-bound closure is missing name") @@ -1381,12 +1478,12 @@ impl LetBinding { /// The expression the binding is initialized with. pub fn init(&self) -> Option { - if self.0.cast_first_child::().is_some() { + if self.0.cast_first_match::().is_some() { // This is a normal binding like `let x = 1`. self.0.children().filter_map(SyntaxNode::cast).nth(1) } else { // This is a closure binding like `let f(x) = 1`. - self.0.cast_first_child() + self.0.cast_first_match() } } } @@ -1399,19 +1496,19 @@ node! { impl SetRule { /// The function to set style properties for. 
pub fn target(&self) -> Ident { - self.0.cast_first_child().expect("set rule is missing target") + self.0.cast_first_match().expect("set rule is missing target") } /// The style properties to set. pub fn args(&self) -> Args { - self.0.cast_last_child().expect("set rule is missing argument list") + self.0.cast_last_match().expect("set rule is missing argument list") } /// A condition under which the set rule applies. pub fn condition(&self) -> Option { self.0 .children() - .skip_while(|child| child.kind() != &SyntaxKind::If) + .skip_while(|child| child.kind() != SyntaxKind::If) .find_map(SyntaxNode::cast) } } @@ -1427,13 +1524,13 @@ impl ShowRule { self.0 .children() .rev() - .skip_while(|child| child.kind() != &SyntaxKind::Colon) + .skip_while(|child| child.kind() != SyntaxKind::Colon) .find_map(SyntaxNode::cast) } /// The transformation recipe. pub fn transform(&self) -> Expr { - self.0.cast_last_child().expect("show rule is missing transform") + self.0.cast_last_match().expect("show rule is missing transform") } } @@ -1445,7 +1542,7 @@ node! { impl Conditional { /// The condition which selects the body to evaluate. pub fn condition(&self) -> Expr { - self.0.cast_first_child().expect("conditional is missing condition") + self.0.cast_first_match().expect("conditional is missing condition") } /// The expression to evaluate if the condition is true. @@ -1471,12 +1568,12 @@ node! { impl WhileLoop { /// The condition which selects whether to evaluate the body. pub fn condition(&self) -> Expr { - self.0.cast_first_child().expect("while loop is missing condition") + self.0.cast_first_match().expect("while loop is missing condition") } /// The expression to evaluate while the condition is true. pub fn body(&self) -> Expr { - self.0.cast_last_child().expect("while loop is missing body") + self.0.cast_last_match().expect("while loop is missing body") } } @@ -1488,17 +1585,17 @@ node! { impl ForLoop { /// The pattern to assign to. pub fn pattern(&self) -> ForPattern { - self.0.cast_first_child().expect("for loop is missing pattern") + self.0.cast_first_match().expect("for loop is missing pattern") } /// The expression to iterate over. pub fn iter(&self) -> Expr { - self.0.cast_first_child().expect("for loop is missing iterable") + self.0.cast_first_match().expect("for loop is missing iterable") } /// The expression to evaluate for each iteration. pub fn body(&self) -> Expr { - self.0.cast_last_child().expect("for loop is missing body") + self.0.cast_last_match().expect("for loop is missing body") } } @@ -1521,7 +1618,7 @@ impl ForPattern { /// The value part of the pattern. pub fn value(&self) -> Ident { - self.0.cast_last_child().expect("for loop pattern is missing value") + self.0.cast_last_match().expect("for loop pattern is missing value") } } @@ -1533,7 +1630,7 @@ node! { impl ModuleImport { /// The module or path from which the items should be imported. pub fn source(&self) -> Expr { - self.0.cast_last_child().expect("module import is missing source") + self.0.cast_last_match().expect("module import is missing source") } /// The items to be imported. @@ -1566,7 +1663,7 @@ node! { impl ModuleInclude { /// The module or path from which the content should be included. pub fn source(&self) -> Expr { - self.0.cast_last_child().expect("module include is missing path") + self.0.cast_last_match().expect("module include is missing path") } } @@ -1588,6 +1685,6 @@ node! { impl FuncReturn { /// The expression to return. 
pub fn body(&self) -> Option { - self.0.cast_last_child() + self.0.cast_last_match() } } diff --git a/src/syntax/kind.rs b/src/syntax/kind.rs index 55f4b3ad8..26e92b930 100644 --- a/src/syntax/kind.rs +++ b/src/syntax/kind.rs @@ -1,14 +1,7 @@ -use std::hash::{Hash, Hasher}; -use std::num::NonZeroUsize; -use std::sync::Arc; - -use crate::geom::{AbsUnit, AngleUnit}; -use crate::util::EcoString; - /// All syntactical building blocks that can be part of a Typst document. /// /// Can be created by the lexer or by the parser. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] pub enum SyntaxKind { /// A line comment: `// ...`. LineComment, @@ -58,8 +51,6 @@ pub enum SyntaxKind { Slash, /// The superscript operator in a formula: `^`. Hat, - /// The alignment operator in a formula: `&`. - Amp, /// The field access and method call operator: `.`. Dot, /// The assignment operator: `=`. @@ -135,31 +126,31 @@ pub enum SyntaxKind { /// so it is zero except inside indent-aware constructs like lists. Markup { min_indent: usize }, /// Plain text without markup. - Text(EcoString), + Text, /// A forced line break: `\`. Linebreak, /// An escape sequence: `\#`, `\u{1F5FA}`. - Escape(char), + Escape, /// A shorthand for a unicode codepoint. For example, `~` for non-breaking /// space or `-?` for a soft hyphen. - Shorthand(char), + Shorthand, /// Symbol notation: `:arrow:l:`. The string only contains the inner part /// without leading and trailing dot. - Symbol(EcoString), + Symbol, /// A smart quote: `'` or `"`. - SmartQuote { double: bool }, + SmartQuote, /// Strong content: `*Strong*`. Strong, /// Emphasized content: `_Emphasized_`. Emph, /// Raw text with optional syntax highlighting: `` `...` ``. - Raw(Arc), + Raw { column: usize }, /// A hyperlink: `https://typst.org`. - Link(EcoString), + Link, /// A label: ``. - Label(EcoString), + Label, /// A reference: `@target`. - Ref(EcoString), + Ref, /// A section heading: `= Introduction`. Heading, /// An item in a bullet list: `- ...`. @@ -167,32 +158,32 @@ pub enum SyntaxKind { /// An item in an enumeration (numbered list): `+ ...` or `1. ...`. EnumItem, /// An explicit enumeration numbering: `23.`. - EnumNumbering(NonZeroUsize), + EnumNumbering, /// An item in a term list: `/ Term: Details`. TermItem, /// A mathematical formula: `$x$`, `$ x^2 $`. Math, /// An atom in a formula: `x`, `+`, `12`. - Atom(EcoString), + Atom, /// A base with optional sub- and superscripts in a formula: `a_1^2`. Script, /// A fraction in a formula: `x/2`. Frac, - /// An alignment point in a formula: `&`, `&&`. + /// An alignment point in a formula: `&`. AlignPoint, /// An identifier: `it`. - Ident(EcoString), + Ident, /// A boolean: `true`, `false`. - Bool(bool), + Bool, /// An integer: `120`. - Int(i64), + Int, /// A floating-point number: `1.2`, `10e-4`. - Float(f64), + Float, /// A numeric value with a unit: `12pt`, `3cm`, `2em`, `90deg`, `50%`. - Numeric(f64, Unit), + Numeric, /// A quoted string: `"..."`. - Str(EcoString), + Str, /// A code block: `{ let x = 1; x + 2 }`. CodeBlock, /// A content block: `[*Hi* there!]`. @@ -253,73 +244,37 @@ pub enum SyntaxKind { FuncReturn, /// An invalid sequence of characters. - Error(ErrorPos, EcoString), -} - -/// Fields of the raw syntax kind. -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct RawFields { - /// An optional identifier specifying the language to syntax-highlight in. 
- pub lang: Option, - /// The raw text, determined as the raw string between the backticks trimmed - /// according to the above rules. - pub text: EcoString, - /// Whether the element is block-level, that is, it has 3+ backticks - /// and contains at least one newline. - pub block: bool, -} - -/// Unit of a numeric value. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum Unit { - /// An absolute length unit. - Length(AbsUnit), - /// An angular unit. - Angle(AngleUnit), - /// Font-relative: `1em` is the same as the font size. - Em, - /// Fractions: `fr`. - Fr, - /// Percentage: `%`. - Percent, -} - -/// Where in a node an error should be annotated, -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum ErrorPos { - /// Over the full width of the node. - Full, - /// At the start of the node. - Start, - /// At the end of the node. - End, + Error, } impl SyntaxKind { /// Whether this is trivia. - pub fn is_trivia(&self) -> bool { - self.is_space() - || self.is_error() - || matches!(self, Self::LineComment | Self::BlockComment) + pub fn is_trivia(self) -> bool { + self.is_space() || self.is_comment() || self.is_error() } /// Whether this is a space. - pub fn is_space(&self) -> bool { + pub fn is_space(self) -> bool { matches!(self, Self::Space { .. }) } - /// Whether this is a left or right parenthesis. - pub fn is_paren(&self) -> bool { - matches!(self, Self::LeftParen | Self::RightParen) + /// Whether this is a comment. + pub fn is_comment(self) -> bool { + matches!(self, Self::LineComment | Self::BlockComment) } /// Whether this is an error. - pub fn is_error(&self) -> bool { - matches!(self, SyntaxKind::Error(_, _)) + pub fn is_error(self) -> bool { + matches!(self, SyntaxKind::Error) + } + + /// Whether this is a left or right parenthesis. + pub fn is_paren(self) -> bool { + matches!(self, Self::LeftParen | Self::RightParen) } /// Does this node need termination through a semicolon or linebreak? - pub fn is_stmt(&self) -> bool { + pub fn is_stmt(self) -> bool { matches!( self, SyntaxKind::LetBinding @@ -331,7 +286,7 @@ impl SyntaxKind { } /// A human-readable name for the kind. - pub fn name(&self) -> &'static str { + pub fn name(self) -> &'static str { match self { Self::LineComment => "line comment", Self::BlockComment => "block comment", @@ -348,13 +303,11 @@ impl SyntaxKind { Self::Star => "star", Self::Underscore => "underscore", Self::Dollar => "dollar sign", - Self::SmartQuote { double: false } => "single quote", - Self::SmartQuote { double: true } => "double quote", + Self::SmartQuote => "smart quote", Self::Plus => "plus", Self::Minus => "minus", Self::Slash => "slash", Self::Hat => "hat", - Self::Amp => "ampersand", Self::Dot => "dot", Self::Eq => "assignment operator", Self::EqEq => "equality operator", @@ -389,41 +342,33 @@ impl SyntaxKind { Self::Include => "keyword `include`", Self::As => "keyword `as`", Self::Markup { .. } => "markup", - Self::Text(_) => "text", + Self::Text => "text", Self::Linebreak => "linebreak", - Self::Escape(_) => "escape sequence", - Self::Shorthand(_) => "shorthand", - Self::Symbol(_) => "symbol notation", + Self::Escape => "escape sequence", + Self::Shorthand => "shorthand", + Self::Symbol => "symbol notation", Self::Strong => "strong content", Self::Emph => "emphasized content", - Self::Raw(_) => "raw block", - Self::Link(_) => "link", - Self::Label(_) => "label", - Self::Ref(_) => "reference", + Self::Raw { .. 
} => "raw block", + Self::Link => "link", + Self::Label => "label", + Self::Ref => "reference", Self::Heading => "heading", Self::ListItem => "list item", Self::EnumItem => "enumeration item", - Self::EnumNumbering(_) => "enumeration item numbering", + Self::EnumNumbering => "enumeration item numbering", Self::TermItem => "term list item", Self::Math => "math formula", - Self::Atom(s) => match s.as_str() { - "(" => "opening paren", - ")" => "closing paren", - "{" => "opening brace", - "}" => "closing brace", - "[" => "opening bracket", - "]" => "closing bracket", - _ => "math atom", - }, + Self::Atom => "math atom", Self::Script => "script", Self::Frac => "fraction", Self::AlignPoint => "alignment point", - Self::Ident(_) => "identifier", - Self::Bool(_) => "boolean", - Self::Int(_) => "integer", - Self::Float(_) => "float", - Self::Numeric(_, _) => "numeric value", - Self::Str(_) => "string", + Self::Ident => "identifier", + Self::Bool => "boolean", + Self::Int => "integer", + Self::Float => "float", + Self::Numeric => "numeric value", + Self::Str => "string", Self::CodeBlock => "code block", Self::ContentBlock => "content block", Self::Parenthesized => "group", @@ -453,127 +398,7 @@ impl SyntaxKind { Self::LoopBreak => "`break` expression", Self::LoopContinue => "`continue` expression", Self::FuncReturn => "`return` expression", - Self::Error(_, _) => "syntax error", - } - } -} - -impl Hash for SyntaxKind { - fn hash(&self, state: &mut H) { - std::mem::discriminant(self).hash(state); - match self { - Self::LineComment => {} - Self::BlockComment => {} - Self::Space { newlines } => newlines.hash(state), - Self::LeftBrace => {} - Self::RightBrace => {} - Self::LeftBracket => {} - Self::RightBracket => {} - Self::LeftParen => {} - Self::RightParen => {} - Self::Comma => {} - Self::Semicolon => {} - Self::Colon => {} - Self::Star => {} - Self::Underscore => {} - Self::Dollar => {} - Self::Plus => {} - Self::Minus => {} - Self::Slash => {} - Self::Hat => {} - Self::Amp => {} - Self::Dot => {} - Self::Eq => {} - Self::EqEq => {} - Self::ExclEq => {} - Self::Lt => {} - Self::LtEq => {} - Self::Gt => {} - Self::GtEq => {} - Self::PlusEq => {} - Self::HyphEq => {} - Self::StarEq => {} - Self::SlashEq => {} - Self::Dots => {} - Self::Arrow => {} - Self::Not => {} - Self::And => {} - Self::Or => {} - Self::None => {} - Self::Auto => {} - Self::Let => {} - Self::Set => {} - Self::Show => {} - Self::If => {} - Self::Else => {} - Self::For => {} - Self::In => {} - Self::While => {} - Self::Break => {} - Self::Continue => {} - Self::Return => {} - Self::Import => {} - Self::Include => {} - Self::As => {} - Self::Markup { min_indent } => min_indent.hash(state), - Self::Text(s) => s.hash(state), - Self::Linebreak => {} - Self::Escape(c) => c.hash(state), - Self::Shorthand(c) => c.hash(state), - Self::Symbol(s) => s.hash(state), - Self::SmartQuote { double } => double.hash(state), - Self::Strong => {} - Self::Emph => {} - Self::Raw(raw) => raw.hash(state), - Self::Link(link) => link.hash(state), - Self::Label(c) => c.hash(state), - Self::Ref(c) => c.hash(state), - Self::Heading => {} - Self::ListItem => {} - Self::EnumItem => {} - Self::EnumNumbering(num) => num.hash(state), - Self::TermItem => {} - Self::Math => {} - Self::Atom(c) => c.hash(state), - Self::Script => {} - Self::Frac => {} - Self::AlignPoint => {} - Self::Ident(v) => v.hash(state), - Self::Bool(v) => v.hash(state), - Self::Int(v) => v.hash(state), - Self::Float(v) => v.to_bits().hash(state), - Self::Numeric(v, u) => (v.to_bits(), 
u).hash(state), - Self::Str(v) => v.hash(state), - Self::CodeBlock => {} - Self::ContentBlock => {} - Self::Parenthesized => {} - Self::Array => {} - Self::Dict => {} - Self::Named => {} - Self::Keyed => {} - Self::Unary => {} - Self::Binary => {} - Self::FieldAccess => {} - Self::FuncCall => {} - Self::MethodCall => {} - Self::Args => {} - Self::Spread => {} - Self::Closure => {} - Self::Params => {} - Self::LetBinding => {} - Self::SetRule => {} - Self::ShowRule => {} - Self::Conditional => {} - Self::WhileLoop => {} - Self::ForLoop => {} - Self::ForPattern => {} - Self::ModuleImport => {} - Self::ImportItems => {} - Self::ModuleInclude => {} - Self::LoopBreak => {} - Self::LoopContinue => {} - Self::FuncReturn => {} - Self::Error(pos, msg) => (pos, msg).hash(state), + Self::Error => "syntax error", } } } diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs index d54767745..f082bd285 100644 --- a/src/syntax/lexer.rs +++ b/src/syntax/lexer.rs @@ -1,17 +1,12 @@ -use std::num::NonZeroUsize; -use std::sync::Arc; - use unicode_xid::UnicodeXID; use unscanny::Scanner; -use super::resolve::{resolve_hex, resolve_raw, resolve_string}; -use super::{ErrorPos, RawFields, SyntaxKind, Unit}; -use crate::geom::{AbsUnit, AngleUnit}; +use super::{ErrorPos, SyntaxKind}; use crate::util::{format_eco, EcoString}; /// Splits up a string of source code into tokens. #[derive(Clone)] -pub struct Lexer<'s> { +pub(super) struct Lexer<'s> { /// The underlying scanner. s: Scanner<'s>, /// The mode the lexer is in. This determines what tokens it recognizes. @@ -20,11 +15,13 @@ pub struct Lexer<'s> { terminated: bool, /// Offsets the indentation on the first line of the source. column_offset: usize, + /// An error for the last token. + error: Option<(EcoString, ErrorPos)>, } /// What kind of tokens to emit. #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum LexMode { +pub(super) enum LexMode { /// Text and markup. Markup, /// Math atoms, operators, etc. @@ -34,11 +31,6 @@ pub enum LexMode { } impl<'s> Lexer<'s> { - /// Create a new lexer with the given mode. - pub fn new(text: &'s str, mode: LexMode) -> Self { - Self::with_prefix("", text, mode) - } - /// Create a new lexer with the given mode and a prefix to offset column /// calculations. pub fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self { @@ -47,6 +39,7 @@ impl<'s> Lexer<'s> { mode, terminated: true, column_offset: column(prefix, prefix.len(), 0), + error: None, } } @@ -85,6 +78,23 @@ impl<'s> Lexer<'s> { pub fn column(&self, index: usize) -> usize { column(self.s.string(), index, self.column_offset) } + + /// Take out the last error. + pub fn last_error(&mut self) -> Option<(EcoString, ErrorPos)> { + self.error.take() + } + + /// Construct a full-positioned syntax error. + fn error(&mut self, message: impl Into) -> SyntaxKind { + self.error = Some((message.into(), ErrorPos::Full)); + SyntaxKind::Error + } + + /// Construct a positioned syntax error. + fn error_at_end(&mut self, message: impl Into) -> SyntaxKind { + self.error = Some((message.into(), ErrorPos::End)); + SyntaxKind::Error + } } impl Iterator for Lexer<'_> { @@ -92,22 +102,20 @@ impl Iterator for Lexer<'_> { /// Produce the next token. fn next(&mut self) -> Option { + self.error = None; let start = self.s.cursor(); let c = self.s.eat()?; Some(match c { // Trivia. 
+ c if c.is_whitespace() => self.whitespace(c), '/' if self.s.eat_if('/') => self.line_comment(), '/' if self.s.eat_if('*') => self.block_comment(), - '*' if self.s.eat_if('/') => SyntaxKind::Error( - ErrorPos::Full, - "unexpected end of block comment".into(), - ), - c if c.is_whitespace() => self.whitespace(c), + '*' if self.s.eat_if('/') => self.error("unexpected end of block comment"), // Other things. _ => match self.mode { LexMode::Markup => self.markup(start, c), - LexMode::Math => self.math(start, c), + LexMode::Math => self.math(c), LexMode::Code => self.code(start, c), }, }) @@ -118,7 +126,7 @@ impl Iterator for Lexer<'_> { impl Lexer<'_> { fn line_comment(&mut self) -> SyntaxKind { self.s.eat_until(is_newline); - if self.s.peek().is_none() { + if self.s.done() { self.terminated = false; } SyntaxKind::LineComment @@ -182,57 +190,64 @@ impl Lexer<'_> { } } +/// Markup. impl Lexer<'_> { fn markup(&mut self, start: usize, c: char) -> SyntaxKind { match c { - // Blocks. + '\\' => self.backslash(), + ':' if self.s.at(is_id_start) => self.maybe_symbol(), + '`' => self.raw(), + 'h' if self.s.eat_if("ttp://") => self.link(), + 'h' if self.s.eat_if("ttps://") => self.link(), + '<' if self.s.at(is_id_continue) => self.label(), + '@' if self.s.at(is_id_continue) => self.reference(), + '0'..='9' => self.numbering(start), + '#' if self.s.eat_if('{') => SyntaxKind::LeftBrace, + '#' if self.s.eat_if('[') => SyntaxKind::LeftBracket, + '#' if self.s.at(is_id_start) => { + match keyword(self.s.eat_while(is_id_continue)) { + Some(keyword) => keyword, + None => SyntaxKind::Ident, + } + } + + '.' if self.s.eat_if("..") => SyntaxKind::Shorthand, + '-' if self.s.eat_if("--") => SyntaxKind::Shorthand, + '-' if self.s.eat_if('-') => SyntaxKind::Shorthand, + '-' if self.s.eat_if('?') => SyntaxKind::Shorthand, + '*' if !self.in_word() => SyntaxKind::Star, + '_' if !self.in_word() => SyntaxKind::Underscore, + '{' => SyntaxKind::LeftBrace, '}' => SyntaxKind::RightBrace, '[' => SyntaxKind::LeftBracket, ']' => SyntaxKind::RightBracket, - - // Multi-char things. - '#' => self.hash(start), - '.' if self.s.eat_if("..") => SyntaxKind::Shorthand('\u{2026}'), - '-' => self.hyph(), - ':' => self.colon(), - 'h' if self.s.eat_if("ttp://") || self.s.eat_if("ttps://") => { - self.link(start) - } - '`' => self.raw(), - c if c.is_ascii_digit() => self.numbering(start), - '<' if self.s.at(is_id_continue) => self.label(), - '@' if self.s.at(is_id_continue) => self.reference(), - - // Escape sequences. - '\\' => self.backslash(), - - // Single-char things. - '~' => SyntaxKind::Shorthand('\u{00A0}'), - '\'' => SyntaxKind::SmartQuote { double: false }, - '"' => SyntaxKind::SmartQuote { double: true }, - '*' if !self.in_word() => SyntaxKind::Star, - '_' if !self.in_word() => SyntaxKind::Underscore, + '\'' => SyntaxKind::SmartQuote, + '"' => SyntaxKind::SmartQuote, '$' => SyntaxKind::Dollar, '=' => SyntaxKind::Eq, '+' => SyntaxKind::Plus, '/' => SyntaxKind::Slash, + '~' => SyntaxKind::Shorthand, + ':' => SyntaxKind::Colon, + '-' => SyntaxKind::Minus, - // Plain text. - _ => self.text(start), + _ => self.text(), } } - fn text(&mut self, start: usize) -> SyntaxKind { + fn text(&mut self) -> SyntaxKind { macro_rules! table { - ($(|$c:literal)*) => {{ - let mut t = [false; 128]; - $(t[$c as usize] = true;)* - t - }} + ($(|$c:literal)*) => { + static TABLE: [bool; 128] = { + let mut t = [false; 128]; + $(t[$c as usize] = true;)* + t + }; + }; } - const TABLE: [bool; 128] = table! { + table! 
{ | ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' | '\\' | '/' | '[' | ']' | '{' | '}' | '~' | '-' | '.' | '\'' | '"' | '*' | '_' | ':' | 'h' | '`' | '$' | '<' | '>' | '@' | '#' @@ -247,8 +262,8 @@ impl Lexer<'_> { // anyway. let mut s = self.s; match s.eat() { - Some('/') if !s.at(['/', '*']) => {} Some(' ') if s.at(char::is_alphanumeric) => {} + Some('/') if !s.at(['/', '*']) => {} Some('-') if !s.at(['-', '?']) => {} Some('.') if !s.at("..") => {} Some('h') if !s.at("ttp://") && !s.at("ttps://") => {} @@ -259,77 +274,40 @@ impl Lexer<'_> { self.s = s; } - SyntaxKind::Text(self.s.from(start).into()) + SyntaxKind::Text } fn backslash(&mut self) -> SyntaxKind { - match self.s.peek() { - Some('u') if self.s.eat_if("u{") => { - let sequence = self.s.eat_while(char::is_ascii_alphanumeric); - if self.s.eat_if('}') { - if let Some(c) = resolve_hex(sequence) { - SyntaxKind::Escape(c) - } else { - SyntaxKind::Error( - ErrorPos::Full, - "invalid unicode escape sequence".into(), - ) - } - } else { - self.terminated = false; - SyntaxKind::Error(ErrorPos::End, "expected closing brace".into()) - } + if self.s.eat_if("u{") { + let hex = self.s.eat_while(char::is_ascii_alphanumeric); + if !self.s.eat_if('}') { + self.terminated = false; + return self.error_at_end("expected closing brace"); } - // Linebreaks. - Some(c) if c.is_whitespace() => SyntaxKind::Linebreak, - None => SyntaxKind::Linebreak, - - // Escapes. - Some(c) => { - self.s.expect(c); - SyntaxKind::Escape(c) + if u32::from_str_radix(hex, 16) + .ok() + .and_then(std::char::from_u32) + .is_none() + { + return self.error("invalid unicode escape sequence"); } + + return SyntaxKind::Escape; } - } - fn hash(&mut self, start: usize) -> SyntaxKind { - if self.s.eat_if('{') { - SyntaxKind::LeftBrace - } else if self.s.eat_if('[') { - SyntaxKind::LeftBracket - } else if self.s.at(is_id_start) { - let read = self.s.eat_while(is_id_continue); - match keyword(read) { - Some(keyword) => keyword, - None => SyntaxKind::Ident(read.into()), - } - } else if self.mode == LexMode::Markup { - self.text(start) + if self.s.done() || self.s.at(char::is_whitespace) { + SyntaxKind::Linebreak } else { - SyntaxKind::Atom("#".into()) + self.s.eat(); + SyntaxKind::Escape } } - fn hyph(&mut self) -> SyntaxKind { - if self.s.eat_if('-') { - if self.s.eat_if('-') { - SyntaxKind::Shorthand('\u{2014}') - } else { - SyntaxKind::Shorthand('\u{2013}') - } - } else if self.s.eat_if('?') { - SyntaxKind::Shorthand('\u{00AD}') - } else { - SyntaxKind::Minus - } - } - - fn colon(&mut self) -> SyntaxKind { + fn maybe_symbol(&mut self) -> SyntaxKind { let start = self.s.cursor(); let mut end = start; - while !self.s.eat_while(char::is_ascii_alphanumeric).is_empty() && self.s.at(':') - { + while !self.s.eat_while(is_id_continue).is_empty() && self.s.at(':') { end = self.s.cursor(); self.s.eat(); } @@ -338,15 +316,15 @@ impl Lexer<'_> { if start < end { self.s.expect(':'); - SyntaxKind::Symbol(self.s.get(start..end).into()) + SyntaxKind::Symbol } else if self.mode == LexMode::Markup { SyntaxKind::Colon } else { - SyntaxKind::Atom(":".into()) + SyntaxKind::Atom } } - fn link(&mut self, start: usize) -> SyntaxKind { + fn link(&mut self) -> SyntaxKind { #[rustfmt::skip] self.s.eat_while(|c: char| matches!(c, | '0' ..= '9' @@ -355,10 +333,12 @@ impl Lexer<'_> { | '~' | '/' | '%' | '?' | '#' | '&' | '+' | '=' | '\'' | '.' 
| ',' | ';' )); + if self.s.scout(-1) == Some('.') { self.s.uneat(); } - SyntaxKind::Link(self.s.from(start).into()) + + SyntaxKind::Link } fn raw(&mut self) -> SyntaxKind { @@ -369,16 +349,10 @@ impl Lexer<'_> { backticks += 1; } - // Special case for empty inline block. if backticks == 2 { - return SyntaxKind::Raw(Arc::new(RawFields { - text: EcoString::new(), - lang: None, - block: false, - })); + return SyntaxKind::Raw { column }; } - let start = self.s.cursor(); let mut found = 0; while found < backticks { match self.s.eat() { @@ -388,45 +362,40 @@ impl Lexer<'_> { } } - if found == backticks { - let end = self.s.cursor() - found as usize; - SyntaxKind::Raw(Arc::new(resolve_raw( - column, - backticks, - self.s.get(start..end), - ))) - } else { + if found != backticks { self.terminated = false; let remaining = backticks - found; let noun = if remaining == 1 { "backtick" } else { "backticks" }; - SyntaxKind::Error( - ErrorPos::End, - if found == 0 { - format_eco!("expected {} {}", remaining, noun) - } else { - format_eco!("expected {} more {}", remaining, noun) - }, - ) + return self.error_at_end(if found == 0 { + format_eco!("expected {} {}", remaining, noun) + } else { + format_eco!("expected {} more {}", remaining, noun) + }); } + + SyntaxKind::Raw { column } } fn numbering(&mut self, start: usize) -> SyntaxKind { self.s.eat_while(char::is_ascii_digit); + let read = self.s.from(start); if self.s.eat_if('.') { if let Ok(number) = read.parse::() { - return match NonZeroUsize::new(number) { - Some(number) => SyntaxKind::EnumNumbering(number), - None => SyntaxKind::Error(ErrorPos::Full, "must be positive".into()), - }; + if number == 0 { + return self.error("must be positive"); + } + + return SyntaxKind::EnumNumbering; } } - self.text(start) + self.text() } fn reference(&mut self) -> SyntaxKind { - SyntaxKind::Ref(self.s.eat_while(is_id_continue).into()) + self.s.eat_while(is_id_continue); + SyntaxKind::Ref } fn in_word(&self) -> bool { @@ -439,95 +408,83 @@ impl Lexer<'_> { /// Math. impl Lexer<'_> { - fn math(&mut self, start: usize, c: char) -> SyntaxKind { + fn math(&mut self, c: char) -> SyntaxKind { match c { - // Symbol shorthands. - '|' if self.s.eat_if("->") => SyntaxKind::Shorthand('\u{21A6}'), - '<' if self.s.eat_if("->") => SyntaxKind::Shorthand('\u{2194}'), - '<' if self.s.eat_if("=>") => SyntaxKind::Shorthand('\u{21D4}'), - '!' if self.s.eat_if('=') => SyntaxKind::Shorthand('\u{2260}'), - '<' if self.s.eat_if('=') => SyntaxKind::Shorthand('\u{2264}'), - '>' if self.s.eat_if('=') => SyntaxKind::Shorthand('\u{2265}'), - '<' if self.s.eat_if('-') => SyntaxKind::Shorthand('\u{2190}'), - '-' if self.s.eat_if('>') => SyntaxKind::Shorthand('\u{2192}'), - '=' if self.s.eat_if('>') => SyntaxKind::Shorthand('\u{21D2}'), - ':' if self.s.eat_if('=') => SyntaxKind::Shorthand('\u{2254}'), - - // Multi-char things. - '#' => self.hash(start), - - // Escape sequences. '\\' => self.backslash(), - - // Single-char things. - '_' => SyntaxKind::Underscore, - '^' => SyntaxKind::Hat, - '/' => SyntaxKind::Slash, - '&' => SyntaxKind::Amp, - '$' => SyntaxKind::Dollar, - - // Symbol notation. - ':' => self.colon(), - - // Strings. 
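// [Editor's sketch (not part of this patch).] With the payload-free
// `SyntaxKind::Shorthand`, the lexer no longer resolves the replacement
// character itself; a later stage (presumably the typed AST, which can read
// the leaf's source text) maps it back, roughly like below. The function
// name is made up; the character mapping is taken from the deleted arms
// in this file.
fn _resolve_shorthand(text: &str) -> Option<char> {
    Some(match text {
        "..." => '\u{2026}', // ellipsis
        "---" => '\u{2014}', // em dash
        "--" => '\u{2013}',  // en dash
        "-?" => '\u{00AD}',  // soft hyphen
        "~" => '\u{00A0}',   // non-breaking space
        "->" => '\u{2192}',
        "<-" => '\u{2190}',
        "!=" => '\u{2260}',
        _ => return None,
    })
}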
+ ':' if self.s.at(is_id_start) => self.maybe_symbol(), '"' => self.string(), + '#' if self.s.eat_if('{') => SyntaxKind::LeftBrace, + '#' if self.s.eat_if('[') => SyntaxKind::LeftBracket, + '#' if self.s.at(is_id_start) => { + match keyword(self.s.eat_while(is_id_continue)) { + Some(keyword) => keyword, + None => SyntaxKind::Ident, + } + } + + '|' if self.s.eat_if("->") => SyntaxKind::Shorthand, + '<' if self.s.eat_if("->") => SyntaxKind::Shorthand, + '<' if self.s.eat_if("=>") => SyntaxKind::Shorthand, + '!' if self.s.eat_if('=') => SyntaxKind::Shorthand, + '<' if self.s.eat_if('=') => SyntaxKind::Shorthand, + '>' if self.s.eat_if('=') => SyntaxKind::Shorthand, + '<' if self.s.eat_if('-') => SyntaxKind::Shorthand, + '-' if self.s.eat_if('>') => SyntaxKind::Shorthand, + '=' if self.s.eat_if('>') => SyntaxKind::Shorthand, + ':' if self.s.eat_if('=') => SyntaxKind::Shorthand, + + '_' => SyntaxKind::Underscore, + '$' => SyntaxKind::Dollar, + '/' => SyntaxKind::Slash, + '^' => SyntaxKind::Hat, + '&' => SyntaxKind::AlignPoint, // Identifiers and symbol notation. c if is_math_id_start(c) && self.s.at(is_math_id_continue) => { - self.s.eat_while(is_math_id_continue); - - let mut symbol = false; - while self.s.eat_if(':') - && !self.s.eat_while(char::is_alphanumeric).is_empty() - { - symbol = true; - } - - if symbol { - SyntaxKind::Symbol(self.s.from(start).into()) - } else { - if self.s.scout(-1) == Some(':') { - self.s.uneat(); - } - - SyntaxKind::Ident(self.s.from(start).into()) - } - } - - // Numbers. - c if c.is_numeric() => { - self.s.eat_while(char::is_numeric); - SyntaxKind::Atom(self.s.from(start).into()) + self.math_ident() } // Other math atoms. - c => SyntaxKind::Atom(c.into()), + _ => { + // Keep numbers together. + if c.is_numeric() { + self.s.eat_while(char::is_numeric); + } + SyntaxKind::Atom + } } } + + fn math_ident(&mut self) -> SyntaxKind { + self.s.eat_while(is_math_id_continue); + + let mut symbol = false; + while self.s.eat_if(':') && !self.s.eat_while(char::is_alphanumeric).is_empty() { + symbol = true; + } + + if symbol { + return SyntaxKind::Symbol; + } + + if self.s.scout(-1) == Some(':') { + self.s.uneat(); + } + + SyntaxKind::Ident + } } /// Code. impl Lexer<'_> { fn code(&mut self, start: usize, c: char) -> SyntaxKind { match c { - // Blocks. - '{' => SyntaxKind::LeftBrace, - '}' => SyntaxKind::RightBrace, - '[' => SyntaxKind::LeftBracket, - ']' => SyntaxKind::RightBracket, - - // Parentheses. - '(' => SyntaxKind::LeftParen, - ')' => SyntaxKind::RightParen, - - // Math. - '$' => SyntaxKind::Dollar, - - // Labels and raw. - '<' if self.s.at(is_id_continue) => self.label(), '`' => self.raw(), + '<' if self.s.at(is_id_continue) => self.label(), + '0'..='9' => self.number(start, c), + '.' if self.s.at(char::is_ascii_digit) => self.number(start, c), + '"' => self.string(), - // Two-char operators. '=' if self.s.eat_if('=') => SyntaxKind::EqEq, '!' if self.s.eat_if('=') => SyntaxKind::ExclEq, '<' if self.s.eat_if('=') => SyntaxKind::LtEq, @@ -539,10 +496,17 @@ impl Lexer<'_> { '.' if self.s.eat_if('.') => SyntaxKind::Dots, '=' if self.s.eat_if('>') => SyntaxKind::Arrow, - // Single-char operators. + '{' => SyntaxKind::LeftBrace, + '}' => SyntaxKind::RightBrace, + '[' => SyntaxKind::LeftBracket, + ']' => SyntaxKind::RightBracket, + '(' => SyntaxKind::LeftParen, + ')' => SyntaxKind::RightParen, + '$' => SyntaxKind::Dollar, ',' => SyntaxKind::Comma, ';' => SyntaxKind::Semicolon, ':' => SyntaxKind::Colon, + '.' 
=> SyntaxKind::Dot, '+' => SyntaxKind::Plus, '-' => SyntaxKind::Minus, '*' => SyntaxKind::Star, @@ -550,21 +514,10 @@ impl Lexer<'_> { '=' => SyntaxKind::Eq, '<' => SyntaxKind::Lt, '>' => SyntaxKind::Gt, - '.' if !self.s.at(char::is_ascii_digit) => SyntaxKind::Dot, - // Identifiers. c if is_id_start(c) => self.ident(start), - // Numbers. - c if c.is_ascii_digit() || (c == '.' && self.s.at(char::is_ascii_digit)) => { - self.number(start, c) - } - - // Strings. - '"' => self.string(), - - // Invalid token. - _ => SyntaxKind::Error(ErrorPos::Full, "not valid here".into()), + _ => self.error("not valid here"), } } @@ -573,9 +526,9 @@ impl Lexer<'_> { match self.s.from(start) { "none" => SyntaxKind::None, "auto" => SyntaxKind::Auto, - "true" => SyntaxKind::Bool(true), - "false" => SyntaxKind::Bool(false), - id => keyword(id).unwrap_or_else(|| SyntaxKind::Ident(id.into())), + "true" => SyntaxKind::Bool, + "false" => SyntaxKind::Bool, + id => keyword(id).unwrap_or(SyntaxKind::Ident), } } @@ -604,64 +557,54 @@ impl Lexer<'_> { let number = self.s.get(start..suffix_start); let suffix = self.s.from(suffix_start); - // Find out whether it is a simple number. if suffix.is_empty() { - if let Ok(i) = number.parse::() { - return SyntaxKind::Int(i); - } + return if number.parse::().is_ok() { + SyntaxKind::Int + } else if number.parse::().is_ok() { + SyntaxKind::Float + } else { + self.error("invalid number") + }; } - let Ok(v) = number.parse::() else { - return SyntaxKind::Error(ErrorPos::Full, "invalid number".into()); - }; - - match suffix { - "" => SyntaxKind::Float(v), - "pt" => SyntaxKind::Numeric(v, Unit::Length(AbsUnit::Pt)), - "mm" => SyntaxKind::Numeric(v, Unit::Length(AbsUnit::Mm)), - "cm" => SyntaxKind::Numeric(v, Unit::Length(AbsUnit::Cm)), - "in" => SyntaxKind::Numeric(v, Unit::Length(AbsUnit::In)), - "deg" => SyntaxKind::Numeric(v, Unit::Angle(AngleUnit::Deg)), - "rad" => SyntaxKind::Numeric(v, Unit::Angle(AngleUnit::Rad)), - "em" => SyntaxKind::Numeric(v, Unit::Em), - "fr" => SyntaxKind::Numeric(v, Unit::Fr), - "%" => SyntaxKind::Numeric(v, Unit::Percent), - _ => SyntaxKind::Error(ErrorPos::Full, "invalid number suffix".into()), + if !matches!( + suffix, + "pt" | "mm" | "cm" | "in" | "deg" | "rad" | "em" | "fr" | "%" + ) { + return self.error("invalid number suffix"); } + + SyntaxKind::Numeric } fn string(&mut self) -> SyntaxKind { let mut escaped = false; - let verbatim = self.s.eat_until(|c| { - if c == '"' && !escaped { - true - } else { - escaped = c == '\\' && !escaped; - false - } + self.s.eat_until(|c| { + let stop = c == '"' && !escaped; + escaped = c == '\\' && !escaped; + stop }); - let string = resolve_string(verbatim); - if self.s.eat_if('"') { - SyntaxKind::Str(string) - } else { + if !self.s.eat_if('"') { self.terminated = false; - SyntaxKind::Error(ErrorPos::End, "expected quote".into()) + return self.error_at_end("expected quote"); } + + SyntaxKind::Str } fn label(&mut self) -> SyntaxKind { let label = self.s.eat_while(is_id_continue); - if self.s.eat_if('>') { - if !label.is_empty() { - SyntaxKind::Label(label.into()) - } else { - SyntaxKind::Error(ErrorPos::Full, "label cannot be empty".into()) - } - } else { - self.terminated = false; - SyntaxKind::Error(ErrorPos::End, "expected closing angle bracket".into()) + if label.is_empty() { + return self.error("label cannot be empty"); } + + if !self.s.eat_if('>') { + self.terminated = false; + return self.error_at_end("expected closing angle bracket"); + } + + SyntaxKind::Label } } @@ -729,6 +672,29 @@ pub fn 
is_newline(character: char) -> bool { ) } +/// Split text at newlines. +pub(super) fn split_newlines(text: &str) -> Vec<&str> { + let mut s = Scanner::new(text); + let mut lines = Vec::new(); + let mut start = 0; + let mut end = 0; + + while let Some(c) = s.eat() { + if is_newline(c) { + if c == '\r' { + s.eat_if('\n'); + } + + lines.push(&text[start..end]); + start = s.cursor(); + } + end = s.cursor(); + } + + lines.push(&text[start..]); + lines +} + /// Whether a string is a valid unicode identifier. /// /// In addition to what is specified in the [Unicode Standard][uax31], we allow: @@ -746,13 +712,13 @@ pub fn is_ident(string: &str) -> bool { /// Whether a character can start an identifier. #[inline] -fn is_id_start(c: char) -> bool { +pub(super) fn is_id_start(c: char) -> bool { c.is_xid_start() || c == '_' } /// Whether a character can continue an identifier. #[inline] -fn is_id_continue(c: char) -> bool { +pub(super) fn is_id_continue(c: char) -> bool { c.is_xid_continue() || c == '_' || c == '-' } diff --git a/src/syntax/linked.rs b/src/syntax/linked.rs deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 81524aa2d..a2bb57662 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -2,22 +2,17 @@ pub mod ast; -mod incremental; mod kind; mod lexer; mod node; mod parser; -mod parsing; -mod resolve; +mod reparse; mod source; mod span; pub use self::kind::*; pub use self::lexer::*; pub use self::node::*; -pub use self::parsing::*; +pub use self::parser::*; pub use self::source::*; pub use self::span::*; - -use incremental::reparse; -use parser::*; diff --git a/src/syntax/node.rs b/src/syntax/node.rs index 13556ede0..283d55b4c 100644 --- a/src/syntax/node.rs +++ b/src/syntax/node.rs @@ -6,6 +6,7 @@ use std::sync::Arc; use super::ast::AstNode; use super::{SourceId, Span, SyntaxKind}; use crate::diag::SourceError; +use crate::util::EcoString; /// A node in the untyped syntax tree. #[derive(Clone, PartialEq, Hash)] @@ -15,84 +16,106 @@ pub struct SyntaxNode(Repr); #[derive(Clone, PartialEq, Hash)] enum Repr { /// A leaf node. - Leaf(NodeData), + Leaf(LeafNode), /// A reference-counted inner node. Inner(Arc), + /// An error. + Error(ErrorNode), } impl SyntaxNode { /// Create a new leaf node. - pub fn leaf(kind: SyntaxKind, len: usize) -> Self { - Self(Repr::Leaf(NodeData::new(kind, len))) + pub fn leaf(kind: SyntaxKind, text: impl Into) -> Self { + Self(Repr::Leaf(LeafNode::new(kind, text))) } /// Create a new inner node with children. pub fn inner(kind: SyntaxKind, children: Vec) -> Self { - Self(Repr::Inner(Arc::new(InnerNode::with_children(kind, children)))) + Self(Repr::Inner(Arc::new(InnerNode::new(kind, children)))) + } + + /// Create a new error node. + pub fn error(message: impl Into, pos: ErrorPos, len: usize) -> Self { + Self(Repr::Error(ErrorNode::new(message, pos, len))) } /// The type of the node. - pub fn kind(&self) -> &SyntaxKind { - &self.data().kind - } - - /// Take the kind out of the node. - pub fn take(self) -> SyntaxKind { - match self.0 { + pub fn kind(&self) -> SyntaxKind { + match &self.0 { Repr::Leaf(leaf) => leaf.kind, - Repr::Inner(inner) => inner.data.kind.clone(), + Repr::Inner(inner) => inner.kind, + Repr::Error(_) => SyntaxKind::Error, } } - /// The length of the node. + /// The byte length of the node in the source text. 
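// [Editor's sketch (not part of this patch).] Leaves now own their source
// text and errors are a dedicated variant, so a node can be built and
// inspected without the original source string. A minimal illustration;
// the helper name is made up:
fn _sketch_nodes() {
    let ident = SyntaxNode::leaf(SyntaxKind::Ident, "text");
    assert_eq!(ident.kind(), SyntaxKind::Ident);
    assert_eq!(ident.len(), 4);

    let broken = SyntaxNode::error("unclosed delimiter", ErrorPos::End, 1);
    assert_eq!(broken.kind(), SyntaxKind::Error);
    assert!(broken.erroneous());
}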
pub fn len(&self) -> usize { - self.data().len + match &self.0 { + Repr::Leaf(leaf) => leaf.len(), + Repr::Inner(inner) => inner.len, + Repr::Error(error) => error.len, + } } /// The span of the node. pub fn span(&self) -> Span { - self.data().span + match &self.0 { + Repr::Leaf(leaf) => leaf.span, + Repr::Inner(inner) => inner.span, + Repr::Error(error) => error.span, + } } - /// The number of descendants, including the node itself. - pub fn descendants(&self) -> usize { + /// The text of the node if it is a leaf node. + /// + /// Returns an empty string if this is an inner or error node. + pub fn text(&self) -> &EcoString { + static EMPTY: EcoString = EcoString::new(); match &self.0 { - Repr::Inner(inner) => inner.descendants, - Repr::Leaf(_) => 1, + Repr::Leaf(leaf) => &leaf.text, + Repr::Inner(_) | Repr::Error(_) => &EMPTY, + } + } + + /// Extract the text from the node. + /// + /// Returns an empty string if this is an inner or error node. + pub fn into_text(self) -> EcoString { + match self.0 { + Repr::Leaf(leaf) => leaf.text, + Repr::Inner(_) | Repr::Error(_) => EcoString::new(), } } /// The node's children. pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> { match &self.0 { + Repr::Leaf(_) | Repr::Error(_) => [].iter(), Repr::Inner(inner) => inner.children.iter(), - Repr::Leaf(_) => [].iter(), } } - /// Convert the node to a typed AST node. - pub fn cast(&self) -> Option - where - T: AstNode, - { + /// Try to convert the node to a typed AST node. + pub fn cast(&self) -> Option { T::from_untyped(self) } - /// Get the first child that can cast to the AST type `T`. - pub fn cast_first_child(&self) -> Option { + /// Cast the first child that can cast to the AST type `T`. + pub fn cast_first_match(&self) -> Option { self.children().find_map(Self::cast) } - /// Get the last child that can cast to the AST type `T`. - pub fn cast_last_child(&self) -> Option { + /// Cast the last child that can cast to the AST type `T`. + pub fn cast_last_match(&self) -> Option { self.children().rev().find_map(Self::cast) } /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { match &self.0 { + Repr::Leaf(_) => false, Repr::Inner(node) => node.erroneous, - Repr::Leaf(data) => data.kind.is_error(), + Repr::Error(_) => true, } } @@ -102,35 +125,41 @@ impl SyntaxNode { return vec![]; } - match self.kind() { - SyntaxKind::Error(pos, message) => { - vec![SourceError::new(self.span(), message.clone()).with_pos(*pos)] - } - _ => self - .children() + if let Repr::Error(error) = &self.0 { + vec![SourceError::new(error.span, error.message.clone()).with_pos(error.pos)] + } else { + self.children() .filter(|node| node.erroneous()) .flat_map(|node| node.errors()) - .collect(), + .collect() } } /// Change the type of the node. - pub(super) fn convert(&mut self, kind: SyntaxKind) { + pub(super) fn convert_to(&mut self, kind: SyntaxKind) { + debug_assert!(!kind.is_error()); match &mut self.0 { + Repr::Leaf(leaf) => leaf.kind = kind, Repr::Inner(inner) => { let node = Arc::make_mut(inner); - node.erroneous |= kind.is_error(); - node.data.kind = kind; + node.kind = kind; } - Repr::Leaf(leaf) => leaf.kind = kind, + Repr::Error(_) => {} } } + /// Convert the child to an error. + pub(super) fn convert_to_error(&mut self, message: impl Into) { + let len = self.len(); + *self = SyntaxNode::error(message, ErrorPos::Full, len); + } + /// Set a synthetic span for the node and all its descendants. 
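// [Editor's note (not part of this patch).] `convert_to_error` above keeps
// the node's byte length while swapping in an `ErrorNode`, so offsets
// computed from sibling lengths still line up after the conversion.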
pub(super) fn synthesize(&mut self, span: Span) { match &mut self.0 { + Repr::Leaf(leaf) => leaf.span = span, Repr::Inner(inner) => Arc::make_mut(inner).synthesize(span), - Repr::Leaf(leaf) => leaf.synthesize(span), + Repr::Error(error) => error.span = span, } } @@ -140,17 +169,25 @@ impl SyntaxNode { id: SourceId, within: Range, ) -> NumberingResult { - match &mut self.0 { - Repr::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within), - Repr::Leaf(leaf) => leaf.numberize(id, within), + if within.start >= within.end { + return Err(Unnumberable); } + + let mid = Span::new(id, (within.start + within.end) / 2); + match &mut self.0 { + Repr::Leaf(leaf) => leaf.span = mid, + Repr::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within)?, + Repr::Error(error) => error.span = mid, + } + + Ok(()) } /// If the span points into this node, convert it to a byte range. pub(super) fn range(&self, span: Span, offset: usize) -> Option> { match &self.0 { Repr::Inner(inner) => inner.range(span, offset), - Repr::Leaf(leaf) => leaf.range(span, offset), + _ => (self.span() == span).then(|| offset..offset + self.len()), } } @@ -159,10 +196,18 @@ impl SyntaxNode { matches!(self.0, Repr::Leaf(_)) } + /// The number of descendants, including the node itself. + pub(super) fn descendants(&self) -> usize { + match &self.0 { + Repr::Leaf(_) | Repr::Error(_) => 1, + Repr::Inner(inner) => inner.descendants, + } + } + /// The node's children, mutably. pub(super) fn children_mut(&mut self) -> &mut [SyntaxNode] { match &mut self.0 { - Repr::Leaf(_) => &mut [], + Repr::Leaf(_) | Repr::Error(_) => &mut [], Repr::Inner(inner) => &mut Arc::make_mut(inner).children, } } @@ -199,19 +244,12 @@ impl SyntaxNode { } } - /// The metadata of the node. - fn data(&self) -> &NodeData { - match &self.0 { - Repr::Inner(inner) => &inner.data, - Repr::Leaf(leaf) => leaf, - } - } - /// The upper bound of assigned numbers in this subtree. fn upper(&self) -> u64 { match &self.0 { Repr::Inner(inner) => inner.upper, Repr::Leaf(leaf) => leaf.span.number() + 1, + Repr::Error(error) => error.span.number() + 1, } } } @@ -221,21 +259,64 @@ impl Debug for SyntaxNode { match &self.0 { Repr::Inner(node) => node.fmt(f), Repr::Leaf(node) => node.fmt(f), + Repr::Error(node) => node.fmt(f), } } } impl Default for SyntaxNode { fn default() -> Self { - Self::leaf(SyntaxKind::None, 0) + Self::error("", ErrorPos::Full, 0) + } +} + +/// A leaf node in the untyped syntax tree. +#[derive(Clone, Hash)] +struct LeafNode { + /// What kind of node this is (each kind would have its own struct in a + /// strongly typed AST). + kind: SyntaxKind, + /// The source text of the node. + text: EcoString, + /// The node's span. + span: Span, +} + +impl LeafNode { + /// Create a new leaf node. + fn new(kind: SyntaxKind, text: impl Into) -> Self { + debug_assert!(!kind.is_error()); + Self { kind, text: text.into(), span: Span::detached() } + } + + /// The byte length of the node in the source text. + fn len(&self) -> usize { + self.text.len() + } +} + +impl Debug for LeafNode { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{:?}: {}", self.kind, self.len()) + } +} + +impl PartialEq for LeafNode { + fn eq(&self, other: &Self) -> bool { + self.kind == other.kind && self.text == other.text } } /// An inner node in the untyped syntax tree. #[derive(Clone, Hash)] struct InnerNode { - /// Node metadata. - data: NodeData, + /// What kind of node this is (each kind would have its own struct in a + /// strongly typed AST). 
+ kind: SyntaxKind, + /// The byte length of the node in the source. + len: usize, + /// The node's span. + span: Span, /// The number of nodes in the whole subtree, including this node. descendants: usize, /// Whether this node or any of its children are erroneous. @@ -248,10 +329,12 @@ struct InnerNode { impl InnerNode { /// Create a new inner node with the given kind and children. - fn with_children(kind: SyntaxKind, children: Vec) -> Self { + fn new(kind: SyntaxKind, children: Vec) -> Self { + debug_assert!(!kind.is_error()); + let mut len = 0; let mut descendants = 1; - let mut erroneous = kind.is_error(); + let mut erroneous = false; for child in &children { len += child.len(); @@ -260,7 +343,9 @@ impl InnerNode { } Self { - data: NodeData::new(kind, len), + kind, + len, + span: Span::detached(), descendants, erroneous, upper: 0, @@ -270,7 +355,7 @@ impl InnerNode { /// Set a synthetic span for the node and all its descendants. fn synthesize(&mut self, span: Span) { - self.data.synthesize(span); + self.span = span; for child in &mut self.children { child.synthesize(span); } @@ -310,7 +395,7 @@ impl InnerNode { let mut start = within.start; if range.is_none() { let end = start + stride; - self.data.numberize(id, start..end)?; + self.span = Span::new(id, (start + end) / 2); self.upper = within.end; start = end; } @@ -329,14 +414,14 @@ impl InnerNode { /// If the span points into this node, convert it to a byte range. fn range(&self, span: Span, mut offset: usize) -> Option> { // Check whether we found it. - if let Some(range) = self.data.range(span, offset) { - return Some(range); + if span == self.span { + return Some(offset..offset + self.len); } // The parent of a subtree has a smaller span number than all of its // descendants. Therefore, we can bail out early if the target span's // number is smaller than our number. - if span.number() < self.data.span.number() { + if span.number() < self.span.number() { return None; } @@ -371,8 +456,7 @@ impl InnerNode { let superseded = &self.children[range.clone()]; // Compute the new byte length. - self.data.len = self.data.len - + replacement.iter().map(SyntaxNode::len).sum::() + self.len = self.len + replacement.iter().map(SyntaxNode::len).sum::() - superseded.iter().map(SyntaxNode::len).sum::(); // Compute the new number of descendants. @@ -412,7 +496,7 @@ impl InnerNode { .start .checked_sub(1) .and_then(|i| self.children.get(i)) - .map_or(self.data.span.number() + 1, |child| child.upper()); + .map_or(self.span.number() + 1, |child| child.upper()); // The upper bound for renumbering is either // - the span number of the first child after the to-be-renumbered @@ -426,7 +510,7 @@ impl InnerNode { // Try to renumber. 
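// [Editor's note (not part of this patch).] Span numbers are handed out by
// repeatedly halving the available range (the `(start + end) / 2` above),
// so every subtree owns a contiguous interval of numbers. That is what
// makes the local renumbering attempted below usually succeed without
// touching spans elsewhere in the tree.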
let within = start_number..end_number; - let id = self.data.span.source(); + let id = self.span.source(); if self.numberize(id, Some(renumber), within).is_ok() { return Ok(()); } @@ -450,7 +534,7 @@ impl InnerNode { prev_descendants: usize, new_descendants: usize, ) { - self.data.len = self.data.len + new_len - prev_len; + self.len = self.len + new_len - prev_len; self.descendants = self.descendants + new_descendants - prev_descendants; self.erroneous = self.children.iter().any(SyntaxNode::erroneous); } @@ -458,7 +542,7 @@ impl InnerNode { impl Debug for InnerNode { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - self.data.fmt(f)?; + write!(f, "{:?}: {}", self.kind, self.len)?; if !self.children.is_empty() { f.write_str(" ")?; f.debug_list().entries(&self.children).finish()?; @@ -469,64 +553,62 @@ impl Debug for InnerNode { impl PartialEq for InnerNode { fn eq(&self, other: &Self) -> bool { - self.data == other.data + self.kind == other.kind + && self.len == other.len && self.descendants == other.descendants && self.erroneous == other.erroneous && self.children == other.children } } -/// Data shared between leaf and inner nodes. +/// An error node in the untyped syntax tree. #[derive(Clone, Hash)] -struct NodeData { - /// What kind of node this is (each kind would have its own struct in a - /// strongly typed AST). - kind: SyntaxKind, - /// The byte length of the node in the source. +struct ErrorNode { + /// The error message. + message: EcoString, + /// Where in the node an error should be annotated. + pos: ErrorPos, + /// The byte length of the error in the source. len: usize, /// The node's span. span: Span, } -impl NodeData { - /// Create new node metadata. - fn new(kind: SyntaxKind, len: usize) -> Self { - Self { len, kind, span: Span::detached() } - } - - /// Set a synthetic span for the node. - fn synthesize(&mut self, span: Span) { - self.span = span; - } - - /// Assign a span to the node. - fn numberize(&mut self, id: SourceId, within: Range) -> NumberingResult { - if within.start < within.end { - self.span = Span::new(id, (within.start + within.end) / 2); - Ok(()) - } else { - Err(Unnumberable) +impl ErrorNode { + /// Create new error node. + fn new(message: impl Into, pos: ErrorPos, len: usize) -> Self { + Self { + message: message.into(), + pos, + len, + span: Span::detached(), } } - - /// If the span points into this node, convert it to a byte range. - fn range(&self, span: Span, offset: usize) -> Option> { - (self.span == span).then(|| offset..offset + self.len) - } } -impl Debug for NodeData { +impl Debug for ErrorNode { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {}", self.kind, self.len) + write!(f, "({}): {}", self.message, self.len) } } -impl PartialEq for NodeData { +impl PartialEq for ErrorNode { fn eq(&self, other: &Self) -> bool { - self.kind == other.kind && self.len == other.len + self.message == other.message && self.pos == other.pos && self.len == other.len } } +/// Where in a node an error should be annotated, +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum ErrorPos { + /// Over the full width of the node. + Full, + /// At the start of the node. + Start, + /// At the end of the node. + End, +} + /// A syntax node in a context. /// /// Knows its exact offset in the file and provides access to its @@ -542,7 +624,7 @@ pub struct LinkedNode<'a> { } impl<'a> LinkedNode<'a> { - /// Start a new traversal at the source's root node. + /// Start a new traversal at a root node. 
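// [Editor's sketch (not part of this patch).] Typical cursor-based use of
// `LinkedNode`, mirroring the tests at the end of this file; it assumes
// `Source` is in scope as it is there, and the input is just an example.
fn _sketch_linked() {
    let source = Source::detached("#set text(12pt)");
    let root = LinkedNode::new(source.root());
    if let Some(leaf) = root.leaf_at(6) {
        // The linked node knows its absolute offset and can walk around.
        let _range = leaf.range();
        let _prev = leaf.prev_leaf();
        let _parent_kind = leaf.parent_kind();
    }
}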
pub fn new(root: &'a SyntaxNode) -> Self { Self { node: root, parent: None, index: 0, offset: 0 } } @@ -557,17 +639,17 @@ impl<'a> LinkedNode<'a> { self.index } - /// The absolute byte offset of the this node in the source file. + /// The absolute byte offset of this node in the source file. pub fn offset(&self) -> usize { self.offset } - /// The byte range of the this node in the source file. + /// The byte range of this node in the source file. pub fn range(&self) -> Range { self.offset..self.offset + self.node.len() } - /// Get this node's children. + /// An iterator over this node's children. pub fn children(&self) -> LinkedChildren<'a> { LinkedChildren { parent: Rc::new(self.clone()), @@ -586,7 +668,7 @@ impl<'a> LinkedNode<'a> { } /// Get the kind of this node's parent. - pub fn parent_kind(&self) -> Option<&'a SyntaxKind> { + pub fn parent_kind(&self) -> Option { Some(self.parent()?.node.kind()) } @@ -648,7 +730,7 @@ impl<'a> LinkedNode<'a> { None } - /// Get the leaf at the specified cursor position. + /// Get the leaf at the specified byte offset. pub fn leaf_at(&self, cursor: usize) -> Option { if self.node.children().len() == 0 && cursor <= self.offset + self.len() { return Some(self.clone()); @@ -784,13 +866,13 @@ mod tests { let node = LinkedNode::new(source.root()).leaf_at(7).unwrap(); assert_eq!(node.offset(), 5); assert_eq!(node.len(), 4); - assert_eq!(node.kind(), &SyntaxKind::Ident("text".into())); + assert_eq!(node.kind(), SyntaxKind::Ident); // Go back to "#set". Skips the space. let prev = node.prev_sibling().unwrap(); assert_eq!(prev.offset(), 0); assert_eq!(prev.len(), 4); - assert_eq!(prev.kind(), &SyntaxKind::Set); + assert_eq!(prev.kind(), SyntaxKind::Set); } #[test] @@ -798,15 +880,15 @@ mod tests { let source = Source::detached("#set fun(12pt, red)"); let leaf = LinkedNode::new(source.root()).leaf_at(6).unwrap(); let prev = leaf.prev_leaf().unwrap(); - assert_eq!(leaf.kind(), &SyntaxKind::Ident("fun".into())); - assert_eq!(prev.kind(), &SyntaxKind::Set); + assert_eq!(leaf.kind(), SyntaxKind::Ident); + assert_eq!(prev.kind(), SyntaxKind::Set); let source = Source::detached("#let x = 10"); let leaf = LinkedNode::new(source.root()).leaf_at(9).unwrap(); let prev = leaf.prev_leaf().unwrap(); let next = leaf.next_leaf().unwrap(); - assert_eq!(prev.kind(), &SyntaxKind::Eq); - assert_eq!(leaf.kind(), &SyntaxKind::Space { newlines: 0 }); - assert_eq!(next.kind(), &SyntaxKind::Int(10)); + assert_eq!(prev.kind(), SyntaxKind::Eq); + assert_eq!(leaf.kind(), SyntaxKind::Space { newlines: 0 }); + assert_eq!(next.kind(), SyntaxKind::Int); } } diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index d2ef6e0e2..1584e59b8 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -1,14 +1,1118 @@ +use std::collections::HashSet; use std::fmt::{self, Display, Formatter}; use std::mem; -use std::ops::Range; +use super::ast::{self, Assoc, BinOp, UnOp}; use super::{ErrorPos, LexMode, Lexer, SyntaxKind, SyntaxNode}; use crate::util::{format_eco, EcoString}; +/// Parse a source file. +pub fn parse(text: &str) -> SyntaxNode { + let mut p = Parser::new(text, LexMode::Markup); + markup(&mut p, true); + p.finish().into_iter().next().unwrap() +} + +/// Parse code directly, only used for syntax highlighting. +pub fn parse_code(text: &str) -> SyntaxNode { + let mut p = Parser::new(text, LexMode::Code); + p.perform(SyntaxKind::CodeBlock, code); + p.finish().into_iter().next().unwrap() +} + +/// Reparse a code block. +/// +/// Returns `Some` if all of the input was consumed. 
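// [Editor's sketch (not part of this patch).] The two public entry points
// above always produce a root node thanks to error recovery; `erroneous()`
// on the result reports whether recovery kicked in anywhere. The input
// strings are only examples.
fn _sketch_parse() {
    let markup = parse("Some *markup* and a [content block].");
    let code = parse_code("let x = 1 + 2");
    let _ = (markup.erroneous(), code.erroneous());
}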
+pub(super) fn reparse_code_block( + prefix: &str, + text: &str, + end_pos: usize, +) -> Option<(Vec, bool, usize)> { + let mut p = Parser::with_prefix(prefix, text, LexMode::Code); + if !p.at(SyntaxKind::LeftBrace) { + return None; + } + + code_block(&mut p); + + let (mut node, terminated) = p.consume()?; + let first = node.remove(0); + if first.len() != end_pos { + return None; + } + + Some((vec![first], terminated, 1)) +} + +/// Reparse a content block. +/// +/// Returns `Some` if all of the input was consumed. +pub(super) fn reparse_content_block( + prefix: &str, + text: &str, + end_pos: usize, +) -> Option<(Vec, bool, usize)> { + let mut p = Parser::with_prefix(prefix, text, LexMode::Code); + if !p.at(SyntaxKind::LeftBracket) { + return None; + } + + content_block(&mut p); + + let (mut node, terminated) = p.consume()?; + let first = node.remove(0); + if first.len() != end_pos { + return None; + } + + Some((vec![first], terminated, 1)) +} + +/// Reparse a sequence markup elements without the topmost node. +/// +/// Returns `Some` if all of the input was consumed. +pub(super) fn reparse_markup_elements( + prefix: &str, + text: &str, + end_pos: usize, + differential: isize, + reference: &[SyntaxNode], + mut at_start: bool, + min_indent: usize, +) -> Option<(Vec, bool, usize)> { + let mut p = Parser::with_prefix(prefix, text, LexMode::Markup); + + let mut node: Option<&SyntaxNode> = None; + let mut iter = reference.iter(); + let mut offset = differential; + let mut replaced = 0; + let mut stopped = false; + + 'outer: while !p.eof() { + if let Some(SyntaxKind::Space { newlines: (1..) }) = p.peek() { + if p.column(p.current_end()) < min_indent { + return None; + } + } + + markup_node(&mut p, &mut at_start); + + if p.prev_end() <= end_pos { + continue; + } + + let recent = p.marker().before(&p).unwrap(); + let recent_start = p.prev_end() - recent.len(); + + while offset <= recent_start as isize { + if let Some(node) = node { + // The nodes are equal, at the same position and have the + // same content. The parsing trees have converged again, so + // the reparse may stop here. + if offset == recent_start as isize && node == recent { + replaced -= 1; + stopped = true; + break 'outer; + } + } + + if let Some(node) = node { + offset += node.len() as isize; + } + + node = iter.next(); + if node.is_none() { + break; + } + + replaced += 1; + } + } + + if p.eof() && !stopped { + replaced = reference.len(); + } + + let (mut res, terminated) = p.consume()?; + if stopped { + res.pop().unwrap(); + } + + Some((res, terminated, replaced)) +} + +/// Parse markup. +/// +/// If `at_start` is true, things like headings that may only appear at the +/// beginning of a line or content block are initially allowed. +fn markup(p: &mut Parser, mut at_start: bool) { + p.perform(SyntaxKind::Markup { min_indent: 0 }, |p| { + while !p.eof() { + markup_node(p, &mut at_start); + } + }); +} + +/// Parse markup that stays right of the given `column`. +fn markup_indented(p: &mut Parser, min_indent: usize) { + p.eat_while(|t| match t { + SyntaxKind::Space { newlines } => newlines == 0, + SyntaxKind::LineComment | SyntaxKind::BlockComment => true, + _ => false, + }); + + let marker = p.marker(); + let mut at_start = false; + + while !p.eof() { + match p.peek() { + Some(SyntaxKind::Space { newlines: (1..) 
}) + if p.column(p.current_end()) < min_indent => + { + break; + } + _ => {} + } + + markup_node(p, &mut at_start); + } + + marker.end(p, SyntaxKind::Markup { min_indent }); +} + +/// Parse a line of markup that can prematurely end if `f` returns true. +fn markup_line(p: &mut Parser, mut f: F) +where + F: FnMut(SyntaxKind) -> bool, +{ + p.eat_while(|t| match t { + SyntaxKind::Space { newlines } => newlines == 0, + SyntaxKind::LineComment | SyntaxKind::BlockComment => true, + _ => false, + }); + + p.perform(SyntaxKind::Markup { min_indent: usize::MAX }, |p| { + let mut at_start = false; + while let Some(kind) = p.peek() { + if let SyntaxKind::Space { newlines: (1..) } = kind { + break; + } + + if f(kind) { + break; + } + + markup_node(p, &mut at_start); + } + }); +} + +fn markup_node(p: &mut Parser, at_start: &mut bool) { + let Some(token) = p.peek() else { return }; + match token { + // Whitespace. + SyntaxKind::Space { newlines } => { + *at_start |= newlines > 0; + p.eat(); + return; + } + + // Comments. + SyntaxKind::LineComment | SyntaxKind::BlockComment => { + p.eat(); + return; + } + + // Text and markup. + SyntaxKind::Text + | SyntaxKind::Linebreak + | SyntaxKind::SmartQuote { .. } + | SyntaxKind::Escape + | SyntaxKind::Shorthand + | SyntaxKind::Symbol + | SyntaxKind::Link + | SyntaxKind::Raw { .. } + | SyntaxKind::Ref => p.eat(), + + // Math. + SyntaxKind::Dollar => math(p), + + // Strong, emph, heading. + SyntaxKind::Star => strong(p), + SyntaxKind::Underscore => emph(p), + SyntaxKind::Eq => heading(p, *at_start), + + // Lists. + SyntaxKind::Minus => list_item(p, *at_start), + SyntaxKind::Plus | SyntaxKind::EnumNumbering => enum_item(p, *at_start), + SyntaxKind::Slash => { + term_item(p, *at_start).ok(); + } + SyntaxKind::Colon => { + let marker = p.marker(); + p.eat(); + marker.convert(p, SyntaxKind::Text); + } + + // Hashtag + keyword / identifier. + SyntaxKind::Ident + | SyntaxKind::Label + | SyntaxKind::Let + | SyntaxKind::Set + | SyntaxKind::Show + | SyntaxKind::If + | SyntaxKind::While + | SyntaxKind::For + | SyntaxKind::Import + | SyntaxKind::Include + | SyntaxKind::Break + | SyntaxKind::Continue + | SyntaxKind::Return => embedded_expr(p), + + // Code and content block. 
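// [Editor's note (not part of this patch).] For orientation, the dispatch
// in this match covers e.g. `*strong*` via `strong`, `_emph_` via `emph`,
// `= Title` at the start of a line via `heading`, `- item` via `list_item`,
// `+ item` and `1. item` via `enum_item`, `/ term: ...` via `term_item`,
// and `#let`, `#if`, `#import`, ... via `embedded_expr`.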
+ SyntaxKind::LeftBrace => code_block(p), + SyntaxKind::LeftBracket => content_block(p), + + SyntaxKind::Error => p.eat(), + _ => p.unexpected(), + }; + + *at_start = false; +} + +fn strong(p: &mut Parser) { + p.perform(SyntaxKind::Strong, |p| { + p.start_group(Group::Strong); + markup(p, false); + p.end_group(); + }) +} + +fn emph(p: &mut Parser) { + p.perform(SyntaxKind::Emph, |p| { + p.start_group(Group::Emph); + markup(p, false); + p.end_group(); + }) +} + +fn heading(p: &mut Parser, at_start: bool) { + let marker = p.marker(); + let mut markers = vec![]; + while p.at(SyntaxKind::Eq) { + markers.push(p.marker()); + p.eat(); + } + + if at_start && p.peek().map_or(true, |kind| kind.is_space()) { + p.eat_while(|kind| kind == SyntaxKind::Space { newlines: 0 }); + markup_line(p, |kind| matches!(kind, SyntaxKind::Label)); + marker.end(p, SyntaxKind::Heading); + } else { + for marker in markers { + marker.convert(p, SyntaxKind::Text); + } + } +} + +fn list_item(p: &mut Parser, at_start: bool) { + let marker = p.marker(); + p.assert(SyntaxKind::Minus); + + let min_indent = p.column(p.prev_end()); + if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() { + markup_indented(p, min_indent); + marker.end(p, SyntaxKind::ListItem); + } else { + marker.convert(p, SyntaxKind::Text); + } +} + +fn enum_item(p: &mut Parser, at_start: bool) { + let marker = p.marker(); + p.eat(); + + let min_indent = p.column(p.prev_end()); + if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() { + markup_indented(p, min_indent); + marker.end(p, SyntaxKind::EnumItem); + } else { + marker.convert(p, SyntaxKind::Text); + } +} + +fn term_item(p: &mut Parser, at_start: bool) -> ParseResult { + let marker = p.marker(); + p.eat(); + + let min_indent = p.column(p.prev_end()); + if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() { + markup_line(p, |node| matches!(node, SyntaxKind::Colon)); + p.expect(SyntaxKind::Colon)?; + markup_indented(p, min_indent); + marker.end(p, SyntaxKind::TermItem); + } else { + marker.convert(p, SyntaxKind::Text); + } + + Ok(()) +} + +fn embedded_expr(p: &mut Parser) { + // Does the expression need termination or can content follow directly? + let stmt = matches!( + p.peek(), + Some( + SyntaxKind::Let + | SyntaxKind::Set + | SyntaxKind::Show + | SyntaxKind::Import + | SyntaxKind::Include + ) + ); + + p.start_group(Group::Expr); + let res = expr_prec(p, true, 0); + if stmt && res.is_ok() && !p.eof() { + p.expected("semicolon or line break"); + } + p.end_group(); +} + +fn math(p: &mut Parser) { + p.perform(SyntaxKind::Math, |p| { + p.start_group(Group::Math); + while !p.eof() { + math_node(p); + } + p.end_group(); + }); +} + +fn math_node(p: &mut Parser) { + math_node_prec(p, 0, None) +} + +fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option) { + let marker = p.marker(); + math_primary(p); + + loop { + let (kind, mut prec, assoc, stop) = match p.peek() { + v if v == stop => break, + Some(SyntaxKind::Underscore) => { + (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Hat)) + } + Some(SyntaxKind::Hat) => { + (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Underscore)) + } + Some(SyntaxKind::Slash) => (SyntaxKind::Frac, 1, Assoc::Left, None), + _ => break, + }; + + if prec < min_prec { + break; + } + + match assoc { + Assoc::Left => prec += 1, + Assoc::Right => {} + } + + p.eat(); + math_node_prec(p, prec, stop); + + // Allow up to two different scripts. 
We do not risk encountering the + // previous script kind again here due to right-associativity. + if p.eat_if(SyntaxKind::Underscore) || p.eat_if(SyntaxKind::Hat) { + math_node_prec(p, prec, None); + } + + marker.end(p, kind); + } +} + +/// Parse a primary math node. +fn math_primary(p: &mut Parser) { + let Some(token) = p.peek() else { return }; + match token { + // Spaces and expressions. + SyntaxKind::Space { .. } + | SyntaxKind::Linebreak + | SyntaxKind::Escape + | SyntaxKind::Str + | SyntaxKind::Shorthand + | SyntaxKind::AlignPoint + | SyntaxKind::Symbol => p.eat(), + + // Atoms. + SyntaxKind::Atom => match p.peek_src() { + "(" => math_group(p, Group::MathRow('(', ')')), + "{" => math_group(p, Group::MathRow('{', '}')), + "[" => math_group(p, Group::MathRow('[', ']')), + _ => p.eat(), + }, + + // Identifiers and math calls. + SyntaxKind::Ident => { + let marker = p.marker(); + p.eat(); + + // Parenthesis or bracket means this is a function call. + if matches!(p.peek_direct(), Some(SyntaxKind::Atom) if p.peek_src() == "(") { + marker.perform(p, SyntaxKind::FuncCall, math_args); + } + } + + // Hashtag + keyword / identifier. + SyntaxKind::Let + | SyntaxKind::Set + | SyntaxKind::Show + | SyntaxKind::If + | SyntaxKind::While + | SyntaxKind::For + | SyntaxKind::Import + | SyntaxKind::Include + | SyntaxKind::Break + | SyntaxKind::Continue + | SyntaxKind::Return => embedded_expr(p), + + // Code and content block. + SyntaxKind::LeftBrace => code_block(p), + SyntaxKind::LeftBracket => content_block(p), + + _ => p.unexpected(), + } +} + +fn math_group(p: &mut Parser, group: Group) { + p.perform(SyntaxKind::Math, |p| { + p.start_group(group); + while !p.eof() { + math_node(p); + } + p.end_group(); + }) +} + +fn expr(p: &mut Parser) -> ParseResult { + expr_prec(p, false, 0) +} + +/// Parse an expression with operators having at least the minimum precedence. +/// +/// If `atomic` is true, this does not parse binary operations and arrow +/// functions, which is exactly what we want in a shorthand expression directly +/// in markup. +/// +/// Stops parsing at operations with lower precedence than `min_prec`, +fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { + let marker = p.marker(); + + // Start the unary expression. + match p.peek().and_then(UnOp::from_token) { + Some(op) if !atomic => { + p.eat(); + let prec = op.precedence(); + expr_prec(p, atomic, prec)?; + marker.end(p, SyntaxKind::Unary); + } + _ => primary(p, atomic)?, + }; + + loop { + // Parenthesis or bracket means this is a function call. + if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct() { + marker.perform(p, SyntaxKind::FuncCall, args)?; + continue; + } + + if atomic { + break; + } + + // Method call or field access. 
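// [Editor's note (not part of this patch).] The check right below handles
// `.field` access and `.method()` chains before any binary operator is
// considered. For the binary part further down, a quick trace of the
// precedence climbing on `1 + 2 * 3`: after parsing `1` the loop sees `+`,
// bumps its precedence by one (left-associative) and recurses; that call
// parses `2`, sees the higher-precedence `*` and recurses again to consume
// `3`. The tree therefore nests as `1 + (2 * 3)`.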
+ if p.eat_if(SyntaxKind::Dot) { + ident(p)?; + if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct() + { + marker.perform(p, SyntaxKind::MethodCall, args)?; + } else { + marker.end(p, SyntaxKind::FieldAccess); + } + continue; + } + + let op = if p.eat_if(SyntaxKind::Not) { + if p.at(SyntaxKind::In) { + BinOp::NotIn + } else { + p.expected("keyword `in`"); + return Err(ParseError); + } + } else { + match p.peek().and_then(BinOp::from_token) { + Some(binop) => binop, + None => break, + } + }; + + let mut prec = op.precedence(); + if prec < min_prec { + break; + } + + p.eat(); + + match op.assoc() { + Assoc::Left => prec += 1, + Assoc::Right => {} + } + + marker.perform(p, SyntaxKind::Binary, |p| expr_prec(p, atomic, prec))?; + } + + Ok(()) +} + +fn primary(p: &mut Parser, atomic: bool) -> ParseResult { + match p.peek() { + // Literals and few other things. + Some( + SyntaxKind::None + | SyntaxKind::Auto + | SyntaxKind::Int + | SyntaxKind::Float + | SyntaxKind::Bool + | SyntaxKind::Numeric + | SyntaxKind::Str + | SyntaxKind::Label + | SyntaxKind::Raw { .. }, + ) => { + p.eat(); + Ok(()) + } + + // Things that start with an identifier. + Some(SyntaxKind::Ident) => { + let marker = p.marker(); + p.eat(); + + // Arrow means this is a closure's lone parameter. + if !atomic && p.at(SyntaxKind::Arrow) { + marker.end(p, SyntaxKind::Params); + p.assert(SyntaxKind::Arrow); + marker.perform(p, SyntaxKind::Closure, expr) + } else { + Ok(()) + } + } + + // Structures. + Some(SyntaxKind::LeftParen) => parenthesized(p, atomic), + Some(SyntaxKind::LeftBrace) => Ok(code_block(p)), + Some(SyntaxKind::LeftBracket) => Ok(content_block(p)), + Some(SyntaxKind::Dollar) => Ok(math(p)), + + // Keywords. + Some(SyntaxKind::Let) => let_binding(p), + Some(SyntaxKind::Set) => set_rule(p), + Some(SyntaxKind::Show) => show_rule(p), + Some(SyntaxKind::If) => conditional(p), + Some(SyntaxKind::While) => while_loop(p), + Some(SyntaxKind::For) => for_loop(p), + Some(SyntaxKind::Import) => module_import(p), + Some(SyntaxKind::Include) => module_include(p), + Some(SyntaxKind::Break) => break_stmt(p), + Some(SyntaxKind::Continue) => continue_stmt(p), + Some(SyntaxKind::Return) => return_stmt(p), + + Some(SyntaxKind::Error) => { + p.eat(); + Err(ParseError) + } + + // Nothing. + _ => { + p.expected_found("expression"); + Err(ParseError) + } + } +} + +fn ident(p: &mut Parser) -> ParseResult { + match p.peek() { + Some(SyntaxKind::Ident) => { + p.eat(); + Ok(()) + } + _ => { + p.expected_found("identifier"); + Err(ParseError) + } + } +} + +/// Parse something that starts with a parenthesis, which can be either of: +/// - Array literal +/// - Dictionary literal +/// - Parenthesized expression +/// - Parameter list of closure expression +fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult { + let marker = p.marker(); + + p.start_group(Group::Paren); + let colon = p.eat_if(SyntaxKind::Colon); + let kind = collection(p, true).0; + p.end_group(); + + // Leading colon makes this a dictionary. + if colon { + dict(p, marker); + return Ok(()); + } + + // Arrow means this is a closure's parameter list. + if !atomic && p.at(SyntaxKind::Arrow) { + params(p, marker); + p.assert(SyntaxKind::Arrow); + return marker.perform(p, SyntaxKind::Closure, expr); + } + + // Transform into the identified collection. 
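// [Editor's note (not part of this patch).] Concretely: `(1 + 2)` stays a
// `Parenthesized` group, `(1, 2)` and `(1,)` become an `Array`, `(:)` and
// `(a: 1)` become a `Dict`, and `(x, y) => x + y` was already rewritten
// into the `Params` of a `Closure` just above.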
+ match kind { + CollectionKind::Group => marker.end(p, SyntaxKind::Parenthesized), + CollectionKind::Positional => array(p, marker), + CollectionKind::Named => dict(p, marker), + } + + Ok(()) +} + +/// The type of a collection. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum CollectionKind { + /// The collection is only one item and has no comma. + Group, + /// The collection starts with a positional item and has multiple items or a + /// trailing comma. + Positional, + /// The collection starts with a colon or named item. + Named, +} + +/// Parse a collection. +/// +/// Returns the length of the collection and whether the literal contained any +/// commas. +fn collection(p: &mut Parser, keyed: bool) -> (CollectionKind, usize) { + let mut collection_kind = None; + let mut items = 0; + let mut can_group = true; + let mut missing_coma: Option = None; + + while !p.eof() { + let Ok(item_kind) = item(p, keyed) else { + p.eat_if(SyntaxKind::Comma); + collection_kind = Some(CollectionKind::Group); + continue; + }; + + match item_kind { + SyntaxKind::Spread => can_group = false, + SyntaxKind::Named if collection_kind.is_none() => { + collection_kind = Some(CollectionKind::Named); + can_group = false; + } + _ if collection_kind.is_none() => { + collection_kind = Some(CollectionKind::Positional); + } + _ => {} + } + + items += 1; + + if let Some(marker) = missing_coma.take() { + p.expected_at(marker, "comma"); + } + + if p.eof() { + break; + } + + if p.eat_if(SyntaxKind::Comma) { + can_group = false; + } else { + missing_coma = Some(p.trivia_start()); + } + } + + let kind = if can_group && items == 1 { + CollectionKind::Group + } else { + collection_kind.unwrap_or(CollectionKind::Positional) + }; + + (kind, items) +} + +fn item(p: &mut Parser, keyed: bool) -> ParseResult { + let marker = p.marker(); + if p.eat_if(SyntaxKind::Dots) { + marker.perform(p, SyntaxKind::Spread, expr)?; + return Ok(SyntaxKind::Spread); + } + + expr(p)?; + + if p.at(SyntaxKind::Colon) { + match marker.after(p).map(|c| c.kind()) { + Some(SyntaxKind::Ident) => { + p.eat(); + marker.perform(p, SyntaxKind::Named, expr)?; + } + Some(SyntaxKind::Str) if keyed => { + p.eat(); + marker.perform(p, SyntaxKind::Keyed, expr)?; + } + kind => { + let mut msg = EcoString::from("expected identifier"); + if keyed { + msg.push_str(" or string"); + } + if let Some(kind) = kind { + msg.push_str(", found "); + msg.push_str(kind.name()); + } + marker.to_error(p, msg); + p.eat(); + marker.perform(p, SyntaxKind::Named, expr).ok(); + return Err(ParseError); + } + } + + Ok(SyntaxKind::Named) + } else { + Ok(SyntaxKind::None) + } +} + +fn array(p: &mut Parser, marker: Marker) { + marker.filter_children(p, |x| match x.kind() { + SyntaxKind::Named | SyntaxKind::Keyed => Err("expected expression"), + _ => Ok(()), + }); + marker.end(p, SyntaxKind::Array); +} + +fn dict(p: &mut Parser, marker: Marker) { + let mut used = HashSet::new(); + marker.filter_children(p, |x| match x.kind() { + kind if kind.is_paren() => Ok(()), + SyntaxKind::Named | SyntaxKind::Keyed => { + if let Some(child) = x.children().next() { + let key = match child.cast::() { + Some(str) => str.get(), + None => child.text().clone(), + }; + + if !used.insert(key) { + return Err("pair has duplicate key"); + } + } + Ok(()) + } + SyntaxKind::Spread | SyntaxKind::Comma | SyntaxKind::Colon => Ok(()), + _ => Err("expected named or keyed pair"), + }); + marker.end(p, SyntaxKind::Dict); +} + +fn params(p: &mut Parser, marker: Marker) { + marker.filter_children(p, |x| match x.kind() { + kind 
if kind.is_paren() => Ok(()), + SyntaxKind::Named | SyntaxKind::Ident | SyntaxKind::Comma => Ok(()), + SyntaxKind::Spread + if matches!( + x.children().last().map(|child| child.kind()), + Some(SyntaxKind::Ident) + ) => + { + Ok(()) + } + _ => Err("expected identifier, named pair or argument sink"), + }); + marker.end(p, SyntaxKind::Params); +} + +/// Parse a code block: `{...}`. +fn code_block(p: &mut Parser) { + p.perform(SyntaxKind::CodeBlock, |p| { + p.start_group(Group::Brace); + code(p); + p.end_group(); + }); +} + +fn code(p: &mut Parser) { + while !p.eof() { + p.start_group(Group::Expr); + if expr(p).is_ok() && !p.eof() { + p.expected("semicolon or line break"); + } + p.end_group(); + + // Forcefully skip over newlines since the group's contents can't. + p.eat_while(SyntaxKind::is_space); + } +} + +fn content_block(p: &mut Parser) { + p.perform(SyntaxKind::ContentBlock, |p| { + p.start_group(Group::Bracket); + markup(p, true); + p.end_group(); + }); +} + +fn args(p: &mut Parser) -> ParseResult { + match p.peek_direct() { + Some(SyntaxKind::LeftParen) => {} + Some(SyntaxKind::LeftBracket) => {} + _ => { + p.expected_found("argument list"); + return Err(ParseError); + } + } + + p.perform(SyntaxKind::Args, |p| { + if p.at(SyntaxKind::LeftParen) { + let marker = p.marker(); + p.start_group(Group::Paren); + collection(p, false); + p.end_group(); + + let mut used = HashSet::new(); + marker.filter_children(p, |x| match x.kind() { + SyntaxKind::Named => { + if let Some(ident) = + x.children().next().and_then(|child| child.cast::()) + { + if !used.insert(ident.take()) { + return Err("duplicate argument"); + } + } + Ok(()) + } + _ => Ok(()), + }); + } + + while p.peek_direct() == Some(SyntaxKind::LeftBracket) { + content_block(p); + } + }); + + Ok(()) +} + +fn math_args(p: &mut Parser) { + p.start_group(Group::MathRow('(', ')')); + p.perform(SyntaxKind::Args, |p| { + let mut marker = p.marker(); + while !p.eof() { + if matches!(p.peek(), Some(SyntaxKind::Atom) if p.peek_src() == ",") { + marker.end(p, SyntaxKind::Math); + let comma = p.marker(); + p.eat(); + comma.convert(p, SyntaxKind::Comma); + marker = p.marker(); + } else { + math_node(p); + } + } + if marker != p.marker() { + marker.end(p, SyntaxKind::Math); + } + }); + p.end_group(); +} + +fn let_binding(p: &mut Parser) -> ParseResult { + p.perform(SyntaxKind::LetBinding, |p| { + p.assert(SyntaxKind::Let); + + let marker = p.marker(); + ident(p)?; + + // If a parenthesis follows, this is a function definition. + let has_params = p.peek_direct() == Some(SyntaxKind::LeftParen); + if has_params { + let marker = p.marker(); + p.start_group(Group::Paren); + collection(p, false); + p.end_group(); + params(p, marker); + } + + if p.eat_if(SyntaxKind::Eq) { + expr(p)?; + } else if has_params { + // Function definitions must have a body. + p.expected("body"); + } + + // Rewrite into a closure expression if it's a function definition. 
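// [Editor's note (not part of this patch).] For example, `let double(x) =
// 2 * x` takes this path: the parameter list and body are wrapped into a
// `Closure`, so the binding behaves like `let double = (x) => 2 * x`.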
+ if has_params { + marker.end(p, SyntaxKind::Closure); + } + + Ok(()) + }) +} + +fn set_rule(p: &mut Parser) -> ParseResult { + p.perform(SyntaxKind::SetRule, |p| { + p.assert(SyntaxKind::Set); + ident(p)?; + args(p)?; + if p.eat_if(SyntaxKind::If) { + expr(p)?; + } + Ok(()) + }) +} + +fn show_rule(p: &mut Parser) -> ParseResult { + p.perform(SyntaxKind::ShowRule, |p| { + p.assert(SyntaxKind::Show); + expr(p)?; + if p.eat_if(SyntaxKind::Colon) { + expr(p)?; + } + Ok(()) + }) +} + +fn conditional(p: &mut Parser) -> ParseResult { + p.perform(SyntaxKind::Conditional, |p| { + p.assert(SyntaxKind::If); + + expr(p)?; + body(p)?; + + if p.eat_if(SyntaxKind::Else) { + if p.at(SyntaxKind::If) { + conditional(p)?; + } else { + body(p)?; + } + } + + Ok(()) + }) +} + +fn while_loop(p: &mut Parser) -> ParseResult { + p.perform(SyntaxKind::WhileLoop, |p| { + p.assert(SyntaxKind::While); + expr(p)?; + body(p) + }) +} + +fn for_loop(p: &mut Parser) -> ParseResult { + p.perform(SyntaxKind::ForLoop, |p| { + p.assert(SyntaxKind::For); + for_pattern(p)?; + p.expect(SyntaxKind::In)?; + expr(p)?; + body(p) + }) +} + +fn for_pattern(p: &mut Parser) -> ParseResult { + p.perform(SyntaxKind::ForPattern, |p| { + ident(p)?; + if p.eat_if(SyntaxKind::Comma) { + ident(p)?; + } + Ok(()) + }) +} + +fn module_import(p: &mut Parser) -> ParseResult { + p.perform(SyntaxKind::ModuleImport, |p| { + p.assert(SyntaxKind::Import); + expr(p)?; + + if !p.eat_if(SyntaxKind::Colon) || p.eat_if(SyntaxKind::Star) { + return Ok(()); + } + + // This is the list of identifiers scenario. + p.perform(SyntaxKind::ImportItems, |p| { + let marker = p.marker(); + let items = collection(p, false).1; + if items == 0 { + p.expected("import items"); + } + marker.filter_children(p, |n| match n.kind() { + SyntaxKind::Ident | SyntaxKind::Comma => Ok(()), + _ => Err("expected identifier"), + }); + }); + + Ok(()) + }) +} + +fn module_include(p: &mut Parser) -> ParseResult { + p.perform(SyntaxKind::ModuleInclude, |p| { + p.assert(SyntaxKind::Include); + expr(p) + }) +} + +fn break_stmt(p: &mut Parser) -> ParseResult { + p.perform(SyntaxKind::LoopBreak, |p| { + p.assert(SyntaxKind::Break); + Ok(()) + }) +} + +fn continue_stmt(p: &mut Parser) -> ParseResult { + p.perform(SyntaxKind::LoopContinue, |p| { + p.assert(SyntaxKind::Continue); + Ok(()) + }) +} + +fn return_stmt(p: &mut Parser) -> ParseResult { + p.perform(SyntaxKind::FuncReturn, |p| { + p.assert(SyntaxKind::Return); + if !p.at(SyntaxKind::Comma) && !p.eof() { + expr(p)?; + } + Ok(()) + }) +} + +fn body(p: &mut Parser) -> ParseResult { + match p.peek() { + Some(SyntaxKind::LeftBracket) => Ok(content_block(p)), + Some(SyntaxKind::LeftBrace) => Ok(code_block(p)), + _ => { + p.expected("body"); + Err(ParseError) + } + } +} + /// A convenient token-based parser. -pub struct Parser<'s> { +struct Parser<'s> { /// An iterator over the source tokens. - tokens: Lexer<'s>, + lexer: Lexer<'s>, /// Whether we are at the end of the file or of a group. eof: bool, /// The current token. @@ -29,18 +1133,18 @@ pub struct Parser<'s> { impl<'s> Parser<'s> { /// Create a new parser for the source string. - pub fn new(text: &'s str, mode: LexMode) -> Self { + fn new(text: &'s str, mode: LexMode) -> Self { Self::with_prefix("", text, mode) } /// Create a new parser for the source string that is prefixed by some text /// that does not need to be parsed but taken into account for column /// calculation. 
- pub fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self { - let mut tokens = Lexer::with_prefix(prefix, text, mode); - let current = tokens.next(); + fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self { + let mut lexer = Lexer::with_prefix(prefix, text, mode); + let current = lexer.next(); Self { - tokens, + lexer, eof: current.is_none(), current, prev_end: 0, @@ -53,7 +1157,7 @@ impl<'s> Parser<'s> { } /// End the parsing process and return the parsed children. - pub fn finish(self) -> Vec { + fn finish(self) -> Vec { self.children } @@ -61,17 +1165,17 @@ impl<'s> Parser<'s> { /// - the parsed children and whether the last token was terminated, if all /// groups were terminated correctly, or /// - `None` otherwise. - pub fn consume(self) -> Option<(Vec, bool)> { - self.terminated().then(|| (self.children, self.tokens.terminated())) + fn consume(self) -> Option<(Vec, bool)> { + self.terminated().then(|| (self.children, self.lexer.terminated())) } /// Create a new marker. - pub fn marker(&mut self) -> Marker { + fn marker(&mut self) -> Marker { Marker(self.children.len()) } /// Create a marker right before the trailing trivia. - pub fn trivia_start(&self) -> Marker { + fn trivia_start(&self) -> Marker { let count = self .children .iter() @@ -82,7 +1186,7 @@ impl<'s> Parser<'s> { } /// Perform a subparse that wraps its result in a node with the given kind. - pub fn perform(&mut self, kind: SyntaxKind, f: F) -> T + fn perform(&mut self, kind: SyntaxKind, f: F) -> T where F: FnOnce(&mut Self) -> T, { @@ -91,7 +1195,7 @@ impl<'s> Parser<'s> { let until = self.trivia_start(); let mut children = mem::replace(&mut self.children, prev); - if self.tokens.mode() == LexMode::Markup { + if self.lexer.mode() == LexMode::Markup { self.children.push(SyntaxNode::inner(kind, children)); } else { // Trailing trivia should not be wrapped into the new node. @@ -105,12 +1209,12 @@ impl<'s> Parser<'s> { } /// Whether the end of the source string or group is reached. - pub fn eof(&self) -> bool { + fn eof(&self) -> bool { self.eof } /// Consume the current token and also trailing trivia. - pub fn eat(&mut self) { + fn eat(&mut self) { self.stray_terminator |= match self.current { Some(SyntaxKind::RightParen) => !self.inside(Group::Paren), Some(SyntaxKind::RightBracket) => !self.inside(Group::Bracket), @@ -118,12 +1222,12 @@ impl<'s> Parser<'s> { _ => false, }; - self.prev_end = self.tokens.cursor(); + self.prev_end = self.lexer.cursor(); self.bump(); - if self.tokens.mode() != LexMode::Markup { + if self.lexer.mode() != LexMode::Markup { // Skip whitespace and comments. - while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) { + while self.current.map_or(false, |kind| self.is_trivia(kind)) { self.bump(); } } @@ -132,7 +1236,7 @@ impl<'s> Parser<'s> { } /// Consume the current token if it is the given one. - pub fn eat_if(&mut self, kind: SyntaxKind) -> bool { + fn eat_if(&mut self, kind: SyntaxKind) -> bool { let at = self.at(kind); if at { self.eat(); @@ -141,9 +1245,9 @@ impl<'s> Parser<'s> { } /// Eat tokens while the condition is true. - pub fn eat_while(&mut self, mut f: F) + fn eat_while(&mut self, mut f: F) where - F: FnMut(&SyntaxKind) -> bool, + F: FnMut(SyntaxKind) -> bool, { while self.peek().map_or(false, |t| f(t)) { self.eat(); @@ -152,8 +1256,8 @@ impl<'s> Parser<'s> { /// Consume the current token if it is the given one and produce an error if /// not. 
- pub fn expect(&mut self, kind: SyntaxKind) -> ParseResult { - let at = self.peek() == Some(&kind); + fn expect(&mut self, kind: SyntaxKind) -> ParseResult { + let at = self.peek() == Some(kind); if at { self.eat(); Ok(()) @@ -165,28 +1269,28 @@ impl<'s> Parser<'s> { /// Consume the current token, debug-asserting that it is the given one. #[track_caller] - pub fn assert(&mut self, kind: SyntaxKind) { - debug_assert_eq!(self.peek(), Some(&kind)); + fn assert(&mut self, kind: SyntaxKind) { + debug_assert_eq!(self.peek(), Some(kind)); self.eat(); } /// Whether the current token is of the given type. - pub fn at(&self, kind: SyntaxKind) -> bool { - self.peek() == Some(&kind) + fn at(&self, kind: SyntaxKind) -> bool { + self.peek() == Some(kind) } /// Peek at the current token without consuming it. - pub fn peek(&self) -> Option<&SyntaxKind> { + fn peek(&self) -> Option { if self.eof { None } else { - self.current.as_ref() + self.current } } /// Peek at the current token, but only if it follows immediately after the /// last one without any trivia in between. - pub fn peek_direct(&self) -> Option<&SyntaxKind> { + fn peek_direct(&self) -> Option { if self.prev_end() == self.current_start() { self.peek() } else { @@ -194,34 +1298,34 @@ impl<'s> Parser<'s> { } } - /// Peek at the source of the current token. - pub fn peek_src(&self) -> &'s str { - self.get(self.current_start()..self.current_end()) - } - - /// Obtain a range of the source code. - pub fn get(&self, range: Range) -> &'s str { - self.tokens.scanner().get(range) - } - /// The byte index at which the last non-trivia token ended. - pub fn prev_end(&self) -> usize { + fn prev_end(&self) -> usize { self.prev_end } /// The byte index at which the current token starts. - pub fn current_start(&self) -> usize { + fn current_start(&self) -> usize { self.current_start } /// The byte index at which the current token ends. - pub fn current_end(&self) -> usize { - self.tokens.cursor() + fn current_end(&self) -> usize { + self.lexer.cursor() + } + + /// The byte length of the current token. + fn current_len(&self) -> usize { + self.current_end() - self.current_start() + } + + /// The text of the current node. + fn peek_src(&self) -> &str { + self.lexer.scanner().from(self.current_start) } /// Determine the column index for the given byte index. - pub fn column(&self, index: usize) -> usize { - self.tokens.column(index) + fn column(&self, index: usize) -> usize { + self.lexer.column(index) } /// Continue parsing in a group. @@ -232,9 +1336,9 @@ impl<'s> Parser<'s> { /// /// This panics if the current token does not start the given group. #[track_caller] - pub fn start_group(&mut self, kind: Group) { - self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); - self.tokens.set_mode(match kind { + fn start_group(&mut self, kind: Group) { + self.groups.push(GroupEntry { kind, prev_mode: self.lexer.mode() }); + self.lexer.set_mode(match kind { Group::Bracket | Group::Strong | Group::Emph => LexMode::Markup, Group::Math | Group::MathRow(_, _) => LexMode::Math, Group::Brace | Group::Paren | Group::Expr => LexMode::Code, @@ -247,7 +1351,7 @@ impl<'s> Parser<'s> { Group::Strong => self.assert(SyntaxKind::Star), Group::Emph => self.assert(SyntaxKind::Underscore), Group::Math => self.assert(SyntaxKind::Dollar), - Group::MathRow(l, _) => self.assert(SyntaxKind::Atom(l.into())), + Group::MathRow(..) 
=> self.assert(SyntaxKind::Atom), Group::Expr => self.repeek(), } } @@ -256,12 +1360,12 @@ impl<'s> Parser<'s> { /// /// This panics if no group was started. #[track_caller] - pub fn end_group(&mut self) { - let group_mode = self.tokens.mode(); + fn end_group(&mut self) { + let group_mode = self.lexer.mode(); let group = self.groups.pop().expect("no started group"); - self.tokens.set_mode(group.prev_mode); + self.lexer.set_mode(group.prev_mode); - let mut rescan = self.tokens.mode() != group_mode; + let mut rescan = self.lexer.mode() != group_mode; // Eat the end delimiter if there is one. if let Some((end, required)) = match group.kind { @@ -271,7 +1375,7 @@ impl<'s> Parser<'s> { Group::Strong => Some((SyntaxKind::Star, true)), Group::Emph => Some((SyntaxKind::Underscore, true)), Group::Math => Some((SyntaxKind::Dollar, true)), - Group::MathRow(_, r) => Some((SyntaxKind::Atom(r.into()), true)), + Group::MathRow(..) => Some((SyntaxKind::Atom, true)), Group::Expr => Some((SyntaxKind::Semicolon, false)), } { if self.current.as_ref() == Some(&end) { @@ -303,10 +1407,10 @@ impl<'s> Parser<'s> { self.children.truncate(start); } - self.tokens.jump(target); - self.prev_end = self.tokens.cursor(); - self.current_start = self.tokens.cursor(); - self.current = self.tokens.next(); + self.lexer.jump(target); + self.prev_end = self.lexer.cursor(); + self.current_start = self.lexer.cursor(); + self.current = self.lexer.next(); } self.repeek(); @@ -320,11 +1424,16 @@ impl<'s> Parser<'s> { /// Low-level bump that consumes exactly one token without special trivia /// handling. fn bump(&mut self) { - let kind = self.current.take().unwrap(); - let len = self.tokens.cursor() - self.current_start; - self.children.push(SyntaxNode::leaf(kind, len)); - self.current_start = self.tokens.cursor(); - self.current = self.tokens.next(); + if let Some((message, pos)) = self.lexer.last_error() { + let len = self.current_len(); + self.children.push(SyntaxNode::error(message, pos, len)) + } else { + let kind = self.current.unwrap(); + let text = self.peek_src(); + self.children.push(SyntaxNode::leaf(kind, text)); + } + self.current_start = self.lexer.cursor(); + self.current = self.lexer.next(); } /// Take another look at the current token to recheck whether it ends a @@ -344,7 +1453,7 @@ impl<'s> Parser<'s> { .next() .map_or(false, |group| group.kind == Group::Math), Some(SyntaxKind::Semicolon) => self.inside(Group::Expr), - Some(SyntaxKind::Atom(s)) => match s.as_str() { + Some(SyntaxKind::Atom) => match self.peek_src() { ")" => self.inside(Group::MathRow('(', ')')), "}" => self.inside(Group::MathRow('{', '}')), "]" => self.inside(Group::MathRow('[', ']')), @@ -357,9 +1466,9 @@ impl<'s> Parser<'s> { } /// Returns whether the given type can be skipped over. - fn is_trivia(&self, token: &SyntaxKind) -> bool { + fn is_trivia(&self, token: SyntaxKind) -> bool { match token { - SyntaxKind::Space { newlines } => !self.space_ends_group(*newlines), + SyntaxKind::Space { newlines } => !self.space_ends_group(newlines), SyntaxKind::LineComment => true, SyntaxKind::BlockComment => true, _ => false, @@ -379,7 +1488,7 @@ impl<'s> Parser<'s> { self.groups.iter().nth_back(1).map(|group| group.kind) != Some(Group::Brace) || !matches!( - self.tokens.clone().next(), + self.lexer.clone().next(), Some(SyntaxKind::Else | SyntaxKind::Dot) ) } @@ -400,35 +1509,37 @@ impl<'s> Parser<'s> { /// Error handling. impl Parser<'_> { /// Eat the current token and add an error that it is unexpected. 
- pub fn unexpected(&mut self) { + fn unexpected(&mut self) { if let Some(found) = self.peek() { + let marker = self.marker(); let msg = format_eco!("unexpected {}", found.name()); - let error = SyntaxKind::Error(ErrorPos::Full, msg); - self.perform(error, Self::eat); + self.eat(); + marker.to_error(self, msg); } } /// Add an error that the `thing` was expected at the end of the last /// non-trivia token. - pub fn expected(&mut self, thing: &str) { + fn expected(&mut self, thing: &str) { self.expected_at(self.trivia_start(), thing); } /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(&mut self, marker: Marker, what: &str) { + fn expected_at(&mut self, marker: Marker, what: &str) { let msg = format_eco!("expected {}", what); - let error = SyntaxKind::Error(ErrorPos::Full, msg); - self.children.insert(marker.0, SyntaxNode::leaf(error, 0)); + self.children + .insert(marker.0, SyntaxNode::error(msg, ErrorPos::Full, 0)); } /// Eat the current token and add an error that it is not the expected /// `thing`. - pub fn expected_found(&mut self, thing: &str) { + fn expected_found(&mut self, thing: &str) { match self.peek() { Some(found) => { + let marker = self.marker(); let msg = format_eco!("expected {}, found {}", thing, found.name()); - let error = SyntaxKind::Error(ErrorPos::Full, msg); - self.perform(error, Self::eat); + self.eat(); + marker.to_error(self, msg); } None => self.expected(thing), } @@ -437,29 +1548,36 @@ impl Parser<'_> { /// Marks a location in a parser's child list. #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Marker(usize); +struct Marker(usize); impl Marker { /// Peek at the child directly before the marker. - pub fn before<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> { + fn before<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> { p.children.get(self.0.checked_sub(1)?) } /// Peek at the child directly after the marker. - pub fn after<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> { + fn after<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> { p.children.get(self.0) } /// Convert the child directly after marker. - pub fn convert(self, p: &mut Parser, kind: SyntaxKind) { + fn convert(self, p: &mut Parser, kind: SyntaxKind) { if let Some(child) = p.children.get_mut(self.0) { - child.convert(kind); + child.convert_to(kind); + } + } + + /// Convert the child directly after marker. + fn to_error(self, p: &mut Parser, message: impl Into) { + if let Some(child) = p.children.get_mut(self.0) { + child.convert_to_error(message); } } /// Perform a subparse that wraps all children after the marker in a node /// with the given kind. - pub fn perform(self, p: &mut Parser, kind: SyntaxKind, f: F) -> T + fn perform(self, p: &mut Parser, kind: SyntaxKind, f: F) -> T where F: FnOnce(&mut Parser) -> T, { @@ -470,14 +1588,14 @@ impl Marker { /// Wrap all children after the marker (excluding trailing trivia) in a node /// with the given `kind`. - pub fn end(self, p: &mut Parser, kind: SyntaxKind) { + fn end(self, p: &mut Parser, kind: SyntaxKind) { let until = p.trivia_start().0.max(self.0); let children = p.children.drain(self.0..until).collect(); p.children.insert(self.0, SyntaxNode::inner(kind, children)); } /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children(self, p: &mut Parser, mut f: F) + fn filter_children(self, p: &mut Parser, mut f: F) where F: FnMut(&SyntaxNode) -> Result<(), &'static str>, { @@ -488,7 +1606,7 @@ impl Marker { } // Don't expose trivia in code. 
- if p.tokens.mode() != LexMode::Markup && child.kind().is_trivia() { + if p.lexer.mode() != LexMode::Markup && child.kind().is_trivia() { continue; } @@ -498,9 +1616,8 @@ impl Marker { msg.push_str(", found "); msg.push_str(child.kind().name()); } - let error = SyntaxKind::Error(ErrorPos::Full, msg); - let inner = mem::take(child); - *child = SyntaxNode::inner(error, vec![inner]); + let len = child.len(); + *child = SyntaxNode::error(msg, ErrorPos::Full, len); } } } @@ -512,15 +1629,15 @@ struct GroupEntry { /// The kind of group this is. This decides which token(s) will end the /// group. For example, a [`Group::Paren`] will be ended by /// [`Token::RightParen`]. - pub kind: Group, + kind: Group, /// The mode the parser was in _before_ the group started (to which we go /// back once the group ends). - pub prev_mode: LexMode, + prev_mode: LexMode, } /// A group, confined by optional start and end delimiters. #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Group { +enum Group { /// A curly-braced group: `{...}`. Brace, /// A bracketed group: `[...]`. @@ -548,11 +1665,11 @@ impl Group { /// Allows parser methods to use the try operator. Never returned top-level /// because the parser recovers from all errors. -pub type ParseResult = Result; +type ParseResult = Result; /// The error type for parsing. #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct ParseError; +struct ParseError; impl Display for ParseError { fn fmt(&self, f: &mut Formatter) -> fmt::Result { diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs deleted file mode 100644 index a6e6c8617..000000000 --- a/src/syntax/parsing.rs +++ /dev/null @@ -1,1118 +0,0 @@ -use std::collections::HashSet; - -use super::ast::{Assoc, BinOp, UnOp}; -use super::{ - ErrorPos, Group, LexMode, Marker, ParseError, ParseResult, Parser, SyntaxKind, - SyntaxNode, -}; -use crate::util::EcoString; - -/// Parse a source file. -pub fn parse(text: &str) -> SyntaxNode { - let mut p = Parser::new(text, LexMode::Markup); - markup(&mut p, true); - p.finish().into_iter().next().unwrap() -} - -/// Parse code directly, only used for syntax highlighting. -pub fn parse_code(text: &str) -> SyntaxNode { - let mut p = Parser::new(text, LexMode::Code); - p.perform(SyntaxKind::CodeBlock, code); - p.finish().into_iter().next().unwrap() -} - -/// Reparse a code block. -/// -/// Returns `Some` if all of the input was consumed. -pub(crate) fn reparse_code_block( - prefix: &str, - text: &str, - end_pos: usize, -) -> Option<(Vec, bool, usize)> { - let mut p = Parser::with_prefix(prefix, text, LexMode::Code); - if !p.at(SyntaxKind::LeftBrace) { - return None; - } - - code_block(&mut p); - - let (mut node, terminated) = p.consume()?; - let first = node.remove(0); - if first.len() != end_pos { - return None; - } - - Some((vec![first], terminated, 1)) -} - -/// Reparse a content block. -/// -/// Returns `Some` if all of the input was consumed. -pub(crate) fn reparse_content_block( - prefix: &str, - text: &str, - end_pos: usize, -) -> Option<(Vec, bool, usize)> { - let mut p = Parser::with_prefix(prefix, text, LexMode::Code); - if !p.at(SyntaxKind::LeftBracket) { - return None; - } - - content_block(&mut p); - - let (mut node, terminated) = p.consume()?; - let first = node.remove(0); - if first.len() != end_pos { - return None; - } - - Some((vec![first], terminated, 1)) -} - -/// Reparse a sequence markup elements without the topmost node. -/// -/// Returns `Some` if all of the input was consumed. 
-pub(crate) fn reparse_markup_elements( - prefix: &str, - text: &str, - end_pos: usize, - differential: isize, - reference: &[SyntaxNode], - mut at_start: bool, - min_indent: usize, -) -> Option<(Vec, bool, usize)> { - let mut p = Parser::with_prefix(prefix, text, LexMode::Markup); - - let mut node: Option<&SyntaxNode> = None; - let mut iter = reference.iter(); - let mut offset = differential; - let mut replaced = 0; - let mut stopped = false; - - 'outer: while !p.eof() { - if let Some(SyntaxKind::Space { newlines: (1..) }) = p.peek() { - if p.column(p.current_end()) < min_indent { - return None; - } - } - - markup_node(&mut p, &mut at_start); - - if p.prev_end() <= end_pos { - continue; - } - - let recent = p.marker().before(&p).unwrap(); - let recent_start = p.prev_end() - recent.len(); - - while offset <= recent_start as isize { - if let Some(node) = node { - // The nodes are equal, at the same position and have the - // same content. The parsing trees have converged again, so - // the reparse may stop here. - if offset == recent_start as isize && node == recent { - replaced -= 1; - stopped = true; - break 'outer; - } - } - - if let Some(node) = node { - offset += node.len() as isize; - } - - node = iter.next(); - if node.is_none() { - break; - } - - replaced += 1; - } - } - - if p.eof() && !stopped { - replaced = reference.len(); - } - - let (mut res, terminated) = p.consume()?; - if stopped { - res.pop().unwrap(); - } - - Some((res, terminated, replaced)) -} - -/// Parse markup. -/// -/// If `at_start` is true, things like headings that may only appear at the -/// beginning of a line or content block are initially allowed. -fn markup(p: &mut Parser, mut at_start: bool) { - p.perform(SyntaxKind::Markup { min_indent: 0 }, |p| { - while !p.eof() { - markup_node(p, &mut at_start); - } - }); -} - -/// Parse markup that stays right of the given `column`. -fn markup_indented(p: &mut Parser, min_indent: usize) { - p.eat_while(|t| match t { - SyntaxKind::Space { newlines } => *newlines == 0, - SyntaxKind::LineComment | SyntaxKind::BlockComment => true, - _ => false, - }); - - let marker = p.marker(); - let mut at_start = false; - - while !p.eof() { - match p.peek() { - Some(SyntaxKind::Space { newlines: (1..) }) - if p.column(p.current_end()) < min_indent => - { - break; - } - _ => {} - } - - markup_node(p, &mut at_start); - } - - marker.end(p, SyntaxKind::Markup { min_indent }); -} - -/// Parse a line of markup that can prematurely end if `f` returns true. -fn markup_line(p: &mut Parser, mut f: F) -where - F: FnMut(&SyntaxKind) -> bool, -{ - p.eat_while(|t| match t { - SyntaxKind::Space { newlines } => *newlines == 0, - SyntaxKind::LineComment | SyntaxKind::BlockComment => true, - _ => false, - }); - - p.perform(SyntaxKind::Markup { min_indent: usize::MAX }, |p| { - let mut at_start = false; - while let Some(kind) = p.peek() { - if let SyntaxKind::Space { newlines: (1..) } = kind { - break; - } - - if f(kind) { - break; - } - - markup_node(p, &mut at_start); - } - }); -} - -fn markup_node(p: &mut Parser, at_start: &mut bool) { - let Some(token) = p.peek() else { return }; - match token { - // Whitespace. - SyntaxKind::Space { newlines } => { - *at_start |= *newlines > 0; - p.eat(); - return; - } - - // Comments. - SyntaxKind::LineComment | SyntaxKind::BlockComment => { - p.eat(); - return; - } - - // Text and markup. - SyntaxKind::Text(_) - | SyntaxKind::Linebreak - | SyntaxKind::SmartQuote { .. 
} - | SyntaxKind::Escape(_) - | SyntaxKind::Shorthand(_) - | SyntaxKind::Symbol(_) - | SyntaxKind::Link(_) - | SyntaxKind::Raw(_) - | SyntaxKind::Ref(_) => p.eat(), - - // Math. - SyntaxKind::Dollar => math(p), - - // Strong, emph, heading. - SyntaxKind::Star => strong(p), - SyntaxKind::Underscore => emph(p), - SyntaxKind::Eq => heading(p, *at_start), - - // Lists. - SyntaxKind::Minus => list_item(p, *at_start), - SyntaxKind::Plus | SyntaxKind::EnumNumbering(_) => enum_item(p, *at_start), - SyntaxKind::Slash => { - term_item(p, *at_start).ok(); - } - SyntaxKind::Colon => { - let marker = p.marker(); - p.eat(); - marker.convert(p, SyntaxKind::Text(':'.into())); - } - - // Hashtag + keyword / identifier. - SyntaxKind::Ident(_) - | SyntaxKind::Label(_) - | SyntaxKind::Let - | SyntaxKind::Set - | SyntaxKind::Show - | SyntaxKind::If - | SyntaxKind::While - | SyntaxKind::For - | SyntaxKind::Import - | SyntaxKind::Include - | SyntaxKind::Break - | SyntaxKind::Continue - | SyntaxKind::Return => embedded_expr(p), - - // Code and content block. - SyntaxKind::LeftBrace => code_block(p), - SyntaxKind::LeftBracket => content_block(p), - - SyntaxKind::Error(_, _) => p.eat(), - _ => p.unexpected(), - }; - - *at_start = false; -} - -fn strong(p: &mut Parser) { - p.perform(SyntaxKind::Strong, |p| { - p.start_group(Group::Strong); - markup(p, false); - p.end_group(); - }) -} - -fn emph(p: &mut Parser) { - p.perform(SyntaxKind::Emph, |p| { - p.start_group(Group::Emph); - markup(p, false); - p.end_group(); - }) -} - -fn heading(p: &mut Parser, at_start: bool) { - let marker = p.marker(); - let current_start = p.current_start(); - p.assert(SyntaxKind::Eq); - while p.eat_if(SyntaxKind::Eq) {} - - if at_start && p.peek().map_or(true, |kind| kind.is_space()) { - p.eat_while(|kind| *kind == SyntaxKind::Space { newlines: 0 }); - markup_line(p, |kind| matches!(kind, SyntaxKind::Label(_))); - marker.end(p, SyntaxKind::Heading); - } else { - let text = p.get(current_start..p.prev_end()).into(); - marker.convert(p, SyntaxKind::Text(text)); - } -} - -fn list_item(p: &mut Parser, at_start: bool) { - let marker = p.marker(); - let text: EcoString = p.peek_src().into(); - p.assert(SyntaxKind::Minus); - - let min_indent = p.column(p.prev_end()); - if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() { - markup_indented(p, min_indent); - marker.end(p, SyntaxKind::ListItem); - } else { - marker.convert(p, SyntaxKind::Text(text)); - } -} - -fn enum_item(p: &mut Parser, at_start: bool) { - let marker = p.marker(); - let text: EcoString = p.peek_src().into(); - p.eat(); - - let min_indent = p.column(p.prev_end()); - if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() { - markup_indented(p, min_indent); - marker.end(p, SyntaxKind::EnumItem); - } else { - marker.convert(p, SyntaxKind::Text(text)); - } -} - -fn term_item(p: &mut Parser, at_start: bool) -> ParseResult { - let marker = p.marker(); - let text: EcoString = p.peek_src().into(); - p.eat(); - - let min_indent = p.column(p.prev_end()); - if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() { - markup_line(p, |node| matches!(node, SyntaxKind::Colon)); - p.expect(SyntaxKind::Colon)?; - markup_indented(p, min_indent); - marker.end(p, SyntaxKind::TermItem); - } else { - marker.convert(p, SyntaxKind::Text(text)); - } - - Ok(()) -} - -fn embedded_expr(p: &mut Parser) { - // Does the expression need termination or can content follow directly? 
- let stmt = matches!( - p.peek(), - Some( - SyntaxKind::Let - | SyntaxKind::Set - | SyntaxKind::Show - | SyntaxKind::Import - | SyntaxKind::Include - ) - ); - - p.start_group(Group::Expr); - let res = expr_prec(p, true, 0); - if stmt && res.is_ok() && !p.eof() { - p.expected("semicolon or line break"); - } - p.end_group(); -} - -fn math(p: &mut Parser) { - p.perform(SyntaxKind::Math, |p| { - p.start_group(Group::Math); - while !p.eof() { - math_node(p); - } - p.end_group(); - }); -} - -fn math_node(p: &mut Parser) { - math_node_prec(p, 0, None) -} - -fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option) { - let marker = p.marker(); - math_primary(p); - - loop { - let (kind, mut prec, assoc, stop) = match p.peek() { - v if v == stop.as_ref() => break, - Some(SyntaxKind::Underscore) => { - (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Hat)) - } - Some(SyntaxKind::Hat) => { - (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Underscore)) - } - Some(SyntaxKind::Slash) => (SyntaxKind::Frac, 1, Assoc::Left, None), - _ => break, - }; - - if prec < min_prec { - break; - } - - match assoc { - Assoc::Left => prec += 1, - Assoc::Right => {} - } - - p.eat(); - math_node_prec(p, prec, stop); - - // Allow up to two different scripts. We do not risk encountering the - // previous script kind again here due to right-associativity. - if p.eat_if(SyntaxKind::Underscore) || p.eat_if(SyntaxKind::Hat) { - math_node_prec(p, prec, None); - } - - marker.end(p, kind); - } -} - -/// Parse a primary math node. -fn math_primary(p: &mut Parser) { - let Some(token) = p.peek() else { return }; - match token { - // Spaces and expressions. - SyntaxKind::Space { .. } - | SyntaxKind::Linebreak - | SyntaxKind::Escape(_) - | SyntaxKind::Str(_) - | SyntaxKind::Shorthand(_) - | SyntaxKind::Symbol(_) => p.eat(), - - // Atoms. - SyntaxKind::Atom(s) => match s.as_str() { - "(" => math_group(p, Group::MathRow('(', ')')), - "{" => math_group(p, Group::MathRow('{', '}')), - "[" => math_group(p, Group::MathRow('[', ']')), - _ => p.eat(), - }, - - // Alignment indactor. - SyntaxKind::Amp => math_align(p), - - // Identifiers and math calls. - SyntaxKind::Ident(_) => { - let marker = p.marker(); - p.eat(); - - // Parenthesis or bracket means this is a function call. - if matches!(p.peek_direct(), Some(SyntaxKind::Atom(s)) if s == "(") { - marker.perform(p, SyntaxKind::FuncCall, math_args); - } - } - - // Hashtag + keyword / identifier. - SyntaxKind::Let - | SyntaxKind::Set - | SyntaxKind::Show - | SyntaxKind::If - | SyntaxKind::While - | SyntaxKind::For - | SyntaxKind::Import - | SyntaxKind::Include - | SyntaxKind::Break - | SyntaxKind::Continue - | SyntaxKind::Return => embedded_expr(p), - - // Code and content block. - SyntaxKind::LeftBrace => code_block(p), - SyntaxKind::LeftBracket => content_block(p), - - _ => p.unexpected(), - } -} - -fn math_group(p: &mut Parser, group: Group) { - p.perform(SyntaxKind::Math, |p| { - p.start_group(group); - while !p.eof() { - math_node(p); - } - p.end_group(); - }) -} - -fn math_align(p: &mut Parser) { - p.perform(SyntaxKind::AlignPoint, |p| { - p.assert(SyntaxKind::Amp); - while p.eat_if(SyntaxKind::Amp) {} - }) -} - -fn expr(p: &mut Parser) -> ParseResult { - expr_prec(p, false, 0) -} - -/// Parse an expression with operators having at least the minimum precedence. -/// -/// If `atomic` is true, this does not parse binary operations and arrow -/// functions, which is exactly what we want in a shorthand expression directly -/// in markup. 
-/// -/// Stops parsing at operations with lower precedence than `min_prec`, -fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { - let marker = p.marker(); - - // Start the unary expression. - match p.peek().and_then(UnOp::from_token) { - Some(op) if !atomic => { - p.eat(); - let prec = op.precedence(); - expr_prec(p, atomic, prec)?; - marker.end(p, SyntaxKind::Unary); - } - _ => primary(p, atomic)?, - }; - - loop { - // Parenthesis or bracket means this is a function call. - if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct() { - marker.perform(p, SyntaxKind::FuncCall, args)?; - continue; - } - - if atomic { - break; - } - - // Method call or field access. - if p.eat_if(SyntaxKind::Dot) { - ident(p)?; - if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct() - { - marker.perform(p, SyntaxKind::MethodCall, args)?; - } else { - marker.end(p, SyntaxKind::FieldAccess); - } - continue; - } - - let op = if p.eat_if(SyntaxKind::Not) { - if p.at(SyntaxKind::In) { - BinOp::NotIn - } else { - p.expected("keyword `in`"); - return Err(ParseError); - } - } else { - match p.peek().and_then(BinOp::from_token) { - Some(binop) => binop, - None => break, - } - }; - - let mut prec = op.precedence(); - if prec < min_prec { - break; - } - - p.eat(); - - match op.assoc() { - Assoc::Left => prec += 1, - Assoc::Right => {} - } - - marker.perform(p, SyntaxKind::Binary, |p| expr_prec(p, atomic, prec))?; - } - - Ok(()) -} - -fn primary(p: &mut Parser, atomic: bool) -> ParseResult { - match p.peek() { - // Literals and few other things. - Some( - SyntaxKind::None - | SyntaxKind::Auto - | SyntaxKind::Int(_) - | SyntaxKind::Float(_) - | SyntaxKind::Bool(_) - | SyntaxKind::Numeric(_, _) - | SyntaxKind::Str(_) - | SyntaxKind::Label(_) - | SyntaxKind::Raw(_), - ) => { - p.eat(); - Ok(()) - } - - // Things that start with an identifier. - Some(SyntaxKind::Ident(_)) => { - let marker = p.marker(); - p.eat(); - - // Arrow means this is a closure's lone parameter. - if !atomic && p.at(SyntaxKind::Arrow) { - marker.end(p, SyntaxKind::Params); - p.assert(SyntaxKind::Arrow); - marker.perform(p, SyntaxKind::Closure, expr) - } else { - Ok(()) - } - } - - // Structures. - Some(SyntaxKind::LeftParen) => parenthesized(p, atomic), - Some(SyntaxKind::LeftBrace) => Ok(code_block(p)), - Some(SyntaxKind::LeftBracket) => Ok(content_block(p)), - Some(SyntaxKind::Dollar) => Ok(math(p)), - - // Keywords. - Some(SyntaxKind::Let) => let_binding(p), - Some(SyntaxKind::Set) => set_rule(p), - Some(SyntaxKind::Show) => show_rule(p), - Some(SyntaxKind::If) => conditional(p), - Some(SyntaxKind::While) => while_loop(p), - Some(SyntaxKind::For) => for_loop(p), - Some(SyntaxKind::Import) => module_import(p), - Some(SyntaxKind::Include) => module_include(p), - Some(SyntaxKind::Break) => break_stmt(p), - Some(SyntaxKind::Continue) => continue_stmt(p), - Some(SyntaxKind::Return) => return_stmt(p), - - Some(SyntaxKind::Error(_, _)) => { - p.eat(); - Err(ParseError) - } - - // Nothing. 
- _ => { - p.expected_found("expression"); - Err(ParseError) - } - } -} - -fn ident(p: &mut Parser) -> ParseResult { - match p.peek() { - Some(SyntaxKind::Ident(_)) => { - p.eat(); - Ok(()) - } - _ => { - p.expected_found("identifier"); - Err(ParseError) - } - } -} - -/// Parse something that starts with a parenthesis, which can be either of: -/// - Array literal -/// - Dictionary literal -/// - Parenthesized expression -/// - Parameter list of closure expression -fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult { - let marker = p.marker(); - - p.start_group(Group::Paren); - let colon = p.eat_if(SyntaxKind::Colon); - let kind = collection(p, true).0; - p.end_group(); - - // Leading colon makes this a dictionary. - if colon { - dict(p, marker); - return Ok(()); - } - - // Arrow means this is a closure's parameter list. - if !atomic && p.at(SyntaxKind::Arrow) { - params(p, marker); - p.assert(SyntaxKind::Arrow); - return marker.perform(p, SyntaxKind::Closure, expr); - } - - // Transform into the identified collection. - match kind { - CollectionKind::Group => marker.end(p, SyntaxKind::Parenthesized), - CollectionKind::Positional => array(p, marker), - CollectionKind::Named => dict(p, marker), - } - - Ok(()) -} - -/// The type of a collection. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum CollectionKind { - /// The collection is only one item and has no comma. - Group, - /// The collection starts with a positional item and has multiple items or a - /// trailing comma. - Positional, - /// The collection starts with a colon or named item. - Named, -} - -/// Parse a collection. -/// -/// Returns the length of the collection and whether the literal contained any -/// commas. -fn collection(p: &mut Parser, keyed: bool) -> (CollectionKind, usize) { - let mut collection_kind = None; - let mut items = 0; - let mut can_group = true; - let mut missing_coma: Option = None; - - while !p.eof() { - let Ok(item_kind) = item(p, keyed) else { - p.eat_if(SyntaxKind::Comma); - collection_kind = Some(CollectionKind::Group); - continue; - }; - - match item_kind { - SyntaxKind::Spread => can_group = false, - SyntaxKind::Named if collection_kind.is_none() => { - collection_kind = Some(CollectionKind::Named); - can_group = false; - } - _ if collection_kind.is_none() => { - collection_kind = Some(CollectionKind::Positional); - } - _ => {} - } - - items += 1; - - if let Some(marker) = missing_coma.take() { - p.expected_at(marker, "comma"); - } - - if p.eof() { - break; - } - - if p.eat_if(SyntaxKind::Comma) { - can_group = false; - } else { - missing_coma = Some(p.trivia_start()); - } - } - - let kind = if can_group && items == 1 { - CollectionKind::Group - } else { - collection_kind.unwrap_or(CollectionKind::Positional) - }; - - (kind, items) -} - -fn item(p: &mut Parser, keyed: bool) -> ParseResult { - let marker = p.marker(); - if p.eat_if(SyntaxKind::Dots) { - marker.perform(p, SyntaxKind::Spread, expr)?; - return Ok(SyntaxKind::Spread); - } - - expr(p)?; - - if p.at(SyntaxKind::Colon) { - match marker.after(p).map(|c| c.kind()) { - Some(SyntaxKind::Ident(_)) => { - p.eat(); - marker.perform(p, SyntaxKind::Named, expr)?; - } - Some(SyntaxKind::Str(_)) if keyed => { - p.eat(); - marker.perform(p, SyntaxKind::Keyed, expr)?; - } - kind => { - let mut msg = EcoString::from("expected identifier"); - if keyed { - msg.push_str(" or string"); - } - if let Some(kind) = kind { - msg.push_str(", found "); - msg.push_str(kind.name()); - } - let error = SyntaxKind::Error(ErrorPos::Full, msg); - marker.end(p, 
error); - p.eat(); - marker.perform(p, SyntaxKind::Named, expr).ok(); - return Err(ParseError); - } - } - - Ok(SyntaxKind::Named) - } else { - Ok(SyntaxKind::None) - } -} - -fn array(p: &mut Parser, marker: Marker) { - marker.filter_children(p, |x| match x.kind() { - SyntaxKind::Named | SyntaxKind::Keyed => Err("expected expression"), - _ => Ok(()), - }); - marker.end(p, SyntaxKind::Array); -} - -fn dict(p: &mut Parser, marker: Marker) { - let mut used = HashSet::new(); - marker.filter_children(p, |x| match x.kind() { - kind if kind.is_paren() => Ok(()), - SyntaxKind::Named | SyntaxKind::Keyed => { - if let Some(SyntaxKind::Ident(key) | SyntaxKind::Str(key)) = - x.children().next().map(|child| child.kind()) - { - if !used.insert(key.clone()) { - return Err("pair has duplicate key"); - } - } - Ok(()) - } - SyntaxKind::Spread | SyntaxKind::Comma | SyntaxKind::Colon => Ok(()), - _ => Err("expected named or keyed pair"), - }); - marker.end(p, SyntaxKind::Dict); -} - -fn params(p: &mut Parser, marker: Marker) { - marker.filter_children(p, |x| match x.kind() { - kind if kind.is_paren() => Ok(()), - SyntaxKind::Named | SyntaxKind::Ident(_) | SyntaxKind::Comma => Ok(()), - SyntaxKind::Spread - if matches!( - x.children().last().map(|child| child.kind()), - Some(&SyntaxKind::Ident(_)) - ) => - { - Ok(()) - } - _ => Err("expected identifier, named pair or argument sink"), - }); - marker.end(p, SyntaxKind::Params); -} - -/// Parse a code block: `{...}`. -fn code_block(p: &mut Parser) { - p.perform(SyntaxKind::CodeBlock, |p| { - p.start_group(Group::Brace); - code(p); - p.end_group(); - }); -} - -fn code(p: &mut Parser) { - while !p.eof() { - p.start_group(Group::Expr); - if expr(p).is_ok() && !p.eof() { - p.expected("semicolon or line break"); - } - p.end_group(); - - // Forcefully skip over newlines since the group's contents can't. 
- p.eat_while(SyntaxKind::is_space); - } -} - -fn content_block(p: &mut Parser) { - p.perform(SyntaxKind::ContentBlock, |p| { - p.start_group(Group::Bracket); - markup(p, true); - p.end_group(); - }); -} - -fn args(p: &mut Parser) -> ParseResult { - match p.peek_direct() { - Some(SyntaxKind::LeftParen) => {} - Some(SyntaxKind::LeftBracket) => {} - _ => { - p.expected_found("argument list"); - return Err(ParseError); - } - } - - p.perform(SyntaxKind::Args, |p| { - if p.at(SyntaxKind::LeftParen) { - let marker = p.marker(); - p.start_group(Group::Paren); - collection(p, false); - p.end_group(); - - let mut used = HashSet::new(); - marker.filter_children(p, |x| match x.kind() { - SyntaxKind::Named => { - if let Some(SyntaxKind::Ident(ident)) = - x.children().next().map(|child| child.kind()) - { - if !used.insert(ident.clone()) { - return Err("duplicate argument"); - } - } - Ok(()) - } - _ => Ok(()), - }); - } - - while p.peek_direct() == Some(&SyntaxKind::LeftBracket) { - content_block(p); - } - }); - - Ok(()) -} - -fn math_args(p: &mut Parser) { - p.start_group(Group::MathRow('(', ')')); - p.perform(SyntaxKind::Args, |p| { - let mut marker = p.marker(); - while !p.eof() { - if matches!(p.peek(), Some(SyntaxKind::Atom(s)) if s == ",") { - marker.end(p, SyntaxKind::Math); - let comma = p.marker(); - p.eat(); - comma.convert(p, SyntaxKind::Comma); - marker = p.marker(); - } else { - math_node(p); - } - } - if marker != p.marker() { - marker.end(p, SyntaxKind::Math); - } - }); - p.end_group(); -} - -fn let_binding(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::LetBinding, |p| { - p.assert(SyntaxKind::Let); - - let marker = p.marker(); - ident(p)?; - - // If a parenthesis follows, this is a function definition. - let has_params = p.peek_direct() == Some(&SyntaxKind::LeftParen); - if has_params { - let marker = p.marker(); - p.start_group(Group::Paren); - collection(p, false); - p.end_group(); - params(p, marker); - } - - if p.eat_if(SyntaxKind::Eq) { - expr(p)?; - } else if has_params { - // Function definitions must have a body. - p.expected("body"); - } - - // Rewrite into a closure expression if it's a function definition. 
- if has_params { - marker.end(p, SyntaxKind::Closure); - } - - Ok(()) - }) -} - -fn set_rule(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::SetRule, |p| { - p.assert(SyntaxKind::Set); - ident(p)?; - args(p)?; - if p.eat_if(SyntaxKind::If) { - expr(p)?; - } - Ok(()) - }) -} - -fn show_rule(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::ShowRule, |p| { - p.assert(SyntaxKind::Show); - expr(p)?; - if p.eat_if(SyntaxKind::Colon) { - expr(p)?; - } - Ok(()) - }) -} - -fn conditional(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::Conditional, |p| { - p.assert(SyntaxKind::If); - - expr(p)?; - body(p)?; - - if p.eat_if(SyntaxKind::Else) { - if p.at(SyntaxKind::If) { - conditional(p)?; - } else { - body(p)?; - } - } - - Ok(()) - }) -} - -fn while_loop(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::WhileLoop, |p| { - p.assert(SyntaxKind::While); - expr(p)?; - body(p) - }) -} - -fn for_loop(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::ForLoop, |p| { - p.assert(SyntaxKind::For); - for_pattern(p)?; - p.expect(SyntaxKind::In)?; - expr(p)?; - body(p) - }) -} - -fn for_pattern(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::ForPattern, |p| { - ident(p)?; - if p.eat_if(SyntaxKind::Comma) { - ident(p)?; - } - Ok(()) - }) -} - -fn module_import(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::ModuleImport, |p| { - p.assert(SyntaxKind::Import); - expr(p)?; - - if !p.eat_if(SyntaxKind::Colon) || p.eat_if(SyntaxKind::Star) { - return Ok(()); - } - - // This is the list of identifiers scenario. - p.perform(SyntaxKind::ImportItems, |p| { - let marker = p.marker(); - let items = collection(p, false).1; - if items == 0 { - p.expected("import items"); - } - marker.filter_children(p, |n| match n.kind() { - SyntaxKind::Ident(_) | SyntaxKind::Comma => Ok(()), - _ => Err("expected identifier"), - }); - }); - - Ok(()) - }) -} - -fn module_include(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::ModuleInclude, |p| { - p.assert(SyntaxKind::Include); - expr(p) - }) -} - -fn break_stmt(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::LoopBreak, |p| { - p.assert(SyntaxKind::Break); - Ok(()) - }) -} - -fn continue_stmt(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::LoopContinue, |p| { - p.assert(SyntaxKind::Continue); - Ok(()) - }) -} - -fn return_stmt(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::FuncReturn, |p| { - p.assert(SyntaxKind::Return); - if !p.at(SyntaxKind::Comma) && !p.eof() { - expr(p)?; - } - Ok(()) - }) -} - -fn body(p: &mut Parser) -> ParseResult { - match p.peek() { - Some(SyntaxKind::LeftBracket) => Ok(content_block(p)), - Some(SyntaxKind::LeftBrace) => Ok(code_block(p)), - _ => { - p.expected("body"); - Err(ParseError) - } - } -} diff --git a/src/syntax/incremental.rs b/src/syntax/reparse.rs similarity index 98% rename from src/syntax/incremental.rs rename to src/syntax/reparse.rs index 606daa2e4..e72192fff 100644 --- a/src/syntax/incremental.rs +++ b/src/syntax/reparse.rs @@ -87,8 +87,8 @@ fn try_reparse( // reject text that points to the special case for URL // evasion and line comments. 
if !child.kind().is_space() - && child.kind() != &SyntaxKind::Semicolon - && child.kind() != &SyntaxKind::Text('/'.into()) + && child.kind() != SyntaxKind::Semicolon + && (child.kind() != SyntaxKind::Text || child.text() != "/") && (ahead.is_none() || change.replaced.start > child_span.end) && !ahead.map_or(false, Ahead::is_compulsory) { @@ -177,7 +177,7 @@ fn try_reparse( // Make sure this is a markup node and that we may replace. If so, save // the current indent. let min_indent = match node.kind() { - SyntaxKind::Markup { min_indent } if safe_to_replace => *min_indent, + SyntaxKind::Markup { min_indent } if safe_to_replace => min_indent, _ => return None, }; @@ -375,23 +375,23 @@ enum ReparseMode { /// Whether changes _inside_ this node are safely encapsulated, so that only /// this node must be reparsed. -fn is_bounded(kind: &SyntaxKind) -> bool { +fn is_bounded(kind: SyntaxKind) -> bool { matches!( kind, SyntaxKind::CodeBlock | SyntaxKind::ContentBlock | SyntaxKind::Linebreak - | SyntaxKind::SmartQuote { .. } + | SyntaxKind::SmartQuote | SyntaxKind::BlockComment | SyntaxKind::Space { .. } - | SyntaxKind::Escape(_) - | SyntaxKind::Shorthand(_) + | SyntaxKind::Escape + | SyntaxKind::Shorthand ) } /// Whether `at_start` would still be true after this node given the /// previous value of the property. -fn next_at_start(kind: &SyntaxKind, prev: bool) -> bool { +fn next_at_start(kind: SyntaxKind, prev: bool) -> bool { match kind { SyntaxKind::Space { newlines: (1..) } => true, SyntaxKind::Space { .. } | SyntaxKind::LineComment | SyntaxKind::BlockComment => { diff --git a/src/syntax/resolve.rs b/src/syntax/resolve.rs deleted file mode 100644 index 3ba9a252d..000000000 --- a/src/syntax/resolve.rs +++ /dev/null @@ -1,233 +0,0 @@ -use unscanny::Scanner; - -use super::{is_ident, is_newline, RawFields}; -use crate::util::EcoString; - -/// Resolve all escape sequences in a string. -pub fn resolve_string(string: &str) -> EcoString { - let mut out = EcoString::with_capacity(string.len()); - let mut s = Scanner::new(string); - - while let Some(c) = s.eat() { - if c != '\\' { - out.push(c); - continue; - } - - let start = s.locate(-1); - match s.eat() { - Some('\\') => out.push('\\'), - Some('"') => out.push('"'), - Some('n') => out.push('\n'), - Some('r') => out.push('\r'), - Some('t') => out.push('\t'), - Some('u') if s.eat_if('{') => { - // TODO: Error if closing brace is missing. - let sequence = s.eat_while(char::is_ascii_hexdigit); - let _terminated = s.eat_if('}'); - match resolve_hex(sequence) { - Some(c) => out.push(c), - None => out.push_str(s.from(start)), - } - } - _ => out.push_str(s.from(start)), - } - } - - out -} - -/// Resolve a hexadecimal escape sequence into a character -/// (only the inner hex letters without braces or `\u`). -pub fn resolve_hex(sequence: &str) -> Option { - u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32) -} - -/// Resolve the language tag and trim the raw text. -pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawFields { - if backticks > 1 { - let (tag, inner) = split_at_lang_tag(text); - let (text, block) = trim_and_split_raw(column, inner); - RawFields { - lang: is_ident(tag).then(|| tag.into()), - text: text.into(), - block, - } - } else { - RawFields { - lang: None, - text: split_lines(text).join("\n").into(), - block: false, - } - } -} - -/// Parse the lang tag and return it alongside the remaining inner raw text. 
-fn split_at_lang_tag(raw: &str) -> (&str, &str) { - let mut s = Scanner::new(raw); - (s.eat_until(|c: char| c == '`' || c.is_whitespace() || is_newline(c)), s.after()) -} - -/// Trim raw text and splits it into lines. -/// -/// Also returns whether at least one newline was contained in `raw`. -fn trim_and_split_raw(column: usize, mut raw: &str) -> (String, bool) { - // Trims one space at the start. - raw = raw.strip_prefix(' ').unwrap_or(raw); - - // Trim one space at the end if the last non-whitespace char is a backtick. - if raw.trim_end().ends_with('`') { - raw = raw.strip_suffix(' ').unwrap_or(raw); - } - - let mut lines = split_lines(raw); - - // Dedent based on column, but not for the first line. - for line in lines.iter_mut().skip(1) { - let offset = line - .chars() - .take(column) - .take_while(|c| c.is_whitespace()) - .map(char::len_utf8) - .sum(); - *line = &line[offset..]; - } - - let had_newline = lines.len() > 1; - let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace); - - // Trims a sequence of whitespace followed by a newline at the start. - if lines.first().map_or(false, is_whitespace) { - lines.remove(0); - } - - // Trims a newline followed by a sequence of whitespace at the end. - if lines.last().map_or(false, is_whitespace) { - lines.pop(); - } - - (lines.join("\n"), had_newline) -} - -/// Split a string into a vector of lines -/// (respecting Unicode, Unix, Mac and Windows line breaks). -fn split_lines(text: &str) -> Vec<&str> { - let mut s = Scanner::new(text); - let mut lines = Vec::new(); - let mut start = 0; - let mut end = 0; - - while let Some(c) = s.eat() { - if is_newline(c) { - if c == '\r' { - s.eat_if('\n'); - } - - lines.push(&text[start..end]); - start = s.cursor(); - } - end = s.cursor(); - } - - lines.push(&text[start..]); - lines -} - -#[cfg(test)] -#[rustfmt::skip] -mod tests { - use super::*; - - #[test] - fn test_resolve_strings() { - #[track_caller] - fn test(string: &str, expected: &str) { - assert_eq!(resolve_string(string), expected); - } - - test(r#"hello world"#, "hello world"); - test(r#"hello\nworld"#, "hello\nworld"); - test(r#"a\"bc"#, "a\"bc"); - test(r#"a\u{2603}bc"#, "a☃bc"); - test(r#"a\u{26c3bg"#, "a𦰻g"); - test(r#"av\u{6797"#, "av林"); - test(r#"a\\"#, "a\\"); - test(r#"a\\\nbc"#, "a\\\nbc"); - test(r#"a\t\r\nbc"#, "a\t\r\nbc"); - test(r"🌎", "🌎"); - test(r"🌎\", r"🌎\"); - test(r"\🌎", r"\🌎"); - } - - #[test] - fn test_split_at_lang_tag() { - #[track_caller] - fn test(text: &str, lang: &str, inner: &str) { - assert_eq!(split_at_lang_tag(text), (lang, inner)); - } - - test("typst it!", "typst", " it!"); - test("typst\n it!", "typst", "\n it!"); - test("typst\n it!", "typst", "\n it!"); - test("abc`", "abc", "`"); - test(" hi", "", " hi"); - test("`", "", "`"); - } - - #[test] - fn test_resolve_raw() { - #[track_caller] - fn test( - column: usize, - backticks: usize, - raw: &str, - lang: Option<&str>, - text: &str, - block: bool, - ) { - let node = resolve_raw(column, backticks, raw); - assert_eq!(node.lang.as_deref(), lang); - assert_eq!(node.text, text); - assert_eq!(node.block, block); - } - - // Just one backtick. - test(0, 1, "py", None, "py", false); - test(0, 1, "1\n2", None, "1\n2", false); - test(0, 1, "1\r\n2", None, "1\n2", false); - - // More than one backtick with lang tag. - test(0, 2, "js alert()", Some("js"), "alert()", false); - test(0, 3, "py quit(\n\n)", Some("py"), "quit(\n\n)", true); - test(0, 2, "♥", None, "", false); - - // Trimming of whitespace (tested more thoroughly in separate test). 
- test(0, 2, " a", None, "a", false); - test(0, 2, " a", None, " a", false); - test(0, 2, " \na", None, "a", true); - - // Dedenting - test(2, 3, " def foo():\n bar()", None, "def foo():\n bar()", true); - } - - #[test] - fn test_trim_raw() { - #[track_caller] - fn test(text: &str, expected: &str) { - assert_eq!(trim_and_split_raw(0, text).0, expected); - } - - test(" hi", "hi"); - test(" hi", " hi"); - test("\nhi", "hi"); - test(" \n hi", " hi"); - test("hi` ", "hi`"); - test("hi` ", "hi` "); - test("hi` ", "hi` "); - test("hi ", "hi "); - test("hi ", "hi "); - test("hi\n", "hi"); - test("hi \n ", "hi "); - test(" \n hi \n ", " hi "); - } -} diff --git a/src/syntax/source.rs b/src/syntax/source.rs index 9b76af12a..41805a604 100644 --- a/src/syntax/source.rs +++ b/src/syntax/source.rs @@ -8,10 +8,10 @@ use std::path::{Path, PathBuf}; use comemo::Prehashed; use unscanny::Scanner; +use super::ast::Markup; +use super::reparse::reparse; +use super::{is_newline, parse, Span, SyntaxNode}; use crate::diag::SourceResult; -use crate::syntax::ast::Markup; -use crate::syntax::{is_newline, parse, reparse}; -use crate::syntax::{Span, SyntaxNode}; use crate::util::{PathExt, StrExt}; /// A source file. @@ -124,11 +124,8 @@ impl Source { } // Recalculate the line starts after the edit. - self.lines.extend(lines_from( - start_byte, - start_utf16, - &self.text[start_byte..], - )); + self.lines + .extend(lines_from(start_byte, start_utf16, &self.text[start_byte..])); // Incrementally reparse the replaced range. let mut root = std::mem::take(&mut self.root).into_inner(); diff --git a/tests/src/benches.rs b/tests/src/benches.rs index ff61c32ff..e76f3c767 100644 --- a/tests/src/benches.rs +++ b/tests/src/benches.rs @@ -5,7 +5,7 @@ use iai::{black_box, main, Iai}; use typst::diag::{FileError, FileResult}; use typst::font::{Font, FontBook}; use typst::model::Library; -use typst::syntax::{LexMode, Lexer, Source, SourceId}; +use typst::syntax::{Source, SourceId}; use typst::util::Buffer; use typst::World; use unscanny::Scanner; @@ -16,7 +16,6 @@ const FONT: &[u8] = include_bytes!("../fonts/IBMPlexSans-Regular.ttf"); main!( bench_decode, bench_scan, - bench_lex, bench_parse, bench_edit, bench_eval, @@ -49,10 +48,6 @@ fn bench_scan(iai: &mut Iai) { }) } -fn bench_lex(iai: &mut Iai) { - iai.run(|| Lexer::new(black_box(TEXT), black_box(LexMode::Markup)).count()); -} - fn bench_parse(iai: &mut Iai) { iai.run(|| typst::syntax::parse(TEXT)); } diff --git a/tests/typ/math/syntax.typ b/tests/typ/math/syntax.typ index 72b4b7c24..37ea2de79 100644 --- a/tests/typ/math/syntax.typ +++ b/tests/typ/math/syntax.typ @@ -24,5 +24,5 @@ $ A sub:eq:not B $ --- -// Error: 8 expected closing paren +// Error: 8 expected math atom $ sum_( $