diff --git a/src/ide/complete.rs b/src/ide/complete.rs index 9e13fc8dd..f0808b21f 100644 --- a/src/ide/complete.rs +++ b/src/ide/complete.rs @@ -141,7 +141,7 @@ fn complete_params(ctx: &mut CompletionContext) -> bool { if let Some(param) = before_colon.cast::(); then { ctx.from = match ctx.leaf.kind() { - SyntaxKind::Colon | SyntaxKind::Space { .. } => ctx.cursor, + SyntaxKind::Colon | SyntaxKind::Space => ctx.cursor, _ => ctx.leaf.offset(), }; ctx.named_param_value_completions(&callee, &param); @@ -246,7 +246,7 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool { /// Complete in markup mode. fn complete_markup(ctx: &mut CompletionContext) -> bool { // Bail if we aren't even in markup. - if !matches!(ctx.leaf.parent_kind(), None | Some(SyntaxKind::Markup { .. })) { + if !matches!(ctx.leaf.parent_kind(), None | Some(SyntaxKind::Markup)) { return false; } @@ -325,7 +325,7 @@ fn complete_math(ctx: &mut CompletionContext) -> bool { fn complete_code(ctx: &mut CompletionContext) -> bool { if matches!( ctx.leaf.parent_kind(), - None | Some(SyntaxKind::Markup { .. }) | Some(SyntaxKind::Math) + None | Some(SyntaxKind::Markup) | Some(SyntaxKind::Math) ) { return false; } @@ -887,7 +887,7 @@ impl<'a> CompletionContext<'a> { self.snippet_completion( "import", - "import ${items} from \"${file.typ}\"", + "import \"${file.typ}\": ${items}", "Imports variables from another file.", ); diff --git a/src/ide/highlight.rs b/src/ide/highlight.rs index cc502537e..42c050028 100644 --- a/src/ide/highlight.rs +++ b/src/ide/highlight.rs @@ -83,9 +83,41 @@ impl Category { /// highlighted. pub fn highlight(node: &LinkedNode) -> Option<Category> { match node.kind() { - SyntaxKind::LineComment => Some(Category::Comment), - SyntaxKind::BlockComment => Some(Category::Comment), - SyntaxKind::Space { .. 
} => None, + SyntaxKind::Markup + if node.parent_kind() == Some(SyntaxKind::TermItem) + && node.next_sibling().as_ref().map(|v| v.kind()) + == Some(SyntaxKind::Colon) => + { + Some(Category::ListTerm) + } + SyntaxKind::Markup => None, + SyntaxKind::Text => None, + SyntaxKind::Space => None, + SyntaxKind::Linebreak => Some(Category::Escape), + SyntaxKind::Parbreak => None, + SyntaxKind::Escape => Some(Category::Escape), + SyntaxKind::Shorthand => Some(Category::Escape), + SyntaxKind::Symbol => Some(Category::Escape), + SyntaxKind::SmartQuote => None, + SyntaxKind::Strong => Some(Category::Strong), + SyntaxKind::Emph => Some(Category::Emph), + SyntaxKind::Raw => Some(Category::Raw), + SyntaxKind::Link => Some(Category::Link), + SyntaxKind::Label => Some(Category::Label), + SyntaxKind::Ref => Some(Category::Ref), + SyntaxKind::Heading => Some(Category::Heading), + SyntaxKind::HeadingMarker => None, + SyntaxKind::ListItem => None, + SyntaxKind::ListMarker => Some(Category::ListMarker), + SyntaxKind::EnumItem => None, + SyntaxKind::EnumMarker => Some(Category::ListMarker), + SyntaxKind::TermItem => None, + SyntaxKind::TermMarker => Some(Category::ListMarker), + SyntaxKind::Math => None, + SyntaxKind::Atom => None, + SyntaxKind::Script => None, + SyntaxKind::Frac => None, + SyntaxKind::AlignPoint => Some(Category::MathOperator), SyntaxKind::LeftBrace => Some(Category::Punctuation), SyntaxKind::RightBrace => Some(Category::Punctuation), @@ -105,16 +137,9 @@ pub fn highlight(node: &LinkedNode) -> Option { _ => None, }, SyntaxKind::Dollar => Some(Category::MathDelimiter), - SyntaxKind::Plus => Some(match node.parent_kind() { - Some(SyntaxKind::EnumItem) => Category::ListMarker, - _ => Category::Operator, - }), - SyntaxKind::Minus => Some(match node.parent_kind() { - Some(SyntaxKind::ListItem) => Category::ListMarker, - _ => Category::Operator, - }), + SyntaxKind::Plus => Some(Category::Operator), + SyntaxKind::Minus => Some(Category::Operator), SyntaxKind::Slash => 
Some(match node.parent_kind() { - Some(SyntaxKind::TermItem) => Category::ListMarker, Some(SyntaxKind::Frac) => Category::MathOperator, _ => Category::Operator, }), @@ -157,41 +182,9 @@ pub fn highlight(node: &LinkedNode) -> Option { SyntaxKind::Include => Some(Category::Keyword), SyntaxKind::As => Some(Category::Keyword), - SyntaxKind::Markup { .. } - if node.parent_kind() == Some(SyntaxKind::TermItem) - && node.next_sibling().as_ref().map(|v| v.kind()) - == Some(SyntaxKind::Colon) => - { - Some(Category::ListTerm) - } - SyntaxKind::Markup { .. } => None, - - SyntaxKind::Text => None, - SyntaxKind::Linebreak => Some(Category::Escape), - SyntaxKind::Escape => Some(Category::Escape), - SyntaxKind::Shorthand => Some(Category::Escape), - SyntaxKind::Symbol => Some(Category::Escape), - SyntaxKind::SmartQuote { .. } => None, - SyntaxKind::Strong => Some(Category::Strong), - SyntaxKind::Emph => Some(Category::Emph), - SyntaxKind::Raw { .. } => Some(Category::Raw), - SyntaxKind::Link => Some(Category::Link), - SyntaxKind::Label => Some(Category::Label), - SyntaxKind::Ref => Some(Category::Ref), - SyntaxKind::Heading => Some(Category::Heading), - SyntaxKind::ListItem => None, - SyntaxKind::EnumItem => None, - SyntaxKind::EnumNumbering => Some(Category::ListMarker), - SyntaxKind::TermItem => None, - SyntaxKind::Math => None, - SyntaxKind::Atom => None, - SyntaxKind::Script => None, - SyntaxKind::Frac => None, - SyntaxKind::AlignPoint => Some(Category::MathOperator), - SyntaxKind::Ident => match node.parent_kind() { Some( - SyntaxKind::Markup { .. 
} + SyntaxKind::Markup | SyntaxKind::Math | SyntaxKind::Script | SyntaxKind::Frac, @@ -258,7 +251,10 @@ pub fn highlight(node: &LinkedNode) -> Option { SyntaxKind::LoopContinue => None, SyntaxKind::FuncReturn => None, + SyntaxKind::LineComment => Some(Category::Comment), + SyntaxKind::BlockComment => Some(Category::Comment), SyntaxKind::Error => Some(Category::Error), + SyntaxKind::Eof => None, } } diff --git a/src/model/eval.rs b/src/model/eval.rs index 8e8c93c5c..0469649bc 100644 --- a/src/model/eval.rs +++ b/src/model/eval.rs @@ -261,9 +261,10 @@ impl Eval for ast::Expr { }; match self { + Self::Text(v) => v.eval(vm).map(Value::Content), Self::Space(v) => v.eval(vm).map(Value::Content), Self::Linebreak(v) => v.eval(vm).map(Value::Content), - Self::Text(v) => v.eval(vm).map(Value::Content), + Self::Parbreak(v) => v.eval(vm).map(Value::Content), Self::Escape(v) => v.eval(vm).map(Value::Content), Self::Shorthand(v) => v.eval(vm).map(Value::Content), Self::Symbol(v) => v.eval(vm).map(Value::Content), @@ -330,14 +331,19 @@ impl ast::Expr { } } +impl Eval for ast::Text { + type Output = Content; + + fn eval(&self, vm: &mut Vm) -> SourceResult { + Ok((vm.items.text)(self.get().clone())) + } +} + impl Eval for ast::Space { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { - Ok(match self.newlines() { - 0..=1 => (vm.items.space)(), - _ => (vm.items.parbreak)(), - }) + Ok((vm.items.space)()) } } @@ -349,11 +355,11 @@ impl Eval for ast::Linebreak { } } -impl Eval for ast::Text { +impl Eval for ast::Parbreak { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { - Ok((vm.items.text)(self.get().clone())) + Ok((vm.items.parbreak)()) } } @@ -438,7 +444,7 @@ impl Eval for ast::Link { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { - Ok((vm.items.link)(self.url().clone())) + Ok((vm.items.link)(self.get().clone())) } } @@ -1231,13 +1237,17 @@ impl Eval for ast::ModuleImport { } } Some(ast::Imports::Items(idents)) => { + let 
mut errors = vec![]; for ident in idents { if let Some(value) = module.scope().get(&ident) { vm.scopes.top.define(ident.take(), value.clone()); } else { - bail!(ident.span(), "unresolved import"); + errors.push(error!(ident.span(), "unresolved import")); } } + if errors.len() > 0 { + return Err(Box::new(errors)); + } } } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index bf4b37bca..169b0276a 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -29,9 +29,6 @@ pub trait AstNode: Sized { macro_rules! node { ($(#[$attr:meta])* $name:ident) => { - node!{ $(#[$attr])* $name: SyntaxKind::$name { .. } } - }; - ($(#[$attr:meta])* $name:ident: $variants:pat) => { #[derive(Debug, Clone, PartialEq, Hash)] #[repr(transparent)] $(#[$attr])* @@ -39,7 +36,7 @@ macro_rules! node { impl AstNode for $name { fn from_untyped(node: &SyntaxNode) -> Option { - if matches!(node.kind(), $variants) { + if matches!(node.kind(), SyntaxKind::$name) { Some(Self(node.clone())) } else { Option::None @@ -67,8 +64,7 @@ impl Markup { .filter(move |node| { // Ignore newline directly after statements without semicolons. let kind = node.kind(); - let keep = - !was_stmt || !matches!(kind, SyntaxKind::Space { newlines: 1 }); + let keep = !was_stmt || node.kind() != SyntaxKind::Space; was_stmt = kind.is_stmt(); keep }) @@ -79,12 +75,15 @@ impl Markup { /// An expression in markup, math or code. #[derive(Debug, Clone, PartialEq, Hash)] pub enum Expr { - /// Whitespace. + /// Plain text without markup. + Text(Text), + /// Whitespace in markup or math. Has at most one newline in markup, as more + /// indicate a paragraph break. Space(Space), /// A forced line break: `\`. Linebreak(Linebreak), - /// Plain text without markup. - Text(Text), + /// A paragraph break, indicated by one or multiple blank lines. + Parbreak(Parbreak), /// An escape sequence: `\#`, `\u{1F5FA}`. Escape(Escape), /// A shorthand for a unicode codepoint. 
For example, `~` for non-breaking @@ -189,7 +188,7 @@ pub enum Expr { impl Expr { fn cast_with_space(node: &SyntaxNode) -> Option { match node.kind() { - SyntaxKind::Space { .. } => node.cast().map(Self::Space), + SyntaxKind::Space => node.cast().map(Self::Space), _ => Self::from_untyped(node), } } @@ -199,14 +198,15 @@ impl AstNode for Expr { fn from_untyped(node: &SyntaxNode) -> Option { match node.kind() { SyntaxKind::Linebreak => node.cast().map(Self::Linebreak), + SyntaxKind::Parbreak => node.cast().map(Self::Parbreak), SyntaxKind::Text => node.cast().map(Self::Text), SyntaxKind::Escape => node.cast().map(Self::Escape), SyntaxKind::Shorthand => node.cast().map(Self::Shorthand), SyntaxKind::Symbol => node.cast().map(Self::Symbol), - SyntaxKind::SmartQuote { .. } => node.cast().map(Self::SmartQuote), + SyntaxKind::SmartQuote => node.cast().map(Self::SmartQuote), SyntaxKind::Strong => node.cast().map(Self::Strong), SyntaxKind::Emph => node.cast().map(Self::Emph), - SyntaxKind::Raw { .. } => node.cast().map(Self::Raw), + SyntaxKind::Raw => node.cast().map(Self::Raw), SyntaxKind::Link => node.cast().map(Self::Link), SyntaxKind::Label => node.cast().map(Self::Label), SyntaxKind::Ref => node.cast().map(Self::Ref), @@ -255,9 +255,10 @@ impl AstNode for Expr { fn as_untyped(&self) -> &SyntaxNode { match self { + Self::Text(v) => v.as_untyped(), Self::Space(v) => v.as_untyped(), Self::Linebreak(v) => v.as_untyped(), - Self::Text(v) => v.as_untyped(), + Self::Parbreak(v) => v.as_untyped(), Self::Escape(v) => v.as_untyped(), Self::Shorthand(v) => v.as_untyped(), Self::Symbol(v) => v.as_untyped(), @@ -311,26 +312,6 @@ impl AstNode for Expr { } } -node! { - /// Whitespace. - Space -} - -impl Space { - /// Get the number of newlines. - pub fn newlines(&self) -> usize { - match self.0.kind() { - SyntaxKind::Space { newlines } => newlines, - _ => panic!("space is of wrong kind"), - } - } -} - -node! { - /// A forced line break: `\`. - Linebreak -} - node! 
{ /// Plain text without markup. Text @@ -343,6 +324,22 @@ impl Text { } } +node! { + /// Whitespace in markup or math. Has at most one newline in markup, as more + /// indicate a paragraph break. + Space +} + +node! { + /// A forced line break: `\`. + Linebreak +} + +node! { + /// A paragraph break, indicated by one or multiple blank lines. + Parbreak +} + node! { /// An escape sequence: `\#`, `\u{1F5FA}`. Escape @@ -454,10 +451,6 @@ node! { impl Raw { /// The trimmed raw text. pub fn text(&self) -> EcoString { - let SyntaxKind::Raw { column } = self.0.kind() else { - panic!("raw node is of wrong kind"); - }; - let mut text = self.0.text().as_str(); let blocky = text.starts_with("```"); text = text.trim_matches('`'); @@ -480,14 +473,16 @@ impl Raw { let mut lines = split_newlines(text); if blocky { + let dedent = lines + .iter() + .skip(1) + .map(|line| line.chars().take_while(|c| c.is_whitespace()).count()) + .min() + .unwrap_or(0); + // Dedent based on column, but not for the first line. for line in lines.iter_mut().skip(1) { - let offset = line - .chars() - .take(column) - .take_while(|c| c.is_whitespace()) - .map(char::len_utf8) - .sum(); + let offset = line.chars().take(dedent).map(char::len_utf8).sum(); *line = &line[offset..]; } @@ -531,7 +526,7 @@ node! { impl Link { /// Get the URL. - pub fn url(&self) -> &EcoString { + pub fn get(&self) -> &EcoString { self.0.text() } } @@ -575,10 +570,9 @@ impl Heading { pub fn level(&self) -> NonZeroUsize { self.0 .children() - .filter(|n| n.kind() == SyntaxKind::Eq) - .count() - .try_into() - .expect("heading is missing equals sign") + .find(|node| node.kind() == SyntaxKind::HeadingMarker) + .and_then(|node| node.len().try_into().ok()) + .expect("heading is missing marker") } } @@ -603,7 +597,7 @@ impl EnumItem { /// The explicit numbering, if any: `23.`. 
pub fn number(&self) -> Option { self.0.children().find_map(|node| match node.kind() { - SyntaxKind::EnumNumbering => node.text().trim_end_matches('.').parse().ok(), + SyntaxKind::EnumMarker => node.text().trim_end_matches('.').parse().ok(), _ => Option::None, }) } @@ -765,7 +759,7 @@ node! { } impl Bool { - /// Get the value. + /// Get the boolean value. pub fn get(&self) -> bool { self.0.text() == "true" } @@ -777,7 +771,7 @@ node! { } impl Int { - /// Get the value. + /// Get the integer value. pub fn get(&self) -> i64 { self.0.text().parse().expect("integer is invalid") } @@ -789,7 +783,7 @@ node! { } impl Float { - /// Get the value. + /// Get the floating-point value. pub fn get(&self) -> f64 { self.0.text().parse().expect("float is invalid") } @@ -801,7 +795,7 @@ node! { } impl Numeric { - /// Get the value and unit. + /// Get the numeric value and unit. pub fn get(&self) -> (f64, Unit) { let text = self.0.text(); let count = text @@ -850,7 +844,7 @@ node! { } impl Str { - /// Get the value. + /// Get the string value with resolved escape sequences. pub fn get(&self) -> EcoString { let text = self.0.text(); let unquoted = &text[1..text.len() - 1]; @@ -1058,7 +1052,7 @@ impl Unary { pub fn op(&self) -> UnOp { self.0 .children() - .find_map(|node| UnOp::from_token(node.kind())) + .find_map(|node| UnOp::from_kind(node.kind())) .expect("unary operation is missing operator") } @@ -1081,7 +1075,7 @@ pub enum UnOp { impl UnOp { /// Try to convert the token into a unary operation. 
- pub fn from_token(token: SyntaxKind) -> Option { + pub fn from_kind(token: SyntaxKind) -> Option { Some(match token { SyntaxKind::Plus => Self::Pos, SyntaxKind::Minus => Self::Neg, @@ -1125,7 +1119,7 @@ impl Binary { Option::None } SyntaxKind::In if not => Some(BinOp::NotIn), - _ => BinOp::from_token(node.kind()), + _ => BinOp::from_kind(node.kind()), }) .expect("binary operation is missing operator") } @@ -1190,7 +1184,7 @@ pub enum BinOp { impl BinOp { /// Try to convert the token into a binary operation. - pub fn from_token(token: SyntaxKind) -> Option { + pub fn from_kind(token: SyntaxKind) -> Option { Some(match token { SyntaxKind::Plus => Self::Add, SyntaxKind::Minus => Self::Sub, diff --git a/src/syntax/kind.rs b/src/syntax/kind.rs index 26e92b930..5928fa0a6 100644 --- a/src/syntax/kind.rs +++ b/src/syntax/kind.rs @@ -1,17 +1,72 @@ -/// All syntactical building blocks that can be part of a Typst document. +/// A syntactical building block of a Typst file. /// /// Can be created by the lexer or by the parser. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +#[repr(u8)] pub enum SyntaxKind { - /// A line comment: `// ...`. - LineComment, - /// A block comment: `/* ... */`. - BlockComment, - /// One or more whitespace characters. Single spaces are collapsed into text - /// nodes if they would otherwise be surrounded by text nodes. + /// Markup of which all lines must have a minimal indentation. /// - /// Also stores how many newlines are contained. - Space { newlines: usize }, + /// Notably, the number does not determine in which column the markup + /// started, but to the right of which column all markup elements must be, + /// so it is zero except inside indent-aware constructs like lists. + Markup, + /// Plain text without markup. + Text, + /// Whitespace. Contains at most one newline in markup, as more indicate a + /// paragraph break. + Space, + /// A forced line break: `\`. 
+ Linebreak, + /// A paragraph break, indicated by one or multiple blank lines. + Parbreak, + /// An escape sequence: `\#`, `\u{1F5FA}`. + Escape, + /// A shorthand for a unicode codepoint. For example, `~` for non-breaking + /// space or `-?` for a soft hyphen. + Shorthand, + /// Symbol notation: `:arrow:l:`. The string only contains the inner part + /// without leading and trailing dot. + Symbol, + /// A smart quote: `'` or `"`. + SmartQuote, + /// Strong content: `*Strong*`. + Strong, + /// Emphasized content: `_Emphasized_`. + Emph, + /// Raw text with optional syntax highlighting: `` `...` ``. + Raw, + /// A hyperlink: `https://typst.org`. + Link, + /// A label: ``. + Label, + /// A reference: `@target`. + Ref, + /// A section heading: `= Introduction`. + Heading, + /// Introduces a section heading: `=`, `==`, ... + HeadingMarker, + /// An item in a bullet list: `- ...`. + ListItem, + /// Introduces a list item: `-`. + ListMarker, + /// An item in an enumeration (numbered list): `+ ...` or `1. ...`. + EnumItem, + /// Introduces an enumeration item: `+`, `1.`. + EnumMarker, + /// An item in a term list: `/ Term: Details`. + TermItem, + /// Introduces a term item: `/`. + TermMarker, + /// A mathematical formula: `$x$`, `$ x^2 $`. + Math, + /// An atom in math: `x`, `+`, `12`. + Atom, + /// A base with optional sub- and superscripts in math: `a_1^2`. + Script, + /// A fraction in math: `x/2`. + Frac, + /// An alignment point in math: `&`. + AlignPoint, /// A left curly brace, starting a code block: `{`. LeftBrace, @@ -37,19 +92,17 @@ pub enum SyntaxKind { /// The strong text toggle, multiplication operator, and wildcard import /// symbol: `*`. Star, - /// Toggles emphasized text and indicates a subscript in a formula: `_`. + /// Toggles emphasized text and indicates a subscript in math: `_`. Underscore, /// Starts and ends a math formula: `$`. Dollar, - /// The unary plus, binary addition operator, and start of enum items: `+`. 
+ /// The unary plus and binary addition operator: `+`. Plus, - /// The unary negation, binary subtraction operator, and start of list - /// items: `-`. + /// The unary negation and binary subtraction operator: `-`. Minus, - /// The division operator, start of term list items, and fraction operator - /// in a formula: `/`. + /// The division operator and fraction operator in math: `/`. Slash, - /// The superscript operator in a formula: `^`. + /// The superscript operator in math: `^`. Hat, /// The field access and method call operator: `.`. Dot, @@ -119,59 +172,6 @@ pub enum SyntaxKind { /// The `as` keyword. As, - /// Markup of which all lines must have a minimal indentation. - /// - /// Notably, the number does not determine in which column the markup - /// started, but to the right of which column all markup elements must be, - /// so it is zero except inside indent-aware constructs like lists. - Markup { min_indent: usize }, - /// Plain text without markup. - Text, - /// A forced line break: `\`. - Linebreak, - /// An escape sequence: `\#`, `\u{1F5FA}`. - Escape, - /// A shorthand for a unicode codepoint. For example, `~` for non-breaking - /// space or `-?` for a soft hyphen. - Shorthand, - /// Symbol notation: `:arrow:l:`. The string only contains the inner part - /// without leading and trailing dot. - Symbol, - /// A smart quote: `'` or `"`. - SmartQuote, - /// Strong content: `*Strong*`. - Strong, - /// Emphasized content: `_Emphasized_`. - Emph, - /// Raw text with optional syntax highlighting: `` `...` ``. - Raw { column: usize }, - /// A hyperlink: `https://typst.org`. - Link, - /// A label: ``. - Label, - /// A reference: `@target`. - Ref, - /// A section heading: `= Introduction`. - Heading, - /// An item in a bullet list: `- ...`. - ListItem, - /// An item in an enumeration (numbered list): `+ ...` or `1. ...`. - EnumItem, - /// An explicit enumeration numbering: `23.`. - EnumNumbering, - /// An item in a term list: `/ Term: Details`. 
- TermItem, - /// A mathematical formula: `$x$`, `$ x^2 $`. - Math, - /// An atom in a formula: `x`, `+`, `12`. - Atom, - /// A base with optional sub- and superscripts in a formula: `a_1^2`. - Script, - /// A fraction in a formula: `x/2`. - Frac, - /// An alignment point in a formula: `&`. - AlignPoint, - /// An identifier: `it`. Ident, /// A boolean: `true`, `false`. @@ -243,54 +243,103 @@ pub enum SyntaxKind { /// A return from a function: `return`, `return x + 1`. FuncReturn, + /// A line comment: `// ...`. + LineComment, + /// A block comment: `/* ... */`. + BlockComment, /// An invalid sequence of characters. Error, + /// The end of the file. + Eof, } impl SyntaxKind { - /// Whether this is trivia. - pub fn is_trivia(self) -> bool { - self.is_space() || self.is_comment() || self.is_error() + /// Is this a bracket, brace, or parenthesis? + pub fn is_grouping(self) -> bool { + matches!( + self, + Self::LeftBracket + | Self::LeftBrace + | Self::LeftParen + | Self::RightBracket + | Self::RightBrace + | Self::RightParen + ) } - /// Whether this is a space. - pub fn is_space(self) -> bool { - matches!(self, Self::Space { .. }) + /// Does this node terminate a preceding expression? + pub fn is_terminator(self) -> bool { + matches!( + self, + Self::Eof + | Self::Semicolon + | Self::RightBrace + | Self::RightParen + | Self::RightBracket + ) } - /// Whether this is a comment. - pub fn is_comment(self) -> bool { - matches!(self, Self::LineComment | Self::BlockComment) - } - - /// Whether this is an error. - pub fn is_error(self) -> bool { - matches!(self, SyntaxKind::Error) - } - - /// Whether this is a left or right parenthesis. - pub fn is_paren(self) -> bool { - matches!(self, Self::LeftParen | Self::RightParen) + /// Is this a code or content block. + pub fn is_block(self) -> bool { + matches!(self, Self::CodeBlock | Self::ContentBlock) } /// Does this node need termination through a semicolon or linebreak? 
pub fn is_stmt(self) -> bool { matches!( self, - SyntaxKind::LetBinding - | SyntaxKind::SetRule - | SyntaxKind::ShowRule - | SyntaxKind::ModuleImport - | SyntaxKind::ModuleInclude + Self::LetBinding + | Self::SetRule + | Self::ShowRule + | Self::ModuleImport + | Self::ModuleInclude ) } + /// Whether this kind of node is automatically skipped by the parser in + /// code and math mode. + pub fn is_trivia(self) -> bool { + matches!( + self, + Self::Space | Self::Parbreak | Self::LineComment | Self::BlockComment + ) + } + + /// Whether this is an error. + pub fn is_error(self) -> bool { + self == Self::Error + } + /// A human-readable name for the kind. pub fn name(self) -> &'static str { match self { - Self::LineComment => "line comment", - Self::BlockComment => "block comment", - Self::Space { .. } => "space", + Self::Markup => "markup", + Self::Text => "text", + Self::Space => "space", + Self::Linebreak => "line break", + Self::Parbreak => "paragraph break", + Self::Escape => "escape sequence", + Self::Shorthand => "shorthand", + Self::Symbol => "symbol notation", + Self::Strong => "strong content", + Self::Emph => "emphasized content", + Self::Raw => "raw block", + Self::Link => "link", + Self::Label => "label", + Self::Ref => "reference", + Self::Heading => "heading", + Self::HeadingMarker => "heading marker", + Self::ListItem => "list item", + Self::ListMarker => "list marker", + Self::EnumItem => "enum item", + Self::EnumMarker => "enum marker", + Self::TermItem => "term list item", + Self::TermMarker => "term marker", + Self::Math => "math formula", + Self::Atom => "math atom", + Self::Script => "script", + Self::Frac => "fraction", + Self::AlignPoint => "alignment point", Self::LeftBrace => "opening brace", Self::RightBrace => "closing brace", Self::LeftBracket => "opening bracket", @@ -309,7 +358,7 @@ impl SyntaxKind { Self::Slash => "slash", Self::Hat => "hat", Self::Dot => "dot", - Self::Eq => "assignment operator", + Self::Eq => "equals sign", Self::EqEq 
=> "equality operator", Self::ExclEq => "inequality operator", Self::Lt => "less-than operator", @@ -341,28 +390,6 @@ impl SyntaxKind { Self::Import => "keyword `import`", Self::Include => "keyword `include`", Self::As => "keyword `as`", - Self::Markup { .. } => "markup", - Self::Text => "text", - Self::Linebreak => "linebreak", - Self::Escape => "escape sequence", - Self::Shorthand => "shorthand", - Self::Symbol => "symbol notation", - Self::Strong => "strong content", - Self::Emph => "emphasized content", - Self::Raw { .. } => "raw block", - Self::Link => "link", - Self::Label => "label", - Self::Ref => "reference", - Self::Heading => "heading", - Self::ListItem => "list item", - Self::EnumItem => "enumeration item", - Self::EnumNumbering => "enumeration item numbering", - Self::TermItem => "term list item", - Self::Math => "math formula", - Self::Atom => "math atom", - Self::Script => "script", - Self::Frac => "fraction", - Self::AlignPoint => "alignment point", Self::Ident => "identifier", Self::Bool => "boolean", Self::Int => "integer", @@ -398,7 +425,10 @@ impl SyntaxKind { Self::LoopBreak => "`break` expression", Self::LoopContinue => "`continue` expression", Self::FuncReturn => "`return` expression", + Self::LineComment => "line comment", + Self::BlockComment => "block comment", Self::Error => "syntax error", + Self::Eof => "end of file", } } } diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs index f082bd285..e3c291509 100644 --- a/src/syntax/lexer.rs +++ b/src/syntax/lexer.rs @@ -9,12 +9,11 @@ use crate::util::{format_eco, EcoString}; pub(super) struct Lexer<'s> { /// The underlying scanner. s: Scanner<'s>, - /// The mode the lexer is in. This determines what tokens it recognizes. + /// The mode the lexer is in. This determines which kinds of tokens it + /// produces. mode: LexMode, - /// Whether the last token has been terminated. - terminated: bool, - /// Offsets the indentation on the first line of the source. 
- column_offset: usize, + /// Whether the last token contained a newline. + newline: bool, /// An error for the last token. error: Option<(EcoString, ErrorPos)>, } @@ -33,12 +32,11 @@ pub(super) enum LexMode { impl<'s> Lexer<'s> { /// Create a new lexer with the given mode and a prefix to offset column /// calculations. - pub fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self { + pub fn new(text: &'s str, mode: LexMode) -> Self { Self { s: Scanner::new(text), mode, - terminated: true, - column_offset: column(prefix, prefix.len(), 0), + newline: false, error: None, } } @@ -64,26 +62,18 @@ impl<'s> Lexer<'s> { self.s.jump(index); } - /// The underlying scanner. - pub fn scanner(&self) -> Scanner<'s> { - self.s + /// Whether the last token contained a newline. + pub fn newline(&self) -> bool { + self.newline } - /// Whether the last token was terminated. - pub fn terminated(&self) -> bool { - self.terminated - } - - /// The column index of a given index in the source string. - pub fn column(&self, index: usize) -> usize { - column(self.s.string(), index, self.column_offset) - } - - /// Take out the last error. - pub fn last_error(&mut self) -> Option<(EcoString, ErrorPos)> { + /// Take out the last error, if any. + pub fn take_error(&mut self) -> Option<(EcoString, ErrorPos)> { self.error.take() } +} +impl Lexer<'_> { /// Construct a full-positioned syntax error. fn error(&mut self, message: impl Into) -> SyntaxKind { self.error = Some((message.into(), ErrorPos::Full)); @@ -97,45 +87,53 @@ impl<'s> Lexer<'s> { } } -impl Iterator for Lexer<'_> { - type Item = SyntaxKind; - - /// Produce the next token. - fn next(&mut self) -> Option { +/// Shared. +impl Lexer<'_> { + pub fn next(&mut self) -> SyntaxKind { + self.newline = false; self.error = None; let start = self.s.cursor(); - let c = self.s.eat()?; - Some(match c { - // Trivia. 
- c if c.is_whitespace() => self.whitespace(c), - '/' if self.s.eat_if('/') => self.line_comment(), - '/' if self.s.eat_if('*') => self.block_comment(), - '*' if self.s.eat_if('/') => self.error("unexpected end of block comment"), + match self.s.eat() { + Some(c) if c.is_whitespace() => self.whitespace(start, c), + Some('/') if self.s.eat_if('/') => self.line_comment(), + Some('/') if self.s.eat_if('*') => self.block_comment(), + Some('*') if self.s.eat_if('/') => { + self.error("unexpected end of block comment") + } - // Other things. - _ => match self.mode { + Some(c) => match self.mode { LexMode::Markup => self.markup(start, c), LexMode::Math => self.math(c), LexMode::Code => self.code(start, c), }, - }) - } -} -/// Shared. -impl Lexer<'_> { + None => SyntaxKind::Eof, + } + } + + fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind { + let more = self.s.eat_while(char::is_whitespace); + let newlines = match c { + ' ' if more.is_empty() => 0, + _ => count_newlines(self.s.from(start)), + }; + + self.newline = newlines > 0; + if self.mode == LexMode::Markup && newlines >= 2 { + SyntaxKind::Parbreak + } else { + SyntaxKind::Space + } + } + fn line_comment(&mut self) -> SyntaxKind { self.s.eat_until(is_newline); - if self.s.done() { - self.terminated = false; - } SyntaxKind::LineComment } fn block_comment(&mut self) -> SyntaxKind { let mut state = '_'; let mut depth = 1; - self.terminated = false; // Find the first `*/` that does not correspond to a nested `/*`. while let Some(c) = self.s.eat() { @@ -143,7 +141,6 @@ impl Lexer<'_> { ('*', '/') => { depth -= 1; if depth == 0 { - self.terminated = true; break; } '_' @@ -162,32 +159,6 @@ impl Lexer<'_> { SyntaxKind::BlockComment } - - fn whitespace(&mut self, c: char) -> SyntaxKind { - if c == ' ' && !self.s.at(char::is_whitespace) { - return SyntaxKind::Space { newlines: 0 }; - } - - self.s.uneat(); - - // Count the number of newlines. 
- let mut newlines = 0; - while let Some(c) = self.s.eat() { - if !c.is_whitespace() { - self.s.uneat(); - break; - } - - if is_newline(c) { - if c == '\r' { - self.s.eat_if('\n'); - } - newlines += 1; - } - } - - SyntaxKind::Space { newlines } - } } /// Markup. @@ -199,9 +170,9 @@ impl Lexer<'_> { '`' => self.raw(), 'h' if self.s.eat_if("ttp://") => self.link(), 'h' if self.s.eat_if("ttps://") => self.link(), + '0'..='9' => self.numbering(start), '<' if self.s.at(is_id_continue) => self.label(), '@' if self.s.at(is_id_continue) => self.reference(), - '0'..='9' => self.numbering(start), '#' if self.s.eat_if('{') => SyntaxKind::LeftBrace, '#' if self.s.eat_if('[') => SyntaxKind::LeftBracket, '#' if self.s.at(is_id_start) => { @@ -225,17 +196,154 @@ impl Lexer<'_> { '\'' => SyntaxKind::SmartQuote, '"' => SyntaxKind::SmartQuote, '$' => SyntaxKind::Dollar, - '=' => SyntaxKind::Eq, - '+' => SyntaxKind::Plus, - '/' => SyntaxKind::Slash, '~' => SyntaxKind::Shorthand, ':' => SyntaxKind::Colon, - '-' => SyntaxKind::Minus, + '=' => { + self.s.eat_while('='); + if self.space_and_more() { + SyntaxKind::HeadingMarker + } else { + self.text() + } + } + '-' if self.space_and_more() => SyntaxKind::ListMarker, + '+' if self.space_and_more() => SyntaxKind::EnumMarker, + '/' if self.space_and_more() => SyntaxKind::TermMarker, _ => self.text(), } } + fn backslash(&mut self) -> SyntaxKind { + if self.s.eat_if("u{") { + let hex = self.s.eat_while(char::is_ascii_alphanumeric); + if !self.s.eat_if('}') { + return self.error_at_end("expected closing brace"); + } + + if u32::from_str_radix(hex, 16) + .ok() + .and_then(std::char::from_u32) + .is_none() + { + return self.error("invalid unicode escape sequence"); + } + + return SyntaxKind::Escape; + } + + if self.s.done() || self.s.at(char::is_whitespace) { + SyntaxKind::Linebreak + } else { + self.s.eat(); + SyntaxKind::Escape + } + } + + fn maybe_symbol(&mut self) -> SyntaxKind { + let start = self.s.cursor(); + let mut end = start; + while 
!self.s.eat_while(is_id_continue).is_empty() && self.s.at(':') { + end = self.s.cursor(); + self.s.eat(); + } + + self.s.jump(end); + + if start < end { + self.s.expect(':'); + SyntaxKind::Symbol + } else if self.mode == LexMode::Markup { + SyntaxKind::Colon + } else { + SyntaxKind::Atom + } + } + + fn raw(&mut self) -> SyntaxKind { + let mut backticks = 1; + while self.s.eat_if('`') { + backticks += 1; + } + + if backticks == 2 { + return SyntaxKind::Raw; + } + + let mut found = 0; + while found < backticks { + match self.s.eat() { + Some('`') => found += 1, + Some(_) => found = 0, + None => break, + } + } + + if found != backticks { + let remaining = backticks - found; + let noun = if remaining == 1 { "backtick" } else { "backticks" }; + return self.error_at_end(if found == 0 { + format_eco!("expected {} {}", remaining, noun) + } else { + format_eco!("expected {} more {}", remaining, noun) + }); + } + + SyntaxKind::Raw + } + + fn link(&mut self) -> SyntaxKind { + #[rustfmt::skip] + self.s.eat_while(|c: char| matches!(c, + | '0' ..= '9' + | 'a' ..= 'z' + | 'A' ..= 'Z' + | '~' | '/' | '%' | '?' | '#' | '&' | '+' | '=' + | '\'' | '.' 
| ',' | ';' + )); + + if self.s.scout(-1) == Some('.') { + self.s.uneat(); + } + + SyntaxKind::Link + } + + fn numbering(&mut self, start: usize) -> SyntaxKind { + self.s.eat_while(char::is_ascii_digit); + + let read = self.s.from(start); + if self.s.eat_if('.') { + if let Ok(number) = read.parse::() { + if number == 0 { + return self.error("must be positive"); + } + + return SyntaxKind::EnumMarker; + } + } + + self.text() + } + + fn label(&mut self) -> SyntaxKind { + let label = self.s.eat_while(is_id_continue); + if label.is_empty() { + return self.error("label cannot be empty"); + } + + if !self.s.eat_if('>') { + return self.error_at_end("expected closing angle bracket"); + } + + SyntaxKind::Label + } + + fn reference(&mut self) -> SyntaxKind { + self.s.eat_while(is_id_continue); + SyntaxKind::Ref + } + fn text(&mut self) -> SyntaxKind { macro_rules! table { ($(|$c:literal)*) => { @@ -277,132 +385,20 @@ impl Lexer<'_> { SyntaxKind::Text } - fn backslash(&mut self) -> SyntaxKind { - if self.s.eat_if("u{") { - let hex = self.s.eat_while(char::is_ascii_alphanumeric); - if !self.s.eat_if('}') { - self.terminated = false; - return self.error_at_end("expected closing brace"); - } - - if u32::from_str_radix(hex, 16) - .ok() - .and_then(std::char::from_u32) - .is_none() - { - return self.error("invalid unicode escape sequence"); - } - - return SyntaxKind::Escape; - } - - if self.s.done() || self.s.at(char::is_whitespace) { - SyntaxKind::Linebreak - } else { - self.s.eat(); - SyntaxKind::Escape - } - } - - fn maybe_symbol(&mut self) -> SyntaxKind { - let start = self.s.cursor(); - let mut end = start; - while !self.s.eat_while(is_id_continue).is_empty() && self.s.at(':') { - end = self.s.cursor(); - self.s.eat(); - } - - self.s.jump(end); - - if start < end { - self.s.expect(':'); - SyntaxKind::Symbol - } else if self.mode == LexMode::Markup { - SyntaxKind::Colon - } else { - SyntaxKind::Atom - } - } - - fn link(&mut self) -> SyntaxKind { - #[rustfmt::skip] - 
self.s.eat_while(|c: char| matches!(c, - | '0' ..= '9' - | 'a' ..= 'z' - | 'A' ..= 'Z' - | '~' | '/' | '%' | '?' | '#' | '&' | '+' | '=' - | '\'' | '.' | ',' | ';' - )); - - if self.s.scout(-1) == Some('.') { - self.s.uneat(); - } - - SyntaxKind::Link - } - - fn raw(&mut self) -> SyntaxKind { - let column = self.column(self.s.cursor() - 1); - - let mut backticks = 1; - while self.s.eat_if('`') { - backticks += 1; - } - - if backticks == 2 { - return SyntaxKind::Raw { column }; - } - - let mut found = 0; - while found < backticks { - match self.s.eat() { - Some('`') => found += 1, - Some(_) => found = 0, - None => break, - } - } - - if found != backticks { - self.terminated = false; - let remaining = backticks - found; - let noun = if remaining == 1 { "backtick" } else { "backticks" }; - return self.error_at_end(if found == 0 { - format_eco!("expected {} {}", remaining, noun) - } else { - format_eco!("expected {} more {}", remaining, noun) - }); - } - - SyntaxKind::Raw { column } - } - - fn numbering(&mut self, start: usize) -> SyntaxKind { - self.s.eat_while(char::is_ascii_digit); - - let read = self.s.from(start); - if self.s.eat_if('.') { - if let Ok(number) = read.parse::() { - if number == 0 { - return self.error("must be positive"); - } - - return SyntaxKind::EnumNumbering; - } - } - - self.text() - } - - fn reference(&mut self) -> SyntaxKind { - self.s.eat_while(is_id_continue); - SyntaxKind::Ref - } - fn in_word(&self) -> bool { - let alphanumeric = |c: Option| c.map_or(false, |c| c.is_alphanumeric()); + let alphanum = |c: Option| c.map_or(false, |c| c.is_alphanumeric()); let prev = self.s.scout(-2); let next = self.s.peek(); - alphanumeric(prev) && alphanumeric(next) + alphanum(prev) && alphanum(next) + } + + fn space_and_more(&self) -> bool { + let mut s = self.s; + if !s.at(char::is_whitespace) { + return false; + } + s.eat_while(|c: char| c.is_whitespace() && !is_newline(c)); + !s.done() && !s.at(is_newline) } } @@ -586,26 +582,11 @@ impl Lexer<'_> { }); 
if !self.s.eat_if('"') { - self.terminated = false; return self.error_at_end("expected quote"); } SyntaxKind::Str } - - fn label(&mut self) -> SyntaxKind { - let label = self.s.eat_while(is_id_continue); - if label.is_empty() { - return self.error("label cannot be empty"); - } - - if !self.s.eat_if('>') { - self.terminated = false; - return self.error_at_end("expected closing angle bracket"); - } - - SyntaxKind::Label - } } /// Try to parse an identifier into a keyword. @@ -632,34 +613,6 @@ fn keyword(ident: &str) -> Option { }) } -/// The column index of a given index in the source string, given a column -/// offset for the first line. -fn column(string: &str, index: usize, offset: usize) -> usize { - let mut apply_offset = false; - let res = string[..index] - .char_indices() - .rev() - .take_while(|&(_, c)| !is_newline(c)) - .inspect(|&(i, _)| { - if i == 0 { - apply_offset = true - } - }) - .count(); - - // The loop is never executed if the slice is empty, but we are of - // course still at the start of the first line. - if index == 0 { - apply_offset = true; - } - - if apply_offset { - res + offset - } else { - res - } -} - /// Whether this character denotes a newline. #[inline] pub fn is_newline(character: char) -> bool { @@ -695,6 +648,21 @@ pub(super) fn split_newlines(text: &str) -> Vec<&str> { lines } +/// Count the number of newlines in text. +fn count_newlines(text: &str) -> usize { + let mut newlines = 0; + let mut s = Scanner::new(text); + while let Some(c) = s.eat() { + if is_newline(c) { + if c == '\r' { + s.eat_if('\n'); + } + newlines += 1; + } + } + newlines +} + /// Whether a string is a valid unicode identifier. 
/// /// In addition to what is specified in the [Unicode Standard][uax31], we allow: diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index a2bb57662..ae12e818c 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -6,7 +6,7 @@ mod kind; mod lexer; mod node; mod parser; -mod reparse; +mod reparser; mod source; mod span; @@ -14,5 +14,6 @@ pub use self::kind::*; pub use self::lexer::*; pub use self::node::*; pub use self::parser::*; +pub use self::reparser::*; pub use self::source::*; pub use self::span::*; diff --git a/src/syntax/node.rs b/src/syntax/node.rs index 283d55b4c..3465f73fd 100644 --- a/src/syntax/node.rs +++ b/src/syntax/node.rs @@ -12,15 +12,15 @@ use crate::util::EcoString; #[derive(Clone, PartialEq, Hash)] pub struct SyntaxNode(Repr); -/// The two internal representations. +/// The three internal representations. #[derive(Clone, PartialEq, Hash)] enum Repr { /// A leaf node. Leaf(LeafNode), /// A reference-counted inner node. Inner(Arc), - /// An error. - Error(ErrorNode), + /// An error node. + Error(Arc), } impl SyntaxNode { @@ -36,7 +36,7 @@ impl SyntaxNode { /// Create a new error node. pub fn error(message: impl Into, pos: ErrorPos, len: usize) -> Self { - Self(Repr::Error(ErrorNode::new(message, pos, len))) + Self(Repr::Error(Arc::new(ErrorNode::new(message, pos, len)))) } /// The type of the node. @@ -134,17 +134,13 @@ impl SyntaxNode { .collect() } } +} - /// Change the type of the node. - pub(super) fn convert_to(&mut self, kind: SyntaxKind) { - debug_assert!(!kind.is_error()); - match &mut self.0 { - Repr::Leaf(leaf) => leaf.kind = kind, - Repr::Inner(inner) => { - let node = Arc::make_mut(inner); - node.kind = kind; - } - Repr::Error(_) => {} +impl SyntaxNode { + /// Mark this node as erroneous. 
+ pub(super) fn make_erroneous(&mut self) { + if let Repr::Inner(inner) = &mut self.0 { + Arc::make_mut(inner).erroneous = true; } } @@ -159,7 +155,7 @@ impl SyntaxNode { match &mut self.0 { Repr::Leaf(leaf) => leaf.span = span, Repr::Inner(inner) => Arc::make_mut(inner).synthesize(span), - Repr::Error(error) => error.span = span, + Repr::Error(error) => Arc::make_mut(error).span = span, } } @@ -177,7 +173,7 @@ impl SyntaxNode { match &mut self.0 { Repr::Leaf(leaf) => leaf.span = mid, Repr::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within)?, - Repr::Error(error) => error.span = mid, + Repr::Error(error) => Arc::make_mut(error).span = mid, } Ok(()) @@ -245,7 +241,7 @@ impl SyntaxNode { } /// The upper bound of assigned numbers in this subtree. - fn upper(&self) -> u64 { + pub(super) fn upper(&self) -> u64 { match &self.0 { Repr::Inner(inner) => inner.upper, Repr::Leaf(leaf) => leaf.span.number() + 1, @@ -297,7 +293,7 @@ impl LeafNode { impl Debug for LeafNode { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {}", self.kind, self.len()) + write!(f, "{:?}: {:?}", self.kind, self.text) } } @@ -588,7 +584,7 @@ impl ErrorNode { impl Debug for ErrorNode { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "({}): {}", self.message, self.len) + write!(f, "Error: {} ({})", self.len, self.message) } } @@ -888,7 +884,7 @@ mod tests { let prev = leaf.prev_leaf().unwrap(); let next = leaf.next_leaf().unwrap(); assert_eq!(prev.kind(), SyntaxKind::Eq); - assert_eq!(leaf.kind(), SyntaxKind::Space { newlines: 0 }); + assert_eq!(leaf.kind(), SyntaxKind::Space); assert_eq!(next.kind(), SyntaxKind::Int); } } diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index 1584e59b8..0e1b52b1a 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -1,262 +1,93 @@ use std::collections::HashSet; -use std::fmt::{self, Display, Formatter}; -use std::mem; +use std::ops::Range; -use super::ast::{self, Assoc, BinOp, UnOp}; -use 
super::{ErrorPos, LexMode, Lexer, SyntaxKind, SyntaxNode}; +use super::{ast, is_newline, ErrorPos, LexMode, Lexer, SyntaxKind, SyntaxNode}; use crate::util::{format_eco, EcoString}; /// Parse a source file. pub fn parse(text: &str) -> SyntaxNode { - let mut p = Parser::new(text, LexMode::Markup); - markup(&mut p, true); + let mut p = Parser::new(text, 0, LexMode::Markup); + markup(&mut p, true, 0, |_| false); p.finish().into_iter().next().unwrap() } -/// Parse code directly, only used for syntax highlighting. +/// Parse code directly. +/// +/// This is only used for syntax highlighting. pub fn parse_code(text: &str) -> SyntaxNode { - let mut p = Parser::new(text, LexMode::Code); - p.perform(SyntaxKind::CodeBlock, code); + let mut p = Parser::new(text, 0, LexMode::Code); + let m = p.marker(); + code(&mut p, |_| false); + p.wrap(m, SyntaxKind::CodeBlock); p.finish().into_iter().next().unwrap() } -/// Reparse a code block. -/// -/// Returns `Some` if all of the input was consumed. -pub(super) fn reparse_code_block( - prefix: &str, - text: &str, - end_pos: usize, -) -> Option<(Vec, bool, usize)> { - let mut p = Parser::with_prefix(prefix, text, LexMode::Code); - if !p.at(SyntaxKind::LeftBrace) { - return None; - } - - code_block(&mut p); - - let (mut node, terminated) = p.consume()?; - let first = node.remove(0); - if first.len() != end_pos { - return None; - } - - Some((vec![first], terminated, 1)) -} - -/// Reparse a content block. -/// -/// Returns `Some` if all of the input was consumed. 
-pub(super) fn reparse_content_block( - prefix: &str, - text: &str, - end_pos: usize, -) -> Option<(Vec, bool, usize)> { - let mut p = Parser::with_prefix(prefix, text, LexMode::Code); - if !p.at(SyntaxKind::LeftBracket) { - return None; - } - - content_block(&mut p); - - let (mut node, terminated) = p.consume()?; - let first = node.remove(0); - if first.len() != end_pos { - return None; - } - - Some((vec![first], terminated, 1)) -} - -/// Reparse a sequence markup elements without the topmost node. -/// -/// Returns `Some` if all of the input was consumed. -pub(super) fn reparse_markup_elements( - prefix: &str, - text: &str, - end_pos: usize, - differential: isize, - reference: &[SyntaxNode], +fn markup( + p: &mut Parser, mut at_start: bool, min_indent: usize, -) -> Option<(Vec, bool, usize)> { - let mut p = Parser::with_prefix(prefix, text, LexMode::Markup); - - let mut node: Option<&SyntaxNode> = None; - let mut iter = reference.iter(); - let mut offset = differential; - let mut replaced = 0; - let mut stopped = false; - - 'outer: while !p.eof() { - if let Some(SyntaxKind::Space { newlines: (1..) }) = p.peek() { - if p.column(p.current_end()) < min_indent { - return None; + mut stop: impl FnMut(SyntaxKind) -> bool, +) { + let m = p.marker(); + while !p.eof() && !stop(p.current) { + if p.newline() { + at_start = true; + if min_indent > 0 && p.column(p.current_end()) < min_indent { + break; } - } - - markup_node(&mut p, &mut at_start); - - if p.prev_end() <= end_pos { + p.eat(); continue; } - let recent = p.marker().before(&p).unwrap(); - let recent_start = p.prev_end() - recent.len(); - - while offset <= recent_start as isize { - if let Some(node) = node { - // The nodes are equal, at the same position and have the - // same content. The parsing trees have converged again, so - // the reparse may stop here. 
- if offset == recent_start as isize && node == recent { - replaced -= 1; - stopped = true; - break 'outer; - } - } - - if let Some(node) = node { - offset += node.len() as isize; - } - - node = iter.next(); - if node.is_none() { - break; - } - - replaced += 1; + let prev = p.prev_end(); + markup_expr(p, &mut at_start); + if !p.progress(prev) { + p.unexpected(); } } - - if p.eof() && !stopped { - replaced = reference.len(); - } - - let (mut res, terminated) = p.consume()?; - if stopped { - res.pop().unwrap(); - } - - Some((res, terminated, replaced)) + p.wrap(m, SyntaxKind::Markup); } -/// Parse markup. -/// -/// If `at_start` is true, things like headings that may only appear at the -/// beginning of a line or content block are initially allowed. -fn markup(p: &mut Parser, mut at_start: bool) { - p.perform(SyntaxKind::Markup { min_indent: 0 }, |p| { - while !p.eof() { - markup_node(p, &mut at_start); - } - }); -} - -/// Parse markup that stays right of the given `column`. -fn markup_indented(p: &mut Parser, min_indent: usize) { - p.eat_while(|t| match t { - SyntaxKind::Space { newlines } => newlines == 0, - SyntaxKind::LineComment | SyntaxKind::BlockComment => true, - _ => false, - }); - - let marker = p.marker(); - let mut at_start = false; - - while !p.eof() { - match p.peek() { - Some(SyntaxKind::Space { newlines: (1..) }) - if p.column(p.current_end()) < min_indent => - { - break; - } - _ => {} - } - - markup_node(p, &mut at_start); - } - - marker.end(p, SyntaxKind::Markup { min_indent }); -} - -/// Parse a line of markup that can prematurely end if `f` returns true. 
-fn markup_line(p: &mut Parser, mut f: F) -where - F: FnMut(SyntaxKind) -> bool, -{ - p.eat_while(|t| match t { - SyntaxKind::Space { newlines } => newlines == 0, - SyntaxKind::LineComment | SyntaxKind::BlockComment => true, - _ => false, - }); - - p.perform(SyntaxKind::Markup { min_indent: usize::MAX }, |p| { - let mut at_start = false; - while let Some(kind) = p.peek() { - if let SyntaxKind::Space { newlines: (1..) } = kind { - break; - } - - if f(kind) { - break; - } - - markup_node(p, &mut at_start); - } - }); -} - -fn markup_node(p: &mut Parser, at_start: &mut bool) { - let Some(token) = p.peek() else { return }; - match token { - // Whitespace. - SyntaxKind::Space { newlines } => { - *at_start |= newlines > 0; +pub(super) fn reparse_markup( + text: &str, + range: Range, + at_start: &mut bool, + mut stop: impl FnMut(SyntaxKind) -> bool, +) -> Option> { + let mut p = Parser::new(&text, range.start, LexMode::Markup); + while !p.eof() && !stop(p.current) && p.current_start() < range.end { + if p.newline() { + *at_start = true; p.eat(); - return; + continue; } - // Comments. - SyntaxKind::LineComment | SyntaxKind::BlockComment => { - p.eat(); - return; + let prev = p.prev_end(); + markup_expr(&mut p, at_start); + if !p.progress(prev) { + p.unexpected(); } + } + (p.balanced && p.current_start() == range.end).then(|| p.finish()) +} - // Text and markup. - SyntaxKind::Text - | SyntaxKind::Linebreak - | SyntaxKind::SmartQuote { .. } - | SyntaxKind::Escape - | SyntaxKind::Shorthand - | SyntaxKind::Symbol - | SyntaxKind::Link - | SyntaxKind::Raw { .. } - | SyntaxKind::Ref => p.eat(), - - // Math. - SyntaxKind::Dollar => math(p), - - // Strong, emph, heading. 
+fn markup_expr(p: &mut Parser, at_start: &mut bool) { + match p.current() { SyntaxKind::Star => strong(p), SyntaxKind::Underscore => emph(p), - SyntaxKind::Eq => heading(p, *at_start), + SyntaxKind::HeadingMarker if *at_start => heading(p), + SyntaxKind::ListMarker if *at_start => list_item(p), + SyntaxKind::EnumMarker if *at_start => enum_item(p), + SyntaxKind::TermMarker if *at_start => term_item(p), + SyntaxKind::Dollar => equation(p), - // Lists. - SyntaxKind::Minus => list_item(p, *at_start), - SyntaxKind::Plus | SyntaxKind::EnumNumbering => enum_item(p, *at_start), - SyntaxKind::Slash => { - term_item(p, *at_start).ok(); - } - SyntaxKind::Colon => { - let marker = p.marker(); - p.eat(); - marker.convert(p, SyntaxKind::Text); - } + SyntaxKind::HeadingMarker + | SyntaxKind::ListMarker + | SyntaxKind::EnumMarker + | SyntaxKind::TermMarker + | SyntaxKind::Colon => p.convert(SyntaxKind::Text), - // Hashtag + keyword / identifier. SyntaxKind::Ident - | SyntaxKind::Label | SyntaxKind::Let | SyntaxKind::Set | SyntaxKind::Show @@ -267,204 +98,148 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | SyntaxKind::Include | SyntaxKind::Break | SyntaxKind::Continue - | SyntaxKind::Return => embedded_expr(p), + | SyntaxKind::Return + | SyntaxKind::LeftBrace + | SyntaxKind::LeftBracket => embedded_code_expr(p), - // Code and content block. 
- SyntaxKind::LeftBrace => code_block(p), - SyntaxKind::LeftBracket => content_block(p), + SyntaxKind::Text + | SyntaxKind::Linebreak + | SyntaxKind::Escape + | SyntaxKind::Shorthand + | SyntaxKind::Symbol + | SyntaxKind::SmartQuote + | SyntaxKind::Raw + | SyntaxKind::Link + | SyntaxKind::Label + | SyntaxKind::Ref => p.eat(), - SyntaxKind::Error => p.eat(), - _ => p.unexpected(), - }; + SyntaxKind::Space + | SyntaxKind::Parbreak + | SyntaxKind::LineComment + | SyntaxKind::BlockComment => { + p.eat(); + return; + } + _ => {} + } *at_start = false; } fn strong(p: &mut Parser) { - p.perform(SyntaxKind::Strong, |p| { - p.start_group(Group::Strong); - markup(p, false); - p.end_group(); - }) + let m = p.marker(); + p.expect(SyntaxKind::Star); + markup(p, false, 0, |kind| { + kind == SyntaxKind::Star + || kind == SyntaxKind::Parbreak + || kind == SyntaxKind::RightBracket + }); + p.expect(SyntaxKind::Star); + p.wrap(m, SyntaxKind::Strong); } fn emph(p: &mut Parser) { - p.perform(SyntaxKind::Emph, |p| { - p.start_group(Group::Emph); - markup(p, false); - p.end_group(); - }) -} - -fn heading(p: &mut Parser, at_start: bool) { - let marker = p.marker(); - let mut markers = vec![]; - while p.at(SyntaxKind::Eq) { - markers.push(p.marker()); - p.eat(); - } - - if at_start && p.peek().map_or(true, |kind| kind.is_space()) { - p.eat_while(|kind| kind == SyntaxKind::Space { newlines: 0 }); - markup_line(p, |kind| matches!(kind, SyntaxKind::Label)); - marker.end(p, SyntaxKind::Heading); - } else { - for marker in markers { - marker.convert(p, SyntaxKind::Text); - } - } -} - -fn list_item(p: &mut Parser, at_start: bool) { - let marker = p.marker(); - p.assert(SyntaxKind::Minus); - - let min_indent = p.column(p.prev_end()); - if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() { - markup_indented(p, min_indent); - marker.end(p, SyntaxKind::ListItem); - } else { - marker.convert(p, SyntaxKind::Text); - } -} - -fn enum_item(p: &mut Parser, at_start: bool) { - let marker 
= p.marker(); - p.eat(); - - let min_indent = p.column(p.prev_end()); - if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() { - markup_indented(p, min_indent); - marker.end(p, SyntaxKind::EnumItem); - } else { - marker.convert(p, SyntaxKind::Text); - } -} - -fn term_item(p: &mut Parser, at_start: bool) -> ParseResult { - let marker = p.marker(); - p.eat(); - - let min_indent = p.column(p.prev_end()); - if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() { - markup_line(p, |node| matches!(node, SyntaxKind::Colon)); - p.expect(SyntaxKind::Colon)?; - markup_indented(p, min_indent); - marker.end(p, SyntaxKind::TermItem); - } else { - marker.convert(p, SyntaxKind::Text); - } - - Ok(()) -} - -fn embedded_expr(p: &mut Parser) { - // Does the expression need termination or can content follow directly? - let stmt = matches!( - p.peek(), - Some( - SyntaxKind::Let - | SyntaxKind::Set - | SyntaxKind::Show - | SyntaxKind::Import - | SyntaxKind::Include - ) - ); - - p.start_group(Group::Expr); - let res = expr_prec(p, true, 0); - if stmt && res.is_ok() && !p.eof() { - p.expected("semicolon or line break"); - } - p.end_group(); -} - -fn math(p: &mut Parser) { - p.perform(SyntaxKind::Math, |p| { - p.start_group(Group::Math); - while !p.eof() { - math_node(p); - } - p.end_group(); + let m = p.marker(); + p.expect(SyntaxKind::Underscore); + markup(p, false, 0, |kind| { + kind == SyntaxKind::Underscore + || kind == SyntaxKind::Parbreak + || kind == SyntaxKind::RightBracket }); + p.expect(SyntaxKind::Underscore); + p.wrap(m, SyntaxKind::Emph); } -fn math_node(p: &mut Parser) { - math_node_prec(p, 0, None) +fn heading(p: &mut Parser) { + let m = p.marker(); + p.expect(SyntaxKind::HeadingMarker); + whitespace(p); + markup(p, false, usize::MAX, |kind| { + kind == SyntaxKind::Label || kind == SyntaxKind::RightBracket + }); + p.wrap(m, SyntaxKind::Heading); } -fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option) { - let marker = p.marker(); 
- math_primary(p); +fn list_item(p: &mut Parser) { + let m = p.marker(); + p.expect(SyntaxKind::ListMarker); + let min_indent = p.column(p.prev_end()); + whitespace(p); + markup(p, false, min_indent, |kind| kind == SyntaxKind::RightBracket); + p.wrap(m, SyntaxKind::ListItem); +} - loop { - let (kind, mut prec, assoc, stop) = match p.peek() { - v if v == stop => break, - Some(SyntaxKind::Underscore) => { - (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Hat)) - } - Some(SyntaxKind::Hat) => { - (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Underscore)) - } - Some(SyntaxKind::Slash) => (SyntaxKind::Frac, 1, Assoc::Left, None), - _ => break, - }; +fn enum_item(p: &mut Parser) { + let m = p.marker(); + p.expect(SyntaxKind::EnumMarker); + let min_indent = p.column(p.prev_end()); + whitespace(p); + markup(p, false, min_indent, |kind| kind == SyntaxKind::RightBracket); + p.wrap(m, SyntaxKind::EnumItem); +} - if prec < min_prec { - break; - } - - match assoc { - Assoc::Left => prec += 1, - Assoc::Right => {} - } +fn term_item(p: &mut Parser) { + let m = p.marker(); + p.expect(SyntaxKind::TermMarker); + let min_indent = p.column(p.prev_end()); + whitespace(p); + markup(p, false, usize::MAX, |kind| { + kind == SyntaxKind::Colon || kind == SyntaxKind::RightBracket + }); + p.expect(SyntaxKind::Colon); + whitespace(p); + markup(p, false, min_indent, |kind| kind == SyntaxKind::RightBracket); + p.wrap(m, SyntaxKind::TermItem); +} +fn whitespace(p: &mut Parser) { + while p.current().is_trivia() { p.eat(); - math_node_prec(p, prec, stop); - - // Allow up to two different scripts. We do not risk encountering the - // previous script kind again here due to right-associativity. - if p.eat_if(SyntaxKind::Underscore) || p.eat_if(SyntaxKind::Hat) { - math_node_prec(p, prec, None); - } - - marker.end(p, kind); } } -/// Parse a primary math node. -fn math_primary(p: &mut Parser) { - let Some(token) = p.peek() else { return }; - match token { - // Spaces and expressions. 
- SyntaxKind::Space { .. } - | SyntaxKind::Linebreak - | SyntaxKind::Escape - | SyntaxKind::Str - | SyntaxKind::Shorthand - | SyntaxKind::AlignPoint - | SyntaxKind::Symbol => p.eat(), +fn equation(p: &mut Parser) { + let m = p.marker(); + p.enter(LexMode::Math); + p.expect(SyntaxKind::Dollar); + math(p, |kind| kind == SyntaxKind::Dollar); + p.expect(SyntaxKind::Dollar); + p.exit(); + p.wrap(m, SyntaxKind::Math); +} - // Atoms. - SyntaxKind::Atom => match p.peek_src() { - "(" => math_group(p, Group::MathRow('(', ')')), - "{" => math_group(p, Group::MathRow('{', '}')), - "[" => math_group(p, Group::MathRow('[', ']')), +fn math(p: &mut Parser, mut stop: impl FnMut(SyntaxKind) -> bool) { + while !p.eof() && !stop(p.current()) { + let prev = p.prev_end(); + math_expr(p); + if !p.progress(prev) { + p.unexpected(); + } + } +} + +fn math_expr(p: &mut Parser) { + math_expr_prec(p, 0, SyntaxKind::Eof) +} + +fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) { + let m = p.marker(); + match p.current() { + SyntaxKind::Ident => { + p.eat(); + if p.directly_at(SyntaxKind::Atom) && p.current_text() == "(" { + math_args(p); + p.wrap(m, SyntaxKind::FuncCall); + } + } + + SyntaxKind::Atom => match p.current_text() { + "(" => math_delimited(p, ")"), + "{" => math_delimited(p, "}"), + "[" => math_delimited(p, "]"), _ => p.eat(), }, - // Identifiers and math calls. - SyntaxKind::Ident => { - let marker = p.marker(); - p.eat(); - - // Parenthesis or bracket means this is a function call. - if matches!(p.peek_direct(), Some(SyntaxKind::Atom) if p.peek_src() == "(") { - marker.perform(p, SyntaxKind::FuncCall, math_args); - } - } - - // Hashtag + keyword / identifier. 
SyntaxKind::Let | SyntaxKind::Set | SyntaxKind::Show @@ -475,55 +250,164 @@ fn math_primary(p: &mut Parser) { | SyntaxKind::Include | SyntaxKind::Break | SyntaxKind::Continue - | SyntaxKind::Return => embedded_expr(p), + | SyntaxKind::Return + | SyntaxKind::LeftBrace + | SyntaxKind::LeftBracket => embedded_code_expr(p), - // Code and content block. - SyntaxKind::LeftBrace => code_block(p), - SyntaxKind::LeftBracket => content_block(p), + SyntaxKind::Linebreak + | SyntaxKind::Escape + | SyntaxKind::Shorthand + | SyntaxKind::Symbol + | SyntaxKind::AlignPoint + | SyntaxKind::Str => p.eat(), - _ => p.unexpected(), + _ => return, + } + + while !p.eof() && !p.at(stop) { + let Some((kind, stop, assoc, mut prec)) = math_op(p.current()) else { + break; + }; + + if prec < min_prec { + break; + } + + match assoc { + ast::Assoc::Left => prec += 1, + ast::Assoc::Right => {} + } + + p.eat(); + math_expr_prec(p, prec, stop); + if p.eat_if(SyntaxKind::Underscore) || p.eat_if(SyntaxKind::Hat) { + math_expr_prec(p, prec, SyntaxKind::Eof); + } + + p.wrap(m, kind); } } -fn math_group(p: &mut Parser, group: Group) { - p.perform(SyntaxKind::Math, |p| { - p.start_group(group); - while !p.eof() { - math_node(p); +fn math_delimited(p: &mut Parser, closing: &str) { + let m = p.marker(); + p.expect(SyntaxKind::Atom); + while !p.eof() + && !p.at(SyntaxKind::Dollar) + && (!p.at(SyntaxKind::Atom) || p.current_text() != closing) + { + let prev = p.prev_end(); + math_expr(p); + if !p.progress(prev) { + p.unexpected(); } - p.end_group(); - }) + } + p.expect(SyntaxKind::Atom); + p.wrap(m, SyntaxKind::Math); } -fn expr(p: &mut Parser) -> ParseResult { - expr_prec(p, false, 0) +fn math_op(kind: SyntaxKind) -> Option<(SyntaxKind, SyntaxKind, ast::Assoc, usize)> { + match kind { + SyntaxKind::Underscore => { + Some((SyntaxKind::Script, SyntaxKind::Hat, ast::Assoc::Right, 2)) + } + SyntaxKind::Hat => { + Some((SyntaxKind::Script, SyntaxKind::Underscore, ast::Assoc::Right, 2)) + } + SyntaxKind::Slash => 
{ + Some((SyntaxKind::Frac, SyntaxKind::Eof, ast::Assoc::Left, 1)) + } + _ => None, + } } -/// Parse an expression with operators having at least the minimum precedence. -/// -/// If `atomic` is true, this does not parse binary operations and arrow -/// functions, which is exactly what we want in a shorthand expression directly -/// in markup. -/// -/// Stops parsing at operations with lower precedence than `min_prec`, -fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { - let marker = p.marker(); - - // Start the unary expression. - match p.peek().and_then(UnOp::from_token) { - Some(op) if !atomic => { - p.eat(); - let prec = op.precedence(); - expr_prec(p, atomic, prec)?; - marker.end(p, SyntaxKind::Unary); +fn math_args(p: &mut Parser) { + p.expect(SyntaxKind::Atom); + let m = p.marker(); + let mut m2 = p.marker(); + while !p.eof() { + match p.current_text() { + ")" => break, + "," => { + p.wrap(m2, SyntaxKind::Math); + p.convert(SyntaxKind::Comma); + m2 = p.marker(); + continue; + } + _ => {} } - _ => primary(p, atomic)?, - }; + + let prev = p.prev_end(); + math_expr(p); + if !p.progress(prev) { + p.unexpected(); + } + } + if m2 != p.marker() { + p.wrap(m2, SyntaxKind::Math); + } + p.wrap(m, SyntaxKind::Args); + p.expect(SyntaxKind::Atom); +} + +fn code(p: &mut Parser, mut stop: impl FnMut(SyntaxKind) -> bool) { + while !p.eof() && !stop(p.current()) { + p.stop_at_newline(true); + let prev = p.prev_end(); + code_expr(p); + if p.progress(prev) + && !p.eof() + && !stop(p.current()) + && !p.eat_if(SyntaxKind::Semicolon) + { + p.expected("semicolon or line break"); + } + p.unstop(); + if !p.progress(prev) && !p.eof() { + p.unexpected(); + } + } +} + +fn code_expr(p: &mut Parser) { + code_expr_prec(p, false, 0) +} + +fn embedded_code_expr(p: &mut Parser) { + let stmt = matches!( + p.current(), + SyntaxKind::Let + | SyntaxKind::Set + | SyntaxKind::Show + | SyntaxKind::Import + | SyntaxKind::Include + ); + + p.stop_at_newline(true); + 
p.enter(LexMode::Code); + code_expr_prec(p, true, 0); + let semi = p.eat_if(SyntaxKind::Semicolon); + if stmt && !semi && !p.eof() && !p.at(SyntaxKind::RightBracket) { + p.expected("semicolon or line break"); + } + p.exit(); + p.unstop(); +} + +fn code_expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) { + let m = p.marker(); + if let Some(op) = ast::UnOp::from_kind(p.current()) { + p.eat(); + code_expr_prec(p, atomic, op.precedence()); + p.wrap(m, SyntaxKind::Unary); + } else { + code_primary(p, atomic); + } loop { - // Parenthesis or bracket means this is a function call. - if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct() { - marker.perform(p, SyntaxKind::FuncCall, args)?; + if p.directly_at(SyntaxKind::LeftParen) || p.directly_at(SyntaxKind::LeftBracket) + { + args(p); + p.wrap(m, SyntaxKind::FuncCall); continue; } @@ -531,711 +415,571 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { break; } - // Method call or field access. 
if p.eat_if(SyntaxKind::Dot) { - ident(p)?; - if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct() + p.expect(SyntaxKind::Ident); + if p.directly_at(SyntaxKind::LeftParen) + || p.directly_at(SyntaxKind::LeftBracket) { - marker.perform(p, SyntaxKind::MethodCall, args)?; + args(p); + p.wrap(m, SyntaxKind::MethodCall); } else { - marker.end(p, SyntaxKind::FieldAccess); + p.wrap(m, SyntaxKind::FieldAccess) } continue; } - let op = if p.eat_if(SyntaxKind::Not) { + let binop = if p.eat_if(SyntaxKind::Not) { if p.at(SyntaxKind::In) { - BinOp::NotIn + Some(ast::BinOp::NotIn) } else { p.expected("keyword `in`"); - return Err(ParseError); + break; } } else { - match p.peek().and_then(BinOp::from_token) { - Some(binop) => binop, - None => break, - } + ast::BinOp::from_kind(p.current()) }; - let mut prec = op.precedence(); - if prec < min_prec { - break; - } - - p.eat(); - - match op.assoc() { - Assoc::Left => prec += 1, - Assoc::Right => {} - } - - marker.perform(p, SyntaxKind::Binary, |p| expr_prec(p, atomic, prec))?; - } - - Ok(()) -} - -fn primary(p: &mut Parser, atomic: bool) -> ParseResult { - match p.peek() { - // Literals and few other things. - Some( - SyntaxKind::None - | SyntaxKind::Auto - | SyntaxKind::Int - | SyntaxKind::Float - | SyntaxKind::Bool - | SyntaxKind::Numeric - | SyntaxKind::Str - | SyntaxKind::Label - | SyntaxKind::Raw { .. }, - ) => { - p.eat(); - Ok(()) - } - - // Things that start with an identifier. - Some(SyntaxKind::Ident) => { - let marker = p.marker(); - p.eat(); - - // Arrow means this is a closure's lone parameter. - if !atomic && p.at(SyntaxKind::Arrow) { - marker.end(p, SyntaxKind::Params); - p.assert(SyntaxKind::Arrow); - marker.perform(p, SyntaxKind::Closure, expr) - } else { - Ok(()) + if let Some(op) = binop { + let mut prec = op.precedence(); + if prec < min_prec { + break; } - } - // Structures. 
- Some(SyntaxKind::LeftParen) => parenthesized(p, atomic), - Some(SyntaxKind::LeftBrace) => Ok(code_block(p)), - Some(SyntaxKind::LeftBracket) => Ok(content_block(p)), - Some(SyntaxKind::Dollar) => Ok(math(p)), + match op.assoc() { + ast::Assoc::Left => prec += 1, + ast::Assoc::Right => {} + } - // Keywords. - Some(SyntaxKind::Let) => let_binding(p), - Some(SyntaxKind::Set) => set_rule(p), - Some(SyntaxKind::Show) => show_rule(p), - Some(SyntaxKind::If) => conditional(p), - Some(SyntaxKind::While) => while_loop(p), - Some(SyntaxKind::For) => for_loop(p), - Some(SyntaxKind::Import) => module_import(p), - Some(SyntaxKind::Include) => module_include(p), - Some(SyntaxKind::Break) => break_stmt(p), - Some(SyntaxKind::Continue) => continue_stmt(p), - Some(SyntaxKind::Return) => return_stmt(p), - - Some(SyntaxKind::Error) => { p.eat(); - Err(ParseError) - } - - // Nothing. - _ => { - p.expected_found("expression"); - Err(ParseError) - } - } -} - -fn ident(p: &mut Parser) -> ParseResult { - match p.peek() { - Some(SyntaxKind::Ident) => { - p.eat(); - Ok(()) - } - _ => { - p.expected_found("identifier"); - Err(ParseError) - } - } -} - -/// Parse something that starts with a parenthesis, which can be either of: -/// - Array literal -/// - Dictionary literal -/// - Parenthesized expression -/// - Parameter list of closure expression -fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult { - let marker = p.marker(); - - p.start_group(Group::Paren); - let colon = p.eat_if(SyntaxKind::Colon); - let kind = collection(p, true).0; - p.end_group(); - - // Leading colon makes this a dictionary. - if colon { - dict(p, marker); - return Ok(()); - } - - // Arrow means this is a closure's parameter list. - if !atomic && p.at(SyntaxKind::Arrow) { - params(p, marker); - p.assert(SyntaxKind::Arrow); - return marker.perform(p, SyntaxKind::Closure, expr); - } - - // Transform into the identified collection. 
- match kind { - CollectionKind::Group => marker.end(p, SyntaxKind::Parenthesized), - CollectionKind::Positional => array(p, marker), - CollectionKind::Named => dict(p, marker), - } - - Ok(()) -} - -/// The type of a collection. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum CollectionKind { - /// The collection is only one item and has no comma. - Group, - /// The collection starts with a positional item and has multiple items or a - /// trailing comma. - Positional, - /// The collection starts with a colon or named item. - Named, -} - -/// Parse a collection. -/// -/// Returns the length of the collection and whether the literal contained any -/// commas. -fn collection(p: &mut Parser, keyed: bool) -> (CollectionKind, usize) { - let mut collection_kind = None; - let mut items = 0; - let mut can_group = true; - let mut missing_coma: Option = None; - - while !p.eof() { - let Ok(item_kind) = item(p, keyed) else { - p.eat_if(SyntaxKind::Comma); - collection_kind = Some(CollectionKind::Group); + code_expr_prec(p, false, prec); + p.wrap(m, SyntaxKind::Binary); continue; - }; - - match item_kind { - SyntaxKind::Spread => can_group = false, - SyntaxKind::Named if collection_kind.is_none() => { - collection_kind = Some(CollectionKind::Named); - can_group = false; - } - _ if collection_kind.is_none() => { - collection_kind = Some(CollectionKind::Positional); - } - _ => {} } - items += 1; - - if let Some(marker) = missing_coma.take() { - p.expected_at(marker, "comma"); - } - - if p.eof() { - break; - } - - if p.eat_if(SyntaxKind::Comma) { - can_group = false; - } else { - missing_coma = Some(p.trivia_start()); - } - } - - let kind = if can_group && items == 1 { - CollectionKind::Group - } else { - collection_kind.unwrap_or(CollectionKind::Positional) - }; - - (kind, items) -} - -fn item(p: &mut Parser, keyed: bool) -> ParseResult { - let marker = p.marker(); - if p.eat_if(SyntaxKind::Dots) { - marker.perform(p, SyntaxKind::Spread, expr)?; - return 
Ok(SyntaxKind::Spread); - } - - expr(p)?; - - if p.at(SyntaxKind::Colon) { - match marker.after(p).map(|c| c.kind()) { - Some(SyntaxKind::Ident) => { - p.eat(); - marker.perform(p, SyntaxKind::Named, expr)?; - } - Some(SyntaxKind::Str) if keyed => { - p.eat(); - marker.perform(p, SyntaxKind::Keyed, expr)?; - } - kind => { - let mut msg = EcoString::from("expected identifier"); - if keyed { - msg.push_str(" or string"); - } - if let Some(kind) = kind { - msg.push_str(", found "); - msg.push_str(kind.name()); - } - marker.to_error(p, msg); - p.eat(); - marker.perform(p, SyntaxKind::Named, expr).ok(); - return Err(ParseError); - } - } - - Ok(SyntaxKind::Named) - } else { - Ok(SyntaxKind::None) + break; } } -fn array(p: &mut Parser, marker: Marker) { - marker.filter_children(p, |x| match x.kind() { - SyntaxKind::Named | SyntaxKind::Keyed => Err("expected expression"), - _ => Ok(()), - }); - marker.end(p, SyntaxKind::Array); -} - -fn dict(p: &mut Parser, marker: Marker) { - let mut used = HashSet::new(); - marker.filter_children(p, |x| match x.kind() { - kind if kind.is_paren() => Ok(()), - SyntaxKind::Named | SyntaxKind::Keyed => { - if let Some(child) = x.children().next() { - let key = match child.cast::() { - Some(str) => str.get(), - None => child.text().clone(), - }; - - if !used.insert(key) { - return Err("pair has duplicate key"); - } +fn code_primary(p: &mut Parser, atomic: bool) { + let m = p.marker(); + match p.current() { + SyntaxKind::Ident => { + p.eat(); + if !atomic && p.at(SyntaxKind::Arrow) { + p.wrap(m, SyntaxKind::Params); + p.expect(SyntaxKind::Arrow); + code_expr(p); + p.wrap(m, SyntaxKind::Closure); } - Ok(()) } - SyntaxKind::Spread | SyntaxKind::Comma | SyntaxKind::Colon => Ok(()), - _ => Err("expected named or keyed pair"), - }); - marker.end(p, SyntaxKind::Dict); + + SyntaxKind::LeftBrace => code_block(p), + SyntaxKind::LeftBracket => content_block(p), + SyntaxKind::LeftParen => with_paren(p), + SyntaxKind::Dollar => equation(p), + 
SyntaxKind::Let => let_binding(p), + SyntaxKind::Set => set_rule(p), + SyntaxKind::Show => show_rule(p), + SyntaxKind::If => conditional(p), + SyntaxKind::While => while_loop(p), + SyntaxKind::For => for_loop(p), + SyntaxKind::Import => module_import(p), + SyntaxKind::Include => module_include(p), + SyntaxKind::Break => break_stmt(p), + SyntaxKind::Continue => continue_stmt(p), + SyntaxKind::Return => return_stmt(p), + + SyntaxKind::None + | SyntaxKind::Auto + | SyntaxKind::Int + | SyntaxKind::Float + | SyntaxKind::Bool + | SyntaxKind::Numeric + | SyntaxKind::Str + | SyntaxKind::Label + | SyntaxKind::Raw => p.eat(), + + _ => p.expected("expression"), + } } -fn params(p: &mut Parser, marker: Marker) { - marker.filter_children(p, |x| match x.kind() { - kind if kind.is_paren() => Ok(()), - SyntaxKind::Named | SyntaxKind::Ident | SyntaxKind::Comma => Ok(()), - SyntaxKind::Spread - if matches!( - x.children().last().map(|child| child.kind()), - Some(SyntaxKind::Ident) - ) => - { - Ok(()) - } - _ => Err("expected identifier, named pair or argument sink"), - }); - marker.end(p, SyntaxKind::Params); +fn block(p: &mut Parser) { + match p.current() { + SyntaxKind::LeftBracket => content_block(p), + SyntaxKind::LeftBrace => code_block(p), + _ => p.expected("block"), + } +} + +pub(super) fn reparse_block(text: &str, range: Range) -> Option { + let mut p = Parser::new(&text, range.start, LexMode::Code); + assert!(p.at(SyntaxKind::LeftBracket) || p.at(SyntaxKind::LeftBrace)); + block(&mut p); + (p.balanced && p.prev_end() == range.end) + .then(|| p.finish().into_iter().next().unwrap()) } -/// Parse a code block: `{...}`. 
fn code_block(p: &mut Parser) { - p.perform(SyntaxKind::CodeBlock, |p| { - p.start_group(Group::Brace); - code(p); - p.end_group(); - }); -} - -fn code(p: &mut Parser) { - while !p.eof() { - p.start_group(Group::Expr); - if expr(p).is_ok() && !p.eof() { - p.expected("semicolon or line break"); - } - p.end_group(); - - // Forcefully skip over newlines since the group's contents can't. - p.eat_while(SyntaxKind::is_space); - } + let m = p.marker(); + p.enter(LexMode::Code); + p.stop_at_newline(false); + p.expect(SyntaxKind::LeftBrace); + code(p, |kind| kind == SyntaxKind::RightBrace); + p.expect(SyntaxKind::RightBrace); + p.exit(); + p.unstop(); + p.wrap(m, SyntaxKind::CodeBlock); } fn content_block(p: &mut Parser) { - p.perform(SyntaxKind::ContentBlock, |p| { - p.start_group(Group::Bracket); - markup(p, true); - p.end_group(); - }); + let m = p.marker(); + p.enter(LexMode::Markup); + p.expect(SyntaxKind::LeftBracket); + markup(p, true, 0, |kind| kind == SyntaxKind::RightBracket); + p.expect(SyntaxKind::RightBracket); + p.exit(); + p.wrap(m, SyntaxKind::ContentBlock); } -fn args(p: &mut Parser) -> ParseResult { - match p.peek_direct() { - Some(SyntaxKind::LeftParen) => {} - Some(SyntaxKind::LeftBracket) => {} - _ => { - p.expected_found("argument list"); - return Err(ParseError); +fn with_paren(p: &mut Parser) { + let m = p.marker(); + let mut kind = collection(p, true); + if p.at(SyntaxKind::Arrow) { + validate_params(p, m); + p.wrap(m, SyntaxKind::Params); + p.expect(SyntaxKind::Arrow); + code_expr(p); + kind = SyntaxKind::Closure; + } + match kind { + SyntaxKind::Array => validate_array(p, m), + SyntaxKind::Dict => validate_dict(p, m), + _ => {} + } + p.wrap(m, kind); +} + +fn collection(p: &mut Parser, keyed: bool) -> SyntaxKind { + p.stop_at_newline(false); + p.expect(SyntaxKind::LeftParen); + + let mut count = 0; + let mut parenthesized = true; + let mut kind = None; + if keyed && p.eat_if(SyntaxKind::Colon) { + kind = Some(SyntaxKind::Dict); + parenthesized = 
false; + } + + while !p.current().is_terminator() { + let prev = p.prev_end(); + match item(p, keyed) { + SyntaxKind::Spread => parenthesized = false, + SyntaxKind::Named | SyntaxKind::Keyed if kind.is_none() => { + kind = Some(SyntaxKind::Dict); + parenthesized = false; + } + _ if kind.is_none() => kind = Some(SyntaxKind::Array), + _ => {} + } + + if !p.progress(prev) { + p.unexpected(); + continue; + } + + count += 1; + + if p.current().is_terminator() { + break; + } + + if p.expect(SyntaxKind::Comma) { + parenthesized = false; } } - p.perform(SyntaxKind::Args, |p| { - if p.at(SyntaxKind::LeftParen) { - let marker = p.marker(); - p.start_group(Group::Paren); - collection(p, false); - p.end_group(); + p.expect(SyntaxKind::RightParen); + p.unstop(); - let mut used = HashSet::new(); - marker.filter_children(p, |x| match x.kind() { - SyntaxKind::Named => { - if let Some(ident) = - x.children().next().and_then(|child| child.cast::()) - { - if !used.insert(ident.take()) { - return Err("duplicate argument"); - } - } - Ok(()) + if parenthesized && count == 1 { + SyntaxKind::Parenthesized + } else { + kind.unwrap_or(SyntaxKind::Array) + } +} + +fn item(p: &mut Parser, keyed: bool) -> SyntaxKind { + let m = p.marker(); + + if p.eat_if(SyntaxKind::Dots) { + code_expr(p); + p.wrap(m, SyntaxKind::Spread); + return SyntaxKind::Spread; + } + + code_expr(p); + + if !p.eat_if(SyntaxKind::Colon) { + return SyntaxKind::Int; + } + + code_expr(p); + + let kind = match p.node(m).map(SyntaxNode::kind) { + Some(SyntaxKind::Ident) => SyntaxKind::Named, + Some(SyntaxKind::Str) if keyed => SyntaxKind::Keyed, + _ => { + for child in p.post_process(m).next() { + if child.kind() == SyntaxKind::Colon { + break; } - _ => Ok(()), - }); - } - while p.peek_direct() == Some(SyntaxKind::LeftBracket) { - content_block(p); - } - }); - - Ok(()) -} - -fn math_args(p: &mut Parser) { - p.start_group(Group::MathRow('(', ')')); - p.perform(SyntaxKind::Args, |p| { - let mut marker = p.marker(); - while 
!p.eof() { - if matches!(p.peek(), Some(SyntaxKind::Atom) if p.peek_src() == ",") { - marker.end(p, SyntaxKind::Math); - let comma = p.marker(); - p.eat(); - comma.convert(p, SyntaxKind::Comma); - marker = p.marker(); - } else { - math_node(p); + let mut message = EcoString::from("expected identifier"); + if keyed { + message.push_str(" or string"); + } + message.push_str(", found "); + message.push_str(child.kind().name()); + child.convert_to_error(message); } + SyntaxKind::Named } - if marker != p.marker() { - marker.end(p, SyntaxKind::Math); - } - }); - p.end_group(); + }; + + p.wrap(m, kind); + kind } -fn let_binding(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::LetBinding, |p| { - p.assert(SyntaxKind::Let); +fn args(p: &mut Parser) { + if !p.at(SyntaxKind::LeftParen) && !p.at(SyntaxKind::LeftBracket) { + p.expected("argument list"); + } - let marker = p.marker(); - ident(p)?; + let m = p.marker(); + if p.at(SyntaxKind::LeftParen) { + collection(p, false); + validate_args(p, m); + } - // If a parenthesis follows, this is a function definition. - let has_params = p.peek_direct() == Some(SyntaxKind::LeftParen); - if has_params { - let marker = p.marker(); - p.start_group(Group::Paren); - collection(p, false); - p.end_group(); - params(p, marker); - } + while p.directly_at(SyntaxKind::LeftBracket) { + content_block(p); + } - if p.eat_if(SyntaxKind::Eq) { - expr(p)?; - } else if has_params { - // Function definitions must have a body. - p.expected("body"); - } - - // Rewrite into a closure expression if it's a function definition. 
- if has_params { - marker.end(p, SyntaxKind::Closure); - } - - Ok(()) - }) + p.wrap(m, SyntaxKind::Args); } -fn set_rule(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::SetRule, |p| { - p.assert(SyntaxKind::Set); - ident(p)?; - args(p)?; - if p.eat_if(SyntaxKind::If) { - expr(p)?; +fn let_binding(p: &mut Parser) { + let m = p.marker(); + p.assert(SyntaxKind::Let); + + let m2 = p.marker(); + p.expect(SyntaxKind::Ident); + + let closure = p.directly_at(SyntaxKind::LeftParen); + if closure { + let m3 = p.marker(); + collection(p, false); + validate_params(p, m3); + p.wrap(m3, SyntaxKind::Params); + } + + let f = if closure { Parser::expect } else { Parser::eat_if }; + if f(p, SyntaxKind::Eq) { + code_expr(p); + } + + if closure { + p.wrap(m2, SyntaxKind::Closure); + } + + p.wrap(m, SyntaxKind::LetBinding); +} + +fn set_rule(p: &mut Parser) { + let m = p.marker(); + p.assert(SyntaxKind::Set); + p.expect(SyntaxKind::Ident); + args(p); + if p.eat_if(SyntaxKind::If) { + code_expr(p); + } + p.wrap(m, SyntaxKind::SetRule); +} + +fn show_rule(p: &mut Parser) { + let m = p.marker(); + p.assert(SyntaxKind::Show); + code_expr(p); + if p.eat_if(SyntaxKind::Colon) { + code_expr(p); + } + p.wrap(m, SyntaxKind::ShowRule); +} + +fn conditional(p: &mut Parser) { + let m = p.marker(); + p.assert(SyntaxKind::If); + code_expr(p); + block(p); + if p.eat_if(SyntaxKind::Else) { + if p.at(SyntaxKind::If) { + conditional(p); + } else { + block(p); } - Ok(()) - }) + } + p.wrap(m, SyntaxKind::Conditional); } -fn show_rule(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::ShowRule, |p| { - p.assert(SyntaxKind::Show); - expr(p)?; - if p.eat_if(SyntaxKind::Colon) { - expr(p)?; - } - Ok(()) - }) +fn while_loop(p: &mut Parser) { + let m = p.marker(); + p.assert(SyntaxKind::While); + code_expr(p); + block(p); + p.wrap(m, SyntaxKind::WhileLoop); } -fn conditional(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::Conditional, |p| { - p.assert(SyntaxKind::If); - - expr(p)?; - 
body(p)?; - - if p.eat_if(SyntaxKind::Else) { - if p.at(SyntaxKind::If) { - conditional(p)?; - } else { - body(p)?; - } - } - - Ok(()) - }) +fn for_loop(p: &mut Parser) { + let m = p.marker(); + p.assert(SyntaxKind::For); + for_pattern(p); + p.expect(SyntaxKind::In); + code_expr(p); + block(p); + p.wrap(m, SyntaxKind::ForLoop); } -fn while_loop(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::WhileLoop, |p| { - p.assert(SyntaxKind::While); - expr(p)?; - body(p) - }) -} - -fn for_loop(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::ForLoop, |p| { - p.assert(SyntaxKind::For); - for_pattern(p)?; - p.expect(SyntaxKind::In)?; - expr(p)?; - body(p) - }) -} - -fn for_pattern(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::ForPattern, |p| { - ident(p)?; +fn for_pattern(p: &mut Parser) { + let m = p.marker(); + if p.expect(SyntaxKind::Ident) { if p.eat_if(SyntaxKind::Comma) { - ident(p)?; + p.expect(SyntaxKind::Ident); } - Ok(()) - }) + p.wrap(m, SyntaxKind::ForPattern); + } } -fn module_import(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::ModuleImport, |p| { - p.assert(SyntaxKind::Import); - expr(p)?; +fn module_import(p: &mut Parser) { + let m = p.marker(); + p.assert(SyntaxKind::Import); + code_expr(p); + if p.eat_if(SyntaxKind::Colon) && !p.eat_if(SyntaxKind::Star) { + import_items(p); + } + p.wrap(m, SyntaxKind::ModuleImport); +} - if !p.eat_if(SyntaxKind::Colon) || p.eat_if(SyntaxKind::Star) { - return Ok(()); +fn import_items(p: &mut Parser) { + let m = p.marker(); + while !p.eof() && !p.at(SyntaxKind::Semicolon) { + if !p.eat_if(SyntaxKind::Ident) { + p.unexpected(); } - - // This is the list of identifiers scenario. 
- p.perform(SyntaxKind::ImportItems, |p| { - let marker = p.marker(); - let items = collection(p, false).1; - if items == 0 { - p.expected("import items"); - } - marker.filter_children(p, |n| match n.kind() { - SyntaxKind::Ident | SyntaxKind::Comma => Ok(()), - _ => Err("expected identifier"), - }); - }); - - Ok(()) - }) -} - -fn module_include(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::ModuleInclude, |p| { - p.assert(SyntaxKind::Include); - expr(p) - }) -} - -fn break_stmt(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::LoopBreak, |p| { - p.assert(SyntaxKind::Break); - Ok(()) - }) -} - -fn continue_stmt(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::LoopContinue, |p| { - p.assert(SyntaxKind::Continue); - Ok(()) - }) -} - -fn return_stmt(p: &mut Parser) -> ParseResult { - p.perform(SyntaxKind::FuncReturn, |p| { - p.assert(SyntaxKind::Return); - if !p.at(SyntaxKind::Comma) && !p.eof() { - expr(p)?; + if p.current().is_terminator() { + break; } - Ok(()) - }) + p.expect(SyntaxKind::Comma); + } + p.wrap(m, SyntaxKind::ImportItems); } -fn body(p: &mut Parser) -> ParseResult { - match p.peek() { - Some(SyntaxKind::LeftBracket) => Ok(content_block(p)), - Some(SyntaxKind::LeftBrace) => Ok(code_block(p)), - _ => { - p.expected("body"); - Err(ParseError) +fn module_include(p: &mut Parser) { + let m = p.marker(); + p.assert(SyntaxKind::Include); + code_expr(p); + p.wrap(m, SyntaxKind::ModuleInclude); +} + +fn break_stmt(p: &mut Parser) { + let m = p.marker(); + p.assert(SyntaxKind::Break); + p.wrap(m, SyntaxKind::LoopBreak); +} + +fn continue_stmt(p: &mut Parser) { + let m = p.marker(); + p.assert(SyntaxKind::Continue); + p.wrap(m, SyntaxKind::LoopContinue); +} + +fn return_stmt(p: &mut Parser) { + let m = p.marker(); + p.assert(SyntaxKind::Return); + if !p.current().is_terminator() && !p.at(SyntaxKind::Comma) { + code_expr(p); + } + p.wrap(m, SyntaxKind::FuncReturn); +} + +fn validate_array(p: &mut Parser, m: Marker) { + for child in 
p.post_process(m) { + let kind = child.kind(); + if kind == SyntaxKind::Named || kind == SyntaxKind::Keyed { + child.convert_to_error(format_eco!( + "expected expression, found {}", + kind.name() + )); } } } -/// A convenient token-based parser. -struct Parser<'s> { - /// An iterator over the source tokens. - lexer: Lexer<'s>, - /// Whether we are at the end of the file or of a group. - eof: bool, - /// The current token. - current: Option, - /// The end byte index of the last non-trivia token. - prev_end: usize, - /// The start byte index of the peeked token. - current_start: usize, - /// The stack of open groups. - groups: Vec, - /// The children of the currently built node. - children: Vec, - /// Whether the last group was not correctly terminated. - unterminated_group: bool, - /// Whether a group terminator was found that did not close a group. - stray_terminator: bool, +fn validate_dict(p: &mut Parser, m: Marker) { + let mut used = HashSet::new(); + for child in p.post_process(m) { + match child.kind() { + SyntaxKind::Named | SyntaxKind::Keyed => { + let Some(first) = child.children_mut().first_mut() else { continue }; + let key = match first.cast::() { + Some(str) => str.get(), + None => first.text().clone(), + }; + + if !used.insert(key) { + first.convert_to_error("duplicate key"); + child.make_erroneous(); + } + } + SyntaxKind::Spread => {} + SyntaxKind::LeftParen + | SyntaxKind::RightParen + | SyntaxKind::Comma + | SyntaxKind::Colon => {} + kind => { + child.convert_to_error(format_eco!( + "expected named or keyed pair, found {}", + kind.name() + )); + } + } + } } +fn validate_params(p: &mut Parser, m: Marker) { + let mut used = HashSet::new(); + for child in p.post_process(m) { + match child.kind() { + SyntaxKind::Ident => { + if !used.insert(child.text().clone()) { + child.convert_to_error("duplicate parameter"); + } + } + SyntaxKind::Named => { + let Some(within) = child.children_mut().first_mut() else { return }; + if 
!used.insert(within.text().clone()) { + within.convert_to_error("duplicate parameter"); + child.make_erroneous(); + } + } + SyntaxKind::Spread => { + let Some(within) = child.children_mut().last_mut() else { continue }; + if within.kind() != SyntaxKind::Ident { + within.convert_to_error(format_eco!( + "expected identifier, found {}", + within.kind().name(), + )); + child.make_erroneous(); + } + } + SyntaxKind::LeftParen | SyntaxKind::RightParen | SyntaxKind::Comma => {} + kind => { + child.convert_to_error(format_eco!( + "expected identifier, named pair or argument sink, found {}", + kind.name() + )); + } + } + } +} + +fn validate_args(p: &mut Parser, m: Marker) { + let mut used = HashSet::new(); + for child in p.post_process(m) { + if child.kind() == SyntaxKind::Named { + let Some(within) = child.children_mut().first_mut() else { return }; + if !used.insert(within.text().clone()) { + within.convert_to_error("duplicate argument"); + child.make_erroneous(); + } + } + } +} + +/// Manages parsing of a stream of tokens. +struct Parser<'s> { + text: &'s str, + lexer: Lexer<'s>, + prev_end: usize, + current_start: usize, + current: SyntaxKind, + modes: Vec, + nodes: Vec, + stop_at_newline: Vec, + balanced: bool, +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +struct Marker(usize); + impl<'s> Parser<'s> { - /// Create a new parser for the source string. - fn new(text: &'s str, mode: LexMode) -> Self { - Self::with_prefix("", text, mode) - } - - /// Create a new parser for the source string that is prefixed by some text - /// that does not need to be parsed but taken into account for column - /// calculation. 
- fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self { - let mut lexer = Lexer::with_prefix(prefix, text, mode); + fn new(text: &'s str, offset: usize, mode: LexMode) -> Self { + let mut lexer = Lexer::new(text, mode); + lexer.jump(offset); let current = lexer.next(); Self { lexer, - eof: current.is_none(), + text, + prev_end: offset, + current_start: offset, current, - prev_end: 0, - current_start: 0, - groups: vec![], - children: vec![], - unterminated_group: false, - stray_terminator: false, + modes: vec![], + nodes: vec![], + stop_at_newline: vec![], + balanced: true, } } - /// End the parsing process and return the parsed children. fn finish(self) -> Vec { - self.children + self.nodes } - /// End the parsing process and return - /// - the parsed children and whether the last token was terminated, if all - /// groups were terminated correctly, or - /// - `None` otherwise. - fn consume(self) -> Option<(Vec, bool)> { - self.terminated().then(|| (self.children, self.lexer.terminated())) + fn prev_end(&self) -> usize { + self.prev_end } - /// Create a new marker. - fn marker(&mut self) -> Marker { - Marker(self.children.len()) + fn current(&self) -> SyntaxKind { + self.current } - /// Create a marker right before the trailing trivia. - fn trivia_start(&self) -> Marker { - let count = self - .children - .iter() - .rev() - .take_while(|node| self.is_trivia(node.kind())) - .count(); - Marker(self.children.len() - count) + fn current_start(&self) -> usize { + self.current_start } - /// Perform a subparse that wraps its result in a node with the given kind. 
- fn perform(&mut self, kind: SyntaxKind, f: F) -> T - where - F: FnOnce(&mut Self) -> T, - { - let prev = mem::take(&mut self.children); - let output = f(self); - let until = self.trivia_start(); - let mut children = mem::replace(&mut self.children, prev); - - if self.lexer.mode() == LexMode::Markup { - self.children.push(SyntaxNode::inner(kind, children)); - } else { - // Trailing trivia should not be wrapped into the new node. - let idx = self.children.len(); - self.children.push(SyntaxNode::default()); - self.children.extend(children.drain(until.0..)); - self.children[idx] = SyntaxNode::inner(kind, children); - } - - output + fn current_end(&self) -> usize { + self.lexer.cursor() + } + + fn current_text(&self) -> &'s str { + &self.text[self.current_start..self.current_end()] + } + + fn at(&self, kind: SyntaxKind) -> bool { + self.current == kind + } + + fn assert(&mut self, kind: SyntaxKind) { + assert_eq!(self.current, kind); + self.eat(); } - /// Whether the end of the source string or group is reached. fn eof(&self) -> bool { - self.eof + self.at(SyntaxKind::Eof) } - /// Consume the current token and also trailing trivia. - fn eat(&mut self) { - self.stray_terminator |= match self.current { - Some(SyntaxKind::RightParen) => !self.inside(Group::Paren), - Some(SyntaxKind::RightBracket) => !self.inside(Group::Bracket), - Some(SyntaxKind::RightBrace) => !self.inside(Group::Brace), - _ => false, - }; - - self.prev_end = self.lexer.cursor(); - self.bump(); - - if self.lexer.mode() != LexMode::Markup { - // Skip whitespace and comments. - while self.current.map_or(false, |kind| self.is_trivia(kind)) { - self.bump(); - } - } - - self.repeek(); + fn directly_at(&self, kind: SyntaxKind) -> bool { + self.current == kind && self.prev_end == self.current_start } - /// Consume the current token if it is the given one. 
fn eat_if(&mut self, kind: SyntaxKind) -> bool { let at = self.at(kind); if at { @@ -1244,437 +988,169 @@ impl<'s> Parser<'s> { at } - /// Eat tokens while the condition is true. - fn eat_while(&mut self, mut f: F) - where - F: FnMut(SyntaxKind) -> bool, - { - while self.peek().map_or(false, |t| f(t)) { - self.eat(); - } - } - - /// Consume the current token if it is the given one and produce an error if - /// not. - fn expect(&mut self, kind: SyntaxKind) -> ParseResult { - let at = self.peek() == Some(kind); - if at { - self.eat(); - Ok(()) - } else { - self.expected(kind.name()); - Err(ParseError) - } - } - - /// Consume the current token, debug-asserting that it is the given one. - #[track_caller] - fn assert(&mut self, kind: SyntaxKind) { - debug_assert_eq!(self.peek(), Some(kind)); + fn convert(&mut self, kind: SyntaxKind) { + self.current = kind; self.eat(); } - /// Whether the current token is of the given type. - fn at(&self, kind: SyntaxKind) -> bool { - self.peek() == Some(kind) + fn newline(&mut self) -> bool { + self.lexer.newline() } - /// Peek at the current token without consuming it. - fn peek(&self) -> Option { - if self.eof { - None - } else { - self.current + fn column(&self, at: usize) -> usize { + self.text[..at].chars().rev().take_while(|&c| !is_newline(c)).count() + } + + fn marker(&self) -> Marker { + Marker(self.nodes.len()) + } + + fn node(&self, m: Marker) -> Option<&SyntaxNode> { + self.nodes.get(m.0) + } + + fn post_process(&mut self, m: Marker) -> impl Iterator { + self.nodes[m.0..] 
+ .iter_mut() + .filter(|child| !child.kind().is_error() && !child.kind().is_trivia()) + } + + fn wrap(&mut self, m: Marker, kind: SyntaxKind) { + self.unskip(); + let from = m.0.min(self.nodes.len()); + let children = self.nodes.drain(from..).collect(); + self.nodes.push(SyntaxNode::inner(kind, children)); + self.skip(); + } + + fn progress(&self, offset: usize) -> bool { + offset < self.prev_end + } + + fn enter(&mut self, mode: LexMode) { + self.modes.push(self.lexer.mode()); + self.lexer.set_mode(mode); + } + + fn exit(&mut self) { + let mode = self.modes.pop().unwrap(); + if mode != self.lexer.mode() { + self.unskip(); + self.lexer.set_mode(mode); + self.lexer.jump(self.current_start); + self.lex(); + self.skip(); } } - /// Peek at the current token, but only if it follows immediately after the - /// last one without any trivia in between. - fn peek_direct(&self) -> Option { - if self.prev_end() == self.current_start() { - self.peek() - } else { - None - } + fn stop_at_newline(&mut self, stop: bool) { + self.stop_at_newline.push(stop); } - /// The byte index at which the last non-trivia token ended. - fn prev_end(&self) -> usize { - self.prev_end + fn unstop(&mut self) { + self.unskip(); + self.stop_at_newline.pop(); + self.lexer.jump(self.prev_end); + self.lex(); + self.skip(); } - /// The byte index at which the current token starts. - fn current_start(&self) -> usize { - self.current_start + fn eat(&mut self) { + self.save(); + self.lex(); + self.skip(); } - /// The byte index at which the current token ends. - fn current_end(&self) -> usize { - self.lexer.cursor() - } - - /// The byte length of the current token. - fn current_len(&self) -> usize { - self.current_end() - self.current_start() - } - - /// The text of the current node. - fn peek_src(&self) -> &str { - self.lexer.scanner().from(self.current_start) - } - - /// Determine the column index for the given byte index. 
- fn column(&self, index: usize) -> usize { - self.lexer.column(index) - } - - /// Continue parsing in a group. - /// - /// When the end delimiter of the group is reached, all subsequent calls to - /// `peek()` return `None`. Parsing can only continue with a matching call - /// to `end_group`. - /// - /// This panics if the current token does not start the given group. - #[track_caller] - fn start_group(&mut self, kind: Group) { - self.groups.push(GroupEntry { kind, prev_mode: self.lexer.mode() }); - self.lexer.set_mode(match kind { - Group::Bracket | Group::Strong | Group::Emph => LexMode::Markup, - Group::Math | Group::MathRow(_, _) => LexMode::Math, - Group::Brace | Group::Paren | Group::Expr => LexMode::Code, - }); - - match kind { - Group::Brace => self.assert(SyntaxKind::LeftBrace), - Group::Bracket => self.assert(SyntaxKind::LeftBracket), - Group::Paren => self.assert(SyntaxKind::LeftParen), - Group::Strong => self.assert(SyntaxKind::Star), - Group::Emph => self.assert(SyntaxKind::Underscore), - Group::Math => self.assert(SyntaxKind::Dollar), - Group::MathRow(..) => self.assert(SyntaxKind::Atom), - Group::Expr => self.repeek(), - } - } - - /// End the parsing of a group. - /// - /// This panics if no group was started. - #[track_caller] - fn end_group(&mut self) { - let group_mode = self.lexer.mode(); - let group = self.groups.pop().expect("no started group"); - self.lexer.set_mode(group.prev_mode); - - let mut rescan = self.lexer.mode() != group_mode; - - // Eat the end delimiter if there is one. - if let Some((end, required)) = match group.kind { - Group::Brace => Some((SyntaxKind::RightBrace, true)), - Group::Bracket => Some((SyntaxKind::RightBracket, true)), - Group::Paren => Some((SyntaxKind::RightParen, true)), - Group::Strong => Some((SyntaxKind::Star, true)), - Group::Emph => Some((SyntaxKind::Underscore, true)), - Group::Math => Some((SyntaxKind::Dollar, true)), - Group::MathRow(..) 
=> Some((SyntaxKind::Atom, true)), - Group::Expr => Some((SyntaxKind::Semicolon, false)), - } { - if self.current.as_ref() == Some(&end) { - // If another group closes after a group with the missing - // terminator, its scope of influence ends here and no longer - // taints the rest of the reparse. - self.unterminated_group = false; - - // Bump the delimeter and return. No need to rescan in this - // case. Also, we know that the delimiter is not stray even - // though we already removed the group. - let s = self.stray_terminator; - self.eat(); - self.stray_terminator = s; - rescan = false; - } else if required { - self.expected(end.name()); - self.unterminated_group = true; + fn skip(&mut self) { + if self.lexer.mode() != LexMode::Markup { + while self.current.is_trivia() { + self.save(); + self.lex(); } } + } - // Rescan the peeked token if the mode changed. - if rescan { - let mut target = self.prev_end(); - if group_mode != LexMode::Markup { - let start = self.trivia_start().0; - target = self.current_start - - self.children[start..].iter().map(SyntaxNode::len).sum::(); - self.children.truncate(start); + fn unskip(&mut self) { + if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start { + while self.nodes.last().map_or(false, |last| last.kind().is_trivia()) { + self.nodes.pop(); } - self.lexer.jump(target); - self.prev_end = self.lexer.cursor(); - self.current_start = self.lexer.cursor(); - self.current = self.lexer.next(); + self.lexer.jump(self.prev_end); + self.lex(); } - - self.repeek(); } - /// Checks if all groups were correctly terminated. - fn terminated(&self) -> bool { - self.groups.is_empty() && !self.unterminated_group && !self.stray_terminator - } - - /// Low-level bump that consumes exactly one token without special trivia - /// handling. 
- fn bump(&mut self) { - if let Some((message, pos)) = self.lexer.last_error() { - let len = self.current_len(); - self.children.push(SyntaxNode::error(message, pos, len)) + fn save(&mut self) { + if self.at(SyntaxKind::Error) { + let (message, pos) = self.lexer.take_error().unwrap(); + let len = self.current_end() - self.current_start; + self.nodes.push(SyntaxNode::error(message, pos, len)); } else { - let kind = self.current.unwrap(); - let text = self.peek_src(); - self.children.push(SyntaxNode::leaf(kind, text)); + let text = self.current_text(); + self.nodes.push(SyntaxNode::leaf(self.current, text)); } + + if self.lexer.mode() == LexMode::Markup || !self.current.is_trivia() { + self.prev_end = self.current_end(); + } + } + + fn lex(&mut self) { self.current_start = self.lexer.cursor(); self.current = self.lexer.next(); - } - - /// Take another look at the current token to recheck whether it ends a - /// group. - fn repeek(&mut self) { - self.eof = match &self.current { - Some(SyntaxKind::RightBrace) => self.inside(Group::Brace), - Some(SyntaxKind::RightBracket) => self.inside(Group::Bracket), - Some(SyntaxKind::RightParen) => self.inside(Group::Paren), - Some(SyntaxKind::Star) => self.inside(Group::Strong), - Some(SyntaxKind::Underscore) => self.inside(Group::Emph), - Some(SyntaxKind::Dollar) => self - .groups - .iter() - .rev() - .skip_while(|group| matches!(group.kind, Group::MathRow(..))) - .next() - .map_or(false, |group| group.kind == Group::Math), - Some(SyntaxKind::Semicolon) => self.inside(Group::Expr), - Some(SyntaxKind::Atom) => match self.peek_src() { - ")" => self.inside(Group::MathRow('(', ')')), - "}" => self.inside(Group::MathRow('{', '}')), - "]" => self.inside(Group::MathRow('[', ']')), - _ => false, - }, - Some(SyntaxKind::Space { newlines }) => self.space_ends_group(*newlines), - Some(_) => false, - None => true, - }; - } - - /// Returns whether the given type can be skipped over. 
- fn is_trivia(&self, token: SyntaxKind) -> bool { - match token { - SyntaxKind::Space { newlines } => !self.space_ends_group(newlines), - SyntaxKind::LineComment => true, - SyntaxKind::BlockComment => true, - _ => false, + if self.lexer.mode() == LexMode::Code + && self.lexer.newline() + && self.stop_at_newline.last().copied().unwrap_or(false) + && !matches!(self.lexer.clone().next(), SyntaxKind::Else | SyntaxKind::Dot) + { + self.current = SyntaxKind::Eof; } } - /// Whether a space with the given number of newlines ends the current group. - fn space_ends_group(&self, n: usize) -> bool { - if n == 0 { - return false; - } - - match self.groups.last().map(|group| group.kind) { - Some(Group::Strong | Group::Emph) => n >= 2, - Some(Group::Expr) if n >= 1 => { - // Allow else and method call to continue on next line. - self.groups.iter().nth_back(1).map(|group| group.kind) - != Some(Group::Brace) - || !matches!( - self.lexer.clone().next(), - Some(SyntaxKind::Else | SyntaxKind::Dot) - ) - } - _ => false, - } - } - - /// Whether we are inside the given group (can be nested). - fn inside(&self, kind: Group) -> bool { - self.groups - .iter() - .rev() - .take_while(|g| !kind.is_weak() || g.kind.is_weak()) - .any(|g| g.kind == kind) - } -} - -/// Error handling. -impl Parser<'_> { - /// Eat the current token and add an error that it is unexpected. - fn unexpected(&mut self) { - if let Some(found) = self.peek() { - let marker = self.marker(); - let msg = format_eco!("unexpected {}", found.name()); + fn expect(&mut self, kind: SyntaxKind) -> bool { + let at = self.at(kind); + if at { self.eat(); - marker.to_error(self, msg); + } else { + self.balanced &= !kind.is_grouping(); + self.expected(kind.name()); } + at } - /// Add an error that the `thing` was expected at the end of the last - /// non-trivia token. 
fn expected(&mut self, thing: &str) { - self.expected_at(self.trivia_start(), thing); + self.unskip(); + if self + .nodes + .last() + .map_or(true, |child| child.kind() != SyntaxKind::Error) + { + let message = format_eco!("expected {}", thing); + self.nodes.push(SyntaxNode::error(message, ErrorPos::Full, 0)); + } + self.skip(); } - /// Insert an error message that `what` was expected at the marker position. - fn expected_at(&mut self, marker: Marker, what: &str) { - let msg = format_eco!("expected {}", what); - self.children - .insert(marker.0, SyntaxNode::error(msg, ErrorPos::Full, 0)); - } + fn unexpected(&mut self) { + self.unskip(); + while self + .nodes + .last() + .map_or(false, |child| child.kind() == SyntaxKind::Error && child.len() == 0) + { + self.nodes.pop(); + } + self.skip(); - /// Eat the current token and add an error that it is not the expected - /// `thing`. - fn expected_found(&mut self, thing: &str) { - match self.peek() { - Some(found) => { - let marker = self.marker(); - let msg = format_eco!("expected {}, found {}", thing, found.name()); - self.eat(); - marker.to_error(self, msg); - } - None => self.expected(thing), + let kind = self.current; + let offset = self.nodes.len(); + self.eat(); + self.balanced &= !kind.is_grouping(); + + if !kind.is_error() { + self.nodes[offset] + .convert_to_error(format_eco!("unexpected {}", kind.name())); } } } - -/// Marks a location in a parser's child list. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -struct Marker(usize); - -impl Marker { - /// Peek at the child directly before the marker. - fn before<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> { - p.children.get(self.0.checked_sub(1)?) - } - - /// Peek at the child directly after the marker. - fn after<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> { - p.children.get(self.0) - } - - /// Convert the child directly after marker. 
- fn convert(self, p: &mut Parser, kind: SyntaxKind) { - if let Some(child) = p.children.get_mut(self.0) { - child.convert_to(kind); - } - } - - /// Convert the child directly after marker. - fn to_error(self, p: &mut Parser, message: impl Into) { - if let Some(child) = p.children.get_mut(self.0) { - child.convert_to_error(message); - } - } - - /// Perform a subparse that wraps all children after the marker in a node - /// with the given kind. - fn perform(self, p: &mut Parser, kind: SyntaxKind, f: F) -> T - where - F: FnOnce(&mut Parser) -> T, - { - let success = f(p); - self.end(p, kind); - success - } - - /// Wrap all children after the marker (excluding trailing trivia) in a node - /// with the given `kind`. - fn end(self, p: &mut Parser, kind: SyntaxKind) { - let until = p.trivia_start().0.max(self.0); - let children = p.children.drain(self.0..until).collect(); - p.children.insert(self.0, SyntaxNode::inner(kind, children)); - } - - /// Wrap all children that do not fulfill the predicate in error nodes. - fn filter_children(self, p: &mut Parser, mut f: F) - where - F: FnMut(&SyntaxNode) -> Result<(), &'static str>, - { - for child in &mut p.children[self.0..] { - // Don't expose errors. - if child.kind().is_error() { - continue; - } - - // Don't expose trivia in code. - if p.lexer.mode() != LexMode::Markup && child.kind().is_trivia() { - continue; - } - - if let Err(msg) = f(child) { - let mut msg = EcoString::from(msg); - if msg.starts_with("expected") { - msg.push_str(", found "); - msg.push_str(child.kind().name()); - } - let len = child.len(); - *child = SyntaxNode::error(msg, ErrorPos::Full, len); - } - } - } -} - -/// A logical group of tokens, e.g. `[...]`. -#[derive(Debug)] -struct GroupEntry { - /// The kind of group this is. This decides which token(s) will end the - /// group. For example, a [`Group::Paren`] will be ended by - /// [`Token::RightParen`]. 
- kind: Group, - /// The mode the parser was in _before_ the group started (to which we go - /// back once the group ends). - prev_mode: LexMode, -} - -/// A group, confined by optional start and end delimiters. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum Group { - /// A curly-braced group: `{...}`. - Brace, - /// A bracketed group: `[...]`. - Bracket, - /// A parenthesized group: `(...)`. - Paren, - /// A group surrounded with stars: `*...*`. - Strong, - /// A group surrounded with underscore: `_..._`. - Emph, - /// A group surrounded by dollar signs: `$...$`. - Math, - /// A group surrounded by math delimiters. - MathRow(char, char), - /// A group ended by a semicolon or a line break: `;`, `\n`. - Expr, -} - -impl Group { - /// Whether the group can only force other weak groups to end. - fn is_weak(self) -> bool { - matches!(self, Group::Strong | Group::Emph) - } -} - -/// Allows parser methods to use the try operator. Never returned top-level -/// because the parser recovers from all errors. -type ParseResult = Result; - -/// The error type for parsing. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -struct ParseError; - -impl Display for ParseError { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - f.pad("failed to parse") - } -} - -impl std::error::Error for ParseError {} diff --git a/src/syntax/reparse.rs b/src/syntax/reparse.rs deleted file mode 100644 index e72192fff..000000000 --- a/src/syntax/reparse.rs +++ /dev/null @@ -1,525 +0,0 @@ -use std::ops::Range; - -use super::{ - is_newline, parse, reparse_code_block, reparse_content_block, - reparse_markup_elements, Span, SyntaxKind, SyntaxNode, -}; - -/// Refresh the given syntax node with as little parsing as possible. -/// -/// Takes the new source, the range in the old source that was replaced and the -/// length of the replacement. -/// -/// Returns the range in the new source that was ultimately reparsed. 
-pub fn reparse( - root: &mut SyntaxNode, - text: &str, - replaced: Range, - replacement_len: usize, -) -> Range { - let change = Change { text, replaced, replacement_len }; - if let Some(range) = try_reparse(&change, root, 0, true, true) { - return range; - } - - let id = root.span().source(); - *root = parse(text); - root.numberize(id, Span::FULL).unwrap(); - 0..text.len() -} - -/// Try to reparse inside the given node. -fn try_reparse( - change: &Change, - node: &mut SyntaxNode, - mut offset: usize, - outermost: bool, - safe_to_replace: bool, -) -> Option> { - let is_markup = matches!(node.kind(), SyntaxKind::Markup { .. }); - let original_count = node.children().len(); - let original_offset = offset; - - let mut search = SearchState::default(); - let mut ahead: Option = None; - - // Whether the first node that should be replaced is at start. - let mut at_start = true; - - // Whether the last searched child is the outermost child. - let mut child_outermost = false; - - // Find the the first child in the range of children to reparse. - for (i, child) in node.children().enumerate() { - let pos = NodePos { idx: i, offset }; - let child_span = offset..offset + child.len(); - child_outermost = outermost && i + 1 == original_count; - - match search { - SearchState::NoneFound => { - // The edit is contained within the span of the current element. - if child_span.contains(&change.replaced.start) - && child_span.end >= change.replaced.end - { - // In Markup mode, we want to consider a non-whitespace - // neighbor if the edit is on the node boundary. 
- search = if is_markup && child_span.end == change.replaced.end { - SearchState::RequireNonTrivia(pos) - } else { - SearchState::Contained(pos) - }; - } else if child_span.contains(&change.replaced.start) { - search = SearchState::Inside(pos); - } else if child_span.end == change.replaced.start - && change.replaced.start == change.replaced.end - && child_outermost - { - search = SearchState::SpanFound(pos, pos); - } else { - // Update compulsary state of `ahead_nontrivia`. - if let Some(ahead_nontrivia) = ahead.as_mut() { - if let SyntaxKind::Space { newlines: (1..) } = child.kind() { - ahead_nontrivia.newline(); - } - } - - // We look only for non spaces, non-semicolon and also - // reject text that points to the special case for URL - // evasion and line comments. - if !child.kind().is_space() - && child.kind() != SyntaxKind::Semicolon - && (child.kind() != SyntaxKind::Text || child.text() != "/") - && (ahead.is_none() || change.replaced.start > child_span.end) - && !ahead.map_or(false, Ahead::is_compulsory) - { - ahead = Some(Ahead::new(pos, at_start, is_bounded(child.kind()))); - } - - at_start = next_at_start(child.kind(), at_start); - } - } - SearchState::Inside(start) => { - if child_span.end == change.replaced.end { - search = SearchState::RequireNonTrivia(start); - } else if child_span.end > change.replaced.end { - search = SearchState::SpanFound(start, pos); - } - } - SearchState::RequireNonTrivia(start) => { - if !child.kind().is_trivia() { - search = SearchState::SpanFound(start, pos); - } - } - _ => unreachable!(), - } - - offset += child.len(); - - if search.done().is_some() { - break; - } - } - - // If we were looking for a non-whitespace element and hit the end of - // the file here, we instead use EOF as the end of the span. 
- if let SearchState::RequireNonTrivia(start) = search { - search = SearchState::SpanFound( - start, - NodePos { - idx: node.children().len() - 1, - offset: offset - node.children().last().unwrap().len(), - }, - ) - } - - if let SearchState::Contained(pos) = search { - // Do not allow replacement of elements inside of constructs whose - // opening and closing brackets look the same. - let safe_inside = is_bounded(node.kind()); - let child = &mut node.children_mut()[pos.idx]; - let prev_len = child.len(); - let prev_descendants = child.descendants(); - - if !child.is_leaf() { - if let Some(range) = - try_reparse(change, child, pos.offset, child_outermost, safe_inside) - { - let new_len = child.len(); - let new_descendants = child.descendants(); - node.update_parent(prev_len, new_len, prev_descendants, new_descendants); - return Some(range); - } - } - - let superseded_span = pos.offset..pos.offset + prev_len; - let func: Option = match child.kind() { - SyntaxKind::CodeBlock => Some(ReparseMode::Code), - SyntaxKind::ContentBlock => Some(ReparseMode::Content), - _ => None, - }; - - // Return if the element was reparsable on its own, otherwise try to - // treat it as a markup element. - if let Some(func) = func { - if let Some(result) = replace( - change, - node, - func, - pos.idx..pos.idx + 1, - superseded_span, - outermost, - ) { - return Some(result); - } - } - } - - // Make sure this is a markup node and that we may replace. If so, save - // the current indent. 
- let min_indent = match node.kind() { - SyntaxKind::Markup { min_indent } if safe_to_replace => min_indent, - _ => return None, - }; - - let (mut start, end) = search.done()?; - if let Some(ahead) = ahead { - if start.offset == change.replaced.start || ahead.is_compulsory() { - start = ahead.pos; - at_start = ahead.at_start; - } - } else { - start = NodePos { idx: 0, offset: original_offset }; - } - - let superseded_span = - start.offset..end.offset + node.children().as_slice()[end.idx].len(); - - replace( - change, - node, - ReparseMode::MarkupElements { at_start, min_indent }, - start.idx..end.idx + 1, - superseded_span, - outermost, - ) -} - -/// Reparse the superseded nodes and replace them. -fn replace( - change: &Change, - node: &mut SyntaxNode, - mode: ReparseMode, - superseded_idx: Range, - superseded_span: Range, - outermost: bool, -) -> Option> { - let superseded_start = superseded_idx.start; - - let differential: isize = - change.replacement_len as isize - change.replaced.len() as isize; - let newborn_end = (superseded_span.end as isize + differential) as usize; - let newborn_span = superseded_span.start..newborn_end; - - let mut prefix = ""; - for (i, c) in change.text[..newborn_span.start].char_indices().rev() { - if is_newline(c) { - break; - } - prefix = &change.text[i..newborn_span.start]; - } - - let (newborns, terminated, amount) = match mode { - ReparseMode::Code => reparse_code_block( - prefix, - &change.text[newborn_span.start..], - newborn_span.len(), - ), - ReparseMode::Content => reparse_content_block( - prefix, - &change.text[newborn_span.start..], - newborn_span.len(), - ), - ReparseMode::MarkupElements { at_start, min_indent } => reparse_markup_elements( - prefix, - &change.text[newborn_span.start..], - newborn_span.len(), - differential, - &node.children().as_slice()[superseded_start..], - at_start, - min_indent, - ), - }?; - - // Do not accept unclosed nodes if the old node wasn't at the right edge - // of the tree. 
- if !outermost && !terminated { - return None; - } - - node.replace_children(superseded_start..superseded_start + amount, newborns) - .ok()?; - - Some(newborn_span) -} - -/// A description of a change. -struct Change<'a> { - /// The new source code, with the change applied. - text: &'a str, - /// Which range in the old source file was changed. - replaced: Range, - /// How many characters replaced the text in `replaced`. - replacement_len: usize, -} - -/// Encodes the state machine of the search for the nodes are pending for -/// replacement. -#[derive(Clone, Copy, Debug, PartialEq)] -enum SearchState { - /// Neither an end nor a start have been found as of now. - /// The latest non-trivia child is continually saved. - NoneFound, - /// The search has concluded by finding a node that fully contains the - /// modifications. - Contained(NodePos), - /// The search has found the start of the modified nodes. - Inside(NodePos), - /// The search has found the end of the modified nodes but the change - /// touched its boundries so another non-trivia node is needed. - RequireNonTrivia(NodePos), - /// The search has concluded by finding a start and an end index for nodes - /// with a pending reparse. - SpanFound(NodePos, NodePos), -} - -impl Default for SearchState { - fn default() -> Self { - Self::NoneFound - } -} - -impl SearchState { - fn done(self) -> Option<(NodePos, NodePos)> { - match self { - Self::NoneFound => None, - Self::Contained(s) => Some((s, s)), - Self::Inside(_) => None, - Self::RequireNonTrivia(_) => None, - Self::SpanFound(s, e) => Some((s, e)), - } - } -} - -/// The position of a syntax node. -#[derive(Clone, Copy, Debug, PartialEq)] -struct NodePos { - /// The index in the parent node. - idx: usize, - /// The byte offset in the string. - offset: usize, -} - -/// An ahead node with an index and whether it is `at_start`. -#[derive(Clone, Copy, Debug, PartialEq)] -struct Ahead { - /// The position of the node. 
- pos: NodePos, - /// The `at_start` before this node. - at_start: bool, - /// The kind of ahead node. - kind: AheadKind, -} - -/// The kind of ahead node. -#[derive(Clone, Copy, Debug, PartialEq)] -enum AheadKind { - /// A normal non-trivia child has been found. - Normal, - /// An unbounded child has been found. The boolean indicates whether it was - /// on the current line, in which case adding it to the reparsing range is - /// compulsory. - Unbounded(bool), -} - -impl Ahead { - fn new(pos: NodePos, at_start: bool, bounded: bool) -> Self { - Self { - pos, - at_start, - kind: if bounded { AheadKind::Normal } else { AheadKind::Unbounded(true) }, - } - } - - fn newline(&mut self) { - if let AheadKind::Unbounded(current_line) = &mut self.kind { - *current_line = false; - } - } - - fn is_compulsory(self) -> bool { - matches!(self.kind, AheadKind::Unbounded(true)) - } -} - -/// Which reparse function to choose for a span of elements. -#[derive(Clone, Copy, Debug, PartialEq)] -enum ReparseMode { - /// Reparse a code block, including its braces. - Code, - /// Reparse a content block, including its square brackets. - Content, - /// Reparse elements of the markup. Also specified the initial `at_start` - /// state for the reparse and the minimum indent of the reparsed nodes. - MarkupElements { at_start: bool, min_indent: usize }, -} - -/// Whether changes _inside_ this node are safely encapsulated, so that only -/// this node must be reparsed. -fn is_bounded(kind: SyntaxKind) -> bool { - matches!( - kind, - SyntaxKind::CodeBlock - | SyntaxKind::ContentBlock - | SyntaxKind::Linebreak - | SyntaxKind::SmartQuote - | SyntaxKind::BlockComment - | SyntaxKind::Space { .. } - | SyntaxKind::Escape - | SyntaxKind::Shorthand - ) -} - -/// Whether `at_start` would still be true after this node given the -/// previous value of the property. -fn next_at_start(kind: SyntaxKind, prev: bool) -> bool { - match kind { - SyntaxKind::Space { newlines: (1..) } => true, - SyntaxKind::Space { .. 
} | SyntaxKind::LineComment | SyntaxKind::BlockComment => { - prev - } - _ => false, - } -} - -#[cfg(test)] -#[rustfmt::skip] -mod tests { - use std::fmt::Debug; - - use super::*; - use super::super::{parse, Source}; - - #[track_caller] - fn check(text: &str, found: T, expected: T) - where - T: Debug + PartialEq, - { - if found != expected { - println!("source: {text:?}"); - println!("expected: {expected:#?}"); - println!("found: {found:#?}"); - panic!("test failed"); - } - } - - #[track_caller] - fn test(prev: &str, range: Range, with: &str, goal: Range) { - let mut source = Source::detached(prev); - let range = source.edit(range, with); - check(source.text(), source.root(), &parse(source.text())); - assert_eq!(range, goal); - } - - #[test] - fn test_parse_incremental_simple_replacements() { - test("hello world", 7 .. 12, "walkers", 0 .. 14); - test("some content", 0..12, "", 0..0); - test("", 0..0, "do it", 0..5); - test("a d e", 1 .. 3, " b c d", 0 .. 9); - test("*~ *", 2..2, "*", 0..5); - test("_1_\n2a\n3", 5..5, "4", 4..7); - test("_1_\n2a\n3~", 8..8, "4", 4..10); - test("_1_ 2 3a\n4", 7..7, "5", 0..9); - test("* {1+2} *", 5..6, "3", 2..7); - test("a #f() e", 1 .. 6, " b c d", 0 .. 9); - test("a\nb\nc\nd\ne\n", 5 .. 5, "c", 2 .. 7); - test("a\n\nb\n\nc\n\nd\n\ne\n", 7 .. 7, "c", 3 .. 10); - test("a\nb\nc *hel a b lo* d\nd\ne", 13..13, "c ", 4..20); - test("~~ {a} ~~", 4 .. 5, "b", 3 .. 6); - test("{(0, 1, 2)}", 5 .. 6, "11pt", 0..14); - test("\n= A heading", 4 .. 4, "n evocative", 0 .. 23); - test("for~your~thing", 9 .. 9, "a", 0 .. 15); - test("a your thing a", 6 .. 7, "a", 0 .. 14); - test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); - test("#call() abc", 7 .. 7, "[]", 0 .. 10); - test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 2 .. 35); - test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 3..19); - test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 
20, "none", 0..99); - test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33..42); - test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 33 .. 40); - test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 0 .. 33); - test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 0 .. 33); - test("hello~~{x}", 7 .. 10, "#f()", 0 .. 11); - test("this~is -- in my opinion -- spectacular", 8 .. 10, "---", 0 .. 25); - test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 22); - test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); - test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 18); - test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18); - test("#for", 4 .. 4, "//", 0 .. 6); - test("#show f: a => b..", 16..16, "c", 0..18); - test("a\n#let \nb", 7 .. 7, "i", 2 .. 9); - test("a\n#for i \nb", 9 .. 9, "in", 2 .. 12); - test("a~https://fun/html", 13..14, "n", 0..18); - } - - #[test] - fn test_parse_incremental_whitespace_invariants() { - test("hello \\ world", 7 .. 8, "a ", 0 .. 14); - test("hello \\ world", 7 .. 8, " a", 0 .. 14); - test("x = y", 1 .. 1, " + y", 0 .. 6); - test("x = y", 1 .. 1, " + y\n", 0 .. 7); - test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21); - test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19); - test("#let x = (1, 2 + ;~ Five\r\n\r", 20 .. 23, "2.", 0 .. 23); - test("hey #myfriend", 4 .. 4, "\\", 0 .. 14); - test("hey #myfriend", 4 .. 4, "\\", 0 .. 6); - test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0 .. 11); - test("= foo\n bar\n baz", 6 .. 8, "", 0 .. 9); - test(" // hi", 1 .. 1, " ", 0 .. 7); - test("- \nA", 2..3, "", 0..3); - } - - #[test] - fn test_parse_incremental_type_invariants() { - test("a #for x in array {x}", 18 .. 21, "[#x]", 0 .. 22); - test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 11); - test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 
16); - test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 13); - test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18); - test("a // b c #f()", 3 .. 4, "", 0 .. 12); - test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12); - test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13); - test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26); - test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14); - test("a b c", 1 .. 1, "{[}", 0 .. 8); - } - - #[test] - fn test_parse_incremental_wrongly_or_unclosed_things() { - test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6); - test(r"this \u{abcd}", 8 .. 9, "", 0 .. 12); - test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17); - test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24); - test("a b c", 1 .. 1, " /* letters */", 0 .. 19); - test("a b c", 1 .. 1, " /* letters", 0 .. 16); - test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 0 .. 41); - test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38); - test("~~~~", 2 .. 2, "[]", 0 .. 5); - test("a[]b", 2 .. 2, "{", 1 .. 4); - test("[hello]", 2 .. 3, "]", 0 .. 7); - test("{a}", 1 .. 2, "b", 0 .. 3); - test("{ a; b; c }", 5 .. 6, "[}]", 0 .. 13); - test("#a()\n~", 3..4, "{}", 0..7); - test("[]\n~", 1..2, "#if i==0 {true}", 0..18); - } -} diff --git a/src/syntax/reparser.rs b/src/syntax/reparser.rs new file mode 100644 index 000000000..9404055d8 --- /dev/null +++ b/src/syntax/reparser.rs @@ -0,0 +1,262 @@ +use std::ops::Range; + +use super::{ + is_newline, parse, reparse_block, reparse_markup, Span, SyntaxKind, SyntaxNode, +}; + +/// Refresh the given syntax node with as little parsing as possible. +/// +/// Takes the new text, the range in the old text that was replaced and the +/// length of the replacement and returns the range in the new text that was +/// ultimately reparsed. +/// +/// The high-level API for this function is +/// [`Source::edit`](super::Source::edit). 
+pub fn reparse( + root: &mut SyntaxNode, + text: &str, + replaced: Range, + replacement_len: usize, +) -> Range { + try_reparse(text, replaced, replacement_len, None, root, 0).unwrap_or_else(|| { + let id = root.span().source(); + *root = parse(text); + root.numberize(id, Span::FULL).unwrap(); + 0..text.len() + }) +} + +/// Try to reparse inside the given node. +fn try_reparse( + text: &str, + replaced: Range, + replacement_len: usize, + parent_kind: Option, + node: &mut SyntaxNode, + offset: usize, +) -> Option> { + // The range of children which overlap with the edit. + let mut overlap = usize::MAX..0; + let mut cursor = offset; + let node_kind = node.kind(); + + for (i, child) in node.children_mut().iter_mut().enumerate() { + let prev_range = cursor..cursor + child.len(); + let prev_len = child.len(); + let prev_desc = child.descendants(); + + // Does the child surround the edit? + // If so, try to reparse within it or itself. + if !child.is_leaf() && includes(&prev_range, &replaced) { + let new_len = prev_len + replacement_len - replaced.len(); + let new_range = cursor..cursor + new_len; + + // Try to reparse within the child. + if let Some(range) = try_reparse( + text, + replaced.clone(), + replacement_len, + Some(node_kind), + child, + cursor, + ) { + assert_eq!(child.len(), new_len); + let new_desc = child.descendants(); + node.update_parent(prev_len, new_len, prev_desc, new_desc); + return Some(range); + } + + // If the child is a block, try to reparse the block. + if child.kind().is_block() { + if let Some(newborn) = reparse_block(text, new_range.clone()) { + return node + .replace_children(i..i + 1, vec![newborn]) + .is_ok() + .then(|| new_range); + } + } + } + + // Does the child overlap with the edit? + if overlaps(&prev_range, &replaced) { + overlap.start = overlap.start.min(i); + overlap.end = i + 1; + } + + // Is the child beyond the edit? 
+ if replaced.end < cursor { + break; + } + + cursor += child.len(); + } + + // Try to reparse a range of markup expressions within markup. This is only + // possible if the markup is top-level or contained in a block, not if it is + // contained in things like headings or lists because too much can go wrong + // with indent and line breaks. + if node.kind() == SyntaxKind::Markup + && (parent_kind == None || parent_kind == Some(SyntaxKind::ContentBlock)) + && !overlap.is_empty() + { + // Add one node of slack in both directions. + let children = node.children_mut(); + let mut start = overlap.start.saturating_sub(1); + let mut end = (overlap.end + 1).min(children.len()); + + // Expand to the left. + while start > 0 && expand(&children[start]) { + start -= 1; + } + + // Expand to the right. + while end < children.len() && expand(&children[end]) { + end += 1; + } + + // Synthesize what `at_start` would be at the start of the reparse. + let mut prefix_len = 0; + let mut at_start = true; + for child in &children[..start] { + prefix_len += child.len(); + next_at_start(child, &mut at_start); + } + + // Determine what `at_start` will have to be at the end of the reparse. + let mut prev_len = 0; + let mut prev_at_start_after = at_start; + for child in &children[start..end] { + prev_len += child.len(); + next_at_start(child, &mut prev_at_start_after); + } + + let shifted = offset + prefix_len; + let new_len = prev_len + replacement_len - replaced.len(); + let new_range = shifted..shifted + new_len; + let stop_kind = match parent_kind { + Some(_) => SyntaxKind::RightBracket, + None => SyntaxKind::Eof, + }; + + if let Some(newborns) = + reparse_markup(text, new_range.clone(), &mut at_start, |kind| { + kind == stop_kind + }) + { + if at_start == prev_at_start_after { + return node + .replace_children(start..end, newborns) + .is_ok() + .then(|| new_range); + } + } + } + + None +} + +/// Whether the inner range is fully contained in the outer one (no touching). 
+fn includes(outer: &Range, inner: &Range) -> bool { + outer.start < inner.start && outer.end > inner.end +} + +/// Whether the first and second range overlap or touch. +fn overlaps(first: &Range, second: &Range) -> bool { + (first.start <= second.start && second.start <= first.end) + || (second.start <= first.start && first.start <= second.end) +} + +/// Whether the selection should be expanded beyond a node of this kind. +fn expand(node: &SyntaxNode) -> bool { + let kind = node.kind(); + kind.is_trivia() + || kind.is_error() + || kind == SyntaxKind::Semicolon + || node.text() == "/" + || node.text() == ":" +} + +/// Whether `at_start` would still be true after this node given the +/// previous value of the property. +fn next_at_start(node: &SyntaxNode, at_start: &mut bool) { + if node.kind().is_trivia() { + if node.text().chars().any(is_newline) { + *at_start = true; + } + } else { + *at_start = false; + } +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use super::super::{parse, Source}; + + #[track_caller] + fn test(prev: &str, range: Range, with: &str, incremental: bool) { + let mut source = Source::detached(prev); + let prev = source.root().clone(); + let range = source.edit(range, with); + let found = source.root(); + let expected = parse(source.text()); + if found != &expected { + eprintln!("source: {:?}", source.text()); + eprintln!("previous: {prev:#?}"); + eprintln!("expected: {expected:#?}"); + eprintln!("found: {found:#?}"); + panic!("test failed"); + } + if incremental { + assert_ne!(source.len_bytes(), range.len()); + } else { + assert_eq!(source.len_bytes(), range.len()); + } + } + + #[test] + fn test_reparse_markup() { + test("abc~def~ghi", 5..6, "+", true); + test("~~~~~~~", 3..4, "A", true); + test("abc~~", 1..2, "", true); + test("#var;hello", 9..10, "a", false); + test("https:/world", 7..7, "/", false); + test("hello world", 7..12, "walkers", false); + test("some content", 0..12, "", false); + test("", 0..0, "do it", false); + test("a 
d e", 1..3, " b c d", false); + test("~*~*~", 2..2, "*", false); + test("::1\n2. a\n3", 7..7, "4", true); + test("* {1+2} *", 5..6, "3", true); + test("{(0, 1, 2)}", 5..6, "11pt", false); + test("\n= A heading", 4..4, "n evocative", false); + test("#call() abc~d", 7..7, "[]", true); + test("a your thing a", 6..7, "a", false); + test("#grid(columns: (auto, 1fr, 40%))", 16..20, "4pt", false); + test("abc\n= a heading\njoke", 3..4, "\nmore\n\n", true); + test("#show f: a => b..", 16..16, "c", false); + test("#for", 4..4, "//", false); + test("a\n#let \nb", 7..7, "i", true); + test("#let x = (1, 2 + ;~ Five\r\n\r", 20..23, "2.", true); + test(r"{{let x = z}; a = 1} b", 6..6, "//", false); + test(r#"a ```typst hello```"#, 16..17, "", false); + } + + #[test] + fn test_reparse_block() { + test("Hello { x + 1 }!", 8..9, "abc", true); + test("A{}!", 2..2, "\"", false); + test("{ [= x] }!", 4..4, "=", true); + test("[[]]", 2..2, "\\", false); + test("[[ab]]", 3..4, "\\", false); + test("{}}", 1..1, "{", false); + test("A: [BC]", 5..5, "{", false); + test("A: [BC]", 5..5, "{}", true); + test("{\"ab\"}A", 4..4, "c", true); + test("{\"ab\"}A", 4..5, "c", false); + test("a[]b", 2..2, "{", false); + test("a{call(); abc}b", 7..7, "[]", true); + test("a #while x {\n g(x) \n} b", 12..12, "//", true); + } +} diff --git a/src/syntax/source.rs b/src/syntax/source.rs index 41805a604..472e8c6ca 100644 --- a/src/syntax/source.rs +++ b/src/syntax/source.rs @@ -9,8 +9,7 @@ use comemo::Prehashed; use unscanny::Scanner; use super::ast::Markup; -use super::reparse::reparse; -use super::{is_newline, parse, Span, SyntaxNode}; +use super::{is_newline, parse, reparse, Span, SyntaxNode}; use crate::diag::SourceResult; use crate::util::{PathExt, StrExt}; diff --git a/tests/ref/basics/heading.png b/tests/ref/basics/heading.png index 96ffcb80a..9cb4d0985 100644 Binary files a/tests/ref/basics/heading.png and b/tests/ref/basics/heading.png differ diff --git a/tests/ref/basics/list.png 
b/tests/ref/basics/list.png index 5d0f03c00..b6b8ed3e6 100644 Binary files a/tests/ref/basics/list.png and b/tests/ref/basics/list.png differ diff --git a/tests/typ/basics/list.typ b/tests/typ/basics/list.typ index fc3e5ca72..b8bd59eab 100644 --- a/tests/typ/basics/list.typ +++ b/tests/typ/basics/list.typ @@ -43,6 +43,7 @@ _Shopping list_ - A with 1 tab - B with 2 tabs +--- // This doesn't work because of mixed tabs and spaces. - A with 2 spaces - B with 2 tabs diff --git a/tests/typ/compiler/array.typ b/tests/typ/compiler/array.typ index ccde8598e..e01f88966 100644 --- a/tests/typ/compiler/array.typ +++ b/tests/typ/compiler/array.typ @@ -208,17 +208,16 @@ // Error: 3 expected closing paren {(} -// Error: 2-3 expected expression, found closing paren +// Error: 2-3 unexpected closing paren {)} -// Error: 4 expected comma // Error: 4-6 unexpected end of block comment {(1*/2)} // Error: 6-8 invalid number suffix {(1, 1u 2)} -// Error: 3-4 expected expression, found comma +// Error: 3-4 unexpected comma {(,1)} // Missing expression makes named pair incomplete, making this an empty array. diff --git a/tests/typ/compiler/block.typ b/tests/typ/compiler/block.typ index 7cf1f8bec..7fb7738b2 100644 --- a/tests/typ/compiler/block.typ +++ b/tests/typ/compiler/block.typ @@ -126,10 +126,12 @@ // Should output `3`. 
{ - // Error: 7-10 expected identifier, found string + // Error: 6 expected identifier + // Error: 10 expected block for "v" // Error: 8 expected keyword `in` + // Error: 22 expected block for v let z = 1 + 2 z diff --git a/tests/typ/compiler/call.typ b/tests/typ/compiler/call.typ index 7ea0a998f..087e46941 100644 --- a/tests/typ/compiler/call.typ +++ b/tests/typ/compiler/call.typ @@ -44,7 +44,7 @@ } --- -// Error: 28-47 duplicate argument +// Error: 28-34 duplicate argument #set text(family: "Arial", family: "Helvetica") --- @@ -70,7 +70,8 @@ #f[1](2) --- -// Error: 7-8 expected expression, found colon +// Error: 7 expected expression +// Error: 8 expected expression #func(:) // Error: 10-12 unexpected end of block comment @@ -102,5 +103,4 @@ --- // Error: 2:1 expected quote -// Error: 2:1 expected closing paren #func("] diff --git a/tests/typ/compiler/closure.typ b/tests/typ/compiler/closure.typ index c73212044..f1604b19d 100644 --- a/tests/typ/compiler/closure.typ +++ b/tests/typ/compiler/closure.typ @@ -145,6 +145,16 @@ test(greet("Typst", whatever: 10)) } +--- +// Error: 11-12 duplicate parameter +#let f(x, x) = none + +--- +// Error: 14-15 duplicate parameter +// Error: 23-24 duplicate parameter +// Error: 35-36 duplicate parameter +#let f(a, b, a: none, b: none, c, b) = none + --- // Error: 6-16 expected identifier, named pair or argument sink, found keyed pair {(a, "named": b) => none} diff --git a/tests/typ/compiler/dict.typ b/tests/typ/compiler/dict.typ index 0170cb8b3..f9a0b3695 100644 --- a/tests/typ/compiler/dict.typ +++ b/tests/typ/compiler/dict.typ @@ -56,11 +56,11 @@ #test(dict, (a: 3, b: 1)) --- -// Error: 24-32 pair has duplicate key +// Error: 24-29 duplicate key {(first: 1, second: 2, first: 3)} --- -// Error: 17-23 pair has duplicate key +// Error: 17-20 duplicate key {(a: 1, "b": 2, "a": 3)} --- @@ -72,8 +72,11 @@ // Error: 4-5 expected named or keyed pair, found integer // Error: 5 expected comma // Error: 12-16 expected identifier or string, 
found boolean -// Error: 17-18 expected expression, found colon -{(:1 b:"", true::)} +// Error: 17 expected expression +{(:1 b:"", true:)} + +// Error: 3-8 expected identifier or string, found binary expression +{(a + b: "hey")} --- // Error: 3-15 cannot mutate a temporary value diff --git a/tests/typ/compiler/field.typ b/tests/typ/compiler/field.typ index 78439ae08..0195c6d8d 100644 --- a/tests/typ/compiler/field.typ +++ b/tests/typ/compiler/field.typ @@ -36,5 +36,6 @@ = A --- -// Error: 8-12 expected identifier, found boolean +// Error: 8 expected identifier +// Error: 8 expected semicolon or line break {false.true} diff --git a/tests/typ/compiler/for.typ b/tests/typ/compiler/for.typ index f63b870e1..7a530b73a 100644 --- a/tests/typ/compiler/for.typ +++ b/tests/typ/compiler/for.typ @@ -94,7 +94,7 @@ // Error: 5 expected identifier #for -// Error: 7 expected identifier +// Error: 5 expected identifier #for// // Error: 5 expected identifier @@ -106,17 +106,18 @@ // Error: 10 expected expression #for v in -// Error: 15 expected body +// Error: 15 expected block #for v in iter // Error: 5 expected identifier #for v in iter {} -// Error: 7-10 expected identifier, found string +// Error: 6 expected identifier +// Error: 10 expected block A#for "v" thing -// Error: 6-9 expected identifier, found string +// Error: 5 expected identifier #for "v" in iter {} // Error: 7 expected keyword `in` diff --git a/tests/typ/compiler/if.typ b/tests/typ/compiler/if.typ index 0d87c689b..3b35ebd89 100644 --- a/tests/typ/compiler/if.typ +++ b/tests/typ/compiler/if.typ @@ -112,7 +112,7 @@ // Error: 4 expected expression {if} -// Error: 6 expected body +// Error: 6 expected block #if x // Error: 1-6 unexpected keyword `else` @@ -124,11 +124,11 @@ x {} // Should output `something`. 
-// Error: 6 expected body +// Error: 6 expected block #if x something // Should output `A thing.` -// Error: 19 expected body +// Error: 19 expected block A#if false {} else thing #if a []else [b] diff --git a/tests/typ/compiler/import.typ b/tests/typ/compiler/import.typ index 6f2ac459c..6b2d80750 100644 --- a/tests/typ/compiler/import.typ +++ b/tests/typ/compiler/import.typ @@ -81,21 +81,16 @@ This is never reached. #import --- -// Error: 26-29 expected identifier, found string +// Error: 26-29 unexpected string #import "module.typ": a, "b", c --- -// Error: 22 expected import items -#import "module.typ": - ---- -// Error: 23-24 expected expression, found assignment operator -// Error: 24 expected import items +// Error: 23-24 unexpected equals sign #import "module.typ": = --- // An additional trailing comma. -// Error: 31-32 expected expression, found comma +// Error: 31-32 unexpected comma #import "module.typ": a, b, c,, --- @@ -105,7 +100,7 @@ This is never reached. --- // A star in the list. -// Error: 26-27 expected expression, found star +// Error: 26-27 unexpected star #import "module.typ": a, *, b --- @@ -114,5 +109,10 @@ This is never reached. 
#import "module.typ": *, a --- -// Error: 13-17 expected identifier, found named pair +// Error: 14-15 unexpected colon +// Error: 16-17 unexpected integer #import "": a: 1 + +--- +// Error: 14 expected comma +#import "": a b diff --git a/tests/typ/compiler/let.typ b/tests/typ/compiler/let.typ index d4f9510ab..3a879ce7a 100644 --- a/tests/typ/compiler/let.typ +++ b/tests/typ/compiler/let.typ @@ -39,7 +39,8 @@ Three // Error: 5 expected identifier {let} -// Error: 6-9 expected identifier, found string +// Error: 5 expected identifier +// Error: 5 expected semicolon or line break #let "v" // Error: 7 expected semicolon or line break @@ -48,7 +49,8 @@ Three // Error: 9 expected expression #let v = -// Error: 6-9 expected identifier, found string +// Error: 5 expected identifier +// Error: 5 expected semicolon or line break #let "v" = 1 // Terminated because expression ends. @@ -61,7 +63,7 @@ Three #let v5 = (1, 2 + ; Five --- -// Error: 13 expected body +// Error: 13 expected equals sign #let func(x) // Error: 15 expected expression diff --git a/tests/typ/compiler/spread.typ b/tests/typ/compiler/spread.typ index ff661eadb..244e9fb9c 100644 --- a/tests/typ/compiler/spread.typ +++ b/tests/typ/compiler/spread.typ @@ -60,7 +60,7 @@ #min(.."nope") --- -// Error: 8-14 expected identifier, named pair or argument sink, found spread +// Error: 10-14 expected identifier, found boolean #let f(..true) = none --- diff --git a/tests/typ/compiler/while.typ b/tests/typ/compiler/while.typ index 3c28a32a1..d495a84a1 100644 --- a/tests/typ/compiler/while.typ +++ b/tests/typ/compiler/while.typ @@ -49,12 +49,12 @@ // Error: 7 expected expression {while} -// Error: 9 expected body +// Error: 9 expected block #while x // Error: 7 expected expression #while x {} -// Error: 9 expected body +// Error: 9 expected block #while x something diff --git a/tests/typ/text/emphasis.typ b/tests/typ/text/emphasis.typ index 27e3b9776..f45700310 100644 --- a/tests/typ/text/emphasis.typ +++ 
b/tests/typ/text/emphasis.typ @@ -38,6 +38,6 @@ _Hello World --- -// Error: 1:12 expected star -// Error: 2:1 expected star -_Cannot *be_ interleaved* +// Error: 25 expected star +// Error: 25 expected underscore +[_Cannot *be interleaved] diff --git a/tests/typ/text/raw.typ b/tests/typ/text/raw.typ index c17c8fecf..96a23b66d 100644 --- a/tests/typ/text/raw.typ +++ b/tests/typ/text/raw.typ @@ -41,7 +41,8 @@ The keyword ```rust let```. // First line is not dedented and leading space is still possible. ``` A B - C``` + C + ``` --- // Unterminated.