Remove most fields from SyntaxKind enum

This commit is contained in:
Laurenz 2023-01-15 12:00:13 +01:00
parent 15f0434d1f
commit 40561e57fb
21 changed files with 2062 additions and 2340 deletions

View File

@ -208,6 +208,6 @@ fn items() -> LangItems {
math_atom: |atom| math::AtomNode(atom).pack(),
math_script: |base, sub, sup| math::ScriptNode { base, sub, sup }.pack(),
math_frac: |num, denom| math::FracNode { num, denom }.pack(),
math_align_point: |count| math::AlignPointNode(count).pack(),
math_align_point: || math::AlignPointNode.pack(),
}
}

View File

@ -637,12 +637,12 @@ impl Texify for ScriptNode {
#[func]
#[capable(Texify)]
#[derive(Debug, Hash)]
pub struct AlignPointNode(pub NonZeroUsize);
pub struct AlignPointNode;
#[node]
impl AlignPointNode {
fn construct(_: &Vm, args: &mut Args) -> SourceResult<Content> {
Ok(Self(args.expect("index")?).pack())
fn construct(_: &Vm, _: &mut Args) -> SourceResult<Content> {
Ok(Self.pack())
}
}

View File

@ -50,7 +50,7 @@ pub type SourceResult<T> = Result<T, Box<Vec<SourceError>>>;
/// An error in a source file.
///
/// This contained spans will only be detached if any of the input source files
/// The contained spans will only be detached if any of the input source files
/// were detached.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct SourceError {

View File

@ -138,7 +138,7 @@ fn complete_params(ctx: &mut CompletionContext) -> bool {
(SyntaxKind::Colon, _) => prev.prev_leaf(),
_ => None,
};
if let SyntaxKind::Ident(param) = before_colon.kind();
if let Some(param) = before_colon.cast::<ast::Ident>();
then {
ctx.from = match ctx.leaf.kind() {
SyntaxKind::Colon | SyntaxKind::Space { .. } => ctx.cursor,
@ -160,11 +160,11 @@ fn complete_params(ctx: &mut CompletionContext) -> bool {
deciding.kind(),
SyntaxKind::LeftParen
| SyntaxKind::Comma
| SyntaxKind::Ident(_)
| SyntaxKind::Ident
);
then {
ctx.from = match deciding.kind() {
SyntaxKind::Ident(_) => deciding.offset(),
SyntaxKind::Ident => deciding.offset(),
_ => ctx.cursor,
};
@ -192,9 +192,9 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool {
// Behind half-completed symbol: "$arrow:|$".
if_chain! {
if matches!(ctx.leaf.kind(), SyntaxKind::Atom(s) if s == ":");
if matches!(ctx.leaf.kind(), SyntaxKind::Atom if ctx.leaf.text() == ":");
if let Some(prev) = ctx.leaf.prev_leaf();
if matches!(prev.kind(), SyntaxKind::Ident(_));
if matches!(prev.kind(), SyntaxKind::Ident);
then {
ctx.from = prev.offset();
ctx.symbol_completions(false);
@ -205,7 +205,7 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool {
// Start of a symbol: ":|".
// Checking for a text node ensures that "\:" isn't completed.
if ctx.before.ends_with(':')
&& matches!(ctx.leaf.kind(), SyntaxKind::Text(_) | SyntaxKind::Atom(_))
&& matches!(ctx.leaf.kind(), SyntaxKind::Text | SyntaxKind::Atom)
{
ctx.from = ctx.cursor;
ctx.symbol_completions(needs_colon);
@ -213,7 +213,7 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool {
}
// An existing symbol: ":arrow:".
if matches!(ctx.leaf.kind(), SyntaxKind::Symbol(_)) {
if matches!(ctx.leaf.kind(), SyntaxKind::Symbol) {
// We want to complete behind the colon, therefore plus 1.
let has_colon = ctx.after.starts_with(':');
ctx.from = ctx.leaf.offset() + (has_colon as usize);
@ -225,12 +225,12 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool {
if_chain! {
if matches!(
ctx.leaf.kind(),
SyntaxKind::Text(_) | SyntaxKind::Atom(_) | SyntaxKind::Ident(_)
SyntaxKind::Text | SyntaxKind::Atom | SyntaxKind::Ident
);
if let Some(prev) = ctx.leaf.prev_leaf();
if matches!(prev.kind(), SyntaxKind::Symbol(_)) || matches!(
if matches!(prev.kind(), SyntaxKind::Symbol) || matches!(
prev.kind(),
SyntaxKind::Text(s) | SyntaxKind::Atom(s) if s == ":"
SyntaxKind::Text | SyntaxKind::Atom if prev.text() == ":"
);
then {
// We want to complete behind the colon, therefore plus 1.
@ -252,14 +252,14 @@ fn complete_markup(ctx: &mut CompletionContext) -> bool {
// Start of an interpolated identifier: "#|".
// Checking for a text node ensures that "\#" isn't completed.
if ctx.before.ends_with('#') && matches!(ctx.leaf.kind(), SyntaxKind::Text(_)) {
if ctx.before.ends_with('#') && matches!(ctx.leaf.kind(), SyntaxKind::Text) {
ctx.from = ctx.cursor;
ctx.expr_completions(true);
return true;
}
// An existing identifier: "#pa|".
if matches!(ctx.leaf.kind(), SyntaxKind::Ident(_)) {
if matches!(ctx.leaf.kind(), SyntaxKind::Ident) {
// We want to complete behind the hashtag, therefore plus 1.
ctx.from = ctx.leaf.offset() + 1;
ctx.expr_completions(true);
@ -298,14 +298,14 @@ fn complete_math(ctx: &mut CompletionContext) -> bool {
}
// Start of an interpolated identifier: "#|".
if matches!(ctx.leaf.kind(), SyntaxKind::Atom(s) if s == "#") {
if matches!(ctx.leaf.kind(), SyntaxKind::Atom if ctx.leaf.text() == "#") {
ctx.from = ctx.cursor;
ctx.expr_completions(true);
return true;
}
// Behind existing atom or identifier: "$a|$" or "$abc|$".
if matches!(ctx.leaf.kind(), SyntaxKind::Atom(_) | SyntaxKind::Ident(_)) {
if matches!(ctx.leaf.kind(), SyntaxKind::Atom | SyntaxKind::Ident) {
ctx.from = ctx.leaf.offset();
ctx.math_completions();
return true;
@ -331,7 +331,7 @@ fn complete_code(ctx: &mut CompletionContext) -> bool {
}
// An existing identifier: "{ pa| }".
if matches!(ctx.leaf.kind(), SyntaxKind::Ident(_)) {
if matches!(ctx.leaf.kind(), SyntaxKind::Ident) {
ctx.from = ctx.leaf.offset();
ctx.expr_completions(false);
return true;

View File

@ -119,7 +119,6 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
_ => Category::Operator,
}),
SyntaxKind::Hat => Some(Category::MathOperator),
SyntaxKind::Amp => Some(Category::MathOperator),
SyntaxKind::Dot => Some(Category::Punctuation),
SyntaxKind::Eq => match node.parent_kind() {
Some(SyntaxKind::Heading) => None,
@ -159,38 +158,38 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
SyntaxKind::As => Some(Category::Keyword),
SyntaxKind::Markup { .. }
if node.parent_kind() == Some(&SyntaxKind::TermItem)
if node.parent_kind() == Some(SyntaxKind::TermItem)
&& node.next_sibling().as_ref().map(|v| v.kind())
== Some(&SyntaxKind::Colon) =>
== Some(SyntaxKind::Colon) =>
{
Some(Category::ListTerm)
}
SyntaxKind::Markup { .. } => None,
SyntaxKind::Text(_) => None,
SyntaxKind::Text => None,
SyntaxKind::Linebreak => Some(Category::Escape),
SyntaxKind::Escape(_) => Some(Category::Escape),
SyntaxKind::Shorthand(_) => Some(Category::Escape),
SyntaxKind::Symbol(_) => Some(Category::Escape),
SyntaxKind::Escape => Some(Category::Escape),
SyntaxKind::Shorthand => Some(Category::Escape),
SyntaxKind::Symbol => Some(Category::Escape),
SyntaxKind::SmartQuote { .. } => None,
SyntaxKind::Strong => Some(Category::Strong),
SyntaxKind::Emph => Some(Category::Emph),
SyntaxKind::Raw(_) => Some(Category::Raw),
SyntaxKind::Link(_) => Some(Category::Link),
SyntaxKind::Label(_) => Some(Category::Label),
SyntaxKind::Ref(_) => Some(Category::Ref),
SyntaxKind::Raw { .. } => Some(Category::Raw),
SyntaxKind::Link => Some(Category::Link),
SyntaxKind::Label => Some(Category::Label),
SyntaxKind::Ref => Some(Category::Ref),
SyntaxKind::Heading => Some(Category::Heading),
SyntaxKind::ListItem => None,
SyntaxKind::EnumItem => None,
SyntaxKind::EnumNumbering(_) => Some(Category::ListMarker),
SyntaxKind::EnumNumbering => Some(Category::ListMarker),
SyntaxKind::TermItem => None,
SyntaxKind::Math => None,
SyntaxKind::Atom(_) => None,
SyntaxKind::Atom => None,
SyntaxKind::Script => None,
SyntaxKind::Frac => None,
SyntaxKind::AlignPoint => None,
SyntaxKind::AlignPoint => Some(Category::MathOperator),
SyntaxKind::Ident(_) => match node.parent_kind() {
SyntaxKind::Ident => match node.parent_kind() {
Some(
SyntaxKind::Markup { .. }
| SyntaxKind::Math
@ -202,9 +201,9 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
if node
.parent()
.and_then(|p| p.parent())
.filter(|gp| gp.kind() == &SyntaxKind::Parenthesized)
.filter(|gp| gp.kind() == SyntaxKind::Parenthesized)
.and_then(|gp| gp.parent())
.map_or(false, |ggp| ggp.kind() == &SyntaxKind::FuncCall)
.map_or(false, |ggp| ggp.kind() == SyntaxKind::FuncCall)
&& node.next_sibling().is_none() =>
{
Some(Category::Function)
@ -218,17 +217,17 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
Some(SyntaxKind::SetRule) => Some(Category::Function),
Some(SyntaxKind::ShowRule)
if node.prev_sibling().as_ref().map(|v| v.kind())
== Some(&SyntaxKind::Show) =>
== Some(SyntaxKind::Show) =>
{
Some(Category::Function)
}
_ => None,
},
SyntaxKind::Bool(_) => Some(Category::Keyword),
SyntaxKind::Int(_) => Some(Category::Number),
SyntaxKind::Float(_) => Some(Category::Number),
SyntaxKind::Numeric(_, _) => Some(Category::Number),
SyntaxKind::Str(_) => Some(Category::String),
SyntaxKind::Bool => Some(Category::Keyword),
SyntaxKind::Int => Some(Category::Number),
SyntaxKind::Float => Some(Category::Number),
SyntaxKind::Numeric => Some(Category::Number),
SyntaxKind::Str => Some(Category::String),
SyntaxKind::CodeBlock => None,
SyntaxKind::ContentBlock => None,
SyntaxKind::Parenthesized => None,
@ -259,7 +258,7 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
SyntaxKind::LoopContinue => None,
SyntaxKind::FuncReturn => None,
SyntaxKind::Error(_, _) => Some(Category::Error),
SyntaxKind::Error => Some(Category::Error),
}
}

View File

@ -18,12 +18,12 @@ pub fn tooltip(world: &dyn World, source: &Source, cursor: usize) -> Option<Stri
/// Tooltip for a function or set rule name.
fn function_tooltip(world: &dyn World, leaf: &LinkedNode) -> Option<String> {
if_chain! {
if let SyntaxKind::Ident(ident) = leaf.kind();
if let Some(ident) = leaf.cast::<ast::Ident>();
if matches!(
leaf.parent_kind(),
Some(SyntaxKind::FuncCall | SyntaxKind::SetRule),
);
if let Some(Value::Func(func)) = world.library().scope.get(ident);
if let Some(Value::Func(func)) = world.library().scope.get(&ident);
if let Some(info) = func.info();
then {
return Some(plain_docs_sentence(&info.docs));
@ -60,8 +60,8 @@ fn named_param_tooltip(world: &dyn World, leaf: &LinkedNode) -> Option<String> {
// Hovering over the parameter name.
if_chain! {
if leaf.index() == 0;
if let SyntaxKind::Ident(ident) = leaf.kind();
if let Some(param) = info.param(ident);
if let Some(ident) = leaf.cast::<ast::Ident>();
if let Some(param) = info.param(&ident);
then {
return Some(plain_docs_sentence(param.docs));
}
@ -69,9 +69,9 @@ fn named_param_tooltip(world: &dyn World, leaf: &LinkedNode) -> Option<String> {
// Hovering over a string parameter value.
if_chain! {
if let SyntaxKind::Str(string) = leaf.kind();
if let Some(string) = leaf.cast::<ast::Str>();
if let Some(param) = info.param(&named.name());
if let Some(docs) = find_string_doc(&param.cast, string);
if let Some(docs) = find_string_doc(&param.cast, &string.get());
then {
return Some(docs.into());
}
@ -95,8 +95,8 @@ fn find_string_doc(info: &CastInfo, string: &str) -> Option<&'static str> {
fn font_family_tooltip(world: &dyn World, leaf: &LinkedNode) -> Option<String> {
if_chain! {
// Ensure that we are on top of a string.
if let SyntaxKind::Str(string) = leaf.kind();
let lower = string.to_lowercase();
if let Some(string) = leaf.cast::<ast::Str>();
let lower = string.get().to_lowercase();
// Ensure that we are in the arguments to the text function.
if let Some(parent) = leaf.parent();

View File

@ -16,8 +16,8 @@ use crate::diag::{
};
use crate::geom::{Abs, Angle, Em, Fr, Ratio};
use crate::syntax::ast::AstNode;
use crate::syntax::{ast, Source, SourceId, Span, Spanned, SyntaxKind, SyntaxNode, Unit};
use crate::util::PathExt;
use crate::syntax::{ast, Source, SourceId, Span, Spanned, SyntaxKind, SyntaxNode};
use crate::util::{EcoString, PathExt};
use crate::World;
const MAX_ITERATIONS: usize = 10_000;
@ -389,13 +389,13 @@ impl Eval for ast::Symbol {
type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
Ok((vm.items.symbol)(self.get().clone()))
Ok((vm.items.symbol)(self.get().into()))
}
}
impl ast::Symbol {
fn eval_in_math(&self, vm: &mut Vm) -> SourceResult<Content> {
Ok((vm.items.symbol)(self.get().clone() + ":op".into()))
Ok((vm.items.symbol)(EcoString::from(self.get()) + ":op".into()))
}
}
@ -427,8 +427,8 @@ impl Eval for ast::Raw {
type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
let text = self.text().clone();
let lang = self.lang().cloned();
let text = self.text();
let lang = self.lang().map(Into::into);
let block = self.block();
Ok((vm.items.raw)(text, lang, block))
}
@ -446,7 +446,7 @@ impl Eval for ast::Label {
type Output = Value;
fn eval(&self, _: &mut Vm) -> SourceResult<Self::Output> {
Ok(Value::Label(Label(self.get().clone())))
Ok(Value::Label(Label(self.get().into())))
}
}
@ -454,7 +454,7 @@ impl Eval for ast::Ref {
type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
Ok((vm.items.ref_)(self.get().clone()))
Ok((vm.items.ref_)(self.get().into()))
}
}
@ -542,7 +542,7 @@ impl Eval for ast::AlignPoint {
type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
Ok((vm.items.math_align_point)(self.count()))
Ok((vm.items.math_align_point)())
}
}
@ -563,7 +563,7 @@ impl ast::Ident {
if self.as_untyped().len() == self.len()
&& matches!(vm.scopes.get(&self), Ok(Value::Func(_)) | Err(_))
{
Ok((vm.items.symbol)(self.get().clone() + ":op".into()))
Ok((vm.items.symbol)(EcoString::from(self.get()) + ":op".into()))
} else {
Ok(self.eval(vm)?.display_in_math())
}
@ -616,11 +616,11 @@ impl Eval for ast::Numeric {
fn eval(&self, _: &mut Vm) -> SourceResult<Self::Output> {
let (v, unit) = self.get();
Ok(match unit {
Unit::Length(unit) => Abs::with_unit(v, unit).into(),
Unit::Angle(unit) => Angle::with_unit(v, unit).into(),
Unit::Em => Em::new(v).into(),
Unit::Fr => Fr::new(v).into(),
Unit::Percent => Ratio::new(v / 100.0).into(),
ast::Unit::Length(unit) => Abs::with_unit(v, unit).into(),
ast::Unit::Angle(unit) => Angle::with_unit(v, unit).into(),
ast::Unit::Em => Em::new(v).into(),
ast::Unit::Fr => Fr::new(v).into(),
ast::Unit::Percent => Ratio::new(v / 100.0).into(),
})
}
}
@ -743,7 +743,7 @@ impl Eval for ast::Dict {
map.insert(named.name().take().into(), named.expr().eval(vm)?);
}
ast::DictItem::Keyed(keyed) => {
map.insert(keyed.key().into(), keyed.expr().eval(vm)?);
map.insert(keyed.key().get().into(), keyed.expr().eval(vm)?);
}
ast::DictItem::Spread(expr) => match expr.eval(vm)? {
Value::None => {}

View File

@ -74,8 +74,8 @@ pub struct LangItems {
fn(base: Content, sub: Option<Content>, sup: Option<Content>) -> Content,
/// A fraction in a formula: `x/2`.
pub math_frac: fn(num: Content, denom: Content) -> Content,
/// An alignment point in a formula: `&`, `&&`.
pub math_align_point: fn(count: NonZeroUsize) -> Content,
/// An alignment point in a formula: `&`.
pub math_align_point: fn() -> Content,
}
impl Debug for LangItems {

View File

@ -5,7 +5,12 @@
use std::num::NonZeroUsize;
use std::ops::Deref;
use super::{RawFields, Span, SyntaxKind, SyntaxNode, Unit};
use unscanny::Scanner;
use super::{
is_id_continue, is_id_start, is_newline, split_newlines, Span, SyntaxKind, SyntaxNode,
};
use crate::geom::{AbsUnit, AngleUnit};
use crate::util::EcoString;
/// A typed AST node.
@ -117,7 +122,7 @@ pub enum Expr {
Script(Script),
/// A fraction in a math formula: `x/2`.
Frac(Frac),
/// An alignment point in a math formula: `&`, `&&`.
/// An alignment point in a math formula: `&`.
AlignPoint(AlignPoint),
/// An identifier: `left`.
Ident(Ident),
@ -194,34 +199,34 @@ impl AstNode for Expr {
fn from_untyped(node: &SyntaxNode) -> Option<Self> {
match node.kind() {
SyntaxKind::Linebreak => node.cast().map(Self::Linebreak),
SyntaxKind::Text(_) => node.cast().map(Self::Text),
SyntaxKind::Escape(_) => node.cast().map(Self::Escape),
SyntaxKind::Shorthand(_) => node.cast().map(Self::Shorthand),
SyntaxKind::Symbol(_) => node.cast().map(Self::Symbol),
SyntaxKind::Text => node.cast().map(Self::Text),
SyntaxKind::Escape => node.cast().map(Self::Escape),
SyntaxKind::Shorthand => node.cast().map(Self::Shorthand),
SyntaxKind::Symbol => node.cast().map(Self::Symbol),
SyntaxKind::SmartQuote { .. } => node.cast().map(Self::SmartQuote),
SyntaxKind::Strong => node.cast().map(Self::Strong),
SyntaxKind::Emph => node.cast().map(Self::Emph),
SyntaxKind::Raw(_) => node.cast().map(Self::Raw),
SyntaxKind::Link(_) => node.cast().map(Self::Link),
SyntaxKind::Label(_) => node.cast().map(Self::Label),
SyntaxKind::Ref(_) => node.cast().map(Self::Ref),
SyntaxKind::Raw { .. } => node.cast().map(Self::Raw),
SyntaxKind::Link => node.cast().map(Self::Link),
SyntaxKind::Label => node.cast().map(Self::Label),
SyntaxKind::Ref => node.cast().map(Self::Ref),
SyntaxKind::Heading => node.cast().map(Self::Heading),
SyntaxKind::ListItem => node.cast().map(Self::List),
SyntaxKind::EnumItem => node.cast().map(Self::Enum),
SyntaxKind::TermItem => node.cast().map(Self::Term),
SyntaxKind::Math => node.cast().map(Self::Math),
SyntaxKind::Atom(_) => node.cast().map(Self::Atom),
SyntaxKind::Atom => node.cast().map(Self::Atom),
SyntaxKind::Script => node.cast().map(Self::Script),
SyntaxKind::Frac => node.cast().map(Self::Frac),
SyntaxKind::AlignPoint => node.cast().map(Self::AlignPoint),
SyntaxKind::Ident(_) => node.cast().map(Self::Ident),
SyntaxKind::Ident => node.cast().map(Self::Ident),
SyntaxKind::None => node.cast().map(Self::None),
SyntaxKind::Auto => node.cast().map(Self::Auto),
SyntaxKind::Bool(_) => node.cast().map(Self::Bool),
SyntaxKind::Int(_) => node.cast().map(Self::Int),
SyntaxKind::Float(_) => node.cast().map(Self::Float),
SyntaxKind::Numeric(_, _) => node.cast().map(Self::Numeric),
SyntaxKind::Str(_) => node.cast().map(Self::Str),
SyntaxKind::Bool => node.cast().map(Self::Bool),
SyntaxKind::Int => node.cast().map(Self::Int),
SyntaxKind::Float => node.cast().map(Self::Float),
SyntaxKind::Numeric => node.cast().map(Self::Numeric),
SyntaxKind::Str => node.cast().map(Self::Str),
SyntaxKind::CodeBlock => node.cast().map(Self::Code),
SyntaxKind::ContentBlock => node.cast().map(Self::Content),
SyntaxKind::Parenthesized => node.cast().map(Self::Parenthesized),
@ -315,7 +320,7 @@ impl Space {
/// Get the number of newlines.
pub fn newlines(&self) -> usize {
match self.0.kind() {
&SyntaxKind::Space { newlines } => newlines,
SyntaxKind::Space { newlines } => newlines,
_ => panic!("space is of wrong kind"),
}
}
@ -334,10 +339,7 @@ node! {
impl Text {
/// Get the text.
pub fn get(&self) -> &EcoString {
match self.0.kind() {
SyntaxKind::Text(v) => v,
_ => panic!("text is of wrong kind"),
}
self.0.text()
}
}
@ -349,15 +351,22 @@ node! {
impl Escape {
/// Get the escaped character.
pub fn get(&self) -> char {
match self.0.kind() {
&SyntaxKind::Escape(v) => v,
_ => panic!("escape is of wrong kind"),
let mut s = Scanner::new(self.0.text());
s.expect('\\');
if s.eat_if("u{") {
let hex = s.eat_while(char::is_ascii_hexdigit);
u32::from_str_radix(hex, 16)
.ok()
.and_then(std::char::from_u32)
.expect("unicode escape is invalid")
} else {
s.eat().expect("escape is missing escaped character")
}
}
}
node! {
/// A shorthand for a unicode codepoint. For example, `~` for non-breaking
/// A shorthand for a unicode codepoint. For example, `~` for a non-breaking
/// space or `-?` for a soft hyphen.
Shorthand
}
@ -365,9 +374,26 @@ node! {
impl Shorthand {
/// Get the shorthanded character.
pub fn get(&self) -> char {
match self.0.kind() {
&SyntaxKind::Shorthand(v) => v,
_ => panic!("shorthand is of wrong kind"),
match self.0.text().as_str() {
"~" => '\u{00A0}',
"..." => '\u{2026}',
"--" => '\u{2013}',
"---" => '\u{2014}',
"-?" => '\u{00AD}',
"!=" => '≠',
"<=" => '≤',
">=" => '≥',
"<-" => '←',
"->" => '→',
"=>" => '⇒',
":=" => '≔',
"[|" => '⟦',
"|]" => '⟧',
"||" => '‖',
"|->" => '↦',
"<->" => '↔',
"<=>" => '⇔',
_ => panic!("shorthand is invalid"),
}
}
}
@ -379,11 +405,8 @@ node! {
impl Symbol {
/// Get the symbol's notation.
pub fn get(&self) -> &EcoString {
match self.0.kind() {
SyntaxKind::Symbol(v) => v,
_ => panic!("symbol is of wrong kind"),
}
pub fn get(&self) -> &str {
self.0.text().trim_matches(':')
}
}
@ -395,10 +418,7 @@ node! {
impl SmartQuote {
/// Whether this is a double quote.
pub fn double(&self) -> bool {
match self.0.kind() {
&SyntaxKind::SmartQuote { double } => double,
_ => panic!("smart quote is of wrong kind"),
}
self.0.text() == "\""
}
}
@ -410,7 +430,7 @@ node! {
impl Strong {
/// The contents of the strong node.
pub fn body(&self) -> Markup {
self.0.cast_first_child().expect("strong node is missing markup body")
self.0.cast_first_match().expect("strong emphasis is missing body")
}
}
@ -422,9 +442,7 @@ node! {
impl Emph {
/// The contents of the emphasis node.
pub fn body(&self) -> Markup {
self.0
.cast_first_child()
.expect("emphasis node is missing markup body")
self.0.cast_first_match().expect("emphasis is missing body")
}
}
@ -434,27 +452,75 @@ node! {
}
impl Raw {
/// The raw text.
pub fn text(&self) -> &EcoString {
&self.get().text
/// The trimmed raw text.
pub fn text(&self) -> EcoString {
let SyntaxKind::Raw { column } = self.0.kind() else {
panic!("raw node is of wrong kind");
};
let mut text = self.0.text().as_str();
let blocky = text.starts_with("```");
text = text.trim_matches('`');
// Trim tag, one space at the start, and one space at the end if the
// last non-whitespace char is a backtick.
if blocky {
let mut s = Scanner::new(text);
if s.eat_if(is_id_start) {
s.eat_while(is_id_continue);
}
text = s.after();
text = text.strip_prefix(' ').unwrap_or(text);
if text.trim_end().ends_with('`') {
text = text.strip_suffix(' ').unwrap_or(text);
}
}
// Split into lines.
let mut lines = split_newlines(text);
if blocky {
// Dedent based on column, but not for the first line.
for line in lines.iter_mut().skip(1) {
let offset = line
.chars()
.take(column)
.take_while(|c| c.is_whitespace())
.map(char::len_utf8)
.sum();
*line = &line[offset..];
}
let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace);
// Trims a sequence of whitespace followed by a newline at the start.
if lines.first().map_or(false, is_whitespace) {
lines.remove(0);
}
// Trims a newline followed by a sequence of whitespace at the end.
if lines.last().map_or(false, is_whitespace) {
lines.pop();
}
}
lines.join("\n").into()
}
/// An optional identifier specifying the language to syntax-highlight in.
pub fn lang(&self) -> Option<&EcoString> {
self.get().lang.as_ref()
pub fn lang(&self) -> Option<&str> {
let inner = self.0.text().trim_start_matches('`');
let mut s = Scanner::new(inner);
s.eat_if(is_id_start).then(|| {
s.eat_while(is_id_continue);
s.before()
})
}
/// Whether the raw text should be displayed in a separate block.
pub fn block(&self) -> bool {
self.get().block
}
/// The raw fields.
fn get(&self) -> &RawFields {
match self.0.kind() {
SyntaxKind::Raw(v) => v.as_ref(),
_ => panic!("raw is of wrong kind"),
}
let text = self.0.text();
text.starts_with("```") && text.chars().any(is_newline)
}
}
@ -466,10 +532,7 @@ node! {
impl Link {
/// Get the URL.
pub fn url(&self) -> &EcoString {
match self.0.kind() {
SyntaxKind::Link(url) => url,
_ => panic!("link is of wrong kind"),
}
self.0.text()
}
}
@ -480,11 +543,8 @@ node! {
impl Label {
/// Get the label's text.
pub fn get(&self) -> &EcoString {
match self.0.kind() {
SyntaxKind::Label(v) => v,
_ => panic!("label is of wrong kind"),
}
pub fn get(&self) -> &str {
self.0.text().trim_start_matches('<').trim_end_matches('>')
}
}
@ -495,11 +555,8 @@ node! {
impl Ref {
/// Get the target.
pub fn get(&self) -> &EcoString {
match self.0.kind() {
SyntaxKind::Ref(v) => v,
_ => panic!("reference is of wrong kind"),
}
pub fn get(&self) -> &str {
self.0.text().trim_start_matches('@')
}
}
@ -511,14 +568,14 @@ node! {
impl Heading {
/// The contents of the heading.
pub fn body(&self) -> Markup {
self.0.cast_first_child().expect("heading is missing markup body")
self.0.cast_first_match().expect("heading is missing markup body")
}
/// The section depth (number of equals signs).
pub fn level(&self) -> NonZeroUsize {
self.0
.children()
.filter(|n| n.kind() == &SyntaxKind::Eq)
.filter(|n| n.kind() == SyntaxKind::Eq)
.count()
.try_into()
.expect("heading is missing equals sign")
@ -533,7 +590,7 @@ node! {
impl ListItem {
/// The contents of the list item.
pub fn body(&self) -> Markup {
self.0.cast_first_child().expect("list item is missing body")
self.0.cast_first_match().expect("list item is missing body")
}
}
@ -546,14 +603,14 @@ impl EnumItem {
/// The explicit numbering, if any: `23.`.
pub fn number(&self) -> Option<NonZeroUsize> {
self.0.children().find_map(|node| match node.kind() {
SyntaxKind::EnumNumbering(num) => Some(*num),
SyntaxKind::EnumNumbering => node.text().trim_end_matches('.').parse().ok(),
_ => Option::None,
})
}
/// The contents of the list item.
pub fn body(&self) -> Markup {
self.0.cast_first_child().expect("enum item is missing body")
self.0.cast_first_match().expect("enum item is missing body")
}
}
@ -565,13 +622,13 @@ node! {
impl TermItem {
/// The term described by the item.
pub fn term(&self) -> Markup {
self.0.cast_first_child().expect("term list item is missing term")
self.0.cast_first_match().expect("term list item is missing term")
}
/// The description of the term.
pub fn description(&self) -> Markup {
self.0
.cast_last_child()
.cast_last_match()
.expect("term list item is missing description")
}
}
@ -602,10 +659,7 @@ node! {
impl Atom {
/// Get the atom's text.
pub fn get(&self) -> &EcoString {
match self.0.kind() {
SyntaxKind::Atom(v) => v,
_ => panic!("atom is of wrong kind"),
}
self.0.text()
}
}
@ -617,7 +671,7 @@ node! {
impl Script {
/// The base of the script.
pub fn base(&self) -> Expr {
self.0.cast_first_child().expect("script node is missing base")
self.0.cast_first_match().expect("script node is missing base")
}
/// The subscript.
@ -647,32 +701,20 @@ node! {
impl Frac {
/// The numerator.
pub fn num(&self) -> Expr {
self.0.cast_first_child().expect("fraction is missing numerator")
self.0.cast_first_match().expect("fraction is missing numerator")
}
/// The denominator.
pub fn denom(&self) -> Expr {
self.0.cast_last_child().expect("fraction is missing denominator")
self.0.cast_last_match().expect("fraction is missing denominator")
}
}
node! {
/// An alignment point in a formula: `&`, `&&`.
/// An alignment point in a formula: `&`.
AlignPoint
}
impl AlignPoint {
/// The number of ampersands.
pub fn count(&self) -> NonZeroUsize {
self.0
.children()
.filter(|n| n.kind() == &SyntaxKind::Amp)
.count()
.try_into()
.expect("alignment point is missing ampersand sign")
}
}
node! {
/// An identifier: `it`.
Ident
@ -680,18 +722,16 @@ node! {
impl Ident {
/// Get the identifier.
pub fn get(&self) -> &EcoString {
match self.0.kind() {
SyntaxKind::Ident(id) => id,
_ => panic!("identifier is of wrong kind"),
}
pub fn get(&self) -> &str {
self.0.text().trim_start_matches('#')
}
/// Take out the contained identifier.
pub fn take(self) -> EcoString {
match self.0.take() {
SyntaxKind::Ident(id) => id,
_ => panic!("identifier is of wrong kind"),
let text = self.0.into_text();
match text.strip_prefix('#') {
Some(text) => text.into(),
Option::None => text,
}
}
@ -727,10 +767,7 @@ node! {
impl Bool {
/// Get the value.
pub fn get(&self) -> bool {
match self.0.kind() {
SyntaxKind::Bool(v) => *v,
_ => panic!("boolean is of wrong kind"),
}
self.0.text() == "true"
}
}
@ -742,10 +779,7 @@ node! {
impl Int {
/// Get the value.
pub fn get(&self) -> i64 {
match self.0.kind() {
SyntaxKind::Int(v) => *v,
_ => panic!("integer is of wrong kind"),
}
self.0.text().parse().expect("integer is invalid")
}
}
@ -757,10 +791,7 @@ node! {
impl Float {
/// Get the value.
pub fn get(&self) -> f64 {
match self.0.kind() {
SyntaxKind::Float(v) => *v,
_ => panic!("float is of wrong kind"),
}
self.0.text().parse().expect("float is invalid")
}
}
@ -772,13 +803,47 @@ node! {
impl Numeric {
/// Get the value and unit.
pub fn get(&self) -> (f64, Unit) {
match self.0.kind() {
SyntaxKind::Numeric(v, unit) => (*v, *unit),
_ => panic!("numeric is of wrong kind"),
}
let text = self.0.text();
let count = text
.chars()
.rev()
.take_while(|c| matches!(c, 'a'..='z' | '%'))
.count();
let split = text.len() - count;
let value = text[..split].parse().expect("number is invalid");
let unit = match &text[split..] {
"pt" => Unit::Length(AbsUnit::Pt),
"mm" => Unit::Length(AbsUnit::Mm),
"cm" => Unit::Length(AbsUnit::Cm),
"in" => Unit::Length(AbsUnit::In),
"deg" => Unit::Angle(AngleUnit::Deg),
"rad" => Unit::Angle(AngleUnit::Rad),
"em" => Unit::Em,
"fr" => Unit::Fr,
"%" => Unit::Percent,
_ => panic!("number has invalid suffix"),
};
(value, unit)
}
}
/// The unit attached to the value of a numeric literal.
///
/// Each variant corresponds to one or more literal suffixes: `pt`, `mm`, `cm`
/// and `in` for lengths, `deg` and `rad` for angles, and `em`, `fr` and `%`
/// for the remaining variants.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum Unit {
/// An absolute length unit: `pt`, `mm`, `cm` or `in`.
Length(AbsUnit),
/// An angular unit: `deg` or `rad`.
Angle(AngleUnit),
/// Font-relative: `1em` is the same as the font size.
Em,
/// Fractions: `fr`.
Fr,
/// Percentage: `%`.
Percent,
}
node! {
/// A quoted string: `"..."`.
Str
@ -786,11 +851,46 @@ node! {
impl Str {
/// Get the value.
pub fn get(&self) -> &EcoString {
match self.0.kind() {
SyntaxKind::Str(v) => v,
_ => panic!("string is of wrong kind"),
pub fn get(&self) -> EcoString {
let text = self.0.text();
let unquoted = &text[1..text.len() - 1];
if !unquoted.contains('\\') {
return unquoted.into();
}
let mut out = EcoString::with_capacity(unquoted.len());
let mut s = Scanner::new(unquoted);
while let Some(c) = s.eat() {
if c != '\\' {
out.push(c);
continue;
}
let start = s.locate(-1);
match s.eat() {
Some('\\') => out.push('\\'),
Some('"') => out.push('"'),
Some('n') => out.push('\n'),
Some('r') => out.push('\r'),
Some('t') => out.push('\t'),
Some('u') if s.eat_if('{') => {
let sequence = s.eat_while(char::is_ascii_hexdigit);
s.eat_if('}');
match u32::from_str_radix(sequence, 16)
.ok()
.and_then(std::char::from_u32)
{
Some(c) => out.push(c),
Option::None => out.push_str(s.from(start)),
}
}
_ => out.push_str(s.from(start)),
}
}
out
}
}
@ -814,7 +914,7 @@ node! {
impl ContentBlock {
/// The contained markup.
pub fn body(&self) -> Markup {
self.0.cast_first_child().expect("content block is missing body")
self.0.cast_first_match().expect("content block is missing body")
}
}
@ -827,7 +927,7 @@ impl Parenthesized {
/// The wrapped expression.
pub fn expr(&self) -> Expr {
self.0
.cast_first_child()
.cast_first_match()
.expect("parenthesized expression is missing expression")
}
}
@ -856,7 +956,7 @@ pub enum ArrayItem {
impl AstNode for ArrayItem {
fn from_untyped(node: &SyntaxNode) -> Option<Self> {
match node.kind() {
SyntaxKind::Spread => node.cast_first_child().map(Self::Spread),
SyntaxKind::Spread => node.cast_first_match().map(Self::Spread),
_ => node.cast().map(Self::Pos),
}
}
@ -897,7 +997,7 @@ impl AstNode for DictItem {
match node.kind() {
SyntaxKind::Named => node.cast().map(Self::Named),
SyntaxKind::Keyed => node.cast().map(Self::Keyed),
SyntaxKind::Spread => node.cast_first_child().map(Self::Spread),
SyntaxKind::Spread => node.cast_first_match().map(Self::Spread),
_ => Option::None,
}
}
@ -919,12 +1019,12 @@ node! {
impl Named {
/// The name: `thickness`.
pub fn name(&self) -> Ident {
self.0.cast_first_child().expect("named pair is missing name")
self.0.cast_first_match().expect("named pair is missing name")
}
/// The right-hand side of the pair: `3pt`.
pub fn expr(&self) -> Expr {
self.0.cast_last_child().expect("named pair is missing expression")
self.0.cast_last_match().expect("named pair is missing expression")
}
}
@ -935,19 +1035,16 @@ node! {
impl Keyed {
/// The key: `"spacy key"`.
pub fn key(&self) -> EcoString {
pub fn key(&self) -> Str {
self.0
.children()
.find_map(|node| match node.kind() {
SyntaxKind::Str(key) => Some(key.clone()),
_ => Option::None,
})
.find_map(|node| node.cast::<Str>())
.expect("keyed pair is missing key")
}
/// The right-hand side of the pair: `true`.
pub fn expr(&self) -> Expr {
self.0.cast_last_child().expect("keyed pair is missing expression")
self.0.cast_last_match().expect("keyed pair is missing expression")
}
}
@ -967,7 +1064,7 @@ impl Unary {
/// The expression to operate on: `x`.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn expr(&self) -> Expr {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_last_child().expect("unary operation is missing child")
self.0.cast_last_match().expect("unary operation is missing child")
}
}
@ -984,7 +1081,7 @@ pub enum UnOp {
impl UnOp {
/// Try to convert the token into a unary operation.
pub fn from_token(token: &SyntaxKind) -> Option<Self> {
pub fn from_token(token: SyntaxKind) -> Option<Self> {
Some(match token {
SyntaxKind::Plus => Self::Pos,
SyntaxKind::Minus => Self::Neg,
@ -1036,14 +1133,14 @@ impl Binary {
/// The left-hand side of the operation: `a`.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn lhs(&self) -> Expr {
self.0
// Diff view: the old call, directly followed by its replacement.
.cast_first_child()
.cast_first_match()
.expect("binary operation is missing left-hand side")
}
/// The right-hand side of the operation: `b`.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn rhs(&self) -> Expr {
self.0
// Diff view: the old call, directly followed by its replacement.
.cast_last_child()
.cast_last_match()
.expect("binary operation is missing right-hand side")
}
}
@ -1093,7 +1190,7 @@ pub enum BinOp {
impl BinOp {
/// Try to convert the token into a binary operation.
pub fn from_token(token: &SyntaxKind) -> Option<Self> {
pub fn from_token(token: SyntaxKind) -> Option<Self> {
Some(match token {
SyntaxKind::Plus => Self::Add,
SyntaxKind::Minus => Self::Sub,
@ -1210,12 +1307,12 @@ node! {
impl FieldAccess {
/// The expression to access the field on.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn target(&self) -> Expr {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_first_child().expect("field access is missing object")
self.0.cast_first_match().expect("field access is missing object")
}
/// The name of the field.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no identifier.
pub fn field(&self) -> Ident {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_last_child().expect("field access is missing name")
self.0.cast_last_match().expect("field access is missing name")
}
}
@ -1227,13 +1324,13 @@ node! {
impl FuncCall {
/// The function to call.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn callee(&self) -> Expr {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_first_child().expect("function call is missing callee")
self.0.cast_first_match().expect("function call is missing callee")
}
/// The arguments to the function.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no argument list.
pub fn args(&self) -> Args {
self.0
// Diff view: the old call, directly followed by its replacement.
.cast_last_child()
.cast_last_match()
.expect("function call is missing argument list")
}
}
@ -1246,18 +1343,18 @@ node! {
impl MethodCall {
/// The expression to call the method on.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn target(&self) -> Expr {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_first_child().expect("method call is missing target")
self.0.cast_first_match().expect("method call is missing target")
}
/// The name of the method.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no identifier.
pub fn method(&self) -> Ident {
// `method` and `args` use the same "last" lookup; they differ only in
// the type requested through the return type (`Ident` vs. `Args`).
self.0.cast_last_child().expect("method call is missing name")
self.0.cast_last_match().expect("method call is missing name")
}
/// The arguments to the method.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no argument list.
pub fn args(&self) -> Args {
self.0
// Diff view: the old call, directly followed by its replacement.
.cast_last_child()
.cast_last_match()
.expect("method call is missing argument list")
}
}
@ -1289,7 +1386,7 @@ impl AstNode for Arg {
/// Convert an untyped node into a call argument, or `None` if it does not
/// cast.
fn from_untyped(node: &SyntaxNode) -> Option<Self> {
match node.kind() {
SyntaxKind::Named => node.cast().map(Self::Named),
// A spread argument wraps whatever the lookup on the spread node yields.
// Diff view: the old arm, directly followed by its replacement.
SyntaxKind::Spread => node.cast_first_child().map(Self::Spread),
SyntaxKind::Spread => node.cast_first_match().map(Self::Spread),
// Every other kind is tried as a positional argument.
_ => node.cast().map(Self::Pos),
}
}
@ -1320,7 +1417,7 @@ impl Closure {
pub fn params(&self) -> impl DoubleEndedIterator<Item = Param> + '_ {
self.0
.children()
.find(|x| x.kind() == &SyntaxKind::Params)
.find(|x| x.kind() == SyntaxKind::Params)
.expect("closure is missing parameter list")
.children()
.filter_map(SyntaxNode::cast)
@ -1328,7 +1425,7 @@ impl Closure {
/// The body of the closure.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn body(&self) -> Expr {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_last_child().expect("closure is missing body")
self.0.cast_last_match().expect("closure is missing body")
}
}
@ -1346,9 +1443,9 @@ pub enum Param {
impl AstNode for Param {
/// Convert an untyped node into a closure parameter. Only identifier,
/// named and spread nodes are accepted; every other kind yields `None`.
fn from_untyped(node: &SyntaxNode) -> Option<Self> {
match node.kind() {
// Diff view: the old arm matched the identifier's string payload; the
// new `Ident` kind carries no fields.
SyntaxKind::Ident(_) => node.cast().map(Self::Pos),
SyntaxKind::Ident => node.cast().map(Self::Pos),
SyntaxKind::Named => node.cast().map(Self::Named),
// Diff view: the old arm, directly followed by its replacement.
SyntaxKind::Spread => node.cast_first_child().map(Self::Sink),
SyntaxKind::Spread => node.cast_first_match().map(Self::Sink),
_ => Option::None,
}
}
@ -1370,7 +1467,7 @@ node! {
impl LetBinding {
/// The binding to assign to.
pub fn binding(&self) -> Ident {
match self.0.cast_first_child() {
match self.0.cast_first_match() {
Some(Expr::Ident(binding)) => binding,
Some(Expr::Closure(closure)) => {
closure.name().expect("let-bound closure is missing name")
@ -1381,12 +1478,12 @@ impl LetBinding {
/// The expression the binding is initialized with.
///
/// Returns `None` when no initializer expression is found.
pub fn init(&self) -> Option<Expr> {
// Diff view: the old condition, directly followed by its replacement.
if self.0.cast_first_child::<Ident>().is_some() {
if self.0.cast_first_match::<Ident>().is_some() {
// This is a normal binding like `let x = 1`.
// The first child that casts to an expression is skipped and the
// second one (`nth(1)`) is returned as the initializer.
self.0.children().filter_map(SyntaxNode::cast).nth(1)
} else {
// This is a closure binding like `let f(x) = 1`.
// Diff view: the old call, directly followed by its replacement.
self.0.cast_first_child()
self.0.cast_first_match()
}
}
}
@ -1399,19 +1496,19 @@ node! {
impl SetRule {
/// The function to set style properties for.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no identifier.
pub fn target(&self) -> Ident {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_first_child().expect("set rule is missing target")
self.0.cast_first_match().expect("set rule is missing target")
}
/// The style properties to set.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no argument list.
pub fn args(&self) -> Args {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_last_child().expect("set rule is missing argument list")
self.0.cast_last_match().expect("set rule is missing argument list")
}
/// A condition under which the set rule applies.
///
/// Returns `None` if no expression is found from the `if` keyword onwards.
pub fn condition(&self) -> Option<Expr> {
self.0
.children()
// Drop every child before the `if` keyword. Diff view: the old
// comparison took the kind by reference, the new one by value.
.skip_while(|child| child.kind() != &SyntaxKind::If)
.skip_while(|child| child.kind() != SyntaxKind::If)
// The first remaining child that casts to an expression wins.
.find_map(SyntaxNode::cast)
}
}
@ -1427,13 +1524,13 @@ impl ShowRule {
self.0
.children()
.rev()
.skip_while(|child| child.kind() != &SyntaxKind::Colon)
.skip_while(|child| child.kind() != SyntaxKind::Colon)
.find_map(SyntaxNode::cast)
}
/// The transformation recipe.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn transform(&self) -> Expr {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_last_child().expect("show rule is missing transform")
self.0.cast_last_match().expect("show rule is missing transform")
}
}
@ -1445,7 +1542,7 @@ node! {
impl Conditional {
/// The condition which selects the body to evaluate.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn condition(&self) -> Expr {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_first_child().expect("conditional is missing condition")
self.0.cast_first_match().expect("conditional is missing condition")
}
/// The expression to evaluate if the condition is true.
@ -1471,12 +1568,12 @@ node! {
impl WhileLoop {
/// The condition which selects whether to evaluate the body.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn condition(&self) -> Expr {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_first_child().expect("while loop is missing condition")
self.0.cast_first_match().expect("while loop is missing condition")
}
/// The expression to evaluate while the condition is true.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn body(&self) -> Expr {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_last_child().expect("while loop is missing body")
self.0.cast_last_match().expect("while loop is missing body")
}
}
@ -1488,17 +1585,17 @@ node! {
impl ForLoop {
/// The pattern to assign to.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no pattern.
pub fn pattern(&self) -> ForPattern {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_first_child().expect("for loop is missing pattern")
self.0.cast_first_match().expect("for loop is missing pattern")
}
/// The expression to iterate over.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn iter(&self) -> Expr {
// Same "first" lookup as `pattern`; only the type requested through
// the return type differs (`Expr` here, `ForPattern` there).
self.0.cast_first_child().expect("for loop is missing iterable")
self.0.cast_first_match().expect("for loop is missing iterable")
}
/// The expression to evaluate for each iteration.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn body(&self) -> Expr {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_last_child().expect("for loop is missing body")
self.0.cast_last_match().expect("for loop is missing body")
}
}
@ -1521,7 +1618,7 @@ impl ForPattern {
/// The value part of the pattern.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no identifier.
pub fn value(&self) -> Ident {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_last_child().expect("for loop pattern is missing value")
self.0.cast_last_match().expect("for loop pattern is missing value")
}
}
@ -1533,7 +1630,7 @@ node! {
impl ModuleImport {
/// The module or path from which the items should be imported.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn source(&self) -> Expr {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_last_child().expect("module import is missing source")
self.0.cast_last_match().expect("module import is missing source")
}
/// The items to be imported.
@ -1566,7 +1663,7 @@ node! {
impl ModuleInclude {
/// The module or path from which the content should be included.
///
/// # Panics
/// Panics if the lookup on the underlying node finds no expression.
pub fn source(&self) -> Expr {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_last_child().expect("module include is missing path")
self.0.cast_last_match().expect("module include is missing path")
}
}
@ -1588,6 +1685,6 @@ node! {
impl FuncReturn {
/// The expression to return.
///
/// Unlike most accessors in this file this one does not panic: the
/// lookup result is handed back as-is, so `None` is returned when the
/// lookup finds no expression.
pub fn body(&self) -> Option<Expr> {
// Diff view: the old call, directly followed by its replacement.
self.0.cast_last_child()
self.0.cast_last_match()
}
}

View File

@ -1,14 +1,7 @@
use std::hash::{Hash, Hasher};
use std::num::NonZeroUsize;
use std::sync::Arc;
use crate::geom::{AbsUnit, AngleUnit};
use crate::util::EcoString;
/// All syntactical building blocks that can be part of a Typst document.
///
/// Can be created by the lexer or by the parser.
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum SyntaxKind {
/// A line comment: `// ...`.
LineComment,
@ -58,8 +51,6 @@ pub enum SyntaxKind {
Slash,
/// The superscript operator in a formula: `^`.
Hat,
/// The alignment operator in a formula: `&`.
Amp,
/// The field access and method call operator: `.`.
Dot,
/// The assignment operator: `=`.
@ -135,31 +126,31 @@ pub enum SyntaxKind {
/// so it is zero except inside indent-aware constructs like lists.
Markup { min_indent: usize },
/// Plain text without markup.
Text(EcoString),
Text,
/// A forced line break: `\`.
Linebreak,
/// An escape sequence: `\#`, `\u{1F5FA}`.
Escape(char),
Escape,
/// A shorthand for a unicode codepoint. For example, `~` for non-breaking
/// space or `-?` for a soft hyphen.
Shorthand(char),
Shorthand,
/// Symbol notation: `:arrow:l:`. The symbol's name is only the inner part,
/// without the leading and trailing colon.
Symbol(EcoString),
Symbol,
/// A smart quote: `'` or `"`.
SmartQuote { double: bool },
SmartQuote,
/// Strong content: `*Strong*`.
Strong,
/// Emphasized content: `_Emphasized_`.
Emph,
/// Raw text with optional syntax highlighting: `` `...` ``.
Raw(Arc<RawFields>),
Raw { column: usize },
/// A hyperlink: `https://typst.org`.
Link(EcoString),
Link,
/// A label: `<intro>`.
Label(EcoString),
Label,
/// A reference: `@target`.
Ref(EcoString),
Ref,
/// A section heading: `= Introduction`.
Heading,
/// An item in a bullet list: `- ...`.
@ -167,32 +158,32 @@ pub enum SyntaxKind {
/// An item in an enumeration (numbered list): `+ ...` or `1. ...`.
EnumItem,
/// An explicit enumeration numbering: `23.`.
EnumNumbering(NonZeroUsize),
EnumNumbering,
/// An item in a term list: `/ Term: Details`.
TermItem,
/// A mathematical formula: `$x$`, `$ x^2 $`.
Math,
/// An atom in a formula: `x`, `+`, `12`.
Atom(EcoString),
Atom,
/// A base with optional sub- and superscripts in a formula: `a_1^2`.
Script,
/// A fraction in a formula: `x/2`.
Frac,
/// An alignment point in a formula: `&`, `&&`.
/// An alignment point in a formula: `&`.
AlignPoint,
/// An identifier: `it`.
Ident(EcoString),
Ident,
/// A boolean: `true`, `false`.
Bool(bool),
Bool,
/// An integer: `120`.
Int(i64),
Int,
/// A floating-point number: `1.2`, `10e-4`.
Float(f64),
Float,
/// A numeric value with a unit: `12pt`, `3cm`, `2em`, `90deg`, `50%`.
Numeric(f64, Unit),
Numeric,
/// A quoted string: `"..."`.
Str(EcoString),
Str,
/// A code block: `{ let x = 1; x + 2 }`.
CodeBlock,
/// A content block: `[*Hi* there!]`.
@ -253,73 +244,37 @@ pub enum SyntaxKind {
FuncReturn,
/// An invalid sequence of characters.
Error(ErrorPos, EcoString),
}
/// Fields of the raw syntax kind.
///
/// Held behind an `Arc` by the `SyntaxKind::Raw(Arc<RawFields>)` variant.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawFields {
/// An optional identifier specifying the language to syntax-highlight in.
pub lang: Option<EcoString>,
/// The raw text, determined as the raw string between the backticks trimmed
/// according to the above rules.
// NOTE(review): the trimming rules referred to are not part of this view.
pub text: EcoString,
/// Whether the element is block-level, that is, it has 3+ backticks
/// and contains at least one newline.
pub block: bool,
}
/// Unit of a numeric value.
///
/// Carried as the second field of the `SyntaxKind::Numeric(f64, Unit)`
/// variant, for values such as `12pt`, `90deg`, `2em` or `50%`.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum Unit {
/// An absolute length unit.
Length(AbsUnit),
/// An angular unit.
Angle(AngleUnit),
/// Font-relative: `1em` is the same as the font size.
Em,
/// Fractions: `fr`.
Fr,
/// Percentage: `%`.
Percent,
}
/// Where in a node an error should be annotated.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum ErrorPos {
/// Over the full width of the node.
Full,
/// At the start of the node.
Start,
/// At the end of the node.
End,
Error,
}
impl SyntaxKind {
/// Whether this is trivia: a space, a comment or an error.
// Diff view: the old version (taking `&self`, with the comment check
// written inline) is directly followed by the new signature and body, which
// takes `self` by value and delegates to `is_comment`.
pub fn is_trivia(&self) -> bool {
self.is_space()
|| self.is_error()
|| matches!(self, Self::LineComment | Self::BlockComment)
pub fn is_trivia(self) -> bool {
self.is_space() || self.is_comment() || self.is_error()
}
/// Whether this is a space. Matches the `Space` variant regardless of the
/// values of its fields.
// Diff view: the old `&self` signature, directly followed by the new
// by-value one.
pub fn is_space(&self) -> bool {
pub fn is_space(self) -> bool {
matches!(self, Self::Space { .. })
}
// Diff view: the commit moved `is_paren` below `is_error` and added
// `is_comment` in its old place, so the old `is_paren` header and body are
// directly followed by the new `is_comment` and share its closing brace.
/// Whether this is a left or right parenthesis.
pub fn is_paren(&self) -> bool {
matches!(self, Self::LeftParen | Self::RightParen)
/// Whether this is a comment.
pub fn is_comment(self) -> bool {
matches!(self, Self::LineComment | Self::BlockComment)
}
/// Whether this is an error.
// Diff view: the old version matched the two-field `Error(_, _)` variant;
// the new `Error` kind carries no fields.
pub fn is_error(&self) -> bool {
matches!(self, SyntaxKind::Error(_, _))
pub fn is_error(self) -> bool {
matches!(self, SyntaxKind::Error)
}
/// Whether this is a left or right parenthesis.
pub fn is_paren(self) -> bool {
matches!(self, Self::LeftParen | Self::RightParen)
}
/// Does this node need termination through a semicolon or linebreak?
pub fn is_stmt(&self) -> bool {
pub fn is_stmt(self) -> bool {
matches!(
self,
SyntaxKind::LetBinding
@ -331,7 +286,7 @@ impl SyntaxKind {
}
/// A human-readable name for the kind.
pub fn name(&self) -> &'static str {
pub fn name(self) -> &'static str {
match self {
Self::LineComment => "line comment",
Self::BlockComment => "block comment",
@ -348,13 +303,11 @@ impl SyntaxKind {
Self::Star => "star",
Self::Underscore => "underscore",
Self::Dollar => "dollar sign",
Self::SmartQuote { double: false } => "single quote",
Self::SmartQuote { double: true } => "double quote",
Self::SmartQuote => "smart quote",
Self::Plus => "plus",
Self::Minus => "minus",
Self::Slash => "slash",
Self::Hat => "hat",
Self::Amp => "ampersand",
Self::Dot => "dot",
Self::Eq => "assignment operator",
Self::EqEq => "equality operator",
@ -389,41 +342,33 @@ impl SyntaxKind {
Self::Include => "keyword `include`",
Self::As => "keyword `as`",
Self::Markup { .. } => "markup",
Self::Text(_) => "text",
Self::Text => "text",
Self::Linebreak => "linebreak",
Self::Escape(_) => "escape sequence",
Self::Shorthand(_) => "shorthand",
Self::Symbol(_) => "symbol notation",
Self::Escape => "escape sequence",
Self::Shorthand => "shorthand",
Self::Symbol => "symbol notation",
Self::Strong => "strong content",
Self::Emph => "emphasized content",
Self::Raw(_) => "raw block",
Self::Link(_) => "link",
Self::Label(_) => "label",
Self::Ref(_) => "reference",
Self::Raw { .. } => "raw block",
Self::Link => "link",
Self::Label => "label",
Self::Ref => "reference",
Self::Heading => "heading",
Self::ListItem => "list item",
Self::EnumItem => "enumeration item",
Self::EnumNumbering(_) => "enumeration item numbering",
Self::EnumNumbering => "enumeration item numbering",
Self::TermItem => "term list item",
Self::Math => "math formula",
Self::Atom(s) => match s.as_str() {
"(" => "opening paren",
")" => "closing paren",
"{" => "opening brace",
"}" => "closing brace",
"[" => "opening bracket",
"]" => "closing bracket",
_ => "math atom",
},
Self::Atom => "math atom",
Self::Script => "script",
Self::Frac => "fraction",
Self::AlignPoint => "alignment point",
Self::Ident(_) => "identifier",
Self::Bool(_) => "boolean",
Self::Int(_) => "integer",
Self::Float(_) => "float",
Self::Numeric(_, _) => "numeric value",
Self::Str(_) => "string",
Self::Ident => "identifier",
Self::Bool => "boolean",
Self::Int => "integer",
Self::Float => "float",
Self::Numeric => "numeric value",
Self::Str => "string",
Self::CodeBlock => "code block",
Self::ContentBlock => "content block",
Self::Parenthesized => "group",
@ -453,127 +398,7 @@ impl SyntaxKind {
Self::LoopBreak => "`break` expression",
Self::LoopContinue => "`continue` expression",
Self::FuncReturn => "`return` expression",
Self::Error(_, _) => "syntax error",
}
}
}
impl Hash for SyntaxKind {
fn hash<H: Hasher>(&self, state: &mut H) {
std::mem::discriminant(self).hash(state);
match self {
Self::LineComment => {}
Self::BlockComment => {}
Self::Space { newlines } => newlines.hash(state),
Self::LeftBrace => {}
Self::RightBrace => {}
Self::LeftBracket => {}
Self::RightBracket => {}
Self::LeftParen => {}
Self::RightParen => {}
Self::Comma => {}
Self::Semicolon => {}
Self::Colon => {}
Self::Star => {}
Self::Underscore => {}
Self::Dollar => {}
Self::Plus => {}
Self::Minus => {}
Self::Slash => {}
Self::Hat => {}
Self::Amp => {}
Self::Dot => {}
Self::Eq => {}
Self::EqEq => {}
Self::ExclEq => {}
Self::Lt => {}
Self::LtEq => {}
Self::Gt => {}
Self::GtEq => {}
Self::PlusEq => {}
Self::HyphEq => {}
Self::StarEq => {}
Self::SlashEq => {}
Self::Dots => {}
Self::Arrow => {}
Self::Not => {}
Self::And => {}
Self::Or => {}
Self::None => {}
Self::Auto => {}
Self::Let => {}
Self::Set => {}
Self::Show => {}
Self::If => {}
Self::Else => {}
Self::For => {}
Self::In => {}
Self::While => {}
Self::Break => {}
Self::Continue => {}
Self::Return => {}
Self::Import => {}
Self::Include => {}
Self::As => {}
Self::Markup { min_indent } => min_indent.hash(state),
Self::Text(s) => s.hash(state),
Self::Linebreak => {}
Self::Escape(c) => c.hash(state),
Self::Shorthand(c) => c.hash(state),
Self::Symbol(s) => s.hash(state),
Self::SmartQuote { double } => double.hash(state),
Self::Strong => {}
Self::Emph => {}
Self::Raw(raw) => raw.hash(state),
Self::Link(link) => link.hash(state),
Self::Label(c) => c.hash(state),
Self::Ref(c) => c.hash(state),
Self::Heading => {}
Self::ListItem => {}
Self::EnumItem => {}
Self::EnumNumbering(num) => num.hash(state),
Self::TermItem => {}
Self::Math => {}
Self::Atom(c) => c.hash(state),
Self::Script => {}
Self::Frac => {}
Self::AlignPoint => {}
Self::Ident(v) => v.hash(state),
Self::Bool(v) => v.hash(state),
Self::Int(v) => v.hash(state),
Self::Float(v) => v.to_bits().hash(state),
Self::Numeric(v, u) => (v.to_bits(), u).hash(state),
Self::Str(v) => v.hash(state),
Self::CodeBlock => {}
Self::ContentBlock => {}
Self::Parenthesized => {}
Self::Array => {}
Self::Dict => {}
Self::Named => {}
Self::Keyed => {}
Self::Unary => {}
Self::Binary => {}
Self::FieldAccess => {}
Self::FuncCall => {}
Self::MethodCall => {}
Self::Args => {}
Self::Spread => {}
Self::Closure => {}
Self::Params => {}
Self::LetBinding => {}
Self::SetRule => {}
Self::ShowRule => {}
Self::Conditional => {}
Self::WhileLoop => {}
Self::ForLoop => {}
Self::ForPattern => {}
Self::ModuleImport => {}
Self::ImportItems => {}
Self::ModuleInclude => {}
Self::LoopBreak => {}
Self::LoopContinue => {}
Self::FuncReturn => {}
Self::Error(pos, msg) => (pos, msg).hash(state),
Self::Error => "syntax error",
}
}
}

View File

@ -1,17 +1,12 @@
use std::num::NonZeroUsize;
use std::sync::Arc;
use unicode_xid::UnicodeXID;
use unscanny::Scanner;
use super::resolve::{resolve_hex, resolve_raw, resolve_string};
use super::{ErrorPos, RawFields, SyntaxKind, Unit};
use crate::geom::{AbsUnit, AngleUnit};
use super::{ErrorPos, SyntaxKind};
use crate::util::{format_eco, EcoString};
/// Splits up a string of source code into tokens.
#[derive(Clone)]
pub struct Lexer<'s> {
pub(super) struct Lexer<'s> {
/// The underlying scanner.
s: Scanner<'s>,
/// The mode the lexer is in. This determines what tokens it recognizes.
@ -20,11 +15,13 @@ pub struct Lexer<'s> {
terminated: bool,
/// Offsets the indentation on the first line of the source.
column_offset: usize,
/// An error for the last token.
error: Option<(EcoString, ErrorPos)>,
}
/// What kind of tokens to emit.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum LexMode {
pub(super) enum LexMode {
/// Text and markup.
Markup,
/// Math atoms, operators, etc.
@ -34,11 +31,6 @@ pub enum LexMode {
}
impl<'s> Lexer<'s> {
/// Create a new lexer with the given mode.
pub fn new(text: &'s str, mode: LexMode) -> Self {
Self::with_prefix("", text, mode)
}
/// Create a new lexer with the given mode and a prefix to offset column
/// calculations.
pub fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self {
@ -47,6 +39,7 @@ impl<'s> Lexer<'s> {
mode,
terminated: true,
column_offset: column(prefix, prefix.len(), 0),
error: None,
}
}
@ -85,6 +78,23 @@ impl<'s> Lexer<'s> {
pub fn column(&self, index: usize) -> usize {
column(self.s.string(), index, self.column_offset)
}
/// Take out the last error.
///
/// Returns the stored message together with its position and leaves `None`
/// behind, so a second call without a new error in between yields `None`.
pub fn last_error(&mut self) -> Option<(EcoString, ErrorPos)> {
self.error.take()
}
/// Construct a full-positioned syntax error.
///
/// Stores `message` with `ErrorPos::Full` in the lexer, replacing any
/// previously stored error, and returns `SyntaxKind::Error` so the call can
/// be used directly as the resulting token kind.
fn error(&mut self, message: impl Into<EcoString>) -> SyntaxKind {
self.error = Some((message.into(), ErrorPos::Full));
SyntaxKind::Error
}
/// Construct a syntax error positioned at the end.
///
/// Stores `message` with `ErrorPos::End` in the lexer, replacing any
/// previously stored error, and returns `SyntaxKind::Error` so the call can
/// be used directly as the resulting token kind.
fn error_at_end(&mut self, message: impl Into<EcoString>) -> SyntaxKind {
self.error = Some((message.into(), ErrorPos::End));
SyntaxKind::Error
}
}
impl Iterator for Lexer<'_> {
@ -92,22 +102,20 @@ impl Iterator for Lexer<'_> {
/// Produce the next token.
fn next(&mut self) -> Option<Self::Item> {
self.error = None;
let start = self.s.cursor();
let c = self.s.eat()?;
Some(match c {
// Trivia.
c if c.is_whitespace() => self.whitespace(c),
'/' if self.s.eat_if('/') => self.line_comment(),
'/' if self.s.eat_if('*') => self.block_comment(),
'*' if self.s.eat_if('/') => SyntaxKind::Error(
ErrorPos::Full,
"unexpected end of block comment".into(),
),
c if c.is_whitespace() => self.whitespace(c),
'*' if self.s.eat_if('/') => self.error("unexpected end of block comment"),
// Other things.
_ => match self.mode {
LexMode::Markup => self.markup(start, c),
LexMode::Math => self.math(start, c),
LexMode::Math => self.math(c),
LexMode::Code => self.code(start, c),
},
})
@ -118,7 +126,7 @@ impl Iterator for Lexer<'_> {
impl Lexer<'_> {
fn line_comment(&mut self) -> SyntaxKind {
self.s.eat_until(is_newline);
if self.s.peek().is_none() {
if self.s.done() {
self.terminated = false;
}
SyntaxKind::LineComment
@ -182,57 +190,64 @@ impl Lexer<'_> {
}
}
/// Markup.
impl Lexer<'_> {
/// Lex one token in markup mode.
///
/// `c` is the character that was already eaten by the caller and `start` is
/// the cursor position in front of it.
// NOTE(review): this is a diff rendering. Arms of the new match (which
// return field-less kinds such as `SyntaxKind::Shorthand`) are interleaved
// with arms of the old match (which still carry payloads such as
// `SyntaxKind::Shorthand('\u{2026}')`), so several patterns appear twice.
fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
// Blocks.
'\\' => self.backslash(),
':' if self.s.at(is_id_start) => self.maybe_symbol(),
'`' => self.raw(),
'h' if self.s.eat_if("ttp://") => self.link(),
'h' if self.s.eat_if("ttps://") => self.link(),
'<' if self.s.at(is_id_continue) => self.label(),
'@' if self.s.at(is_id_continue) => self.reference(),
'0'..='9' => self.numbering(start),
// `#{` and `#[` yield the brace/bracket kind; `#` followed by an
// identifier start yields a keyword kind or `Ident`.
'#' if self.s.eat_if('{') => SyntaxKind::LeftBrace,
'#' if self.s.eat_if('[') => SyntaxKind::LeftBracket,
'#' if self.s.at(is_id_start) => {
match keyword(self.s.eat_while(is_id_continue)) {
Some(keyword) => keyword,
None => SyntaxKind::Ident,
}
}
// The three-character `---` is tried before the two-character `--`.
'.' if self.s.eat_if("..") => SyntaxKind::Shorthand,
'-' if self.s.eat_if("--") => SyntaxKind::Shorthand,
'-' if self.s.eat_if('-') => SyntaxKind::Shorthand,
'-' if self.s.eat_if('?') => SyntaxKind::Shorthand,
'*' if !self.in_word() => SyntaxKind::Star,
'_' if !self.in_word() => SyntaxKind::Underscore,
'{' => SyntaxKind::LeftBrace,
'}' => SyntaxKind::RightBrace,
'[' => SyntaxKind::LeftBracket,
']' => SyntaxKind::RightBracket,
// Multi-char things.
'#' => self.hash(start),
'.' if self.s.eat_if("..") => SyntaxKind::Shorthand('\u{2026}'),
'-' => self.hyph(),
':' => self.colon(),
'h' if self.s.eat_if("ttp://") || self.s.eat_if("ttps://") => {
self.link(start)
}
'`' => self.raw(),
c if c.is_ascii_digit() => self.numbering(start),
'<' if self.s.at(is_id_continue) => self.label(),
'@' if self.s.at(is_id_continue) => self.reference(),
// Escape sequences.
'\\' => self.backslash(),
// Single-char things.
'~' => SyntaxKind::Shorthand('\u{00A0}'),
'\'' => SyntaxKind::SmartQuote { double: false },
'"' => SyntaxKind::SmartQuote { double: true },
'*' if !self.in_word() => SyntaxKind::Star,
'_' if !self.in_word() => SyntaxKind::Underscore,
'\'' => SyntaxKind::SmartQuote,
'"' => SyntaxKind::SmartQuote,
'$' => SyntaxKind::Dollar,
'=' => SyntaxKind::Eq,
'+' => SyntaxKind::Plus,
'/' => SyntaxKind::Slash,
'~' => SyntaxKind::Shorthand,
':' => SyntaxKind::Colon,
'-' => SyntaxKind::Minus,
// Plain text.
_ => self.text(start),
_ => self.text(),
}
}
fn text(&mut self, start: usize) -> SyntaxKind {
fn text(&mut self) -> SyntaxKind {
macro_rules! table {
($(|$c:literal)*) => {{
let mut t = [false; 128];
$(t[$c as usize] = true;)*
t
}}
($(|$c:literal)*) => {
static TABLE: [bool; 128] = {
let mut t = [false; 128];
$(t[$c as usize] = true;)*
t
};
};
}
const TABLE: [bool; 128] = table! {
table! {
| ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' | '\\' | '/'
| '[' | ']' | '{' | '}' | '~' | '-' | '.' | '\'' | '"'
| '*' | '_' | ':' | 'h' | '`' | '$' | '<' | '>' | '@' | '#'
@ -247,8 +262,8 @@ impl Lexer<'_> {
// anyway.
let mut s = self.s;
match s.eat() {
Some('/') if !s.at(['/', '*']) => {}
Some(' ') if s.at(char::is_alphanumeric) => {}
Some('/') if !s.at(['/', '*']) => {}
Some('-') if !s.at(['-', '?']) => {}
Some('.') if !s.at("..") => {}
Some('h') if !s.at("ttp://") && !s.at("ttps://") => {}
@ -259,77 +274,40 @@ impl Lexer<'_> {
self.s = s;
}
SyntaxKind::Text(self.s.from(start).into())
SyntaxKind::Text
}
/// Lex what follows a backslash: a `\u{...}` unicode escape, a linebreak
/// (when the backslash is followed by whitespace or the end of the input)
/// or an escape of the single following character.
///
/// A unicode escape without closing brace marks the lexer as unterminated
/// and yields an "expected closing brace" error; hex digits that do not
/// form a valid character yield an "invalid unicode escape sequence" error.
// NOTE(review): this is a diff rendering. The old `match`-based body, the
// new early-return body and the removed `hash` helper are interleaved
// below; the lines from `if self.s.done()` on appear to be the tail of the
// new `backslash`.
fn backslash(&mut self) -> SyntaxKind {
match self.s.peek() {
Some('u') if self.s.eat_if("u{") => {
let sequence = self.s.eat_while(char::is_ascii_alphanumeric);
if self.s.eat_if('}') {
if let Some(c) = resolve_hex(sequence) {
SyntaxKind::Escape(c)
} else {
SyntaxKind::Error(
ErrorPos::Full,
"invalid unicode escape sequence".into(),
)
}
} else {
self.terminated = false;
SyntaxKind::Error(ErrorPos::End, "expected closing brace".into())
}
if self.s.eat_if("u{") {
let hex = self.s.eat_while(char::is_ascii_alphanumeric);
if !self.s.eat_if('}') {
self.terminated = false;
return self.error_at_end("expected closing brace");
}
// Linebreaks.
Some(c) if c.is_whitespace() => SyntaxKind::Linebreak,
None => SyntaxKind::Linebreak,
// Escapes.
Some(c) => {
self.s.expect(c);
SyntaxKind::Escape(c)
// The new version only validates the escape: the digits must parse as
// base-16 and map to a valid `char`; the character itself is not kept.
if u32::from_str_radix(hex, 16)
.ok()
.and_then(std::char::from_u32)
.is_none()
{
return self.error("invalid unicode escape sequence");
}
return SyntaxKind::Escape;
}
}
// Removed helper: lexed what follows a `#`.
fn hash(&mut self, start: usize) -> SyntaxKind {
if self.s.eat_if('{') {
SyntaxKind::LeftBrace
} else if self.s.eat_if('[') {
SyntaxKind::LeftBracket
} else if self.s.at(is_id_start) {
let read = self.s.eat_while(is_id_continue);
match keyword(read) {
Some(keyword) => keyword,
None => SyntaxKind::Ident(read.into()),
}
} else if self.mode == LexMode::Markup {
self.text(start)
if self.s.done() || self.s.at(char::is_whitespace) {
SyntaxKind::Linebreak
} else {
SyntaxKind::Atom("#".into())
self.s.eat();
SyntaxKind::Escape
}
}
/// Lex what follows a hyphen that was already eaten: `--` and `---` become
/// shorthands for U+2013 and U+2014, `-?` becomes a shorthand for U+00AD,
/// and a lone hyphen is a `Minus`.
// NOTE(review): this helper appears only on the removed side of the diff;
// the new `markup` match handles these cases with inline arms.
fn hyph(&mut self) -> SyntaxKind {
if self.s.eat_if('-') {
if self.s.eat_if('-') {
SyntaxKind::Shorthand('\u{2014}')
} else {
SyntaxKind::Shorthand('\u{2013}')
}
} else if self.s.eat_if('?') {
SyntaxKind::Shorthand('\u{00AD}')
} else {
SyntaxKind::Minus
}
}
fn colon(&mut self) -> SyntaxKind {
fn maybe_symbol(&mut self) -> SyntaxKind {
let start = self.s.cursor();
let mut end = start;
while !self.s.eat_while(char::is_ascii_alphanumeric).is_empty() && self.s.at(':')
{
while !self.s.eat_while(is_id_continue).is_empty() && self.s.at(':') {
end = self.s.cursor();
self.s.eat();
}
@ -338,15 +316,15 @@ impl Lexer<'_> {
if start < end {
self.s.expect(':');
SyntaxKind::Symbol(self.s.get(start..end).into())
SyntaxKind::Symbol
} else if self.mode == LexMode::Markup {
SyntaxKind::Colon
} else {
SyntaxKind::Atom(":".into())
SyntaxKind::Atom
}
}
fn link(&mut self, start: usize) -> SyntaxKind {
fn link(&mut self) -> SyntaxKind {
#[rustfmt::skip]
self.s.eat_while(|c: char| matches!(c,
| '0' ..= '9'
@ -355,10 +333,12 @@ impl Lexer<'_> {
| '~' | '/' | '%' | '?' | '#' | '&' | '+' | '='
| '\'' | '.' | ',' | ';'
));
if self.s.scout(-1) == Some('.') {
self.s.uneat();
}
SyntaxKind::Link(self.s.from(start).into())
SyntaxKind::Link
}
fn raw(&mut self) -> SyntaxKind {
@ -369,16 +349,10 @@ impl Lexer<'_> {
backticks += 1;
}
// Special case for empty inline block.
if backticks == 2 {
return SyntaxKind::Raw(Arc::new(RawFields {
text: EcoString::new(),
lang: None,
block: false,
}));
return SyntaxKind::Raw { column };
}
let start = self.s.cursor();
let mut found = 0;
while found < backticks {
match self.s.eat() {
@ -388,45 +362,40 @@ impl Lexer<'_> {
}
}
if found == backticks {
let end = self.s.cursor() - found as usize;
SyntaxKind::Raw(Arc::new(resolve_raw(
column,
backticks,
self.s.get(start..end),
)))
} else {
if found != backticks {
self.terminated = false;
let remaining = backticks - found;
let noun = if remaining == 1 { "backtick" } else { "backticks" };
SyntaxKind::Error(
ErrorPos::End,
if found == 0 {
format_eco!("expected {} {}", remaining, noun)
} else {
format_eco!("expected {} more {}", remaining, noun)
},
)
return self.error_at_end(if found == 0 {
format_eco!("expected {} {}", remaining, noun)
} else {
format_eco!("expected {} more {}", remaining, noun)
});
}
SyntaxKind::Raw { column }
}
/// Lex an explicit enumeration numbering such as `23.`.
///
/// `start` is the position of the first digit. The remaining ASCII digits
/// are eaten; if a `.` follows and the digits parse as a `usize`, the token
/// is an `EnumNumbering`, except that zero produces a "must be positive"
/// error. In every other case the input is lexed as plain text.
fn numbering(&mut self, start: usize) -> SyntaxKind {
self.s.eat_while(char::is_ascii_digit);
let read = self.s.from(start);
if self.s.eat_if('.') {
if let Ok(number) = read.parse::<usize>() {
// Diff view: the old version stored the number as a `NonZeroUsize`
// in the kind; the new one only checks that it is not zero.
return match NonZeroUsize::new(number) {
Some(number) => SyntaxKind::EnumNumbering(number),
None => SyntaxKind::Error(ErrorPos::Full, "must be positive".into()),
};
if number == 0 {
return self.error("must be positive");
}
return SyntaxKind::EnumNumbering;
}
}
// Diff view: the old call, directly followed by its replacement.
self.text(start)
self.text()
}
/// Lex a reference such as `@target`: eats the identifier characters that
/// follow and yields a `Ref`.
fn reference(&mut self) -> SyntaxKind {
// Diff view: the old version stored the eaten target in the kind; the new
// one eats it and returns the field-less kind.
SyntaxKind::Ref(self.s.eat_while(is_id_continue).into())
self.s.eat_while(is_id_continue);
SyntaxKind::Ref
}
fn in_word(&self) -> bool {
@ -439,95 +408,83 @@ impl Lexer<'_> {
/// Math.
impl Lexer<'_> {
/// Lex one token in math mode. `c` is the character that was already eaten
/// by the caller.
// NOTE(review): this is a diff rendering. The old signature (with `start`)
// is directly followed by the new one, and arms of the old match (kinds
// with payloads, e.g. `Shorthand('\u{21A6}')`) are interleaved with arms of
// the new match (field-less kinds), so several patterns appear twice.
fn math(&mut self, start: usize, c: char) -> SyntaxKind {
fn math(&mut self, c: char) -> SyntaxKind {
match c {
// Symbol shorthands.
'|' if self.s.eat_if("->") => SyntaxKind::Shorthand('\u{21A6}'),
'<' if self.s.eat_if("->") => SyntaxKind::Shorthand('\u{2194}'),
'<' if self.s.eat_if("=>") => SyntaxKind::Shorthand('\u{21D4}'),
'!' if self.s.eat_if('=') => SyntaxKind::Shorthand('\u{2260}'),
'<' if self.s.eat_if('=') => SyntaxKind::Shorthand('\u{2264}'),
'>' if self.s.eat_if('=') => SyntaxKind::Shorthand('\u{2265}'),
'<' if self.s.eat_if('-') => SyntaxKind::Shorthand('\u{2190}'),
'-' if self.s.eat_if('>') => SyntaxKind::Shorthand('\u{2192}'),
'=' if self.s.eat_if('>') => SyntaxKind::Shorthand('\u{21D2}'),
':' if self.s.eat_if('=') => SyntaxKind::Shorthand('\u{2254}'),
// Multi-char things.
'#' => self.hash(start),
// Escape sequences.
'\\' => self.backslash(),
// Single-char things.
'_' => SyntaxKind::Underscore,
'^' => SyntaxKind::Hat,
'/' => SyntaxKind::Slash,
'&' => SyntaxKind::Amp,
'$' => SyntaxKind::Dollar,
// Symbol notation.
':' => self.colon(),
// Strings.
':' if self.s.at(is_id_start) => self.maybe_symbol(),
'"' => self.string(),
'#' if self.s.eat_if('{') => SyntaxKind::LeftBrace,
'#' if self.s.eat_if('[') => SyntaxKind::LeftBracket,
'#' if self.s.at(is_id_start) => {
match keyword(self.s.eat_while(is_id_continue)) {
Some(keyword) => keyword,
None => SyntaxKind::Ident,
}
}
// Longer shorthands (`<->`, `<=>`) come before their prefixes (`<-`,
// `<=`) so that the longest one is eaten.
'|' if self.s.eat_if("->") => SyntaxKind::Shorthand,
'<' if self.s.eat_if("->") => SyntaxKind::Shorthand,
'<' if self.s.eat_if("=>") => SyntaxKind::Shorthand,
'!' if self.s.eat_if('=') => SyntaxKind::Shorthand,
'<' if self.s.eat_if('=') => SyntaxKind::Shorthand,
'>' if self.s.eat_if('=') => SyntaxKind::Shorthand,
'<' if self.s.eat_if('-') => SyntaxKind::Shorthand,
'-' if self.s.eat_if('>') => SyntaxKind::Shorthand,
'=' if self.s.eat_if('>') => SyntaxKind::Shorthand,
':' if self.s.eat_if('=') => SyntaxKind::Shorthand,
'_' => SyntaxKind::Underscore,
'$' => SyntaxKind::Dollar,
'/' => SyntaxKind::Slash,
'^' => SyntaxKind::Hat,
// The new version emits `AlignPoint` directly for `&`.
'&' => SyntaxKind::AlignPoint,
// Identifiers and symbol notation.
// Only taken when at least one more identifier character follows, so a
// single letter falls through to the atom arm.
c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
self.s.eat_while(is_math_id_continue);
let mut symbol = false;
while self.s.eat_if(':')
&& !self.s.eat_while(char::is_alphanumeric).is_empty()
{
symbol = true;
}
if symbol {
SyntaxKind::Symbol(self.s.from(start).into())
} else {
if self.s.scout(-1) == Some(':') {
self.s.uneat();
}
SyntaxKind::Ident(self.s.from(start).into())
}
}
// Numbers.
c if c.is_numeric() => {
self.s.eat_while(char::is_numeric);
SyntaxKind::Atom(self.s.from(start).into())
self.math_ident()
}
// Other math atoms.
c => SyntaxKind::Atom(c.into()),
_ => {
// Keep numbers together.
if c.is_numeric() {
self.s.eat_while(char::is_numeric);
}
SyntaxKind::Atom
}
}
}
/// Lex the remainder of a math identifier or of symbol notation.
///
/// First consumes all characters accepted by `is_math_id_continue`, then any
/// number of `:`-prefixed alphanumeric runs. Returns `SyntaxKind::Symbol` if at
/// least one such run was consumed and `SyntaxKind::Ident` otherwise.
fn math_ident(&mut self) -> SyntaxKind {
    self.s.eat_while(is_math_id_continue);

    // Consume `:modifier` parts for as long as a colon is directly followed
    // by at least one alphanumeric character.
    let mut saw_modifier = false;
    loop {
        if !self.s.eat_if(':') {
            break;
        }
        if self.s.eat_while(char::is_alphanumeric).is_empty() {
            break;
        }
        saw_modifier = true;
    }

    if saw_modifier {
        // NOTE(review): if the final loop iteration consumed a `:` that had
        // no alphanumeric run after it, that colon stays consumed here,
        // because the give-back below only runs on the identifier path.
        // Confirm this is intended.
        SyntaxKind::Symbol
    } else {
        // If the character right before the cursor is a colon (e.g. one the
        // loop consumed without a following modifier), step back over it.
        if self.s.scout(-1) == Some(':') {
            self.s.uneat();
        }
        SyntaxKind::Ident
    }
}
}
/// Code.
impl Lexer<'_> {
fn code(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
// Blocks.
'{' => SyntaxKind::LeftBrace,
'}' => SyntaxKind::RightBrace,
'[' => SyntaxKind::LeftBracket,
']' => SyntaxKind::RightBracket,
// Parentheses.
'(' => SyntaxKind::LeftParen,
')' => SyntaxKind::RightParen,
// Math.
'$' => SyntaxKind::Dollar,
// Labels and raw.
'<' if self.s.at(is_id_continue) => self.label(),
'`' => self.raw(),
'<' if self.s.at(is_id_continue) => self.label(),
'0'..='9' => self.number(start, c),
'.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
'"' => self.string(),
// Two-char operators.
'=' if self.s.eat_if('=') => SyntaxKind::EqEq,
'!' if self.s.eat_if('=') => SyntaxKind::ExclEq,
'<' if self.s.eat_if('=') => SyntaxKind::LtEq,
@ -539,10 +496,17 @@ impl Lexer<'_> {
'.' if self.s.eat_if('.') => SyntaxKind::Dots,
'=' if self.s.eat_if('>') => SyntaxKind::Arrow,
// Single-char operators.
'{' => SyntaxKind::LeftBrace,
'}' => SyntaxKind::RightBrace,
'[' => SyntaxKind::LeftBracket,
']' => SyntaxKind::RightBracket,
'(' => SyntaxKind::LeftParen,
')' => SyntaxKind::RightParen,
'$' => SyntaxKind::Dollar,
',' => SyntaxKind::Comma,
';' => SyntaxKind::Semicolon,
':' => SyntaxKind::Colon,
'.' => SyntaxKind::Dot,
'+' => SyntaxKind::Plus,
'-' => SyntaxKind::Minus,
'*' => SyntaxKind::Star,
@ -550,21 +514,10 @@ impl Lexer<'_> {
'=' => SyntaxKind::Eq,
'<' => SyntaxKind::Lt,
'>' => SyntaxKind::Gt,
'.' if !self.s.at(char::is_ascii_digit) => SyntaxKind::Dot,
// Identifiers.
c if is_id_start(c) => self.ident(start),
// Numbers.
c if c.is_ascii_digit() || (c == '.' && self.s.at(char::is_ascii_digit)) => {
self.number(start, c)
}
// Strings.
'"' => self.string(),
// Invalid token.
_ => SyntaxKind::Error(ErrorPos::Full, "not valid here".into()),
_ => self.error("not valid here"),
}
}
@ -573,9 +526,9 @@ impl Lexer<'_> {
match self.s.from(start) {
"none" => SyntaxKind::None,
"auto" => SyntaxKind::Auto,
"true" => SyntaxKind::Bool(true),
"false" => SyntaxKind::Bool(false),
id => keyword(id).unwrap_or_else(|| SyntaxKind::Ident(id.into())),
"true" => SyntaxKind::Bool,
"false" => SyntaxKind::Bool,
id => keyword(id).unwrap_or(SyntaxKind::Ident),
}
}
@ -604,64 +557,54 @@ impl Lexer<'_> {
let number = self.s.get(start..suffix_start);
let suffix = self.s.from(suffix_start);
// Find out whether it is a simple number.
if suffix.is_empty() {
if let Ok(i) = number.parse::<i64>() {
return SyntaxKind::Int(i);
}
return if number.parse::<i64>().is_ok() {
SyntaxKind::Int
} else if number.parse::<f64>().is_ok() {
SyntaxKind::Float
} else {
self.error("invalid number")
};
}
let Ok(v) = number.parse::<f64>() else {
return SyntaxKind::Error(ErrorPos::Full, "invalid number".into());
};
match suffix {
"" => SyntaxKind::Float(v),
"pt" => SyntaxKind::Numeric(v, Unit::Length(AbsUnit::Pt)),
"mm" => SyntaxKind::Numeric(v, Unit::Length(AbsUnit::Mm)),
"cm" => SyntaxKind::Numeric(v, Unit::Length(AbsUnit::Cm)),
"in" => SyntaxKind::Numeric(v, Unit::Length(AbsUnit::In)),
"deg" => SyntaxKind::Numeric(v, Unit::Angle(AngleUnit::Deg)),
"rad" => SyntaxKind::Numeric(v, Unit::Angle(AngleUnit::Rad)),
"em" => SyntaxKind::Numeric(v, Unit::Em),
"fr" => SyntaxKind::Numeric(v, Unit::Fr),
"%" => SyntaxKind::Numeric(v, Unit::Percent),
_ => SyntaxKind::Error(ErrorPos::Full, "invalid number suffix".into()),
if !matches!(
suffix,
"pt" | "mm" | "cm" | "in" | "deg" | "rad" | "em" | "fr" | "%"
) {
return self.error("invalid number suffix");
}
SyntaxKind::Numeric
}
fn string(&mut self) -> SyntaxKind {
let mut escaped = false;
let verbatim = self.s.eat_until(|c| {
if c == '"' && !escaped {
true
} else {
escaped = c == '\\' && !escaped;
false
}
self.s.eat_until(|c| {
let stop = c == '"' && !escaped;
escaped = c == '\\' && !escaped;
stop
});
let string = resolve_string(verbatim);
if self.s.eat_if('"') {
SyntaxKind::Str(string)
} else {
if !self.s.eat_if('"') {
self.terminated = false;
SyntaxKind::Error(ErrorPos::End, "expected quote".into())
return self.error_at_end("expected quote");
}
SyntaxKind::Str
}
fn label(&mut self) -> SyntaxKind {
let label = self.s.eat_while(is_id_continue);
if self.s.eat_if('>') {
if !label.is_empty() {
SyntaxKind::Label(label.into())
} else {
SyntaxKind::Error(ErrorPos::Full, "label cannot be empty".into())
}
} else {
self.terminated = false;
SyntaxKind::Error(ErrorPos::End, "expected closing angle bracket".into())
if label.is_empty() {
return self.error("label cannot be empty");
}
if !self.s.eat_if('>') {
self.terminated = false;
return self.error_at_end("expected closing angle bracket");
}
SyntaxKind::Label
}
}
@ -729,6 +672,29 @@ pub fn is_newline(character: char) -> bool {
)
}
/// Split text at newlines.
///
/// A line ends at every character for which `is_newline` returns true; a `\r`
/// that is directly followed by `\n` counts as a single terminator. The
/// returned slices do not contain the terminators. The result always holds at
/// least one element: the (possibly empty) text after the last terminator.
pub(super) fn split_newlines(text: &str) -> Vec<&str> {
    let mut s = Scanner::new(text);
    let mut lines = Vec::new();
    let mut line_start = 0;

    loop {
        // Byte offset at which the character we are about to read begins;
        // this is where the current line ends if that character is a newline.
        let char_start = s.cursor();
        let c = match s.eat() {
            Some(c) => c,
            None => break,
        };

        if !is_newline(c) {
            continue;
        }

        // Treat `\r\n` as one line break.
        if c == '\r' {
            s.eat_if('\n');
        }

        lines.push(&text[line_start..char_start]);
        line_start = s.cursor();
    }

    lines.push(&text[line_start..]);
    lines
}
/// Whether a string is a valid unicode identifier.
///
/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
@ -746,13 +712,13 @@ pub fn is_ident(string: &str) -> bool {
/// Whether a character can start an identifier.
#[inline]
fn is_id_start(c: char) -> bool {
pub(super) fn is_id_start(c: char) -> bool {
c.is_xid_start() || c == '_'
}
/// Whether a character can continue an identifier.
#[inline]
fn is_id_continue(c: char) -> bool {
pub(super) fn is_id_continue(c: char) -> bool {
c.is_xid_continue() || c == '_' || c == '-'
}

View File

View File

@ -2,22 +2,17 @@
pub mod ast;
mod incremental;
mod kind;
mod lexer;
mod node;
mod parser;
mod parsing;
mod resolve;
mod reparse;
mod source;
mod span;
pub use self::kind::*;
pub use self::lexer::*;
pub use self::node::*;
pub use self::parsing::*;
pub use self::parser::*;
pub use self::source::*;
pub use self::span::*;
use incremental::reparse;
use parser::*;

View File

@ -6,6 +6,7 @@ use std::sync::Arc;
use super::ast::AstNode;
use super::{SourceId, Span, SyntaxKind};
use crate::diag::SourceError;
use crate::util::EcoString;
/// A node in the untyped syntax tree.
#[derive(Clone, PartialEq, Hash)]
@ -15,84 +16,106 @@ pub struct SyntaxNode(Repr);
#[derive(Clone, PartialEq, Hash)]
enum Repr {
/// A leaf node.
Leaf(NodeData),
Leaf(LeafNode),
/// A reference-counted inner node.
Inner(Arc<InnerNode>),
/// An error.
Error(ErrorNode),
}
impl SyntaxNode {
/// Create a new leaf node.
pub fn leaf(kind: SyntaxKind, len: usize) -> Self {
Self(Repr::Leaf(NodeData::new(kind, len)))
pub fn leaf(kind: SyntaxKind, text: impl Into<EcoString>) -> Self {
Self(Repr::Leaf(LeafNode::new(kind, text)))
}
/// Create a new inner node with children.
pub fn inner(kind: SyntaxKind, children: Vec<SyntaxNode>) -> Self {
Self(Repr::Inner(Arc::new(InnerNode::with_children(kind, children))))
Self(Repr::Inner(Arc::new(InnerNode::new(kind, children))))
}
/// Create a new error node.
pub fn error(message: impl Into<EcoString>, pos: ErrorPos, len: usize) -> Self {
Self(Repr::Error(ErrorNode::new(message, pos, len)))
}
/// The type of the node.
pub fn kind(&self) -> &SyntaxKind {
&self.data().kind
}
/// Take the kind out of the node.
pub fn take(self) -> SyntaxKind {
match self.0 {
pub fn kind(&self) -> SyntaxKind {
match &self.0 {
Repr::Leaf(leaf) => leaf.kind,
Repr::Inner(inner) => inner.data.kind.clone(),
Repr::Inner(inner) => inner.kind,
Repr::Error(_) => SyntaxKind::Error,
}
}
/// The length of the node.
/// The byte length of the node in the source text.
pub fn len(&self) -> usize {
self.data().len
match &self.0 {
Repr::Leaf(leaf) => leaf.len(),
Repr::Inner(inner) => inner.len,
Repr::Error(error) => error.len,
}
}
/// The span of the node.
pub fn span(&self) -> Span {
self.data().span
match &self.0 {
Repr::Leaf(leaf) => leaf.span,
Repr::Inner(inner) => inner.span,
Repr::Error(error) => error.span,
}
}
/// The number of descendants, including the node itself.
pub fn descendants(&self) -> usize {
/// The text of the node if it is a leaf node.
///
/// Returns an empty string if this is an inner or error node.
pub fn text(&self) -> &EcoString {
static EMPTY: EcoString = EcoString::new();
match &self.0 {
Repr::Inner(inner) => inner.descendants,
Repr::Leaf(_) => 1,
Repr::Leaf(leaf) => &leaf.text,
Repr::Inner(_) | Repr::Error(_) => &EMPTY,
}
}
/// Extract the text from the node.
///
/// Returns an empty string if this is an inner or error node.
pub fn into_text(self) -> EcoString {
match self.0 {
Repr::Leaf(leaf) => leaf.text,
Repr::Inner(_) | Repr::Error(_) => EcoString::new(),
}
}
/// The node's children.
pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> {
match &self.0 {
Repr::Leaf(_) | Repr::Error(_) => [].iter(),
Repr::Inner(inner) => inner.children.iter(),
Repr::Leaf(_) => [].iter(),
}
}
/// Convert the node to a typed AST node.
pub fn cast<T>(&self) -> Option<T>
where
T: AstNode,
{
/// Try to convert the node to a typed AST node.
pub fn cast<T: AstNode>(&self) -> Option<T> {
T::from_untyped(self)
}
/// Get the first child that can cast to the AST type `T`.
pub fn cast_first_child<T: AstNode>(&self) -> Option<T> {
/// Cast the first child that can cast to the AST type `T`.
pub fn cast_first_match<T: AstNode>(&self) -> Option<T> {
self.children().find_map(Self::cast)
}
/// Get the last child that can cast to the AST type `T`.
pub fn cast_last_child<T: AstNode>(&self) -> Option<T> {
/// Cast the last child that can cast to the AST type `T`.
pub fn cast_last_match<T: AstNode>(&self) -> Option<T> {
self.children().rev().find_map(Self::cast)
}
/// Whether the node or its children contain an error.
pub fn erroneous(&self) -> bool {
match &self.0 {
Repr::Leaf(_) => false,
Repr::Inner(node) => node.erroneous,
Repr::Leaf(data) => data.kind.is_error(),
Repr::Error(_) => true,
}
}
@ -102,35 +125,41 @@ impl SyntaxNode {
return vec![];
}
match self.kind() {
SyntaxKind::Error(pos, message) => {
vec![SourceError::new(self.span(), message.clone()).with_pos(*pos)]
}
_ => self
.children()
if let Repr::Error(error) = &self.0 {
vec![SourceError::new(error.span, error.message.clone()).with_pos(error.pos)]
} else {
self.children()
.filter(|node| node.erroneous())
.flat_map(|node| node.errors())
.collect(),
.collect()
}
}
/// Change the type of the node.
pub(super) fn convert(&mut self, kind: SyntaxKind) {
pub(super) fn convert_to(&mut self, kind: SyntaxKind) {
debug_assert!(!kind.is_error());
match &mut self.0 {
Repr::Leaf(leaf) => leaf.kind = kind,
Repr::Inner(inner) => {
let node = Arc::make_mut(inner);
node.erroneous |= kind.is_error();
node.data.kind = kind;
node.kind = kind;
}
Repr::Leaf(leaf) => leaf.kind = kind,
Repr::Error(_) => {}
}
}
/// Convert the child to an error.
pub(super) fn convert_to_error(&mut self, message: impl Into<EcoString>) {
let len = self.len();
*self = SyntaxNode::error(message, ErrorPos::Full, len);
}
/// Set a synthetic span for the node and all its descendants.
pub(super) fn synthesize(&mut self, span: Span) {
match &mut self.0 {
Repr::Leaf(leaf) => leaf.span = span,
Repr::Inner(inner) => Arc::make_mut(inner).synthesize(span),
Repr::Leaf(leaf) => leaf.synthesize(span),
Repr::Error(error) => error.span = span,
}
}
@ -140,17 +169,25 @@ impl SyntaxNode {
id: SourceId,
within: Range<u64>,
) -> NumberingResult {
match &mut self.0 {
Repr::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within),
Repr::Leaf(leaf) => leaf.numberize(id, within),
if within.start >= within.end {
return Err(Unnumberable);
}
let mid = Span::new(id, (within.start + within.end) / 2);
match &mut self.0 {
Repr::Leaf(leaf) => leaf.span = mid,
Repr::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within)?,
Repr::Error(error) => error.span = mid,
}
Ok(())
}
/// If the span points into this node, convert it to a byte range.
pub(super) fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> {
match &self.0 {
Repr::Inner(inner) => inner.range(span, offset),
Repr::Leaf(leaf) => leaf.range(span, offset),
_ => (self.span() == span).then(|| offset..offset + self.len()),
}
}
@ -159,10 +196,18 @@ impl SyntaxNode {
matches!(self.0, Repr::Leaf(_))
}
/// The number of descendants, including the node itself.
pub(super) fn descendants(&self) -> usize {
match &self.0 {
Repr::Leaf(_) | Repr::Error(_) => 1,
Repr::Inner(inner) => inner.descendants,
}
}
/// The node's children, mutably.
pub(super) fn children_mut(&mut self) -> &mut [SyntaxNode] {
match &mut self.0 {
Repr::Leaf(_) => &mut [],
Repr::Leaf(_) | Repr::Error(_) => &mut [],
Repr::Inner(inner) => &mut Arc::make_mut(inner).children,
}
}
@ -199,19 +244,12 @@ impl SyntaxNode {
}
}
/// The metadata of the node.
fn data(&self) -> &NodeData {
match &self.0 {
Repr::Inner(inner) => &inner.data,
Repr::Leaf(leaf) => leaf,
}
}
/// The upper bound of assigned numbers in this subtree.
fn upper(&self) -> u64 {
match &self.0 {
Repr::Inner(inner) => inner.upper,
Repr::Leaf(leaf) => leaf.span.number() + 1,
Repr::Error(error) => error.span.number() + 1,
}
}
}
@ -221,21 +259,64 @@ impl Debug for SyntaxNode {
match &self.0 {
Repr::Inner(node) => node.fmt(f),
Repr::Leaf(node) => node.fmt(f),
Repr::Error(node) => node.fmt(f),
}
}
}
impl Default for SyntaxNode {
fn default() -> Self {
Self::leaf(SyntaxKind::None, 0)
Self::error("", ErrorPos::Full, 0)
}
}
/// A leaf node in the untyped syntax tree.
///
/// NOTE(review): `Hash` is derived over all fields (including `span`), while
/// the manual `PartialEq` below does not look at `span`. Two leaves that
/// compare equal may therefore hash differently — confirm this is intended.
#[derive(Clone, Hash)]
struct LeafNode {
    /// The syntactic kind of this node (in a strongly typed AST, every kind
    /// would be a struct of its own).
    kind: SyntaxKind,
    /// The source text this node covers.
    text: EcoString,
    /// The span assigned to this node.
    span: Span,
}

impl LeafNode {
    /// Build a leaf of the given kind over the given text, with a detached
    /// span.
    ///
    /// In debug builds, asserts that `kind` is not an error kind.
    fn new(kind: SyntaxKind, text: impl Into<EcoString>) -> Self {
        debug_assert!(!kind.is_error());
        let text = text.into();
        let span = Span::detached();
        Self { kind, text, span }
    }

    /// The number of bytes this node occupies in the source text, i.e. the
    /// byte length of its stored text.
    fn len(&self) -> usize {
        self.text.len()
    }
}

impl Debug for LeafNode {
    /// Formats the node as `<kind>: <byte length>`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let len = self.len();
        write!(f, "{:?}: {}", self.kind, len)
    }
}

impl PartialEq for LeafNode {
    /// Two leaves are equal when kind and text match; the span takes no part
    /// in the comparison.
    fn eq(&self, other: &Self) -> bool {
        let Self { kind, text, span: _ } = self;
        *kind == other.kind && *text == other.text
    }
}
/// An inner node in the untyped syntax tree.
#[derive(Clone, Hash)]
struct InnerNode {
/// Node metadata.
data: NodeData,
/// What kind of node this is (each kind would have its own struct in a
/// strongly typed AST).
kind: SyntaxKind,
/// The byte length of the node in the source.
len: usize,
/// The node's span.
span: Span,
/// The number of nodes in the whole subtree, including this node.
descendants: usize,
/// Whether this node or any of its children are erroneous.
@ -248,10 +329,12 @@ struct InnerNode {
impl InnerNode {
/// Create a new inner node with the given kind and children.
fn with_children(kind: SyntaxKind, children: Vec<SyntaxNode>) -> Self {
fn new(kind: SyntaxKind, children: Vec<SyntaxNode>) -> Self {
debug_assert!(!kind.is_error());
let mut len = 0;
let mut descendants = 1;
let mut erroneous = kind.is_error();
let mut erroneous = false;
for child in &children {
len += child.len();
@ -260,7 +343,9 @@ impl InnerNode {
}
Self {
data: NodeData::new(kind, len),
kind,
len,
span: Span::detached(),
descendants,
erroneous,
upper: 0,
@ -270,7 +355,7 @@ impl InnerNode {
/// Set a synthetic span for the node and all its descendants.
fn synthesize(&mut self, span: Span) {
self.data.synthesize(span);
self.span = span;
for child in &mut self.children {
child.synthesize(span);
}
@ -310,7 +395,7 @@ impl InnerNode {
let mut start = within.start;
if range.is_none() {
let end = start + stride;
self.data.numberize(id, start..end)?;
self.span = Span::new(id, (start + end) / 2);
self.upper = within.end;
start = end;
}
@ -329,14 +414,14 @@ impl InnerNode {
/// If the span points into this node, convert it to a byte range.
fn range(&self, span: Span, mut offset: usize) -> Option<Range<usize>> {
// Check whether we found it.
if let Some(range) = self.data.range(span, offset) {
return Some(range);
if span == self.span {
return Some(offset..offset + self.len);
}
// The parent of a subtree has a smaller span number than all of its
// descendants. Therefore, we can bail out early if the target span's
// number is smaller than our number.
if span.number() < self.data.span.number() {
if span.number() < self.span.number() {
return None;
}
@ -371,8 +456,7 @@ impl InnerNode {
let superseded = &self.children[range.clone()];
// Compute the new byte length.
self.data.len = self.data.len
+ replacement.iter().map(SyntaxNode::len).sum::<usize>()
self.len = self.len + replacement.iter().map(SyntaxNode::len).sum::<usize>()
- superseded.iter().map(SyntaxNode::len).sum::<usize>();
// Compute the new number of descendants.
@ -412,7 +496,7 @@ impl InnerNode {
.start
.checked_sub(1)
.and_then(|i| self.children.get(i))
.map_or(self.data.span.number() + 1, |child| child.upper());
.map_or(self.span.number() + 1, |child| child.upper());
// The upper bound for renumbering is either
// - the span number of the first child after the to-be-renumbered
@ -426,7 +510,7 @@ impl InnerNode {
// Try to renumber.
let within = start_number..end_number;
let id = self.data.span.source();
let id = self.span.source();
if self.numberize(id, Some(renumber), within).is_ok() {
return Ok(());
}
@ -450,7 +534,7 @@ impl InnerNode {
prev_descendants: usize,
new_descendants: usize,
) {
self.data.len = self.data.len + new_len - prev_len;
self.len = self.len + new_len - prev_len;
self.descendants = self.descendants + new_descendants - prev_descendants;
self.erroneous = self.children.iter().any(SyntaxNode::erroneous);
}
@ -458,7 +542,7 @@ impl InnerNode {
impl Debug for InnerNode {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
self.data.fmt(f)?;
write!(f, "{:?}: {}", self.kind, self.len)?;
if !self.children.is_empty() {
f.write_str(" ")?;
f.debug_list().entries(&self.children).finish()?;
@ -469,64 +553,62 @@ impl Debug for InnerNode {
impl PartialEq for InnerNode {
fn eq(&self, other: &Self) -> bool {
self.data == other.data
self.kind == other.kind
&& self.len == other.len
&& self.descendants == other.descendants
&& self.erroneous == other.erroneous
&& self.children == other.children
}
}
/// Data shared between leaf and inner nodes.
/// An error node in the untyped syntax tree.
#[derive(Clone, Hash)]
struct NodeData {
/// What kind of node this is (each kind would have its own struct in a
/// strongly typed AST).
kind: SyntaxKind,
/// The byte length of the node in the source.
struct ErrorNode {
/// The error message.
message: EcoString,
/// Where in the node an error should be annotated.
pos: ErrorPos,
/// The byte length of the error in the source.
len: usize,
/// The node's span.
span: Span,
}
impl NodeData {
/// Create new node metadata.
fn new(kind: SyntaxKind, len: usize) -> Self {
Self { len, kind, span: Span::detached() }
}
/// Set a synthetic span for the node.
fn synthesize(&mut self, span: Span) {
self.span = span;
}
/// Assign a span to the node.
fn numberize(&mut self, id: SourceId, within: Range<u64>) -> NumberingResult {
if within.start < within.end {
self.span = Span::new(id, (within.start + within.end) / 2);
Ok(())
} else {
Err(Unnumberable)
impl ErrorNode {
/// Create new error node.
fn new(message: impl Into<EcoString>, pos: ErrorPos, len: usize) -> Self {
Self {
message: message.into(),
pos,
len,
span: Span::detached(),
}
}
/// If the span points into this node, convert it to a byte range.
fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> {
(self.span == span).then(|| offset..offset + self.len)
}
}
impl Debug for NodeData {
impl Debug for ErrorNode {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{:?}: {}", self.kind, self.len)
write!(f, "({}): {}", self.message, self.len)
}
}
impl PartialEq for NodeData {
impl PartialEq for ErrorNode {
fn eq(&self, other: &Self) -> bool {
self.kind == other.kind && self.len == other.len
self.message == other.message && self.pos == other.pos && self.len == other.len
}
}
/// Where in a node an error should be annotated.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum ErrorPos {
/// Over the full width of the node.
Full,
/// At the start of the node.
Start,
/// At the end of the node.
End,
}
/// A syntax node in a context.
///
/// Knows its exact offset in the file and provides access to its
@ -542,7 +624,7 @@ pub struct LinkedNode<'a> {
}
impl<'a> LinkedNode<'a> {
/// Start a new traversal at the source's root node.
/// Start a new traversal at a root node.
pub fn new(root: &'a SyntaxNode) -> Self {
Self { node: root, parent: None, index: 0, offset: 0 }
}
@ -557,17 +639,17 @@ impl<'a> LinkedNode<'a> {
self.index
}
/// The absolute byte offset of the this node in the source file.
/// The absolute byte offset of this node in the source file.
pub fn offset(&self) -> usize {
self.offset
}
/// The byte range of the this node in the source file.
/// The byte range of this node in the source file.
pub fn range(&self) -> Range<usize> {
self.offset..self.offset + self.node.len()
}
/// Get this node's children.
/// An iterator over this node's children.
pub fn children(&self) -> LinkedChildren<'a> {
LinkedChildren {
parent: Rc::new(self.clone()),
@ -586,7 +668,7 @@ impl<'a> LinkedNode<'a> {
}
/// Get the kind of this node's parent.
pub fn parent_kind(&self) -> Option<&'a SyntaxKind> {
pub fn parent_kind(&self) -> Option<SyntaxKind> {
Some(self.parent()?.node.kind())
}
@ -648,7 +730,7 @@ impl<'a> LinkedNode<'a> {
None
}
/// Get the leaf at the specified cursor position.
/// Get the leaf at the specified byte offset.
pub fn leaf_at(&self, cursor: usize) -> Option<Self> {
if self.node.children().len() == 0 && cursor <= self.offset + self.len() {
return Some(self.clone());
@ -784,13 +866,13 @@ mod tests {
let node = LinkedNode::new(source.root()).leaf_at(7).unwrap();
assert_eq!(node.offset(), 5);
assert_eq!(node.len(), 4);
assert_eq!(node.kind(), &SyntaxKind::Ident("text".into()));
assert_eq!(node.kind(), SyntaxKind::Ident);
// Go back to "#set". Skips the space.
let prev = node.prev_sibling().unwrap();
assert_eq!(prev.offset(), 0);
assert_eq!(prev.len(), 4);
assert_eq!(prev.kind(), &SyntaxKind::Set);
assert_eq!(prev.kind(), SyntaxKind::Set);
}
#[test]
@ -798,15 +880,15 @@ mod tests {
let source = Source::detached("#set fun(12pt, red)");
let leaf = LinkedNode::new(source.root()).leaf_at(6).unwrap();
let prev = leaf.prev_leaf().unwrap();
assert_eq!(leaf.kind(), &SyntaxKind::Ident("fun".into()));
assert_eq!(prev.kind(), &SyntaxKind::Set);
assert_eq!(leaf.kind(), SyntaxKind::Ident);
assert_eq!(prev.kind(), SyntaxKind::Set);
let source = Source::detached("#let x = 10");
let leaf = LinkedNode::new(source.root()).leaf_at(9).unwrap();
let prev = leaf.prev_leaf().unwrap();
let next = leaf.next_leaf().unwrap();
assert_eq!(prev.kind(), &SyntaxKind::Eq);
assert_eq!(leaf.kind(), &SyntaxKind::Space { newlines: 0 });
assert_eq!(next.kind(), &SyntaxKind::Int(10));
assert_eq!(prev.kind(), SyntaxKind::Eq);
assert_eq!(leaf.kind(), SyntaxKind::Space { newlines: 0 });
assert_eq!(next.kind(), SyntaxKind::Int);
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -87,8 +87,8 @@ fn try_reparse(
// reject text that points to the special case for URL
// evasion and line comments.
if !child.kind().is_space()
&& child.kind() != &SyntaxKind::Semicolon
&& child.kind() != &SyntaxKind::Text('/'.into())
&& child.kind() != SyntaxKind::Semicolon
&& (child.kind() != SyntaxKind::Text || child.text() != "/")
&& (ahead.is_none() || change.replaced.start > child_span.end)
&& !ahead.map_or(false, Ahead::is_compulsory)
{
@ -177,7 +177,7 @@ fn try_reparse(
// Make sure this is a markup node and that we may replace. If so, save
// the current indent.
let min_indent = match node.kind() {
SyntaxKind::Markup { min_indent } if safe_to_replace => *min_indent,
SyntaxKind::Markup { min_indent } if safe_to_replace => min_indent,
_ => return None,
};
@ -375,23 +375,23 @@ enum ReparseMode {
/// Whether changes _inside_ this node are safely encapsulated, so that only
/// this node must be reparsed.
fn is_bounded(kind: &SyntaxKind) -> bool {
fn is_bounded(kind: SyntaxKind) -> bool {
matches!(
kind,
SyntaxKind::CodeBlock
| SyntaxKind::ContentBlock
| SyntaxKind::Linebreak
| SyntaxKind::SmartQuote { .. }
| SyntaxKind::SmartQuote
| SyntaxKind::BlockComment
| SyntaxKind::Space { .. }
| SyntaxKind::Escape(_)
| SyntaxKind::Shorthand(_)
| SyntaxKind::Escape
| SyntaxKind::Shorthand
)
}
/// Whether `at_start` would still be true after this node given the
/// previous value of the property.
fn next_at_start(kind: &SyntaxKind, prev: bool) -> bool {
fn next_at_start(kind: SyntaxKind, prev: bool) -> bool {
match kind {
SyntaxKind::Space { newlines: (1..) } => true,
SyntaxKind::Space { .. } | SyntaxKind::LineComment | SyntaxKind::BlockComment => {

View File

@ -1,233 +0,0 @@
use unscanny::Scanner;
use super::{is_ident, is_newline, RawFields};
use crate::util::EcoString;
/// Resolve all escape sequences in a string.
///
/// Recognized escapes are `\\`, `\"`, `\n`, `\r`, `\t` and `\u{HEX}`. Anything
/// else that starts with a backslash — an unknown escape, a `\u` without an
/// opening brace, a hex sequence that does not name a valid character, or a
/// lone trailing backslash — is copied to the output unchanged.
pub fn resolve_string(string: &str) -> EcoString {
    let mut out = EcoString::with_capacity(string.len());
    let mut s = Scanner::new(string);

    while let Some(c) = s.eat() {
        if c != '\\' {
            out.push(c);
            continue;
        }

        // Position of the backslash, so the raw escape can be copied through
        // if it turns out not to be resolvable.
        let start = s.locate(-1);

        let resolved = match s.eat() {
            Some('\\') => Some('\\'),
            Some('"') => Some('"'),
            Some('n') => Some('\n'),
            Some('r') => Some('\r'),
            Some('t') => Some('\t'),
            Some('u') if s.eat_if('{') => {
                // TODO: Error if closing brace is missing.
                // For now the closing brace is consumed when present and its
                // absence is silently tolerated.
                let sequence = s.eat_while(char::is_ascii_hexdigit);
                let _terminated = s.eat_if('}');
                resolve_hex(sequence)
            }
            _ => None,
        };

        match resolved {
            Some(c) => out.push(c),
            None => out.push_str(s.from(start)),
        }
    }

    out
}
/// Resolve a hexadecimal escape sequence into a character.
///
/// `sequence` is only the inner hex digits, without the braces or the `\u`
/// prefix. Returns `None` if it does not parse as a base-16 `u32` or if the
/// resulting number is not a valid `char`.
pub fn resolve_hex(sequence: &str) -> Option<char> {
    let code = u32::from_str_radix(sequence, 16).ok()?;
    char::from_u32(code)
}
/// Resolve the language tag and trim the raw text.
///
/// With a single backtick, there is no language tag: the text's lines are
/// re-joined with `\n` and `block` is `false`. With more than one backtick,
/// the leading tag is split off (and kept as `lang` only if it is a valid
/// identifier), the remainder is trimmed and dedented relative to `column`,
/// and `block` reports whether the trimmed input spanned more than one line.
pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawFields {
    if backticks <= 1 {
        let joined = split_lines(text).join("\n");
        return RawFields { lang: None, text: joined.into(), block: false };
    }

    let (tag, rest) = split_at_lang_tag(text);
    let (trimmed, block) = trim_and_split_raw(column, rest);
    let lang = if is_ident(tag) { Some(tag.into()) } else { None };

    RawFields { lang, text: trimmed.into(), block }
}
/// Split raw text into its language tag and the remaining inner text.
///
/// The tag is everything up to (not including) the first backtick, whitespace
/// character or newline; it is empty if the text starts with one of those.
fn split_at_lang_tag(raw: &str) -> (&str, &str) {
    let mut s = Scanner::new(raw);
    let tag = s.eat_until(|c: char| c == '`' || c.is_whitespace() || is_newline(c));
    let rest = s.after();
    (tag, rest)
}
/// Trim raw text, dedent it and join its lines with `\n`.
///
/// Also returns whether the text spanned more than one line after the
/// leading/trailing space was removed (checked before blank first/last lines
/// are dropped).
fn trim_and_split_raw(column: usize, raw: &str) -> (String, bool) {
    /// Whether a line consists solely of whitespace (or is empty).
    fn is_blank(line: &&str) -> bool {
        line.chars().all(char::is_whitespace)
    }

    // Drop a single leading space.
    let raw = raw.strip_prefix(' ').unwrap_or(raw);

    // Drop a single trailing space, but only if the last non-whitespace
    // character is a backtick.
    let raw = if raw.trim_end().ends_with('`') {
        raw.strip_suffix(' ').unwrap_or(raw)
    } else {
        raw
    };

    let mut lines = split_lines(raw);

    // Dedent every line but the first: strip leading whitespace, looking at
    // no more than the first `column` characters of the line.
    for line in lines.iter_mut().skip(1) {
        let indent: usize = line
            .chars()
            .take(column)
            .take_while(|c| c.is_whitespace())
            .map(char::len_utf8)
            .sum();
        *line = &line[indent..];
    }

    let had_newline = lines.len() > 1;

    // Drop a whitespace-only first line.
    if lines.first().map_or(false, is_blank) {
        lines.remove(0);
    }

    // Drop a whitespace-only last line.
    if lines.last().map_or(false, is_blank) {
        lines.pop();
    }

    (lines.join("\n"), had_newline)
}
/// Split a string into a vector of lines.
///
/// A line ends at every character for which `is_newline` returns true; a `\r`
/// directly followed by `\n` is handled as one line break. The returned slices
/// exclude the line breaks, and the result always has at least one element.
fn split_lines(text: &str) -> Vec<&str> {
    let mut s = Scanner::new(text);
    let mut lines = Vec::new();
    let mut line_start = 0;

    loop {
        // Byte offset at which the character we are about to read begins;
        // this is where the current line ends if that character is a newline.
        let char_start = s.cursor();
        let c = match s.eat() {
            Some(c) => c,
            None => break,
        };

        if !is_newline(c) {
            continue;
        }

        // Treat `\r\n` as one line break.
        if c == '\r' {
            s.eat_if('\n');
        }

        lines.push(&text[line_start..char_start]);
        line_start = s.cursor();
    }

    lines.push(&text[line_start..]);
    lines
}
// Unit tests for the string and raw-text resolution helpers in this file.
#[cfg(test)]
#[rustfmt::skip]
mod tests {
use super::*;
// Escape resolution: known escapes are replaced, everything else is kept
// verbatim.
#[test]
fn test_resolve_strings() {
#[track_caller]
fn test(string: &str, expected: &str) {
assert_eq!(resolve_string(string), expected);
}
test(r#"hello world"#, "hello world");
test(r#"hello\nworld"#, "hello\nworld");
test(r#"a\"bc"#, "a\"bc");
test(r#"a\u{2603}bc"#, "a☃bc");
// Unicode escapes without a closing brace still resolve the hex digits
// that were read.
test(r#"a\u{26c3bg"#, "a𦰻g");
test(r#"av\u{6797"#, "av林");
test(r#"a\\"#, "a\\");
test(r#"a\\\nbc"#, "a\\\nbc");
test(r#"a\t\r\nbc"#, "a\t\r\nbc");
// Non-ASCII text, a trailing backslash and an unknown escape pass through
// unchanged.
test(r"🌎", "🌎");
test(r"🌎\", r"🌎\");
test(r"\🌎", r"\🌎");
}
// Splitting the language tag off the front of raw text.
#[test]
fn test_split_at_lang_tag() {
#[track_caller]
fn test(text: &str, lang: &str, inner: &str) {
assert_eq!(split_at_lang_tag(text), (lang, inner));
}
test("typst it!", "typst", " it!");
// NOTE(review): the next two cases are identical as written — check
// whether characters inside one of the literals were lost.
test("typst\n it!", "typst", "\n it!");
test("typst\n it!", "typst", "\n it!");
test("abc`", "abc", "`");
test(" hi", "", " hi");
test("`", "", "`");
}
// End-to-end resolution of raw text into language tag, text and block flag.
#[test]
fn test_resolve_raw() {
#[track_caller]
fn test(
column: usize,
backticks: usize,
raw: &str,
lang: Option<&str>,
text: &str,
block: bool,
) {
let node = resolve_raw(column, backticks, raw);
assert_eq!(node.lang.as_deref(), lang);
assert_eq!(node.text, text);
assert_eq!(node.block, block);
}
// Just one backtick.
test(0, 1, "py", None, "py", false);
test(0, 1, "1\n2", None, "1\n2", false);
test(0, 1, "1\r\n2", None, "1\n2", false);
// More than one backtick with lang tag.
test(0, 2, "js alert()", Some("js"), "alert()", false);
test(0, 3, "py quit(\n\n)", Some("py"), "quit(\n\n)", true);
test(0, 2, "", None, "", false);
// Trimming of whitespace (tested more thoroughly in separate test).
// NOTE(review): the first two cases below pass the same input literal but
// expect different outputs, so they cannot both hold as written — check
// whether whitespace inside the literals was lost.
test(0, 2, " a", None, "a", false);
test(0, 2, " a", None, " a", false);
test(0, 2, " \na", None, "a", true);
// Dedenting
test(2, 3, " def foo():\n bar()", None, "def foo():\n bar()", true);
}
// Trimming of leading/trailing spaces and blank first/last lines.
// NOTE(review): several adjacent cases below share an input literal while
// expecting different outputs (or are exact duplicates) — check whether
// whitespace inside the literals was lost.
#[test]
fn test_trim_raw() {
#[track_caller]
fn test(text: &str, expected: &str) {
assert_eq!(trim_and_split_raw(0, text).0, expected);
}
test(" hi", "hi");
test(" hi", " hi");
test("\nhi", "hi");
test(" \n hi", " hi");
test("hi` ", "hi`");
test("hi` ", "hi` ");
test("hi` ", "hi` ");
test("hi ", "hi ");
test("hi ", "hi ");
test("hi\n", "hi");
test("hi \n ", "hi ");
test(" \n hi \n ", " hi ");
}
}

View File

@ -8,10 +8,10 @@ use std::path::{Path, PathBuf};
use comemo::Prehashed;
use unscanny::Scanner;
use super::ast::Markup;
use super::reparse::reparse;
use super::{is_newline, parse, Span, SyntaxNode};
use crate::diag::SourceResult;
use crate::syntax::ast::Markup;
use crate::syntax::{is_newline, parse, reparse};
use crate::syntax::{Span, SyntaxNode};
use crate::util::{PathExt, StrExt};
/// A source file.
@ -124,11 +124,8 @@ impl Source {
}
// Recalculate the line starts after the edit.
self.lines.extend(lines_from(
start_byte,
start_utf16,
&self.text[start_byte..],
));
self.lines
.extend(lines_from(start_byte, start_utf16, &self.text[start_byte..]));
// Incrementally reparse the replaced range.
let mut root = std::mem::take(&mut self.root).into_inner();

View File

@ -5,7 +5,7 @@ use iai::{black_box, main, Iai};
use typst::diag::{FileError, FileResult};
use typst::font::{Font, FontBook};
use typst::model::Library;
use typst::syntax::{LexMode, Lexer, Source, SourceId};
use typst::syntax::{Source, SourceId};
use typst::util::Buffer;
use typst::World;
use unscanny::Scanner;
@ -16,7 +16,6 @@ const FONT: &[u8] = include_bytes!("../fonts/IBMPlexSans-Regular.ttf");
main!(
bench_decode,
bench_scan,
bench_lex,
bench_parse,
bench_edit,
bench_eval,
@ -49,10 +48,6 @@ fn bench_scan(iai: &mut Iai) {
})
}
fn bench_lex(iai: &mut Iai) {
iai.run(|| Lexer::new(black_box(TEXT), black_box(LexMode::Markup)).count());
}
fn bench_parse(iai: &mut Iai) {
iai.run(|| typst::syntax::parse(TEXT));
}

View File

@ -24,5 +24,5 @@ $ A sub:eq:not B $
<table>
---
// Error: 8 expected closing paren
// Error: 8 expected math atom
$ sum_( $