More math syntax
This commit is contained in:
parent
c2e458a133
commit
3ecb0c754b
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -1188,6 +1188,7 @@ dependencies = [
|
||||
"unicode-bidi",
|
||||
"unicode-math",
|
||||
"unicode-script",
|
||||
"unicode-segmentation",
|
||||
"xi-unicode",
|
||||
]
|
||||
|
||||
|
@ -28,4 +28,5 @@ typed-arena = "2"
|
||||
unicode-bidi = "0.3.5"
|
||||
unicode-math = { git = "https://github.com/s3bk/unicode-math/" }
|
||||
unicode-script = "0.5"
|
||||
unicode-segmentation = "1"
|
||||
xi-unicode = "0.3"
|
||||
|
@ -52,11 +52,7 @@ fn scope() -> Scope {
|
||||
std.def_node::<math::MathNode>("math");
|
||||
std.def_node::<math::AtomNode>("atom");
|
||||
std.def_node::<math::FracNode>("frac");
|
||||
std.define("sum", "∑");
|
||||
std.define("in", "∈");
|
||||
std.define("arrow", "→");
|
||||
std.define("NN", "ℕ");
|
||||
std.define("RR", "ℝ");
|
||||
std.def_node::<math::SqrtNode>("sqrt");
|
||||
|
||||
// Layout.
|
||||
std.def_node::<layout::PageNode>("page");
|
||||
|
@ -2,13 +2,12 @@
|
||||
|
||||
mod tex;
|
||||
|
||||
use std::fmt::Write;
|
||||
use typst::model::{Guard, SequenceNode};
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
use typst::model::Guard;
|
||||
|
||||
use self::tex::{layout_tex, Texify};
|
||||
use self::tex::layout_tex;
|
||||
use crate::prelude::*;
|
||||
use crate::text::FontFamily;
|
||||
use crate::text::{FontFamily, LinebreakNode, SpaceNode, SymbolNode, TextNode};
|
||||
|
||||
/// A piece of a mathematical formula.
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
@ -55,15 +54,182 @@ impl Layout for MathNode {
|
||||
styles: StyleChain,
|
||||
_: &Regions,
|
||||
) -> SourceResult<Fragment> {
|
||||
layout_tex(vt, &self.texify(), self.display, styles)
|
||||
let mut t = Texifier::new();
|
||||
self.texify(&mut t)?;
|
||||
layout_tex(vt, &t.finish(), self.display, styles)
|
||||
}
|
||||
}
|
||||
|
||||
impl Inline for MathNode {}
|
||||
|
||||
/// Turn a math node into TeX math code.
|
||||
#[capability]
|
||||
trait Texify {
|
||||
/// Perform the conversion.
|
||||
fn texify(&self, t: &mut Texifier) -> SourceResult<()>;
|
||||
|
||||
/// Texify the node, but trim parentheses..
|
||||
fn texify_unparen(&self, t: &mut Texifier) -> SourceResult<()> {
|
||||
let s = {
|
||||
let mut sub = Texifier::new();
|
||||
self.texify(&mut sub)?;
|
||||
sub.finish()
|
||||
};
|
||||
|
||||
let unparened = if s.starts_with("\\left(") && s.ends_with("\\right)") {
|
||||
s[6..s.len() - 7].into()
|
||||
} else {
|
||||
s
|
||||
};
|
||||
|
||||
t.push_str(&unparened);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the TeX representation of the formula.
|
||||
struct Texifier {
|
||||
tex: EcoString,
|
||||
support: bool,
|
||||
space: bool,
|
||||
}
|
||||
|
||||
impl Texifier {
|
||||
/// Create a new texifier.
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
tex: EcoString::new(),
|
||||
support: false,
|
||||
space: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Finish texifier and return the TeX string.
|
||||
fn finish(self) -> EcoString {
|
||||
self.tex
|
||||
}
|
||||
|
||||
/// Push a weak space.
|
||||
fn push_space(&mut self) {
|
||||
self.space = !self.tex.is_empty();
|
||||
}
|
||||
|
||||
/// Mark this position as supportive. This allows a space before or after
|
||||
/// to exist.
|
||||
fn support(&mut self) {
|
||||
self.support = true;
|
||||
}
|
||||
|
||||
/// Flush a space.
|
||||
fn flush(&mut self) {
|
||||
if self.space && self.support {
|
||||
self.tex.push_str("\\ ");
|
||||
}
|
||||
|
||||
self.space = false;
|
||||
self.support = false;
|
||||
}
|
||||
|
||||
/// Push a string.
|
||||
fn push_str(&mut self, s: &str) {
|
||||
self.flush();
|
||||
self.tex.push_str(s);
|
||||
}
|
||||
|
||||
/// Escape and push a char for TeX usage.
|
||||
#[rustfmt::skip]
|
||||
fn push_escaped(&mut self, c: char) {
|
||||
self.flush();
|
||||
match c {
|
||||
' ' => self.tex.push_str("\\ "),
|
||||
'%' | '&' | '$' | '#' => {
|
||||
self.tex.push('\\');
|
||||
self.tex.push(c);
|
||||
self.tex.push(' ');
|
||||
}
|
||||
'{' => self.tex.push_str("\\left\\{"),
|
||||
'}' => self.tex.push_str("\\right\\}"),
|
||||
'[' | '(' => {
|
||||
self.tex.push_str("\\left");
|
||||
self.tex.push(c);
|
||||
}
|
||||
']' | ')' => {
|
||||
self.tex.push_str("\\right");
|
||||
self.tex.push(c);
|
||||
}
|
||||
'a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9' | 'Α' ..= 'Ω' | 'α' ..= 'ω' |
|
||||
'*' | '+' | '-' | '?' | '!' | '=' | '<' | '>' |
|
||||
':' | ',' | ';' | '|' | '/' | '@' | '.' | '"' => self.tex.push(c),
|
||||
c => {
|
||||
if let Some(sym) = unicode_math::SYMBOLS
|
||||
.iter()
|
||||
.find(|sym| sym.codepoint == c) {
|
||||
self.tex.push('\\');
|
||||
self.tex.push_str(sym.name);
|
||||
self.tex.push(' ');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Texify for MathNode {
|
||||
fn texify(&self) -> EcoString {
|
||||
self.children.iter().map(Texify::texify).collect()
|
||||
fn texify(&self, t: &mut Texifier) -> SourceResult<()> {
|
||||
for child in &self.children {
|
||||
child.texify(t)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Texify for Content {
|
||||
fn texify(&self, t: &mut Texifier) -> SourceResult<()> {
|
||||
if self.is::<SpaceNode>() {
|
||||
t.push_space();
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if self.is::<LinebreakNode>() {
|
||||
t.push_str("\\");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if let Some(node) = self.to::<SymbolNode>() {
|
||||
if let Some(c) = symmie::get(&node.0) {
|
||||
t.push_escaped(c);
|
||||
return Ok(());
|
||||
} else if let Some(span) = self.span() {
|
||||
bail!(span, "unknown symbol");
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(node) = self.to::<TextNode>() {
|
||||
t.support();
|
||||
t.push_str("\\mathrm{");
|
||||
for c in node.0.chars() {
|
||||
t.push_escaped(c);
|
||||
}
|
||||
t.push_str("}");
|
||||
t.support();
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if let Some(node) = self.to::<SequenceNode>() {
|
||||
for child in &node.0 {
|
||||
child.texify(t)?;
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if let Some(node) = self.with::<dyn Texify>() {
|
||||
return node.texify(t);
|
||||
}
|
||||
|
||||
if let Some(span) = self.span() {
|
||||
bail!(span, "not allowed here");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@ -72,11 +238,35 @@ impl Texify for MathNode {
|
||||
pub struct AtomNode(pub EcoString);
|
||||
|
||||
#[node(Texify)]
|
||||
impl AtomNode {}
|
||||
impl AtomNode {
|
||||
fn construct(_: &Vm, args: &mut Args) -> SourceResult<Content> {
|
||||
Ok(Self(args.expect("text")?).pack())
|
||||
}
|
||||
}
|
||||
|
||||
impl Texify for AtomNode {
|
||||
fn texify(&self) -> EcoString {
|
||||
self.0.chars().map(escape_char).collect()
|
||||
fn texify(&self, t: &mut Texifier) -> SourceResult<()> {
|
||||
let multi = self.0.graphemes(true).count() > 1;
|
||||
if multi {
|
||||
t.push_str("\\mathrm{");
|
||||
}
|
||||
|
||||
for c in self.0.chars() {
|
||||
let supportive = c == '|';
|
||||
if supportive {
|
||||
t.support();
|
||||
}
|
||||
t.push_escaped(c);
|
||||
if supportive {
|
||||
t.support();
|
||||
}
|
||||
}
|
||||
|
||||
if multi {
|
||||
t.push_str("}");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@ -90,15 +280,22 @@ pub struct FracNode {
|
||||
}
|
||||
|
||||
#[node(Texify)]
|
||||
impl FracNode {}
|
||||
impl FracNode {
|
||||
fn construct(_: &Vm, args: &mut Args) -> SourceResult<Content> {
|
||||
let num = args.expect("numerator")?;
|
||||
let denom = args.expect("denominator")?;
|
||||
Ok(Self { num, denom }.pack())
|
||||
}
|
||||
}
|
||||
|
||||
impl Texify for FracNode {
|
||||
fn texify(&self) -> EcoString {
|
||||
format_eco!(
|
||||
"\\frac{{{}}}{{{}}}",
|
||||
unparen(self.num.texify()),
|
||||
unparen(self.denom.texify())
|
||||
)
|
||||
fn texify(&self, t: &mut Texifier) -> SourceResult<()> {
|
||||
t.push_str("\\frac{");
|
||||
self.num.texify_unparen(t)?;
|
||||
t.push_str("}{");
|
||||
self.denom.texify_unparen(t)?;
|
||||
t.push_str("}");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@ -117,18 +314,22 @@ pub struct ScriptNode {
|
||||
impl ScriptNode {}
|
||||
|
||||
impl Texify for ScriptNode {
|
||||
fn texify(&self) -> EcoString {
|
||||
let mut tex = self.base.texify();
|
||||
fn texify(&self, t: &mut Texifier) -> SourceResult<()> {
|
||||
self.base.texify(t)?;
|
||||
|
||||
if let Some(sub) = &self.sub {
|
||||
write!(tex, "_{{{}}}", unparen(sub.texify())).unwrap();
|
||||
t.push_str("_{");
|
||||
sub.texify_unparen(t)?;
|
||||
t.push_str("}");
|
||||
}
|
||||
|
||||
if let Some(sup) = &self.sup {
|
||||
write!(tex, "^{{{}}}", unparen(sup.texify())).unwrap();
|
||||
t.push_str("^{");
|
||||
sup.texify_unparen(t)?;
|
||||
t.push_str("}");
|
||||
}
|
||||
|
||||
tex
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@ -140,32 +341,27 @@ pub struct AlignNode(pub usize);
|
||||
impl AlignNode {}
|
||||
|
||||
impl Texify for AlignNode {
|
||||
fn texify(&self) -> EcoString {
|
||||
EcoString::new()
|
||||
fn texify(&self, _: &mut Texifier) -> SourceResult<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Escape a char for TeX usage.
|
||||
#[rustfmt::skip]
|
||||
fn escape_char(c: char) -> EcoString {
|
||||
match c {
|
||||
'{' | '}' | '%' | '&' | '$' | '#' => format_eco!(" \\{c} "),
|
||||
'a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9' | 'Α' ..= 'Ω' | 'α' ..= 'ω' |
|
||||
'*' | '+' | '-' | '[' | '(' | ']' | ')' | '?' | '!' | '=' | '<' | '>' |
|
||||
':' | ',' | ';' | '|' | '/' | '@' | '.' | '"' => c.into(),
|
||||
c => unicode_math::SYMBOLS
|
||||
.iter()
|
||||
.find(|sym| sym.codepoint == c)
|
||||
.map(|sym| format_eco!("\\{} ", sym.name))
|
||||
.unwrap_or_default(),
|
||||
/// A square root node.
|
||||
#[derive(Debug, Hash)]
|
||||
pub struct SqrtNode(Content);
|
||||
|
||||
#[node(Texify)]
|
||||
impl SqrtNode {
|
||||
fn construct(_: &Vm, args: &mut Args) -> SourceResult<Content> {
|
||||
Ok(Self(args.expect("body")?).pack())
|
||||
}
|
||||
}
|
||||
|
||||
/// Trim grouping parentheses.
|
||||
fn unparen(s: EcoString) -> EcoString {
|
||||
if s.starts_with('(') && s.ends_with(')') {
|
||||
s[1..s.len() - 1].into()
|
||||
} else {
|
||||
s
|
||||
impl Texify for SqrtNode {
|
||||
fn texify(&self, t: &mut Texifier) -> SourceResult<()> {
|
||||
t.push_str("\\sqrt{");
|
||||
self.0.texify_unparen(t)?;
|
||||
t.push_str("}");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -6,32 +6,7 @@ use rex::render::{Backend, Cursor, Renderer};
|
||||
use typst::font::Font;
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::text::{families, variant, LinebreakNode, SpaceNode, TextNode};
|
||||
|
||||
/// Turn a math node into TeX math code.
|
||||
#[capability]
|
||||
pub trait Texify {
|
||||
/// Perform the conversion.
|
||||
fn texify(&self) -> EcoString;
|
||||
}
|
||||
|
||||
impl Texify for Content {
|
||||
fn texify(&self) -> EcoString {
|
||||
if self.is::<SpaceNode>() {
|
||||
return EcoString::new();
|
||||
}
|
||||
|
||||
if self.is::<LinebreakNode>() {
|
||||
return r"\\".into();
|
||||
}
|
||||
|
||||
if let Some(node) = self.with::<dyn Texify>() {
|
||||
return node.texify();
|
||||
}
|
||||
|
||||
panic!("{self:?} is not math");
|
||||
}
|
||||
}
|
||||
use crate::text::{families, variant, TextNode};
|
||||
|
||||
/// Layout a TeX formula into a frame.
|
||||
pub fn layout_tex(
|
||||
@ -63,13 +38,15 @@ pub fn layout_tex(
|
||||
let style = if display { Style::Display } else { Style::Text };
|
||||
let settings = LayoutSettings::new(&ctx, em.to_pt(), style);
|
||||
let renderer = Renderer::new();
|
||||
let layout = renderer
|
||||
let Ok(layout) = renderer
|
||||
.layout(&tex, settings)
|
||||
.map_err(|err| match err {
|
||||
Error::Parse(err) => err.to_string(),
|
||||
Error::Layout(LayoutError::Font(err)) => err.to_string(),
|
||||
})
|
||||
.expect("failed to layout with rex");
|
||||
else {
|
||||
panic!("failed to layout with rex: {tex}");
|
||||
};
|
||||
|
||||
// Determine the metrics.
|
||||
let (x0, y0, x1, y1) = renderer.size(&layout);
|
||||
|
@ -169,7 +169,7 @@ pub static THEME: Lazy<Theme> = Lazy::new(|| Theme {
|
||||
item("entity.name, variable.function, support", Some("#4b69c6"), None),
|
||||
item("support.macro", Some("#16718d"), None),
|
||||
item("meta.annotation", Some("#301414"), None),
|
||||
item("entity.other, meta.interpolation", Some("#8b41b1"), None),
|
||||
item("entity.other, meta.interpolation, constant.symbol.typst", Some("#8b41b1"), None),
|
||||
item("invalid", Some("#ff0000"), None),
|
||||
],
|
||||
});
|
||||
|
@ -271,7 +271,6 @@ impl Eval for ast::MarkupNode {
|
||||
Self::Emph(v) => v.eval(vm)?,
|
||||
Self::Link(v) => v.eval(vm)?,
|
||||
Self::Raw(v) => v.eval(vm)?,
|
||||
Self::Math(v) => v.eval(vm)?,
|
||||
Self::Heading(v) => v.eval(vm)?,
|
||||
Self::List(v) => v.eval(vm)?,
|
||||
Self::Enum(v) => v.eval(vm)?,
|
||||
@ -426,19 +425,29 @@ impl Eval for ast::MathNode {
|
||||
Self::Linebreak(v) => v.eval(vm)?,
|
||||
Self::Escape(v) => (vm.items.math_atom)(v.get().into()),
|
||||
Self::Atom(v) => v.eval(vm)?,
|
||||
Self::Symbol(v) => (vm.items.symbol)(v.get().clone()),
|
||||
Self::Script(v) => v.eval(vm)?,
|
||||
Self::Frac(v) => v.eval(vm)?,
|
||||
Self::Align(v) => v.eval(vm)?,
|
||||
Self::Group(v) => v.eval(vm)?,
|
||||
Self::Expr(v) => match v.eval(vm)? {
|
||||
Value::None => Content::empty(),
|
||||
Value::Int(v) => (vm.items.math_atom)(format_eco!("{}", v)),
|
||||
Value::Float(v) => (vm.items.math_atom)(format_eco!("{}", v)),
|
||||
Value::Str(v) => (vm.items.math_atom)(v.into()),
|
||||
Value::Content(v) => v,
|
||||
_ => bail!(v.span(), "unexpected garbage"),
|
||||
},
|
||||
})
|
||||
Self::Expr(v) => {
|
||||
if let ast::Expr::Ident(ident) = v {
|
||||
if self.as_untyped().len() == ident.len()
|
||||
&& !vm.scopes.get(ident).is_ok()
|
||||
{
|
||||
let node = (vm.items.symbol)(ident.get().clone());
|
||||
return Ok(node.spanned(self.span()));
|
||||
}
|
||||
}
|
||||
|
||||
match v.eval(vm)? {
|
||||
Value::Int(v) => (vm.items.math_atom)(format_eco!("{}", v)),
|
||||
Value::Float(v) => (vm.items.math_atom)(format_eco!("{}", v)),
|
||||
v => v.display(),
|
||||
}
|
||||
}
|
||||
}
|
||||
.spanned(self.span()))
|
||||
}
|
||||
}
|
||||
|
||||
@ -494,6 +503,7 @@ impl Eval for ast::Expr {
|
||||
Self::Ident(v) => v.eval(vm),
|
||||
Self::Code(v) => v.eval(vm),
|
||||
Self::Content(v) => v.eval(vm).map(Value::Content),
|
||||
Self::Math(v) => v.eval(vm).map(Value::Content),
|
||||
Self::Array(v) => v.eval(vm).map(Value::Array),
|
||||
Self::Dict(v) => v.eval(vm).map(Value::Dict),
|
||||
Self::Parenthesized(v) => v.eval(vm),
|
||||
|
@ -107,8 +107,6 @@ pub enum MarkupNode {
|
||||
Enum(EnumItem),
|
||||
/// An item in a description list: `/ Term: Details`.
|
||||
Desc(DescItem),
|
||||
/// A math formula: `$x$`, `$ x^2 $`.
|
||||
Math(Math),
|
||||
/// An expression.
|
||||
Expr(Expr),
|
||||
}
|
||||
@ -132,7 +130,6 @@ impl AstNode for MarkupNode {
|
||||
SyntaxKind::ListItem => node.cast().map(Self::List),
|
||||
SyntaxKind::EnumItem => node.cast().map(Self::Enum),
|
||||
SyntaxKind::DescItem => node.cast().map(Self::Desc),
|
||||
SyntaxKind::Math => node.cast().map(Self::Math),
|
||||
_ => node.cast().map(Self::Expr),
|
||||
}
|
||||
}
|
||||
@ -155,7 +152,6 @@ impl AstNode for MarkupNode {
|
||||
Self::List(v) => v.as_untyped(),
|
||||
Self::Enum(v) => v.as_untyped(),
|
||||
Self::Desc(v) => v.as_untyped(),
|
||||
Self::Math(v) => v.as_untyped(),
|
||||
Self::Expr(v) => v.as_untyped(),
|
||||
}
|
||||
}
|
||||
@ -447,6 +443,9 @@ pub enum MathNode {
|
||||
Escape(Escape),
|
||||
/// An atom: `x`, `+`, `12`.
|
||||
Atom(Atom),
|
||||
/// Symbol notation: `:arrow:l:` or `arrow:l`. Notations without any colons
|
||||
/// are parsed as identifier expression and handled during evaluation.
|
||||
Symbol(Symbol),
|
||||
/// A base with optional sub- and superscripts: `a_1^2`.
|
||||
Script(Script),
|
||||
/// A fraction: `x/2`.
|
||||
@ -466,6 +465,7 @@ impl AstNode for MathNode {
|
||||
SyntaxKind::Linebreak => node.cast().map(Self::Linebreak),
|
||||
SyntaxKind::Escape(_) => node.cast().map(Self::Escape),
|
||||
SyntaxKind::Atom(_) => node.cast().map(Self::Atom),
|
||||
SyntaxKind::Symbol(_) => node.cast().map(Self::Symbol),
|
||||
SyntaxKind::Script => node.cast().map(Self::Script),
|
||||
SyntaxKind::Frac => node.cast().map(Self::Frac),
|
||||
SyntaxKind::Align => node.cast().map(Self::Align),
|
||||
@ -480,6 +480,7 @@ impl AstNode for MathNode {
|
||||
Self::Linebreak(v) => v.as_untyped(),
|
||||
Self::Escape(v) => v.as_untyped(),
|
||||
Self::Atom(v) => v.as_untyped(),
|
||||
Self::Symbol(v) => v.as_untyped(),
|
||||
Self::Script(v) => v.as_untyped(),
|
||||
Self::Frac(v) => v.as_untyped(),
|
||||
Self::Align(v) => v.as_untyped(),
|
||||
@ -574,6 +575,8 @@ pub enum Expr {
|
||||
Code(CodeBlock),
|
||||
/// A content block: `[*Hi* there!]`.
|
||||
Content(ContentBlock),
|
||||
/// A math formula: `$x$`, `$ x^2 $`.
|
||||
Math(Math),
|
||||
/// A grouped expression: `(1 + 2)`.
|
||||
Parenthesized(Parenthesized),
|
||||
/// An array: `(1, "hi", 12cm)`.
|
||||
@ -622,6 +625,7 @@ impl AstNode for Expr {
|
||||
SyntaxKind::Ident(_) => node.cast().map(Self::Ident),
|
||||
SyntaxKind::CodeBlock => node.cast().map(Self::Code),
|
||||
SyntaxKind::ContentBlock => node.cast().map(Self::Content),
|
||||
SyntaxKind::Math => node.cast().map(Self::Math),
|
||||
SyntaxKind::Parenthesized => node.cast().map(Self::Parenthesized),
|
||||
SyntaxKind::Array => node.cast().map(Self::Array),
|
||||
SyntaxKind::Dict => node.cast().map(Self::Dict),
|
||||
@ -651,6 +655,7 @@ impl AstNode for Expr {
|
||||
Self::Lit(v) => v.as_untyped(),
|
||||
Self::Code(v) => v.as_untyped(),
|
||||
Self::Content(v) => v.as_untyped(),
|
||||
Self::Math(v) => v.as_untyped(),
|
||||
Self::Ident(v) => v.as_untyped(),
|
||||
Self::Array(v) => v.as_untyped(),
|
||||
Self::Dict(v) => v.as_untyped(),
|
||||
|
@ -163,8 +163,6 @@ pub enum Category {
|
||||
ListMarker,
|
||||
/// A term in a description list.
|
||||
ListTerm,
|
||||
/// A full math formula.
|
||||
Math,
|
||||
/// The delimiters of a math formula.
|
||||
MathDelimiter,
|
||||
/// An operator with special meaning in a math formula.
|
||||
@ -300,15 +298,17 @@ impl Category {
|
||||
SyntaxKind::EnumItem => Some(Category::ListItem),
|
||||
SyntaxKind::EnumNumbering(_) => Some(Category::ListMarker),
|
||||
SyntaxKind::DescItem => Some(Category::ListItem),
|
||||
SyntaxKind::Math => Some(Category::Math),
|
||||
SyntaxKind::Math => None,
|
||||
SyntaxKind::Atom(_) => None,
|
||||
SyntaxKind::Script => None,
|
||||
SyntaxKind::Frac => None,
|
||||
SyntaxKind::Align => None,
|
||||
|
||||
SyntaxKind::Ident(_) => match parent.kind() {
|
||||
SyntaxKind::Markup { .. } => Some(Category::Interpolated),
|
||||
SyntaxKind::Math => Some(Category::Interpolated),
|
||||
SyntaxKind::Markup { .. }
|
||||
| SyntaxKind::Math
|
||||
| SyntaxKind::Script
|
||||
| SyntaxKind::Frac => Some(Category::Interpolated),
|
||||
SyntaxKind::FuncCall => Some(Category::Function),
|
||||
SyntaxKind::MethodCall if i > 0 => Some(Category::Function),
|
||||
SyntaxKind::Closure if i == 0 => Some(Category::Function),
|
||||
@ -378,7 +378,6 @@ impl Category {
|
||||
Self::Emph => "markup.italic.typst",
|
||||
Self::Link => "markup.underline.link.typst",
|
||||
Self::Raw => "markup.raw.typst",
|
||||
Self::Math => "string.other.math.typst",
|
||||
Self::MathDelimiter => "punctuation.definition.math.typst",
|
||||
Self::MathOperator => "keyword.operator.math.typst",
|
||||
Self::Heading => "markup.heading.typst",
|
||||
|
@ -24,19 +24,4 @@ use incremental::reparse;
|
||||
use parser::*;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::fmt::Debug;
|
||||
|
||||
#[track_caller]
|
||||
pub fn check<T>(text: &str, found: T, expected: T)
|
||||
where
|
||||
T: Debug + PartialEq,
|
||||
{
|
||||
if found != expected {
|
||||
println!("source: {text:?}");
|
||||
println!("expected: {expected:#?}");
|
||||
println!("found: {found:#?}");
|
||||
panic!("test failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
mod tests;
|
||||
|
@ -235,17 +235,9 @@ impl<'s> Parser<'s> {
|
||||
pub fn start_group(&mut self, kind: Group) {
|
||||
self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
|
||||
self.tokens.set_mode(match kind {
|
||||
Group::Strong | Group::Emph => TokenMode::Markup,
|
||||
Group::Bracket => match self.tokens.mode() {
|
||||
TokenMode::Math => TokenMode::Math,
|
||||
_ => TokenMode::Markup,
|
||||
},
|
||||
Group::Brace | Group::Paren => match self.tokens.mode() {
|
||||
TokenMode::Math => TokenMode::Math,
|
||||
_ => TokenMode::Code,
|
||||
},
|
||||
Group::Math => TokenMode::Math,
|
||||
Group::Expr | Group::Imports => TokenMode::Code,
|
||||
Group::Bracket | Group::Strong | Group::Emph => TokenMode::Markup,
|
||||
Group::Math | Group::MathRow(_, _) => TokenMode::Math,
|
||||
Group::Brace | Group::Paren | Group::Expr | Group::Imports => TokenMode::Code,
|
||||
});
|
||||
|
||||
match kind {
|
||||
@ -255,6 +247,7 @@ impl<'s> Parser<'s> {
|
||||
Group::Strong => self.assert(SyntaxKind::Star),
|
||||
Group::Emph => self.assert(SyntaxKind::Underscore),
|
||||
Group::Math => self.assert(SyntaxKind::Dollar),
|
||||
Group::MathRow(l, _) => self.assert(SyntaxKind::Atom(l.into())),
|
||||
Group::Expr => self.repeek(),
|
||||
Group::Imports => self.repeek(),
|
||||
}
|
||||
@ -279,6 +272,7 @@ impl<'s> Parser<'s> {
|
||||
Group::Strong => Some((SyntaxKind::Star, true)),
|
||||
Group::Emph => Some((SyntaxKind::Underscore, true)),
|
||||
Group::Math => Some((SyntaxKind::Dollar, true)),
|
||||
Group::MathRow(_, r) => Some((SyntaxKind::Atom(r.into()), true)),
|
||||
Group::Expr => Some((SyntaxKind::Semicolon, false)),
|
||||
Group::Imports => None,
|
||||
} {
|
||||
@ -344,9 +338,17 @@ impl<'s> Parser<'s> {
|
||||
Some(SyntaxKind::RightParen) => self.inside(Group::Paren),
|
||||
Some(SyntaxKind::Star) => self.inside(Group::Strong),
|
||||
Some(SyntaxKind::Underscore) => self.inside(Group::Emph),
|
||||
Some(SyntaxKind::Dollar) => self.inside(Group::Math),
|
||||
Some(SyntaxKind::Dollar) => {
|
||||
self.groups.last().map(|group| group.kind) == Some(Group::Math)
|
||||
}
|
||||
Some(SyntaxKind::Semicolon) => self.inside(Group::Expr),
|
||||
Some(SyntaxKind::From) => self.inside(Group::Imports),
|
||||
Some(SyntaxKind::Atom(s)) => match s.as_str() {
|
||||
")" => self.inside(Group::MathRow('(', ')')),
|
||||
"}" => self.inside(Group::MathRow('{', '}')),
|
||||
"]" => self.inside(Group::MathRow('[', ']')),
|
||||
_ => false,
|
||||
},
|
||||
Some(SyntaxKind::Space { newlines }) => self.space_ends_group(*newlines),
|
||||
Some(_) => false,
|
||||
None => true,
|
||||
@ -531,6 +533,8 @@ pub enum Group {
|
||||
Emph,
|
||||
/// A group surrounded by dollar signs: `$...$`.
|
||||
Math,
|
||||
/// A group surrounded by math delimiters.
|
||||
MathRow(char, char),
|
||||
/// A group ended by a semicolon or a line break: `;`, `\n`.
|
||||
Expr,
|
||||
/// A group for import items, ended by a semicolon, line break or `from`.
|
||||
|
@ -268,7 +268,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
|
||||
| SyntaxKind::Include
|
||||
| SyntaxKind::Break
|
||||
| SyntaxKind::Continue
|
||||
| SyntaxKind::Return => markup_expr(p),
|
||||
| SyntaxKind::Return => embedded_expr(p),
|
||||
|
||||
// Code and content block.
|
||||
SyntaxKind::LeftBrace => code_block(p),
|
||||
@ -359,7 +359,7 @@ fn desc_item(p: &mut Parser, at_start: bool) -> ParseResult {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn markup_expr(p: &mut Parser) {
|
||||
fn embedded_expr(p: &mut Parser) {
|
||||
// Does the expression need termination or can content follow directly?
|
||||
let stmt = matches!(
|
||||
p.peek(),
|
||||
@ -437,36 +437,63 @@ fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option<SyntaxKind>) {
|
||||
fn math_primary(p: &mut Parser) {
|
||||
let Some(token) = p.peek() else { return };
|
||||
match token {
|
||||
// Spaces, atoms and expressions.
|
||||
// Spaces and expressions.
|
||||
SyntaxKind::Space { .. }
|
||||
| SyntaxKind::Linebreak
|
||||
| SyntaxKind::Escape(_)
|
||||
| SyntaxKind::Atom(_)
|
||||
| SyntaxKind::Ident(_) => p.eat(),
|
||||
| SyntaxKind::Str(_)
|
||||
| SyntaxKind::Symbol(_) => p.eat(),
|
||||
|
||||
// Groups.
|
||||
SyntaxKind::LeftParen => math_group(p, Group::Paren, '(', ')'),
|
||||
SyntaxKind::LeftBracket => math_group(p, Group::Bracket, '[', ']'),
|
||||
SyntaxKind::LeftBrace => math_group(p, Group::Brace, '{', '}'),
|
||||
// Atoms.
|
||||
SyntaxKind::Atom(s) => match s.as_str() {
|
||||
"(" => math_group(p, Group::MathRow('(', ')')),
|
||||
"{" => math_group(p, Group::MathRow('{', '}')),
|
||||
"[" => math_group(p, Group::MathRow('[', ']')),
|
||||
_ => p.eat(),
|
||||
},
|
||||
|
||||
// Alignment indicator.
|
||||
SyntaxKind::Amp => math_align(p),
|
||||
|
||||
// Identifiers and math calls.
|
||||
SyntaxKind::Ident(_) => {
|
||||
let marker = p.marker();
|
||||
p.eat();
|
||||
|
||||
// Parenthesis or bracket means this is a function call.
|
||||
if matches!(p.peek_direct(), Some(SyntaxKind::Atom(s)) if s == "(") {
|
||||
marker.perform(p, SyntaxKind::FuncCall, math_args);
|
||||
}
|
||||
}
|
||||
|
||||
// Hashtag + keyword / identifier.
|
||||
SyntaxKind::Let
|
||||
| SyntaxKind::Set
|
||||
| SyntaxKind::Show
|
||||
| SyntaxKind::If
|
||||
| SyntaxKind::While
|
||||
| SyntaxKind::For
|
||||
| SyntaxKind::Import
|
||||
| SyntaxKind::Include
|
||||
| SyntaxKind::Break
|
||||
| SyntaxKind::Continue
|
||||
| SyntaxKind::Return => embedded_expr(p),
|
||||
|
||||
// Code and content block.
|
||||
SyntaxKind::LeftBrace => code_block(p),
|
||||
SyntaxKind::LeftBracket => content_block(p),
|
||||
|
||||
_ => p.unexpected(),
|
||||
}
|
||||
}
|
||||
|
||||
fn math_group(p: &mut Parser, group: Group, l: char, r: char) {
|
||||
fn math_group(p: &mut Parser, group: Group) {
|
||||
p.perform(SyntaxKind::Math, |p| {
|
||||
let marker = p.marker();
|
||||
p.start_group(group);
|
||||
marker.convert(p, SyntaxKind::Atom(l.into()));
|
||||
while !p.eof() {
|
||||
math_node(p);
|
||||
}
|
||||
let marker = p.marker();
|
||||
p.end_group();
|
||||
marker.convert(p, SyntaxKind::Atom(r.into()));
|
||||
})
|
||||
}
|
||||
|
||||
@ -582,6 +609,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
|
||||
Some(SyntaxKind::LeftParen) => parenthesized(p, atomic),
|
||||
Some(SyntaxKind::LeftBrace) => Ok(code_block(p)),
|
||||
Some(SyntaxKind::LeftBracket) => Ok(content_block(p)),
|
||||
Some(SyntaxKind::Dollar) => Ok(math(p)),
|
||||
|
||||
// Keywords.
|
||||
Some(SyntaxKind::Let) => let_binding(p),
|
||||
@ -902,6 +930,28 @@ fn args(p: &mut Parser) -> ParseResult {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn math_args(p: &mut Parser) {
|
||||
p.start_group(Group::MathRow('(', ')'));
|
||||
p.perform(SyntaxKind::Args, |p| {
|
||||
let mut marker = p.marker();
|
||||
while !p.eof() {
|
||||
if matches!(p.peek(), Some(SyntaxKind::Atom(s)) if s == ",") {
|
||||
marker.end(p, SyntaxKind::Math);
|
||||
let comma = p.marker();
|
||||
p.eat();
|
||||
comma.convert(p, SyntaxKind::Comma);
|
||||
marker = p.marker();
|
||||
} else {
|
||||
math_node(p);
|
||||
}
|
||||
}
|
||||
if marker != p.marker() {
|
||||
marker.end(p, SyntaxKind::Math);
|
||||
}
|
||||
});
|
||||
p.end_group();
|
||||
}
|
||||
|
||||
fn let_binding(p: &mut Parser) -> ParseResult {
|
||||
p.perform(SyntaxKind::LetBinding, |p| {
|
||||
p.assert(SyntaxKind::Let);
|
||||
|
483
src/syntax/tests.rs
Normal file
483
src/syntax/tests.rs
Normal file
@ -0,0 +1,483 @@
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use std::num::NonZeroUsize;
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::*;
|
||||
use crate::geom::{AbsUnit, AngleUnit};
|
||||
|
||||
use ErrorPos::*;
|
||||
use Option::None;
|
||||
use SyntaxKind::*;
|
||||
use TokenMode::{Code, Markup};
|
||||
|
||||
use std::fmt::Debug;
|
||||
|
||||
/// Compare `found` against `expected` for the given source `text`.
///
/// On a mismatch, the source and both values are printed and the function
/// panics with "test failed"; otherwise it returns silently.
#[track_caller]
pub fn check<T>(text: &str, found: T, expected: T)
where
    T: Debug + PartialEq,
{
    let mismatch = found != expected;
    if !mismatch {
        return;
    }

    println!("source: {text:?}");
    println!("expected: {expected:#?}");
    println!("found: {found:#?}");
    panic!("test failed");
}
|
||||
|
||||
/// Test shorthand: build a `SyntaxKind::Space` with the given newline count.
fn Space(newlines: usize) -> SyntaxKind {
|
||||
SyntaxKind::Space { newlines }
|
||||
}
|
||||
|
||||
/// Test shorthand: build a `SyntaxKind::Raw` from its text, optional language
/// tag and block flag.
fn Raw(text: &str, lang: Option<&str>, block: bool) -> SyntaxKind {
|
||||
SyntaxKind::Raw(Arc::new(RawFields {
|
||||
text: text.into(),
|
||||
lang: lang.map(Into::into),
|
||||
block,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Test shorthand: build a `SyntaxKind::Str` from a string slice.
fn Str(string: &str) -> SyntaxKind {
|
||||
SyntaxKind::Str(string.into())
|
||||
}
|
||||
|
||||
/// Test shorthand: build a `SyntaxKind::Text` from a string slice.
fn Text(string: &str) -> SyntaxKind {
|
||||
SyntaxKind::Text(string.into())
|
||||
}
|
||||
|
||||
/// Test shorthand: build a `SyntaxKind::Ident` from a string slice.
fn Ident(ident: &str) -> SyntaxKind {
|
||||
SyntaxKind::Ident(ident.into())
|
||||
}
|
||||
|
||||
/// Test shorthand: build a `SyntaxKind::Error` from a position and a message.
fn Error(pos: ErrorPos, message: &str) -> SyntaxKind {
|
||||
SyntaxKind::Error(pos, message.into())
|
||||
}
|
||||
|
||||
/// Building blocks for suffix testing.
|
||||
///
|
||||
/// We extend each test case with a collection of different suffixes to make
|
||||
/// sure tokens end at the correct position. These suffixes are split into
|
||||
/// blocks, which can be disabled/enabled per test case. For example, when
|
||||
/// testing identifiers we disable letter suffixes because these would
|
||||
/// mingle with the identifiers.
|
||||
///
|
||||
/// Suffix blocks:
|
||||
/// - ' ': spacing
|
||||
/// - 'a': letters
|
||||
/// - '1': numbers
|
||||
/// - '/': symbols
|
||||
const BLOCKS: &str = " a1/";
|
||||
|
||||
// Suffixes described by four-tuples of:
|
||||
//
|
||||
// - block the suffix is part of
|
||||
// - mode in which the suffix is applicable
|
||||
// - the suffix string
|
||||
// - the resulting suffix SyntaxKind
|
||||
fn suffixes() -> impl Iterator<Item = (char, Option<TokenMode>, &'static str, SyntaxKind)>
|
||||
{
|
||||
[
|
||||
// Whitespace suffixes.
|
||||
(' ', None, " ", Space(0)),
|
||||
(' ', None, "\n", Space(1)),
|
||||
(' ', None, "\r", Space(1)),
|
||||
(' ', None, "\r\n", Space(1)),
|
||||
// Letter suffixes.
|
||||
('a', Some(Markup), "hello", Text("hello")),
|
||||
('a', Some(Markup), "💚", Text("💚")),
|
||||
('a', Some(Code), "val", Ident("val")),
|
||||
('a', Some(Code), "α", Ident("α")),
|
||||
('a', Some(Code), "_", Ident("_")),
|
||||
// Number suffixes.
|
||||
('1', Some(Code), "2", Int(2)),
|
||||
('1', Some(Code), ".2", Float(0.2)),
|
||||
// Symbol suffixes.
|
||||
('/', None, "[", LeftBracket),
|
||||
('/', None, "//", LineComment),
|
||||
('/', None, "/**/", BlockComment),
|
||||
('/', Some(Markup), "*", Star),
|
||||
('/', Some(Markup), r"\\", Escape('\\')),
|
||||
('/', Some(Markup), "#let", Let),
|
||||
('/', Some(Code), "(", LeftParen),
|
||||
('/', Some(Code), ":", Colon),
|
||||
('/', Some(Code), "+=", PlusEq),
|
||||
]
|
||||
.into_iter()
|
||||
}
|
||||
|
||||
macro_rules! t {
|
||||
(Both $($tts:tt)*) => {
|
||||
t!(Markup $($tts)*);
|
||||
t!(Code $($tts)*);
|
||||
};
|
||||
($mode:ident $([$blocks:literal])?: $text:expr => $($token:expr),*) => {{
|
||||
// Test without suffix.
|
||||
t!(@$mode: $text => $($token),*);
|
||||
|
||||
// Test with each applicable suffix.
|
||||
for (block, mode, suffix, ref token) in suffixes() {
|
||||
let text = $text;
|
||||
#[allow(unused_variables)]
|
||||
let blocks = BLOCKS;
|
||||
$(let blocks = $blocks;)?
|
||||
assert!(!blocks.contains(|c| !BLOCKS.contains(c)));
|
||||
if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) {
|
||||
t!(@$mode: format!("{}{}", text, suffix) => $($token,)* token);
|
||||
}
|
||||
}
|
||||
}};
|
||||
(@$mode:ident: $text:expr => $($token:expr),*) => {{
|
||||
let text = $text;
|
||||
let found = Tokens::new(&text, $mode).collect::<Vec<_>>();
|
||||
let expected = vec![$($token.clone()),*];
|
||||
check(&text, found, expected);
|
||||
}};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_brackets() {
|
||||
// Test in markup.
|
||||
t!(Markup: "{" => LeftBrace);
|
||||
t!(Markup: "}" => RightBrace);
|
||||
t!(Markup: "[" => LeftBracket);
|
||||
t!(Markup: "]" => RightBracket);
|
||||
t!(Markup[" /"]: "(" => Text("("));
|
||||
t!(Markup[" /"]: ")" => Text(")"));
|
||||
|
||||
// Test in code.
|
||||
t!(Code: "{" => LeftBrace);
|
||||
t!(Code: "}" => RightBrace);
|
||||
t!(Code: "[" => LeftBracket);
|
||||
t!(Code: "]" => RightBracket);
|
||||
t!(Code: "(" => LeftParen);
|
||||
t!(Code: ")" => RightParen);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_whitespace() {
|
||||
// Test basic whitespace.
|
||||
t!(Both["a1/"]: "" => );
|
||||
t!(Both["a1/"]: " " => Space(0));
|
||||
t!(Both["a1/"]: " " => Space(0));
|
||||
t!(Both["a1/"]: "\t" => Space(0));
|
||||
t!(Both["a1/"]: " \t" => Space(0));
|
||||
t!(Both["a1/"]: "\u{202F}" => Space(0));
|
||||
|
||||
// Test newline counting.
|
||||
t!(Both["a1/"]: "\n" => Space(1));
|
||||
t!(Both["a1/"]: "\n " => Space(1));
|
||||
t!(Both["a1/"]: " \n" => Space(1));
|
||||
t!(Both["a1/"]: " \n " => Space(1));
|
||||
t!(Both["a1/"]: "\r\n" => Space(1));
|
||||
t!(Both["a1/"]: "\r\n\r" => Space(2));
|
||||
t!(Both["a1/"]: " \n\t \n " => Space(2));
|
||||
t!(Both["a1/"]: "\n\r" => Space(2));
|
||||
t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_text() {
|
||||
// Test basic text.
|
||||
t!(Markup[" /"]: "hello" => Text("hello"));
|
||||
t!(Markup[" /"]: "reha-world" => Text("reha-world"));
|
||||
|
||||
// Test code symbols in text.
|
||||
t!(Markup[" /"]: "a():\"b" => Text("a()"), Colon, SmartQuote { double: true }, Text("b"));
|
||||
t!(Markup[" /"]: ";,|/+" => Text(";,|/+"));
|
||||
t!(Markup[" /"]: "=-a" => Eq, Minus, Text("a"));
|
||||
t!(Markup[" "]: "#123" => Text("#123"));
|
||||
|
||||
// Test text ends.
|
||||
t!(Markup[""]: "hello " => Text("hello"), Space(0));
|
||||
t!(Markup[""]: "hello~" => Text("hello"), Shorthand('\u{00A0}'));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_escape_sequences() {
|
||||
// Test escapable symbols.
|
||||
t!(Markup: r"\\" => Escape('\\'));
|
||||
t!(Markup: r"\/" => Escape('/'));
|
||||
t!(Markup: r"\[" => Escape('['));
|
||||
t!(Markup: r"\]" => Escape(']'));
|
||||
t!(Markup: r"\{" => Escape('{'));
|
||||
t!(Markup: r"\}" => Escape('}'));
|
||||
t!(Markup: r"\*" => Escape('*'));
|
||||
t!(Markup: r"\_" => Escape('_'));
|
||||
t!(Markup: r"\=" => Escape('='));
|
||||
t!(Markup: r"\~" => Escape('~'));
|
||||
t!(Markup: r"\'" => Escape('\''));
|
||||
t!(Markup: r#"\""# => Escape('"'));
|
||||
t!(Markup: r"\`" => Escape('`'));
|
||||
t!(Markup: r"\$" => Escape('$'));
|
||||
t!(Markup: r"\#" => Escape('#'));
|
||||
t!(Markup: r"\a" => Escape('a'));
|
||||
t!(Markup: r"\u" => Escape('u'));
|
||||
t!(Markup: r"\1" => Escape('1'));
|
||||
|
||||
// Test basic unicode escapes.
|
||||
t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence"));
|
||||
t!(Markup: r"\u{2603}" => Escape('☃'));
|
||||
t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence"));
|
||||
|
||||
// Test unclosed unicode escapes.
|
||||
t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace"));
|
||||
t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace"));
|
||||
t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace"));
|
||||
t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace"));
|
||||
t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_markup_symbols() {
|
||||
// Test markup tokens.
|
||||
t!(Markup[" a1"]: "*" => Star);
|
||||
t!(Markup: "_" => Underscore);
|
||||
t!(Markup[""]: "===" => Eq, Eq, Eq);
|
||||
t!(Markup["a1/"]: "= " => Eq, Space(0));
|
||||
t!(Markup[" "]: r"\" => Linebreak);
|
||||
t!(Markup: "~" => Shorthand('\u{00A0}'));
|
||||
t!(Markup["a1/"]: "-?" => Shorthand('\u{00AD}'));
|
||||
t!(Markup["a "]: r"a--" => Text("a"), Shorthand('\u{2013}'));
|
||||
t!(Markup["a1/"]: "- " => Minus, Space(0));
|
||||
t!(Markup[" "]: "+" => Plus);
|
||||
t!(Markup[" "]: "1." => EnumNumbering(NonZeroUsize::new(1).unwrap()));
|
||||
t!(Markup[" "]: "1.a" => EnumNumbering(NonZeroUsize::new(1).unwrap()), Text("a"));
|
||||
t!(Markup[" /"]: "a1." => Text("a1."));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_code_symbols() {
|
||||
// Test all symbols.
|
||||
t!(Code: "," => Comma);
|
||||
t!(Code: ";" => Semicolon);
|
||||
t!(Code: ":" => Colon);
|
||||
t!(Code: "+" => Plus);
|
||||
t!(Code: "-" => Minus);
|
||||
t!(Code[" a1"]: "*" => Star);
|
||||
t!(Code[" a1"]: "/" => Slash);
|
||||
t!(Code[" a/"]: "." => Dot);
|
||||
t!(Code: "=" => Eq);
|
||||
t!(Code: "==" => EqEq);
|
||||
t!(Code: "!=" => ExclEq);
|
||||
t!(Code[" /"]: "<" => Lt);
|
||||
t!(Code: "<=" => LtEq);
|
||||
t!(Code: ">" => Gt);
|
||||
t!(Code: ">=" => GtEq);
|
||||
t!(Code: "+=" => PlusEq);
|
||||
t!(Code: "-=" => HyphEq);
|
||||
t!(Code: "*=" => StarEq);
|
||||
t!(Code: "/=" => SlashEq);
|
||||
t!(Code: ".." => Dots);
|
||||
t!(Code: "=>" => Arrow);
|
||||
|
||||
// Test combinations.
|
||||
t!(Code: "<=>" => LtEq, Gt);
|
||||
t!(Code[" a/"]: "..." => Dots, Dot);
|
||||
|
||||
// Test hyphen as symbol vs part of identifier.
|
||||
t!(Code[" /"]: "-1" => Minus, Int(1));
|
||||
t!(Code[" /"]: "-a" => Minus, Ident("a"));
|
||||
t!(Code[" /"]: "--1" => Minus, Minus, Int(1));
|
||||
t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a"));
|
||||
t!(Code[" /"]: "a-b" => Ident("a-b"));
|
||||
|
||||
// Test invalid.
|
||||
t!(Code: r"\" => Error(Full, "not valid here"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_keywords() {
|
||||
// A list of a few (not all) keywords.
|
||||
let list = [
|
||||
("not", Not),
|
||||
("let", Let),
|
||||
("if", If),
|
||||
("else", Else),
|
||||
("for", For),
|
||||
("in", In),
|
||||
("import", Import),
|
||||
];
|
||||
|
||||
for (s, t) in list.clone() {
|
||||
t!(Markup[" "]: format!("#{}", s) => t);
|
||||
t!(Markup[" "]: format!("#{0}#{0}", s) => t, t);
|
||||
t!(Markup[" /"]: format!("# {}", s) => Text(&format!("# {s}")));
|
||||
}
|
||||
|
||||
for (s, t) in list {
|
||||
t!(Code[" "]: s => t);
|
||||
t!(Markup[" /"]: s => Text(s));
|
||||
}
|
||||
|
||||
// Test simple identifier.
|
||||
t!(Markup[" "]: "#letter" => Ident("letter"));
|
||||
t!(Code[" /"]: "falser" => Ident("falser"));
|
||||
t!(Code[" /"]: "None" => Ident("None"));
|
||||
t!(Code[" /"]: "True" => Ident("True"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_raw_blocks() {
|
||||
// Test basic raw block.
|
||||
t!(Markup: "``" => Raw("", None, false));
|
||||
t!(Markup: "`raw`" => Raw("raw", None, false));
|
||||
t!(Markup[""]: "`]" => Error(End, "expected 1 backtick"));
|
||||
|
||||
// Test special symbols in raw block.
|
||||
t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false));
|
||||
t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick"));
|
||||
|
||||
// Test separated closing backticks.
|
||||
t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false));
|
||||
|
||||
// Test more backticks.
|
||||
t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false));
|
||||
t!(Markup: "````🚀````" => Raw("", None, false));
|
||||
t!(Markup[""]: "`````👩🚀````noend" => Error(End, "expected 5 backticks"));
|
||||
t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_idents() {
|
||||
// Test valid identifiers.
|
||||
t!(Code[" /"]: "x" => Ident("x"));
|
||||
t!(Code[" /"]: "value" => Ident("value"));
|
||||
t!(Code[" /"]: "__main__" => Ident("__main__"));
|
||||
t!(Code[" /"]: "_snake_case" => Ident("_snake_case"));
|
||||
|
||||
// Test non-ascii.
|
||||
t!(Code[" /"]: "α" => Ident("α"));
|
||||
t!(Code[" /"]: "ម្តាយ" => Ident("ម្តាយ"));
|
||||
|
||||
// Test hyphen parsed as identifier.
|
||||
t!(Code[" /"]: "kebab-case" => Ident("kebab-case"));
|
||||
t!(Code[" /"]: "one-10" => Ident("one-10"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_numeric() {
|
||||
let ints = [("7", 7), ("012", 12)];
|
||||
let floats = [
|
||||
(".3", 0.3),
|
||||
("0.3", 0.3),
|
||||
("3.", 3.0),
|
||||
("3.0", 3.0),
|
||||
("14.3", 14.3),
|
||||
("10e2", 1000.0),
|
||||
("10e+0", 10.0),
|
||||
("10e+1", 100.0),
|
||||
("10e-2", 0.1),
|
||||
("10.e1", 100.0),
|
||||
("10.e-1", 1.0),
|
||||
(".1e1", 1.0),
|
||||
("10E2", 1000.0),
|
||||
];
|
||||
|
||||
// Test integers.
|
||||
for &(s, v) in &ints {
|
||||
t!(Code[" /"]: s => Int(v));
|
||||
}
|
||||
|
||||
// Test floats.
|
||||
for &(s, v) in &floats {
|
||||
t!(Code[" /"]: s => Float(v));
|
||||
}
|
||||
|
||||
// Test attached numbers.
|
||||
t!(Code[" /"]: ".2.3" => Float(0.2), Float(0.3));
|
||||
t!(Code[" /"]: "1.2.3" => Float(1.2), Float(0.3));
|
||||
t!(Code[" /"]: "1e-2+3" => Float(0.01), Plus, Int(3));
|
||||
|
||||
// Test float from too large integer.
|
||||
let large = i64::MAX as f64 + 1.0;
|
||||
t!(Code[" /"]: large.to_string() => Float(large));
|
||||
|
||||
// Combined integers and floats.
|
||||
let nums = ints.iter().map(|&(k, v)| (k, v as f64)).chain(floats);
|
||||
|
||||
let suffixes: &[(&str, fn(f64) -> SyntaxKind)] = &[
|
||||
("mm", |x| Numeric(x, Unit::Length(AbsUnit::Mm))),
|
||||
("pt", |x| Numeric(x, Unit::Length(AbsUnit::Pt))),
|
||||
("cm", |x| Numeric(x, Unit::Length(AbsUnit::Cm))),
|
||||
("in", |x| Numeric(x, Unit::Length(AbsUnit::In))),
|
||||
("rad", |x| Numeric(x, Unit::Angle(AngleUnit::Rad))),
|
||||
("deg", |x| Numeric(x, Unit::Angle(AngleUnit::Deg))),
|
||||
("em", |x| Numeric(x, Unit::Em)),
|
||||
("fr", |x| Numeric(x, Unit::Fr)),
|
||||
("%", |x| Numeric(x, Unit::Percent)),
|
||||
];
|
||||
|
||||
// Numeric types.
|
||||
for &(suffix, build) in suffixes {
|
||||
for (s, v) in nums.clone() {
|
||||
t!(Code[" /"]: format!("{}{}", s, suffix) => build(v));
|
||||
}
|
||||
}
|
||||
|
||||
// Multiple dots close the number.
|
||||
t!(Code[" /"]: "1..2" => Int(1), Dots, Int(2));
|
||||
t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3));
|
||||
t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3));
|
||||
|
||||
// Test invalid.
|
||||
t!(Code[" /"]: "1foo" => Error(Full, "invalid number suffix"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_strings() {
|
||||
// Test basic strings.
|
||||
t!(Code: "\"hi\"" => Str("hi"));
|
||||
t!(Code: "\"hi\nthere\"" => Str("hi\nthere"));
|
||||
t!(Code: "\"🌎\"" => Str("🌎"));
|
||||
|
||||
// Test unterminated.
|
||||
t!(Code[""]: "\"hi" => Error(End, "expected quote"));
|
||||
|
||||
// Test escaped quote.
|
||||
t!(Code: r#""a\"bc""# => Str("a\"bc"));
|
||||
t!(Code[""]: r#""\""# => Error(End, "expected quote"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_line_comments() {
|
||||
// Test line comment with no trailing newline.
|
||||
t!(Both[""]: "//" => LineComment);
|
||||
|
||||
// Test line comment ends at newline.
|
||||
t!(Both["a1/"]: "//bc\n" => LineComment, Space(1));
|
||||
t!(Both["a1/"]: "// bc \n" => LineComment, Space(1));
|
||||
t!(Both["a1/"]: "//bc\r\n" => LineComment, Space(1));
|
||||
|
||||
// Test nested line comments.
|
||||
t!(Both["a1/"]: "//a//b\n" => LineComment, Space(1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_block_comments() {
|
||||
// Test basic block comments.
|
||||
t!(Both[""]: "/*" => BlockComment);
|
||||
t!(Both: "/**/" => BlockComment);
|
||||
t!(Both: "/*🏞*/" => BlockComment);
|
||||
t!(Both: "/*\n*/" => BlockComment);
|
||||
|
||||
// Test depth 1 and 2 nested block comments.
|
||||
t!(Both: "/* /* */ */" => BlockComment);
|
||||
t!(Both: "/*/*/**/*/*/" => BlockComment);
|
||||
|
||||
// Test two nested, one unclosed block comments.
|
||||
t!(Both[""]: "/*/*/**/*/" => BlockComment);
|
||||
|
||||
// Test all combinations of up to two following slashes and stars.
|
||||
t!(Both[""]: "/*" => BlockComment);
|
||||
t!(Both[""]: "/*/" => BlockComment);
|
||||
t!(Both[""]: "/**" => BlockComment);
|
||||
t!(Both[""]: "/*//" => BlockComment);
|
||||
t!(Both[""]: "/*/*" => BlockComment);
|
||||
t!(Both[""]: "/**/" => BlockComment);
|
||||
t!(Both[""]: "/***" => BlockComment);
|
||||
|
||||
// Test unexpected terminator.
|
||||
t!(Both: "/*Hi*/*/" => BlockComment,
|
||||
Error(Full, "unexpected end of block comment"));
|
||||
}
|
@ -35,14 +35,12 @@ pub enum TokenMode {
|
||||
|
||||
impl<'s> Tokens<'s> {
|
||||
/// Create a new token iterator with the given mode.
|
||||
#[inline]
|
||||
pub fn new(text: &'s str, mode: TokenMode) -> Self {
|
||||
Self::with_prefix("", text, mode)
|
||||
}
|
||||
|
||||
/// Create a new token iterator with the given mode and a prefix to offset
|
||||
/// column calculations.
|
||||
#[inline]
|
||||
pub fn with_prefix(prefix: &str, text: &'s str, mode: TokenMode) -> Self {
|
||||
Self {
|
||||
s: Scanner::new(text),
|
||||
@ -53,54 +51,46 @@ impl<'s> Tokens<'s> {
|
||||
}
|
||||
|
||||
/// Get the current token mode.
|
||||
#[inline]
|
||||
pub fn mode(&self) -> TokenMode {
|
||||
self.mode
|
||||
}
|
||||
|
||||
/// Change the token mode.
|
||||
#[inline]
|
||||
pub fn set_mode(&mut self, mode: TokenMode) {
|
||||
self.mode = mode;
|
||||
}
|
||||
|
||||
/// The index in the string at which the last token ends and next token
|
||||
/// will start.
|
||||
#[inline]
|
||||
pub fn cursor(&self) -> usize {
|
||||
self.s.cursor()
|
||||
}
|
||||
|
||||
/// Jump to the given index in the string.
|
||||
#[inline]
|
||||
pub fn jump(&mut self, index: usize) {
|
||||
self.s.jump(index);
|
||||
}
|
||||
|
||||
/// The underlying scanner.
|
||||
#[inline]
|
||||
pub fn scanner(&self) -> Scanner<'s> {
|
||||
self.s
|
||||
}
|
||||
|
||||
/// Whether the last token was terminated.
|
||||
#[inline]
|
||||
pub fn terminated(&self) -> bool {
|
||||
self.terminated
|
||||
}
|
||||
|
||||
/// The column index of a given index in the source string.
|
||||
#[inline]
|
||||
pub fn column(&self, index: usize) -> usize {
|
||||
column(self.s.string(), index, self.column_offset)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Iterator for Tokens<'s> {
|
||||
impl Iterator for Tokens<'_> {
|
||||
type Item = SyntaxKind;
|
||||
|
||||
/// Parse the next token in the source code.
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let start = self.s.cursor();
|
||||
let c = self.s.eat()?;
|
||||
@ -124,7 +114,8 @@ impl<'s> Iterator for Tokens<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Tokens<'s> {
|
||||
/// Shared.
|
||||
impl Tokens<'_> {
|
||||
fn line_comment(&mut self) -> SyntaxKind {
|
||||
self.s.eat_until(is_newline);
|
||||
if self.s.peek().is_none() {
|
||||
@ -189,8 +180,9 @@ impl<'s> Tokens<'s> {
|
||||
|
||||
SyntaxKind::Space { newlines }
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
impl Tokens<'_> {
|
||||
fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
|
||||
match c {
|
||||
// Blocks.
|
||||
@ -231,7 +223,6 @@ impl<'s> Tokens<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn text(&mut self, start: usize) -> SyntaxKind {
|
||||
macro_rules! table {
|
||||
($(|$c:literal)*) => {{
|
||||
@ -303,7 +294,11 @@ impl<'s> Tokens<'s> {
|
||||
}
|
||||
|
||||
fn hash(&mut self, start: usize) -> SyntaxKind {
|
||||
if self.s.at(is_id_start) {
|
||||
if self.s.eat_if('{') {
|
||||
SyntaxKind::LeftBrace
|
||||
} else if self.s.eat_if('[') {
|
||||
SyntaxKind::LeftBracket
|
||||
} else if self.s.at(is_id_start) {
|
||||
let read = self.s.eat_while(is_id_continue);
|
||||
match keyword(read) {
|
||||
Some(keyword) => keyword,
|
||||
@ -342,8 +337,10 @@ impl<'s> Tokens<'s> {
|
||||
if start < end {
|
||||
self.s.expect(':');
|
||||
SyntaxKind::Symbol(self.s.get(start..end).into())
|
||||
} else {
|
||||
} else if self.mode == TokenMode::Markup {
|
||||
SyntaxKind::Colon
|
||||
} else {
|
||||
SyntaxKind::Atom(":".into())
|
||||
}
|
||||
}
|
||||
|
||||
@ -426,26 +423,25 @@ impl<'s> Tokens<'s> {
|
||||
self.text(start)
|
||||
}
|
||||
|
||||
fn label(&mut self) -> SyntaxKind {
|
||||
let label = self.s.eat_while(is_id_continue);
|
||||
if self.s.eat_if('>') {
|
||||
if !label.is_empty() {
|
||||
SyntaxKind::Label(label.into())
|
||||
} else {
|
||||
SyntaxKind::Error(ErrorPos::Full, "label cannot be empty".into())
|
||||
}
|
||||
} else {
|
||||
self.terminated = false;
|
||||
SyntaxKind::Error(ErrorPos::End, "expected closing angle bracket".into())
|
||||
}
|
||||
}
|
||||
|
||||
fn reference(&mut self) -> SyntaxKind {
|
||||
SyntaxKind::Ref(self.s.eat_while(is_id_continue).into())
|
||||
}
|
||||
|
||||
fn in_word(&self) -> bool {
|
||||
let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
|
||||
let prev = self.s.scout(-2);
|
||||
let next = self.s.peek();
|
||||
alphanumeric(prev) && alphanumeric(next)
|
||||
}
|
||||
}
|
||||
|
||||
/// Math.
|
||||
impl Tokens<'_> {
|
||||
fn math(&mut self, start: usize, c: char) -> SyntaxKind {
|
||||
match c {
|
||||
// Multi-char things.
|
||||
'#' => self.hash(start),
|
||||
|
||||
// Escape sequences.
|
||||
'\\' => self.backslash(),
|
||||
|
||||
@ -456,18 +452,32 @@ impl<'s> Tokens<'s> {
|
||||
'&' => SyntaxKind::Amp,
|
||||
'$' => SyntaxKind::Dollar,
|
||||
|
||||
// Brackets.
|
||||
'{' => SyntaxKind::LeftBrace,
|
||||
'}' => SyntaxKind::RightBrace,
|
||||
'[' => SyntaxKind::LeftBracket,
|
||||
']' => SyntaxKind::RightBracket,
|
||||
'(' => SyntaxKind::LeftParen,
|
||||
')' => SyntaxKind::RightParen,
|
||||
// Symbol notation.
|
||||
':' => self.colon(),
|
||||
|
||||
// Identifiers.
|
||||
// Strings.
|
||||
'"' => self.string(),
|
||||
|
||||
// Identifiers and symbol notation.
|
||||
c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
|
||||
self.s.eat_while(is_math_id_continue);
|
||||
SyntaxKind::Ident(self.s.from(start).into())
|
||||
|
||||
let mut symbol = false;
|
||||
while self.s.eat_if(':')
|
||||
&& !self.s.eat_while(char::is_alphanumeric).is_empty()
|
||||
{
|
||||
symbol = true;
|
||||
}
|
||||
|
||||
if symbol {
|
||||
SyntaxKind::Symbol(self.s.from(start).into())
|
||||
} else {
|
||||
if self.s.scout(-1) == Some(':') {
|
||||
self.s.uneat();
|
||||
}
|
||||
|
||||
SyntaxKind::Ident(self.s.from(start).into())
|
||||
}
|
||||
}
|
||||
|
||||
// Numbers.
|
||||
@ -480,7 +490,10 @@ impl<'s> Tokens<'s> {
|
||||
c => SyntaxKind::Atom(c.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Code.
|
||||
impl Tokens<'_> {
|
||||
fn code(&mut self, start: usize, c: char) -> SyntaxKind {
|
||||
match c {
|
||||
// Blocks.
|
||||
@ -493,6 +506,9 @@ impl<'s> Tokens<'s> {
|
||||
'(' => SyntaxKind::LeftParen,
|
||||
')' => SyntaxKind::RightParen,
|
||||
|
||||
// Math.
|
||||
'$' => SyntaxKind::Dollar,
|
||||
|
||||
// Labels.
|
||||
'<' if self.s.at(is_id_continue) => self.label(),
|
||||
|
||||
@ -619,14 +635,22 @@ impl<'s> Tokens<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
fn in_word(&self) -> bool {
|
||||
let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
|
||||
let prev = self.s.scout(-2);
|
||||
let next = self.s.peek();
|
||||
alphanumeric(prev) && alphanumeric(next)
|
||||
fn label(&mut self) -> SyntaxKind {
|
||||
let label = self.s.eat_while(is_id_continue);
|
||||
if self.s.eat_if('>') {
|
||||
if !label.is_empty() {
|
||||
SyntaxKind::Label(label.into())
|
||||
} else {
|
||||
SyntaxKind::Error(ErrorPos::Full, "label cannot be empty".into())
|
||||
}
|
||||
} else {
|
||||
self.terminated = false;
|
||||
SyntaxKind::Error(ErrorPos::End, "expected closing angle bracket".into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to parse an identifier into a keyword.
|
||||
fn keyword(ident: &str) -> Option<SyntaxKind> {
|
||||
Some(match ident {
|
||||
"not" => SyntaxKind::Not,
|
||||
@ -652,7 +676,6 @@ fn keyword(ident: &str) -> Option<SyntaxKind> {
|
||||
|
||||
/// The column index of a given index in the source string, given a column
|
||||
/// offset for the first line.
|
||||
#[inline]
|
||||
fn column(string: &str, index: usize, offset: usize) -> usize {
|
||||
let mut apply_offset = false;
|
||||
let res = string[..index]
|
||||
@ -729,471 +752,3 @@ fn is_math_id_start(c: char) -> bool {
|
||||
fn is_math_id_continue(c: char) -> bool {
|
||||
c.is_xid_continue() && c != '_'
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(non_snake_case)]
|
||||
mod tests {
|
||||
use super::super::tests::check;
|
||||
use super::*;
|
||||
|
||||
use ErrorPos::*;
|
||||
use Option::None;
|
||||
use SyntaxKind::*;
|
||||
use TokenMode::{Code, Markup};
|
||||
|
||||
fn Space(newlines: usize) -> SyntaxKind {
|
||||
SyntaxKind::Space { newlines }
|
||||
}
|
||||
|
||||
fn Raw(text: &str, lang: Option<&str>, block: bool) -> SyntaxKind {
|
||||
SyntaxKind::Raw(Arc::new(RawFields {
|
||||
text: text.into(),
|
||||
lang: lang.map(Into::into),
|
||||
block,
|
||||
}))
|
||||
}
|
||||
|
||||
fn Str(string: &str) -> SyntaxKind {
|
||||
SyntaxKind::Str(string.into())
|
||||
}
|
||||
|
||||
fn Text(string: &str) -> SyntaxKind {
|
||||
SyntaxKind::Text(string.into())
|
||||
}
|
||||
|
||||
fn Ident(ident: &str) -> SyntaxKind {
|
||||
SyntaxKind::Ident(ident.into())
|
||||
}
|
||||
|
||||
fn Error(pos: ErrorPos, message: &str) -> SyntaxKind {
|
||||
SyntaxKind::Error(pos, message.into())
|
||||
}
|
||||
|
||||
/// Building blocks for suffix testing.
|
||||
///
|
||||
/// We extend each test case with a collection of different suffixes to make
|
||||
/// sure tokens end at the correct position. These suffixes are split into
|
||||
/// blocks, which can be disabled/enabled per test case. For example, when
|
||||
/// testing identifiers we disable letter suffixes because these would
|
||||
/// mingle with the identifiers.
|
||||
///
|
||||
/// Suffix blocks:
|
||||
/// - ' ': spacing
|
||||
/// - 'a': letters
|
||||
/// - '1': numbers
|
||||
/// - '/': symbols
|
||||
const BLOCKS: &str = " a1/";
|
||||
|
||||
// Suffixes described by four-tuples of:
|
||||
//
|
||||
// - block the suffix is part of
|
||||
// - mode in which the suffix is applicable
|
||||
// - the suffix string
|
||||
// - the resulting suffix NodeKind
|
||||
fn suffixes(
|
||||
) -> impl Iterator<Item = (char, Option<TokenMode>, &'static str, SyntaxKind)> {
|
||||
[
|
||||
// Whitespace suffixes.
|
||||
(' ', None, " ", Space(0)),
|
||||
(' ', None, "\n", Space(1)),
|
||||
(' ', None, "\r", Space(1)),
|
||||
(' ', None, "\r\n", Space(1)),
|
||||
// Letter suffixes.
|
||||
('a', Some(Markup), "hello", Text("hello")),
|
||||
('a', Some(Markup), "💚", Text("💚")),
|
||||
('a', Some(Code), "val", Ident("val")),
|
||||
('a', Some(Code), "α", Ident("α")),
|
||||
('a', Some(Code), "_", Ident("_")),
|
||||
// Number suffixes.
|
||||
('1', Some(Code), "2", Int(2)),
|
||||
('1', Some(Code), ".2", Float(0.2)),
|
||||
// Symbol suffixes.
|
||||
('/', None, "[", LeftBracket),
|
||||
('/', None, "//", LineComment),
|
||||
('/', None, "/**/", BlockComment),
|
||||
('/', Some(Markup), "*", Star),
|
||||
('/', Some(Markup), r"\\", Escape('\\')),
|
||||
('/', Some(Markup), "#let", Let),
|
||||
('/', Some(Code), "(", LeftParen),
|
||||
('/', Some(Code), ":", Colon),
|
||||
('/', Some(Code), "+=", PlusEq),
|
||||
]
|
||||
.into_iter()
|
||||
}
|
||||
|
||||
macro_rules! t {
|
||||
(Both $($tts:tt)*) => {
|
||||
t!(Markup $($tts)*);
|
||||
t!(Code $($tts)*);
|
||||
};
|
||||
($mode:ident $([$blocks:literal])?: $text:expr => $($token:expr),*) => {{
|
||||
// Test without suffix.
|
||||
t!(@$mode: $text => $($token),*);
|
||||
|
||||
// Test with each applicable suffix.
|
||||
for (block, mode, suffix, ref token) in suffixes() {
|
||||
let text = $text;
|
||||
#[allow(unused_variables)]
|
||||
let blocks = BLOCKS;
|
||||
$(let blocks = $blocks;)?
|
||||
assert!(!blocks.contains(|c| !BLOCKS.contains(c)));
|
||||
if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) {
|
||||
t!(@$mode: format!("{}{}", text, suffix) => $($token,)* token);
|
||||
}
|
||||
}
|
||||
}};
|
||||
(@$mode:ident: $text:expr => $($token:expr),*) => {{
|
||||
let text = $text;
|
||||
let found = Tokens::new(&text, $mode).collect::<Vec<_>>();
|
||||
let expected = vec![$($token.clone()),*];
|
||||
check(&text, found, expected);
|
||||
}};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_brackets() {
|
||||
// Test in markup.
|
||||
t!(Markup: "{" => LeftBrace);
|
||||
t!(Markup: "}" => RightBrace);
|
||||
t!(Markup: "[" => LeftBracket);
|
||||
t!(Markup: "]" => RightBracket);
|
||||
t!(Markup[" /"]: "(" => Text("("));
|
||||
t!(Markup[" /"]: ")" => Text(")"));
|
||||
|
||||
// Test in code.
|
||||
t!(Code: "{" => LeftBrace);
|
||||
t!(Code: "}" => RightBrace);
|
||||
t!(Code: "[" => LeftBracket);
|
||||
t!(Code: "]" => RightBracket);
|
||||
t!(Code: "(" => LeftParen);
|
||||
t!(Code: ")" => RightParen);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_whitespace() {
|
||||
// Test basic whitespace.
|
||||
t!(Both["a1/"]: "" => );
|
||||
t!(Both["a1/"]: " " => Space(0));
|
||||
t!(Both["a1/"]: " " => Space(0));
|
||||
t!(Both["a1/"]: "\t" => Space(0));
|
||||
t!(Both["a1/"]: " \t" => Space(0));
|
||||
t!(Both["a1/"]: "\u{202F}" => Space(0));
|
||||
|
||||
// Test newline counting.
|
||||
t!(Both["a1/"]: "\n" => Space(1));
|
||||
t!(Both["a1/"]: "\n " => Space(1));
|
||||
t!(Both["a1/"]: " \n" => Space(1));
|
||||
t!(Both["a1/"]: " \n " => Space(1));
|
||||
t!(Both["a1/"]: "\r\n" => Space(1));
|
||||
t!(Both["a1/"]: "\r\n\r" => Space(2));
|
||||
t!(Both["a1/"]: " \n\t \n " => Space(2));
|
||||
t!(Both["a1/"]: "\n\r" => Space(2));
|
||||
t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_text() {
|
||||
// Test basic text.
|
||||
t!(Markup[" /"]: "hello" => Text("hello"));
|
||||
t!(Markup[" /"]: "reha-world" => Text("reha-world"));
|
||||
|
||||
// Test code symbols in text.
|
||||
t!(Markup[" /"]: "a():\"b" => Text("a()"), Colon, SmartQuote { double: true }, Text("b"));
|
||||
t!(Markup[" /"]: ";,|/+" => Text(";,|/+"));
|
||||
t!(Markup[" /"]: "=-a" => Eq, Minus, Text("a"));
|
||||
t!(Markup[" "]: "#123" => Text("#123"));
|
||||
|
||||
// Test text ends.
|
||||
t!(Markup[""]: "hello " => Text("hello"), Space(0));
|
||||
t!(Markup[""]: "hello~" => Text("hello"), Shorthand('\u{00A0}'));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_escape_sequences() {
|
||||
// Test escapable symbols.
|
||||
t!(Markup: r"\\" => Escape('\\'));
|
||||
t!(Markup: r"\/" => Escape('/'));
|
||||
t!(Markup: r"\[" => Escape('['));
|
||||
t!(Markup: r"\]" => Escape(']'));
|
||||
t!(Markup: r"\{" => Escape('{'));
|
||||
t!(Markup: r"\}" => Escape('}'));
|
||||
t!(Markup: r"\*" => Escape('*'));
|
||||
t!(Markup: r"\_" => Escape('_'));
|
||||
t!(Markup: r"\=" => Escape('='));
|
||||
t!(Markup: r"\~" => Escape('~'));
|
||||
t!(Markup: r"\'" => Escape('\''));
|
||||
t!(Markup: r#"\""# => Escape('"'));
|
||||
t!(Markup: r"\`" => Escape('`'));
|
||||
t!(Markup: r"\$" => Escape('$'));
|
||||
t!(Markup: r"\#" => Escape('#'));
|
||||
t!(Markup: r"\a" => Escape('a'));
|
||||
t!(Markup: r"\u" => Escape('u'));
|
||||
t!(Markup: r"\1" => Escape('1'));
|
||||
|
||||
// Test basic unicode escapes.
|
||||
t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence"));
|
||||
t!(Markup: r"\u{2603}" => Escape('☃'));
|
||||
t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence"));
|
||||
|
||||
// Test unclosed unicode escapes.
|
||||
t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace"));
|
||||
t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace"));
|
||||
t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace"));
|
||||
t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace"));
|
||||
t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_markup_symbols() {
|
||||
// Test markup tokens.
|
||||
t!(Markup[" a1"]: "*" => Star);
|
||||
t!(Markup: "_" => Underscore);
|
||||
t!(Markup[""]: "===" => Eq, Eq, Eq);
|
||||
t!(Markup["a1/"]: "= " => Eq, Space(0));
|
||||
t!(Markup[" "]: r"\" => Linebreak);
|
||||
t!(Markup: "~" => Shorthand('\u{00A0}'));
|
||||
t!(Markup["a1/"]: "-?" => Shorthand('\u{00AD}'));
|
||||
t!(Markup["a "]: r"a--" => Text("a"), Shorthand('\u{2013}'));
|
||||
t!(Markup["a1/"]: "- " => Minus, Space(0));
|
||||
t!(Markup[" "]: "+" => Plus);
|
||||
t!(Markup[" "]: "1." => EnumNumbering(NonZeroUsize::new(1).unwrap()));
|
||||
t!(Markup[" "]: "1.a" => EnumNumbering(NonZeroUsize::new(1).unwrap()), Text("a"));
|
||||
t!(Markup[" /"]: "a1." => Text("a1."));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_code_symbols() {
|
||||
// Test all symbols.
|
||||
t!(Code: "," => Comma);
|
||||
t!(Code: ";" => Semicolon);
|
||||
t!(Code: ":" => Colon);
|
||||
t!(Code: "+" => Plus);
|
||||
t!(Code: "-" => Minus);
|
||||
t!(Code[" a1"]: "*" => Star);
|
||||
t!(Code[" a1"]: "/" => Slash);
|
||||
t!(Code[" a/"]: "." => Dot);
|
||||
t!(Code: "=" => Eq);
|
||||
t!(Code: "==" => EqEq);
|
||||
t!(Code: "!=" => ExclEq);
|
||||
t!(Code[" /"]: "<" => Lt);
|
||||
t!(Code: "<=" => LtEq);
|
||||
t!(Code: ">" => Gt);
|
||||
t!(Code: ">=" => GtEq);
|
||||
t!(Code: "+=" => PlusEq);
|
||||
t!(Code: "-=" => HyphEq);
|
||||
t!(Code: "*=" => StarEq);
|
||||
t!(Code: "/=" => SlashEq);
|
||||
t!(Code: ".." => Dots);
|
||||
t!(Code: "=>" => Arrow);
|
||||
|
||||
// Test combinations.
|
||||
t!(Code: "<=>" => LtEq, Gt);
|
||||
t!(Code[" a/"]: "..." => Dots, Dot);
|
||||
|
||||
// Test hyphen as symbol vs part of identifier.
|
||||
t!(Code[" /"]: "-1" => Minus, Int(1));
|
||||
t!(Code[" /"]: "-a" => Minus, Ident("a"));
|
||||
t!(Code[" /"]: "--1" => Minus, Minus, Int(1));
|
||||
t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a"));
|
||||
t!(Code[" /"]: "a-b" => Ident("a-b"));
|
||||
|
||||
// Test invalid.
|
||||
t!(Code: r"\" => Error(Full, "not valid here"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_keywords() {
|
||||
// A list of a few (not all) keywords.
|
||||
let list = [
|
||||
("not", Not),
|
||||
("let", Let),
|
||||
("if", If),
|
||||
("else", Else),
|
||||
("for", For),
|
||||
("in", In),
|
||||
("import", Import),
|
||||
];
|
||||
|
||||
for (s, t) in list.clone() {
|
||||
t!(Markup[" "]: format!("#{}", s) => t);
|
||||
t!(Markup[" "]: format!("#{0}#{0}", s) => t, t);
|
||||
t!(Markup[" /"]: format!("# {}", s) => Text(&format!("# {s}")));
|
||||
}
|
||||
|
||||
for (s, t) in list {
|
||||
t!(Code[" "]: s => t);
|
||||
t!(Markup[" /"]: s => Text(s));
|
||||
}
|
||||
|
||||
// Test simple identifier.
|
||||
t!(Markup[" "]: "#letter" => Ident("letter"));
|
||||
t!(Code[" /"]: "falser" => Ident("falser"));
|
||||
t!(Code[" /"]: "None" => Ident("None"));
|
||||
t!(Code[" /"]: "True" => Ident("True"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_raw_blocks() {
|
||||
// Test basic raw block.
|
||||
t!(Markup: "``" => Raw("", None, false));
|
||||
t!(Markup: "`raw`" => Raw("raw", None, false));
|
||||
t!(Markup[""]: "`]" => Error(End, "expected 1 backtick"));
|
||||
|
||||
// Test special symbols in raw block.
|
||||
t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false));
|
||||
t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick"));
|
||||
|
||||
// Test separated closing backticks.
|
||||
t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false));
|
||||
|
||||
// Test more backticks.
|
||||
t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false));
|
||||
t!(Markup: "````🚀````" => Raw("", None, false));
|
||||
t!(Markup[""]: "`````👩🚀````noend" => Error(End, "expected 5 backticks"));
|
||||
t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false));
|
||||
}
|
||||
|
||||
#[test]
fn test_tokenize_idents() {
    // Each entry must come back from code mode as a single `Ident` token
    // carrying exactly the source text.
    let names = [
        // Plain ASCII identifiers, including leading/trailing underscores.
        "x",
        "value",
        "__main__",
        "_snake_case",
        // Non-ASCII identifiers.
        "α",
        "ម្តាយ",
        // Hyphens are part of the identifier.
        "kebab-case",
        "one-10",
    ];

    for name in names {
        t!(Code[" /"]: name => Ident(name));
    }
}
|
||||
|
||||
#[test]
fn test_tokenize_numeric() {
    // Integer literals paired with the value they must lex to.
    let ints = [("7", 7), ("012", 12)];

    // Float literals paired with the value they must lex to.
    let floats = [
        (".3", 0.3),
        ("0.3", 0.3),
        ("3.", 3.0),
        ("3.0", 3.0),
        ("14.3", 14.3),
        ("10e2", 1000.0),
        ("10e+0", 10.0),
        ("10e+1", 100.0),
        ("10e-2", 0.1),
        ("10.e1", 100.0),
        ("10.e-1", 1.0),
        (".1e1", 1.0),
        ("10E2", 1000.0),
    ];

    // Every integer literal yields an `Int` token.
    for (text, value) in ints.iter().copied() {
        t!(Code[" /"]: text => Int(value));
    }

    // Every float literal yields a `Float` token.
    for (text, value) in floats.iter().copied() {
        t!(Code[" /"]: text => Float(value));
    }

    // Numbers written back to back are split into separate tokens.
    t!(Code[" /"]: ".2.3" => Float(0.2), Float(0.3));
    t!(Code[" /"]: "1.2.3" => Float(1.2), Float(0.3));
    t!(Code[" /"]: "1e-2+3" => Float(0.01), Plus, Int(3));

    // An integer literal one past `i64::MAX` must come out as a float.
    let too_big = i64::MAX as f64 + 1.0;
    t!(Code[" /"]: too_big.to_string() => Float(too_big));

    // All literals from above, with the integer values widened to `f64`.
    let all_numbers = ints.iter().map(|&(text, int)| (text, int as f64)).chain(floats);

    // Unit suffixes and the constructor for the token each one produces.
    let units: &[(&str, fn(f64) -> SyntaxKind)] = &[
        ("mm", |x| Numeric(x, Unit::Length(AbsUnit::Mm))),
        ("pt", |x| Numeric(x, Unit::Length(AbsUnit::Pt))),
        ("cm", |x| Numeric(x, Unit::Length(AbsUnit::Cm))),
        ("in", |x| Numeric(x, Unit::Length(AbsUnit::In))),
        ("rad", |x| Numeric(x, Unit::Angle(AngleUnit::Rad))),
        ("deg", |x| Numeric(x, Unit::Angle(AngleUnit::Deg))),
        ("em", |x| Numeric(x, Unit::Em)),
        ("fr", |x| Numeric(x, Unit::Fr)),
        ("%", |x| Numeric(x, Unit::Percent)),
    ];

    // Every literal combined with every unit suffix.
    for &(unit, make) in units {
        for (number, value) in all_numbers.clone() {
            t!(Code[" /"]: format!("{number}{unit}") => make(value));
        }
    }

    // Two or more dots end the number and lex as `Dots`.
    t!(Code[" /"]: "1..2" => Int(1), Dots, Int(2));
    t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3));
    t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3));

    // An unknown suffix turns the whole literal into an error.
    t!(Code[" /"]: "1foo" => Error(Full, "invalid number suffix"));
}
|
||||
|
||||
#[test]
fn test_tokenize_strings() {
    // Properly closed strings as (source, expected contents): plain text,
    // an embedded newline, an emoji and an escaped quote.
    let closed = [
        (r#""hi""#, "hi"),
        ("\"hi\nthere\"", "hi\nthere"),
        (r#""🌎""#, "🌎"),
        (r#""a\"bc""#, "a\"bc"),
    ];
    for (src, contents) in closed {
        t!(Code: src => Str(contents));
    }

    // Sources that reach the end of input without a closing quote; in the
    // second one the only candidate quote is escaped.
    for src in ["\"hi", r#""\""#] {
        t!(Code[""]: src => Error(End, "expected quote"));
    }
}
|
||||
|
||||
#[test]
fn test_tokenize_line_comments() {
    // A comment that runs to the end of input, with no newline after it.
    t!(Both[""]: "//" => LineComment);

    // The comment stops at the line break (`\n` or `\r\n`), which is lexed
    // as a separate space token. A second `//` inside the comment does not
    // start a new token.
    let newline_terminated = ["//bc\n", "// bc \n", "//bc\r\n", "//a//b\n"];
    for src in newline_terminated {
        t!(Both["a1/"]: src => LineComment, Space(1));
    }
}
|
||||
|
||||
#[test]
fn test_tokenize_block_comments() {
    // Closed comments: empty, with content, spanning a newline, and nested
    // one and two levels deep.
    let closed = ["/**/", "/*🏞*/", "/*\n*/", "/* /* */ */", "/*/*/**/*/*/"];
    for src in closed {
        t!(Both: src => BlockComment);
    }

    // Cases checked with the `[""]` block list: a bare opener, a doubly
    // nested comment with one level left open, and every combination of up
    // to two slashes and stars following `/*`.
    let edge_cases = [
        "/*",
        "/*/*/**/*/",
        "/*",
        "/*/",
        "/**",
        "/*//",
        "/*/*",
        "/**/",
        "/***",
    ];
    for src in edge_cases {
        t!(Both[""]: src => BlockComment);
    }

    // A stray `*/` after a closed comment is reported as an error.
    t!(Both: "/*Hi*/*/" => BlockComment,
       Error(Full, "unexpected end of block comment"));
}
|
||||
}
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 6.4 KiB After Width: | Height: | Size: 6.4 KiB |
BIN
tests/ref/math/syntax.png
Normal file
BIN
tests/ref/math/syntax.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 52 KiB |
24
tests/typ/math/syntax.typ
Normal file
24
tests/typ/math/syntax.typ
Normal file
@ -0,0 +1,24 @@
|
||||
#set page(width: auto)
|
||||
#set text("Latin Modern Roman")
|
||||
#show <table>: it => table(
|
||||
columns: 2,
|
||||
padding: 8pt,
|
||||
..it.text
|
||||
.split("\n")
|
||||
.map(line => (text(10pt, raw(line, lang: "typ")), eval(line) + [ ]))
|
||||
.flatten()
|
||||
)
|
||||
|
||||
```
|
||||
Let $x in NN$ be ...
|
||||
$ (1 + x/2)^2 $
|
||||
$ x arrow:l y $
|
||||
$ sum_(n=1)^mu 1 + (2pi (5 + n)) / k $
|
||||
$ { x in RR | x "is natural" and x < 10 } $
|
||||
$ sqrt(x^2) = frac(x, 1) $
|
||||
$ "profit" = "income" - "expenses" $
|
||||
$ x < #for i in range(5) [$ #i < $] y $
|
||||
$ 1 + 2 = #{1 + 2} $
|
||||
$ A sub:eq:not B $
|
||||
```
|
||||
<table>
|
@ -132,15 +132,19 @@ function getWebviewContent(pngSrc, refSrc, stdout, stderr) {
|
||||
</div>
|
||||
|
||||
<h1>Standard output</h1>
|
||||
<pre>${stdout}</pre>
|
||||
<pre>${escape(stdout)}</pre>
|
||||
|
||||
<h1>Standard error</h1>
|
||||
<pre>${stderr}</pre>
|
||||
<pre>${escape(stderr)}</pre>
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
}
|
||||
|
||||
/**
 * Escape text so it can be interpolated into the webview's HTML markup.
 *
 * Replaces `&`, `<` and `>` with their HTML entities so that process output
 * containing markup-like text is displayed literally instead of being
 * interpreted as HTML.
 *
 * @param {string} text - Raw text to embed (e.g. captured stdout/stderr).
 * @returns {string} The text with `&`, `<` and `>` entity-encoded.
 */
function escape(text) {
    return text
        // Ampersands go first so the entities produced below are not
        // escaped a second time.
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
}
|
||||
|
||||
// Exported alongside `activate` below; currently does nothing.
// NOTE(review): presumably the VS Code extension teardown hook — confirm.
function deactivate() {}
|
||||
|
||||
module.exports = { activate, deactivate }
|
||||
|
Loading…
x
Reference in New Issue
Block a user