Reorganize syntax module
This commit is contained in:
parent
2589692b00
commit
ab03f32240
@ -404,9 +404,23 @@ fn next_at_start(kind: &SyntaxKind, prev: bool) -> bool {
|
||||
#[cfg(test)]
|
||||
#[rustfmt::skip]
|
||||
mod tests {
|
||||
use std::fmt::Debug;
|
||||
|
||||
use super::*;
|
||||
use super::super::{parse, Source};
|
||||
use super::super::tests::check;
|
||||
|
||||
/// Asserts that `found` equals `expected` for the given source `text`.
///
/// On a mismatch, the source text and both values are printed before the
/// function panics with `"test failed"`. Because of `#[track_caller]`, the
/// panic location reported is that of the calling test, not of this helper.
#[track_caller]
fn check<T>(text: &str, found: T, expected: T)
where
    T: Debug + PartialEq,
{
    // Nothing to report when the values agree.
    if found == expected {
        return;
    }

    println!("source: {text:?}");
    println!("expected: {expected:#?}");
    println!("found: {found:#?}");
    panic!("test failed");
}
|
||||
|
||||
#[track_caller]
|
||||
fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
|
||||
|
@ -7,7 +7,7 @@ use crate::util::EcoString;
|
||||
|
||||
/// All syntactical building blocks that can be part of a Typst document.
|
||||
///
|
||||
/// Can be created by the tokenizer or by the parser.
|
||||
/// Can be created by the lexer or by the parser.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum SyntaxKind {
|
||||
/// A line comment: `// ...`.
|
||||
|
@ -9,13 +9,13 @@ use super::{ErrorPos, RawFields, SyntaxKind, Unit};
|
||||
use crate::geom::{AbsUnit, AngleUnit};
|
||||
use crate::util::{format_eco, EcoString};
|
||||
|
||||
/// An iterator over the tokens of a string of source code.
|
||||
/// Splits up a string of source code into tokens.
|
||||
#[derive(Clone)]
|
||||
pub struct Tokens<'s> {
|
||||
pub struct Lexer<'s> {
|
||||
/// The underlying scanner.
|
||||
s: Scanner<'s>,
|
||||
/// The mode the scanner is in. This determines what tokens it recognizes.
|
||||
mode: TokenMode,
|
||||
/// The mode the lexer is in. This determines what tokens it recognizes.
|
||||
mode: LexMode,
|
||||
/// Whether the last token has been terminated.
|
||||
terminated: bool,
|
||||
/// Offsets the indentation on the first line of the source.
|
||||
@ -24,7 +24,7 @@ pub struct Tokens<'s> {
|
||||
|
||||
/// What kind of tokens to emit.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
pub enum TokenMode {
|
||||
pub enum LexMode {
|
||||
/// Text and markup.
|
||||
Markup,
|
||||
/// Math atoms, operators, etc.
|
||||
@ -33,15 +33,15 @@ pub enum TokenMode {
|
||||
Code,
|
||||
}
|
||||
|
||||
impl<'s> Tokens<'s> {
|
||||
/// Create a new token iterator with the given mode.
|
||||
pub fn new(text: &'s str, mode: TokenMode) -> Self {
|
||||
impl<'s> Lexer<'s> {
|
||||
/// Create a new lexer with the given mode.
|
||||
pub fn new(text: &'s str, mode: LexMode) -> Self {
|
||||
Self::with_prefix("", text, mode)
|
||||
}
|
||||
|
||||
/// Create a new token iterator with the given mode and a prefix to offset
|
||||
/// column calculations.
|
||||
pub fn with_prefix(prefix: &str, text: &'s str, mode: TokenMode) -> Self {
|
||||
/// Create a new lexer with the given mode and a prefix to offset column
|
||||
/// calculations.
|
||||
pub fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self {
|
||||
Self {
|
||||
s: Scanner::new(text),
|
||||
mode,
|
||||
@ -50,13 +50,13 @@ impl<'s> Tokens<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the current token mode.
|
||||
pub fn mode(&self) -> TokenMode {
|
||||
/// Get the current lexing mode.
|
||||
pub fn mode(&self) -> LexMode {
|
||||
self.mode
|
||||
}
|
||||
|
||||
/// Change the token mode.
|
||||
pub fn set_mode(&mut self, mode: TokenMode) {
|
||||
/// Change the lexing mode.
|
||||
pub fn set_mode(&mut self, mode: LexMode) {
|
||||
self.mode = mode;
|
||||
}
|
||||
|
||||
@ -87,10 +87,10 @@ impl<'s> Tokens<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Tokens<'_> {
|
||||
impl Iterator for Lexer<'_> {
|
||||
type Item = SyntaxKind;
|
||||
|
||||
/// Parse the next token in the source code.
|
||||
/// Produce the next token.
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let start = self.s.cursor();
|
||||
let c = self.s.eat()?;
|
||||
@ -106,16 +106,16 @@ impl Iterator for Tokens<'_> {
|
||||
|
||||
// Other things.
|
||||
_ => match self.mode {
|
||||
TokenMode::Markup => self.markup(start, c),
|
||||
TokenMode::Math => self.math(start, c),
|
||||
TokenMode::Code => self.code(start, c),
|
||||
LexMode::Markup => self.markup(start, c),
|
||||
LexMode::Math => self.math(start, c),
|
||||
LexMode::Code => self.code(start, c),
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Shared.
|
||||
impl Tokens<'_> {
|
||||
impl Lexer<'_> {
|
||||
fn line_comment(&mut self) -> SyntaxKind {
|
||||
self.s.eat_until(is_newline);
|
||||
if self.s.peek().is_none() {
|
||||
@ -182,7 +182,7 @@ impl Tokens<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
impl Tokens<'_> {
|
||||
impl Lexer<'_> {
|
||||
fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
|
||||
match c {
|
||||
// Blocks.
|
||||
@ -304,7 +304,7 @@ impl Tokens<'_> {
|
||||
Some(keyword) => keyword,
|
||||
None => SyntaxKind::Ident(read.into()),
|
||||
}
|
||||
} else if self.mode == TokenMode::Markup {
|
||||
} else if self.mode == LexMode::Markup {
|
||||
self.text(start)
|
||||
} else {
|
||||
SyntaxKind::Atom("#".into())
|
||||
@ -339,7 +339,7 @@ impl Tokens<'_> {
|
||||
if start < end {
|
||||
self.s.expect(':');
|
||||
SyntaxKind::Symbol(self.s.get(start..end).into())
|
||||
} else if self.mode == TokenMode::Markup {
|
||||
} else if self.mode == LexMode::Markup {
|
||||
SyntaxKind::Colon
|
||||
} else {
|
||||
SyntaxKind::Atom(":".into())
|
||||
@ -438,7 +438,7 @@ impl Tokens<'_> {
|
||||
}
|
||||
|
||||
/// Math.
|
||||
impl Tokens<'_> {
|
||||
impl Lexer<'_> {
|
||||
fn math(&mut self, start: usize, c: char) -> SyntaxKind {
|
||||
match c {
|
||||
// Symbol shorthands.
|
||||
@ -507,7 +507,7 @@ impl Tokens<'_> {
|
||||
}
|
||||
|
||||
/// Code.
|
||||
impl Tokens<'_> {
|
||||
impl Lexer<'_> {
|
||||
fn code(&mut self, start: usize, c: char) -> SyntaxKind {
|
||||
match c {
|
||||
// Blocks.
|
@ -1,274 +0,0 @@
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::ops::{Deref, Range};
|
||||
use std::rc::Rc;
|
||||
|
||||
use super::{SyntaxKind, SyntaxNode};
|
||||
|
||||
/// A syntax node in a context.
|
||||
///
|
||||
/// Knows its exact offset in the file and provides access to its
|
||||
/// children, parent and siblings.
|
||||
///
|
||||
/// **Note that all sibling and leaf accessors skip over trivia!**
|
||||
#[derive(Clone)]
|
||||
pub struct LinkedNode<'a> {
|
||||
node: &'a SyntaxNode,
|
||||
parent: Option<Rc<Self>>,
|
||||
index: usize,
|
||||
offset: usize,
|
||||
}
|
||||
|
||||
impl<'a> LinkedNode<'a> {
|
||||
/// Start a new traversal at the source's root node.
|
||||
pub fn new(root: &'a SyntaxNode) -> Self {
|
||||
Self { node: root, parent: None, index: 0, offset: 0 }
|
||||
}
|
||||
|
||||
/// Get the contained syntax node.
|
||||
pub fn get(&self) -> &'a SyntaxNode {
|
||||
self.node
|
||||
}
|
||||
|
||||
/// The index of this node in its parent's children list.
|
||||
pub fn index(&self) -> usize {
|
||||
self.index
|
||||
}
|
||||
|
||||
/// The absolute byte offset of the this node in the source file.
|
||||
pub fn offset(&self) -> usize {
|
||||
self.offset
|
||||
}
|
||||
|
||||
/// The byte range of the this node in the source file.
|
||||
pub fn range(&self) -> Range<usize> {
|
||||
self.offset..self.offset + self.node.len()
|
||||
}
|
||||
|
||||
/// Get this node's children.
|
||||
pub fn children(&self) -> LinkedChildren<'a> {
|
||||
LinkedChildren {
|
||||
parent: Rc::new(self.clone()),
|
||||
iter: self.node.children().enumerate(),
|
||||
front: self.offset,
|
||||
back: self.offset + self.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Access to parents and siblings.
|
||||
impl<'a> LinkedNode<'a> {
|
||||
/// Get this node's parent.
|
||||
pub fn parent(&self) -> Option<&Self> {
|
||||
self.parent.as_deref()
|
||||
}
|
||||
|
||||
/// Get the kind of this node's parent.
|
||||
pub fn parent_kind(&self) -> Option<&'a SyntaxKind> {
|
||||
Some(self.parent()?.node.kind())
|
||||
}
|
||||
|
||||
/// Get the first previous non-trivia sibling node.
|
||||
pub fn prev_sibling(&self) -> Option<Self> {
|
||||
let parent = self.parent()?;
|
||||
let index = self.index.checked_sub(1)?;
|
||||
let node = parent.node.children().nth(index)?;
|
||||
let offset = self.offset - node.len();
|
||||
let prev = Self { node, parent: self.parent.clone(), index, offset };
|
||||
if prev.kind().is_trivia() {
|
||||
prev.prev_sibling()
|
||||
} else {
|
||||
Some(prev)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the next non-trivia sibling node.
|
||||
pub fn next_sibling(&self) -> Option<Self> {
|
||||
let parent = self.parent()?;
|
||||
let index = self.index.checked_add(1)?;
|
||||
let node = parent.node.children().nth(index)?;
|
||||
let offset = self.offset + self.node.len();
|
||||
let next = Self { node, parent: self.parent.clone(), index, offset };
|
||||
if next.kind().is_trivia() {
|
||||
next.next_sibling()
|
||||
} else {
|
||||
Some(next)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Access to leafs.
|
||||
impl<'a> LinkedNode<'a> {
|
||||
/// Get the rightmost non-trivia leaf before this node.
|
||||
pub fn prev_leaf(&self) -> Option<Self> {
|
||||
let mut node = self.clone();
|
||||
while let Some(prev) = node.prev_sibling() {
|
||||
if let Some(leaf) = prev.rightmost_leaf() {
|
||||
return Some(leaf);
|
||||
}
|
||||
node = prev;
|
||||
}
|
||||
self.parent()?.prev_leaf()
|
||||
}
|
||||
|
||||
/// Find the leftmost contained non-trivia leaf.
|
||||
pub fn leftmost_leaf(&self) -> Option<Self> {
|
||||
if self.is_leaf() && !self.kind().is_trivia() && !self.kind().is_error() {
|
||||
return Some(self.clone());
|
||||
}
|
||||
|
||||
for child in self.children() {
|
||||
if let Some(leaf) = child.leftmost_leaf() {
|
||||
return Some(leaf);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Get the leaf at the specified cursor position.
|
||||
pub fn leaf_at(&self, cursor: usize) -> Option<Self> {
|
||||
if self.node.children().len() == 0 && cursor <= self.offset + self.len() {
|
||||
return Some(self.clone());
|
||||
}
|
||||
|
||||
let mut offset = self.offset;
|
||||
let count = self.node.children().len();
|
||||
for (i, child) in self.children().enumerate() {
|
||||
let len = child.len();
|
||||
if (offset < cursor && cursor <= offset + len)
|
||||
|| (offset == cursor && i + 1 == count)
|
||||
{
|
||||
return child.leaf_at(cursor);
|
||||
}
|
||||
offset += len;
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Find the rightmost contained non-trivia leaf.
|
||||
pub fn rightmost_leaf(&self) -> Option<Self> {
|
||||
if self.is_leaf() && !self.kind().is_trivia() {
|
||||
return Some(self.clone());
|
||||
}
|
||||
|
||||
for child in self.children().rev() {
|
||||
if let Some(leaf) = child.rightmost_leaf() {
|
||||
return Some(leaf);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Get the leftmost non-trivia leaf after this node.
|
||||
pub fn next_leaf(&self) -> Option<Self> {
|
||||
let mut node = self.clone();
|
||||
while let Some(next) = node.next_sibling() {
|
||||
if let Some(leaf) = next.leftmost_leaf() {
|
||||
return Some(leaf);
|
||||
}
|
||||
node = next;
|
||||
}
|
||||
self.parent()?.next_leaf()
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for LinkedNode<'_> {
|
||||
type Target = SyntaxNode;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.get()
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for LinkedNode<'_> {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
self.node.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over the children of a linked node.
|
||||
pub struct LinkedChildren<'a> {
|
||||
parent: Rc<LinkedNode<'a>>,
|
||||
iter: std::iter::Enumerate<std::slice::Iter<'a, SyntaxNode>>,
|
||||
front: usize,
|
||||
back: usize,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for LinkedChildren<'a> {
|
||||
type Item = LinkedNode<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|(index, node)| {
|
||||
let offset = self.front;
|
||||
self.front += node.len();
|
||||
LinkedNode {
|
||||
node,
|
||||
parent: Some(self.parent.clone()),
|
||||
index,
|
||||
offset,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
self.iter.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
impl DoubleEndedIterator for LinkedChildren<'_> {
|
||||
fn next_back(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next_back().map(|(index, node)| {
|
||||
self.back -= node.len();
|
||||
LinkedNode {
|
||||
node,
|
||||
parent: Some(self.parent.clone()),
|
||||
index,
|
||||
offset: self.back,
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ExactSizeIterator for LinkedChildren<'_> {}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::syntax::Source;
|
||||
|
||||
#[test]
|
||||
fn test_linked_node() {
|
||||
let source = Source::detached("#set text(12pt, red)");
|
||||
|
||||
// Find "text".
|
||||
let node = LinkedNode::new(source.root()).leaf_at(7).unwrap();
|
||||
assert_eq!(node.offset(), 5);
|
||||
assert_eq!(node.len(), 4);
|
||||
assert_eq!(node.kind(), &SyntaxKind::Ident("text".into()));
|
||||
|
||||
// Go back to "#set". Skips the space.
|
||||
let prev = node.prev_sibling().unwrap();
|
||||
assert_eq!(prev.offset(), 0);
|
||||
assert_eq!(prev.len(), 4);
|
||||
assert_eq!(prev.kind(), &SyntaxKind::Set);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_linked_node_non_trivia_leaf() {
|
||||
let source = Source::detached("#set fun(12pt, red)");
|
||||
let leaf = LinkedNode::new(source.root()).leaf_at(6).unwrap();
|
||||
let prev = leaf.prev_leaf().unwrap();
|
||||
assert_eq!(leaf.kind(), &SyntaxKind::Ident("fun".into()));
|
||||
assert_eq!(prev.kind(), &SyntaxKind::Set);
|
||||
|
||||
let source = Source::detached("#let x = 10");
|
||||
let leaf = LinkedNode::new(source.root()).leaf_at(9).unwrap();
|
||||
let prev = leaf.prev_leaf().unwrap();
|
||||
let next = leaf.next_leaf().unwrap();
|
||||
assert_eq!(prev.kind(), &SyntaxKind::Eq);
|
||||
assert_eq!(leaf.kind(), &SyntaxKind::Space { newlines: 0 });
|
||||
assert_eq!(next.kind(), &SyntaxKind::Int(10));
|
||||
}
|
||||
}
|
@ -4,25 +4,20 @@ pub mod ast;
|
||||
|
||||
mod incremental;
|
||||
mod kind;
|
||||
mod linked;
|
||||
mod lexer;
|
||||
mod node;
|
||||
mod parser;
|
||||
mod parsing;
|
||||
mod resolve;
|
||||
mod source;
|
||||
mod span;
|
||||
mod tokens;
|
||||
|
||||
pub use self::kind::*;
|
||||
pub use self::linked::*;
|
||||
pub use self::lexer::*;
|
||||
pub use self::node::*;
|
||||
pub use self::parsing::*;
|
||||
pub use self::source::*;
|
||||
pub use self::span::*;
|
||||
pub use self::tokens::*;
|
||||
|
||||
use incremental::reparse;
|
||||
use parser::*;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
@ -1,5 +1,6 @@
|
||||
use std::fmt::{self, Debug, Display, Formatter};
|
||||
use std::ops::Range;
|
||||
use std::ops::{Deref, Range};
|
||||
use std::rc::Rc;
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::ast::AstNode;
|
||||
@ -526,6 +527,235 @@ impl PartialEq for NodeData {
|
||||
}
|
||||
}
|
||||
|
||||
/// A syntax node in a context.
|
||||
///
|
||||
/// Knows its exact offset in the file and provides access to its
|
||||
/// children, parent and siblings.
|
||||
///
|
||||
/// **Note that all sibling and leaf accessors skip over trivia!**
|
||||
#[derive(Clone)]
|
||||
pub struct LinkedNode<'a> {
|
||||
/// The wrapped syntax node.
node: &'a SyntaxNode,
|
||||
/// The linked parent; `None` for a node created through `new`.
parent: Option<Rc<Self>>,
|
||||
/// The index of this node in its parent's children list.
index: usize,
|
||||
/// The absolute byte offset of this node in the source file.
offset: usize,
|
||||
}
|
||||
|
||||
impl<'a> LinkedNode<'a> {
|
||||
/// Start a new traversal at the source's root node.
|
||||
pub fn new(root: &'a SyntaxNode) -> Self {
|
||||
Self { node: root, parent: None, index: 0, offset: 0 }
|
||||
}
|
||||
|
||||
/// Get the contained syntax node.
|
||||
pub fn get(&self) -> &'a SyntaxNode {
|
||||
self.node
|
||||
}
|
||||
|
||||
/// The index of this node in its parent's children list.
|
||||
pub fn index(&self) -> usize {
|
||||
self.index
|
||||
}
|
||||
|
||||
/// The absolute byte offset of the this node in the source file.
|
||||
pub fn offset(&self) -> usize {
|
||||
self.offset
|
||||
}
|
||||
|
||||
/// The byte range of the this node in the source file.
|
||||
pub fn range(&self) -> Range<usize> {
|
||||
self.offset..self.offset + self.node.len()
|
||||
}
|
||||
|
||||
/// Get this node's children.
|
||||
pub fn children(&self) -> LinkedChildren<'a> {
|
||||
LinkedChildren {
|
||||
parent: Rc::new(self.clone()),
|
||||
iter: self.node.children().enumerate(),
|
||||
front: self.offset,
|
||||
back: self.offset + self.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Access to parents and siblings.
|
||||
impl<'a> LinkedNode<'a> {
|
||||
/// Get this node's parent.
|
||||
pub fn parent(&self) -> Option<&Self> {
|
||||
self.parent.as_deref()
|
||||
}
|
||||
|
||||
/// Get the kind of this node's parent.
|
||||
pub fn parent_kind(&self) -> Option<&'a SyntaxKind> {
|
||||
Some(self.parent()?.node.kind())
|
||||
}
|
||||
|
||||
/// Get the first previous non-trivia sibling node.
|
||||
pub fn prev_sibling(&self) -> Option<Self> {
|
||||
let parent = self.parent()?;
|
||||
let index = self.index.checked_sub(1)?;
|
||||
let node = parent.node.children().nth(index)?;
|
||||
let offset = self.offset - node.len();
|
||||
let prev = Self { node, parent: self.parent.clone(), index, offset };
|
||||
if prev.kind().is_trivia() {
|
||||
prev.prev_sibling()
|
||||
} else {
|
||||
Some(prev)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the next non-trivia sibling node.
|
||||
pub fn next_sibling(&self) -> Option<Self> {
|
||||
let parent = self.parent()?;
|
||||
let index = self.index.checked_add(1)?;
|
||||
let node = parent.node.children().nth(index)?;
|
||||
let offset = self.offset + self.node.len();
|
||||
let next = Self { node, parent: self.parent.clone(), index, offset };
|
||||
if next.kind().is_trivia() {
|
||||
next.next_sibling()
|
||||
} else {
|
||||
Some(next)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Access to leafs.
|
||||
impl<'a> LinkedNode<'a> {
|
||||
/// Get the rightmost non-trivia leaf before this node.
|
||||
pub fn prev_leaf(&self) -> Option<Self> {
|
||||
let mut node = self.clone();
|
||||
while let Some(prev) = node.prev_sibling() {
|
||||
if let Some(leaf) = prev.rightmost_leaf() {
|
||||
return Some(leaf);
|
||||
}
|
||||
node = prev;
|
||||
}
|
||||
self.parent()?.prev_leaf()
|
||||
}
|
||||
|
||||
/// Find the leftmost contained non-trivia leaf.
|
||||
pub fn leftmost_leaf(&self) -> Option<Self> {
|
||||
if self.is_leaf() && !self.kind().is_trivia() && !self.kind().is_error() {
|
||||
return Some(self.clone());
|
||||
}
|
||||
|
||||
for child in self.children() {
|
||||
if let Some(leaf) = child.leftmost_leaf() {
|
||||
return Some(leaf);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Get the leaf at the specified cursor position.
|
||||
pub fn leaf_at(&self, cursor: usize) -> Option<Self> {
|
||||
if self.node.children().len() == 0 && cursor <= self.offset + self.len() {
|
||||
return Some(self.clone());
|
||||
}
|
||||
|
||||
let mut offset = self.offset;
|
||||
let count = self.node.children().len();
|
||||
for (i, child) in self.children().enumerate() {
|
||||
let len = child.len();
|
||||
if (offset < cursor && cursor <= offset + len)
|
||||
|| (offset == cursor && i + 1 == count)
|
||||
{
|
||||
return child.leaf_at(cursor);
|
||||
}
|
||||
offset += len;
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Find the rightmost contained non-trivia leaf.
|
||||
pub fn rightmost_leaf(&self) -> Option<Self> {
|
||||
if self.is_leaf() && !self.kind().is_trivia() {
|
||||
return Some(self.clone());
|
||||
}
|
||||
|
||||
for child in self.children().rev() {
|
||||
if let Some(leaf) = child.rightmost_leaf() {
|
||||
return Some(leaf);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Get the leftmost non-trivia leaf after this node.
|
||||
pub fn next_leaf(&self) -> Option<Self> {
|
||||
let mut node = self.clone();
|
||||
while let Some(next) = node.next_sibling() {
|
||||
if let Some(leaf) = next.leftmost_leaf() {
|
||||
return Some(leaf);
|
||||
}
|
||||
node = next;
|
||||
}
|
||||
self.parent()?.next_leaf()
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for LinkedNode<'_> {
|
||||
type Target = SyntaxNode;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.get()
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for LinkedNode<'_> {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
self.node.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over the children of a linked node.
|
||||
pub struct LinkedChildren<'a> {
|
||||
/// Shared handle to the node whose children are iterated; cloned into
/// every yielded child as its parent.
parent: Rc<LinkedNode<'a>>,
|
||||
/// The remaining children, each paired with its index.
iter: std::iter::Enumerate<std::slice::Iter<'a, SyntaxNode>>,
|
||||
/// The offset given to the next child yielded from the front.
front: usize,
|
||||
/// The offset just past the next child yielded from the back.
back: usize,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for LinkedChildren<'a> {
|
||||
type Item = LinkedNode<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|(index, node)| {
|
||||
let offset = self.front;
|
||||
self.front += node.len();
|
||||
LinkedNode {
|
||||
node,
|
||||
parent: Some(self.parent.clone()),
|
||||
index,
|
||||
offset,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
self.iter.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
impl DoubleEndedIterator for LinkedChildren<'_> {
|
||||
fn next_back(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next_back().map(|(index, node)| {
|
||||
self.back -= node.len();
|
||||
LinkedNode {
|
||||
node,
|
||||
parent: Some(self.parent.clone()),
|
||||
index,
|
||||
offset: self.back,
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// The size hint is forwarded from the underlying enumerated slice iterator,
// so the number of remaining children is known exactly.
impl ExactSizeIterator for LinkedChildren<'_> {}
|
||||
|
||||
/// Result of numbering a node within an interval.
|
||||
pub(super) type NumberingResult = Result<(), Unnumberable>;
|
||||
|
||||
@ -540,3 +770,43 @@ impl Display for Unnumberable {
|
||||
}
|
||||
|
||||
impl std::error::Error for Unnumberable {}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::Source;

    /// Locates a leaf by cursor position and steps to its previous sibling.
    #[test]
    fn test_linked_node() {
        let source = Source::detached("#set text(12pt, red)");
        let root = LinkedNode::new(source.root());

        // Byte 7 lies inside the identifier "text".
        let text = root.leaf_at(7).unwrap();
        assert_eq!(text.offset(), 5);
        assert_eq!(text.len(), 4);
        assert_eq!(text.kind(), &SyntaxKind::Ident("text".into()));

        // Stepping back lands on "#set"; the space in between is skipped.
        let set = text.prev_sibling().unwrap();
        assert_eq!(set.offset(), 0);
        assert_eq!(set.len(), 4);
        assert_eq!(set.kind(), &SyntaxKind::Set);
    }

    /// Leaf navigation skips trivia even when it starts from a trivia leaf.
    #[test]
    fn test_linked_node_non_trivia_leaf() {
        // Starting from an identifier, the previous leaf is the keyword.
        let set_rule = Source::detached("#set fun(12pt, red)");
        let set_root = LinkedNode::new(set_rule.root());
        let ident = set_root.leaf_at(6).unwrap();
        let before_ident = ident.prev_leaf().unwrap();
        assert_eq!(ident.kind(), &SyntaxKind::Ident("fun".into()));
        assert_eq!(before_ident.kind(), &SyntaxKind::Set);

        // Starting from a space, both neighbours are non-trivia leaves.
        let binding = Source::detached("#let x = 10");
        let binding_root = LinkedNode::new(binding.root());
        let space = binding_root.leaf_at(9).unwrap();
        let before_space = space.prev_leaf().unwrap();
        let after_space = space.next_leaf().unwrap();
        assert_eq!(before_space.kind(), &SyntaxKind::Eq);
        assert_eq!(space.kind(), &SyntaxKind::Space { newlines: 0 });
        assert_eq!(after_space.kind(), &SyntaxKind::Int(10));
    }
}
|
||||
|
@ -2,13 +2,13 @@ use std::fmt::{self, Display, Formatter};
|
||||
use std::mem;
|
||||
use std::ops::Range;
|
||||
|
||||
use super::{ErrorPos, SyntaxKind, SyntaxNode, TokenMode, Tokens};
|
||||
use super::{ErrorPos, LexMode, Lexer, SyntaxKind, SyntaxNode};
|
||||
use crate::util::{format_eco, EcoString};
|
||||
|
||||
/// A convenient token-based parser.
|
||||
pub struct Parser<'s> {
|
||||
/// An iterator over the source tokens.
|
||||
tokens: Tokens<'s>,
|
||||
tokens: Lexer<'s>,
|
||||
/// Whether we are at the end of the file or of a group.
|
||||
eof: bool,
|
||||
/// The current token.
|
||||
@ -29,15 +29,15 @@ pub struct Parser<'s> {
|
||||
|
||||
impl<'s> Parser<'s> {
|
||||
/// Create a new parser for the source string.
|
||||
pub fn new(text: &'s str, mode: TokenMode) -> Self {
|
||||
pub fn new(text: &'s str, mode: LexMode) -> Self {
|
||||
Self::with_prefix("", text, mode)
|
||||
}
|
||||
|
||||
/// Create a new parser for the source string that is prefixed by some text
|
||||
/// that does not need to be parsed but taken into account for column
|
||||
/// calculation.
|
||||
pub fn with_prefix(prefix: &str, text: &'s str, mode: TokenMode) -> Self {
|
||||
let mut tokens = Tokens::with_prefix(prefix, text, mode);
|
||||
pub fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self {
|
||||
let mut tokens = Lexer::with_prefix(prefix, text, mode);
|
||||
let current = tokens.next();
|
||||
Self {
|
||||
tokens,
|
||||
@ -91,7 +91,7 @@ impl<'s> Parser<'s> {
|
||||
let until = self.trivia_start();
|
||||
let mut children = mem::replace(&mut self.children, prev);
|
||||
|
||||
if self.tokens.mode() == TokenMode::Markup {
|
||||
if self.tokens.mode() == LexMode::Markup {
|
||||
self.children.push(SyntaxNode::inner(kind, children));
|
||||
} else {
|
||||
// Trailing trivia should not be wrapped into the new node.
|
||||
@ -121,7 +121,7 @@ impl<'s> Parser<'s> {
|
||||
self.prev_end = self.tokens.cursor();
|
||||
self.bump();
|
||||
|
||||
if self.tokens.mode() != TokenMode::Markup {
|
||||
if self.tokens.mode() != LexMode::Markup {
|
||||
// Skip whitespace and comments.
|
||||
while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) {
|
||||
self.bump();
|
||||
@ -235,9 +235,9 @@ impl<'s> Parser<'s> {
|
||||
pub fn start_group(&mut self, kind: Group) {
|
||||
self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
|
||||
self.tokens.set_mode(match kind {
|
||||
Group::Bracket | Group::Strong | Group::Emph => TokenMode::Markup,
|
||||
Group::Math | Group::MathRow(_, _) => TokenMode::Math,
|
||||
Group::Brace | Group::Paren | Group::Expr => TokenMode::Code,
|
||||
Group::Bracket | Group::Strong | Group::Emph => LexMode::Markup,
|
||||
Group::Math | Group::MathRow(_, _) => LexMode::Math,
|
||||
Group::Brace | Group::Paren | Group::Expr => LexMode::Code,
|
||||
});
|
||||
|
||||
match kind {
|
||||
@ -296,7 +296,7 @@ impl<'s> Parser<'s> {
|
||||
// Rescan the peeked token if the mode changed.
|
||||
if rescan {
|
||||
let mut target = self.prev_end();
|
||||
if group_mode != TokenMode::Markup {
|
||||
if group_mode != LexMode::Markup {
|
||||
let start = self.trivia_start().0;
|
||||
target = self.current_start
|
||||
- self.children[start..].iter().map(SyntaxNode::len).sum::<usize>();
|
||||
@ -488,7 +488,7 @@ impl Marker {
|
||||
}
|
||||
|
||||
// Don't expose trivia in code.
|
||||
if p.tokens.mode() != TokenMode::Markup && child.kind().is_trivia() {
|
||||
if p.tokens.mode() != LexMode::Markup && child.kind().is_trivia() {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -515,7 +515,7 @@ struct GroupEntry {
|
||||
pub kind: Group,
|
||||
/// The mode the parser was in _before_ the group started (to which we go
|
||||
/// back once the group ends).
|
||||
pub prev_mode: TokenMode,
|
||||
pub prev_mode: LexMode,
|
||||
}
|
||||
|
||||
/// A group, confined by optional start and end delimiters.
|
||||
|
@ -2,21 +2,21 @@ use std::collections::HashSet;
|
||||
|
||||
use super::ast::{Assoc, BinOp, UnOp};
|
||||
use super::{
|
||||
ErrorPos, Group, Marker, ParseError, ParseResult, Parser, SyntaxKind, SyntaxNode,
|
||||
TokenMode,
|
||||
ErrorPos, Group, LexMode, Marker, ParseError, ParseResult, Parser, SyntaxKind,
|
||||
SyntaxNode,
|
||||
};
|
||||
use crate::util::EcoString;
|
||||
|
||||
/// Parse a source file.
|
||||
pub fn parse(text: &str) -> SyntaxNode {
|
||||
let mut p = Parser::new(text, TokenMode::Markup);
|
||||
let mut p = Parser::new(text, LexMode::Markup);
|
||||
markup(&mut p, true);
|
||||
p.finish().into_iter().next().unwrap()
|
||||
}
|
||||
|
||||
/// Parse code directly, only used for syntax highlighting.
|
||||
pub fn parse_code(text: &str) -> SyntaxNode {
|
||||
let mut p = Parser::new(text, TokenMode::Code);
|
||||
let mut p = Parser::new(text, LexMode::Code);
|
||||
p.perform(SyntaxKind::CodeBlock, code);
|
||||
p.finish().into_iter().next().unwrap()
|
||||
}
|
||||
@ -29,7 +29,7 @@ pub(crate) fn reparse_code_block(
|
||||
text: &str,
|
||||
end_pos: usize,
|
||||
) -> Option<(Vec<SyntaxNode>, bool, usize)> {
|
||||
let mut p = Parser::with_prefix(prefix, text, TokenMode::Code);
|
||||
let mut p = Parser::with_prefix(prefix, text, LexMode::Code);
|
||||
if !p.at(SyntaxKind::LeftBrace) {
|
||||
return None;
|
||||
}
|
||||
@ -53,7 +53,7 @@ pub(crate) fn reparse_content_block(
|
||||
text: &str,
|
||||
end_pos: usize,
|
||||
) -> Option<(Vec<SyntaxNode>, bool, usize)> {
|
||||
let mut p = Parser::with_prefix(prefix, text, TokenMode::Code);
|
||||
let mut p = Parser::with_prefix(prefix, text, LexMode::Code);
|
||||
if !p.at(SyntaxKind::LeftBracket) {
|
||||
return None;
|
||||
}
|
||||
@ -81,7 +81,7 @@ pub(crate) fn reparse_markup_elements(
|
||||
mut at_start: bool,
|
||||
min_indent: usize,
|
||||
) -> Option<(Vec<SyntaxNode>, bool, usize)> {
|
||||
let mut p = Parser::with_prefix(prefix, text, TokenMode::Markup);
|
||||
let mut p = Parser::with_prefix(prefix, text, LexMode::Markup);
|
||||
|
||||
let mut node: Option<&SyntaxNode> = None;
|
||||
let mut iter = reference.iter();
|
||||
|
@ -1,483 +0,0 @@
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use std::num::NonZeroUsize;
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::*;
|
||||
use crate::geom::{AbsUnit, AngleUnit};
|
||||
|
||||
use ErrorPos::*;
|
||||
use Option::None;
|
||||
use SyntaxKind::*;
|
||||
use TokenMode::{Code, Markup};
|
||||
|
||||
use std::fmt::Debug;
|
||||
|
||||
#[track_caller]
|
||||
pub fn check<T>(text: &str, found: T, expected: T)
|
||||
where
|
||||
T: Debug + PartialEq,
|
||||
{
|
||||
if found != expected {
|
||||
println!("source: {text:?}");
|
||||
println!("expected: {expected:#?}");
|
||||
println!("found: {found:#?}");
|
||||
panic!("test failed");
|
||||
}
|
||||
}
|
||||
|
||||
fn Space(newlines: usize) -> SyntaxKind {
|
||||
SyntaxKind::Space { newlines }
|
||||
}
|
||||
|
||||
fn Raw(text: &str, lang: Option<&str>, block: bool) -> SyntaxKind {
|
||||
SyntaxKind::Raw(Arc::new(RawFields {
|
||||
text: text.into(),
|
||||
lang: lang.map(Into::into),
|
||||
block,
|
||||
}))
|
||||
}
|
||||
|
||||
fn Str(string: &str) -> SyntaxKind {
|
||||
SyntaxKind::Str(string.into())
|
||||
}
|
||||
|
||||
fn Text(string: &str) -> SyntaxKind {
|
||||
SyntaxKind::Text(string.into())
|
||||
}
|
||||
|
||||
fn Ident(ident: &str) -> SyntaxKind {
|
||||
SyntaxKind::Ident(ident.into())
|
||||
}
|
||||
|
||||
fn Error(pos: ErrorPos, message: &str) -> SyntaxKind {
|
||||
SyntaxKind::Error(pos, message.into())
|
||||
}
|
||||
|
||||
/// Building blocks for suffix testing.
|
||||
///
|
||||
/// We extend each test case with a collection of different suffixes to make
|
||||
/// sure tokens end at the correct position. These suffixes are split into
|
||||
/// blocks, which can be disabled/enabled per test case. For example, when
|
||||
/// testing identifiers we disable letter suffixes because these would
|
||||
/// mingle with the identifiers.
|
||||
///
|
||||
/// Suffix blocks:
|
||||
/// - ' ': spacing
|
||||
/// - 'a': letters
|
||||
/// - '1': numbers
|
||||
/// - '/': symbols
|
||||
const BLOCKS: &str = " a1/";
|
||||
|
||||
// Suffixes described by four-tuples of:
|
||||
//
|
||||
// - block the suffix is part of
|
||||
// - mode in which the suffix is applicable
|
||||
// - the suffix string
|
||||
// - the resulting suffix NodeKind
|
||||
fn suffixes() -> impl Iterator<Item = (char, Option<TokenMode>, &'static str, SyntaxKind)>
|
||||
{
|
||||
[
|
||||
// Whitespace suffixes.
|
||||
(' ', None, " ", Space(0)),
|
||||
(' ', None, "\n", Space(1)),
|
||||
(' ', None, "\r", Space(1)),
|
||||
(' ', None, "\r\n", Space(1)),
|
||||
// Letter suffixes.
|
||||
('a', Some(Markup), "hello", Text("hello")),
|
||||
('a', Some(Markup), "💚", Text("💚")),
|
||||
('a', Some(Code), "val", Ident("val")),
|
||||
('a', Some(Code), "α", Ident("α")),
|
||||
('a', Some(Code), "_", Ident("_")),
|
||||
// Number suffixes.
|
||||
('1', Some(Code), "2", Int(2)),
|
||||
('1', Some(Code), ".2", Float(0.2)),
|
||||
// Symbol suffixes.
|
||||
('/', None, "[", LeftBracket),
|
||||
('/', None, "//", LineComment),
|
||||
('/', None, "/**/", BlockComment),
|
||||
('/', Some(Markup), "*", Star),
|
||||
('/', Some(Markup), r"\\", Escape('\\')),
|
||||
('/', Some(Markup), "#let", Let),
|
||||
('/', Some(Code), "(", LeftParen),
|
||||
('/', Some(Code), ":", Colon),
|
||||
('/', Some(Code), "+=", PlusEq),
|
||||
]
|
||||
.into_iter()
|
||||
}
|
||||
|
||||
/// Runs tokenizer test cases of the form `t!(Mode[blocks]: text => tokens...)`.
///
/// - `Mode` is `Markup`, `Code`, or `Both`; `Both` expands to one case per mode.
/// - The optional `[blocks]` literal selects which suffix blocks of `BLOCKS`
///   are tried; when it is omitted, all blocks are used.
/// - Every case is checked once as written and once per applicable suffix
///   from `suffixes()`, expecting the suffix's token after the listed tokens.
macro_rules! t {
    // Run the same case in markup mode and in code mode.
    (Both $($tts:tt)*) => {
        t!(Markup $($tts)*);
        t!(Code $($tts)*);
    };
    ($mode:ident $([$blocks:literal])?: $text:expr => $($token:expr),*) => {{
        // Test without suffix.
        t!(@$mode: $text => $($token),*);

        // Test with each applicable suffix.
        for (block, mode, suffix, ref token) in suffixes() {
            let text = $text;
            // The default is shadowed when the case names its own blocks.
            #[allow(unused_variables)]
            let blocks = BLOCKS;
            $(let blocks = $blocks;)?
            // Every requested block must be one of the known `BLOCKS`.
            assert!(!blocks.contains(|c| !BLOCKS.contains(c)));
            if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) {
                t!(@$mode: format!("{}{}", text, suffix) => $($token,)* token);
            }
        }
    }};
    // Internal arm: tokenize `$text` in `$mode` and compare with the expected list.
    (@$mode:ident: $text:expr => $($token:expr),*) => {{
        let text = $text;
        let found = Tokens::new(&text, $mode).collect::<Vec<_>>();
        let expected = vec![$($token.clone()),*];
        check(&text, found, expected);
    }};
}
|
||||
|
||||
/// Braces and brackets are tokens in both modes; parentheses are plain text
/// in markup and dedicated tokens in code.
#[test]
fn test_tokenize_brackets() {
    // Test in markup.
    t!(Markup: "{" => LeftBrace);
    t!(Markup: "}" => RightBrace);
    t!(Markup: "[" => LeftBracket);
    t!(Markup: "]" => RightBracket);
    t!(Markup[" /"]: "(" => Text("("));
    t!(Markup[" /"]: ")" => Text(")"));

    // Test in code.
    t!(Code: "{" => LeftBrace);
    t!(Code: "}" => RightBrace);
    t!(Code: "[" => LeftBracket);
    t!(Code: "]" => RightBracket);
    t!(Code: "(" => LeftParen);
    t!(Code: ")" => RightParen);
}
|
||||
|
||||
/// Whitespace runs collapse into a single `Space` token that records how many
/// newlines the run contained.
#[test]
fn test_tokenize_whitespace() {
    // Test basic whitespace.
    t!(Both["a1/"]: "" => );
    t!(Both["a1/"]: " " => Space(0));
    // A run of several spaces is still a single space token.
    t!(Both["a1/"]: "  " => Space(0));
    t!(Both["a1/"]: "\t" => Space(0));
    t!(Both["a1/"]: " \t" => Space(0));
    t!(Both["a1/"]: "\u{202F}" => Space(0));

    // Test newline counting.
    t!(Both["a1/"]: "\n" => Space(1));
    t!(Both["a1/"]: "\n " => Space(1));
    t!(Both["a1/"]: " \n" => Space(1));
    t!(Both["a1/"]: " \n " => Space(1));
    t!(Both["a1/"]: "\r\n" => Space(1));
    t!(Both["a1/"]: "\r\n\r" => Space(2));
    t!(Both["a1/"]: " \n\t \n " => Space(2));
    t!(Both["a1/"]: "\n\r" => Space(2));
    t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
}
|
||||
|
||||
/// Plain markup text, including code symbols that carry no meaning in markup
/// and the points at which a text token ends.
#[test]
fn test_tokenize_text() {
    // Test basic text.
    t!(Markup[" /"]: "hello" => Text("hello"));
    t!(Markup[" /"]: "reha-world" => Text("reha-world"));

    // Test code symbols in text.
    t!(Markup[" /"]: "a():\"b" => Text("a()"), Colon, SmartQuote { double: true }, Text("b"));
    t!(Markup[" /"]: ";,|/+" => Text(";,|/+"));
    t!(Markup[" /"]: "=-a" => Eq, Minus, Text("a"));
    t!(Markup[" "]: "#123" => Text("#123"));

    // Test text ends.
    t!(Markup[""]: "hello " => Text("hello"), Space(0));
    t!(Markup[""]: "hello~" => Text("hello"), Shorthand('\u{00A0}'));
}
|
||||
|
||||
/// Backslash escapes in markup: single-character escapes, `\u{...}` unicode
/// escapes, and the errors produced for invalid or unclosed unicode escapes.
#[test]
fn test_tokenize_escape_sequences() {
    // Test escapable symbols.
    t!(Markup: r"\\" => Escape('\\'));
    t!(Markup: r"\/" => Escape('/'));
    t!(Markup: r"\[" => Escape('['));
    t!(Markup: r"\]" => Escape(']'));
    t!(Markup: r"\{" => Escape('{'));
    t!(Markup: r"\}" => Escape('}'));
    t!(Markup: r"\*" => Escape('*'));
    t!(Markup: r"\_" => Escape('_'));
    t!(Markup: r"\=" => Escape('='));
    t!(Markup: r"\~" => Escape('~'));
    t!(Markup: r"\'" => Escape('\''));
    t!(Markup: r#"\""# => Escape('"'));
    t!(Markup: r"\`" => Escape('`'));
    t!(Markup: r"\$" => Escape('$'));
    t!(Markup: r"\#" => Escape('#'));
    t!(Markup: r"\a" => Escape('a'));
    t!(Markup: r"\u" => Escape('u'));
    t!(Markup: r"\1" => Escape('1'));

    // Test basic unicode escapes.
    t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence"));
    t!(Markup: r"\u{2603}" => Escape('☃'));
    t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence"));

    // Test unclosed unicode escapes.
    t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace"));
    t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace"));
    t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace"));
    t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace"));
    t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace);
}
|
||||
|
||||
/// Markup-mode symbols: emphasis markers, heading and list markers,
/// shorthands, line breaks, and enum numbering.
#[test]
fn test_tokenize_markup_symbols() {
    // Test markup tokens.
    t!(Markup[" a1"]: "*" => Star);
    t!(Markup: "_" => Underscore);
    t!(Markup[""]: "===" => Eq, Eq, Eq);
    t!(Markup["a1/"]: "= " => Eq, Space(0));
    t!(Markup[" "]: r"\" => Linebreak);
    t!(Markup: "~" => Shorthand('\u{00A0}'));
    t!(Markup["a1/"]: "-?" => Shorthand('\u{00AD}'));
    t!(Markup["a "]: r"a--" => Text("a"), Shorthand('\u{2013}'));
    t!(Markup["a1/"]: "- " => Minus, Space(0));
    t!(Markup[" "]: "+" => Plus);
    t!(Markup[" "]: "1." => EnumNumbering(NonZeroUsize::new(1).unwrap()));
    t!(Markup[" "]: "1.a" => EnumNumbering(NonZeroUsize::new(1).unwrap()), Text("a"));
    t!(Markup[" /"]: "a1." => Text("a1."));
}
|
||||
|
||||
/// Code-mode operators and punctuation, how adjacent symbols combine, and
/// when a hyphen is an operator versus part of an identifier.
#[test]
fn test_tokenize_code_symbols() {
    // Test all symbols.
    t!(Code: "," => Comma);
    t!(Code: ";" => Semicolon);
    t!(Code: ":" => Colon);
    t!(Code: "+" => Plus);
    t!(Code: "-" => Minus);
    t!(Code[" a1"]: "*" => Star);
    t!(Code[" a1"]: "/" => Slash);
    t!(Code[" a/"]: "." => Dot);
    t!(Code: "=" => Eq);
    t!(Code: "==" => EqEq);
    t!(Code: "!=" => ExclEq);
    t!(Code[" /"]: "<" => Lt);
    t!(Code: "<=" => LtEq);
    t!(Code: ">" => Gt);
    t!(Code: ">=" => GtEq);
    t!(Code: "+=" => PlusEq);
    t!(Code: "-=" => HyphEq);
    t!(Code: "*=" => StarEq);
    t!(Code: "/=" => SlashEq);
    t!(Code: ".." => Dots);
    t!(Code: "=>" => Arrow);

    // Test combinations.
    t!(Code: "<=>" => LtEq, Gt);
    t!(Code[" a/"]: "..." => Dots, Dot);

    // Test hyphen as symbol vs part of identifier.
    t!(Code[" /"]: "-1" => Minus, Int(1));
    t!(Code[" /"]: "-a" => Minus, Ident("a"));
    t!(Code[" /"]: "--1" => Minus, Minus, Int(1));
    t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a"));
    t!(Code[" /"]: "a-b" => Ident("a-b"));

    // Test invalid.
    t!(Code: r"\" => Error(Full, "not valid here"));
}
|
||||
|
||||
/// Keywords are recognized in code and after `#` in markup, but are plain
/// text in bare markup; near-keywords are ordinary identifiers.
#[test]
fn test_tokenize_keywords() {
    // A list of a few (not all) keywords.
    let list = [
        ("not", Not),
        ("let", Let),
        ("if", If),
        ("else", Else),
        ("for", For),
        ("in", In),
        ("import", Import),
    ];

    // Keywords introduced by a hashtag in markup.
    for (s, t) in list.clone() {
        t!(Markup[" "]: format!("#{}", s) => t);
        t!(Markup[" "]: format!("#{0}#{0}", s) => t, t);
        t!(Markup[" /"]: format!("# {}", s) => Text(&format!("# {s}")));
    }

    // Bare keywords: tokens in code, text in markup.
    for (s, t) in list {
        t!(Code[" "]: s => t);
        t!(Markup[" /"]: s => Text(s));
    }

    // Test simple identifier.
    t!(Markup[" "]: "#letter" => Ident("letter"));
    t!(Code[" /"]: "falser" => Ident("falser"));
    t!(Code[" /"]: "None" => Ident("None"));
    t!(Code[" /"]: "True" => Ident("True"));
}
|
||||
|
||||
/// Raw blocks delimited by backticks: contents, language tags, backtick
/// counts, and the error for a missing closing delimiter.
#[test]
fn test_tokenize_raw_blocks() {
    // Test basic raw block.
    t!(Markup: "``" => Raw("", None, false));
    t!(Markup: "`raw`" => Raw("raw", None, false));
    t!(Markup[""]: "`]" => Error(End, "expected 1 backtick"));

    // Test special symbols in raw block.
    t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false));
    t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick"));

    // Test separated closing backticks.
    t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false));

    // Test more backticks.
    t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false));
    t!(Markup: "````🚀````" => Raw("", None, false));
    t!(Markup[""]: "`````👩🚀````noend" => Error(End, "expected 5 backticks"));
    t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false));
}
|
||||
|
||||
/// Identifiers in code mode: ASCII, underscores, non-ASCII scripts, and
/// hyphenated names.
#[test]
fn test_tokenize_idents() {
    // Test valid identifiers.
    t!(Code[" /"]: "x" => Ident("x"));
    t!(Code[" /"]: "value" => Ident("value"));
    t!(Code[" /"]: "__main__" => Ident("__main__"));
    t!(Code[" /"]: "_snake_case" => Ident("_snake_case"));

    // Test non-ascii.
    t!(Code[" /"]: "α" => Ident("α"));
    t!(Code[" /"]: "ម្តាយ" => Ident("ម្តាយ"));

    // Test hyphen parsed as identifier.
    t!(Code[" /"]: "kebab-case" => Ident("kebab-case"));
    t!(Code[" /"]: "one-10" => Ident("one-10"));
}
|
||||
|
||||
/// Numeric literals in code mode: integers, floats, numbers with unit
/// suffixes, numbers separated by dots, and invalid suffixes.
#[test]
fn test_tokenize_numeric() {
    let ints = [("7", 7), ("012", 12)];
    let floats = [
        (".3", 0.3),
        ("0.3", 0.3),
        ("3.", 3.0),
        ("3.0", 3.0),
        ("14.3", 14.3),
        ("10e2", 1000.0),
        ("10e+0", 10.0),
        ("10e+1", 100.0),
        ("10e-2", 0.1),
        ("10.e1", 100.0),
        ("10.e-1", 1.0),
        (".1e1", 1.0),
        ("10E2", 1000.0),
    ];

    // Test integers.
    for &(s, v) in &ints {
        t!(Code[" /"]: s => Int(v));
    }

    // Test floats.
    for &(s, v) in &floats {
        t!(Code[" /"]: s => Float(v));
    }

    // Test attached numbers.
    t!(Code[" /"]: ".2.3" => Float(0.2), Float(0.3));
    t!(Code[" /"]: "1.2.3" => Float(1.2), Float(0.3));
    t!(Code[" /"]: "1e-2+3" => Float(0.01), Plus, Int(3));

    // Test float from too large integer.
    let large = i64::MAX as f64 + 1.0;
    t!(Code[" /"]: large.to_string() => Float(large));

    // Combined integers and floats.
    let nums = ints.iter().map(|&(k, v)| (k, v as f64)).chain(floats);

    // Unit suffixes paired with the constructor for the expected token.
    let suffixes: &[(&str, fn(f64) -> SyntaxKind)] = &[
        ("mm", |x| Numeric(x, Unit::Length(AbsUnit::Mm))),
        ("pt", |x| Numeric(x, Unit::Length(AbsUnit::Pt))),
        ("cm", |x| Numeric(x, Unit::Length(AbsUnit::Cm))),
        ("in", |x| Numeric(x, Unit::Length(AbsUnit::In))),
        ("rad", |x| Numeric(x, Unit::Angle(AngleUnit::Rad))),
        ("deg", |x| Numeric(x, Unit::Angle(AngleUnit::Deg))),
        ("em", |x| Numeric(x, Unit::Em)),
        ("fr", |x| Numeric(x, Unit::Fr)),
        ("%", |x| Numeric(x, Unit::Percent)),
    ];

    // Numeric types.
    for &(suffix, build) in suffixes {
        for (s, v) in nums.clone() {
            t!(Code[" /"]: format!("{}{}", s, suffix) => build(v));
        }
    }

    // Multiple dots close the number.
    t!(Code[" /"]: "1..2" => Int(1), Dots, Int(2));
    t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3));
    t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3));

    // Test invalid.
    t!(Code[" /"]: "1foo" => Error(Full, "invalid number suffix"));
}
|
||||
|
||||
/// String literals in code mode: plain contents, embedded newlines, escaped
/// quotes, and the error for a missing closing quote.
#[test]
fn test_tokenize_strings() {
    // Test basic strings.
    t!(Code: "\"hi\"" => Str("hi"));
    t!(Code: "\"hi\nthere\"" => Str("hi\nthere"));
    t!(Code: "\"🌎\"" => Str("🌎"));

    // Test unterminated.
    t!(Code[""]: "\"hi" => Error(End, "expected quote"));

    // Test escaped quote.
    t!(Code: r#""a\"bc""# => Str("a\"bc"));
    t!(Code[""]: r#""\""# => Error(End, "expected quote"));
}
|
||||
|
||||
/// Line comments in both modes: they run to the end of the line and the
/// newline that follows is a separate space token.
#[test]
fn test_tokenize_line_comments() {
    // Test line comment with no trailing newline.
    t!(Both[""]: "//" => LineComment);

    // Test line comment ends at newline.
    t!(Both["a1/"]: "//bc\n" => LineComment, Space(1));
    t!(Both["a1/"]: "// bc \n" => LineComment, Space(1));
    t!(Both["a1/"]: "//bc\r\n" => LineComment, Space(1));

    // Test nested line comments.
    t!(Both["a1/"]: "//a//b\n" => LineComment, Space(1));
}
|
||||
|
||||
/// Block comments in both modes: nesting, unclosed comments, and a stray
/// closing delimiter.
#[test]
fn test_tokenize_block_comments() {
    // Test basic block comments.
    t!(Both[""]: "/*" => BlockComment);
    t!(Both: "/**/" => BlockComment);
    t!(Both: "/*🏞*/" => BlockComment);
    t!(Both: "/*\n*/" => BlockComment);

    // Test depth 1 and 2 nested block comments.
    t!(Both: "/* /* */ */" => BlockComment);
    t!(Both: "/*/*/**/*/*/" => BlockComment);

    // Test two nested, one unclosed block comments.
    t!(Both[""]: "/*/*/**/*/" => BlockComment);

    // Test all combinations of up to two following slashes and stars.
    t!(Both[""]: "/*" => BlockComment);
    t!(Both[""]: "/*/" => BlockComment);
    t!(Both[""]: "/**" => BlockComment);
    t!(Both[""]: "/*//" => BlockComment);
    t!(Both[""]: "/*/*" => BlockComment);
    t!(Both[""]: "/**/" => BlockComment);
    t!(Both[""]: "/***" => BlockComment);

    // Test unexpected terminator.
    t!(Both: "/*Hi*/*/" => BlockComment,
        Error(Full, "unexpected end of block comment"));
}
|
@ -46,7 +46,7 @@ const LIMIT: usize = 14;
|
||||
|
||||
impl EcoString {
|
||||
/// Create a new, empty string.
|
||||
pub fn new() -> Self {
|
||||
pub const fn new() -> Self {
|
||||
Self(Repr::Small { buf: [0; LIMIT], len: 0 })
|
||||
}
|
||||
|
||||
|
@ -5,7 +5,7 @@ use iai::{black_box, main, Iai};
|
||||
use typst::diag::{FileError, FileResult};
|
||||
use typst::font::{Font, FontBook};
|
||||
use typst::model::Library;
|
||||
use typst::syntax::{Source, SourceId, TokenMode, Tokens};
|
||||
use typst::syntax::{LexMode, Lexer, Source, SourceId};
|
||||
use typst::util::Buffer;
|
||||
use typst::World;
|
||||
use unscanny::Scanner;
|
||||
@ -16,7 +16,7 @@ const FONT: &[u8] = include_bytes!("../fonts/IBMPlexSans-Regular.ttf");
|
||||
main!(
|
||||
bench_decode,
|
||||
bench_scan,
|
||||
bench_tokenize,
|
||||
bench_lex,
|
||||
bench_parse,
|
||||
bench_edit,
|
||||
bench_eval,
|
||||
@ -49,8 +49,8 @@ fn bench_scan(iai: &mut Iai) {
|
||||
})
|
||||
}
|
||||
|
||||
fn bench_tokenize(iai: &mut Iai) {
|
||||
iai.run(|| Tokens::new(black_box(TEXT), black_box(TokenMode::Markup)).count());
|
||||
fn bench_lex(iai: &mut Iai) {
|
||||
iai.run(|| Lexer::new(black_box(TEXT), black_box(LexMode::Markup)).count());
|
||||
}
|
||||
|
||||
fn bench_parse(iai: &mut Iai) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user