Re-engineer tokenization 🚿
This commit is contained in:
parent
a75ddd2c93
commit
b1e956419d
@ -15,7 +15,7 @@ pub mod prelude {
|
||||
pub use crate::func::{Scope, ParseFunc, LayoutFunc, Command, Commands};
|
||||
pub use crate::layout::prelude::*;
|
||||
pub use crate::syntax::{
|
||||
parse, ParseContext, ParseResult,
|
||||
ParseContext, ParseResult,
|
||||
SyntaxTree, FuncCall, FuncArgs, PosArg, KeyArg,
|
||||
Expression, Ident, ExpressionKind,
|
||||
Spanned, Span
|
||||
|
@ -297,9 +297,10 @@ function! {
|
||||
parse!(forbidden: body);
|
||||
|
||||
if let Some(name) = args.get_pos_opt::<Ident>()? {
|
||||
let flip = args.get_key_opt::<bool>("flip")?
|
||||
.unwrap_or(false);
|
||||
PageSizeFunc::Paper(Paper::from_name(name.as_str())?, flip)
|
||||
let flip = args.get_key_opt::<bool>("flip")?.unwrap_or(false);
|
||||
let paper = Paper::from_name(name.as_str())
|
||||
.ok_or_else(|| error!(@"invalid paper name: `{}`", name))?;
|
||||
PageSizeFunc::Paper(paper, flip)
|
||||
} else {
|
||||
PageSizeFunc::Custom(ExtentMap::new(&mut args, true)?)
|
||||
}
|
||||
|
@ -72,7 +72,7 @@ impl Size {
|
||||
|
||||
impl Display for Size {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
write!(f, "{}cm", self.to_cm())
|
||||
write!(f, "{}pt", self.points)
|
||||
}
|
||||
}
|
||||
|
||||
|
13
src/style.rs
13
src/style.rs
@ -3,7 +3,6 @@
|
||||
use toddle::query::{FontFallbackTree, FontVariant, FontStyle, FontWeight};
|
||||
|
||||
use crate::size::{Size, Size2D, SizeBox, ValueBox, PSize};
|
||||
use crate::syntax::ParseResult;
|
||||
|
||||
|
||||
/// Defines properties of pages and text.
|
||||
@ -157,7 +156,7 @@ pub struct Paper {
|
||||
|
||||
impl Paper {
|
||||
/// The paper with the given name.
|
||||
pub fn from_name(name: &str) -> ParseResult<Paper> {
|
||||
pub fn from_name(name: &str) -> Option<Paper> {
|
||||
parse_paper(name)
|
||||
}
|
||||
}
|
||||
@ -193,11 +192,11 @@ macro_rules! papers {
|
||||
class: $class,
|
||||
};)*
|
||||
|
||||
fn parse_paper(paper: &str) -> ParseResult<Paper> {
|
||||
Ok(match paper.to_lowercase().as_str() {
|
||||
$($($patterns)* => $var,)*
|
||||
_ => error!("unknown paper size: `{}`", paper),
|
||||
})
|
||||
fn parse_paper(paper: &str) -> Option<Paper> {
|
||||
match paper.to_lowercase().as_str() {
|
||||
$($($patterns)* => Some($var),)*
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -11,48 +11,6 @@ pub_use_mod!(parsing);
|
||||
pub_use_mod!(span);
|
||||
|
||||
|
||||
/// A logical unit of the incoming text stream.
///
/// Variants carrying a `&'s str` borrow their text from the source string.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Token<'s> {
    /// One or more whitespace (non-newline) codepoints.
    Space,
    /// A line feed (`\n`, `\r\n` and some more as defined by the Unicode standard).
    Newline,
    /// A left bracket: `[`.
    LeftBracket,
    /// A right bracket: `]`.
    RightBracket,
    /// A colon (`:`) indicating the beginning of function arguments (Function
    /// header only).
    ///
    /// If a colon occurs outside of a function header, it will be tokenized as
    /// [Text](Token::Text), just like the other tokens annotated with
    /// _Header only_.
    Colon,
    /// An equals (`=`) sign assigning a function argument a value (Header only).
    Equals,
    /// A comma (`,`) separating two function arguments (Header only).
    Comma,
    /// Quoted text as a string value (Header only).
    Quoted(&'s str),
    /// An underscore, indicating text in italics (Body only).
    Underscore,
    /// A star, indicating bold text (Body only).
    Star,
    /// A backtick, indicating monospace text (Body only).
    Backtick,
    /// A line comment.
    LineComment(&'s str),
    /// A block comment.
    BlockComment(&'s str),
    /// A star followed by a slash unexpectedly ending a block comment
    /// (the comment was not started before, otherwise a
    /// [BlockComment](Token::BlockComment) would be returned).
    StarSlash,
    /// Any consecutive string which does not contain markup.
    Text(&'s str),
}
|
||||
|
||||
/// A tree representation of source code.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct SyntaxTree {
|
||||
@ -256,11 +214,11 @@ debug_display!(Expression);
|
||||
pub struct Ident(pub String);
|
||||
|
||||
impl Ident {
|
||||
pub fn new(string: String) -> ParseResult<Ident> {
|
||||
if is_identifier(&string) {
|
||||
Ok(Ident(string))
|
||||
pub fn new<S>(ident: S) -> Option<Ident> where S: AsRef<str> + Into<String> {
|
||||
if is_identifier(ident.as_ref()) {
|
||||
Some(Ident(ident.into()))
|
||||
} else {
|
||||
error!("invalid identifier: `{}`", string);
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
@ -277,20 +235,20 @@ impl Display for Ident {
|
||||
|
||||
debug_display!(Ident);
|
||||
|
||||
/// Whether this word is a valid unicode identifier.
|
||||
/// Whether this word is a valid identifier.
|
||||
fn is_identifier(string: &str) -> bool {
|
||||
let mut chars = string.chars();
|
||||
|
||||
match chars.next() {
|
||||
Some('-') => (),
|
||||
Some(c) if UnicodeXID::is_xid_start(c) => (),
|
||||
Some('-') => {}
|
||||
Some(c) if UnicodeXID::is_xid_start(c) => {}
|
||||
_ => return false,
|
||||
}
|
||||
|
||||
while let Some(c) = chars.next() {
|
||||
match c {
|
||||
'.' | '-' => (),
|
||||
c if UnicodeXID::is_xid_continue(c) => (),
|
||||
'.' | '-' => {}
|
||||
c if UnicodeXID::is_xid_continue(c) => {}
|
||||
_ => return false,
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,4 @@
|
||||
//! Parsing of token streams into syntax trees.
|
||||
|
||||
use crate::func::Scope;
|
||||
use crate::size::Size;
|
||||
use super::*;
|
||||
|
||||
|
||||
@ -10,7 +7,7 @@ pub type ParseResult<T> = crate::TypesetResult<T>;
|
||||
|
||||
/// Parses source code into a syntax tree given a context.
|
||||
pub fn parse(src: &str, ctx: ParseContext) -> ParseResult<SyntaxTree> {
|
||||
Parser::new(src, ctx).parse()
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// The context for parsing.
|
||||
@ -19,833 +16,3 @@ pub struct ParseContext<'a> {
|
||||
/// The scope containing function definitions.
|
||||
pub scope: &'a Scope,
|
||||
}
|
||||
|
||||
/// Transforms token streams into syntax trees.
|
||||
#[derive(Debug)]
|
||||
struct Parser<'s> {
|
||||
src: &'s str,
|
||||
tokens: PeekableTokens<'s>,
|
||||
ctx: ParseContext<'s>,
|
||||
tree: SyntaxTree,
|
||||
color_tokens: Vec<Spanned<ColorToken>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
enum NewlineState {
|
||||
/// No newline yet.
|
||||
Zero,
|
||||
/// We saw one newline with the given span already and are
|
||||
/// looking for another.
|
||||
One(Span),
|
||||
/// We saw at least two newlines and wrote one, thus not
|
||||
/// writing another one for more newlines.
|
||||
TwoOrMore,
|
||||
}
|
||||
|
||||
impl<'s> Parser<'s> {
|
||||
/// Create a new parser from the source code and the context.
|
||||
fn new(src: &'s str, ctx: ParseContext<'s>) -> Parser<'s> {
|
||||
Parser {
|
||||
src,
|
||||
tokens: PeekableTokens::new(tokenize(src)),
|
||||
ctx,
|
||||
tree: SyntaxTree::new(),
|
||||
color_tokens: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the source into a syntax tree.
|
||||
fn parse(mut self) -> ParseResult<SyntaxTree> {
|
||||
while self.tokens.peek().is_some() {
|
||||
self.parse_white()?;
|
||||
self.parse_body_part()?;
|
||||
}
|
||||
|
||||
Ok(self.tree)
|
||||
}
|
||||
|
||||
/// Parse the next part of the body.
|
||||
fn parse_body_part(&mut self) -> ParseResult<()> {
|
||||
use Token::*;
|
||||
|
||||
if let Some(token) = self.tokens.peek() {
|
||||
match token.v {
|
||||
// Functions.
|
||||
LeftBracket => self.parse_func()?,
|
||||
RightBracket => error!("unexpected closing bracket"),
|
||||
|
||||
// Modifiers.
|
||||
Underscore => self.add_consumed(Node::ToggleItalics, token.span),
|
||||
Star => self.add_consumed(Node::ToggleBolder, token.span),
|
||||
Backtick => self.add_consumed(Node::ToggleMonospace, token.span),
|
||||
|
||||
// Normal text.
|
||||
Text(word) => self.add_consumed(Node::Text(word.to_owned()), token.span),
|
||||
|
||||
// The rest is handled elsewhere or should not happen, because
|
||||
// the tokenizer does not yield these in a body.
|
||||
Space | Newline | LineComment(_) | BlockComment(_) |
|
||||
Colon | Equals | Comma | Quoted(_) | StarSlash
|
||||
=> panic!("parse_body_part: unexpected token: {:?}", token),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Parse a complete function from the current position.
|
||||
fn parse_func(&mut self) -> ParseResult<()> {
|
||||
// This should only be called if a left bracket was seen.
|
||||
let token = self.tokens.next().expect("parse_func: expected token");
|
||||
assert!(token.v == Token::LeftBracket);
|
||||
|
||||
self.add_color_token(ColorToken::Bracket, token.span);
|
||||
|
||||
let mut span = token.span;
|
||||
let name = self.parse_func_name()?;
|
||||
|
||||
// Check for arguments
|
||||
let args = match self.tokens.next() {
|
||||
Some(Spanned { v: Token::RightBracket, span }) => {
|
||||
self.add_color_token(ColorToken::Bracket, span);
|
||||
FuncArgs::new()
|
||||
},
|
||||
Some(Spanned { v: Token::Colon, span }) => {
|
||||
self.add_color_token(ColorToken::Colon, span);
|
||||
self.parse_func_args()?
|
||||
}
|
||||
_ => error!("expected arguments or closing bracket"),
|
||||
};
|
||||
|
||||
span.end = self.tokens.get_position();
|
||||
let (func, body_span) = self.parse_func_call(name, args)?;
|
||||
|
||||
if let Some(body_span) = body_span {
|
||||
span.expand(body_span);
|
||||
}
|
||||
|
||||
// Finally this function is parsed to the end.
|
||||
self.add(Node::Func(func), span);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Parse a function header.
|
||||
fn parse_func_name(&mut self) -> ParseResult<Spanned<Ident>> {
|
||||
self.skip_white();
|
||||
|
||||
let name = match self.tokens.next() {
|
||||
Some(Spanned { v: Token::Text(word), span }) => {
|
||||
let ident = Ident::new(word.to_string())?;
|
||||
Spanned::new(ident, span)
|
||||
}
|
||||
_ => error!("expected identifier"),
|
||||
};
|
||||
|
||||
self.add_color_token(ColorToken::FuncName, name.span);
|
||||
self.skip_white();
|
||||
|
||||
Ok(name)
|
||||
}
|
||||
|
||||
/// Parse the arguments to a function.
|
||||
fn parse_func_args(&mut self) -> ParseResult<FuncArgs> {
|
||||
let mut args = FuncArgs::new();
|
||||
|
||||
loop {
|
||||
self.skip_white();
|
||||
|
||||
match self.parse_func_arg()? {
|
||||
Some(DynArg::Pos(arg)) => args.add_pos(arg),
|
||||
Some(DynArg::Key(arg)) => args.add_key(arg),
|
||||
None => {},
|
||||
}
|
||||
|
||||
match self.tokens.next() {
|
||||
Some(Spanned { v: Token::Comma, span }) => {
|
||||
self.add_color_token(ColorToken::Comma, span);
|
||||
}
|
||||
Some(Spanned { v: Token::RightBracket, span }) => {
|
||||
self.add_color_token(ColorToken::Bracket, span);
|
||||
break;
|
||||
}
|
||||
_ => error!("expected comma or closing bracket"),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(args)
|
||||
}
|
||||
|
||||
/// Parse one argument to a function.
|
||||
fn parse_func_arg(&mut self) -> ParseResult<Option<DynArg>> {
|
||||
let token = match self.tokens.peek() {
|
||||
Some(token) => token,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
Ok(match token.v {
|
||||
Token::Text(name) => {
|
||||
self.advance();
|
||||
self.skip_white();
|
||||
|
||||
Some(match self.tokens.peek() {
|
||||
Some(Spanned { v: Token::Equals, span }) => {
|
||||
self.advance();
|
||||
self.skip_white();
|
||||
|
||||
let name = Ident::new(name.to_string())?;
|
||||
let key = Spanned::new(name, token.span);
|
||||
|
||||
self.add_color_token(ColorToken::KeyArg, key.span);
|
||||
self.add_color_token(ColorToken::Equals, span);
|
||||
|
||||
let next = self.tokens.next()
|
||||
.ok_or_else(|| error!(@"expected expression"))?;
|
||||
|
||||
let value = Self::parse_expression(next)?;
|
||||
|
||||
self.add_expr_token(&value);
|
||||
|
||||
let span = Span::merge(key.span, value.span);
|
||||
let arg = KeyArg { key, value };
|
||||
|
||||
DynArg::Key(Spanned::new(arg, span))
|
||||
}
|
||||
|
||||
_ => {
|
||||
let expr = Self::parse_expression(token)?;
|
||||
self.add_expr_token(&expr);
|
||||
DynArg::Pos(expr)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Token::Quoted(_) => {
|
||||
self.advance();
|
||||
self.skip_white();
|
||||
|
||||
self.add_color_token(ColorToken::ExprStr, token.span);
|
||||
|
||||
Some(DynArg::Pos(Self::parse_expression(token)?))
|
||||
}
|
||||
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse a function call.
|
||||
fn parse_func_call(&mut self, name: Spanned<Ident>, args: FuncArgs)
|
||||
-> ParseResult<(FuncCall, Option<Span>)> {
|
||||
// Now we want to parse this function dynamically.
|
||||
let parser = self
|
||||
.ctx
|
||||
.scope
|
||||
.get_parser(&name.v.0)
|
||||
.ok_or_else(|| error!(@"unknown function: `{}`", &name.v))?;
|
||||
|
||||
let has_body = self.tokens.peek().map(Spanned::value) == Some(Token::LeftBracket);
|
||||
|
||||
// Do the parsing dependent on whether the function has a body.
|
||||
Ok(if has_body {
|
||||
self.advance();
|
||||
|
||||
// Find out the string which makes the body of this function.
|
||||
let start_index = self.tokens.string_index();
|
||||
let mut start_pos = self.tokens.get_position();
|
||||
start_pos.column -= 1;
|
||||
|
||||
let (mut end_index, mut end_pos) =
|
||||
find_closing_bracket(&self.src[start_index..])
|
||||
.ok_or_else(|| error!(@"expected closing bracket"))?;
|
||||
|
||||
end_index += start_index;
|
||||
end_pos.column += 1;
|
||||
|
||||
let span = Span::new(start_pos, end_pos);
|
||||
|
||||
// Parse the body.
|
||||
let body_string = &self.src[start_index..end_index];
|
||||
let body = parser(args, Some(body_string), self.ctx)?;
|
||||
|
||||
// Skip to the end of the function in the token stream.
|
||||
self.tokens.set_string_index(end_index);
|
||||
|
||||
// Now the body should be closed.
|
||||
let token = self.tokens.next().expect("parse_func_body: expected token");
|
||||
assert!(token.v == Token::RightBracket);
|
||||
|
||||
(FuncCall(body), Some(span))
|
||||
} else {
|
||||
(FuncCall(parser(args, None, self.ctx)?), None)
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse an expression.
|
||||
fn parse_expression(token: Spanned<Token>) -> ParseResult<Spanned<Expression>> {
|
||||
Ok(Spanned::new(match token.v {
|
||||
Token::Quoted(text) => Expression::Str(text.to_owned()),
|
||||
Token::Text(text) => {
|
||||
if let Ok(b) = text.parse::<bool>() {
|
||||
Expression::Bool(b)
|
||||
} else if let Ok(num) = text.parse::<f64>() {
|
||||
Expression::Num(num)
|
||||
} else if let Ok(size) = text.parse::<Size>() {
|
||||
Expression::Size(size)
|
||||
} else {
|
||||
// This loop does not actually loop, but is used for breaking.
|
||||
loop {
|
||||
if text.ends_with('%') {
|
||||
if let Ok(percent) = text[.. text.len()-1].parse::<f64>() {
|
||||
break Expression::Num(percent / 100.0);
|
||||
}
|
||||
}
|
||||
|
||||
break Expression::Ident(Ident::new(text.to_string())?);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => error!("expected expression"),
|
||||
}, token.span))
|
||||
}
|
||||
|
||||
/// Parse whitespace (as long as there is any) and skip over comments.
|
||||
fn parse_white(&mut self) -> ParseResult<()> {
|
||||
let mut state = NewlineState::Zero;
|
||||
|
||||
while let Some(token) = self.tokens.peek() {
|
||||
match token.v {
|
||||
Token::Space => {
|
||||
self.advance();
|
||||
match state {
|
||||
NewlineState::Zero | NewlineState::TwoOrMore => {
|
||||
self.add_space(token.span);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
Token::Newline => {
|
||||
self.advance();
|
||||
match state {
|
||||
NewlineState::Zero => state = NewlineState::One(token.span),
|
||||
NewlineState::One(span) => {
|
||||
self.add(Node::Newline, Span::merge(span, token.span));
|
||||
state = NewlineState::TwoOrMore;
|
||||
},
|
||||
NewlineState::TwoOrMore => self.add_space(token.span),
|
||||
}
|
||||
}
|
||||
|
||||
_ => {
|
||||
if let NewlineState::One(span) = state {
|
||||
self.add_space(Span::new(span.start, token.span.start));
|
||||
}
|
||||
|
||||
state = NewlineState::Zero;
|
||||
match token.v {
|
||||
Token::LineComment(_) | Token::BlockComment(_) => self.advance(),
|
||||
Token::StarSlash => error!("unexpected end of block comment"),
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Skip over whitespace and comments.
|
||||
fn skip_white(&mut self) {
|
||||
while let Some(token) = self.tokens.peek() {
|
||||
match token.v {
|
||||
Token::Space | Token::Newline |
|
||||
Token::LineComment(_) | Token::BlockComment(_) => self.advance(),
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Advance the iterator by one step.
|
||||
fn advance(&mut self) {
|
||||
self.tokens.next();
|
||||
}
|
||||
|
||||
/// Append a node to the tree.
|
||||
fn add(&mut self, node: Node, span: Span) {
|
||||
self.tree.nodes.push(Spanned::new(node, span));
|
||||
}
|
||||
|
||||
/// Append a space, merging with a previous space if there is one.
|
||||
fn add_space(&mut self, span: Span) {
|
||||
match self.tree.nodes.last_mut() {
|
||||
Some(ref mut node) if node.v == Node::Space => node.span.expand(span),
|
||||
_ => self.add(Node::Space, span),
|
||||
}
|
||||
}
|
||||
|
||||
/// Advance and return the given node.
|
||||
fn add_consumed(&mut self, node: Node, span: Span) {
|
||||
self.advance();
|
||||
self.add(node, span);
|
||||
}
|
||||
|
||||
/// Add a color token to the list.
|
||||
fn add_color_token(&mut self, token: ColorToken, span: Span) {
|
||||
self.color_tokens.push(Spanned::new(token, span));
|
||||
}
|
||||
|
||||
/// Add a color token for an expression.
|
||||
fn add_expr_token(&mut self, expr: &Spanned<Expression>) {
|
||||
let kind = match expr.v {
|
||||
Expression::Bool(_) => ColorToken::ExprBool,
|
||||
Expression::Ident(_) => ColorToken::ExprIdent,
|
||||
Expression::Num(_) => ColorToken::ExprNumber,
|
||||
Expression::Size(_) => ColorToken::ExprSize,
|
||||
Expression::Str(_) => ColorToken::ExprStr,
|
||||
};
|
||||
|
||||
self.add_color_token(kind, expr.span);
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the index of the first unbalanced and unescaped closing bracket.
|
||||
fn find_closing_bracket(src: &str) -> Option<(usize, Position)> {
|
||||
let mut parens = 0;
|
||||
let mut escaped = false;
|
||||
let mut line = 1;
|
||||
let mut line_start_index = 0;
|
||||
|
||||
for (index, c) in src.char_indices() {
|
||||
match c {
|
||||
'\\' => {
|
||||
escaped = !escaped;
|
||||
continue;
|
||||
}
|
||||
c if is_newline_char(c) => {
|
||||
line += 1;
|
||||
line_start_index = index + c.len_utf8();
|
||||
}
|
||||
']' if !escaped && parens == 0 => {
|
||||
let position = Position {
|
||||
line,
|
||||
column: index - line_start_index,
|
||||
};
|
||||
|
||||
return Some((index, position))
|
||||
}
|
||||
'[' if !escaped => parens += 1,
|
||||
']' if !escaped => parens -= 1,
|
||||
_ => {}
|
||||
}
|
||||
escaped = false;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// A peekable iterator for tokens which allows access to the original iterator
|
||||
/// inside this module (which is needed by the parser).
|
||||
#[derive(Debug, Clone)]
|
||||
struct PeekableTokens<'s> {
|
||||
tokens: Tokens<'s>,
|
||||
peeked: Option<Option<Spanned<Token<'s>>>>,
|
||||
}
|
||||
|
||||
impl<'s> PeekableTokens<'s> {
|
||||
/// Create a new iterator from a string.
|
||||
fn new(tokens: Tokens<'s>) -> PeekableTokens<'s> {
|
||||
PeekableTokens {
|
||||
tokens,
|
||||
peeked: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Peek at the next element.
|
||||
fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
|
||||
let iter = &mut self.tokens;
|
||||
*self.peeked.get_or_insert_with(|| iter.next())
|
||||
}
|
||||
|
||||
fn get_position(&self) -> Position {
|
||||
match self.peeked {
|
||||
Some(Some(peeked)) => peeked.span.start,
|
||||
_ => self.tokens.get_position(),
|
||||
}
|
||||
}
|
||||
|
||||
fn string_index(&self) -> usize {
|
||||
match self.peeked {
|
||||
Some(Some(peeked)) => peeked.span.start.line,
|
||||
_ => self.tokens.string_index(),
|
||||
}
|
||||
}
|
||||
|
||||
fn set_string_index(&mut self, index: usize) {
|
||||
self.tokens.set_string_index(index);
|
||||
self.peeked = None;
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Iterator for PeekableTokens<'s> {
|
||||
type Item = Spanned<Token<'s>>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.peeked.take() {
|
||||
Some(value) => value,
|
||||
None => self.tokens.next(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(non_snake_case)]
|
||||
mod tests {
|
||||
use crate::func::{Commands, Scope};
|
||||
use crate::layout::{LayoutContext, LayoutResult};
|
||||
use crate::syntax::*;
|
||||
use Node::{Func as F, Newline as N, Space as S};
|
||||
|
||||
function! {
|
||||
/// A testing function which just parses it's body into a syntax
|
||||
/// tree.
|
||||
#[derive(Debug)]
|
||||
pub struct TreeFn { pub tree: SyntaxTree }
|
||||
|
||||
parse(args, body, ctx) {
|
||||
args.clear();
|
||||
TreeFn {
|
||||
tree: parse!(expected: body, ctx)
|
||||
}
|
||||
}
|
||||
|
||||
layout() { vec![] }
|
||||
}
|
||||
|
||||
impl PartialEq for TreeFn {
|
||||
fn eq(&self, other: &TreeFn) -> bool {
|
||||
assert_tree_equal(&self.tree, &other.tree);
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
function! {
|
||||
/// A testing function without a body.
|
||||
#[derive(Debug, Default, PartialEq)]
|
||||
pub struct BodylessFn(Vec<Expression>, Vec<(Ident, Expression)>);
|
||||
|
||||
parse(args, body) {
|
||||
parse!(forbidden: body);
|
||||
BodylessFn(
|
||||
args.pos().map(Spanned::value).collect(),
|
||||
args.keys().map(|arg| (arg.v.key.v, arg.v.value.v)).collect(),
|
||||
)
|
||||
}
|
||||
|
||||
layout() { vec![] }
|
||||
}
|
||||
|
||||
mod args {
|
||||
use super::*;
|
||||
use super::Expression;
|
||||
pub use Expression::{Num as N, Size as Z, Bool as B};
|
||||
|
||||
pub fn S(string: &str) -> Expression { Expression::Str(string.to_owned()) }
|
||||
pub fn I(string: &str) -> Expression {
|
||||
Expression::Ident(Ident::new(string.to_owned()).unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
/// Asserts that two syntax trees are equal except for all spans inside them.
|
||||
fn assert_tree_equal(a: &SyntaxTree, b: &SyntaxTree) {
|
||||
for (x, y) in a.nodes.iter().zip(&b.nodes) {
|
||||
if x.v != y.v {
|
||||
panic!("trees are not equal: ({:#?}) != ({:#?})", x.v, y.v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Test if the source code parses into the syntax tree.
|
||||
fn test(src: &str, tree: SyntaxTree) {
|
||||
let ctx = ParseContext {
|
||||
scope: &Scope::new(),
|
||||
};
|
||||
assert_tree_equal(&parse(src, ctx).unwrap(), &tree);
|
||||
}
|
||||
|
||||
/// Test with a scope containing function definitions.
|
||||
fn test_scoped(scope: &Scope, src: &str, tree: SyntaxTree) {
|
||||
let ctx = ParseContext { scope };
|
||||
assert_tree_equal(&parse(src, ctx).unwrap(), &tree);
|
||||
}
|
||||
|
||||
/// Test if the source parses into the error.
|
||||
fn test_err(src: &str, err: &str) {
|
||||
let ctx = ParseContext {
|
||||
scope: &Scope::new(),
|
||||
};
|
||||
assert_eq!(parse(src, ctx).unwrap_err().to_string(), err);
|
||||
}
|
||||
|
||||
/// Test with a scope if the source parses into the error.
|
||||
fn test_err_scoped(scope: &Scope, src: &str, err: &str) {
|
||||
let ctx = ParseContext { scope };
|
||||
assert_eq!(parse(src, ctx).unwrap_err().to_string(), err);
|
||||
}
|
||||
|
||||
fn test_color(scope: &Scope, src: &str, tokens: Vec<(usize, usize, ColorToken)>) {
|
||||
let ctx = ParseContext { scope };
|
||||
let tree = parse(src, ctx).unwrap();
|
||||
// assert_eq!(tree.tokens,
|
||||
// tokens.into_iter()
|
||||
// .map(|(s, e, t)| Spanned::new(t, Span::new(s, e)))
|
||||
// .collect::<Vec<_>>()
|
||||
// );
|
||||
}
|
||||
|
||||
/// Create a text node.
|
||||
fn T(s: &str) -> Node {
|
||||
Node::Text(s.to_owned())
|
||||
}
|
||||
|
||||
fn zerospan<T>(val: T) -> Spanned<T> {
|
||||
Spanned::new(val, Span::new(Position::new(0, 0), Position::new(0, 0)))
|
||||
}
|
||||
|
||||
/// Shortcut macro to create a syntax tree. Is `vec`-like and the elements
|
||||
/// are the nodes without spans.
|
||||
macro_rules! tree {
|
||||
($($x:expr),*) => ({
|
||||
#[allow(unused_mut)] let mut nodes = vec![];
|
||||
$(
|
||||
nodes.push(zerospan($x));
|
||||
)*
|
||||
SyntaxTree { nodes }
|
||||
});
|
||||
($($x:expr,)*) => (tree![$($x),*])
|
||||
}
|
||||
|
||||
/// Shortcut macro to create a function.
|
||||
macro_rules! func {
|
||||
() => (
|
||||
FuncCall(Box::new(BodylessFn(vec![], vec![])))
|
||||
);
|
||||
(body: $tree:expr $(,)*) => (
|
||||
FuncCall(Box::new(TreeFn { tree: $tree }))
|
||||
);
|
||||
(args: $pos:expr, $key:expr) => (
|
||||
FuncCall(Box::new(BodylessFn($pos, $key)))
|
||||
);
|
||||
}
|
||||
|
||||
/// Parse the basic cases.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn parse_base() {
|
||||
test("", tree! []);
|
||||
test("Hello World!", tree! [ T("Hello"), S, T("World!") ]);
|
||||
}
|
||||
|
||||
/// Test whether newlines generate the correct whitespace.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn parse_newlines_whitespace() {
|
||||
test("Hello\nWorld", tree! [ T("Hello"), S, T("World") ]);
|
||||
test("Hello \n World", tree! [ T("Hello"), S, T("World") ]);
|
||||
test("Hello\n\nWorld", tree! [ T("Hello"), N, T("World") ]);
|
||||
test("Hello \n\nWorld", tree! [ T("Hello"), S, N, T("World") ]);
|
||||
test("Hello\n\n World", tree! [ T("Hello"), N, S, T("World") ]);
|
||||
test("Hello \n \n \n World", tree! [ T("Hello"), S, N, S, T("World") ]);
|
||||
test("Hello\n \n\n World", tree! [ T("Hello"), N, S, T("World") ]);
|
||||
test("Hello\n \nWorld", tree! [ T("Hello"), N, T("World") ]);
|
||||
}
|
||||
|
||||
/// Parse things dealing with functions.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn parse_functions() {
|
||||
let mut scope = Scope::new();
|
||||
scope.add::<BodylessFn>("test");
|
||||
scope.add::<BodylessFn>("end");
|
||||
scope.add::<TreeFn>("modifier");
|
||||
scope.add::<TreeFn>("func");
|
||||
|
||||
test_scoped(&scope,"[test]", tree! [ F(func! {}) ]);
|
||||
test_scoped(&scope,"[ test]", tree! [ F(func! {}) ]);
|
||||
test_scoped(&scope, "This is an [modifier][example] of a function invocation.", tree! [
|
||||
T("This"), S, T("is"), S, T("an"), S,
|
||||
F(func! { body: tree! [ T("example") ] }), S,
|
||||
T("of"), S, T("a"), S, T("function"), S, T("invocation.")
|
||||
]);
|
||||
test_scoped(&scope, "[func][Hello][modifier][Here][end]", tree! [
|
||||
F(func! { body: tree! [ T("Hello") ] }),
|
||||
F(func! { body: tree! [ T("Here") ] }),
|
||||
F(func! {}),
|
||||
]);
|
||||
test_scoped(&scope, "[func][]", tree! [ F(func! { body: tree! [] }) ]);
|
||||
test_scoped(&scope, "[modifier][[func][call]] outside", tree! [
|
||||
F(func! { body: tree! [ F(func! { body: tree! [ T("call") ] }) ] }), S, T("outside")
|
||||
]);
|
||||
|
||||
}
|
||||
|
||||
/// Parse functions with arguments.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn parse_function_args() {
|
||||
use args::*;
|
||||
|
||||
fn func(
|
||||
pos: Vec<Expression>,
|
||||
key: Vec<(&str, Expression)>,
|
||||
) -> SyntaxTree {
|
||||
let key = key.into_iter()
|
||||
.map(|s| (Ident::new(s.0.to_string()).unwrap(), s.1))
|
||||
.collect();
|
||||
|
||||
tree! [ F(func!(args: pos, key)) ]
|
||||
}
|
||||
|
||||
let mut scope = Scope::new();
|
||||
scope.add::<BodylessFn>("align");
|
||||
|
||||
test_scoped(&scope, "[align: left]", func(vec![I("left")], vec![]));
|
||||
test_scoped(&scope, "[align: left,right]", func(vec![I("left"), I("right")], vec![]));
|
||||
test_scoped(&scope, "[align: left, right]", func(vec![I("left"), I("right")], vec![]));
|
||||
test_scoped(&scope, "[align: \"hello\"]", func(vec![S("hello")], vec![]));
|
||||
test_scoped(&scope, r#"[align: "hello\"world"]"#, func(vec![S(r#"hello\"world"#)], vec![]));
|
||||
test_scoped(&scope, "[align: 12]", func(vec![N(12.0)], vec![]));
|
||||
test_scoped(&scope, "[align: 17.53pt]", func(vec![Z(Size::pt(17.53))], vec![]));
|
||||
test_scoped(&scope, "[align: 2.4in]", func(vec![Z(Size::inches(2.4))], vec![]));
|
||||
test_scoped(&scope, "[align: true, 10mm, left, \"hi, there\"]",
|
||||
func(vec![B(true), Z(Size::mm(10.0)), I("left"), S("hi, there")], vec![]));
|
||||
|
||||
test_scoped(&scope, "[align: right=true]", func(vec![], vec![("right", B(true))]));
|
||||
test_scoped(&scope, "[align: flow = horizontal]",
|
||||
func(vec![], vec![("flow", I("horizontal"))]));
|
||||
test_scoped(&scope, "[align: x=1cm, y=20mm]",
|
||||
func(vec![], vec![("x", Z(Size::cm(1.0))), ("y", Z(Size::mm(20.0)))]));
|
||||
test_scoped(&scope, "[align: x=5.14,a, \"b\", c=me,d=you]",
|
||||
func(vec![I("a"), S("b")], vec![("x", N(5.14)), ("c", I("me")), ("d", I("you"))]));
|
||||
}
|
||||
|
||||
/// Parse comments (line and block).
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn parse_comments() {
|
||||
let mut scope = Scope::new();
|
||||
scope.add::<BodylessFn>("test");
|
||||
scope.add::<TreeFn>("func");
|
||||
|
||||
test_scoped(&scope, "Text\n// Comment\n More text",
|
||||
tree! [ T("Text"), S, T("More"), S, T("text") ]);
|
||||
test_scoped(&scope, "[test/*world*/]",
|
||||
tree! [ F(func! {}) ]);
|
||||
test_scoped(&scope, "[test/*]*/]",
|
||||
tree! [ F(func! {}) ]);
|
||||
}
|
||||
|
||||
/// Test if escaped, but unbalanced parens are correctly parsed.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn parse_unbalanced_body_parens() {
|
||||
let mut scope = Scope::new();
|
||||
scope.add::<TreeFn>("code");
|
||||
|
||||
test_scoped(&scope, r"My [code][Close \]] end", tree! [
|
||||
T("My"), S, F(func! { body: tree! [ T("Close"), S, T("]") ] }), S, T("end")
|
||||
]);
|
||||
test_scoped(&scope, r"My [code][\[ Open] end", tree! [
|
||||
T("My"), S, F(func! { body: tree! [ T("["), S, T("Open") ] }), S, T("end")
|
||||
]);
|
||||
test_scoped(&scope, r"My [code][Open \] and \[ close]end", tree! [
|
||||
T("My"), S, F(func! { body:
|
||||
tree! [ T("Open"), S, T("]"), S, T("and"), S, T("["), S, T("close") ]
|
||||
}), T("end")
|
||||
]);
|
||||
}
|
||||
|
||||
/// Tests if the parser handles non-ASCII stuff correctly.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn parse_unicode() {
|
||||
let mut scope = Scope::new();
|
||||
scope.add::<BodylessFn>("func");
|
||||
scope.add::<TreeFn>("bold");
|
||||
|
||||
test_scoped(&scope, "[func] ⺐.", tree! [ F(func! {}), S, T("⺐.") ]);
|
||||
test_scoped(&scope, "[bold][Hello 🌍!]", tree! [
|
||||
F(func! { body: tree! [ T("Hello"), S, T("🌍!") ] })
|
||||
]);
|
||||
}
|
||||
|
||||
/// Tests whether spans get calculated correctly.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn parse_spans() {
|
||||
fn test_span(src: &str, correct: Vec<(usize, usize, usize, usize)>) {
|
||||
let mut scope = Scope::new();
|
||||
scope.add::<TreeFn>("hello");
|
||||
let tree = parse(src, ParseContext { scope: &scope }).unwrap();
|
||||
let spans = tree.nodes.into_iter()
|
||||
.map(|node| {
|
||||
let Span { start, end } = node.span;
|
||||
(start.line, start.column, end.line, end.column)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
assert_eq!(spans, correct);
|
||||
}
|
||||
|
||||
test_span("hello world", vec![(1, 0, 1, 5), (1, 5, 1, 6), (1, 6, 1, 11)]);
|
||||
test_span("p1\n \np2", vec![(1, 0, 1, 2), (1, 2, 2, 2), (3, 0, 3, 2)]);
|
||||
|
||||
let src = "func\n [hello: pos, other][body\r\n _🌍_\n]";
|
||||
test_span(src, vec![
|
||||
(1, 0, 1, 4),
|
||||
(1, 4, 2, 1),
|
||||
(2, 1, 4, 1)
|
||||
]);
|
||||
}
|
||||
|
||||
/// Checks that malformed input is rejected with the expected error message.
#[test]
#[rustfmt::skip]
fn parse_errors() {
    let mut scope = Scope::new();
    scope.add::<TreeFn>("hello");

    test_err("No functions here]", "unexpected closing bracket");
    test_err_scoped(&scope, "[hello][world", "expected closing bracket");

    // The remaining cases do not need any function in scope.
    let unscoped_cases = [
        ("[hello world", "expected arguments or closing bracket"),
        ("[ no^name][Why?]", "invalid identifier: `no^name`"),
        ("Hello */", "unexpected end of block comment"),
    ];
    for &(src, message) in unscoped_cases.iter() {
        test_err(src, message);
    }
}
|
||||
|
||||
/// Checks the color tokens produced for function calls with and without
/// arguments and for a function nested in another function's body.
#[test]
#[rustfmt::skip]
fn test_highlighting() {
    use ColorToken::{Bracket as B, FuncName as F, *};

    let mut scope = Scope::new();
    scope.add::<BodylessFn>("func");
    scope.add::<TreeFn>("tree");

    let bare_call = vec![(0, 1, B), (1, 5, F), (5, 6, B)];
    test_color(&scope, "[func]", bare_call);

    let positional = vec![
        (0, 1, B), (1, 5, F), (5, 6, Colon), (7, 11, ExprSize), (11, 12, B)
    ];
    test_color(&scope, "[func: 12pt]", positional);

    let keyed = vec![
        (0, 1, B), (1, 5, F), (5, 6, Colon),
        (7, 8, KeyArg), (8, 9, Equals), (9, 13, ExprNumber),
        (13, 14, Comma),
        (15, 16, KeyArg), (16, 17, Equals), (17, 21, ExprStr),
        (21, 22, B),
    ];
    test_color(&scope, "[func: x=25.3, y=\"hi\"]", keyed);

    let nested = vec![
        (6, 7, B), (7, 11, F), (11, 12, B),
        (12, 13, B), (18, 19, B)
    ];
    test_color(&scope, "Hello [tree][With [func: 3]]", nested);
}
|
||||
}
|
||||
|
@ -45,8 +45,6 @@ impl Span {
|
||||
}
|
||||
|
||||
pub fn merge(a: Span, b: Span) -> Span {
|
||||
let start = a.start.min(b.start);
|
||||
|
||||
Span {
|
||||
start: a.start.min(b.start),
|
||||
end: a.end.max(b.end),
|
||||
|
@ -1,88 +1,87 @@
|
||||
//! Tokenization of source code.
|
||||
|
||||
use std::str::CharIndices;
|
||||
use smallvec::SmallVec;
|
||||
use std::iter::Peekable;
|
||||
use std::str::Chars;
|
||||
|
||||
use super::*;
|
||||
use Token::*;
|
||||
use State::*;
|
||||
|
||||
|
||||
/// Builds an iterator over the tokens of the source code.
|
||||
pub fn tokenize(src: &str) -> Tokens {
|
||||
Tokens::new(src)
|
||||
}
|
||||
|
||||
/// An iterator over the tokens of source code.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Tokens<'s> {
|
||||
src: &'s str,
|
||||
chars: PeekableChars<'s>,
|
||||
state: TokensState,
|
||||
stack: SmallVec<[TokensState; 1]>,
|
||||
line: usize,
|
||||
line_start_index: usize,
|
||||
/// A minimal semantic entity of source code.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Token<'s> {
|
||||
/// One or more whitespace characters. The contained `usize` denotes the
|
||||
/// number of newlines that were contained in the whitespace.
|
||||
Whitespace(usize),
|
||||
|
||||
/// A line comment with inner string contents `//<&'s str>\n`.
|
||||
LineComment(&'s str),
|
||||
/// A block comment with inner string contents `/*<&'s str>*/`. The comment
|
||||
/// can contain nested block comments.
|
||||
BlockComment(&'s str),
|
||||
/// An erroneous `*/` without an opening block comment.
|
||||
StarSlash,
|
||||
|
||||
/// A left bracket: `[`.
|
||||
LeftBracket,
|
||||
/// A right bracket: `]`.
|
||||
RightBracket,
|
||||
|
||||
/// A left parenthesis in a function header: `(`.
|
||||
LeftParen,
|
||||
/// A right parenthesis in a function header: `)`.
|
||||
RightParen,
|
||||
/// A left brace in a function header: `{`.
|
||||
LeftBrace,
|
||||
/// A right brace in a function header: `}`.
|
||||
RightBrace,
|
||||
|
||||
/// A colon in a function header: `:`.
|
||||
Colon,
|
||||
/// A comma in a function header: `:`.
|
||||
Comma,
|
||||
/// An equals sign in a function header: `=`.
|
||||
Equals,
|
||||
|
||||
/// An expression in a function header.
|
||||
Expr(Expression),
|
||||
|
||||
/// A star in body-text.
|
||||
Star,
|
||||
/// An underscore in body-text.
|
||||
Underscore,
|
||||
/// A backtick in body-text.
|
||||
Backtick,
|
||||
|
||||
/// Any other consecutive string.
|
||||
Text(&'s str),
|
||||
}
|
||||
|
||||
/// An iterator over the tokens of a string of source code.
|
||||
pub struct Tokens<'s> {
|
||||
src: &'s str,
|
||||
chars: Characters<'s>,
|
||||
state: State,
|
||||
stack: Vec<State>,
|
||||
}
|
||||
|
||||
/// The state the tokenizer is in.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
enum TokensState {
|
||||
/// The base state if there is nothing special we are in.
|
||||
enum State {
|
||||
Header,
|
||||
StartBody,
|
||||
Body,
|
||||
/// Inside a function header. Here colons and equal signs get parsed
|
||||
/// as distinct tokens rather than text.
|
||||
Function,
|
||||
/// We expect either the end of the function or the beginning of the body.
|
||||
MaybeBody,
|
||||
}
|
||||
|
||||
impl<'s> Tokens<'s> {
|
||||
/// Create a new token stream from source code.
|
||||
pub fn new(src: &'s str) -> Tokens<'s> {
|
||||
Tokens {
|
||||
src,
|
||||
chars: PeekableChars::new(src),
|
||||
state: TokensState::Body,
|
||||
stack: SmallVec::new(),
|
||||
line: 1,
|
||||
line_start_index: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// The index of the first character of the next token in the source string.
|
||||
pub fn string_index(&self) -> usize {
|
||||
self.chars.string_index()
|
||||
}
|
||||
|
||||
/// Go to a new position in the underlying string.
|
||||
pub fn set_string_index(&mut self, index: usize) {
|
||||
self.chars.set_string_index(index);
|
||||
}
|
||||
|
||||
/// The current position in the source.
|
||||
pub fn get_position(&self) -> Position {
|
||||
self.line_position(self.string_index())
|
||||
}
|
||||
|
||||
/// Advance the iterator by one step.
|
||||
fn advance(&mut self) {
|
||||
self.chars.next();
|
||||
}
|
||||
|
||||
/// Switch to the given state.
|
||||
fn switch(&mut self, state: TokensState) {
|
||||
self.stack.push(self.state);
|
||||
self.state = state;
|
||||
}
|
||||
|
||||
/// Go back to the top-of-stack state.
|
||||
fn unswitch(&mut self) {
|
||||
self.state = self.stack.pop().unwrap_or(TokensState::Body);
|
||||
}
|
||||
|
||||
/// The `Position` with line and column for a string index.
|
||||
fn line_position(&self, index: usize) -> Position {
|
||||
Position {
|
||||
line: self.line,
|
||||
column: index - self.line_start_index,
|
||||
chars: Characters::new(src),
|
||||
state: State::Body,
|
||||
stack: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -90,455 +89,281 @@ impl<'s> Tokens<'s> {
|
||||
impl<'s> Iterator for Tokens<'s> {
|
||||
type Item = Spanned<Token<'s>>;
|
||||
|
||||
/// Advance the iterator, return the next token or nothing.
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
use TokensState as TS;
|
||||
/// Parse the next token in the source code.
|
||||
fn next(&mut self) -> Option<Spanned<Token<'s>>> {
|
||||
let start = self.chars.position();
|
||||
let first = self.chars.next()?;
|
||||
let second = self.chars.peek();
|
||||
|
||||
// Go to the body state if the function has a body or return to the top-of-stack
|
||||
// state.
|
||||
if self.state == TS::MaybeBody {
|
||||
if let Some((index, '[')) = self.chars.peek() {
|
||||
self.advance();
|
||||
self.state = TS::Body;
|
||||
let span = Span::at(self.line_position(index));
|
||||
return Some(Spanned::new(Token::LeftBracket, span));
|
||||
} else {
|
||||
self.unswitch();
|
||||
}
|
||||
}
|
||||
let token = match first {
|
||||
// Comments.
|
||||
'/' if second == Some('/') => self.parse_line_comment(),
|
||||
'/' if second == Some('*') => self.parse_block_comment(),
|
||||
'*' if second == Some('/') => { self.eat(); StarSlash }
|
||||
|
||||
// Take the next char and peek at the one behind.
|
||||
let (pos, next) = self.chars.next()?;
|
||||
let afterwards = self.chars.peekc();
|
||||
// Whitespace.
|
||||
c if c.is_whitespace() => self.parse_whitespace(c),
|
||||
|
||||
/// The index at which the line ended, if it did.
|
||||
let mut eol = None;
|
||||
|
||||
let token = match next {
|
||||
// Functions
|
||||
'[' => {
|
||||
self.switch(TS::Function);
|
||||
Token::LeftBracket
|
||||
}
|
||||
// Functions.
|
||||
'[' => { self.set_state(Header); LeftBracket }
|
||||
']' => {
|
||||
if self.state == TS::Function {
|
||||
self.state = TS::MaybeBody;
|
||||
if self.state == Header && second == Some('[') {
|
||||
self.state = StartBody;
|
||||
} else {
|
||||
self.unswitch();
|
||||
self.pop_state();
|
||||
}
|
||||
|
||||
Token::RightBracket
|
||||
RightBracket
|
||||
}
|
||||
|
||||
// Line comment
|
||||
'/' if afterwards == Some('/') => {
|
||||
let start = self.string_index() + 1;
|
||||
// Syntactic elements in function headers.
|
||||
'(' if self.state == Header => LeftParen,
|
||||
')' if self.state == Header => RightParen,
|
||||
'{' if self.state == Header => LeftBrace,
|
||||
'}' if self.state == Header => RightBrace,
|
||||
':' if self.state == Header => Colon,
|
||||
',' if self.state == Header => Comma,
|
||||
'=' if self.state == Header => Equals,
|
||||
|
||||
while let Some(c) = self.chars.peekc() {
|
||||
if is_newline_char(c) {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
// String values.
|
||||
'"' if self.state == Header => self.parse_string(),
|
||||
|
||||
let end = self.string_index();
|
||||
Token::LineComment(&self.src[start..end])
|
||||
}
|
||||
// Style toggles.
|
||||
'*' if self.state == Body => Star,
|
||||
'_' if self.state == Body => Underscore,
|
||||
'`' if self.state == Body => Backtick,
|
||||
|
||||
// Block comment
|
||||
'/' if afterwards == Some('*') => {
|
||||
let start = self.string_index() + 1;
|
||||
let mut nested = 0;
|
||||
// An escaped thing.
|
||||
'\\' => self.parse_escaped(),
|
||||
|
||||
while let Some((_, c)) = self.chars.next() {
|
||||
let after = self.chars.peekc();
|
||||
match (c, after) {
|
||||
('*', Some('/')) if nested == 0 => {
|
||||
self.advance();
|
||||
break;
|
||||
}
|
||||
('/', Some('*')) => {
|
||||
self.advance();
|
||||
nested += 1
|
||||
}
|
||||
('*', Some('/')) => {
|
||||
self.advance();
|
||||
nested -= 1
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let end = self.string_index() - 2;
|
||||
Token::BlockComment(&self.src[start..end])
|
||||
}
|
||||
|
||||
// Unexpected end of block comment
|
||||
'*' if afterwards == Some('/') => {
|
||||
self.advance();
|
||||
Token::StarSlash
|
||||
}
|
||||
|
||||
// Whitespace
|
||||
' ' | '\t' => {
|
||||
while let Some(c) = self.chars.peekc() {
|
||||
match c {
|
||||
' ' | '\t' => self.advance(),
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
Token::Space
|
||||
}
|
||||
|
||||
// Newlines
|
||||
'\r' if afterwards == Some('\n') => {
|
||||
self.advance();
|
||||
eol = Some(pos + "\r\n".len());
|
||||
Token::Newline
|
||||
}
|
||||
c if is_newline_char(c) => {
|
||||
eol = Some(pos + c.len_utf8());
|
||||
Token::Newline
|
||||
}
|
||||
|
||||
// Star/Underscore/Backtick in bodies
|
||||
'*' if self.state == TS::Body => Token::Star,
|
||||
'_' if self.state == TS::Body => Token::Underscore,
|
||||
'`' if self.state == TS::Body => Token::Backtick,
|
||||
|
||||
// Context sensitive operators in headers
|
||||
':' if self.state == TS::Function => Token::Colon,
|
||||
'=' if self.state == TS::Function => Token::Equals,
|
||||
',' if self.state == TS::Function => Token::Comma,
|
||||
|
||||
// A string value.
|
||||
'"' if self.state == TS::Function => {
|
||||
let start = self.string_index();
|
||||
let mut end = start;
|
||||
let mut escaped = false;
|
||||
|
||||
while let Some((index, c)) = self.chars.next() {
|
||||
end = index;
|
||||
if c == '"' && !escaped {
|
||||
break;
|
||||
}
|
||||
|
||||
escaped = c == '\\';
|
||||
}
|
||||
|
||||
Token::Quoted(&self.src[start..end])
|
||||
}
|
||||
|
||||
// Escaping
|
||||
'\\' => {
|
||||
if let Some((index, c)) = self.chars.peek() {
|
||||
let escapable = match c {
|
||||
'[' | ']' | '\\' | '*' | '_' | '`' | ':' | '=' | ',' | '/' => true,
|
||||
// Expressions or just strings.
|
||||
c => {
|
||||
let word = self.read_string_until(|n| {
|
||||
match n {
|
||||
c if c.is_whitespace() => true,
|
||||
'\\' | '[' | ']' | '*' | '_' | '`' | ':' | '=' |
|
||||
',' | '"' | '/' => true,
|
||||
_ => false,
|
||||
};
|
||||
|
||||
if escapable {
|
||||
self.advance();
|
||||
Token::Text(&self.src[index..index + c.len_utf8()])
|
||||
} else {
|
||||
Token::Text("\\")
|
||||
}
|
||||
}, false, -(c.len_utf8() as isize), 0);
|
||||
|
||||
if self.state == Header {
|
||||
self.parse_expr(word)
|
||||
} else {
|
||||
Token::Text("\\")
|
||||
Text(word)
|
||||
}
|
||||
}
|
||||
|
||||
// Normal text
|
||||
_ => {
|
||||
// Find out when the word ends.
|
||||
while let Some((_, c)) = self.chars.peek() {
|
||||
let second = self.chars.peekn(1).map(|p| p.1);
|
||||
|
||||
// Whether the next token is still from the text or not.
|
||||
let continues = match c {
|
||||
'[' | ']' | '\\' => false,
|
||||
'*' | '_' | '`' if self.state == TS::Body => false,
|
||||
':' | '=' | ',' | '"' if self.state == TS::Function => false,
|
||||
|
||||
'/' => second != Some('/') && second != Some('*'),
|
||||
'*' => second != Some('/'),
|
||||
|
||||
' ' | '\t' => false,
|
||||
c if is_newline_char(c) => false,
|
||||
|
||||
_ => true,
|
||||
};
|
||||
|
||||
if !continues {
|
||||
break;
|
||||
}
|
||||
|
||||
self.advance();
|
||||
}
|
||||
|
||||
let end = self.string_index();
|
||||
Token::Text(&self.src[pos..end])
|
||||
}
|
||||
};
|
||||
|
||||
let start = self.line_position(pos);
|
||||
let end = self.get_position();
|
||||
let span = Span::new(start, end);
|
||||
let end = self.chars.position();
|
||||
let span = Span { start, end };
|
||||
|
||||
if let Some(index) = eol {
|
||||
self.line += 1;
|
||||
self.line_start_index = index;
|
||||
}
|
||||
|
||||
Some(Spanned::new(token, span))
|
||||
Some(Spanned { v: token, span })
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether this character is a newline (or starts one).
|
||||
pub(crate) fn is_newline_char(character: char) -> bool {
|
||||
impl<'s> Tokens<'s> {
|
||||
fn parse_line_comment(&mut self) -> Token<'s> {
|
||||
LineComment(self.read_string_until(is_newline_char, false, 1, 0))
|
||||
}
|
||||
|
||||
fn parse_block_comment(&mut self) -> Token<'s> {
|
||||
enum Last { Slash, Star, Other }
|
||||
use Last::*;
|
||||
|
||||
self.eat();
|
||||
|
||||
let mut depth = 0;
|
||||
let mut last = Last::Other;
|
||||
|
||||
// Find the first `*/` that does not correspond to a nested `/*`.
|
||||
// Remove the last two bytes to obtain the raw inner text without `*/`.
|
||||
BlockComment(self.read_string_until(|n| {
|
||||
match n {
|
||||
'/' => match last {
|
||||
Star if depth == 0 => return true,
|
||||
Star => depth -= 1,
|
||||
_ => last = Slash
|
||||
}
|
||||
'*' => match last {
|
||||
Slash => depth += 1,
|
||||
_ => last = Star,
|
||||
}
|
||||
_ => last = Other,
|
||||
}
|
||||
|
||||
false
|
||||
}, true, 0, -2))
|
||||
}
|
||||
|
||||
fn parse_whitespace(&mut self, c: char) -> Token<'s> {
|
||||
let mut newlines = if is_newline_char(c) { 1 } else { 0 };
|
||||
let mut last = c;
|
||||
|
||||
self.read_string_until(|n| {
|
||||
if is_newline_char(n) && !(last == '\r' && n == '\n') {
|
||||
newlines += 1;
|
||||
}
|
||||
|
||||
last = n;
|
||||
!n.is_whitespace()
|
||||
}, false, 0, 0);
|
||||
|
||||
Whitespace(newlines)
|
||||
}
|
||||
|
||||
fn parse_string(&mut self) -> Token<'s> {
|
||||
let mut escaped = false;
|
||||
Expr(Expression::Str(self.read_string_until(|n| {
|
||||
if n == '"' && !escaped {
|
||||
return true;
|
||||
} else if n == '\\' {
|
||||
escaped = !escaped;
|
||||
} else {
|
||||
escaped = false;
|
||||
}
|
||||
|
||||
false
|
||||
}, true, 0, -1).to_string()))
|
||||
}
|
||||
|
||||
fn parse_escaped(&mut self) -> Token<'s> {
|
||||
fn is_escapable(c: char) -> bool {
|
||||
match c {
|
||||
'\\' | '[' | ']' | '*' | '_' | '`' | '/' => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
let c = self.chars.peek().unwrap_or('n');
|
||||
if self.state == Body && is_escapable(c) {
|
||||
let index = self.chars.index();
|
||||
self.eat();
|
||||
Text(&self.src[index .. index + c.len_utf8()])
|
||||
} else {
|
||||
Text("\\")
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_expr(&mut self, word: &'s str) -> Token<'s> {
|
||||
if let Ok(b) = word.parse::<bool>() {
|
||||
Expr(Expression::Bool(b))
|
||||
} else if let Ok(num) = word.parse::<f64>() {
|
||||
Expr(Expression::Num(num))
|
||||
} else if let Ok(num) = parse_percentage(word) {
|
||||
Expr(Expression::Num(num / 100.0))
|
||||
} else if let Ok(size) = word.parse::<Size>() {
|
||||
Expr(Expression::Size(size))
|
||||
} else if let Some(ident) = Ident::new(word) {
|
||||
Expr(Expression::Ident(ident))
|
||||
} else {
|
||||
Text(word)
|
||||
}
|
||||
}
|
||||
|
||||
fn read_string_until<F>(
|
||||
&mut self,
|
||||
mut f: F,
|
||||
eat_match: bool,
|
||||
offset_start: isize,
|
||||
offset_end: isize,
|
||||
) -> &'s str where F: FnMut(char) -> bool {
|
||||
let start = ((self.chars.index() as isize) + offset_start) as usize;
|
||||
let mut matched = false;
|
||||
|
||||
while let Some(c) = self.chars.peek() {
|
||||
if f(c) {
|
||||
matched = true;
|
||||
if eat_match {
|
||||
self.chars.next();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
self.chars.next();
|
||||
}
|
||||
|
||||
let mut end = self.chars.index();
|
||||
if matched {
|
||||
end = ((end as isize) + offset_end) as usize;
|
||||
}
|
||||
|
||||
&self.src[start .. end]
|
||||
}
|
||||
|
||||
/// Enters `state`, saving the current state on the stack so that
/// `pop_state` can return to it.
fn set_state(&mut self, state: State) {
    let previous = std::mem::replace(&mut self.state, state);
    self.stack.push(previous);
}
|
||||
|
||||
fn pop_state(&mut self) {
|
||||
self.state = self.stack.pop().unwrap_or(Body);
|
||||
}
|
||||
|
||||
fn eat(&mut self) {
|
||||
self.chars.next();
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a word of the form `<number>%` and returns the number in front
/// of the percent sign (not divided by 100).
///
/// Fails with `Err(())` if the word does not end with `%` or if the part
/// before it is not a valid `f64`.
fn parse_percentage(word: &str) -> Result<f64, ()> {
    if !word.ends_with('%') {
        return Err(());
    }

    // `%` is a single byte, so cutting one byte off stays on a char boundary.
    let number = &word[.. word.len() - 1];
    match number.parse::<f64>() {
        Ok(value) => Ok(value),
        Err(_) => Err(()),
    }
}
|
||||
|
||||
/// Whether this character denotes a newline.
///
/// Note that `\r` and `\n` are each reported as a newline on their own;
/// treating `\r\n` as a single line break is up to the caller.
fn is_newline_char(character: char) -> bool {
    match character {
        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
        '\x0A' ..= '\x0D' => true,
        // Next Line, Line Separator, Paragraph Separator.
        '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
        _ => false,
    }
}
|
||||
|
||||
/// A (index, char) iterator with double lookahead.
|
||||
#[derive(Debug, Clone)]
|
||||
struct PeekableChars<'s> {
|
||||
string: &'s str,
|
||||
chars: CharIndices<'s>,
|
||||
peeked: SmallVec<[Option<(usize, char)>; 2]>,
|
||||
base: usize,
|
||||
struct Characters<'s> {
|
||||
iter: Peekable<Chars<'s>>,
|
||||
position: Position,
|
||||
index: usize,
|
||||
}
|
||||
|
||||
impl<'s> PeekableChars<'s> {
|
||||
/// Create a new iterator from a string.
|
||||
fn new(string: &'s str) -> PeekableChars<'s> {
|
||||
PeekableChars {
|
||||
string,
|
||||
chars: string.char_indices(),
|
||||
peeked: SmallVec::new(),
|
||||
base: 0,
|
||||
impl<'s> Characters<'s> {
|
||||
fn new(src: &'s str) -> Characters<'s> {
|
||||
Characters {
|
||||
iter: src.chars().peekable(),
|
||||
position: Position::new(0, 0),
|
||||
index: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Peek at the next element.
|
||||
fn peek(&mut self) -> Option<(usize, char)> {
|
||||
self.peekn(0)
|
||||
}
|
||||
fn next(&mut self) -> Option<char> {
|
||||
let c = self.iter.next()?;
|
||||
let len = c.len_utf8();
|
||||
|
||||
/// Peek at the char of the next element.
|
||||
fn peekc(&mut self) -> Option<char> {
|
||||
self.peekn(0).map(|p| p.1)
|
||||
}
|
||||
self.index += len;
|
||||
|
||||
/// Peek at the element after the next element.
|
||||
fn peekn(&mut self, n: usize) -> Option<(usize, char)> {
|
||||
while self.peeked.len() <= n {
|
||||
let next = self.next_inner();
|
||||
self.peeked.push(next);
|
||||
if is_newline_char(c) && !(c == '\r' && self.peek() == Some('\n')) {
|
||||
self.position.line += 1;
|
||||
self.position.column = 0;
|
||||
} else {
|
||||
self.position.column += len;
|
||||
}
|
||||
|
||||
self.peeked[n]
|
||||
Some(c)
|
||||
}
|
||||
|
||||
/// Return the next value of the inner iterator mapped with the offset.
|
||||
fn next_inner(&mut self) -> Option<(usize, char)> {
|
||||
self.chars.next().map(|(i, c)| (self.base + i, c))
|
||||
fn peek(&mut self) -> Option<char> {
|
||||
self.iter.peek().copied()
|
||||
}
|
||||
|
||||
fn string_index(&self) -> usize {
|
||||
fn index(&self) -> usize {
|
||||
self.index
|
||||
}
|
||||
|
||||
fn set_string_index(&mut self, index: usize) {
|
||||
self.chars = self.string[index..].char_indices();
|
||||
self.base = index;
|
||||
self.index = 0;
|
||||
self.peeked.clear();
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for PeekableChars<'_> {
|
||||
type Item = (usize, char);
|
||||
|
||||
fn next(&mut self) -> Option<(usize, char)> {
|
||||
let next = if !self.peeked.is_empty() {
|
||||
self.peeked.remove(0)
|
||||
} else {
|
||||
self.next_inner()
|
||||
};
|
||||
|
||||
if let Some((index, c)) = next {
|
||||
self.index = index + c.len_utf8();
|
||||
}
|
||||
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use Token::{
|
||||
Backtick as TB, BlockComment as BC, Colon as C, Equals as E, LeftBracket as L,
|
||||
LineComment as LC, Newline as N, Quoted as Q, RightBracket as R, Space as S, Star as TS,
|
||||
StarSlash as SS, Text as T, Underscore as TU,
|
||||
};
|
||||
|
||||
/// Test if the source code tokenizes to the tokens.
|
||||
fn test(src: &str, tokens: Vec<Token>) {
|
||||
assert_eq!(Tokens::new(src)
|
||||
.map(|token| token.v)
|
||||
.collect::<Vec<_>>(), tokens);
|
||||
}
|
||||
|
||||
/// Test if the tokens of the source code have the correct spans.
|
||||
fn test_span(src: &str, spans: Vec<(usize, usize, usize, usize)>) {
|
||||
assert_eq!(Tokens::new(src)
|
||||
.map(|token| {
|
||||
let Span { start, end } = token.span;
|
||||
(start.line, start.column, end.line, end.column)
|
||||
})
|
||||
.collect::<Vec<_>>(), spans);
|
||||
}
|
||||
|
||||
/// Tokenizes the basic building blocks.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn tokenize_base() {
|
||||
test("", vec![]);
|
||||
test("Hallo", vec![T("Hallo")]);
|
||||
test("[", vec![L]);
|
||||
test("]", vec![R]);
|
||||
test("*", vec![TS]);
|
||||
test("_", vec![TU]);
|
||||
test("`", vec![TB]);
|
||||
test("\n", vec![N]);
|
||||
}
|
||||
|
||||
/// This test looks if LF- and CRLF-style newlines get both identified correctly.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn tokenize_whitespace_newlines() {
|
||||
test(" \t", vec![S]);
|
||||
test("First line\r\nSecond line\nThird line\n", vec![
|
||||
T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
|
||||
T("Third"), S, T("line"), N
|
||||
]);
|
||||
test("Hello \n ", vec![T("Hello"), S, N, S]);
|
||||
test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
|
||||
}
|
||||
|
||||
/// Tests if escaping with backslash works as it should.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn tokenize_escape() {
|
||||
test(r"\[", vec![T("[")]);
|
||||
test(r"\]", vec![T("]")]);
|
||||
test(r"\**", vec![T("*"), TS]);
|
||||
test(r"\*", vec![T("*")]);
|
||||
test(r"\__", vec![T("_"), TU]);
|
||||
test(r"\_", vec![T("_")]);
|
||||
test(r"\hello", vec![T("\\"), T("hello")]);
|
||||
}
|
||||
|
||||
/// Tests if escaped strings work.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn tokenize_quoted() {
|
||||
test(r#"[align: "hello\"world"]"#, vec![L, T("align"), C, S, Q(r#"hello\"world"#), R]);
|
||||
}
|
||||
|
||||
/// Tokenizes some more realistic examples.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn tokenize_examples() {
|
||||
test(r"
|
||||
[function][
|
||||
Test [italic][example]!
|
||||
]
|
||||
", vec![
|
||||
N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
|
||||
T("example"), R, T("!"), N, S, R, N, S
|
||||
]);
|
||||
|
||||
test(r"
|
||||
[page: size=A4]
|
||||
[font: size=12pt]
|
||||
|
||||
Das ist ein Beispielsatz mit *fetter* Schrift.
|
||||
", vec![
|
||||
N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
|
||||
L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
|
||||
T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
|
||||
TS, T("fetter"), TS, S, T("Schrift."), N, S
|
||||
]);
|
||||
}
|
||||
|
||||
/// This test checks whether the colon and equals symbols get parsed correctly depending on the
|
||||
/// context: Either in a function header or in a body.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn tokenize_symbols_context() {
|
||||
test("[func: key=value][Answer: 7]", vec![
|
||||
L, T("func"), C, S, T("key"), E, T("value"), R, L,
|
||||
T("Answer:"), S, T("7"), R
|
||||
]);
|
||||
test("[[n: k=v]:x][:[=]]:=", vec![
|
||||
L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
|
||||
L, T(":"), L, E, R, R, T(":=")
|
||||
]);
|
||||
test("[hi: k=[func][body] v=1][hello]", vec![
|
||||
L, T("hi"), C, S, T("k"), E, L, T("func"), R, L, T("body"), R, S,
|
||||
T("v"), E, T("1"), R, L, T("hello"), R
|
||||
]);
|
||||
test("[func: __key__=value]", vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
|
||||
test("The /*[*/ answer: 7.", vec![T("The"), S, BC("["), S, T("answer:"), S, T("7.")]);
|
||||
}
|
||||
|
||||
/// Test if block and line comments get tokenized as expected.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn tokenize_comments() {
|
||||
test("These // Line comments.", vec![T("These"), S, LC(" Line comments.")]);
|
||||
test("This /* is */ a comment.", vec![T("This"), S, BC(" is "), S, T("a"), S, T("comment.")]);
|
||||
test("[Head/*of*/][Body]", vec![L, T("Head"), BC("of"), R, L, T("Body"), R]);
|
||||
test("/* Hey */ */", vec![BC(" Hey "), S, SS]);
|
||||
test("Hey\n// Yoo /*\n*/", vec![T("Hey"), N, LC(" Yoo /*"), N, SS]);
|
||||
test("/* My /* line // */ comment */", vec![BC(" My /* line // */ comment ")])
|
||||
}
|
||||
|
||||
/// This test has a special look at the underscore syntax.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn tokenize_underscores() {
|
||||
test("he_llo_world_ __ Now this_ is_ special!",
|
||||
vec![T("he"), TU, T("llo"), TU, T("world"), TU, S, TU, TU, S, T("Now"), S,
|
||||
T("this"), TU, S, T("is"), TU, S, T("special!")]);
|
||||
}
|
||||
|
||||
/// This test is for checking if non-ASCII characters get parsed correctly.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn tokenize_unicode() {
|
||||
test("[document][Hello 🌍!]", vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
|
||||
test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
|
||||
}
|
||||
|
||||
/// This test checks if all tokens have the correct spans.
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn tokenize_spans() {
|
||||
test_span("Hello World", vec![(1, 0, 1, 5), (1, 5, 1, 6), (1, 6, 1, 11)]);
|
||||
test_span("🌍_🎈", vec![(1, 0, 1, 4), (1, 4, 1, 5), (1, 5, 1, 9)]);
|
||||
test_span("hello\nworld", vec![(1, 0, 1, 5), (1, 5, 1, 6), (2, 0, 2, 5)]);
|
||||
test_span("[hello: world]", vec![
|
||||
(1, 0, 1, 1), (1, 1, 1, 6), (1, 6, 1, 7),
|
||||
(1, 7, 1, 8), (1, 8, 1, 13), (1, 13, 1, 14)
|
||||
]);
|
||||
fn position(&self) -> Position {
|
||||
self.position
|
||||
}
|
||||
}
|
||||
|
@ -1,9 +1,26 @@
|
||||
#![allow(unused_imports)]
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use typstc::size::Size;
|
||||
use typstc::syntax::*;
|
||||
use Token::{
|
||||
Space as S, Newline as N, LeftBracket as LB,
|
||||
RightBracket as RB, Text as T, *
|
||||
Whitespace as W,
|
||||
LineComment as LC, BlockComment as BC, StarSlash as SS,
|
||||
LeftBracket as LB, RightBracket as RB,
|
||||
LeftParen as LP, RightParen as RP,
|
||||
LeftBrace as LBR, RightBrace as RBR,
|
||||
Colon as CL, Comma as CM, Equals as EQ, Expr as E,
|
||||
Star as ST, Underscore as U, Backtick as B, Text as T,
|
||||
};
|
||||
|
||||
use Expression as Expr;
|
||||
fn ID(ident: &str) -> Token { E(Expr::Ident(Ident::new(ident.to_string()).unwrap())) }
|
||||
fn STR(ident: &str) -> Token { E(Expr::Str(ident.to_string())) }
|
||||
fn SIZE(size: Size) -> Token<'static> { E(Expr::Size(size)) }
|
||||
fn NUM(num: f64) -> Token<'static> { E(Expr::Num(num)) }
|
||||
fn BOOL(b: bool) -> Token<'static> { E(Expr::Bool(b)) }
|
||||
|
||||
|
||||
/// Parses the test syntax.
|
||||
macro_rules! tokens {
|
||||
($($src:expr =>($line:expr)=> $tokens:expr)*) => ({
|
||||
|
@ -1,78 +0,0 @@
|
||||
// Spaces, Newlines, Brackets.
|
||||
"" => []
|
||||
" " => [S]
|
||||
" " => [S]
|
||||
"\t" => [S]
|
||||
" \t" => [S]
|
||||
"\n" => [N]
|
||||
"\n " => [N, S]
|
||||
" \n" => [S, N]
|
||||
" \n " => [S, N, S]
|
||||
"[" => [LB]
|
||||
"]" => [RB]
|
||||
|
||||
// Header only tokens.
|
||||
"[:]" => [LB, Colon, RB]
|
||||
"[=]" => [LB, Equals, RB]
|
||||
"[,]" => [LB, Comma, RB]
|
||||
":" => [T(":")]
|
||||
"=" => [T("=")]
|
||||
"," => [T(",")]
|
||||
r#"["hi"]"# => [LB, Quoted("hi"), RB]
|
||||
r#""hi""# => [T(r#""hi""#)]
|
||||
|
||||
// Body only tokens.
|
||||
"_" => [Underscore]
|
||||
"*" => [Star]
|
||||
"`" => [Backtick]
|
||||
"[_]" => [LB, T("_"), RB]
|
||||
"[*]" => [LB, T("*"), RB]
|
||||
"[`]" => [LB, T("`"), RB]
|
||||
|
||||
// Comments.
|
||||
"//line" => [LineComment("line")]
|
||||
"/*block*/" => [BlockComment("block")]
|
||||
"*/" => [StarSlash]
|
||||
|
||||
// Plain text.
|
||||
"A" => [T("A")]
|
||||
"Hello" => [T("Hello")]
|
||||
"Hello-World" => [T("Hello-World")]
|
||||
r#"A"B"# => [T(r#"A"B"#)]
|
||||
"🌍" => [T("🌍")]
|
||||
|
||||
// Escapes.
|
||||
r"\[" => [T("[")]
|
||||
r"\]" => [T("]")]
|
||||
r"\\" => [T(r"\")]
|
||||
r"[\[]" => [LB, T("["), RB]
|
||||
r"[\]]" => [LB, T("]"), RB]
|
||||
r"[\\]" => [LB, T(r"\"), RB]
|
||||
r"\:" => [T(":")]
|
||||
r"\=" => [T("=")]
|
||||
r"\/" => [T("/")]
|
||||
r"[\:]" => [LB, T(":"), RB]
|
||||
r"[\=]" => [LB, T("="), RB]
|
||||
r"[\,]" => [LB, T(","), RB]
|
||||
r"\*" => [T("*")]
|
||||
r"\_" => [T("_")]
|
||||
r"\`" => [T("`")]
|
||||
r"[\*]" => [LB, T("*"), RB]
|
||||
r"[\_]" => [LB, T("_"), RB]
|
||||
r"[\`]" => [LB, T("`"), RB]
|
||||
|
||||
// Whitespace.
|
||||
"Hello World" => [T("Hello"), S, T("World")]
|
||||
"Hello World" => [T("Hello"), S, T("World")]
|
||||
"Hello \t World" => [T("Hello"), S, T("World")]
|
||||
|
||||
// Newline.
|
||||
"First\n" => [T("First"), N]
|
||||
"First \n" => [T("First"), S, N]
|
||||
"First\n " => [T("First"), N, S]
|
||||
"First \n " => [T("First"), S, N, S]
|
||||
"First\nSecond" => [T("First"), N, T("Second")]
|
||||
"First\r\nSecond" => [T("First"), N, T("Second")]
|
||||
"First \nSecond" => [T("First"), S, N, T("Second")]
|
||||
"First\n Second" => [T("First"), N, S, T("Second")]
|
||||
"First \n Second" => [T("First"), S, N, S, T("Second")]
|
62
tests/parsing/tokens.rs
Normal file
62
tests/parsing/tokens.rs
Normal file
@ -0,0 +1,62 @@
|
||||
// Whitespace.
|
||||
"" => []
|
||||
" " => [W(0)]
|
||||
" " => [W(0)]
|
||||
"\t" => [W(0)]
|
||||
" \t" => [W(0)]
|
||||
"\n" => [W(1)]
|
||||
"\n " => [W(1)]
|
||||
" \n" => [W(1)]
|
||||
" \n " => [W(1)]
|
||||
" \n\t \n " => [W(2)]
|
||||
"\r\n" => [W(1)]
|
||||
" \r\r\n \x0D" => [W(3)]
|
||||
"\n\r" => [W(2)]
|
||||
|
||||
// Comments.
|
||||
"a // bc\n " => [T("a"), W(0), LC(" bc"), W(1)]
|
||||
"a //a//b\n " => [T("a"), W(0), LC("a//b"), W(1)]
|
||||
"a //a//b\r\n" => [T("a"), W(0), LC("a//b"), W(1)]
|
||||
"a //a//b\n\nhello" => [T("a"), W(0), LC("a//b"), W(2), T("hello")]
|
||||
"/**/" => [BC("")]
|
||||
"_/*_/*a*/*/" => [U, BC("_/*a*/")]
|
||||
"/*/*/" => [BC("/*/")]
|
||||
"abc*/" => [T("abc"), SS]
|
||||
|
||||
// Header only tokens.
|
||||
"[" => [LB]
|
||||
"]" => [RB]
|
||||
"[(){}:=,]" => [LB, LP, RP, LBR, RBR, CL, EQ, CM, RB]
|
||||
"[a:b]" => [LB, ID("a"), CL, ID("b"), RB]
|
||||
"[🌓, 🌍,]" => [LB, T("🌓"), CM, W(0), T("🌍"), CM, RB]
|
||||
"[=]" => [LB, EQ, RB]
|
||||
"[,]" => [LB, CM, RB]
|
||||
"a: b" => [T("a"), T(":"), W(0), T("b")]
|
||||
"c=d, " => [T("c"), T("=d"), T(","), W(0)]
|
||||
r#"["hello\"world"]"# => [LB, STR(r#"hello\"world"#), RB]
|
||||
r#"["hi", 12pt]"# => [LB, STR("hi"), CM, W(0), SIZE(Size::pt(12.0)), RB]
|
||||
"\"hi\"" => [T("\"hi"), T("\"")]
|
||||
"[a: true, x=1]" => [LB, ID("a"), CL, W(0), BOOL(true), CM, W(0),
|
||||
ID("x"), EQ, NUM(1.0), RB]
|
||||
"[120%]" => [LB, NUM(1.2), RB]
|
||||
|
||||
// Body only tokens.
|
||||
"_*`" => [U, ST, B]
|
||||
"[_*`]" => [LB, T("_"), T("*"), T("`"), RB]
|
||||
"hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")]
|
||||
|
||||
// Escapes.
|
||||
r"\[" => [T("[")]
|
||||
r"\]" => [T("]")]
|
||||
r"\\" => [T(r"\")]
|
||||
r"\/" => [T("/")]
|
||||
r"\*" => [T("*")]
|
||||
r"\_" => [T("_")]
|
||||
r"\`" => [T("`")]
|
||||
|
||||
// Unescapable special symbols.
|
||||
r"\:" => [T(r"\"), T(":")]
|
||||
r"\=" => [T(r"\"), T("=")]
|
||||
r"[\:]" => [LB, T(r"\"), CL, RB]
|
||||
r"[\=]" => [LB, T(r"\"), EQ, RB]
|
||||
r"[\,]" => [LB, T(r"\"), CM, RB]
|
Loading…
Reference in New Issue
Block a user