Parser testing prototype 🥥

This commit is contained in:
Laurenz 2020-01-14 20:17:50 +01:00
parent dde69276d4
commit 15ad30555b
12 changed files with 698 additions and 504 deletions

View File

@ -9,9 +9,9 @@ build = "build.rs"
toddle = { path = "../toddle", default-features = false }
tide = { path = "../tide" }
byteorder = "1"
smallvec = "0.6.10"
unicode-xid = "0.1.0"
async-trait = "0.1.22"
smallvec = "1"
unicode-xid = "0.2"
async-trait = "0.1"
futures-executor = { version = "0.3", optional = true }
[features]

View File

@ -75,6 +75,8 @@ macro_rules! function {
parse($args:ident, $body:pat, $ctx:pat, $metadata:pat) $code:block
$($rest:tt)*
) => {
use $crate::func::prelude::*;
impl $crate::func::ParseFunc for $type {
type Meta = $meta;
@ -88,7 +90,8 @@ macro_rules! function {
let mut $args = args;
let val = $code;
if !$args.is_empty() {
error!(unexpected_argument);
return Err($crate::TypesetError
::with_message("unexpected arguments"));
}
Ok(val)
}
@ -109,6 +112,8 @@ macro_rules! function {
// (2-arg) Parse a layout-definition with all arguments.
(@layout $type:ident | layout($this:ident, $ctx:pat) $code:block) => {
use $crate::func::prelude::*;
impl LayoutFunc for $type {
fn layout<'a, 'life0, 'life1, 'async_trait>(
&'a $this,
@ -138,13 +143,13 @@ macro_rules! function {
macro_rules! parse {
(forbidden: $body:expr) => {
if $body.is_some() {
error!("unexpected body");
return Err($crate::TypesetError::with_message("unexpected body"));
}
};
(optional: $body:expr, $ctx:expr) => (
if let Some(body) = $body {
Some($crate::syntax::parse(body, $ctx))
Some($crate::syntax::parse(body, $ctx).0)
} else {
None
}
@ -152,9 +157,9 @@ macro_rules! parse {
(expected: $body:expr, $ctx:expr) => (
if let Some(body) = $body {
$crate::syntax::parse(body, $ctx)?
$crate::syntax::parse(body, $ctx).0
} else {
error!("expected body");
Err($crate::TypesetError::with_message("unexpected body"))
}
)
}

View File

@ -119,6 +119,7 @@ pub enum Command<'a> {
/// A map from identifiers to function parsers.
pub struct Scope {
parsers: HashMap<String, Box<Parser>>,
debug: Option<Box<Parser>>
}
/// A function which parses the source of a function into a function type which
@ -129,11 +130,30 @@ type Parser = dyn Fn(
ParseContext
) -> ParseResult<Box<dyn LayoutFunc>>;
/// Build a boxed `Parser` that delegates to `F`'s `ParseFunc` implementation.
///
/// The returned closure hands a fresh clone of `metadata` to every call of
/// `F::parse` and type-erases the parsed function into a `Box<dyn LayoutFunc>`.
fn make_parser<F>(metadata: <F as ParseFunc>::Meta) -> Box<Parser>
where
    F: ParseFunc + LayoutFunc + 'static,
{
    Box::new(move |args, body, ctx| {
        let parsed = F::parse(args, body, ctx, metadata.clone());
        parsed.map(|func| Box::new(func) as Box<dyn LayoutFunc>)
    })
}
impl Scope {
/// Create an empty scope: no registered parsers and no debug fallback.
pub fn new() -> Scope {
    let parsers = HashMap::new();
    Scope { parsers, debug: None }
}
/// Create a scope without named parsers whose debug parser `F` is invoked
/// if no other match is found.
pub fn with_debug<F>() -> Scope
where
    F: ParseFunc<Meta = ()> + LayoutFunc + 'static,
{
    let debug = Some(make_parser::<F>(()));
    Scope { parsers: HashMap::new(), debug }
}
@ -154,16 +174,14 @@ impl Scope {
where F: ParseFunc + LayoutFunc + 'static {
self.parsers.insert(
name.to_owned(),
Box::new(move |a, b, c| {
F::parse(a, b, c, metadata.clone())
.map(|f| Box::new(f) as Box<dyn LayoutFunc>)
})
make_parser::<F>(metadata),
);
}
/// Look up the parser registered under `name`, falling back to the debug
/// parser (if one is set) when there is no such entry.
pub(crate) fn get_parser(&self, name: &str) -> Option<&Parser> {
    match self.parsers.get(name) {
        Some(parser) => Some(&**parser),
        None => self.debug.as_ref().map(|fallback| &**fallback),
    }
}
}

View File

@ -28,7 +28,7 @@ use toddle::Error as FontError;
use crate::func::Scope;
use crate::layout::{MultiLayout, LayoutResult};
use crate::syntax::{parse, SyntaxTree, ParseContext, Span, ParseResult};
use crate::syntax::{parse, SyntaxTree, Colorization, ErrorMap, ParseContext, Span};
use crate::style::{LayoutStyle, PageStyle, TextStyle};
#[macro_use]
@ -84,7 +84,7 @@ impl<'p> Typesetter<'p> {
}
/// Parse source code into a syntax tree.
pub fn parse(&self, src: &str) -> SyntaxTree {
pub fn parse(&self, src: &str) -> (SyntaxTree, Colorization, ErrorMap) {
let scope = Scope::with_std();
parse(src, ParseContext { scope: &scope })
}
@ -115,7 +115,7 @@ impl<'p> Typesetter<'p> {
/// Process source code directly into a layout.
pub async fn typeset(&self, src: &str) -> TypesetResult<MultiLayout> {
let tree = self.parse(src);
let tree = self.parse(src).0;
let layout = self.layout(&tree).await?;
Ok(layout)
}
@ -132,8 +132,8 @@ pub struct TypesetError {
impl TypesetError {
/// Create a new typesetting error.
pub fn with_message(message: String) -> TypesetError {
TypesetError { message, span: None }
pub fn with_message(message: impl Into<String>) -> TypesetError {
TypesetError { message: message.into(), span: None }
}
}

View File

@ -1,28 +1,3 @@
/// Entities which can be colored by syntax highlighting.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ColorToken {
Comment,
use super::*;
Bracket,
FuncName,
Colon,
Key,
Equals,
Comma,
Paren,
Brace,
ExprIdent,
ExprStr,
ExprNumber,
ExprSize,
ExprBool,
Bold,
Italic,
Monospace,
Invalid,
}

View File

@ -91,12 +91,6 @@ pub struct Object {
pub pairs: Vec<Pair>,
}
#[derive(Clone, PartialEq)]
pub struct Pair {
pub key: Spanned<Ident>,
pub value: Spanned<Expression>,
}
impl Object {
pub fn new() -> Object {
Object { pairs: vec![] }
@ -120,7 +114,7 @@ impl Display for Object {
if !first {
write!(f, ", ")?;
}
write!(f, "{}: {}", pair.key.v, pair.value.v)?;
write!(f, "{}", pair)?;
first = false;
}
@ -128,10 +122,23 @@ impl Display for Object {
}
}
/// A key-value pair: a spanned identifier key together with a spanned
/// expression value. Stored in the `pairs` list of an `Object`.
#[derive(Clone, PartialEq)]
pub struct Pair {
/// The identifier naming this entry.
pub key: Spanned<Ident>,
/// The expression assigned to the key.
pub value: Spanned<Expression>,
}
impl Display for Pair {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{}: {}", self.key.v, self.value.v)
}
}
debug_display!(Ident);
debug_display!(Expression);
debug_display!(Tuple);
debug_display!(Object);
debug_display!(Pair);
/// Kinds of expressions.

View File

@ -14,3 +14,247 @@ pub_use_mod!(expr);
pub_use_mod!(tokens);
pub_use_mod!(parsing);
pub_use_mod!(span);
/// A minimal semantic entity of source code.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> {
/// One or more whitespace characters. The contained `usize` denotes the
/// number of newlines that were contained in the whitespace.
Whitespace(usize),
/// A line comment with inner string contents `//<&'s str>\n`.
LineComment(&'s str),
/// A block comment with inner string contents `/*<&'s str>*/`. The comment
/// can contain nested block comments.
BlockComment(&'s str),
/// An erroneous `*/` without an opening block comment.
StarSlash,
/// A left bracket: `[`.
LeftBracket,
/// A right bracket: `]`.
RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
RightParen,
/// A left brace in a function header: `{`.
LeftBrace,
/// A right brace in a function header: `}`.
RightBrace,
/// A colon in a function header: `:`.
Colon,
/// A comma in a function header: `,`.
Comma,
/// An equals sign in a function header: `=`.
Equals,
/// An identifier in a function header: `center`.
ExprIdent(&'s str),
/// A quoted string in a function header: `"..."`.
ExprStr(&'s str),
/// A number in a function header: `3.14`.
ExprNumber(f64),
/// A size in a function header: `12pt`.
ExprSize(Size),
/// A boolean in a function header: `true | false`.
ExprBool(bool),
/// A star in body-text.
Star,
/// An underscore in body-text.
Underscore,
/// A backtick in body-text.
Backtick,
/// Any other consecutive string.
Text(&'s str),
}
/// A tree representation of source code.
#[derive(Debug, PartialEq)]
pub struct SyntaxTree {
/// The top-level nodes of the tree, each tagged with its span.
pub nodes: Vec<Spanned<Node>>,
}
impl SyntaxTree {
    /// Create a syntax tree that contains no nodes.
    pub fn new() -> SyntaxTree {
        SyntaxTree { nodes: Vec::new() }
    }

    /// Append a node to the end of the tree's node list.
    pub fn add(&mut self, node: Spanned<Node>) {
        let nodes = &mut self.nodes;
        nodes.push(node);
    }
}
/// A node in the syntax tree.
#[derive(Debug, PartialEq)]
pub enum Node {
/// A number of whitespace characters containing less than two newlines.
Space,
/// Whitespace characters containing two or more newlines.
Newline,
/// Plain text.
Text(String),
/// Italics enabled / disabled.
ToggleItalic,
/// Bolder enabled / disabled.
ToggleBolder,
/// Monospace enabled / disabled.
ToggleMonospace,
/// A function invocation.
Func(FuncCall),
}
/// An invocation of a function.
#[derive(Debug)]
pub struct FuncCall(pub Box<dyn LayoutFunc>);
/// Two calls are equal when their boxed layout functions compare equal.
impl PartialEq for FuncCall {
fn eq(&self, other: &FuncCall) -> bool {
// NOTE(review): the operands are compared through references; presumably
// this keeps `==` from trying to move the boxed trait objects — confirm
// before simplifying to `self.0 == other.0`.
&self.0 == &other.0
}
}
/// Syntax-highlighting information: a list of color tokens, each tagged with
/// the span it applies to. Returned by `parse` next to the syntax tree.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Colorization {
/// The colored spans collected by the parser.
pub colors: Vec<Spanned<ColorToken>>,
}
/// Entities which can be colored by syntax highlighting.
///
/// The parser derives most of these from the tokens it consumes: comments map
/// to `Comment`, a stray `*/` to `Invalid`, brackets, parentheses and braces to
/// `Bracket`, `Paren` and `Brace`, and the `Expr*` tokens to their `Expr*`
/// counterparts. A function's name is recolored as `FuncName`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ColorToken {
Comment,
Bracket,
FuncName,
Colon,
Key,
Equals,
Comma,
Paren,
Brace,
ExprIdent,
ExprStr,
ExprNumber,
ExprSize,
ExprBool,
Bold,
Italic,
Monospace,
Invalid,
}
/// The errors collected while parsing. Returned by `parse` next to the syntax
/// tree instead of aborting at the first problem.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ErrorMap {
/// Error messages, each tagged with the span it refers to.
pub errors: Vec<Spanned<String>>,
}
/// The parsed header of a function invocation: its name and its arguments.
#[derive(Debug)]
pub struct FuncHeader {
/// The function's name together with its span.
pub name: Spanned<Ident>,
/// The arguments given in the header.
pub args: FuncArgs,
}
/// The arguments passed to a function, split into positional and keyword
/// arguments.
#[derive(Debug)]
pub struct FuncArgs {
// Positional arguments; extraction removes them from the front.
positional: Tuple,
// Keyword arguments, looked up by key name.
keyword: Object,
}
impl FuncArgs {
    /// Create an argument list without any positional or keyword arguments.
    fn new() -> FuncArgs {
        let positional = Tuple::new();
        let keyword = Object::new();
        FuncArgs { positional, keyword }
    }

    /// Append a positional argument.
    pub fn add_pos(&mut self, item: Spanned<Expression>) {
        self.positional.add(item);
    }

    /// Remove and convert the first positional argument; it is an error if
    /// there is none.
    pub fn get_pos<E: ExpressionKind>(&mut self) -> ParseResult<E> {
        let first = self.get_pos_opt();
        expect(first)
    }

    /// Remove and convert the first positional argument, if there is one.
    pub fn get_pos_opt<E: ExpressionKind>(&mut self) -> ParseResult<Option<E>> {
        if self.positional.items.is_empty() {
            return Ok(None);
        }

        let first = self.positional.items.remove(0);
        E::from_expr(first).map(Some)
    }

    /// Append a keyword argument given as separate key and value.
    pub fn add_key(&mut self, key: Spanned<Ident>, value: Spanned<Expression>) {
        self.keyword.add(key, value);
    }

    /// Append a keyword argument given as a ready-made pair.
    pub fn add_key_pair(&mut self, pair: Pair) {
        self.keyword.add_pair(pair);
    }

    /// Remove and convert the keyword argument called `name`; it is an error
    /// if there is none.
    pub fn get_key<E: ExpressionKind>(&mut self, name: &str) -> ParseResult<E> {
        let found = self.get_key_opt(name);
        expect(found)
    }

    /// Remove and convert the keyword argument called `name`, if present.
    pub fn get_key_opt<E: ExpressionKind>(&mut self, name: &str) -> ParseResult<Option<E>> {
        let index = match self.keyword.pairs.iter().position(|pair| pair.key.v.0 == name) {
            Some(index) => index,
            None => return Ok(None),
        };

        // `swap_remove` moves the last pair into the freed slot.
        let pair = self.keyword.pairs.swap_remove(index);
        E::from_expr(pair.value).map(Some)
    }

    /// Take all positional arguments out of the list and iterate over them.
    pub fn iter_pos(&mut self) -> std::vec::IntoIter<Spanned<Expression>> {
        std::mem::replace(&mut self.positional, Tuple::new())
            .items
            .into_iter()
    }

    /// Take all keyword arguments out of the list and iterate over them.
    pub fn iter_keys(&mut self) -> std::vec::IntoIter<Pair> {
        std::mem::replace(&mut self.keyword, Object::new())
            .pairs
            .into_iter()
    }

    /// Remove all positional and keyword arguments.
    pub fn clear(&mut self) {
        self.positional.items.clear();
        self.keyword.pairs.clear();
    }

    /// Whether neither positional nor keyword arguments are left.
    pub fn is_empty(&self) -> bool {
        let no_positional = self.positional.items.is_empty();
        let no_keyword = self.keyword.pairs.is_empty();
        no_positional && no_keyword
    }
}
/// Extract the option expression kind from the option or return an error.
fn expect<E: ExpressionKind>(opt: ParseResult<Option<E>>) -> ParseResult<E> {
match opt {
Ok(Some(spanned)) => Ok(spanned),
Ok(None) => error!("expected {}", E::NAME),
Err(e) => Err(e),
}
}

View File

@ -1,147 +1,10 @@
use std::iter::Peekable;
use crate::func::Scope;
use super::*;
use Token::*;
/// A tree representation of source code.
#[derive(Debug, PartialEq)]
pub struct SyntaxTree {
pub nodes: Vec<Spanned<Node>>,
}
impl SyntaxTree {
/// Create an empty syntax tree.
pub fn new() -> SyntaxTree {
SyntaxTree { nodes: vec![] }
}
}
/// A node in the syntax tree.
#[derive(Debug, PartialEq)]
pub enum Node {
/// A number of whitespace characters containing less than two newlines.
Space,
/// Whitespace characters with more than two newlines.
Newline,
/// Plain text.
Text(String),
/// Italics enabled / disabled.
ToggleItalic,
/// Bolder enabled / disabled.
ToggleBolder,
/// Monospace enabled / disabled.
ToggleMonospace,
/// A function invocation.
Func(FuncCall),
}
/// An invocation of a function.
#[derive(Debug)]
pub struct FuncCall(pub Box<dyn LayoutFunc>);
impl PartialEq for FuncCall {
fn eq(&self, other: &FuncCall) -> bool {
&self.0 == &other.0
}
}
#[derive(Debug)]
pub struct FuncArgs {
positional: Tuple,
keyword: Object,
}
impl FuncArgs {
fn new() -> FuncArgs {
FuncArgs {
positional: Tuple::new(),
keyword: Object::new(),
}
}
/// Add a positional argument.
pub fn add_pos(&mut self, item: Spanned<Expression>) {
self.positional.add(item);
}
/// Force-extract the first positional argument.
pub fn get_pos<E: ExpressionKind>(&mut self) -> ParseResult<E> {
expect(self.get_pos_opt())
}
/// Extract the first positional argument.
pub fn get_pos_opt<E: ExpressionKind>(&mut self) -> ParseResult<Option<E>> {
Ok(if !self.positional.items.is_empty() {
let spanned = self.positional.items.remove(0);
Some(E::from_expr(spanned)?)
} else {
None
})
}
/// Add a keyword argument.
pub fn add_key(&mut self, key: Spanned<Ident>, value: Spanned<Expression>) {
self.keyword.add(key, value);
}
/// Add a keyword argument from an existing pair.
pub fn add_key_pair(&mut self, pair: Pair) {
self.keyword.add_pair(pair);
}
/// Force-extract a keyword argument.
pub fn get_key<E: ExpressionKind>(&mut self, name: &str) -> ParseResult<E> {
expect(self.get_key_opt(name))
}
/// Extract a keyword argument.
pub fn get_key_opt<E: ExpressionKind>(&mut self, name: &str) -> ParseResult<Option<E>> {
self.keyword.pairs.iter()
.position(|p| p.key.v.0 == name)
.map(|index| {
let value = self.keyword.pairs.swap_remove(index).value;
E::from_expr(value)
})
.transpose()
}
/// Iterator over positional arguments.
pub fn iter_pos(&mut self) -> std::vec::IntoIter<Spanned<Expression>> {
let tuple = std::mem::replace(&mut self.positional, Tuple::new());
tuple.items.into_iter()
}
/// Iterator over all keyword arguments.
pub fn iter_keys(&mut self) -> std::vec::IntoIter<Pair> {
let object = std::mem::replace(&mut self.keyword, Object::new());
object.pairs.into_iter()
}
/// Clear the argument lists.
pub fn clear(&mut self) {
self.positional.items.clear();
self.keyword.pairs.clear();
}
/// Whether both the positional and keyword argument lists are empty.
pub fn is_empty(&self) -> bool {
self.positional.items.is_empty() && self.keyword.pairs.is_empty()
}
}
/// Extract the option expression kind from the option or return an error.
fn expect<E: ExpressionKind>(opt: ParseResult<Option<E>>) -> ParseResult<E> {
match opt {
Ok(Some(spanned)) => Ok(spanned),
Ok(None) => error!("expected {}", E::NAME),
Err(e) => Err(e),
}
}
/// Parses source code into a syntax tree given a context.
pub fn parse(src: &str, ctx: ParseContext) -> SyntaxTree {
pub fn parse(src: &str, ctx: ParseContext) -> (SyntaxTree, Colorization, ErrorMap) {
Parser::new(src, ctx).parse()
}
@ -155,16 +18,13 @@ pub struct ParseContext<'a> {
struct Parser<'s> {
src: &'s str,
ctx: ParseContext<'s>,
tokens: Peekable<Tokens<'s>>,
errors: Vec<Spanned<String>>,
colored: Vec<Spanned<ColorToken>>,
span: Span,
}
colorization: Colorization,
error_map: ErrorMap,
macro_rules! defer {
($($tts:tt)*) => (
unimplemented!()
);
tokens: Tokens<'s>,
peeked: Option<Option<Spanned<Token<'s>>>>,
position: Position,
last_position: Position,
}
impl<'s> Parser<'s> {
@ -172,81 +32,128 @@ impl<'s> Parser<'s> {
Parser {
src,
ctx,
tokens: Tokens::new(src).peekable(),
errors: vec![],
colored: vec![],
span: Span::ZERO,
error_map: ErrorMap { errors: vec![] },
colorization: Colorization { colors: vec![] },
tokens: Tokens::new(src),
peeked: None,
position: Position::ZERO,
last_position: Position::ZERO,
}
}
fn parse(mut self) -> SyntaxTree {
fn parse(mut self) -> (SyntaxTree, Colorization, ErrorMap) {
let mut tree = SyntaxTree::new();
loop {
self.skip_whitespace();
if let Some(spanned) = self.eat() {
match spanned.v {
LineComment(_) | BlockComment(_) => {}
let start = self.position();
Whitespace(newlines) => {
tree.add(spanned.map_v(if newlines >= 2 {
Node::Newline
} else {
Node::Space
}));
}
let node = match self.next() {
Some(LeftBracket) => self.parse_func().map(|f| Node::Func(f)),
Some(Star) => Some(Node::ToggleBolder),
Some(Underscore) => Some(Node::ToggleItalic),
Some(Backtick) => Some(Node::ToggleMonospace),
Some(Text(text)) => Some(Node::Text(text.to_owned())),
Some(other) => { self.unexpected(other); None },
None => break,
};
LeftBracket => {
if let Some(func) = self.parse_func() {
tree.add(func);
}
}
if let Some(node) = node {
let end = self.position();
let span = Span { start, end };
Star => tree.add(spanned.map_v(Node::ToggleBolder)),
Underscore => tree.add(spanned.map_v(Node::ToggleItalic)),
Backtick => tree.add(spanned.map_v(Node::ToggleMonospace)),
Text(text) => tree.add(spanned.map_v(Node::Text(text.to_owned()))),
tree.nodes.push(Spanned { v: node, span });
_ => self.unexpected(spanned),
}
} else {
break;
}
}
tree
(tree, self.colorization, self.error_map)
}
fn parse_func(&mut self) -> Option<FuncCall> {
let (name, args) = self.parse_func_header()?;
self.parse_func_call(name, args)
/// Parse a function invocation (header plus optional body) into a spanned
/// `Node::Func`. The span starts at the parser's last position on entry and
/// ends at its position after the call was parsed.
fn parse_func(&mut self) -> Option<Spanned<Node>> {
    let start = self.last_pos();
    let header = self.parse_func_header();

    self.parse_func_call(header).map(|call| {
        let span = Span { start, end: self.pos() };
        Spanned { v: Node::Func(call), span }
    })
}
fn parse_func_header(&mut self) -> Option<(Spanned<Ident>, FuncArgs)> {
defer! { self.eat_until(|t| t == RightBracket, true); }
fn parse_func_header(&mut self) -> Option<FuncHeader> {
self.skip_whitespace();
let name = self.parse_func_name()?;
let name = self.parse_func_name().or_else(|| {
self.eat_until(|t| t == RightBracket, true);
None
})?;
self.skip_whitespace();
let args = match self.next() {
Some(Colon) => self.parse_func_args(),
Some(RightBracket) => FuncArgs::new(),
let args = match self.eat() {
Some(Spanned { v: Colon, .. }) => self.parse_func_args(),
Some(Spanned { v: RightBracket, .. }) => FuncArgs::new(),
other => {
self.expected("colon or closing bracket", other);
self.eat_until(|t| t == RightBracket, true);
FuncArgs::new()
}
};
Some((name, args))
Some(FuncHeader { name, args })
}
fn parse_func_call(
&mut self,
name: Spanned<Ident>,
args: FuncArgs,
) -> Option<FuncCall> {
unimplemented!()
/// Parse the optional body of a function call and instantiate the function.
///
/// If the next token is a `[`, the raw source up to the matching closing
/// bracket is taken as the body; a missing closing bracket is recorded as an
/// error. The body is consumed even when `header` is `None`.
///
/// Returns `None` if there is no header or if the scope yields no parser for
/// the function's name (the latter is recorded as an error).
fn parse_func_call(&mut self, header: Option<FuncHeader>) -> Option<FuncCall> {
    let body = if self.peek() == Some(LeftBracket) {
        self.eat();

        let start = self.tokens.index();
        let found = self.tokens.move_to_closing_bracket();
        let end = self.tokens.index();

        // The tokenizer was advanced directly, so bring the parser's
        // positions back in sync with it.
        self.last_position = self.position;
        self.position = self.tokens.pos();

        let body = &self.src[start .. end];

        if found {
            assert_eq!(self.eat().map(Spanned::value), Some(RightBracket));
        } else {
            self.error_here("expected closing bracket");
        }

        Some(body)
    } else {
        None
    };

    let header = header?;
    let name = header.name;
    let parser = self.ctx.scope.get_parser(name.v.as_str()).or_else(|| {
        self.error(format!("unknown function: `{}`", name.v), name.span);
        None
    })?;

    // NOTE(review): an error returned by the function's own parser still
    // panics here; it should presumably end up in the error map instead.
    Some(FuncCall(parser(header.args, body, self.ctx).unwrap()))
}
fn parse_func_name(&mut self) -> Option<Spanned<Ident>> {
match self.next() {
Some(ExprIdent(ident)) => {
self.color_span(ColorToken::FuncName, self.span(), true);
Some(Spanned { v: Ident(ident.to_string()), span: self.span() })
match self.eat() {
Some(Spanned { v: ExprIdent(ident), span }) => {
self.color(Spanned { v: ColorToken::FuncName, span }, true);
Some(Spanned { v: Ident(ident.to_string()), span })
}
other => {
self.expected("identifier", other);
@ -256,119 +163,16 @@ impl<'s> Parser<'s> {
}
fn parse_func_args(&mut self) -> FuncArgs {
enum State {
Start,
Identifier(Spanned<Ident>),
Assignment(Spanned<Ident>),
Value,
}
impl State {
fn expected(&self) -> &'static str {
match self {
State::Start => "value or key",
State::Identifier(_) => "comma or assignment",
State::Assignment(_) => "value",
State::Value => "comma",
}
}
}
let mut args = FuncArgs::new();
let mut state = State::Start;
loop {
self.skip_whitespace();
/*
let token = self.next();
match token {
Some(ExprIdent(ident)) => match state {
State::Start => {
state = State::Identifier(Spanned {
v: Ident(ident.to_string()),
span: self.span(),
});
}
State::Identifier(prev) => {
self.expected(state.expected(), token);
args.add_pos(prev.map(|id| Expression::Ident(id)));
state = State::Identifier(Spanned {
v: Ident(ident.to_string()),
span: self.span(),
});
}
State::Assignment(key) => {
let span = Span::merge(key.span, self.span());
args.add_key(Spanned::new(KeyArg {
key,
value: Spanned {
v: Expression::Ident(Ident(ident.to_string())),
span: self.span(),
},
}, span));
state = State::Value;
}
State::Value => {
self.expected(state.expected(), token);
state = State::Identifier(Spanned {
v: Ident(ident.to_string()),
span: self.span(),
});
}
}
// Handle expressions.
Some(Expr(_)) | Some(LeftParen) | Some(LeftBrace) => {
let expr = match token.unwrap() {
Expr(e) => e,
LeftParen => self.parse_tuple(),
LeftBrace => self.parse_object(),
_ => unreachable!(),
}
}
// Handle commas after values.
Some(Comma) => match state {
State::Identifier(ident) => {
args.add_pos(ident.map(|id| Expression::Ident(id)));
state = State::Start;
}
State::Value => state = State::Start,
_ => self.expected(state.expected(), token),
}
// Handle the end of the function header.
Some(RightBracket) => {
match state {
State::Identifier(ident) => {
args.add_pos(ident.map(|id| Expression::Ident(id)));
}
State::Assignment(_) => {
self.expected(state.expected(), token);
}
_ => {}
}
break;
}
}
*/
}
args
// unimplemented!()
FuncArgs::new()
}
fn handle_expr(&mut self, expr: Spanned<Expression>) {
fn parse_tuple(&mut self) -> Spanned<Expression> {
unimplemented!("parse_tuple")
}
fn parse_tuple(&mut self) -> Spanned<Tuple> {
unimplemented!()
}
fn parse_object(&mut self) -> Spanned<Object> {
unimplemented!()
fn parse_object(&mut self) -> Spanned<Expression> {
unimplemented!("parse_object")
}
fn skip_whitespace(&mut self) {
@ -378,68 +182,52 @@ impl<'s> Parser<'s> {
}, false)
}
fn eat_until<F>(&mut self, mut f: F, eat_match: bool)
where F: FnMut(Token<'s>) -> bool {
while let Some(token) = self.tokens.peek() {
if f(token.v) {
if eat_match {
self.next();
}
break;
}
self.next();
fn expected(&mut self, thing: &str, found: Option<Spanned<Token>>) {
if let Some(Spanned { v: found, span }) = found {
self.error(
format!("expected {}, found {}", thing, name(found)),
span
);
} else {
self.error_here(format!("expected {}", thing));
}
}
fn next(&mut self) -> Option<Token<'s>> {
self.tokens.next().map(|spanned| {
self.color_token(&spanned.v, spanned.span);
self.span = spanned.span;
spanned.v
})
fn unexpected(&mut self, found: Spanned<Token>) {
self.error_map.errors.push(found.map(|t| format!("unexpected {}", name(t))));
}
fn span(&self) -> Span {
self.span
/// Add an error with the given message and span to the error map.
fn error(&mut self, message: impl Into<String>, span: Span) {
    let v = message.into();
    self.error_map.errors.push(Spanned { v, span });
}
fn position(&self) -> Position {
self.span.end
/// Add an error with the given message at the parser's current position.
fn error_here(&mut self, message: impl Into<String>) {
    let here = Span::at(self.pos());
    self.error(message, here);
}
fn unexpected(&mut self, found: Token) {
self.errors.push(Spanned {
v: format!("unexpected {}", name(found)),
span: self.span(),
});
/// Add a color token to the colorization. With `replace_last` set, the most
/// recently added color token (if any) is replaced instead of kept.
fn color(&mut self, token: Spanned<ColorToken>, replace_last: bool) {
    let colors = &mut self.colorization.colors;

    // Dropping the last entry and pushing the new one is the same as
    // overwriting it; on an empty list the `pop` does nothing.
    if replace_last {
        colors.pop();
    }

    colors.push(token);
}
fn expected(&mut self, thing: &str, found: Option<Token>) {
let message = if let Some(found) = found {
format!("expected {}, found {}", thing, name(found))
} else {
format!("expected {}", thing)
};
self.errors.push(Spanned {
v: message,
span: self.span(),
});
}
fn color_token(&mut self, token: &Token<'s>, span: Span) {
let colored = match token {
fn color_token(&mut self, token: Spanned<Token<'s>>) {
let colored = match token.v {
LineComment(_) | BlockComment(_) => Some(ColorToken::Comment),
StarSlash => Some(ColorToken::Invalid),
StarSlash => Some(ColorToken::Invalid),
LeftBracket | RightBracket => Some(ColorToken::Bracket),
LeftParen | RightParen => Some(ColorToken::Paren),
LeftBrace | RightBrace => Some(ColorToken::Brace),
Colon => Some(ColorToken::Colon),
Comma => Some(ColorToken::Comma),
Equals => Some(ColorToken::Equals),
ExprIdent(_) => Some(ColorToken::ExprIdent),
ExprStr(_) => Some(ColorToken::ExprStr),
Colon => Some(ColorToken::Colon),
Comma => Some(ColorToken::Comma),
Equals => Some(ColorToken::Equals),
ExprIdent(_) => Some(ColorToken::ExprIdent),
ExprStr(_) => Some(ColorToken::ExprStr),
ExprNumber(_) => Some(ColorToken::ExprNumber),
ExprSize(_) => Some(ColorToken::ExprSize),
ExprBool(_) => Some(ColorToken::ExprBool),
@ -447,21 +235,49 @@ impl<'s> Parser<'s> {
};
if let Some(color) = colored {
self.colored.push(Spanned { v: color, span });
self.colorization.colors.push(Spanned { v: color, span: token.span });
}
}
fn color_span(&mut self, color: ColorToken, span: Span, replace_last: bool) {
let token = Spanned { v: color, span };
if replace_last {
if let Some(last) = self.colored.last_mut() {
*last = token;
return;
/// Eat tokens until `f` accepts the upcoming token or the input ends. The
/// accepted token itself is only eaten if `eat_match` is true.
fn eat_until<F>(&mut self, mut f: F, eat_match: bool)
where
    F: FnMut(Token<'s>) -> bool,
{
    loop {
        let token = match self.peek() {
            Some(token) => token,
            None => return,
        };

        let matched = f(token);
        if !matched || eat_match {
            self.eat();
        }
        if matched {
            return;
        }
    }
}
fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
let token = self.peeked.take().unwrap_or_else(|| self.tokens.next());
self.last_position = self.position;
if let Some(spanned) = token {
self.color_token(spanned);
self.position = spanned.span.end;
}
self.colored.push(token);
token
}
/// Look at the next token without eating it. The token is pulled from the
/// tokenizer on first use and cached in `peeked`.
fn peek(&mut self) -> Option<Token<'s>> {
    let tokens = &mut self.tokens;
    let upcoming = self.peeked.get_or_insert_with(|| tokens.next());
    upcoming.map(Spanned::value)
}
/// The parser's current position; `eat` moves it to the end of each token it
/// consumes.
fn pos(&self) -> Position {
self.position
}
/// The position the parser was at before its most recent `eat`.
fn last_pos(&self) -> Position {
self.last_position
}
}

View File

@ -6,64 +6,6 @@ use Token::*;
use State::*;
/// A minimal semantic entity of source code.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> {
/// One or more whitespace characters. The contained `usize` denotes the
/// number of newlines that were contained in the whitespace.
Whitespace(usize),
/// A line comment with inner string contents `//<&'s str>\n`.
LineComment(&'s str),
/// A block comment with inner string contents `/*<&'s str>*/`. The comment
/// can contain nested block comments.
BlockComment(&'s str),
/// An erroneous `*/` without an opening block comment.
StarSlash,
/// A left bracket: `[`.
LeftBracket,
/// A right bracket: `]`.
RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
RightParen,
/// A left brace in a function header: `{`.
LeftBrace,
/// A right brace in a function header: `}`.
RightBrace,
/// A colon in a function header: `:`.
Colon,
/// A comma in a function header: `:`.
Comma,
/// An equals sign in a function header: `=`.
Equals,
/// An identifier in a function header: `center`.
ExprIdent(&'s str),
/// A quoted string in a function header: `"..."`.
ExprStr(&'s str),
/// A number in a function header: `3.14`.
ExprNumber(f64),
/// A size in a function header: `12pt`.
ExprSize(Size),
/// A boolean in a function header: `true | false`.
ExprBool(bool),
/// A star in body-text.
Star,
/// An underscore in body-text.
Underscore,
/// A backtick in body-text.
Backtick,
/// Any other consecutive string.
Text(&'s str),
}
/// Decomposes text into a sequence of semantic tokens.
pub fn tokenize(src: &str) -> Tokens {
Tokens::new(src)
@ -97,6 +39,47 @@ impl<'s> Tokens<'s> {
index: 0,
}
}
/// The byte index in the source string at which the last token ends and the
/// next token will start. Suitable for slicing the source.
pub fn index(&self) -> usize {
self.index
}
/// The line-column position in the source at which the last token ends and
/// the next token will start.
pub fn pos(&self) -> Position {
self.position
}
/// Advance through the source, without tokenizing it, up to the first
/// closing bracket that has no matching opening bracket. Brackets preceded by
/// an unescaped backslash are not counted.
///
/// Returns `true` if the scan stopped in front of a closing bracket and
/// `false` if the end of the string was reached instead.
pub fn move_to_closing_bracket(&mut self) -> bool {
    let mut escaped = false;
    let mut depth = 0;

    self.read_string_until(|c| {
        let unescaped = !escaped;
        match c {
            '[' if unescaped => depth += 1,
            ']' if unescaped && depth == 0 => return true,
            ']' if unescaped => depth -= 1,
            '\\' => escaped = !escaped,
            _ => escaped = false,
        }

        false
    }, false, 0, 0);

    self.peek() == Some(']')
}
}
impl<'s> Iterator for Tokens<'s> {
@ -118,8 +101,13 @@ impl<'s> Iterator for Tokens<'s> {
// Functions.
'[' => {
self.stack.push(self.state);
self.state = Header;
if self.state == Header || self.state == Body {
self.stack.push(self.state);
self.state = Header;
} else {
self.state = Body;
}
LeftBracket
}
']' => {
@ -221,12 +209,10 @@ impl<'s> Tokens<'s> {
fn parse_string(&mut self) -> Token<'s> {
let mut escaped = false;
ExprStr(self.read_string_until(|n| {
if n == '"' && !escaped {
return true;
} else if n == '\\' {
escaped = !escaped;
} else {
escaped = false;
match n {
'"' if !escaped => return true,
'\\' => escaped = !escaped,
_ => escaped = false,
}
false
@ -316,14 +302,6 @@ impl<'s> Tokens<'s> {
fn peek(&mut self) -> Option<char> {
self.iter.peek().copied()
}
fn index(&self) -> usize {
self.index
}
fn pos(&self) -> Position {
self.position
}
}
fn parse_percentage(text: &str) -> Option<f64> {

View File

@ -1,47 +1,159 @@
#![allow(unused_imports)]
#![allow(dead_code)]
#![allow(non_snake_case)]
use typstc::func::Scope;
use typstc::size::Size;
use typstc::syntax::*;
use Token::{
Whitespace as W,
LineComment as LC, BlockComment as BC, StarSlash as SS,
LeftBracket as LB, RightBracket as RB,
LeftParen as LP, RightParen as RP,
LeftBrace as LBR, RightBrace as RBR,
Colon as CL, Comma as CM, Equals as EQ,
ExprIdent as ID, ExprStr as STR, ExprSize as SIZE,
ExprNumber as NUM, ExprBool as BOOL,
Star as ST, Underscore as U, Backtick as B, Text as T,
};
use typstc::{function, parse};
/// Short aliases for the `Token` variants, used to keep the expected token
/// lists in the test cases compact.
mod token_shorthands {
pub use super::Token::{
Whitespace as W,
LineComment as LC, BlockComment as BC, StarSlash as SS,
LeftBracket as LB, RightBracket as RB,
LeftParen as LP, RightParen as RP,
LeftBrace as LBR, RightBrace as RBR,
Colon as CL, Comma as CM, Equals as EQ,
ExprIdent as ID, ExprStr as STR, ExprSize as SIZE,
ExprNumber as NUM, ExprBool as BOOL,
Star as ST, Underscore as U, Backtick as B, Text as T,
};
}
/// Short aliases for the `Node` variants, used to keep the expected syntax
/// trees in the test cases compact.
mod node_shorthands {
use super::Node;
pub use Node::{
Space as S, Newline as N, Text,
ToggleItalic as I, ToggleBolder as B, ToggleMonospace as M,
Func,
};
/// Build a text node from a string slice.
pub fn T(text: &str) -> Node { Node::Text(text.to_string()) }
}
/// Shorthand for an expected function node in a test case: expands to a
/// `Func` node wrapping a `DebugFn` with empty argument lists and the given
/// optional body, written as `None` or `Some([nodes...])`.
macro_rules! F {
// No body.
(@body None) => (None);
// A body given as a list of nodes; every node gets the zero span.
(@body Some([$($tts:tt)*])) => ({
let nodes = vec![$($tts)*].into_iter()
.map(|v| Spanned { v, span: Span::ZERO })
.collect();
Some(SyntaxTree { nodes })
});
// Entry point: everything passed in is interpreted as the body.
($($body:tt)*) => ({
Func(FuncCall(Box::new(DebugFn {
pos: vec![],
key: vec![],
body: F!(@body $($body)*),
})))
});
}
// A function for testing the parser: it accepts any arguments and an optional
// body and simply stores what it was given.
function! {
#[derive(Debug, PartialEq)]
pub struct DebugFn {
pos: Vec<Spanned<Expression>>,
key: Vec<Pair>,
body: Option<SyntaxTree>,
}
// Take over all positional and keyword arguments and parse the body if
// there is one.
parse(args, body, ctx) {
DebugFn {
pos: args.iter_pos().collect(),
key: args.iter_keys().collect(),
body: parse!(optional: body, ctx),
}
}
// Layouting yields an empty list.
layout() { vec![] }
}
impl DebugFn {
    /// Compare two debug functions while ignoring all spans.
    ///
    /// The functions are considered equal if they have the same number of
    /// positional and keyword arguments, the arguments match pairwise by
    /// value, and the bodies are either both absent or equal according to
    /// the free function `compare`.
    fn compare(&self, other: &DebugFn) -> bool {
        // `zip` stops at the shorter list, so a differing number of
        // arguments has to be ruled out explicitly.
        self.pos.len() == other.pos.len()
            && self.key.len() == other.key.len()
            && self.pos.iter().zip(&other.pos).all(|(a, b)| a.v == b.v)
            && self.key.iter().zip(&other.key)
                .all(|(a, b)| a.key.v == b.key.v && a.value.v == b.value.v)
            && match (&self.body, &other.body) {
                (Some(a), Some(b)) => compare(a, b),
                (None, None) => true,
                _ => false,
            }
    }
}
/// Downcast the layout function inside a call to the `DebugFn` test function.
///
/// Panics with "not a debug fn" if the downcast does not succeed.
fn downcast(func: &FuncCall) -> &DebugFn {
func.0.downcast::<DebugFn>().expect("not a debug fn")
}
/// Compare two syntax trees while ignoring all spans.
///
/// The trees are equal if they have the same number of nodes and the nodes
/// match pairwise: toggles and spacing by kind, text by content and function
/// calls through `DebugFn::compare` (both calls must wrap a `DebugFn`,
/// otherwise `downcast` panics).
fn compare(a: &SyntaxTree, b: &SyntaxTree) -> bool {
    use node_shorthands::*;

    // `zip` stops at the shorter tree, so without this check a tree would
    // compare equal to any longer tree that merely starts with the same nodes.
    a.nodes.len() == b.nodes.len()
        && a.nodes.iter().zip(&b.nodes).all(|(x, y)| match (&x.v, &y.v) {
            (S, S) | (N, N) | (I, I) | (B, B) | (M, M) => true,
            (Text(t1), Text(t2)) => t1 == t2,
            (Func(f1), Func(f2)) => downcast(f1).compare(downcast(f2)),
            _ => false,
        })
}
/// Parses the test syntax.
///
/// The input is a sequence of cases, each consisting of a task identifier,
/// the source string, the line number of the case wrapped in arrows, and the
/// bracketed expectation. The result is a `Vec` of `(line, source, Target)`
/// tuples, one per case.
///
/// Supported tasks:
/// - `t`:  tokenize and compare the tokens without spans.
/// - `ts`: tokenize and compare the tokens including spans.
/// - `p`:  parse and compare the nodes without spans.
/// - `ps`: parse and compare the nodes including spans.
///
/// For the spanned tasks every entry has the form
/// `(start_line:start_column, end_line:end_column, value)`.
macro_rules! tokens {
    // Entry point: turn every case into a tuple and collect them.
    ($($task:ident $src:expr =>($line:expr)=> [$($tts:tt)*])*) => ({
        #[allow(unused_mut)]
        let mut cases = Vec::new();
        $(cases.push(($line, $src, tokens!(@$task [$($tts)*])));)*
        cases
    });

    (@t [$($tts:tt)*]) => ({
        use token_shorthands::*;
        Target::Tokenize(vec![$($tts)*])
    });

    (@ts [$($tts:tt)*]) => ({
        use token_shorthands::*;
        Target::TokenizeSpanned(tokens!(@__spans [$($tts)*]))
    });

    (@p [$($tts:tt)*]) => ({
        use node_shorthands::*;
        // Spans are irrelevant for this task, so every node gets a zero span.
        let nodes = vec![$($tts)*].into_iter()
            .map(|v| Spanned { v, span: Span::ZERO })
            .collect();
        Target::Parse(SyntaxTree { nodes })
    });

    (@ps [$($tts:tt)*]) => ({
        use node_shorthands::*;
        // `Target::ParseSpanned` holds a tree, so the spanned nodes have to be
        // wrapped in one rather than passed as a bare vector.
        Target::ParseSpanned(SyntaxTree { nodes: tokens!(@__spans [$($tts)*]) })
    });

    // Internal: build a vector of spanned values from
    // `(line:column, line:column, value)` entries.
    (@__spans [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $v:expr)),* $(,)?]) => ({
        vec![
            $(Spanned { v: $v, span: Span {
                start: Position { line: $sl, column: $sc },
                end: Position { line: $el, column: $ec },
            }}),*
        ]
    });
}
/// What a test case expects, and thereby which check `test_case` runs on the
/// source string.
#[derive(Debug)]
enum Target {
// NOTE(review): looks like the earlier spelling of `Tokenize`; confirm
// whether anything still constructs it before removing.
Tokenized(Vec<Token<'static>>),
// NOTE(review): looks like the earlier spelling of `TokenizeSpanned`;
// confirm whether anything still constructs it before removing.
TokenizedSpanned(Vec<Spanned<Token<'static>>>),
/// Expected tokens, compared without their spans.
Tokenize(Vec<Token<'static>>),
/// Expected tokens, compared together with their spans.
TokenizeSpanned(Vec<Spanned<Token<'static>>>),
/// Expected syntax tree, compared with the span-ignoring `compare`.
Parse(SyntaxTree),
/// Expected syntax tree, compared with `==`.
ParseSpanned(SyntaxTree),
}
fn main() {
@ -75,6 +187,7 @@ fn main() {
println!(" - Source: {:?}", src);
println!(" - Expected: {:?}", expected);
println!(" - Found: {:?}", found);
println!();
failed += 1;
errors = true;
@ -98,14 +211,26 @@ fn main() {
fn test_case(src: &str, target: Target) -> (bool, String, String) {
match target {
Target::Tokenized(tokens) => {
Target::Tokenize(tokens) => {
let found: Vec<_> = tokenize(src).map(Spanned::value).collect();
(found == tokens, format!("{:?}", tokens), format!("{:?}", found))
}
Target::TokenizedSpanned(tokens) => {
Target::TokenizeSpanned(tokens) => {
let found: Vec<_> = tokenize(src).collect();
(found == tokens, format!("{:?}", tokens), format!("{:?}", found))
}
Target::Parse(tree) => {
let scope = Scope::with_debug::<DebugFn>();
let (found, _, errs) = parse(src, ParseContext { scope: &scope });
(compare(&tree, &found), format!("{:?}", tree), format!("{:?}", found))
}
Target::ParseSpanned(tree) => {
let scope = Scope::with_debug::<DebugFn>();
let (found, _, _) = parse(src, ParseContext { scope: &scope });
(tree == found, format!("{:?}", tree), format!("{:?}", found))
}
}
}

View File

@ -46,6 +46,12 @@ t "[func]*bold*" => [LB, ID("func"), RB, ST, T("bold"), ST]
t "[_*`]" => [LB, T("_"), T("*"), T("`"), RB]
t "hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")]
// Nested functions.
t "[f: [=][*]]" => [LB, ID("f"), CL, W(0), LB, EQ, RB, LB, ST, RB, RB]
t "[_][[,],]," => [LB, T("_"), RB, LB, LB, CM, RB, T(","), RB, T(",")]
t "[=][=][=]" => [LB, EQ, RB, LB, T("="), RB, LB, EQ, RB]
t "[=][[=][=][=]]" => [LB, EQ, RB, LB, LB, EQ, RB, LB, T("="), RB, LB, EQ, RB, RB]
// Escapes.
t r"\[" => [T("[")]
t r"\]" => [T("]")]
@ -68,7 +74,7 @@ ts "ab\r\nc" => [(0:0, 0:2, T("ab")), (0:2, 1:0, W(1)), (1:0, 1:1, T("c"
ts "[a=10]" => [(0:0, 0:1, LB), (0:1, 0:2, ID("a")), (0:2, 0:3, EQ),
(0:3, 0:5, NUM(10.0)), (0:5, 0:6, RB)]
ts r#"[x = "(1)"]*"# => [(0:0, 0:1, LB), (0:1, 0:2, ID("x")), (0:2, 0:3, W(0)),
(0:3, 0:4, EQ), (0:4, 0:5, W(0)), (0:5, 0:10, STR("(1)")),
(0:10, 0:11, RB), (0:11, 0:12, ST)]
(0:3, 0:4, EQ), (0:4, 0:5, W(0)), (0:5, 0:10, STR("(1)")),
(0:10, 0:11, RB), (0:11, 0:12, ST)]
ts "// ab\r\n\nf" => [(0:0, 0:5, LC(" ab")), (0:5, 2:0, W(2)), (2:0, 2:1, T("f"))]
ts "/*b*/_" => [(0:0, 0:5, BC("b")), (0:5, 0:6, U)]

20
tests/parsing/trees.rs Normal file
View File

@ -0,0 +1,20 @@
// Parser test cases. Each case names the task (`p`: parse and compare
// without spans), the source string and the list of expected nodes.
// Shorthands: `T(..)` is a text node, `S` a space, `N` a newline, `B` a
// bold toggle, and `F!(..)` a function call with an optional body.
p "" => []
p "hi" => [T("hi")]
p "hi you" => [T("hi"), S, T("you")]
p "\n\n 🌍" => [T(""), N, T("🌍")]
p "[func]" => [F!(None)]
p "[tree][hi *you*]" => [F!(Some([T("hi"), S, B, T("you"), B]))]
// p "from [align: left] to" => [
// T("from"), S,
// F!("align", pos=[ID("left")], None),
// S, T("to"),
// ]
// p "[box: x=1.2pt, false][a b c] bye" => [
// F!(
// "box",
// pos=[BOOL(false)],
// key=["x": SIZE(Size::pt(1.2))],
// Some([T("a"), S, T("b"), S, T("c")]),
// ),
// S, T("bye"),
// ]