From 904bc392abdcd7c48fa4541594f6218168afb61f Mon Sep 17 00:00:00 2001 From: Laurenz Date: Fri, 2 Oct 2020 19:17:47 +0200 Subject: [PATCH] =?UTF-8?q?Remove=20spans=20from=20token=20iterator=20?= =?UTF-8?q?=F0=9F=A7=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/layout/stack.rs | 6 +- src/parse/mod.rs | 427 ++++++++++++++++++++++--------------------- src/parse/parser.rs | 110 +++++++---- src/parse/scanner.rs | 2 +- src/parse/tokens.rs | 36 ++-- src/syntax/span.rs | 20 +- 6 files changed, 319 insertions(+), 282 deletions(-) diff --git a/src/layout/stack.rs b/src/layout/stack.rs index f88184635..3e63f5e51 100644 --- a/src/layout/stack.rs +++ b/src/layout/stack.rs @@ -317,7 +317,7 @@ impl StackLayouter { // Then, we reduce the bounding box for the following layouts. This // layout uses up space from the origin to the end. Thus, it reduces - // the usable space for following layouts at it's origin by its + // the usable space for following layouts at its origin by its // extent along the secondary axis. *bound.get_mut(sys.secondary, GenAlign::Start) += sys.secondary.factor() * layout.size.secondary(*sys); @@ -345,7 +345,7 @@ impl StackLayouter { rotation = sys.secondary.axis(); } - // We reduce the bounding box of this layout at it's end by the + // We reduce the bounding box of this layout at its end by the // accumulated secondary extent of all layouts we have seen so far, // which are the layouts after this one since we iterate reversed. *bound.get_mut(sys.secondary, GenAlign::End) -= @@ -369,7 +369,7 @@ impl StackLayouter { let align = layout.align; // The space in which this layout is aligned is given by the - // distances between the borders of it's bounding box. + // distances between the borders of its bounding box. let usable = Size::new(bound.right - bound.left, bound.bottom - bound.top) .generalized(sys); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 2f34357c7..395090af2 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -46,55 +46,55 @@ fn tree(p: &mut Parser) -> SynTree { /// Parse a syntax node. fn node(p: &mut Parser, at_start: bool) -> Option> { - let token = p.eat()?; - let span = token.span; - Some(match token.v { + let start = p.pos(); + let node = match p.eat()? { // Spaces. Token::Space(newlines) => { if newlines < 2 { - SynNode::Space.span_with(span) + SynNode::Space } else { - SynNode::Parbreak.span_with(span) + SynNode::Parbreak } } - Token::Text(text) => SynNode::Text(text.into()).span_with(span), + Token::Text(text) => SynNode::Text(text.into()), // Comments. Token::LineComment(_) | Token::BlockComment(_) => return None, // Markup. - Token::Star => SynNode::ToggleBolder.span_with(span), - Token::Underscore => SynNode::ToggleItalic.span_with(span), - Token::Backslash => SynNode::Linebreak.span_with(span), + Token::Star => SynNode::ToggleBolder, + Token::Underscore => SynNode::ToggleItalic, + Token::Backslash => SynNode::Linebreak, Token::Hashtag => { if at_start { - heading(p, span.start).map(SynNode::Heading) + SynNode::Heading(heading(p, start)) } else { - SynNode::Text(p.get(span).into()).span_with(span) + SynNode::Text(p.eaten_from(start).into()) } } - Token::Raw(token) => raw(p, token, span).map(SynNode::Raw), - Token::UnicodeEscape(token) => unicode_escape(p, token, span).map(SynNode::Text), + Token::Raw(token) => SynNode::Raw(raw(p, token)), + Token::UnicodeEscape(token) => SynNode::Text(unicode_escape(p, token, start)), // Functions. Token::LeftBracket => { - p.jump(span.start); - bracket_call(p).map(Expr::Call).map(SynNode::Expr) + p.jump(start); + SynNode::Expr(Expr::Call(bracket_call(p))) } // Bad tokens. - _ => { - p.diag_unexpected(token); + token => { + p.diag_unexpected(token.span_with(start .. p.pos())); return None; } - }) + }; + Some(node.span_with(start .. p.pos())) } /// Parse a heading. -fn heading(p: &mut Parser, start: Pos) -> Spanned { +fn heading(p: &mut Parser, start: Pos) -> NodeHeading { // Parse the section depth. let count = p.eat_while(|c| c == Token::Hashtag); - let span = (start, p.pos()); + let span = Span::new(start, p.pos()); let level = (count.min(5) as u8).span_with(span); if count > 5 { p.diag(warning!(span, "section depth larger than 6 has no effect")); @@ -109,26 +109,23 @@ fn heading(p: &mut Parser, start: Pos) -> Spanned { } } - NodeHeading { level, contents }.span_with((start, p.pos())) + NodeHeading { level, contents } } /// Parse a raw block. -fn raw(p: &mut Parser, token: TokenRaw, span: Span) -> Spanned { +fn raw(p: &mut Parser, token: TokenRaw) -> NodeRaw { let raw = resolve::resolve_raw(token.text, token.backticks); if !token.terminated { - p.diag(error!(span.end, "expected backtick(s)")); + p.diag(error!(p.pos(), "expected backtick(s)")); } - raw.span_with(span) + raw } /// Parse a unicode escape sequence. -fn unicode_escape( - p: &mut Parser, - token: TokenUnicodeEscape, - span: Span, -) -> Spanned { +fn unicode_escape(p: &mut Parser, token: TokenUnicodeEscape, start: Pos) -> String { + let span = Span::new(start, p.pos()); let text = if let Some(c) = resolve::resolve_hex(token.sequence) { c.to_string() } else { @@ -142,29 +139,28 @@ fn unicode_escape( p.diag(error!(span.end, "expected closing brace")); } - text.span_with(span) + text } /// Parse a bracketed function call. -fn bracket_call(p: &mut Parser) -> Spanned { - let before_bracket = p.pos(); +fn bracket_call(p: &mut Parser) -> ExprCall { p.start_group(Group::Bracket); p.push_mode(TokenMode::Header); // One header is guaranteed, but there may be more (through chaining). let mut outer = vec![]; - let mut inner = bracket_subheader(p); + let mut inner = p.span(|p| bracket_subheader(p)); - while p.eat_if(Token::Chain).is_some() { + while p.eat_if(Token::Chain) { outer.push(inner); - inner = bracket_subheader(p); + inner = p.span(|p| bracket_subheader(p)); } p.pop_mode(); p.end_group(); if p.peek() == Some(Token::LeftBracket) { - let expr = bracket_body(p).map(Lit::Content).map(Expr::Lit); + let expr = p.span(|p| Expr::Lit(Lit::Content(bracket_body(p)))); inner.span.expand(expr.span); inner.v.args.0.push(LitDictEntry { key: None, expr }); } @@ -177,26 +173,26 @@ fn bracket_call(p: &mut Parser) -> Spanned { inner = top; } - inner.v.span_with((before_bracket, p.pos())) + inner.v } /// Parse one subheader of a bracketed function call. -fn bracket_subheader(p: &mut Parser) -> Spanned { +fn bracket_subheader(p: &mut Parser) -> ExprCall { p.start_group(Group::Subheader); - let before_name = p.pos(); + let start = p.pos(); p.skip_white(); - let name = ident(p).unwrap_or_else(|| { + let name = p.span(|p| ident(p)).transpose().unwrap_or_else(|| { if p.eof() { - p.diag_expected_at("function name", before_name); + p.diag_expected_at("function name", start); } else { p.diag_expected("function name"); } - Ident(String::new()).span_with(before_name) + Ident(String::new()).span_with(start) }); p.skip_white(); - let args = if p.eat_if(Token::Colon).is_some() { + let args = if p.eat_if(Token::Colon) { dict_contents(p).0 } else { // Ignore the rest if there's no colon. @@ -207,171 +203,26 @@ fn bracket_subheader(p: &mut Parser) -> Spanned { LitDict::new() }; - ExprCall { name, args }.span_with(p.end_group()) + p.end_group(); + ExprCall { name, args } } /// Parse the body of a bracketed function call. -fn bracket_body(p: &mut Parser) -> Spanned { +fn bracket_body(p: &mut Parser) -> SynTree { p.start_group(Group::Bracket); p.push_mode(TokenMode::Body); let tree = tree(p); p.pop_mode(); - tree.span_with(p.end_group()) -} - -/// Parse an expression: `term (+ term)*`. -fn expr(p: &mut Parser) -> Option> { - binops(p, "summand", term, |token| match token { - Token::Plus => Some(BinOp::Add), - Token::Hyphen => Some(BinOp::Sub), - _ => None, - }) -} - -/// Parse a term: `factor (* factor)*`. -fn term(p: &mut Parser) -> Option> { - binops(p, "factor", factor, |token| match token { - Token::Star => Some(BinOp::Mul), - Token::Slash => Some(BinOp::Div), - _ => None, - }) -} - -/// Parse binary operations of the from `a ( b)*`. -fn binops( - p: &mut Parser, - operand_name: &str, - operand: fn(&mut Parser) -> Option>, - op: fn(Token) -> Option, -) -> Option> { - let mut lhs = operand(p)?; - - loop { - p.skip_white(); - if let Some(op) = p.eat_map(op) { - p.skip_white(); - - if let Some(rhs) = operand(p) { - let span = lhs.span.join(rhs.span); - let expr = Expr::Binary(ExprBinary { - lhs: lhs.map(Box::new), - op, - rhs: rhs.map(Box::new), - }); - lhs = expr.span_with(span); - p.skip_white(); - } else { - let span = lhs.span.join(op.span); - p.diag(error!(span, "missing right {}", operand_name)); - break; - } - } else { - break; - } - } - - Some(lhs) -} - -/// Parse a factor of the form `-?value`. -fn factor(p: &mut Parser) -> Option> { - if let Some(op) = p.eat_map(|token| match token { - Token::Hyphen => Some(UnOp::Neg), - _ => None, - }) { - p.skip_white(); - if let Some(expr) = factor(p) { - let span = op.span.join(expr.span); - let expr = Expr::Unary(ExprUnary { op, expr: expr.map(Box::new) }); - Some(expr.span_with(span)) - } else { - p.diag(error!(op.span, "missing factor")); - None - } - } else { - value(p) - } -} - -/// Parse a value. -fn value(p: &mut Parser) -> Option> { - let Spanned { v: token, span } = p.eat()?; - Some(match token { - // Bracketed function call. - Token::LeftBracket => { - p.jump(span.start); - let call = bracket_call(p); - let span = call.span; - let node = call.map(Expr::Call).map(SynNode::Expr); - Expr::Lit(Lit::Content(vec![node])).span_with(span) - } - - // Content expression. - Token::LeftBrace => { - p.jump(span.start); - content(p).map(Lit::Content).map(Expr::Lit) - } - - // Dictionary or just a parenthesized expression. - Token::LeftParen => { - p.jump(span.start); - parenthesized(p) - } - - // Function or just ident. - Token::Ident(id) => { - let ident = Ident(id.into()).span_with(span); - - p.skip_white(); - if p.peek() == Some(Token::LeftParen) { - paren_call(p, ident).map(Expr::Call) - } else { - ident.map(Lit::Ident).map(Expr::Lit) - } - } - - // Atomic values. - Token::Bool(b) => Expr::Lit(Lit::Bool(b)).span_with(span), - Token::Number(f) => Expr::Lit(Lit::Float(f)).span_with(span), - Token::Length(l) => Expr::Lit(Lit::Length(l)).span_with(span), - Token::Hex(hex) => color(p, hex, span).map(Lit::Color).map(Expr::Lit), - Token::Str(token) => string(p, token, span).map(Lit::Str).map(Expr::Lit), - - // No value. - _ => { - p.jump(span.start); - return None; - } - }) -} - -// Parse a content expression: `{...}`. -fn content(p: &mut Parser) -> Spanned { - p.start_group(Group::Brace); - p.push_mode(TokenMode::Body); - let tree = tree(p); - p.pop_mode(); - tree.span_with(p.end_group()) -} - -/// Parse a parenthesized expression: `(a + b)`, `(1, key="value"). -fn parenthesized(p: &mut Parser) -> Spanned { - p.start_group(Group::Paren); - let (dict, coercable) = dict_contents(p); - let expr = if coercable { - dict.0.into_iter().next().expect("dict is coercable").expr.v - } else { - Expr::Lit(Lit::Dict(dict)) - }; - expr.span_with(p.end_group()) + p.end_group(); + tree } /// Parse a parenthesized function call. -fn paren_call(p: &mut Parser, name: Spanned) -> Spanned { +fn paren_call(p: &mut Parser, name: Spanned) -> ExprCall { p.start_group(Group::Paren); let args = dict_contents(p).0; - let span = name.span.join(p.end_group()); - ExprCall { name, args }.span_with(span) + p.end_group(); + ExprCall { name, args } } /// Parse the contents of a dictionary. @@ -405,7 +256,7 @@ fn dict_contents(p: &mut Parser) -> (LitDict, bool) { break; } - if p.eat_if(Token::Comma).is_none() { + if !p.eat_if(Token::Comma) { p.diag_expected_at("comma", behind); } @@ -418,7 +269,7 @@ fn dict_contents(p: &mut Parser) -> (LitDict, bool) { /// Parse a single entry in a dictionary. fn dict_entry(p: &mut Parser) -> Option { - if let Some(ident) = ident(p) { + if let Some(ident) = p.span(|p| ident(p)).transpose() { p.skip_white(); match p.peek() { // Key-value pair. @@ -438,7 +289,11 @@ fn dict_entry(p: &mut Parser) -> Option { // Function call. Some(Token::LeftParen) => Some(LitDictEntry { key: None, - expr: paren_call(p, ident).map(Expr::Call), + expr: { + let start = ident.span.start; + let call = paren_call(p, ident); + Expr::Call(call).span_with(start .. p.pos()) + }, }), // Just an identifier. @@ -454,8 +309,160 @@ fn dict_entry(p: &mut Parser) -> Option { } } +/// Parse an expression: `term (+ term)*`. +fn expr(p: &mut Parser) -> Option> { + binops(p, "summand", term, |token| match token { + Token::Plus => Some(BinOp::Add), + Token::Hyphen => Some(BinOp::Sub), + _ => None, + }) +} + +/// Parse a term: `factor (* factor)*`. +fn term(p: &mut Parser) -> Option> { + binops(p, "factor", factor, |token| match token { + Token::Star => Some(BinOp::Mul), + Token::Slash => Some(BinOp::Div), + _ => None, + }) +} + +/// Parse binary operations of the from `a ( b)*`. +fn binops( + p: &mut Parser, + operand_name: &str, + operand: fn(&mut Parser) -> Option>, + op: fn(Token) -> Option, +) -> Option> { + let mut lhs = operand(p)?; + + loop { + p.skip_white(); + if let Some(op) = p.span(|p| p.eat_map(op)).transpose() { + p.skip_white(); + + if let Some(rhs) = operand(p) { + let span = lhs.span.join(rhs.span); + let expr = Expr::Binary(ExprBinary { + lhs: lhs.map(Box::new), + op, + rhs: rhs.map(Box::new), + }); + lhs = expr.span_with(span); + p.skip_white(); + } else { + let span = lhs.span.join(op.span); + p.diag(error!(span, "missing right {}", operand_name)); + break; + } + } else { + break; + } + } + + Some(lhs) +} + +/// Parse a factor of the form `-?value`. +fn factor(p: &mut Parser) -> Option> { + let op = |token| match token { + Token::Hyphen => Some(UnOp::Neg), + _ => None, + }; + + p.span(|p| { + if let Some(op) = p.span(|p| p.eat_map(op)).transpose() { + p.skip_white(); + if let Some(expr) = factor(p) { + Some(Expr::Unary(ExprUnary { op, expr: expr.map(Box::new) })) + } else { + p.diag(error!(op.span, "missing factor")); + None + } + } else { + value(p) + } + }) + .transpose() +} + +/// Parse a value. +fn value(p: &mut Parser) -> Option { + let start = p.pos(); + Some(match p.eat()? { + // Bracketed function call. + Token::LeftBracket => { + p.jump(start); + let node = p.span(|p| SynNode::Expr(Expr::Call(bracket_call(p)))); + Expr::Lit(Lit::Content(vec![node])) + } + + // Content expression. + Token::LeftBrace => { + p.jump(start); + Expr::Lit(Lit::Content(content(p))) + } + + // Dictionary or just a parenthesized expression. + Token::LeftParen => { + p.jump(start); + parenthesized(p) + } + + // Function or just ident. + Token::Ident(id) => { + let ident = Ident(id.into()); + let after = p.pos(); + + p.skip_white(); + if p.peek() == Some(Token::LeftParen) { + let name = ident.span_with(start .. after); + Expr::Call(paren_call(p, name)) + } else { + Expr::Lit(Lit::Ident(ident)) + } + } + + // Atomic values. + Token::Bool(b) => Expr::Lit(Lit::Bool(b)), + Token::Number(f) => Expr::Lit(Lit::Float(f)), + Token::Length(l) => Expr::Lit(Lit::Length(l)), + Token::Hex(hex) => Expr::Lit(Lit::Color(color(p, hex, start))), + Token::Str(token) => Expr::Lit(Lit::Str(string(p, token))), + + // No value. + _ => { + p.jump(start); + return None; + } + }) +} + +// Parse a content expression: `{...}`. +fn content(p: &mut Parser) -> SynTree { + p.start_group(Group::Brace); + p.push_mode(TokenMode::Body); + let tree = tree(p); + p.pop_mode(); + p.end_group(); + tree +} + +/// Parse a parenthesized expression: `(a + b)`, `(1, key="value"). +fn parenthesized(p: &mut Parser) -> Expr { + p.start_group(Group::Paren); + let (dict, coercable) = dict_contents(p); + let expr = if coercable { + dict.0.into_iter().next().expect("dict is coercable").expr.v + } else { + Expr::Lit(Lit::Dict(dict)) + }; + p.end_group(); + expr +} + /// Parse an identifier. -fn ident(p: &mut Parser) -> Option> { +fn ident(p: &mut Parser) -> Option { p.eat_map(|token| match token { Token::Ident(id) => Some(Ident(id.into())), _ => None, @@ -463,23 +470,21 @@ fn ident(p: &mut Parser) -> Option> { } /// Parse a color. -fn color(p: &mut Parser, hex: &str, span: Span) -> Spanned { - RgbaColor::from_str(hex) - .unwrap_or_else(|_| { - // Heal color by assuming black. - p.diag(error!(span, "invalid color")); - RgbaColor::new_healed(0, 0, 0, 255) - }) - .span_with(span) +fn color(p: &mut Parser, hex: &str, start: Pos) -> RgbaColor { + RgbaColor::from_str(hex).unwrap_or_else(|_| { + // Heal color by assuming black. + p.diag(error!(start .. p.pos(), "invalid color")); + RgbaColor::new_healed(0, 0, 0, 255) + }) } /// Parse a string. -fn string(p: &mut Parser, token: TokenStr, span: Span) -> Spanned { +fn string(p: &mut Parser, token: TokenStr) -> String { if !token.terminated { - p.diag_expected_at("quote", span.end); + p.diag_expected_at("quote", p.pos()); } - resolve::resolve_string(token.string).span_with(span) + resolve::resolve_string(token.string) } #[cfg(test)] diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 9d5eb0a9d..d34730c8c 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -2,15 +2,16 @@ use std::fmt::{self, Debug, Formatter}; use super::{Scanner, TokenMode, Tokens}; use crate::diagnostic::Diagnostic; -use crate::syntax::{Decoration, Pos, Span, Spanned, Token}; +use crate::syntax::{Decoration, Pos, Span, SpanWith, Spanned, Token}; use crate::Feedback; /// A convenient token-based parser. pub struct Parser<'s> { tokens: Tokens<'s>, - peeked: Option>>, + peeked: Option>, modes: Vec, - groups: Vec<(Pos, Group)>, + groups: Vec, + pos: Pos, f: Feedback, } @@ -22,6 +23,7 @@ impl<'s> Parser<'s> { peeked: None, modes: vec![], groups: vec![], + pos: Pos::ZERO, f: Feedback::new(), } } @@ -39,12 +41,14 @@ impl<'s> Parser<'s> { /// Eat the next token and add a diagnostic that it was not the expected /// `thing`. pub fn diag_expected(&mut self, thing: &str) { + let before = self.pos(); if let Some(found) = self.eat() { + let after = self.pos(); self.diag(error!( - found.span, + before .. after, "expected {}, found {}", thing, - found.v.name(), + found.name(), )); } else { self.diag_expected_at(thing, self.pos()); @@ -89,25 +93,24 @@ impl<'s> Parser<'s> { /// # Panics /// This panics if the next token does not start the given group. pub fn start_group(&mut self, group: Group) { - let start = self.pos(); match group { Group::Paren => self.eat_assert(Token::LeftParen), Group::Bracket => self.eat_assert(Token::LeftBracket), Group::Brace => self.eat_assert(Token::LeftBrace), Group::Subheader => {} } - self.groups.push((start, group)); + self.groups.push(group); } /// Ends the parsing of a group and returns the span of the whole group. /// /// # Panics /// This panics if no group was started. - pub fn end_group(&mut self) -> Span { + pub fn end_group(&mut self) { // Check that we are indeed at the end of the group. debug_assert_eq!(self.peek(), None, "unfinished group"); - let (start, group) = self.groups.pop().expect("unstarted group"); + let group = self.groups.pop().expect("unstarted group"); let end = match group { Group::Paren => Some(Token::RightParen), Group::Bracket => Some(Token::RightBracket), @@ -119,14 +122,12 @@ impl<'s> Parser<'s> { // This `peek()` can't be used directly because it hides the end of // group token. To circumvent this, we drop down to `self.peeked`. self.peek(); - if self.peeked.map(|s| s.v) == Some(token) { - self.peeked = None; + if self.peeked == Some(token) { + self.bump(); } else { self.diag(error!(self.pos(), "expected {}", token.name())); } } - - Span::new(start, self.pos()) } /// Skip whitespace tokens. @@ -136,34 +137,43 @@ impl<'s> Parser<'s> { }); } + /// Execute `f` and return the result alongside the span of everything `f` + /// ate. + pub fn span(&mut self, f: impl FnOnce(&mut Self) -> T) -> Spanned { + let start = self.pos; + f(self).span_with(start .. self.pos) + } + /// Consume the next token. - pub fn eat(&mut self) -> Option>> { + pub fn eat(&mut self) -> Option> { self.peek()?; - self.peeked.take() + self.bump() } /// Consume the next token if it is the given one. - pub fn eat_if(&mut self, t: Token) -> Option>> { - if self.peek()? == t { self.peeked.take() } else { None } + pub fn eat_if(&mut self, t: Token) -> bool { + if self.peek() == Some(t) { + self.bump(); + true + } else { + false + } } /// Consume the next token if the closure maps it a to `Some`-variant. - pub fn eat_map( - &mut self, - mut f: impl FnMut(Token<'s>) -> Option, - ) -> Option> { + pub fn eat_map(&mut self, f: impl FnOnce(Token<'s>) -> Option) -> Option { let token = self.peek()?; - if let Some(t) = f(token) { - self.peeked.take().map(|spanned| spanned.map(|_| t)) - } else { - None + let out = f(token); + if out.is_some() { + self.bump(); } + out } /// Consume the next token, debug-asserting that it is the given one. pub fn eat_assert(&mut self, t: Token) { let next = self.eat(); - debug_assert_eq!(next.map(|s| s.v), Some(t)); + debug_assert_eq!(next, Some(t)); } /// Consume tokens while the condition is true. @@ -182,7 +192,7 @@ impl<'s> Parser<'s> { if f(t) { break; } - self.peeked = None; + self.bump(); count += 1; } count @@ -191,11 +201,11 @@ impl<'s> Parser<'s> { /// Peek at the next token without consuming it. pub fn peek(&mut self) -> Option> { let token = match self.peeked { - Some(token) => token.v, + Some(token) => token, None => { let token = self.tokens.next()?; self.peeked = Some(token); - token.v + token } }; @@ -207,7 +217,7 @@ impl<'s> Parser<'s> { _ => return Some(token), }; - if self.groups.iter().rev().any(|&(_, g)| g == group) { + if self.groups.contains(&group) { None } else { Some(token) @@ -217,7 +227,7 @@ impl<'s> Parser<'s> { /// Checks whether the next token fulfills a condition. /// /// Returns `false` if there is no next token. - pub fn check(&mut self, f: impl FnMut(Token<'s>) -> bool) -> bool { + pub fn check(&mut self, f: impl FnOnce(Token<'s>) -> bool) -> bool { self.peek().map(f).unwrap_or(false) } @@ -229,30 +239,52 @@ impl<'s> Parser<'s> { /// The position in the string at which the last token ends and next token /// will start. pub fn pos(&self) -> Pos { - self.peeked.map(|s| s.span.start).unwrap_or_else(|| self.tokens.pos()) + self.pos } /// Jump to a position in the source string. pub fn jump(&mut self, pos: Pos) { self.tokens.jump(pos); - self.peeked = None; + self.bump(); } - /// Returns the part of the source string that is spanned by the given span. - pub fn get(&self, span: Span) -> &'s str { - self.scanner().get(span.start.to_usize() .. span.end.to_usize()) + /// Slice a part out of the source string. + pub fn get(&self, span: impl Into) -> &'s str { + self.tokens.scanner().get(span.into().to_range()) + } + + /// The full source string up to the current index. + pub fn eaten(&self) -> &'s str { + self.tokens.scanner().get(.. self.pos.to_usize()) + } + + /// The source string from `start` to the current index. + pub fn eaten_from(&self, start: Pos) -> &'s str { + self.tokens.scanner().get(start.to_usize() .. self.pos.to_usize()) + } + + /// The remaining source string after the current index. + pub fn rest(&self) -> &'s str { + self.tokens.scanner().get(self.pos.to_usize() ..) } /// The underlying scanner. pub fn scanner(&self) -> Scanner<'s> { - self.tokens.scanner() + let mut scanner = self.tokens.scanner().clone(); + scanner.jump(self.pos.to_usize()); + scanner + } + + /// Set the position to the tokenizer's position and take the peeked token. + fn bump(&mut self) -> Option> { + self.pos = self.tokens.pos(); + self.peeked.take() } } impl Debug for Parser<'_> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - let s = self.scanner(); - write!(f, "Parser({}|{})", s.eaten(), s.rest()) + write!(f, "Parser({}|{})", self.eaten(), self.rest()) } } diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index 38c8736f5..b71079791 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -100,7 +100,7 @@ impl<'s> Scanner<'s> { /// Checks whether the next char fulfills a condition. /// /// Returns `false` if there is no next char. - pub fn check(&self, f: impl FnMut(char) -> bool) -> bool { + pub fn check(&self, f: impl FnOnce(char) -> bool) -> bool { self.peek().map(f).unwrap_or(false) } diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 354aae7df..77e7e92ed 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -5,7 +5,7 @@ use std::fmt::{self, Debug, Formatter}; use super::{is_newline, Scanner}; use crate::length::Length; use crate::syntax::token::*; -use crate::syntax::{is_ident, Pos, Span, SpanWith, Spanned}; +use crate::syntax::{is_ident, Pos}; use TokenMode::*; @@ -53,18 +53,18 @@ impl<'s> Tokens<'s> { } /// The underlying scanner. - pub fn scanner(&self) -> Scanner<'s> { - self.s.clone() + pub fn scanner(&self) -> &Scanner<'s> { + &self.s } } impl<'s> Iterator for Tokens<'s> { - type Item = Spanned>; + type Item = Token<'s>; /// Parse the next token in the source code. fn next(&mut self) -> Option { let start = self.s.index(); - let token = match self.s.eat()? { + Some(match self.s.eat()? { // Whitespace with fast path for just a single space. ' ' if !self.s.check(|c| c.is_whitespace()) => Token::Space(0), c if c.is_whitespace() => { @@ -109,10 +109,7 @@ impl<'s> Iterator for Tokens<'s> { // Expressions or just plain text. _ => self.read_text_or_expr(start), - }; - - let end = self.s.index(); - Some(token.span_with(Span::new(start, end))) + }) } } @@ -298,7 +295,7 @@ fn parse_percent(text: &str) -> Option { mod tests { use super::*; use crate::length::Length; - use crate::parse::tests::{check, s}; + use crate::parse::tests::check; use Token::{ BlockComment as BC, Bool, Chain, Hex, Hyphen as Min, Ident as Id, @@ -317,13 +314,11 @@ mod tests { Token::UnicodeEscape(TokenUnicodeEscape { sequence, terminated }) } - macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} } - macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} } - macro_rules! test { - (@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => { - let exp = vec![$(Into::>::into($token)),*]; + macro_rules! t { + ($mode:expr, $src:expr => $($token:expr),*) => { + let exp = vec![$($token),*]; let found = Tokens::new($src, $mode).collect::>(); - check($src, exp, found, $spans); + check($src, exp, found, false); } } @@ -479,13 +474,4 @@ mod tests { t!(Header, "(5 - 1) / 2.1" => LP, Num(5.0), S(0), Min, S(0), Num(1.0), RP, S(0), Slash, S(0), Num(2.1)); } - - #[test] - fn tokenize_with_spans() { - ts!(Body, "hello" => s(0, 5, T("hello"))); - ts!(Body, "ab\r\nc" => s(0, 2, T("ab")), s(2, 4, S(1)), s(4, 5, T("c"))); - ts!(Body, "// ab\r\n\nf" => s(0, 5, LC(" ab")), s(5, 8, S(2)), s(8, 9, T("f"))); - ts!(Body, "/*b*/_" => s(0, 5, BC("b")), s(5, 6, Underscore)); - ts!(Header, "a=10" => s(0, 1, Id("a")), s(1, 2, Equals), s(2, 4, Num(10.0))); - } } diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 179c46de8..09e5f02cd 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -1,6 +1,7 @@ //! Mapping of values to the locations they originate from in source code. use std::fmt::{self, Debug, Display, Formatter}; +use std::ops::Range; #[cfg(test)] use std::cell::Cell; @@ -81,6 +82,14 @@ impl Spanned { } } +impl Spanned> { + /// Swap the spanned and option. + pub fn transpose(self) -> Option> { + let Spanned { v, span } = self; + v.map(|v| v.span_with(span)) + } +} + impl Offset for Spanned { fn offset(self, by: Pos) -> Self { self.map_span(|span| span.offset(by)) @@ -135,6 +144,11 @@ impl Span { *self = self.join(other) } + /// Convert to a `Range` for indexing. + pub fn to_range(self) -> Range { + self.start.to_usize() .. self.end.to_usize() + } + /// When set to `false` comparisons with `PartialEq` ignore spans. #[cfg(test)] pub(crate) fn set_cmp(cmp: bool) { @@ -173,12 +187,12 @@ where } } -impl From<(T, T)> for Span +impl From> for Span where T: Into, { - fn from((start, end): (T, T)) -> Self { - Self::new(start, end) + fn from(range: Range) -> Self { + Self::new(range.start, range.end) } }