diff --git a/src/func/macros.rs b/src/func/macros.rs index bbe04b981..3a32ec097 100644 --- a/src/func/macros.rs +++ b/src/func/macros.rs @@ -80,7 +80,7 @@ macro_rules! function { fn parse( args: FuncArgs, - $body: Option>, + $body: Option<&str>, $ctx: ParseContext, $metadata: Self::Meta, ) -> ParseResult where Self: Sized { @@ -144,7 +144,7 @@ macro_rules! parse { (optional: $body:expr, $ctx:expr) => ( if let Some(body) = $body { - Some($crate::syntax::parse(body.v, $ctx)?) + Some($crate::syntax::parse(body, $ctx)?) } else { None } @@ -152,7 +152,7 @@ macro_rules! parse { (expected: $body:expr, $ctx:expr) => ( if let Some(body) = $body { - $crate::syntax::parse(body.v, $ctx)? + $crate::syntax::parse(body, $ctx)? } else { error!("expected body"); } diff --git a/src/func/mod.rs b/src/func/mod.rs index aca612aab..69f28e007 100644 --- a/src/func/mod.rs +++ b/src/func/mod.rs @@ -32,7 +32,7 @@ pub trait ParseFunc { /// Parse the header and body into this function given a context. fn parse( args: FuncArgs, - body: Option>, + body: Option<&str>, ctx: ParseContext, metadata: Self::Meta, ) -> ParseResult where Self: Sized; @@ -125,7 +125,7 @@ pub struct Scope { /// implements [`LayoutFunc`]. type Parser = dyn Fn( FuncArgs, - Option>, + Option<&str>, ParseContext ) -> ParseResult>; diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs index 77d64f1ee..dc39145ac 100644 --- a/src/syntax/parsing.rs +++ b/src/syntax/parsing.rs @@ -117,8 +117,12 @@ impl<'s> Parser<'s> { _ => error!("expected arguments or closing bracket"), }; - let func = self.parse_func_call(name, args)?; - span.end = self.tokens.string_index(); + span.end = self.tokens.get_position(); + let (func, body_span) = self.parse_func_call(name, args)?; + + if let Some(body_span) = body_span { + span.expand(body_span); + } // Finally this function is parsed to the end. self.add(Node::Func(func), span); @@ -139,7 +143,6 @@ impl<'s> Parser<'s> { }; self.add_color_token(ColorToken::FuncName, name.span); - self.skip_white(); Ok(name) @@ -231,7 +234,8 @@ impl<'s> Parser<'s> { } /// Parse a function call. - fn parse_func_call(&mut self, name: Spanned, args: FuncArgs) -> ParseResult { + fn parse_func_call(&mut self, name: Spanned, args: FuncArgs) + -> ParseResult<(FuncCall, Option)> { // Now we want to parse this function dynamically. let parser = self .ctx @@ -242,32 +246,38 @@ impl<'s> Parser<'s> { let has_body = self.tokens.peek().map(Spanned::value) == Some(Token::LeftBracket); // Do the parsing dependent on whether the function has a body. - Ok(FuncCall(if has_body { + Ok(if has_body { self.advance(); // Find out the string which makes the body of this function. - let start = self.tokens.string_index(); - let end = find_closing_bracket(&self.src[start..]) - .map(|end| start + end) - .ok_or_else(|| error!(@"expected closing bracket"))?; + let start_index = self.tokens.string_index(); + let mut start_pos = self.tokens.get_position(); + start_pos.column -= 1; - let span = Span::new(start - 1, end + 1); + let (mut end_index, mut end_pos) = + find_closing_bracket(&self.src[start_index..]) + .ok_or_else(|| error!(@"expected closing bracket"))?; + + end_index += start_index; + end_pos.column += 1; + + let span = Span::new(start_pos, end_pos); // Parse the body. - let body_string = &self.src[start..end]; - let body = parser(args, Some(Spanned::new(body_string, span)), self.ctx)?; + let body_string = &self.src[start_index..end_index]; + let body = parser(args, Some(body_string), self.ctx)?; // Skip to the end of the function in the token stream. - self.tokens.set_string_index(end); + self.tokens.set_string_index(end_index); // Now the body should be closed. let token = self.tokens.next().expect("parse_func_body: expected token"); assert!(token.v == Token::RightBracket); - body + (FuncCall(body), Some(span)) } else { - parser(args, None, self.ctx)? - })) + (FuncCall(parser(args, None, self.ctx)?), None) + }) } /// Parse an expression. @@ -399,16 +409,30 @@ impl<'s> Parser<'s> { } /// Find the index of the first unbalanced and unescaped closing bracket. -fn find_closing_bracket(src: &str) -> Option { +fn find_closing_bracket(src: &str) -> Option<(usize, Position)> { let mut parens = 0; let mut escaped = false; + let mut line = 1; + let mut line_start_index = 0; + for (index, c) in src.char_indices() { match c { '\\' => { escaped = !escaped; continue; } - ']' if !escaped && parens == 0 => return Some(index), + c if is_newline_char(c) => { + line += 1; + line_start_index = index + c.len_utf8(); + } + ']' if !escaped && parens == 0 => { + let position = Position { + line, + column: index - line_start_index, + }; + + return Some((index, position)) + } '[' if !escaped => parens += 1, ']' if !escaped => parens -= 1, _ => {} @@ -441,9 +465,16 @@ impl<'s> PeekableTokens<'s> { *self.peeked.get_or_insert_with(|| iter.next()) } - fn string_index(&mut self) -> usize { + fn get_position(&self) -> Position { match self.peeked { Some(Some(peeked)) => peeked.span.start, + _ => self.tokens.get_position(), + } + } + + fn string_index(&self) -> usize { + match self.peeked { + Some(Some(peeked)) => peeked.span.start.line, _ => self.tokens.string_index(), } } @@ -577,7 +608,7 @@ mod tests { } fn zerospan(val: T) -> Spanned { - Spanned::new(val, Span::new(0, 0)) + Spanned::new(val, Span::new(Position::new(0, 0), Position::new(0, 0))) } /// Shortcut macro to create a syntax tree. Is `vec`-like and the elements @@ -751,36 +782,29 @@ mod tests { #[test] #[rustfmt::skip] fn parse_spans() { - let mut scope = Scope::new(); - scope.add::("hello"); + fn test_span(src: &str, correct: Vec<(usize, usize, usize, usize)>) { + let mut scope = Scope::new(); + scope.add::("hello"); + let tree = parse(src, ParseContext { scope: &scope }).unwrap(); + let spans = tree.nodes.into_iter() + .map(|node| { + let Span { start, end } = node.span; + (start.line, start.column, end.line, end.column) + }) + .collect::>(); - let parse = |string| { - parse(string, ParseContext { scope: &scope }).unwrap().nodes - }; + assert_eq!(spans, correct); + } - let tree = parse("hello world"); - assert_eq!(tree[0].span.pair(), (0, 5)); - assert_eq!(tree[2].span.pair(), (6, 11)); + test_span("hello world", vec![(1, 0, 1, 5), (1, 5, 1, 6), (1, 6, 1, 11)]); + test_span("p1\n \np2", vec![(1, 0, 1, 2), (1, 2, 2, 2), (3, 0, 3, 2)]); - let tree = parse("p1\n \np2"); - assert_eq!(tree[1].span.pair(), (2, 5)); - - let tree = parse("p1\n p2"); - assert_eq!(tree[1].span.pair(), (2, 4)); - - let src = "func [hello: pos, other][body _🌍_]"; - let tree = parse(src); - assert_eq!(tree[0].span.pair(), (0, 4)); - assert_eq!(tree[1].span.pair(), (4, 5)); - assert_eq!(tree[2].span.pair(), (5, 37)); - - let func = if let Node::Func(f) = &tree[2].v { f } else { panic!() }; - let body = &func.0.downcast::().unwrap().tree.nodes; - assert_eq!(body[0].span.pair(), (0, 4)); - assert_eq!(body[1].span.pair(), (4, 5)); - assert_eq!(body[2].span.pair(), (5, 6)); - assert_eq!(body[3].span.pair(), (6, 10)); - assert_eq!(body[4].span.pair(), (10, 11)); + let src = "func\n [hello: pos, other][body\r\n _🌍_\n]"; + test_span(src, vec![ + (1, 0, 1, 4), + (1, 4, 2, 1), + (2, 1, 4, 1) + ]); } /// Tests whether errors get reported correctly. diff --git a/src/syntax/span.rs b/src/syntax/span.rs index c12ac5131..bc7001a96 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -35,28 +35,26 @@ debug_display!(Spanned; T where T: std::fmt::Debug); /// Describes a slice of source code. #[derive(Copy, Clone, Eq, PartialEq)] pub struct Span { - pub start: usize, - pub end: usize, + pub start: Position, + pub end: Position, } impl Span { - pub fn new(start: usize, end: usize) -> Span { + pub fn new(start: Position, end: Position) -> Span { Span { start, end } } pub fn merge(a: Span, b: Span) -> Span { + let start = a.start.min(b.start); + Span { start: a.start.min(b.start), end: a.end.max(b.end), } } - pub fn at(index: usize) -> Span { - Span { start: index, end: index + 1 } - } - - pub fn pair(&self) -> (usize, usize) { - (self.start, self.end) + pub fn at(pos: Position) -> Span { + Span { start: pos, end: pos } } pub fn expand(&mut self, other: Span) { @@ -71,3 +69,26 @@ impl Display for Span { } debug_display!(Span); + +/// A line-column position in source code. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Position { + /// The 1-indexed line (inclusive). + pub line: usize, + /// The 0-indexed column (inclusive). + pub column: usize, +} + +impl Position { + pub fn new(line: usize, column: usize) -> Position { + Position { line, column } + } +} + +impl Display for Position { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{}:{}", self.line, self.column) + } +} + +debug_display!(Position); diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index f5854d8f6..cf37fe483 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -18,6 +18,8 @@ pub struct Tokens<'s> { chars: PeekableChars<'s>, state: TokensState, stack: SmallVec<[TokensState; 1]>, + line: usize, + line_start_index: usize, } /// The state the tokenizer is in. @@ -40,11 +42,13 @@ impl<'s> Tokens<'s> { chars: PeekableChars::new(src), state: TokensState::Body, stack: SmallVec::new(), + line: 1, + line_start_index: 0, } } /// The index of the first character of the next token in the source string. - pub fn string_index(&mut self) -> usize { + pub fn string_index(&self) -> usize { self.chars.string_index() } @@ -53,6 +57,11 @@ impl<'s> Tokens<'s> { self.chars.set_string_index(index); } + /// The current position in the source. + pub fn get_position(&self) -> Position { + self.line_position(self.string_index()) + } + /// Advance the iterator by one step. fn advance(&mut self) { self.chars.next(); @@ -68,6 +77,14 @@ impl<'s> Tokens<'s> { fn unswitch(&mut self) { self.state = self.stack.pop().unwrap_or(TokensState::Body); } + + /// The `Position` with line and column for a string index. + fn line_position(&self, index: usize) -> Position { + Position { + line: self.line, + column: index - self.line_start_index, + } + } } impl<'s> Iterator for Tokens<'s> { @@ -83,7 +100,8 @@ impl<'s> Iterator for Tokens<'s> { if let Some((index, '[')) = self.chars.peek() { self.advance(); self.state = TS::Body; - return Some(Spanned::new(Token::LeftBracket, Span::at(index))); + let span = Span::at(self.line_position(index)); + return Some(Spanned::new(Token::LeftBracket, span)); } else { self.unswitch(); } @@ -93,6 +111,9 @@ impl<'s> Iterator for Tokens<'s> { let (pos, next) = self.chars.next()?; let afterwards = self.chars.peekc(); + /// The index at which the line ended, if it did. + let mut eol = None; + let token = match next { // Functions '[' => { @@ -173,9 +194,13 @@ impl<'s> Iterator for Tokens<'s> { // Newlines '\r' if afterwards == Some('\n') => { self.advance(); + eol = Some(pos + "\r\n".len()); Token::Newline - }, - c if is_newline_char(c) => Token::Newline, + } + c if is_newline_char(c) => { + eol = Some(pos + c.len_utf8()); + Token::Newline + } // Star/Underscore/Backtick in bodies '*' if self.state == TS::Body => Token::Star, @@ -257,12 +282,21 @@ impl<'s> Iterator for Tokens<'s> { } }; - Some(Spanned::new(token, Span::new(pos, self.string_index()))) + let start = self.line_position(pos); + let end = self.get_position(); + let span = Span::new(start, end); + + if let Some(index) = eol { + self.line += 1; + self.line_start_index = index; + } + + Some(Spanned::new(token, span)) } } /// Whether this character is a newline (or starts one). -fn is_newline_char(character: char) -> bool { +pub(crate) fn is_newline_char(character: char) -> bool { match character { '\n' | '\r' | '\u{000c}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true, _ => false, @@ -316,7 +350,7 @@ impl<'s> PeekableChars<'s> { self.chars.next().map(|(i, c)| (self.base + i, c)) } - fn string_index(&mut self) -> usize { + fn string_index(&self) -> usize { self.index } @@ -363,9 +397,12 @@ mod tests { } /// Test if the tokens of the source code have the correct spans. - fn test_span(src: &str, spans: Vec<(usize, usize)>) { + fn test_span(src: &str, spans: Vec<(usize, usize, usize, usize)>) { assert_eq!(Tokens::new(src) - .map(|token| token.span.pair()) + .map(|token| { + let Span { start, end } = token.span; + (start.line, start.column, end.line, end.column) + }) .collect::>(), spans); } @@ -496,8 +533,12 @@ mod tests { #[test] #[rustfmt::skip] fn tokenize_spans() { - test_span("Hello World", vec![(0, 5), (5, 6), (6, 11)]); - test_span("🌍_🎈", vec![(0, 4), (4, 5), (5, 9)]); - test_span("[hello: world]", vec![(0, 1), (1, 6), (6, 7), (7, 8), (8, 13), (13, 14)]); + test_span("Hello World", vec![(1, 0, 1, 5), (1, 5, 1, 6), (1, 6, 1, 11)]); + test_span("🌍_🎈", vec![(1, 0, 1, 4), (1, 4, 1, 5), (1, 5, 1, 9)]); + test_span("hello\nworld", vec![(1, 0, 1, 5), (1, 5, 1, 6), (2, 0, 2, 5)]); + test_span("[hello: world]", vec![ + (1, 0, 1, 1), (1, 1, 1, 6), (1, 6, 1, 7), + (1, 7, 1, 8), (1, 8, 1, 13), (1, 13, 1, 14) + ]); } }