Convert spans to line/column format 📑
This commit is contained in:
parent
5dbc7dc5aa
commit
a75ddd2c93
@ -80,7 +80,7 @@ macro_rules! function {
|
||||
|
||||
fn parse(
|
||||
args: FuncArgs,
|
||||
$body: Option<Spanned<&str>>,
|
||||
$body: Option<&str>,
|
||||
$ctx: ParseContext,
|
||||
$metadata: Self::Meta,
|
||||
) -> ParseResult<Self> where Self: Sized {
|
||||
@ -144,7 +144,7 @@ macro_rules! parse {
|
||||
|
||||
(optional: $body:expr, $ctx:expr) => (
|
||||
if let Some(body) = $body {
|
||||
Some($crate::syntax::parse(body.v, $ctx)?)
|
||||
Some($crate::syntax::parse(body, $ctx)?)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@ -152,7 +152,7 @@ macro_rules! parse {
|
||||
|
||||
(expected: $body:expr, $ctx:expr) => (
|
||||
if let Some(body) = $body {
|
||||
$crate::syntax::parse(body.v, $ctx)?
|
||||
$crate::syntax::parse(body, $ctx)?
|
||||
} else {
|
||||
error!("expected body");
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ pub trait ParseFunc {
|
||||
/// Parse the header and body into this function given a context.
|
||||
fn parse(
|
||||
args: FuncArgs,
|
||||
body: Option<Spanned<&str>>,
|
||||
body: Option<&str>,
|
||||
ctx: ParseContext,
|
||||
metadata: Self::Meta,
|
||||
) -> ParseResult<Self> where Self: Sized;
|
||||
@ -125,7 +125,7 @@ pub struct Scope {
|
||||
/// implements [`LayoutFunc`].
|
||||
type Parser = dyn Fn(
|
||||
FuncArgs,
|
||||
Option<Spanned<&str>>,
|
||||
Option<&str>,
|
||||
ParseContext
|
||||
) -> ParseResult<Box<dyn LayoutFunc>>;
|
||||
|
||||
|
@ -117,8 +117,12 @@ impl<'s> Parser<'s> {
|
||||
_ => error!("expected arguments or closing bracket"),
|
||||
};
|
||||
|
||||
let func = self.parse_func_call(name, args)?;
|
||||
span.end = self.tokens.string_index();
|
||||
span.end = self.tokens.get_position();
|
||||
let (func, body_span) = self.parse_func_call(name, args)?;
|
||||
|
||||
if let Some(body_span) = body_span {
|
||||
span.expand(body_span);
|
||||
}
|
||||
|
||||
// Finally this function is parsed to the end.
|
||||
self.add(Node::Func(func), span);
|
||||
@ -139,7 +143,6 @@ impl<'s> Parser<'s> {
|
||||
};
|
||||
|
||||
self.add_color_token(ColorToken::FuncName, name.span);
|
||||
|
||||
self.skip_white();
|
||||
|
||||
Ok(name)
|
||||
@ -231,7 +234,8 @@ impl<'s> Parser<'s> {
|
||||
}
|
||||
|
||||
/// Parse a function call.
|
||||
fn parse_func_call(&mut self, name: Spanned<Ident>, args: FuncArgs) -> ParseResult<FuncCall> {
|
||||
fn parse_func_call(&mut self, name: Spanned<Ident>, args: FuncArgs)
|
||||
-> ParseResult<(FuncCall, Option<Span>)> {
|
||||
// Now we want to parse this function dynamically.
|
||||
let parser = self
|
||||
.ctx
|
||||
@ -242,32 +246,38 @@ impl<'s> Parser<'s> {
|
||||
let has_body = self.tokens.peek().map(Spanned::value) == Some(Token::LeftBracket);
|
||||
|
||||
// Do the parsing dependent on whether the function has a body.
|
||||
Ok(FuncCall(if has_body {
|
||||
Ok(if has_body {
|
||||
self.advance();
|
||||
|
||||
// Find out the string which makes the body of this function.
|
||||
let start = self.tokens.string_index();
|
||||
let end = find_closing_bracket(&self.src[start..])
|
||||
.map(|end| start + end)
|
||||
.ok_or_else(|| error!(@"expected closing bracket"))?;
|
||||
let start_index = self.tokens.string_index();
|
||||
let mut start_pos = self.tokens.get_position();
|
||||
start_pos.column -= 1;
|
||||
|
||||
let span = Span::new(start - 1, end + 1);
|
||||
let (mut end_index, mut end_pos) =
|
||||
find_closing_bracket(&self.src[start_index..])
|
||||
.ok_or_else(|| error!(@"expected closing bracket"))?;
|
||||
|
||||
end_index += start_index;
|
||||
end_pos.column += 1;
|
||||
|
||||
let span = Span::new(start_pos, end_pos);
|
||||
|
||||
// Parse the body.
|
||||
let body_string = &self.src[start..end];
|
||||
let body = parser(args, Some(Spanned::new(body_string, span)), self.ctx)?;
|
||||
let body_string = &self.src[start_index..end_index];
|
||||
let body = parser(args, Some(body_string), self.ctx)?;
|
||||
|
||||
// Skip to the end of the function in the token stream.
|
||||
self.tokens.set_string_index(end);
|
||||
self.tokens.set_string_index(end_index);
|
||||
|
||||
// Now the body should be closed.
|
||||
let token = self.tokens.next().expect("parse_func_body: expected token");
|
||||
assert!(token.v == Token::RightBracket);
|
||||
|
||||
body
|
||||
(FuncCall(body), Some(span))
|
||||
} else {
|
||||
parser(args, None, self.ctx)?
|
||||
}))
|
||||
(FuncCall(parser(args, None, self.ctx)?), None)
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse an expression.
|
||||
@ -399,16 +409,30 @@ impl<'s> Parser<'s> {
|
||||
}
|
||||
|
||||
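/// Find the first unbalanced and unescaped closing bracket in `src`.
///
/// Returns its byte index within `src` together with a `Position` whose line
/// starts counting at 1 at the beginning of `src` and whose column is the
/// byte offset from the start of the current line inside `src`.
///
/// NOTE(review): both values are relative to `src`, not to the whole
/// document, and `'\r'` and `'\n'` each bump the line counter, so a "\r\n"
/// pair advances it by two. Confirm that callers translate the result into
/// document coordinates before building a `Span` from it.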
|
||||
fn find_closing_bracket(src: &str) -> Option<usize> {
|
||||
fn find_closing_bracket(src: &str) -> Option<(usize, Position)> {
|
||||
let mut parens = 0;
|
||||
let mut escaped = false;
|
||||
let mut line = 1;
|
||||
let mut line_start_index = 0;
|
||||
|
||||
for (index, c) in src.char_indices() {
|
||||
match c {
|
||||
'\\' => {
|
||||
escaped = !escaped;
|
||||
continue;
|
||||
}
|
||||
']' if !escaped && parens == 0 => return Some(index),
|
||||
c if is_newline_char(c) => {
|
||||
line += 1;
|
||||
line_start_index = index + c.len_utf8();
|
||||
}
|
||||
']' if !escaped && parens == 0 => {
|
||||
let position = Position {
|
||||
line,
|
||||
column: index - line_start_index,
|
||||
};
|
||||
|
||||
return Some((index, position))
|
||||
}
|
||||
'[' if !escaped => parens += 1,
|
||||
']' if !escaped => parens -= 1,
|
||||
_ => {}
|
||||
@ -441,9 +465,16 @@ impl<'s> PeekableTokens<'s> {
|
||||
*self.peeked.get_or_insert_with(|| iter.next())
|
||||
}
|
||||
|
||||
fn string_index(&mut self) -> usize {
|
||||
/// The position at which the next token starts.
///
/// If a token has already been peeked, this is the start of that token's
/// span; otherwise the position is queried from the underlying tokenizer.
fn get_position(&self) -> Position {
|
||||
match self.peeked {
|
||||
Some(Some(peeked)) => peeked.span.start,
|
||||
_ => self.tokens.get_position(),
|
||||
}
|
||||
}
|
||||
|
||||
/// The index into the source string at which the next token starts.
///
/// NOTE(review): when a token has been peeked, this returns the `line` field
/// of that token's start `Position` -- a 1-indexed line number, not a string
/// index. The parser slices the source with the value returned here, so this
/// arm looks wrong now that spans hold line/column positions instead of byte
/// indices. Confirm, and fix where the byte offset of the peeked token is
/// actually available.
fn string_index(&self) -> usize {
|
||||
match self.peeked {
|
||||
// Suspicious: a line number is used where a byte index is expected.
Some(Some(peeked)) => peeked.span.start.line,
|
||||
_ => self.tokens.string_index(),
|
||||
}
|
||||
}
|
||||
@ -577,7 +608,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn zerospan<T>(val: T) -> Spanned<T> {
|
||||
Spanned::new(val, Span::new(0, 0))
|
||||
Spanned::new(val, Span::new(Position::new(0, 0), Position::new(0, 0)))
|
||||
}
|
||||
|
||||
/// Shortcut macro to create a syntax tree. Is `vec`-like and the elements
|
||||
@ -751,36 +782,29 @@ mod tests {
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn parse_spans() {
|
||||
let mut scope = Scope::new();
|
||||
scope.add::<TreeFn>("hello");
|
||||
fn test_span(src: &str, correct: Vec<(usize, usize, usize, usize)>) {
|
||||
let mut scope = Scope::new();
|
||||
scope.add::<TreeFn>("hello");
|
||||
let tree = parse(src, ParseContext { scope: &scope }).unwrap();
|
||||
let spans = tree.nodes.into_iter()
|
||||
.map(|node| {
|
||||
let Span { start, end } = node.span;
|
||||
(start.line, start.column, end.line, end.column)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let parse = |string| {
|
||||
parse(string, ParseContext { scope: &scope }).unwrap().nodes
|
||||
};
|
||||
assert_eq!(spans, correct);
|
||||
}
|
||||
|
||||
let tree = parse("hello world");
|
||||
assert_eq!(tree[0].span.pair(), (0, 5));
|
||||
assert_eq!(tree[2].span.pair(), (6, 11));
|
||||
test_span("hello world", vec![(1, 0, 1, 5), (1, 5, 1, 6), (1, 6, 1, 11)]);
|
||||
test_span("p1\n \np2", vec![(1, 0, 1, 2), (1, 2, 2, 2), (3, 0, 3, 2)]);
|
||||
|
||||
let tree = parse("p1\n \np2");
|
||||
assert_eq!(tree[1].span.pair(), (2, 5));
|
||||
|
||||
let tree = parse("p1\n p2");
|
||||
assert_eq!(tree[1].span.pair(), (2, 4));
|
||||
|
||||
let src = "func [hello: pos, other][body _🌍_]";
|
||||
let tree = parse(src);
|
||||
assert_eq!(tree[0].span.pair(), (0, 4));
|
||||
assert_eq!(tree[1].span.pair(), (4, 5));
|
||||
assert_eq!(tree[2].span.pair(), (5, 37));
|
||||
|
||||
let func = if let Node::Func(f) = &tree[2].v { f } else { panic!() };
|
||||
let body = &func.0.downcast::<TreeFn>().unwrap().tree.nodes;
|
||||
assert_eq!(body[0].span.pair(), (0, 4));
|
||||
assert_eq!(body[1].span.pair(), (4, 5));
|
||||
assert_eq!(body[2].span.pair(), (5, 6));
|
||||
assert_eq!(body[3].span.pair(), (6, 10));
|
||||
assert_eq!(body[4].span.pair(), (10, 11));
|
||||
let src = "func\n [hello: pos, other][body\r\n _🌍_\n]";
|
||||
test_span(src, vec![
|
||||
(1, 0, 1, 4),
|
||||
(1, 4, 2, 1),
|
||||
(2, 1, 4, 1)
|
||||
]);
|
||||
}
|
||||
|
||||
/// Tests whether errors get reported correctly.
|
||||
|
@ -35,28 +35,26 @@ debug_display!(Spanned; T where T: std::fmt::Debug);
|
||||
/// Describes a slice of source code.
|
||||
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||
pub struct Span {
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
pub start: Position,
|
||||
pub end: Position,
|
||||
}
|
||||
|
||||
impl Span {
|
||||
pub fn new(start: usize, end: usize) -> Span {
|
||||
pub fn new(start: Position, end: Position) -> Span {
|
||||
Span { start, end }
|
||||
}
|
||||
|
||||
pub fn merge(a: Span, b: Span) -> Span {
|
||||
let start = a.start.min(b.start);
|
||||
|
||||
Span {
|
||||
start: a.start.min(b.start),
|
||||
end: a.end.max(b.end),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn at(index: usize) -> Span {
|
||||
Span { start: index, end: index + 1 }
|
||||
}
|
||||
|
||||
pub fn pair(&self) -> (usize, usize) {
|
||||
(self.start, self.end)
|
||||
/// Create a zero-width span that starts and ends at `pos`.
///
/// NOTE(review): the index-based predecessor of this function covered
/// `index..index + 1`, i.e. one unit. The tokenizer builds the span of a
/// `LeftBracket` token through `Span::at`, so that token now gets an empty
/// span -- confirm this is intended.
pub fn at(pos: Position) -> Span {
|
||||
Span { start: pos, end: pos }
|
||||
}
|
||||
|
||||
pub fn expand(&mut self, other: Span) {
|
||||
@ -71,3 +69,26 @@ impl Display for Span {
|
||||
}
|
||||
|
||||
debug_display!(Span);
|
||||
|
||||
/// A line-column position in source code.
///
/// Positions are ordered by `line` first and by `column` second (the derived
/// ordering follows field declaration order).
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
pub struct Position {
|
||||
/// The 1-indexed line (inclusive).
|
||||
pub line: usize,
|
||||
/// The 0-indexed column (inclusive).
/// NOTE(review): the tokenizer derives this from a difference of string
/// indices, so it appears to count UTF-8 bytes rather than characters --
/// confirm against consumers that display columns.
|
||||
pub column: usize,
|
||||
}
|
||||
|
||||
impl Position {
|
||||
/// Create a position from a line and a column.
///
/// The values are stored as given; no validation is performed.
pub fn new(line: usize, column: usize) -> Position {
|
||||
Position { line, column }
|
||||
}
|
||||
}
|
||||
|
||||
// Formats a position as `line:column`, e.g. `3:7`.
impl Display for Position {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
// Line and column are written as stored, separated by a colon.
write!(f, "{}:{}", self.line, self.column)
|
||||
}
|
||||
}
|
||||
|
||||
debug_display!(Position);
|
||||
|
@ -18,6 +18,8 @@ pub struct Tokens<'s> {
|
||||
chars: PeekableChars<'s>,
|
||||
state: TokensState,
|
||||
stack: SmallVec<[TokensState; 1]>,
|
||||
line: usize,
|
||||
line_start_index: usize,
|
||||
}
|
||||
|
||||
/// The state the tokenizer is in.
|
||||
@ -40,11 +42,13 @@ impl<'s> Tokens<'s> {
|
||||
chars: PeekableChars::new(src),
|
||||
state: TokensState::Body,
|
||||
stack: SmallVec::new(),
|
||||
line: 1,
|
||||
line_start_index: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// The index of the first character of the next token in the source string.
|
||||
pub fn string_index(&mut self) -> usize {
|
||||
pub fn string_index(&self) -> usize {
|
||||
self.chars.string_index()
|
||||
}
|
||||
|
||||
@ -53,6 +57,11 @@ impl<'s> Tokens<'s> {
|
||||
self.chars.set_string_index(index);
|
||||
}
|
||||
|
||||
/// The current position in the source.
///
/// This is the line/column equivalent of `string_index()`: the current
/// string index is converted using the tokenizer's current line and the
/// index at which that line starts.
|
||||
pub fn get_position(&self) -> Position {
|
||||
self.line_position(self.string_index())
|
||||
}
|
||||
|
||||
/// Advance the iterator by one step.
|
||||
fn advance(&mut self) {
|
||||
self.chars.next();
|
||||
@ -68,6 +77,14 @@ impl<'s> Tokens<'s> {
|
||||
fn unswitch(&mut self) {
|
||||
self.state = self.stack.pop().unwrap_or(TokensState::Body);
|
||||
}
|
||||
|
||||
/// The `Position` with line and column for a string index.
///
/// The index is interpreted relative to the line the tokenizer is currently
/// on: the result carries the current `line`, and the column is the distance
/// of `index` from `line_start_index`. An index smaller than
/// `line_start_index` would underflow the subtraction.
///
/// NOTE(review): the visible part of `set_string_index` only moves the
/// character cursor. If `line` and `line_start_index` are not updated when
/// the cursor jumps across newlines (e.g. when the parser skips a function
/// body), positions computed afterwards will be stale -- confirm.
|
||||
fn line_position(&self, index: usize) -> Position {
|
||||
Position {
|
||||
line: self.line,
|
||||
// Column is measured from the start of the current line.
column: index - self.line_start_index,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Iterator for Tokens<'s> {
|
||||
@ -83,7 +100,8 @@ impl<'s> Iterator for Tokens<'s> {
|
||||
if let Some((index, '[')) = self.chars.peek() {
|
||||
self.advance();
|
||||
self.state = TS::Body;
|
||||
return Some(Spanned::new(Token::LeftBracket, Span::at(index)));
|
||||
let span = Span::at(self.line_position(index));
|
||||
return Some(Spanned::new(Token::LeftBracket, span));
|
||||
} else {
|
||||
self.unswitch();
|
||||
}
|
||||
@ -93,6 +111,9 @@ impl<'s> Iterator for Tokens<'s> {
|
||||
let (pos, next) = self.chars.next()?;
|
||||
let afterwards = self.chars.peekc();
|
||||
|
||||
// The string index directly after the line terminator (i.e. where the next
// line starts), if this token ended a line.
|
||||
let mut eol = None;
|
||||
|
||||
let token = match next {
|
||||
// Functions
|
||||
'[' => {
|
||||
@ -173,9 +194,13 @@ impl<'s> Iterator for Tokens<'s> {
|
||||
// Newlines
|
||||
'\r' if afterwards == Some('\n') => {
|
||||
self.advance();
|
||||
eol = Some(pos + "\r\n".len());
|
||||
Token::Newline
|
||||
},
|
||||
c if is_newline_char(c) => Token::Newline,
|
||||
}
|
||||
c if is_newline_char(c) => {
|
||||
eol = Some(pos + c.len_utf8());
|
||||
Token::Newline
|
||||
}
|
||||
|
||||
// Star/Underscore/Backtick in bodies
|
||||
'*' if self.state == TS::Body => Token::Star,
|
||||
@ -257,12 +282,21 @@ impl<'s> Iterator for Tokens<'s> {
|
||||
}
|
||||
};
|
||||
|
||||
Some(Spanned::new(token, Span::new(pos, self.string_index())))
|
||||
let start = self.line_position(pos);
|
||||
let end = self.get_position();
|
||||
let span = Span::new(start, end);
|
||||
|
||||
if let Some(index) = eol {
|
||||
self.line += 1;
|
||||
self.line_start_index = index;
|
||||
}
|
||||
|
||||
Some(Spanned::new(token, span))
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether this character is a newline (or starts one).
|
||||
fn is_newline_char(character: char) -> bool {
|
||||
pub(crate) fn is_newline_char(character: char) -> bool {
|
||||
match character {
|
||||
'\n' | '\r' | '\u{000c}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
|
||||
_ => false,
|
||||
@ -316,7 +350,7 @@ impl<'s> PeekableChars<'s> {
|
||||
self.chars.next().map(|(i, c)| (self.base + i, c))
|
||||
}
|
||||
|
||||
fn string_index(&mut self) -> usize {
|
||||
fn string_index(&self) -> usize {
|
||||
self.index
|
||||
}
|
||||
|
||||
@ -363,9 +397,12 @@ mod tests {
|
||||
}
|
||||
|
||||
/// Test if the tokens of the source code have the correct spans.
|
||||
fn test_span(src: &str, spans: Vec<(usize, usize)>) {
|
||||
fn test_span(src: &str, spans: Vec<(usize, usize, usize, usize)>) {
|
||||
assert_eq!(Tokens::new(src)
|
||||
.map(|token| token.span.pair())
|
||||
.map(|token| {
|
||||
let Span { start, end } = token.span;
|
||||
(start.line, start.column, end.line, end.column)
|
||||
})
|
||||
.collect::<Vec<_>>(), spans);
|
||||
}
|
||||
|
||||
@ -496,8 +533,12 @@ mod tests {
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn tokenize_spans() {
|
||||
test_span("Hello World", vec![(0, 5), (5, 6), (6, 11)]);
|
||||
test_span("🌍_🎈", vec![(0, 4), (4, 5), (5, 9)]);
|
||||
test_span("[hello: world]", vec![(0, 1), (1, 6), (6, 7), (7, 8), (8, 13), (13, 14)]);
|
||||
test_span("Hello World", vec![(1, 0, 1, 5), (1, 5, 1, 6), (1, 6, 1, 11)]);
|
||||
test_span("🌍_🎈", vec![(1, 0, 1, 4), (1, 4, 1, 5), (1, 5, 1, 9)]);
|
||||
test_span("hello\nworld", vec![(1, 0, 1, 5), (1, 5, 1, 6), (2, 0, 2, 5)]);
|
||||
test_span("[hello: world]", vec![
|
||||
(1, 0, 1, 1), (1, 1, 1, 6), (1, 6, 1, 7),
|
||||
(1, 7, 1, 8), (1, 8, 1, 13), (1, 13, 1, 14)
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user