Span tests ↔

This commit is contained in:
Laurenz 2020-01-11 11:47:07 +01:00
parent b1e956419d
commit bd702c2029
4 changed files with 106 additions and 70 deletions

View File

@ -71,7 +71,7 @@ debug_display!(Span);
/// A line-column position in source code.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Position {
/// The 1-indexed line (inclusive).
/// The 0-indexed line (inclusive).
pub line: usize,
/// The 0-indexed column (inclusive).
pub column: usize,

View File

@ -102,7 +102,7 @@ impl<'s> Iterator for Tokens<'s> {
'*' if second == Some('/') => { self.eat(); StarSlash }
// Whitespace.
c if c.is_whitespace() => self.parse_whitespace(c),
c if c.is_whitespace() => self.parse_whitespace(start),
// Functions.
'[' => { self.set_state(Header); LeftBracket }
@ -196,20 +196,11 @@ impl<'s> Tokens<'s> {
}, true, 0, -2))
}
// NOTE(review): this hunk appears to interleave the removed and the added
// version of the function. The `c: char` signature together with the
// `newlines` / `last` bookkeeping looks like the old implementation; the
// `start: Position` signature together with `end.line - start.line` looks like
// the new one (the call site passes `start`). Confirm against the real diff.
fn parse_whitespace(&mut self, c: char) -> Token<'s> {
let mut newlines = if is_newline_char(c) { 1 } else { 0 };
let mut last = c;
/// Reads the remaining whitespace characters and returns a `Whitespace` token
/// whose payload is the line of the position after the run minus the line of
/// `start`.
fn parse_whitespace(&mut self, start: Position) -> Token<'s> {
// Advance until the first non-whitespace character.
self.read_string_until(|n| !n.is_whitespace(), false, 0, 0);
// Position reported by the character stream once the run has been read.
let end = self.chars.position();
self.read_string_until(|n| {
// Count a line break unless this `\n` directly follows a `\r`, so that
// "\r\n" is counted once.
if is_newline_char(n) && !(last == '\r' && n == '\n') {
newlines += 1;
}
last = n;
!n.is_whitespace()
}, false, 0, 0);
Whitespace(newlines)
Whitespace(end.line - start.line)
}
fn parse_string(&mut self) -> Token<'s> {

View File

@ -23,12 +23,31 @@ fn BOOL(b: bool) -> Token<'static> { E(Expr::Bool(b)) }
/// Parses the test syntax.
///
/// Expands to a `Vec` of `(line, src, target)` tuples, one per test case.
macro_rules! tokens {
($($src:expr =>($line:expr)=> $tokens:expr)*) => ({
// Entry rule: every case has the shape
// `<task> <src> =>(<line>)=> [<target tokens>]`. The bracketed target is
// captured as raw token trees and handed to the internal `@<task>` rule below,
// which turns it into a `Target`.
($($task:ident $src:expr =>($line:expr)=> [$($target:tt)*])*) => ({
#[allow(unused_mut)]
let mut cases = Vec::new();
$(cases.push(($line, $src, $tokens.to_vec()));)*
$(cases.push(($line, $src, tokens!(@$task [$($target)*])));)*
cases
});
// Task `t`: the target is a plain list of tokens, copied into
// `Target::Tokenized`.
(@t $tokens:expr) => ({
Target::Tokenized($tokens.to_vec())
});
// Task `ts`: the target is a list of
// `(start_line:start_column, end_line:end_column, token)` entries (a trailing
// comma is accepted). Each entry becomes a `Spanned` token with that span.
(@ts [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $t:expr)),* $(,)?]) => ({
Target::TokenizedSpanned(vec![
$(Spanned { v: $t, span: Span {
start: Position { line: $sl, column: $sc },
end: Position { line: $el, column: $ec },
}}),*
])
});
}
/// The expected outcome of a single test case.
#[derive(Debug)]
enum Target {
/// The expected tokens, without span information.
Tokenized(Vec<Token<'static>>),
/// The expected tokens, each paired with its expected source span.
TokenizedSpanned(Vec<Spanned<Token<'static>>>),
}
fn main() {
@ -47,11 +66,11 @@ fn main() {
let mut failed = 0;
// Go through all tests in a test file.
for (line, src, expected) in cases.into_iter() {
let found: Vec<_> = tokenize(src).map(Spanned::value).collect();
for (line, src, target) in cases.into_iter() {
let (correct, expected, found) = test_case(src, target);
// Check whether the tokenization works correctly.
if found == expected {
if correct {
okay += 1;
} else {
if failed == 0 {
@ -82,3 +101,17 @@ fn main() {
std::process::exit(-1);
}
}
/// Tokenizes `src` and checks the result against `target`.
///
/// Returns `(correct, expected, found)`: whether the tokenizer output equals
/// the target, followed by the `Debug` renderings of the expected list and of
/// the list the tokenizer actually produced.
fn test_case(src: &str, target: Target) -> (bool, String, String) {
    /// Compares the two lists and renders both for the failure report.
    fn verdict<T>(expected: Vec<T>, actual: Vec<T>) -> (bool, String, String)
    where
        T: PartialEq + std::fmt::Debug,
    {
        let matches = actual == expected;
        (matches, format!("{:?}", expected), format!("{:?}", actual))
    }

    match target {
        // Only the token values are compared, so the spans are stripped first.
        Target::Tokenized(expected) => {
            verdict(expected, tokenize(src).map(Spanned::value).collect())
        }
        // Tokens are compared together with their spans.
        Target::TokenizedSpanned(expected) => verdict(expected, tokenize(src).collect()),
    }
}

View File

@ -1,62 +1,74 @@
// Whitespace.
"" => []
" " => [W(0)]
" " => [W(0)]
"\t" => [W(0)]
" \t" => [W(0)]
"\n" => [W(1)]
"\n " => [W(1)]
" \n" => [W(1)]
" \n " => [W(1)]
" \n\t \n " => [W(2)]
"\r\n" => [W(1)]
" \r\r\n \x0D" => [W(3)]
"\n\r" => [W(2)]
t "" => []
t " " => [W(0)]
t " " => [W(0)]
t "\t" => [W(0)]
t " \t" => [W(0)]
t "\n" => [W(1)]
t "\n " => [W(1)]
t " \n" => [W(1)]
t " \n " => [W(1)]
t " \n\t \n " => [W(2)]
t "\r\n" => [W(1)]
t " \r\r\n \x0D" => [W(3)]
t "\n\r" => [W(2)]
// Comments.
"a // bc\n " => [T("a"), W(0), LC(" bc"), W(1)]
"a //a//b\n " => [T("a"), W(0), LC("a//b"), W(1)]
"a //a//b\r\n" => [T("a"), W(0), LC("a//b"), W(1)]
"a //a//b\n\nhello" => [T("a"), W(0), LC("a//b"), W(2), T("hello")]
"/**/" => [BC("")]
"_/*_/*a*/*/" => [U, BC("_/*a*/")]
"/*/*/" => [BC("/*/")]
"abc*/" => [T("abc"), SS]
t "a // bc\n " => [T("a"), W(0), LC(" bc"), W(1)]
t "a //a//b\n " => [T("a"), W(0), LC("a//b"), W(1)]
t "a //a//b\r\n" => [T("a"), W(0), LC("a//b"), W(1)]
t "a //a//b\n\nhello" => [T("a"), W(0), LC("a//b"), W(2), T("hello")]
t "/**/" => [BC("")]
t "_/*_/*a*/*/" => [U, BC("_/*a*/")]
t "/*/*/" => [BC("/*/")]
t "abc*/" => [T("abc"), SS]
// Header only tokens.
"[" => [LB]
"]" => [RB]
"[(){}:=,]" => [LB, LP, RP, LBR, RBR, CL, EQ, CM, RB]
"[a:b]" => [LB, ID("a"), CL, ID("b"), RB]
"[🌓, 🌍,]" => [LB, T("🌓"), CM, W(0), T("🌍"), CM, RB]
"[=]" => [LB, EQ, RB]
"[,]" => [LB, CM, RB]
"a: b" => [T("a"), T(":"), W(0), T("b")]
"c=d, " => [T("c"), T("=d"), T(","), W(0)]
r#"["hello\"world"]"# => [LB, STR(r#"hello\"world"#), RB]
r#"["hi", 12pt]"# => [LB, STR("hi"), CM, W(0), SIZE(Size::pt(12.0)), RB]
"\"hi\"" => [T("\"hi"), T("\"")]
"[a: true, x=1]" => [LB, ID("a"), CL, W(0), BOOL(true), CM, W(0),
t "[" => [LB]
t "]" => [RB]
t "[(){}:=,]" => [LB, LP, RP, LBR, RBR, CL, EQ, CM, RB]
t "[a:b]" => [LB, ID("a"), CL, ID("b"), RB]
t "[🌓, 🌍,]" => [LB, T("🌓"), CM, W(0), T("🌍"), CM, RB]
t "[=]" => [LB, EQ, RB]
t "[,]" => [LB, CM, RB]
t "a: b" => [T("a"), T(":"), W(0), T("b")]
t "c=d, " => [T("c"), T("=d"), T(","), W(0)]
t r#"["hello\"world"]"# => [LB, STR(r#"hello\"world"#), RB]
t r#"["hi", 12pt]"# => [LB, STR("hi"), CM, W(0), SIZE(Size::pt(12.0)), RB]
t "\"hi\"" => [T("\"hi"), T("\"")]
t "[a: true, x=1]" => [LB, ID("a"), CL, W(0), BOOL(true), CM, W(0),
ID("x"), EQ, NUM(1.0), RB]
"[120%]" => [LB, NUM(1.2), RB]
t "[120%]" => [LB, NUM(1.2), RB]
// Body only tokens.
"_*`" => [U, ST, B]
"[_*`]" => [LB, T("_"), T("*"), T("`"), RB]
"hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")]
t "_*`" => [U, ST, B]
t "[func]*bold*" => [LB, ID("func"), RB, ST, T("bold"), ST]
t "[_*`]" => [LB, T("_"), T("*"), T("`"), RB]
t "hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")]
// Escapes.
r"\[" => [T("[")]
r"\]" => [T("]")]
r"\\" => [T(r"\")]
r"\/" => [T("/")]
r"\*" => [T("*")]
r"\_" => [T("_")]
r"\`" => [T("`")]
t r"\[" => [T("[")]
t r"\]" => [T("]")]
t r"\\" => [T(r"\")]
t r"\/" => [T("/")]
t r"\*" => [T("*")]
t r"\_" => [T("_")]
t r"\`" => [T("`")]
// Unescapable special symbols.
r"\:" => [T(r"\"), T(":")]
r"\=" => [T(r"\"), T("=")]
r"[\:]" => [LB, T(r"\"), CL, RB]
r"[\=]" => [LB, T(r"\"), EQ, RB]
r"[\,]" => [LB, T(r"\"), CM, RB]
t r"\:" => [T(r"\"), T(":")]
t r"\=" => [T(r"\"), T("=")]
t r"[\:]" => [LB, T(r"\"), CL, RB]
t r"[\=]" => [LB, T(r"\"), EQ, RB]
t r"[\,]" => [LB, T(r"\"), CM, RB]
// Spans
ts "hello" => [(0:0, 0:5, T("hello"))]
ts "ab\r\nc" => [(0:0, 0:2, T("ab")), (0:2, 1:0, W(1)), (1:0, 1:1, T("c"))]
ts "[a=10]" => [(0:0, 0:1, LB), (0:1, 0:2, ID("a")), (0:2, 0:3, EQ),
(0:3, 0:5, NUM(10.0)), (0:5, 0:6, RB)]
ts r#"[x = "(1)"]*"# => [(0:0, 0:1, LB), (0:1, 0:2, ID("x")), (0:2, 0:3, W(0)),
(0:3, 0:4, EQ), (0:4, 0:5, W(0)), (0:5, 0:10, STR("(1)")),
(0:10, 0:11, RB), (0:11, 0:12, ST)]
ts "// ab\r\n\nf" => [(0:0, 0:5, LC(" ab")), (0:5, 2:0, W(2)), (2:0, 2:1, T("f"))]
ts "/*b*/_" => [(0:0, 0:5, BC("b")), (0:5, 0:6, U)]