Improve syntax testing framework ♻

... and finally expand a few escape sequences in strings.
2020-02-07 22:29:16 +01:00 · 2020-02-07 22:29:16 +01:00 · 4e8359385f
commit 4e8359385f
parent 02dc29d18a
4 changed files with 237 additions and 158 deletions
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@ -9,6 +9,10 @@ use crate::{Pass, Feedback};
 use crate::layout::{LayoutContext, Commands, Command};
 use self::span::{Spanned, SpanVec};

+#[cfg(test)]
+#[macro_use]
+mod test;
+
 pub mod expr;
 pub mod func;
 pub mod span;
@ -16,9 +20,6 @@ pub_use_mod!(scope);
 pub_use_mod!(parsing);
 pub_use_mod!(tokens);

-#[cfg(test)]
-mod test;
-

 /// Represents a parsed piece of source that can be layouted and in the future
 /// also be queried for information used for refactorings, autocomplete, etc.
--- a/src/syntax/parsing.rs
+++ b/src/syntax/parsing.rs
@ -246,7 +246,7 @@ impl<'s> FuncParser<'s> {
                }

                self.eat();
-                spanned(Expr::Str(string.to_string()))
+                spanned(Expr::Str(unescape(string)))
            }
            Token::ExprNumber(n) => { self.eat(); spanned(Expr::Number(n)) }
            Token::ExprSize(s) => { self.eat(); spanned(Expr::Size(s)) }
@ -363,130 +363,108 @@ impl<'s> FuncParser<'s> {
    }
 }

+/// Unescape a string: `\\`, `\"`, `\n` and `\t` are expanded, unknown escapes are kept as-is.
+fn unescape(string: &str) -> String {
+    let mut s = String::with_capacity(string.len());
+    let mut escaped = false;
+
+    for c in string.chars() {
+        if c == '\\' {
+            if escaped {
+                s.push('\\');
+            }
+            escaped = !escaped;
+        } else {
+            if escaped {
+                match c {
+                    '"' => s.push('"'),
+                    'n' => s.push('\n'),
+                    't' => s.push('\t'),
+                    c => { s.push('\\'); s.push(c); }
+                }
+            } else {
+                s.push(c);
+            }
+
+            escaped = false;
+        }
+    }
+
+    s
+}
+

 #[cfg(test)]
 #[allow(non_snake_case)]
 mod tests {
    use crate::size::Size;
-    use super::super::test::{DebugFn, SpanlessEq};
+    use super::super::test::{DebugFn, check, zspan};
    use super::*;

+    use Decoration::*;
    use Node::{
        Space as S, Newline as N,
        ToggleItalic as Italic, ToggleBolder as Bold, ToggleMonospace as Mono,
    };
-    use Decoration::*;
-
-    pub use Expr::{Number as Num, Bool};
-    pub fn Id(text: &str) -> Expr { Expr::Ident(Ident(text.to_string())) }
-    pub fn Str(text: &str) -> Expr { Expr::Str(text.to_string()) }

+    use Expr::{/*Number as Num,*/ Bool};
+    fn Id(text: &str) -> Expr { Expr::Ident(Ident(text.to_string())) }
+    fn Str(text: &str) -> Expr { Expr::Str(text.to_string()) }
    fn T(text: &str) -> Node { Node::Text(text.to_string()) }

+    /// Test whether the parse result of the source matches the model after both went through the transform.
+    macro_rules! test {
+        ($source:expr => [$($model:tt)*], $transform:expr) => {
+            let (exp, cmp) = spanned![vec $($model)*];
+
+            let mut scope = Scope::new::<DebugFn>();
+            scope.add::<DebugFn>("f");
+            scope.add::<DebugFn>("n");
+            scope.add::<DebugFn>("box");
+            let ctx = ParseContext { scope: &scope };
+
+            let found = parse(Position::ZERO, $source, ctx);
+            let (exp, found) = $transform(exp, found);
+
+            check($source, exp, found, cmp);
+        };
+    }
+
    /// Test whether the given string parses into the given node list.
    macro_rules! p {
-        ($s:expr => [$($b:tt)*]) => {
-            let ctx = ParseContext { scope: &scope() };
-            let model = parse(Position::ZERO, $s, ctx).output;
-            let (expected, cmp) = model!([$($b)*]);
-
-            if !cmp(&model, &expected) {
-                fail($s, model, expected);
-            }
+        ($($tts:tt)*) => {
+            test!($($tts)*, |exp, found: Pass<SyntaxModel>| (exp, found.output.nodes));
        };
    }

    /// Test whether the given string yields the given parse errors.
    macro_rules! e {
-        ($s:expr => [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $e:expr)),* $(,)?]) => {
-            let ctx = ParseContext { scope: &scope() };
-            let errors = parse(Position::ZERO, $s, ctx).feedback
-                .errors
-                .into_iter()
-                .map(|s| s.map(|e| e.message))
-                .collect::<Vec<_>>();
-
-            let expected = vec![
-                $(Spanned {
-                    v: $e.to_string(),
-                    span: Span {
-                        start: Position { line: $sl, column: $sc },
-                        end:   Position { line: $el, column: $ec },
-                    },
-                }),*
-            ];
-
-            if errors != expected {
-                fail($s, errors, expected);
-            }
+        ($($tts:tt)*) => {
+            test!($($tts)*, |exp: Vec<Spanned<&str>>, found: Pass<SyntaxModel>| (
+                exp.into_iter().map(|s| s.map(|e| e.to_string())).collect::<Vec<_>>(),
+                found.feedback.errors.into_iter().map(|s| s.map(|e| e.message))
+                    .collect::<Vec<_>>()
+            ));
        };
    }

    /// Test whether the given string yields the given decorations.
    macro_rules! d {
-        ($s:expr => [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $d:expr)),* $(,)?]) => {
-            let ctx = ParseContext { scope: &scope() };
-            let decos = parse(Position::ZERO, $s, ctx).feedback.decos;
-
-            let expected = vec![
-                $(Spanned {
-                    v: $d,
-                    span: Span {
-                        start: Position { line: $sl, column: $sc },
-                        end:   Position { line: $el, column: $ec },
-                    },
-                }),*
-            ];
-
-            if decos != expected {
-                fail($s, decos, expected);
-            }
+        ($($tts:tt)*) => {
+            test!($($tts)*, |exp, found: Pass<SyntaxModel>| (exp, found.feedback.decos));
        };
    }

-    fn scope() -> Scope {
-        let mut scope = Scope::new::<DebugFn>();
-        scope.add::<DebugFn>("f");
-        scope.add::<DebugFn>("n");
-        scope.add::<DebugFn>("box");
-        scope
-    }
-
-    fn fail(src: &str, found: impl Debug, expected: impl Debug) {
-        eprintln!("source:   {:?}", src);
-        eprintln!("found:    {:#?}", found);
-        eprintln!("expected: {:#?}", expected);
-        panic!("test failed");
-    }
-
-    /// Parse a list of optionally spanned nodes into a syntax model.
-    macro_rules! model {
-        ([$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $n:expr)),* $(,)?]) => ((SyntaxModel {
-            nodes: vec![
-                $(Spanned { v: $n, span: Span {
-                    start: Position { line: $sl, column: $sc },
-                    end:   Position { line: $el, column: $ec },
-                }}),*
-            ]
-        }, <SyntaxModel as PartialEq>::eq));
-
-        ([$($e:tt)*]) => ((SyntaxModel {
-            nodes: vec![$($e)*].into_iter().map(zspan).collect::<Vec<_>>()
-        }, <SyntaxModel as SpanlessEq>::spanless_eq));
-    }
-
-    /// Build a `DebugFn` function model.
+    /// Write down a `DebugFn` function model compactly.
    macro_rules! func {
        ($name:expr
         $(,pos: [$($item:expr),* $(,)?])?
         $(,key: [$($key:expr => $value:expr),* $(,)?])?;
         $($b:tt)*) => ({
-            #![allow(unused_mut, unused_assignments)]
-
-            let mut pos = Tuple::new();
-            let mut key = Object::new();
-            $(pos = Tuple { items: vec![$(zspan($item)),*] };)?
-            $(key = Object {
+            #[allow(unused_mut)]
+            let mut args = FuncArgs::new();
+            $(args.pos = Tuple { items: spanned![vec $($item),*].0 };)?
+            $(args.key = Object {
                pairs: vec![$(Pair {
                    key: zspan(Ident($key.to_string())),
                    value: zspan($value),
@ -496,22 +474,32 @@ mod tests {
            Node::Model(Box::new(DebugFn {
                header: FuncHeader {
                    name: zspan(Ident($name.to_string())),
-                    args: FuncArgs {
-                        pos,
-                        key,
-                    },
+                    args,
                },
                body: func!(@body $($b)*),
            }))
        });

-        (@body Some([$($b:tt)*])) => (Some(model!([$($b)*]).0));
+        (@body Some([$($body:tt)*])) => ({
+            Some(SyntaxModel { nodes: spanned![vec $($body)*].0 })
+        });
+
        (@body None) => (None);
    }

-    /// Span an element with a zero span.
-    fn zspan<T>(v: T) -> Spanned<T> {
-        Spanned { v, span: Span::ZERO }
+    #[test]
+    fn unescape_strings() {
+        fn test(string: &str, expected: &str) {
+            assert_eq!(unescape(string), expected.to_string());
+        }
+
+        test(r#"hello world"#,  "hello world");
+        test(r#"hello\nworld"#, "hello\nworld");
+        test(r#"a\"bc"#,        "a\"bc");
+        test(r#"a\\"#,          "a\\");
+        test(r#"a\\\nbc"#,      "a\\\nbc");
+        test(r#"a\tbc"#,        "a\tbc");
+        test("🌎",              "🌎");
    }

    #[test]
--- a/src/syntax/test.rs
+++ b/src/syntax/test.rs
@ -1,8 +1,62 @@
+use std::fmt::Debug;
+
 use super::func::FuncHeader;
 use super::expr::{Expr, Tuple, Object};
+use super::span::{Span, Spanned};
+use super::tokens::Token;
 use super::*;


+/// Check whether the expected and found results for the given source code are
+/// equal (spans are only compared if `spans` is true); print both and panic otherwise.
+pub fn check<T>(src: &str, exp: T, found: T, spans: bool)
+where T: Debug + PartialEq + SpanlessEq {
+    let cmp = if spans { PartialEq::eq } else { SpanlessEq::spanless_eq };
+    if !cmp(&exp, &found) {
+        println!("source:   {:?}", src);
+        println!("expected: {:#?}", exp);
+        println!("found:    {:#?}", found);
+        panic!("test failed");
+    }
+}
+
+/// Create a vector of optionally spanned expressions from a list description.
+///
+/// # Examples
+/// When you want to add span information to the items, the format is as
+/// follows.
+/// ```
+/// spanned![vec (0:0, 0:5, "hello"), (0:6, 0:11, "world")]
+/// ```
+/// The span information can simply be omitted to create a vector with items
+/// that are spanned with dummy zero spans.
+macro_rules! spanned {
+    (item ($sl:tt:$sc:tt, $el:tt:$ec:tt, $v:expr)) => ({
+        #[allow(unused_imports)]
+        use $crate::syntax::span::{Position, Span, Spanned};
+        Spanned {
+            span: Span::new(
+                Position::new($sl, $sc),
+                Position::new($el, $ec)
+            ),
+            v: $v
+        }
+    });
+
+    (vec $(($sl:tt:$sc:tt, $el:tt:$ec:tt, $v:expr)),* $(,)?) => {
+        (vec![$(spanned![item ($sl:$sc, $el:$ec, $v)]),*], true)
+    };
+
+    (vec $($v:expr),* $(,)?) => {
+        (vec![$($crate::syntax::test::zspan($v)),*], false)
+    };
+}
+
+/// Span an element with a zero span.
+pub fn zspan<T>(v: T) -> Spanned<T> {
+    Spanned { v, span: Span::ZERO }
+}
+
 function! {
    /// Most functions in the tests are parsed into the debug function for easy
    /// inspection of arguments and body.
@ -30,26 +84,31 @@ pub trait SpanlessEq<Rhs=Self> {
    fn spanless_eq(&self, other: &Rhs) -> bool;
 }

-impl SpanlessEq for Vec<Spanned<Token<'_>>> {
-    fn spanless_eq(&self, other: &Vec<Spanned<Token>>) -> bool {
+impl<T: SpanlessEq> SpanlessEq for Vec<Spanned<T>> {
+    fn spanless_eq(&self, other: &Vec<Spanned<T>>) -> bool {
        self.len() == other.len()
-        && self.iter().zip(other).all(|(x, y)| x.v == y.v)
+        && self.iter().zip(other).all(|(x, y)| x.v.spanless_eq(&y.v))
    }
 }

 impl SpanlessEq for SyntaxModel {
    fn spanless_eq(&self, other: &SyntaxModel) -> bool {
+        self.nodes.spanless_eq(&other.nodes)
+    }
+}
+
+impl SpanlessEq for Node {
+    fn spanless_eq(&self, other: &Node) -> bool {
        fn downcast<'a>(func: &'a (dyn Model + 'static)) -> &'a DebugFn {
            func.downcast::<DebugFn>().expect("not a debug fn")
        }

-        self.nodes.len() == other.nodes.len()
-        && self.nodes.iter().zip(&other.nodes).all(|(x, y)| match (&x.v, &y.v) {
+        match (self, other) {
            (Node::Model(a), Node::Model(b)) => {
                downcast(a.as_ref()).spanless_eq(downcast(b.as_ref()))
            }
            (a, b) => a == b,
-        })
+        }
    }
 }

@ -86,3 +145,18 @@ impl SpanlessEq for Object {
            .all(|(x, y)| x.key.v == y.key.v && x.value.v.spanless_eq(&y.value.v))
    }
 }
+
+/// Implement `SpanlessEq` by just forwarding to `PartialEq`.
+macro_rules! forward {
+    ($type:ty) => {
+        impl SpanlessEq for $type {
+            fn spanless_eq(&self, other: &$type) -> bool {
+                self == other
+            }
+        }
+    };
+}
+
+forward!(String);
+forward!(Token<'_>);
+forward!(Decoration);
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@ -65,6 +65,10 @@ pub enum Token<'s> {
    /// A quoted string in a function header: `"..."`.
    ExprStr {
        /// The string inside the quotes.
+        ///
+        /// _Note_: Escape sequences in the string are not yet resolved here, so
+        /// that a plain string slice can be stored instead of a `String`. The
+        /// unescaping is done later in the parser.
        string: &'s str,
        /// Whether the closing quote was present.
        terminated: bool
@ -210,11 +214,13 @@ impl<'s> Iterator for Tokens<'s> {

            // Expressions or just strings.
            c => {
+                let body = self.mode == Body;
                let text = self.read_string_until(|n| {
                    match n {
                        c if c.is_whitespace() => true,
-                        '\\' | '[' | ']' | '*' | '_' | '`' | ':' | '=' |
-                        ',' | '"' | '/' => true,
+                        '\\' | '[' | ']' | '/' => true,
+                        '*' | '_' | '`' if body => true,
+                        ':' | '=' | ',' | '"' if !body => true,
                        _ => false,
                    }
                }, false, -(c.len_utf8() as isize), 0).0;
@ -441,18 +447,19 @@ pub fn is_newline_char(character: char) -> bool {

 /// Whether this word is a valid identifier.
 pub fn is_identifier(string: &str) -> bool {
-    let mut chars = string.chars();
+    fn is_extra_allowed(c: char) -> bool {
+        c == '.' || c == '-' || c == '_'
+    }

+    let mut chars = string.chars();
    match chars.next() {
-        Some('-') => {}
-        Some(c) if UnicodeXID::is_xid_start(c) => {}
+        Some(c) if UnicodeXID::is_xid_start(c) || is_extra_allowed(c) => {}
        _ => return false,
    }

    while let Some(c) = chars.next() {
        match c {
-            '.' | '-' => {}
-            c if UnicodeXID::is_xid_continue(c) => {}
+            c if UnicodeXID::is_xid_continue(c) || is_extra_allowed(c) => {}
            _ => return false,
        }
    }
@ -460,11 +467,10 @@ pub fn is_identifier(string: &str) -> bool {
    true
 }

-
 #[cfg(test)]
 mod tests {
+    use super::super::test::check;
    use super::*;
-
    use Token::{
        Space as S,
        LineComment as LC, BlockComment as BC,
@ -481,32 +487,19 @@ mod tests {

    /// Test whether the given string tokenizes into the given list of tokens.
    macro_rules! t {
-        ($m:expr, $s:expr => [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $t:expr)),* $(,)?]) => {
-            let tokens = Tokens::new(Position::ZERO, $s, $m).collect::<Vec<_>>();
-            assert_eq!(tokens, vec![$(Spanned {
-                span: Span::new(Position::new($sl, $sc), Position::new($el, $ec)),
-                v: $t
-            }),*]);
-
-        };
-
-        ($m:expr, $s:expr => [$($t:expr),* $(,)?]) => {
-            let tokens = Tokens::new(Position::ZERO, $s, $m)
-                .map(Spanned::value)
-                .collect::<Vec<_>>();
-            assert_eq!(tokens, vec![$($t),*]);
-        };
+        ($mode:expr, $source:expr => [$($tokens:tt)*]) => {
+            let (exp, spans) = spanned![vec $($tokens)*];
+            let found = Tokens::new(Position::ZERO, $source, $mode).collect::<Vec<_>>();
+            check($source, exp, found, spans);
+        }
    }

-    /// Parse a function token.
+    /// Write down a function token compactly.
    macro_rules! func {
-        ($header:expr, Some(($sl:tt:$sc:tt, $el:tt:$ec:tt, $body:expr)), $terminated:expr) => {
+        ($header:expr, Some($($tokens:tt)*), $terminated:expr) => {
            Function {
                header: $header,
-                body: Some(Spanned {
-                    span: Span::new(Position::new($sl, $sc), Position::new($el, $ec)),
-                    v: $body,
-                }),
+                body: Some(spanned![item $($tokens)*]),
                terminated: $terminated,
            }
        };
@ -542,40 +535,63 @@ mod tests {
        t!(Body, "_/*_/*a*/*/"       => [Underscore, BC("_/*a*/")]);
        t!(Body, "/*/*/"             => [BC("/*/")]);
        t!(Body, "abc*/"             => [T("abc"), Invalid("*/")]);
-    }
-
-    #[test]
-    fn tokenize_header_only_tokens() {
-        t!(Body, "\"hi\""              => [T("\"hi"), T("\"")]);
-        t!(Body, "a: b"                => [T("a"), T(":"), S(0), T("b")]);
-        t!(Body, "c=d, "               => [T("c"), T("=d"), T(","), S(0)]);
-        t!(Header, "["                 => [func!("", None, false)]);
-        t!(Header, "]"                 => [Invalid("]")]);
-        t!(Header, "(){}:=,"           => [LP, RP, LB, RB, Colon, Equals, Comma]);
-        t!(Header, "a:b"               => [Id("a"), Colon, Id("b")]);
-        t!(Header, "="                 => [Equals]);
-        t!(Header, ","                 => [Comma]);
-        t!(Header, r#""hello\"world""# => [Str(r#"hello\"world"#, true)]);
-        t!(Header, r#""hi", 12pt"#     => [Str("hi", true), Comma, S(0), ExprSize(Size::pt(12.0))]);
-        t!(Header, "a: true, x=1"      => [Id("a"), Colon, S(0), Bool(true), Comma, S(0), Id("x"), Equals, Num(1.0)]);
-        t!(Header, "120%"              => [Num(1.2)]);
-        t!(Header, "🌓, 🌍,"           => [Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma]);
+        t!(Body, "/***/"             => [BC("*")]);
+        t!(Body, "/**\\****/*/*/"    => [BC("*\\***"), Invalid("*/"), Invalid("*/")]);
+        t!(Body, "/*abc"             => [BC("abc")]);
    }

    #[test]
    fn tokenize_body_only_tokens() {
        t!(Body, "_*`"           => [Underscore, Star, Backtick]);
+        t!(Body, "***"           => [Star, Star, Star]);
        t!(Body, "[func]*bold*"  => [func!("func", None, true), Star, T("bold"), Star]);
        t!(Body, "hi_you_ there" => [T("hi"), Underscore, T("you"), Underscore, S(0), T("there")]);
-        t!(Header, "_*`"         => [Invalid("_"), Invalid("*"), Invalid("`")]);
+        t!(Header, "_*`"         => [Invalid("_*`")]);
    }

    #[test]
-    fn tokenize_nested_functions() {
+    fn tokenize_header_only_tokens() {
+        t!(Body, "a: b"                => [T("a:"), S(0), T("b")]);
+        t!(Body, "c=d, "               => [T("c=d,"), S(0)]);
+        t!(Header, "(){}:=,"           => [LP, RP, LB, RB, Colon, Equals, Comma]);
+        t!(Header, "a:b"               => [Id("a"), Colon, Id("b")]);
+        t!(Header, "a: true, x=1"      => [Id("a"), Colon, S(0), Bool(true), Comma, S(0), Id("x"), Equals, Num(1.0)]);
+        t!(Header, "=3.14"             => [Equals, Num(3.14)]);
+        t!(Header, "12.3e5"            => [Num(12.3e5)]);
+        t!(Header, "120%"              => [Num(1.2)]);
+        t!(Header, "12e4%"             => [Num(1200.0)]);
+        t!(Header, "__main__"          => [Id("__main__")]);
+        t!(Header, ".func.box"         => [Id(".func.box")]);
+        t!(Header, "--arg, _b, _1"     => [Id("--arg"), Comma, S(0), Id("_b"), Comma, S(0), Id("_1")]);
+        t!(Header, "12_pt, 12pt"       => [Invalid("12_pt"), Comma, S(0), ExprSize(Size::pt(12.0))]);
+        t!(Header, "1e5in"             => [ExprSize(Size::inches(100000.0))]);
+        t!(Header, "2.3cm"             => [ExprSize(Size::cm(2.3))]);
+        t!(Header, "02.4mm"            => [ExprSize(Size::mm(2.4))]);
+        t!(Header, "2.4.cm"            => [Invalid("2.4.cm")]);
+        t!(Header, "🌓, 🌍,"           => [Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma]);
+    }
+
+    #[test]
+    fn tokenize_strings() {
+        t!(Body, "a \"hi\" string"           => [T("a"), S(0), T("\"hi\""), S(0), T("string")]);
+        t!(Header, "\"hello"                 => [Str("hello", false)]);
+        t!(Header, "\"hello world\""         => [Str("hello world", true)]);
+        t!(Header, "\"hello\nworld\""        => [Str("hello\nworld", true)]);
+        t!(Header, r#"1"hello\nworld"false"# => [Num(1.0), Str("hello\\nworld", true), Bool(false)]);
+        t!(Header, r#""a\"bc""#              => [Str(r#"a\"bc"#, true)]);
+        t!(Header, r#""a\\"bc""#             => [Str(r#"a\\"#, true), Id("bc"), Str("", false)]);
+        t!(Header, r#""a\tbc"#               => [Str("a\\tbc", false)]);
+        t!(Header, "\"🌎\""                      => [Str("🌎", true)]);
+    }
+
+    #[test]
+    fn tokenize_functions() {
        t!(Body, "[f: [=][*]]"    => [func!("f: [=][*]", None, true)]);
        t!(Body, "[_][[,],],"     => [func!("_", Some((0:3, 0:9, "[,],")), true), T(",")]);
        t!(Body, "[=][=][=]"      => [func!("=", Some((0:3, 0:6, "=")), true), func!("=", None, true)]);
        t!(Body, "[=][[=][=][=]]" => [func!("=", Some((0:3, 0:14, "[=][=][=]")), true)]);
+        t!(Header, "["            => [func!("", None, false)]);
+        t!(Header, "]"            => [Invalid("]")]);
    }

    #[test]