Merge Characters struct into tokenizer 🔀

2020-01-13 13:02:33 +01:00 · 2020-01-13 13:02:33 +01:00 · 6527d31dfb
commit 6527d31dfb
parent a8f711d49a
3 changed files with 72 additions and 100 deletions
--- a/src/syntax/expr.rs
+++ b/src/syntax/expr.rs
@ -166,27 +166,6 @@ impl Display for Ident {

 debug_display!(Ident);

-/// Whether this word is a valid identifier.
-pub fn is_identifier(string: &str) -> bool {
-    let mut chars = string.chars();
-
-    match chars.next() {
-        Some('-') => {}
-        Some(c) if UnicodeXID::is_xid_start(c) => {}
-        _ => return false,
-    }
-
-    while let Some(c) = chars.next() {
-        match c {
-            '.' | '-' => {}
-            c if UnicodeXID::is_xid_continue(c) => {}
-            _ => return false,
-        }
-    }
-
-    true
-}
-
 /// Kinds of expressions.
 pub trait ExpressionKind: Sized {
    const NAME: &'static str;
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@ -72,9 +72,11 @@ pub fn tokenize(src: &str) -> Tokens {
 /// An iterator over the tokens of a string of source code.
 pub struct Tokens<'s> {
    src: &'s str,
-    chars: Characters<'s>,
    state: State,
    stack: Vec<State>,
+    iter: Peekable<Chars<'s>>,
+    position: Position,
+    index: usize,
 }

 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
@ -88,9 +90,11 @@ impl<'s> Tokens<'s> {
    pub fn new(src: &'s str) -> Tokens<'s> {
        Tokens {
            src,
-            chars: Characters::new(src),
            state: State::Body,
            stack: vec![],
+            iter: src.chars().peekable(),
+            position: Position::ZERO,
+            index: 0,
        }
    }
 }
@ -100,26 +104,29 @@ impl<'s> Iterator for Tokens<'s> {

    /// Parse the next token in the source code.
    fn next(&mut self) -> Option<Spanned<Token<'s>>> {
-        let start = self.chars.position();
-        let first = self.chars.next()?;
-        let second = self.chars.peek();
+        let start = self.pos();
+        let first = self.eat()?;

        let token = match first {
            // Comments.
-            '/' if second == Some('/') => self.parse_line_comment(),
-            '/' if second == Some('*') => self.parse_block_comment(),
-            '*' if second == Some('/') => { self.eat(); StarSlash }
+            '/' if self.peek() == Some('/') => self.parse_line_comment(),
+            '/' if self.peek() == Some('*') => self.parse_block_comment(),
+            '*' if self.peek() == Some('/') => { self.eat(); StarSlash }

            // Whitespace.
            c if c.is_whitespace() => self.parse_whitespace(start),

            // Functions.
-            '[' => { self.set_state(Header); LeftBracket }
+            '[' => {
+                self.stack.push(self.state);
+                self.state = Header;
+                LeftBracket
+            }
            ']' => {
-                if self.state == Header && second == Some('[') {
+                if self.state == Header && self.peek() == Some('[') {
                    self.state = StartBody;
                } else {
-                    self.pop_state();
+                    self.state = self.stack.pop().unwrap_or(Body);
                }

                RightBracket
@ -164,7 +171,7 @@ impl<'s> Iterator for Tokens<'s> {
            }
        };

-        let end = self.chars.position();
+        let end = self.pos();
        let span = Span { start, end };

        Some(Spanned { v: token, span })
@ -206,7 +213,7 @@ impl<'s> Tokens<'s> {

    fn parse_whitespace(&mut self, start: Position) -> Token<'s> {
        self.read_string_until(|n| !n.is_whitespace(), false, 0, 0);
-        let end = self.chars.position();
+        let end = self.pos();

        Whitespace(end.line - start.line)
    }
@ -234,9 +241,9 @@ impl<'s> Tokens<'s> {
            }
        }

-        let c = self.chars.peek().unwrap_or('n');
+        let c = self.peek().unwrap_or('n');
        if self.state == Body && is_escapable(c) {
-            let index = self.chars.index();
+            let index = self.index();
            self.eat();
            Text(&self.src[index .. index + c.len_utf8()])
        } else {
@ -267,22 +274,22 @@ impl<'s> Tokens<'s> {
        offset_start: isize,
        offset_end: isize,
    ) -> &'s str where F: FnMut(char) -> bool {
-        let start = ((self.chars.index() as isize) + offset_start) as usize;
+        let start = ((self.index() as isize) + offset_start) as usize;
        let mut matched = false;

-        while let Some(c) = self.chars.peek() {
+        while let Some(c) = self.peek() {
            if f(c) {
                matched = true;
                if eat_match {
-                    self.chars.next();
+                    self.eat();
                }
                break;
            }

-            self.chars.next();
+            self.eat();
        }

-        let mut end = self.chars.index();
+        let mut end = self.index();
        if matched {
            end = ((end as isize) + offset_end) as usize;
        }
@ -290,55 +297,7 @@ impl<'s> Tokens<'s> {
        &self.src[start .. end]
    }

-    fn set_state(&mut self, state: State) {
-        self.stack.push(self.state);
-        self.state = state;
-    }
-
-    fn pop_state(&mut self) {
-        self.state = self.stack.pop().unwrap_or(Body);
-    }
-
-    fn eat(&mut self) {
-        self.chars.next();
-    }
-}
-
-fn parse_percentage(text: &str) -> Option<f64> {
-    if text.ends_with('%') {
-        text[.. text.len() - 1].parse::<f64>().ok()
-    } else {
-        None
-    }
-}
-
-/// Whether this character denotes a newline.
-fn is_newline_char(character: char) -> bool {
-    match character {
-        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
-        '\x0A' ..= '\x0D' => true,
-        // Next Line, Line Separator, Paragraph Separator.
-        '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
-        _ => false,
-    }
-}
-
-struct Characters<'s> {
-    iter: Peekable<Chars<'s>>,
-    position: Position,
-    index: usize,
-}
-
-impl<'s> Characters<'s> {
-    fn new(src: &'s str) -> Characters<'s> {
-        Characters {
-            iter: src.chars().peekable(),
-            position: Position::ZERO,
-            index: 0,
-        }
-    }
-
-    fn next(&mut self) -> Option<char> {
+    fn eat(&mut self) -> Option<char> {
        let c = self.iter.next()?;
        let len = c.len_utf8();

@ -362,7 +321,47 @@ impl<'s> Characters<'s> {
        self.index
    }

-    fn position(&self) -> Position {
+    fn pos(&self) -> Position {
        self.position
    }
 }
+
+fn parse_percentage(text: &str) -> Option<f64> {
+    if text.ends_with('%') {
+        text[.. text.len() - 1].parse::<f64>().ok()
+    } else {
+        None
+    }
+}
+
+/// Whether this character denotes a newline.
+pub fn is_newline_char(character: char) -> bool {
+    match character {
+        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+        '\x0A' ..= '\x0D' => true,
+        // Next Line, Line Separator, Paragraph Separator.
+        '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
+        _ => false,
+    }
+}
+
+/// Whether this word is a valid identifier.
+pub fn is_identifier(string: &str) -> bool {
+    let mut chars = string.chars();
+
+    match chars.next() {
+        Some('-') => {}
+        Some(c) if UnicodeXID::is_xid_start(c) => {}
+        _ => return false,
+    }
+
+    while let Some(c) = chars.next() {
+        match c {
+            '.' | '-' => {}
+            c if UnicodeXID::is_xid_continue(c) => {}
+            _ => return false,
+        }
+    }
+
+    true
+}
--- a/tests/parse.rs
+++ b/tests/parse.rs
@ -9,18 +9,12 @@ use Token::{
    LeftBracket as LB, RightBracket as RB,
    LeftParen as LP, RightParen as RP,
    LeftBrace as LBR, RightBrace as RBR,
-    Colon as CL, Comma as CM, Equals as EQ, Expr as E,
+    Colon as CL, Comma as CM, Equals as EQ,
+    ExprIdent as ID, ExprString as STR, ExprSize as SIZE,
+    ExprNumber as NUM, ExprBool as BOOL,
    Star as ST, Underscore as U, Backtick as B, Text as T,
 };

-use Expression as Expr;
-fn ID(ident: &str) -> Token { E(Expr::Ident(Ident::new(ident.to_string()).unwrap())) }
-fn STR(ident: &str) -> Token { E(Expr::Str(ident.to_string())) }
-fn SIZE(size: Size) -> Token<'static> { E(Expr::Size(size)) }
-fn NUM(num: f64) -> Token<'static> { E(Expr::Num(num)) }
-fn BOOL(b: bool) -> Token<'static> { E(Expr::Bool(b)) }
-
-
 /// Parses the test syntax.
 macro_rules! tokens {
    ($($task:ident $src:expr =>($line:expr)=> [$($target:tt)*])*) => ({