Merge Characters struct into tokenizer 🔀
parent a8f711d49a
commit 6527d31dfb
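In short, this change folds the small `Characters` character-stream helper into the `Tokens` struct itself: the `iter`, `position`, and `index` fields move onto `Tokens`, and the former `Characters` methods become `eat`, `peek`, `pos`, and `index` on `Tokens`. Below is a minimal, self-contained sketch of the merged helper, assuming a simplified stand-in `Position` type; the `peek` body and the position bookkeeping inside `eat` are not shown in the diff and are illustrative only, and the real `Tokens` also carries the `state` and `stack` fields seen in the hunks.

use std::iter::Peekable;
use std::str::Chars;

/// Hypothetical stand-in for the crate's `Position` type (line/column pair).
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct Position {
    line: usize,
    column: usize,
}

impl Position {
    const ZERO: Position = Position { line: 0, column: 0 };
}

/// The merged tokenizer: the former `Characters` fields now live on `Tokens`.
/// (`state` and `stack` from the real struct are omitted for brevity.)
#[allow(dead_code)]
struct Tokens<'s> {
    src: &'s str,
    iter: Peekable<Chars<'s>>,
    position: Position,
    index: usize,
}

impl<'s> Tokens<'s> {
    fn new(src: &'s str) -> Tokens<'s> {
        Tokens {
            src,
            iter: src.chars().peekable(),
            position: Position::ZERO,
            index: 0,
        }
    }

    /// Consume the next character, advancing the byte index and the
    /// line/column position (everything after `len_utf8` is assumed;
    /// the diff cuts off there).
    fn eat(&mut self) -> Option<char> {
        let c = self.iter.next()?;
        let len = c.len_utf8();

        self.index += len;
        if c == '\n' {
            self.position.line += 1;
            self.position.column = 0;
        } else {
            self.position.column += 1;
        }

        Some(c)
    }

    /// Look at the next character without consuming it (assumed body).
    fn peek(&mut self) -> Option<char> {
        self.iter.peek().copied()
    }

    /// Current byte offset into the source.
    fn index(&self) -> usize {
        self.index
    }

    /// Current line/column position.
    fn pos(&self) -> Position {
        self.position
    }
}

fn main() {
    let mut tokens = Tokens::new("[hi]");
    assert_eq!(tokens.peek(), Some('['));
    assert_eq!(tokens.eat(), Some('['));
    assert_eq!(tokens.index(), 1);
    println!("next: {:?} at {:?}", tokens.peek(), tokens.pos());
}

With the helper merged, call sites switch from `self.chars.position()` / `self.chars.next()` / `self.chars.peek()` / `self.chars.index()` to `self.pos()` / `self.eat()` / `self.peek()` / `self.index()`, as the hunks below show.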
@@ -166,27 +166,6 @@ impl Display for Ident {
 
 debug_display!(Ident);
 
-/// Whether this word is a valid identifier.
-pub fn is_identifier(string: &str) -> bool {
-    let mut chars = string.chars();
-
-    match chars.next() {
-        Some('-') => {}
-        Some(c) if UnicodeXID::is_xid_start(c) => {}
-        _ => return false,
-    }
-
-    while let Some(c) = chars.next() {
-        match c {
-            '.' | '-' => {}
-            c if UnicodeXID::is_xid_continue(c) => {}
-            _ => return false,
-        }
-    }
-
-    true
-}
-
 /// Kinds of expressions.
 pub trait ExpressionKind: Sized {
     const NAME: &'static str;
@@ -72,9 +72,11 @@ pub fn tokenize(src: &str) -> Tokens {
 /// An iterator over the tokens of a string of source code.
 pub struct Tokens<'s> {
     src: &'s str,
-    chars: Characters<'s>,
     state: State,
     stack: Vec<State>,
+    iter: Peekable<Chars<'s>>,
+    position: Position,
+    index: usize,
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
@@ -88,9 +90,11 @@ impl<'s> Tokens<'s> {
     pub fn new(src: &'s str) -> Tokens<'s> {
         Tokens {
             src,
-            chars: Characters::new(src),
             state: State::Body,
             stack: vec![],
+            iter: src.chars().peekable(),
+            position: Position::ZERO,
+            index: 0,
         }
     }
 }
@@ -100,26 +104,29 @@ impl<'s> Iterator for Tokens<'s> {
 
     /// Parse the next token in the source code.
     fn next(&mut self) -> Option<Spanned<Token<'s>>> {
-        let start = self.chars.position();
-        let first = self.chars.next()?;
-        let second = self.chars.peek();
+        let start = self.pos();
+        let first = self.eat()?;
 
         let token = match first {
             // Comments.
-            '/' if second == Some('/') => self.parse_line_comment(),
-            '/' if second == Some('*') => self.parse_block_comment(),
-            '*' if second == Some('/') => { self.eat(); StarSlash }
+            '/' if self.peek() == Some('/') => self.parse_line_comment(),
+            '/' if self.peek() == Some('*') => self.parse_block_comment(),
+            '*' if self.peek() == Some('/') => { self.eat(); StarSlash }
 
             // Whitespace.
             c if c.is_whitespace() => self.parse_whitespace(start),
 
             // Functions.
-            '[' => { self.set_state(Header); LeftBracket }
+            '[' => {
+                self.stack.push(self.state);
+                self.state = Header;
+                LeftBracket
+            }
             ']' => {
-                if self.state == Header && second == Some('[') {
+                if self.state == Header && self.peek() == Some('[') {
                     self.state = StartBody;
                 } else {
-                    self.pop_state();
+                    self.state = self.stack.pop().unwrap_or(Body);
                 }
 
                 RightBracket
@@ -164,7 +171,7 @@ impl<'s> Iterator for Tokens<'s> {
             }
         };
 
-        let end = self.chars.position();
+        let end = self.pos();
         let span = Span { start, end };
 
         Some(Spanned { v: token, span })
@@ -206,7 +213,7 @@ impl<'s> Tokens<'s> {
 
     fn parse_whitespace(&mut self, start: Position) -> Token<'s> {
         self.read_string_until(|n| !n.is_whitespace(), false, 0, 0);
-        let end = self.chars.position();
+        let end = self.pos();
 
         Whitespace(end.line - start.line)
     }
@@ -234,9 +241,9 @@ impl<'s> Tokens<'s> {
            }
         }
 
-        let c = self.chars.peek().unwrap_or('n');
+        let c = self.peek().unwrap_or('n');
         if self.state == Body && is_escapable(c) {
-            let index = self.chars.index();
+            let index = self.index();
             self.eat();
             Text(&self.src[index .. index + c.len_utf8()])
         } else {
@@ -267,22 +274,22 @@ impl<'s> Tokens<'s> {
         offset_start: isize,
         offset_end: isize,
     ) -> &'s str where F: FnMut(char) -> bool {
-        let start = ((self.chars.index() as isize) + offset_start) as usize;
+        let start = ((self.index() as isize) + offset_start) as usize;
         let mut matched = false;
 
-        while let Some(c) = self.chars.peek() {
+        while let Some(c) = self.peek() {
             if f(c) {
                 matched = true;
                 if eat_match {
-                    self.chars.next();
+                    self.eat();
                 }
                 break;
             }
 
-            self.chars.next();
+            self.eat();
         }
 
-        let mut end = self.chars.index();
+        let mut end = self.index();
         if matched {
             end = ((end as isize) + offset_end) as usize;
         }
@@ -290,55 +297,7 @@ impl<'s> Tokens<'s> {
         &self.src[start .. end]
     }
 
-    fn set_state(&mut self, state: State) {
-        self.stack.push(self.state);
-        self.state = state;
-    }
-
-    fn pop_state(&mut self) {
-        self.state = self.stack.pop().unwrap_or(Body);
-    }
-
-    fn eat(&mut self) {
-        self.chars.next();
-    }
-}
-
-fn parse_percentage(text: &str) -> Option<f64> {
-    if text.ends_with('%') {
-        text[.. text.len() - 1].parse::<f64>().ok()
-    } else {
-        None
-    }
-}
-
-/// Whether this character denotes a newline.
-fn is_newline_char(character: char) -> bool {
-    match character {
-        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
-        '\x0A' ..= '\x0D' => true,
-        // Next Line, Line Separator, Paragraph Separator.
-        '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
-        _ => false,
-    }
-}
-
-struct Characters<'s> {
-    iter: Peekable<Chars<'s>>,
-    position: Position,
-    index: usize,
-}
-
-impl<'s> Characters<'s> {
-    fn new(src: &'s str) -> Characters<'s> {
-        Characters {
-            iter: src.chars().peekable(),
-            position: Position::ZERO,
-            index: 0,
-        }
-    }
-
-    fn next(&mut self) -> Option<char> {
+    fn eat(&mut self) -> Option<char> {
         let c = self.iter.next()?;
         let len = c.len_utf8();
 
@@ -362,7 +321,47 @@ impl<'s> Characters<'s> {
         self.index
     }
 
-    fn position(&self) -> Position {
+    fn pos(&self) -> Position {
         self.position
     }
 }
+
+fn parse_percentage(text: &str) -> Option<f64> {
+    if text.ends_with('%') {
+        text[.. text.len() - 1].parse::<f64>().ok()
+    } else {
+        None
+    }
+}
+
+/// Whether this character denotes a newline.
+pub fn is_newline_char(character: char) -> bool {
+    match character {
+        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+        '\x0A' ..= '\x0D' => true,
+        // Next Line, Line Separator, Paragraph Separator.
+        '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
+        _ => false,
+    }
+}
+
+/// Whether this word is a valid identifier.
+pub fn is_identifier(string: &str) -> bool {
+    let mut chars = string.chars();
+
+    match chars.next() {
+        Some('-') => {}
+        Some(c) if UnicodeXID::is_xid_start(c) => {}
+        _ => return false,
+    }
+
+    while let Some(c) = chars.next() {
+        match c {
+            '.' | '-' => {}
+            c if UnicodeXID::is_xid_continue(c) => {}
+            _ => return false,
+        }
+    }
+
+    true
+}
@@ -9,18 +9,12 @@ use Token::{
     LeftBracket as LB, RightBracket as RB,
     LeftParen as LP, RightParen as RP,
     LeftBrace as LBR, RightBrace as RBR,
-    Colon as CL, Comma as CM, Equals as EQ, Expr as E,
+    Colon as CL, Comma as CM, Equals as EQ,
+    ExprIdent as ID, ExprString as STR, ExprSize as SIZE,
+    ExprNumber as NUM, ExprBool as BOOL,
     Star as ST, Underscore as U, Backtick as B, Text as T,
 };
 
-use Expression as Expr;
-fn ID(ident: &str) -> Token { E(Expr::Ident(Ident::new(ident.to_string()).unwrap())) }
-fn STR(ident: &str) -> Token { E(Expr::Str(ident.to_string())) }
-fn SIZE(size: Size) -> Token<'static> { E(Expr::Size(size)) }
-fn NUM(num: f64) -> Token<'static> { E(Expr::Num(num)) }
-fn BOOL(b: bool) -> Token<'static> { E(Expr::Bool(b)) }
-
-
 /// Parses the test syntax.
 macro_rules! tokens {
     ($($task:ident $src:expr =>($line:expr)=> [$($target:tt)*])*) => ({