From c5b3f8ee98203191d83d3cfca39bb0f35ee6efc2 Mon Sep 17 00:00:00 2001
From: Laurenz <laurmaedje@gmail.com>
Date: Sat, 16 Apr 2022 22:23:57 +0200
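Subject: [PATCH] Switch to `unscanny`

Replace the in-tree `parse::Scanner` with the external `unscanny`
crate and delete `src/parse/scanner.rs`. Call sites are ported to the
new method names:

* `index()` -> `cursor()`, `src()` -> `string()`, `eof()` -> `done()`
* `eaten()` -> `before()`, `rest()` -> `after()`
* `eaten_from(i)` -> `from(i)`, `last_index()` -> `locate(-1)`
* `prev(n)` -> `scout(-(n + 1))`, `eat_assert(c)` -> `expect(c)`
* `check_or(..)` and `rest().starts_with(..)` -> `at(..)`

`is_newline`, `is_ident`, `is_id_start` and `is_id_continue` move from
the deleted scanner module into `src/parse/tokens.rs` and stay
re-exported from `parse`. `typst::parse::Scanner` is no longer
exported; users import `unscanny::Scanner` directly.

Public API changes: `Tokens::index` is renamed to `Tokens::cursor`, and
`Parser::get` now takes a `Range<usize>` instead of any `SliceIndex`.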
---
 Cargo.lock                    |   6 +
 Cargo.toml                    |   1 +
 benches/oneshot.rs            |   3 +-
 src/library/structure/list.rs |   9 +-
 src/parse/mod.rs              |   2 -
 src/parse/parser.rs           |  23 ++--
 src/parse/resolve.rs          |  20 ++--
 src/parse/scanner.rs          | 211 ----------------------------------
 src/parse/tokens.rs           | 135 +++++++++++++---------
 src/source.rs                 |  10 +-
 tests/typeset.rs              |   6 +-
 11 files changed, 127 insertions(+), 299 deletions(-)
 delete mode 100644 src/parse/scanner.rs
diff --git a/Cargo.lock b/Cargo.lock
index 2341a52e8..5a2e079eb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -876,6 +876,7 @@ dependencies = [
  "unicode-script",
  "unicode-segmentation",
  "unicode-xid",
+ "unscanny",
  "usvg",
  "walkdir",
  "xi-unicode",
@@ -938,6 +939,11 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
 
+[[package]]
+name = "unscanny"
+version = "0.1.0"
+source = "git+https://github.com/typst/unscanny#c943791649841388803b7ca873ce72683903fd39"
+
 [[package]]
 name = "usvg"
 version = "0.20.0"
diff --git a/Cargo.toml b/Cargo.toml
index fa7449afd..341a96d10 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,6 +21,7 @@ once_cell = "1"
 serde = { version = "1", features = ["derive"] }
 typed-arena = "2"
 parking_lot = "0.12"
+unscanny = { git = "https://github.com/typst/unscanny" }
 
 # Text and font handling
 hypher = "0.1"
diff --git a/benches/oneshot.rs b/benches/oneshot.rs
index ddd689703..c972d1bc4 100644
--- a/benches/oneshot.rs
+++ b/benches/oneshot.rs
@@ -1,9 +1,10 @@
 use std::path::Path;
 
 use iai::{black_box, main, Iai};
+use unscanny::Scanner;
 
 use typst::loading::MemLoader;
-use typst::parse::{parse, Scanner, TokenMode, Tokens};
+use typst::parse::{parse, TokenMode, Tokens};
 use typst::source::SourceId;
 use typst::Context;
 
diff --git a/src/library/structure/list.rs b/src/library/structure/list.rs
index 9d5d3a7d5..10dcfb7b9 100644
--- a/src/library/structure/list.rs
+++ b/src/library/structure/list.rs
@@ -1,8 +1,9 @@
+use unscanny::Scanner;
+
 use crate::library::layout::{GridNode, TrackSizing};
 use crate::library::prelude::*;
 use crate::library::text::ParNode;
 use crate::library::utility::Numbering;
-use crate::parse::Scanner;
 
 /// An unordered (bulleted) or ordered (numbered) list.
 #[derive(Debug, Hash)]
@@ -190,7 +191,7 @@ impl Cast<Spanned<Value>> for Label {
                 let mut s = Scanner::new(&pattern);
                 let mut prefix;
                 let numbering = loop {
-                    prefix = s.eaten();
+                    prefix = s.before();
                     match s.eat().map(|c| c.to_ascii_lowercase()) {
                         Some('1') => break Numbering::Arabic,
                         Some('a') => break Numbering::Letter,
@@ -200,8 +201,8 @@ impl Cast<Spanned<Value>> for Label {
                         None => Err("invalid pattern")?,
                     }
                 };
-                let upper = s.prev(0).map_or(false, char::is_uppercase);
-                let suffix = s.rest().into();
+                let upper = s.scout(-1).map_or(false, char::is_uppercase);
+                let suffix = s.after().into();
                 Ok(Self::Pattern(prefix.into(), numbering, upper, suffix))
             }
             Value::Content(v) => Ok(Self::Content(v)),
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 47cba1119..7536b2ca0 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -3,13 +3,11 @@
 mod incremental;
 mod parser;
 mod resolve;
-mod scanner;
 mod tokens;
 
 pub use incremental::*;
 pub use parser::*;
 pub use resolve::*;
-pub use scanner::*;
 pub use tokens::*;
 
 use std::collections::HashSet;
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 98adfba26..4bbbdc28f 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,6 +1,6 @@
-use core::slice::SliceIndex;
 use std::fmt::{self, Display, Formatter};
 use std::mem;
+use std::ops::Range;
 
 use super::{TokenMode, Tokens};
 use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind};
@@ -116,7 +116,7 @@ impl<'s> Parser<'s> {
             _ => false,
         };
 
-        self.prev_end = self.tokens.index();
+        self.prev_end = self.tokens.cursor();
         self.bump();
 
         if self.tokens.mode() == TokenMode::Code {
@@ -186,15 +186,12 @@ impl<'s> Parser<'s> {
 
     /// Peek at the source of the current token.
     pub fn peek_src(&self) -> &'s str {
-        self.tokens.scanner().get(self.current_start() .. self.current_end())
+        self.get(self.current_start() .. self.current_end())
     }
 
     /// Obtain a range of the source code.
-    pub fn get<I>(&self, index: I) -> &'s str
-    where
-        I: SliceIndex<str, Output = str>,
-    {
-        self.tokens.scanner().get(index)
+    pub fn get(&self, range: Range<usize>) -> &'s str {
+        self.tokens.scanner().get(range)
     }
 
     /// The byte index at which the last non-trivia token ended.
@@ -209,7 +206,7 @@ impl<'s> Parser<'s> {
 
     /// The byte index at which the current token ends.
     pub fn current_end(&self) -> usize {
-        self.tokens.index()
+        self.tokens.cursor()
     }
 
     /// Determine the column index for the given byte index.
@@ -294,8 +291,8 @@ impl<'s> Parser<'s> {
             }
 
             self.tokens.jump(target);
-            self.prev_end = self.tokens.index();
-            self.current_start = self.tokens.index();
+            self.prev_end = self.tokens.cursor();
+            self.current_start = self.tokens.cursor();
             self.current = self.tokens.next();
         }
 
@@ -311,9 +308,9 @@ impl<'s> Parser<'s> {
     /// handling.
     fn bump(&mut self) {
         let kind = self.current.take().unwrap();
-        let len = self.tokens.index() - self.current_start;
+        let len = self.tokens.cursor() - self.current_start;
         self.children.push(GreenData::new(kind, len).into());
-        self.current_start = self.tokens.index();
+        self.current_start = self.tokens.cursor();
         self.current = self.tokens.next();
     }
 
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index 0d4cf071f..dd9ed4f49 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -1,4 +1,6 @@
-use super::{is_ident, is_newline, Scanner};
+use unscanny::Scanner;
+
+use super::{is_ident, is_newline};
 use crate::syntax::ast::RawNode;
 use crate::util::EcoString;
 
@@ -13,7 +15,7 @@ pub fn resolve_string(string: &str) -> EcoString {
             continue;
         }
 
-        let start = s.last_index();
+        let start = s.locate(-1);
         match s.eat() {
             Some('\\') => out.push('\\'),
             Some('"') => out.push('"'),
@@ -22,17 +24,17 @@ pub fn resolve_string(string: &str) -> EcoString {
             Some('t') => out.push('\t'),
             Some('u') if s.eat_if('{') => {
                 // TODO: Feedback if closing brace is missing.
-                let sequence = s.eat_while(|c| c.is_ascii_hexdigit());
+                let sequence = s.eat_while(char::is_ascii_hexdigit);
                 let _terminated = s.eat_if('}');
 
                 match resolve_hex(sequence) {
                     Some(c) => out.push(c),
-                    None => out.push_str(s.eaten_from(start)),
+                    None => out.push_str(s.from(start)),
                 }
             }
 
             // TODO: Feedback about invalid escape sequence.
-            _ => out.push_str(s.eaten_from(start)),
+            _ => out.push_str(s.from(start)),
         }
     }
 
@@ -68,8 +70,8 @@ pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawNode {
 fn split_at_lang_tag(raw: &str) -> (&str, &str) {
     let mut s = Scanner::new(raw);
     (
-        s.eat_until(|c| c == '`' || c.is_whitespace() || is_newline(c)),
-        s.rest(),
+        s.eat_until(|c: char| c == '`' || c.is_whitespace() || is_newline(c)),
+        s.after(),
     )
 }
 
@@ -129,9 +131,9 @@ fn split_lines(text: &str) -> Vec<&str> {
             }
 
             lines.push(&text[start .. end]);
-            start = s.index();
+            start = s.cursor();
         }
-        end = s.index();
+        end = s.cursor();
     }
 
     lines.push(&text[start ..]);
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
deleted file mode 100644
index e4cf56e97..000000000
--- a/src/parse/scanner.rs
+++ /dev/null
@@ -1,211 +0,0 @@
-use std::slice::SliceIndex;
-
-use unicode_xid::UnicodeXID;
-
-/// A featureful char-based scanner.
-#[derive(Copy, Clone)]
-pub struct Scanner<'s> {
-    /// The string to scan.
-    src: &'s str,
-    /// The index at which the peekable character starts. Must be in bounds and
-    /// at a codepoint boundary to guarantee safety.
-    index: usize,
-}
-
-impl<'s> Scanner<'s> {
-    /// Create a new char scanner.
-    #[inline]
-    pub fn new(src: &'s str) -> Self {
-        Self { src, index: 0 }
-    }
-
-    /// Whether the end of the string is reached.
-    pub fn eof(&self) -> bool {
-        self.index == self.src.len()
-    }
-
-    /// Consume the next char.
-    #[inline]
-    pub fn eat(&mut self) -> Option<char> {
-        let next = self.peek();
-        if let Some(c) = next {
-            self.index += c.len_utf8();
-        }
-        next
-    }
-
-    /// Consume the next char if it is the given one.
-    ///
-    /// Returns whether the char was consumed.
-    #[inline]
-    pub fn eat_if(&mut self, c: char) -> bool {
-        let matches = self.peek() == Some(c);
-        if matches {
-            self.index += c.len_utf8();
-        }
-        matches
-    }
-
-    /// Consume the next char, debug-asserting that it is the given one.
-    #[inline]
-    pub fn eat_assert(&mut self, c: char) {
-        let next = self.eat();
-        debug_assert_eq!(next, Some(c));
-    }
-
-    /// Eat chars while the condition is true.
-    #[inline]
-    pub fn eat_while<F>(&mut self, mut f: F) -> &'s str
-    where
-        F: FnMut(char) -> bool,
-    {
-        self.eat_until(|c| !f(c))
-    }
-
-    /// Eat chars until the condition is true.
-    #[inline]
-    pub fn eat_until<F>(&mut self, mut f: F) -> &'s str
-    where
-        F: FnMut(char) -> bool,
-    {
-        let start = self.index;
-        while let Some(c) = self.peek() {
-            if f(c) {
-                break;
-            }
-            self.index += c.len_utf8();
-        }
-        self.eaten_from(start)
-    }
-
-    /// Uneat the last eaten char.
-    #[inline]
-    pub fn uneat(&mut self) {
-        self.index = self.last_index();
-    }
-
-    /// Peek at the next char without consuming it.
-    #[inline]
-    pub fn peek(&self) -> Option<char> {
-        self.rest().chars().next()
-    }
-
-    /// Get the nth-previous eaten char.
-    #[inline]
-    pub fn prev(&self, n: usize) -> Option<char> {
-        self.eaten().chars().nth_back(n)
-    }
-
-    /// Checks whether the next char fulfills a condition.
-    ///
-    /// Returns `default` if there is no next char.
-    #[inline]
-    pub fn check_or<F>(&self, default: bool, f: F) -> bool
-    where
-        F: FnOnce(char) -> bool,
-    {
-        self.peek().map_or(default, f)
-    }
-
-    /// The previous index in the source string.
-    #[inline]
-    pub fn last_index(&self) -> usize {
-        self.eaten().chars().last().map_or(0, |c| self.index - c.len_utf8())
-    }
-
-    /// The current index in the source string.
-    #[inline]
-    pub fn index(&self) -> usize {
-        self.index
-    }
-
-    /// Jump to an index in the source string.
-    #[inline]
-    pub fn jump(&mut self, index: usize) {
-        // Make sure that the index is in bounds and on a codepoint boundary.
-        self.src.get(index ..).expect("jumped to invalid index");
-        self.index = index;
-    }
-
-    /// The full source string.
-    #[inline]
-    pub fn src(&self) -> &'s str {
-        self.src
-    }
-
-    /// Slice out part of the source string.
-    #[inline]
-    pub fn get<I>(&self, index: I) -> &'s str
-    where
-        I: SliceIndex<str, Output = str>,
-    {
-        // See `eaten_from` for details about `unwrap_or_default`.
-        self.src.get(index).unwrap_or_default()
-    }
-
-    /// The remaining source string after the current index.
-    #[inline]
-    pub fn rest(&self) -> &'s str {
-        // Safety: The index is always in bounds and on a codepoint boundary
-        // since it starts at zero and is is:
-        // - either increased by the length of a scanned character, advacing
-        //   from one codepoint boundary to the next,
-        // - or checked upon jumping.
-        unsafe { self.src.get_unchecked(self.index ..) }
-    }
-
-    /// The full source string up to the current index.
-    #[inline]
-    pub fn eaten(&self) -> &'s str {
-        // Safety: The index is always okay, for details see `rest()`.
-        unsafe { self.src.get_unchecked(.. self.index) }
-    }
-
-    /// The source string from `start` to the current index.
-    #[inline]
-    pub fn eaten_from(&self, start: usize) -> &'s str {
-        // Using `unwrap_or_default` is much faster than unwrap, probably
-        // because then the whole call to `eaten_from` is pure and can be
-        // optimized away in some cases.
-        self.src.get(start .. self.index).unwrap_or_default()
-    }
-}
-
-/// Whether this character denotes a newline.
-#[inline]
-pub fn is_newline(character: char) -> bool {
-    matches!(
-        character,
-        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
-        '\n' | '\x0B' | '\x0C' | '\r' |
-        // Next Line, Line Separator, Paragraph Separator.
-        '\u{0085}' | '\u{2028}' | '\u{2029}'
-    )
-}
-
-/// Whether a string is a valid unicode identifier.
-///
-/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
-/// - `_` as a starting character,
-/// - `_` and `-` as continuing characters.
-///
-/// [uax31]: http://www.unicode.org/reports/tr31/
-#[inline]
-pub fn is_ident(string: &str) -> bool {
-    let mut chars = string.chars();
-    chars
-        .next()
-        .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue))
-}
-
-/// Whether a character can start an identifier.
-#[inline]
-pub fn is_id_start(c: char) -> bool {
-    c.is_xid_start() || c == '_'
-}
-
-/// Whether a character can continue an identifier.
-#[inline]
-pub fn is_id_continue(c: char) -> bool {
-    c.is_xid_continue() || c == '_' || c == '-'
-}
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 053a7f61e..ae3d7b9c5 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -1,9 +1,9 @@
 use std::sync::Arc;
 
-use super::{
-    is_id_continue, is_id_start, is_newline, resolve_hex, resolve_raw, resolve_string,
-    Scanner,
-};
+use unicode_xid::UnicodeXID;
+use unscanny::Scanner;
+
+use super::{resolve_hex, resolve_raw, resolve_string};
 use crate::geom::{AngleUnit, LengthUnit};
 use crate::syntax::ast::{MathNode, RawNode, Unit};
 use crate::syntax::{ErrorPos, NodeKind};
@@ -65,13 +65,11 @@ impl<'s> Tokens<'s> {
     /// The index in the string at which the last token ends and next token
     /// will start.
     #[inline]
-    pub fn index(&self) -> usize {
-        self.s.index()
+    pub fn cursor(&self) -> usize {
+        self.s.cursor()
     }
 
     /// Jump to the given index in the string.
-    ///
-    /// You need to know the correct column.
     #[inline]
     pub fn jump(&mut self, index: usize) {
         self.s.jump(index);
@@ -92,7 +90,7 @@ impl<'s> Tokens<'s> {
     /// The column index of a given index in the source string.
     #[inline]
     pub fn column(&self, index: usize) -> usize {
-        column(self.s.src(), index, self.column_offset)
+        column(self.s.string(), index, self.column_offset)
     }
 }
 
@@ -102,7 +100,7 @@ impl<'s> Iterator for Tokens<'s> {
     /// Parse the next token in the source code.
     #[inline]
     fn next(&mut self) -> Option<Self::Item> {
-        let start = self.s.index();
+        let start = self.s.cursor();
         let c = self.s.eat()?;
         Some(match c {
             // Blocks.
@@ -112,15 +110,13 @@ impl<'s> Iterator for Tokens<'s> {
             ']' => NodeKind::RightBracket,
 
             // Whitespace.
-            ' ' if self.s.check_or(true, |c| !c.is_whitespace()) => NodeKind::Space(0),
+            ' ' if self.s.done() || !self.s.at(char::is_whitespace) => NodeKind::Space(0),
             c if c.is_whitespace() => self.whitespace(),
 
             // Comments with special case for URLs.
             '/' if self.s.eat_if('*') => self.block_comment(),
             '/' if !self.maybe_in_url() && self.s.eat_if('/') => self.line_comment(),
-            '*' if self.s.eat_if('/') => {
-                NodeKind::Unknown(self.s.eaten_from(start).into())
-            }
+            '*' if self.s.eat_if('/') => NodeKind::Unknown(self.s.from(start).into()),
 
             // Other things.
             _ => match self.mode {
@@ -187,22 +183,20 @@ impl<'s> Tokens<'s> {
             '=' => NodeKind::Eq,
             '<' => NodeKind::Lt,
             '>' => NodeKind::Gt,
-            '.' if self.s.check_or(true, |n| !n.is_ascii_digit()) => NodeKind::Dot,
+            '.' if self.s.done() || !self.s.at(char::is_ascii_digit) => NodeKind::Dot,
 
             // Identifiers.
             c if is_id_start(c) => self.ident(start),
 
             // Numbers.
-            c if c.is_ascii_digit()
-                || (c == '.' && self.s.check_or(false, |n| n.is_ascii_digit())) =>
-            {
+            c if c.is_ascii_digit() || (c == '.' && self.s.at(char::is_ascii_digit)) => {
                 self.number(start, c)
             }
 
             // Strings.
             '"' => self.string(),
 
-            _ => NodeKind::Unknown(self.s.eaten_from(start).into()),
+            _ => NodeKind::Unknown(self.s.from(start).into()),
         }
     }
 
@@ -226,19 +220,19 @@ impl<'s> Tokens<'s> {
         };
 
         loop {
-            self.s.eat_until(|c| {
+            self.s.eat_until(|c: char| {
                 TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
             });
 
             let mut s = self.s;
-            if !(s.eat_if(' ') && s.check_or(false, char::is_alphanumeric)) {
+            if !(s.eat_if(' ') && s.at(char::is_alphanumeric)) {
                 break;
             }
 
             self.s.eat();
         }
 
-        NodeKind::Text(self.s.eaten_from(start).into())
+        NodeKind::Text(self.s.from(start).into())
     }
 
     fn whitespace(&mut self) -> NodeKind {
@@ -276,13 +270,11 @@ impl<'s> Tokens<'s> {
             '[' | ']' | '{' | '}' | '#' |
             // Markup.
             '~' | '\'' | '"' | '*' | '_' | '`' | '$' | '=' | '-' | '.' => {
-                self.s.eat_assert(c) ;
+                self.s.expect(c);
                 NodeKind::Escape(c)
             }
-            'u' if self.s.rest().starts_with("u{") => {
-                self.s.eat_assert('u');
-                self.s.eat_assert('{');
-                let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric());
+            'u' if self.s.eat_if("u{") => {
+                let sequence = self.s.eat_while(char::is_ascii_alphanumeric);
                 if self.s.eat_if('}') {
                     if let Some(c) = resolve_hex(sequence) {
                         NodeKind::Escape(c)
@@ -304,7 +296,7 @@ impl<'s> Tokens<'s> {
             // Linebreaks.
             c if c.is_whitespace() => NodeKind::Linebreak(false),
             '+' => {
-                self.s.eat_assert(c);
+                self.s.expect(c);
                 NodeKind::Linebreak(true)
             }
 
@@ -315,7 +307,7 @@ impl<'s> Tokens<'s> {
 
     #[inline]
     fn hash(&mut self) -> NodeKind {
-        if self.s.check_or(false, is_id_start) {
+        if self.s.at(is_id_start) {
             let read = self.s.eat_while(is_id_continue);
             match keyword(read) {
                 Some(keyword) => keyword,
@@ -342,10 +334,10 @@ impl<'s> Tokens<'s> {
 
     fn numbering(&mut self, start: usize, c: char) -> NodeKind {
         let number = if c != '.' {
-            self.s.eat_while(|c| c.is_ascii_digit());
-            let read = self.s.eaten_from(start);
+            self.s.eat_while(char::is_ascii_digit);
+            let read = self.s.from(start);
             if !self.s.eat_if('.') {
-                return NodeKind::Text(self.s.eaten_from(start).into());
+                return NodeKind::Text(self.s.from(start).into());
             }
             read.parse().ok()
         } else {
@@ -356,7 +348,7 @@ impl<'s> Tokens<'s> {
     }
 
     fn raw(&mut self) -> NodeKind {
-        let column = self.column(self.s.index() - 1);
+        let column = self.column(self.s.cursor() - 1);
 
         let mut backticks = 1;
         while self.s.eat_if('`') {
@@ -372,7 +364,7 @@ impl<'s> Tokens<'s> {
             }));
         }
 
-        let start = self.s.index();
+        let start = self.s.cursor();
 
         let mut found = 0;
         while found < backticks {
@@ -384,7 +376,7 @@ impl<'s> Tokens<'s> {
         }
 
         if found == backticks {
-            let end = self.s.index() - found as usize;
+            let end = self.s.cursor() - found as usize;
             NodeKind::Raw(Arc::new(resolve_raw(
                 column,
                 backticks,
@@ -412,7 +404,7 @@ impl<'s> Tokens<'s> {
             display = true;
         }
 
-        let start = self.s.index();
+        let start = self.s.cursor();
 
         let mut escaped = false;
         let mut dollar = !display;
@@ -429,7 +421,7 @@ impl<'s> Tokens<'s> {
             }
         };
 
-        let end = self.s.index()
+        let end = self.s.cursor()
             - match (terminated, display) {
                 (false, _) => 0,
                 (true, false) => 1,
@@ -456,7 +448,7 @@ impl<'s> Tokens<'s> {
 
     fn ident(&mut self, start: usize) -> NodeKind {
         self.s.eat_while(is_id_continue);
-        match self.s.eaten_from(start) {
+        match self.s.from(start) {
             "none" => NodeKind::None,
             "auto" => NodeKind::Auto,
             "true" => NodeKind::Bool(true),
@@ -467,30 +459,29 @@ impl<'s> Tokens<'s> {
 
     fn number(&mut self, start: usize, c: char) -> NodeKind {
         // Read the first part (integer or fractional depending on `first`).
-        self.s.eat_while(|c| c.is_ascii_digit());
+        self.s.eat_while(char::is_ascii_digit);
 
         // Read the fractional part if not already done.
         // Make sure not to confuse a range for the decimal separator.
-        if c != '.' && !self.s.rest().starts_with("..") && self.s.eat_if('.') {
-            self.s.eat_while(|c| c.is_ascii_digit());
+        if c != '.' && !self.s.at("..") && self.s.eat_if('.') {
+            self.s.eat_while(char::is_ascii_digit);
         }
 
         // Read the exponent.
-        let em = self.s.rest().starts_with("em");
-        if !em && self.s.eat_if('e') || self.s.eat_if('E') {
-            let _ = self.s.eat_if('+') || self.s.eat_if('-');
-            self.s.eat_while(|c| c.is_ascii_digit());
+        if !self.s.at("em") && self.s.eat_if(['e', 'E']) {
+            self.s.eat_if(['+', '-']);
+            self.s.eat_while(char::is_ascii_digit);
         }
 
         // Read the suffix.
-        let suffix_start = self.s.index();
+        let suffix_start = self.s.cursor();
         if !self.s.eat_if('%') {
-            self.s.eat_while(|c| c.is_ascii_alphanumeric());
+            self.s.eat_while(char::is_ascii_alphanumeric);
         }
 
         let number = self.s.get(start .. suffix_start);
-        let suffix = self.s.eaten_from(suffix_start);
-        let all = self.s.eaten_from(start);
+        let suffix = self.s.from(suffix_start);
+        let all = self.s.from(start);
 
         // Find out whether it is a simple number.
         if suffix.is_empty() {
@@ -575,13 +566,13 @@ impl<'s> Tokens<'s> {
 
     fn in_word(&self) -> bool {
         let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
-        let prev = self.s.prev(1);
+        let prev = self.s.scout(-2);
         let next = self.s.peek();
         alphanumeric(prev) && alphanumeric(next)
     }
 
     fn maybe_in_url(&self) -> bool {
-        self.mode == TokenMode::Markup && self.s.eaten().ends_with(":/")
+        self.mode == TokenMode::Markup && self.s.before().ends_with(":/")
     }
 }
 
@@ -610,7 +601,8 @@ fn keyword(ident: &str) -> Option<NodeKind> {
     })
 }
 
-/// The column index of a given index in the source string, given a column offset for the first line.
+/// The column index of a given index in the source string, given a column
+/// offset for the first line.
 #[inline]
 fn column(string: &str, index: usize, offset: usize) -> usize {
     let mut apply_offset = false;
@@ -634,6 +626,45 @@ fn column(string: &str, index: usize, offset: usize) -> usize {
     if apply_offset { res + offset } else { res }
 }
 
+/// Whether this character denotes a newline.
+#[inline]
+pub fn is_newline(character: char) -> bool {
+    matches!(
+        character,
+        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+        '\n' | '\x0B' | '\x0C' | '\r' |
+        // Next Line, Line Separator, Paragraph Separator.
+        '\u{0085}' | '\u{2028}' | '\u{2029}'
+    )
+}
+
+/// Whether a string is a valid unicode identifier.
+///
+/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
+/// - `_` as a starting character,
+/// - `_` and `-` as continuing characters.
+///
+/// [uax31]: https://www.unicode.org/reports/tr31/
+#[inline]
+pub fn is_ident(string: &str) -> bool {
+    let mut chars = string.chars();
+    chars
+        .next()
+        .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue))
+}
+
+/// Whether a character can start an identifier.
+#[inline]
+pub fn is_id_start(c: char) -> bool {
+    c.is_xid_start() || c == '_'
+}
+
+/// Whether a character can continue an identifier.
+#[inline]
+pub fn is_id_continue(c: char) -> bool {
+    c.is_xid_continue() || c == '_' || c == '-'
+}
+
 #[cfg(test)]
 #[allow(non_snake_case)]
 mod tests {
diff --git a/src/source.rs b/src/source.rs
index 9f2a01403..37aa96cac 100644
--- a/src/source.rs
+++ b/src/source.rs
@@ -6,9 +6,11 @@ use std::ops::Range;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 
+use unscanny::Scanner;
+
 use crate::diag::TypResult;
 use crate::loading::{FileHash, Loader};
-use crate::parse::{is_newline, parse, Reparser, Scanner};
+use crate::parse::{is_newline, parse, Reparser};
 use crate::syntax::ast::Markup;
 use crate::syntax::{self, Category, GreenNode, RedNode};
 use crate::util::{PathExt, StrExt};
@@ -382,12 +384,12 @@ impl Line {
         let mut utf16_idx = utf16_offset;
 
         std::iter::from_fn(move || {
-            s.eat_until(|c| {
+            s.eat_until(|c: char| {
                 utf16_idx += c.len_utf16();
                 is_newline(c)
             });
 
-            if s.eof() {
+            if s.done() {
                 return None;
             }
 
@@ -396,7 +398,7 @@ impl Line {
             }
 
             Some(Line {
-                byte_idx: byte_offset + s.index(),
+                byte_idx: byte_offset + s.cursor(),
                 utf16_idx,
             })
         })
diff --git a/tests/typeset.rs b/tests/typeset.rs
index bba826219..02d3ee389 100644
--- a/tests/typeset.rs
+++ b/tests/typeset.rs
@@ -6,6 +6,7 @@ use std::path::Path;
 use std::sync::Arc;
 
 use tiny_skia as sk;
+use unscanny::Scanner;
 use walkdir::WalkDir;
 
 use typst::diag::Error;
@@ -15,7 +16,6 @@ use typst::geom::{Length, RgbaColor};
 use typst::library::layout::PageNode;
 use typst::library::text::{TextNode, TextSize};
 use typst::loading::FsLoader;
-use typst::parse::Scanner;
 use typst::source::SourceFile;
 use typst::syntax::Span;
 use typst::{bail, Context};
@@ -329,7 +329,7 @@ fn parse_metadata(source: &SourceFile) -> (Option<bool>, Vec<Error>) {
         };
 
         fn num(s: &mut Scanner) -> usize {
-            s.eat_while(|c| c.is_numeric()).parse().unwrap()
+            s.eat_while(char::is_numeric).parse().unwrap()
         }
 
         let comments =
@@ -348,7 +348,7 @@ fn parse_metadata(source: &SourceFile) -> (Option<bool>, Vec<Error>) {
         let end = if s.eat_if('-') { pos(&mut s) } else { start };
         let span = Span::new(source.id(), start, end);
 
-        errors.push(Error::new(span, s.rest().trim()));
+        errors.push(Error::new(span, s.after().trim()));
     }
 
     (compare_ref, errors)