Switch to unscanny
parent 2db4b603db
commit c5b3f8ee98
Cargo.lock (generated): 6 changed lines
@@ -876,6 +876,7 @@ dependencies = [
 "unicode-script",
 "unicode-segmentation",
 "unicode-xid",
+"unscanny",
 "usvg",
 "walkdir",
 "xi-unicode",
@@ -938,6 +939,11 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
 
+[[package]]
+name = "unscanny"
+version = "0.1.0"
+source = "git+https://github.com/typst/unscanny#c943791649841388803b7ca873ce72683903fd39"
+
 [[package]]
 name = "usvg"
 version = "0.20.0"
@@ -21,6 +21,7 @@ once_cell = "1"
 serde = { version = "1", features = ["derive"] }
 typed-arena = "2"
 parking_lot = "0.12"
+unscanny = { git = "https://github.com/typst/unscanny" }
 
 # Text and font handling
 hypher = "0.1"
@@ -1,9 +1,10 @@
 use std::path::Path;
 
 use iai::{black_box, main, Iai};
+use unscanny::Scanner;
 
 use typst::loading::MemLoader;
-use typst::parse::{parse, Scanner, TokenMode, Tokens};
+use typst::parse::{parse, TokenMode, Tokens};
 use typst::source::SourceId;
 use typst::Context;
 
@@ -1,8 +1,9 @@
+use unscanny::Scanner;
+
 use crate::library::layout::{GridNode, TrackSizing};
 use crate::library::prelude::*;
 use crate::library::text::ParNode;
 use crate::library::utility::Numbering;
-use crate::parse::Scanner;
 
 /// An unordered (bulleted) or ordered (numbered) list.
 #[derive(Debug, Hash)]
@@ -190,7 +191,7 @@ impl Cast<Spanned<Value>> for Label {
 let mut s = Scanner::new(&pattern);
 let mut prefix;
 let numbering = loop {
-prefix = s.eaten();
+prefix = s.before();
 match s.eat().map(|c| c.to_ascii_lowercase()) {
 Some('1') => break Numbering::Arabic,
 Some('a') => break Numbering::Letter,
@@ -200,8 +201,8 @@ impl Cast<Spanned<Value>> for Label {
 None => Err("invalid pattern")?,
 }
 };
-let upper = s.prev(0).map_or(false, char::is_uppercase);
-let suffix = s.rest().into();
+let upper = s.scout(-1).map_or(false, char::is_uppercase);
+let suffix = s.after().into();
 Ok(Self::Pattern(prefix.into(), numbering, upper, suffix))
 }
 Value::Content(v) => Ok(Self::Content(v)),
@@ -3,13 +3,11 @@
 mod incremental;
 mod parser;
 mod resolve;
-mod scanner;
 mod tokens;
 
 pub use incremental::*;
 pub use parser::*;
 pub use resolve::*;
-pub use scanner::*;
 pub use tokens::*;
 
 use std::collections::HashSet;
@@ -1,6 +1,6 @@
-use core::slice::SliceIndex;
 use std::fmt::{self, Display, Formatter};
 use std::mem;
+use std::ops::Range;
 
 use super::{TokenMode, Tokens};
 use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind};
@@ -116,7 +116,7 @@ impl<'s> Parser<'s> {
 _ => false,
 };
 
-self.prev_end = self.tokens.index();
+self.prev_end = self.tokens.cursor();
 self.bump();
 
 if self.tokens.mode() == TokenMode::Code {
@@ -186,15 +186,12 @@ impl<'s> Parser<'s> {
 
 /// Peek at the source of the current token.
 pub fn peek_src(&self) -> &'s str {
-self.tokens.scanner().get(self.current_start() .. self.current_end())
+self.get(self.current_start() .. self.current_end())
 }
 
 /// Obtain a range of the source code.
-pub fn get<I>(&self, index: I) -> &'s str
-where
-I: SliceIndex<str, Output = str>,
-{
-self.tokens.scanner().get(index)
+pub fn get(&self, range: Range<usize>) -> &'s str {
+self.tokens.scanner().get(range)
 }
 
 /// The byte index at which the last non-trivia token ended.
@@ -209,7 +206,7 @@ impl<'s> Parser<'s> {
 
 /// The byte index at which the current token ends.
 pub fn current_end(&self) -> usize {
-self.tokens.index()
+self.tokens.cursor()
 }
 
 /// Determine the column index for the given byte index.
@@ -294,8 +291,8 @@ impl<'s> Parser<'s> {
 }
 
 self.tokens.jump(target);
-self.prev_end = self.tokens.index();
-self.current_start = self.tokens.index();
+self.prev_end = self.tokens.cursor();
+self.current_start = self.tokens.cursor();
 self.current = self.tokens.next();
 }
 
@@ -311,9 +308,9 @@ impl<'s> Parser<'s> {
 /// handling.
 fn bump(&mut self) {
 let kind = self.current.take().unwrap();
-let len = self.tokens.index() - self.current_start;
+let len = self.tokens.cursor() - self.current_start;
 self.children.push(GreenData::new(kind, len).into());
-self.current_start = self.tokens.index();
+self.current_start = self.tokens.cursor();
 self.current = self.tokens.next();
 }
 
@@ -1,4 +1,6 @@
-use super::{is_ident, is_newline, Scanner};
+use unscanny::Scanner;
+
+use super::{is_ident, is_newline};
 use crate::syntax::ast::RawNode;
 use crate::util::EcoString;
 
@@ -13,7 +15,7 @@ pub fn resolve_string(string: &str) -> EcoString {
 continue;
 }
 
-let start = s.last_index();
+let start = s.locate(-1);
 match s.eat() {
 Some('\\') => out.push('\\'),
 Some('"') => out.push('"'),
@@ -22,17 +24,17 @@ pub fn resolve_string(string: &str) -> EcoString {
 Some('t') => out.push('\t'),
 Some('u') if s.eat_if('{') => {
 // TODO: Feedback if closing brace is missing.
-let sequence = s.eat_while(|c| c.is_ascii_hexdigit());
+let sequence = s.eat_while(char::is_ascii_hexdigit);
 let _terminated = s.eat_if('}');
 
 match resolve_hex(sequence) {
 Some(c) => out.push(c),
-None => out.push_str(s.eaten_from(start)),
+None => out.push_str(s.from(start)),
 }
 }
 
 // TODO: Feedback about invalid escape sequence.
-_ => out.push_str(s.eaten_from(start)),
+_ => out.push_str(s.from(start)),
 }
 }
 
@@ -68,8 +70,8 @@ pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawNode {
 fn split_at_lang_tag(raw: &str) -> (&str, &str) {
 let mut s = Scanner::new(raw);
 (
-s.eat_until(|c| c == '`' || c.is_whitespace() || is_newline(c)),
-s.rest(),
+s.eat_until(|c: char| c == '`' || c.is_whitespace() || is_newline(c)),
+s.after(),
 )
 }
 
@@ -129,9 +131,9 @@ fn split_lines(text: &str) -> Vec<&str> {
 }
 
 lines.push(&text[start .. end]);
-start = s.index();
+start = s.cursor();
 }
-end = s.index();
+end = s.cursor();
 }
 
 lines.push(&text[start ..]);
@@ -1,211 +0,0 @@
-use std::slice::SliceIndex;
-
-use unicode_xid::UnicodeXID;
-
-/// A featureful char-based scanner.
-#[derive(Copy, Clone)]
-pub struct Scanner<'s> {
-/// The string to scan.
-src: &'s str,
-/// The index at which the peekable character starts. Must be in bounds and
-/// at a codepoint boundary to guarantee safety.
-index: usize,
-}
-
-impl<'s> Scanner<'s> {
-/// Create a new char scanner.
-#[inline]
-pub fn new(src: &'s str) -> Self {
-Self { src, index: 0 }
-}
-
-/// Whether the end of the string is reached.
-pub fn eof(&self) -> bool {
-self.index == self.src.len()
-}
-
-/// Consume the next char.
-#[inline]
-pub fn eat(&mut self) -> Option<char> {
-let next = self.peek();
-if let Some(c) = next {
-self.index += c.len_utf8();
-}
-next
-}
-
-/// Consume the next char if it is the given one.
-///
-/// Returns whether the char was consumed.
-#[inline]
-pub fn eat_if(&mut self, c: char) -> bool {
-let matches = self.peek() == Some(c);
-if matches {
-self.index += c.len_utf8();
-}
-matches
-}
-
-/// Consume the next char, debug-asserting that it is the given one.
-#[inline]
-pub fn eat_assert(&mut self, c: char) {
-let next = self.eat();
-debug_assert_eq!(next, Some(c));
-}
-
-/// Eat chars while the condition is true.
-#[inline]
-pub fn eat_while<F>(&mut self, mut f: F) -> &'s str
-where
-F: FnMut(char) -> bool,
-{
-self.eat_until(|c| !f(c))
-}
-
-/// Eat chars until the condition is true.
-#[inline]
-pub fn eat_until<F>(&mut self, mut f: F) -> &'s str
-where
-F: FnMut(char) -> bool,
-{
-let start = self.index;
-while let Some(c) = self.peek() {
-if f(c) {
-break;
-}
-self.index += c.len_utf8();
-}
-self.eaten_from(start)
-}
-
-/// Uneat the last eaten char.
-#[inline]
-pub fn uneat(&mut self) {
-self.index = self.last_index();
-}
-
-/// Peek at the next char without consuming it.
-#[inline]
-pub fn peek(&self) -> Option<char> {
-self.rest().chars().next()
-}
-
-/// Get the nth-previous eaten char.
-#[inline]
-pub fn prev(&self, n: usize) -> Option<char> {
-self.eaten().chars().nth_back(n)
-}
-
-/// Checks whether the next char fulfills a condition.
-///
-/// Returns `default` if there is no next char.
-#[inline]
-pub fn check_or<F>(&self, default: bool, f: F) -> bool
-where
-F: FnOnce(char) -> bool,
-{
-self.peek().map_or(default, f)
-}
-
-/// The previous index in the source string.
-#[inline]
-pub fn last_index(&self) -> usize {
-self.eaten().chars().last().map_or(0, |c| self.index - c.len_utf8())
-}
-
-/// The current index in the source string.
-#[inline]
-pub fn index(&self) -> usize {
-self.index
-}
-
-/// Jump to an index in the source string.
-#[inline]
-pub fn jump(&mut self, index: usize) {
-// Make sure that the index is in bounds and on a codepoint boundary.
-self.src.get(index ..).expect("jumped to invalid index");
-self.index = index;
-}
-
-/// The full source string.
-#[inline]
-pub fn src(&self) -> &'s str {
-self.src
-}
-
-/// Slice out part of the source string.
-#[inline]
-pub fn get<I>(&self, index: I) -> &'s str
-where
-I: SliceIndex<str, Output = str>,
-{
-// See `eaten_from` for details about `unwrap_or_default`.
-self.src.get(index).unwrap_or_default()
-}
-
-/// The remaining source string after the current index.
-#[inline]
-pub fn rest(&self) -> &'s str {
-// Safety: The index is always in bounds and on a codepoint boundary
-// since it starts at zero and is is:
-// - either increased by the length of a scanned character, advacing
-// from one codepoint boundary to the next,
-// - or checked upon jumping.
-unsafe { self.src.get_unchecked(self.index ..) }
-}
-
-/// The full source string up to the current index.
-#[inline]
-pub fn eaten(&self) -> &'s str {
-// Safety: The index is always okay, for details see `rest()`.
-unsafe { self.src.get_unchecked(.. self.index) }
-}
-
-/// The source string from `start` to the current index.
-#[inline]
-pub fn eaten_from(&self, start: usize) -> &'s str {
-// Using `unwrap_or_default` is much faster than unwrap, probably
-// because then the whole call to `eaten_from` is pure and can be
-// optimized away in some cases.
-self.src.get(start .. self.index).unwrap_or_default()
-}
-}
-
-/// Whether this character denotes a newline.
-#[inline]
-pub fn is_newline(character: char) -> bool {
-matches!(
-character,
-// Line Feed, Vertical Tab, Form Feed, Carriage Return.
-'\n' | '\x0B' | '\x0C' | '\r' |
-// Next Line, Line Separator, Paragraph Separator.
-'\u{0085}' | '\u{2028}' | '\u{2029}'
-)
-}
-
-/// Whether a string is a valid unicode identifier.
-///
-/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
-/// - `_` as a starting character,
-/// - `_` and `-` as continuing characters.
-///
-/// [uax31]: http://www.unicode.org/reports/tr31/
-#[inline]
-pub fn is_ident(string: &str) -> bool {
-let mut chars = string.chars();
-chars
-.next()
-.map_or(false, |c| is_id_start(c) && chars.all(is_id_continue))
-}
-
-/// Whether a character can start an identifier.
-#[inline]
-pub fn is_id_start(c: char) -> bool {
-c.is_xid_start() || c == '_'
-}
-
-/// Whether a character can continue an identifier.
-#[inline]
-pub fn is_id_continue(c: char) -> bool {
-c.is_xid_continue() || c == '_' || c == '-'
-}
@@ -1,9 +1,9 @@
 use std::sync::Arc;
 
-use super::{
-is_id_continue, is_id_start, is_newline, resolve_hex, resolve_raw, resolve_string,
-Scanner,
-};
+use unicode_xid::UnicodeXID;
+use unscanny::Scanner;
+
+use super::{resolve_hex, resolve_raw, resolve_string};
 use crate::geom::{AngleUnit, LengthUnit};
 use crate::syntax::ast::{MathNode, RawNode, Unit};
 use crate::syntax::{ErrorPos, NodeKind};
@@ -65,13 +65,11 @@ impl<'s> Tokens<'s> {
 /// The index in the string at which the last token ends and next token
 /// will start.
 #[inline]
-pub fn index(&self) -> usize {
-self.s.index()
+pub fn cursor(&self) -> usize {
+self.s.cursor()
 }
 
 /// Jump to the given index in the string.
-///
-/// You need to know the correct column.
 #[inline]
 pub fn jump(&mut self, index: usize) {
 self.s.jump(index);
@@ -92,7 +90,7 @@ impl<'s> Tokens<'s> {
 /// The column index of a given index in the source string.
 #[inline]
 pub fn column(&self, index: usize) -> usize {
-column(self.s.src(), index, self.column_offset)
+column(self.s.string(), index, self.column_offset)
 }
 }
 
@@ -102,7 +100,7 @@ impl<'s> Iterator for Tokens<'s> {
 /// Parse the next token in the source code.
 #[inline]
 fn next(&mut self) -> Option<Self::Item> {
-let start = self.s.index();
+let start = self.s.cursor();
 let c = self.s.eat()?;
 Some(match c {
 // Blocks.
@@ -112,15 +110,13 @@ impl<'s> Iterator for Tokens<'s> {
 ']' => NodeKind::RightBracket,
 
 // Whitespace.
-' ' if self.s.check_or(true, |c| !c.is_whitespace()) => NodeKind::Space(0),
+' ' if self.s.done() || !self.s.at(char::is_whitespace) => NodeKind::Space(0),
 c if c.is_whitespace() => self.whitespace(),
 
 // Comments with special case for URLs.
 '/' if self.s.eat_if('*') => self.block_comment(),
 '/' if !self.maybe_in_url() && self.s.eat_if('/') => self.line_comment(),
-'*' if self.s.eat_if('/') => {
-NodeKind::Unknown(self.s.eaten_from(start).into())
-}
+'*' if self.s.eat_if('/') => NodeKind::Unknown(self.s.from(start).into()),
 
 // Other things.
 _ => match self.mode {
@@ -187,22 +183,20 @@ impl<'s> Tokens<'s> {
 '=' => NodeKind::Eq,
 '<' => NodeKind::Lt,
 '>' => NodeKind::Gt,
-'.' if self.s.check_or(true, |n| !n.is_ascii_digit()) => NodeKind::Dot,
+'.' if self.s.done() || !self.s.at(char::is_ascii_digit) => NodeKind::Dot,
 
 // Identifiers.
 c if is_id_start(c) => self.ident(start),
 
 // Numbers.
-c if c.is_ascii_digit()
-|| (c == '.' && self.s.check_or(false, |n| n.is_ascii_digit())) =>
-{
+c if c.is_ascii_digit() || (c == '.' && self.s.at(char::is_ascii_digit)) => {
 self.number(start, c)
 }
 
 // Strings.
 '"' => self.string(),
 
-_ => NodeKind::Unknown(self.s.eaten_from(start).into()),
+_ => NodeKind::Unknown(self.s.from(start).into()),
 }
 }
 
@@ -226,19 +220,19 @@ impl<'s> Tokens<'s> {
 };
 
 loop {
-self.s.eat_until(|c| {
+self.s.eat_until(|c: char| {
 TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
 });
 
 let mut s = self.s;
-if !(s.eat_if(' ') && s.check_or(false, char::is_alphanumeric)) {
+if !(s.eat_if(' ') && s.at(char::is_alphanumeric)) {
 break;
 }
 
 self.s.eat();
 }
 
-NodeKind::Text(self.s.eaten_from(start).into())
+NodeKind::Text(self.s.from(start).into())
 }
 
 fn whitespace(&mut self) -> NodeKind {
@@ -276,13 +270,11 @@ impl<'s> Tokens<'s> {
 '[' | ']' | '{' | '}' | '#' |
 // Markup.
 '~' | '\'' | '"' | '*' | '_' | '`' | '$' | '=' | '-' | '.' => {
-self.s.eat_assert(c);
+self.s.expect(c);
 NodeKind::Escape(c)
 }
-'u' if self.s.rest().starts_with("u{") => {
-self.s.eat_assert('u');
-self.s.eat_assert('{');
-let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric());
+'u' if self.s.eat_if("u{") => {
+let sequence = self.s.eat_while(char::is_ascii_alphanumeric);
 if self.s.eat_if('}') {
 if let Some(c) = resolve_hex(sequence) {
 NodeKind::Escape(c)
@@ -304,7 +296,7 @@ impl<'s> Tokens<'s> {
 // Linebreaks.
 c if c.is_whitespace() => NodeKind::Linebreak(false),
 '+' => {
-self.s.eat_assert(c);
+self.s.expect(c);
 NodeKind::Linebreak(true)
 }
 
@@ -315,7 +307,7 @@ impl<'s> Tokens<'s> {
 
 #[inline]
 fn hash(&mut self) -> NodeKind {
-if self.s.check_or(false, is_id_start) {
+if self.s.at(is_id_start) {
 let read = self.s.eat_while(is_id_continue);
 match keyword(read) {
 Some(keyword) => keyword,
@@ -342,10 +334,10 @@ impl<'s> Tokens<'s> {
 
 fn numbering(&mut self, start: usize, c: char) -> NodeKind {
 let number = if c != '.' {
-self.s.eat_while(|c| c.is_ascii_digit());
-let read = self.s.eaten_from(start);
+self.s.eat_while(char::is_ascii_digit);
+let read = self.s.from(start);
 if !self.s.eat_if('.') {
-return NodeKind::Text(self.s.eaten_from(start).into());
+return NodeKind::Text(self.s.from(start).into());
 }
 read.parse().ok()
 } else {
@@ -356,7 +348,7 @@ impl<'s> Tokens<'s> {
 }
 
 fn raw(&mut self) -> NodeKind {
-let column = self.column(self.s.index() - 1);
+let column = self.column(self.s.cursor() - 1);
 
 let mut backticks = 1;
 while self.s.eat_if('`') {
@@ -372,7 +364,7 @@ impl<'s> Tokens<'s> {
 }));
 }
 
-let start = self.s.index();
+let start = self.s.cursor();
 
 let mut found = 0;
 while found < backticks {
@@ -384,7 +376,7 @@ impl<'s> Tokens<'s> {
 }
 
 if found == backticks {
-let end = self.s.index() - found as usize;
+let end = self.s.cursor() - found as usize;
 NodeKind::Raw(Arc::new(resolve_raw(
 column,
 backticks,
@@ -412,7 +404,7 @@ impl<'s> Tokens<'s> {
 display = true;
 }
 
-let start = self.s.index();
+let start = self.s.cursor();
 
 let mut escaped = false;
 let mut dollar = !display;
@@ -429,7 +421,7 @@ impl<'s> Tokens<'s> {
 }
 };
 
-let end = self.s.index()
+let end = self.s.cursor()
 - match (terminated, display) {
 (false, _) => 0,
 (true, false) => 1,
@@ -456,7 +448,7 @@ impl<'s> Tokens<'s> {
 
 fn ident(&mut self, start: usize) -> NodeKind {
 self.s.eat_while(is_id_continue);
-match self.s.eaten_from(start) {
+match self.s.from(start) {
 "none" => NodeKind::None,
 "auto" => NodeKind::Auto,
 "true" => NodeKind::Bool(true),
@@ -467,30 +459,29 @@ impl<'s> Tokens<'s> {
 
 fn number(&mut self, start: usize, c: char) -> NodeKind {
 // Read the first part (integer or fractional depending on `first`).
-self.s.eat_while(|c| c.is_ascii_digit());
+self.s.eat_while(char::is_ascii_digit);
 
 // Read the fractional part if not already done.
 // Make sure not to confuse a range for the decimal separator.
-if c != '.' && !self.s.rest().starts_with("..") && self.s.eat_if('.') {
-self.s.eat_while(|c| c.is_ascii_digit());
+if c != '.' && !self.s.at("..") && self.s.eat_if('.') {
+self.s.eat_while(char::is_ascii_digit);
 }
 
 // Read the exponent.
-let em = self.s.rest().starts_with("em");
-if !em && self.s.eat_if('e') || self.s.eat_if('E') {
-let _ = self.s.eat_if('+') || self.s.eat_if('-');
-self.s.eat_while(|c| c.is_ascii_digit());
+if !self.s.at("em") && self.s.eat_if(['e', 'E']) {
+self.s.eat_if(['+', '-']);
+self.s.eat_while(char::is_ascii_digit);
 }
 
 // Read the suffix.
-let suffix_start = self.s.index();
+let suffix_start = self.s.cursor();
 if !self.s.eat_if('%') {
-self.s.eat_while(|c| c.is_ascii_alphanumeric());
+self.s.eat_while(char::is_ascii_alphanumeric);
 }
 
 let number = self.s.get(start .. suffix_start);
-let suffix = self.s.eaten_from(suffix_start);
-let all = self.s.eaten_from(start);
+let suffix = self.s.from(suffix_start);
+let all = self.s.from(start);
 
 // Find out whether it is a simple number.
 if suffix.is_empty() {
@@ -575,13 +566,13 @@ impl<'s> Tokens<'s> {
 
 fn in_word(&self) -> bool {
 let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
-let prev = self.s.prev(1);
+let prev = self.s.scout(-2);
 let next = self.s.peek();
 alphanumeric(prev) && alphanumeric(next)
 }
 
 fn maybe_in_url(&self) -> bool {
-self.mode == TokenMode::Markup && self.s.eaten().ends_with(":/")
+self.mode == TokenMode::Markup && self.s.before().ends_with(":/")
 }
 }
 
@@ -610,7 +601,8 @@ fn keyword(ident: &str) -> Option<NodeKind> {
 })
 }
 
-/// The column index of a given index in the source string, given a column offset for the first line.
+/// The column index of a given index in the source string, given a column
+/// offset for the first line.
 #[inline]
 fn column(string: &str, index: usize, offset: usize) -> usize {
 let mut apply_offset = false;
@@ -634,6 +626,45 @@ fn column(string: &str, index: usize, offset: usize) -> usize {
 if apply_offset { res + offset } else { res }
 }
 
+/// Whether this character denotes a newline.
+#[inline]
+pub fn is_newline(character: char) -> bool {
+matches!(
+character,
+// Line Feed, Vertical Tab, Form Feed, Carriage Return.
+'\n' | '\x0B' | '\x0C' | '\r' |
+// Next Line, Line Separator, Paragraph Separator.
+'\u{0085}' | '\u{2028}' | '\u{2029}'
+)
+}
+
+/// Whether a string is a valid unicode identifier.
+///
+/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
+/// - `_` as a starting character,
+/// - `_` and `-` as continuing characters.
+///
+/// [uax31]: http://www.unicode.org/reports/tr31/
+#[inline]
+pub fn is_ident(string: &str) -> bool {
+let mut chars = string.chars();
+chars
+.next()
+.map_or(false, |c| is_id_start(c) && chars.all(is_id_continue))
+}
+
+/// Whether a character can start an identifier.
+#[inline]
+pub fn is_id_start(c: char) -> bool {
+c.is_xid_start() || c == '_'
+}
+
+/// Whether a character can continue an identifier.
+#[inline]
+pub fn is_id_continue(c: char) -> bool {
+c.is_xid_continue() || c == '_' || c == '-'
+}
+
 #[cfg(test)]
 #[allow(non_snake_case)]
 mod tests {
@@ -6,9 +6,11 @@ use std::ops::Range;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 
+use unscanny::Scanner;
+
 use crate::diag::TypResult;
 use crate::loading::{FileHash, Loader};
-use crate::parse::{is_newline, parse, Reparser, Scanner};
+use crate::parse::{is_newline, parse, Reparser};
 use crate::syntax::ast::Markup;
 use crate::syntax::{self, Category, GreenNode, RedNode};
 use crate::util::{PathExt, StrExt};
@@ -382,12 +384,12 @@ impl Line {
 let mut utf16_idx = utf16_offset;
 
 std::iter::from_fn(move || {
-s.eat_until(|c| {
+s.eat_until(|c: char| {
 utf16_idx += c.len_utf16();
 is_newline(c)
 });
 
-if s.eof() {
+if s.done() {
 return None;
 }
 
@@ -396,7 +398,7 @@ impl Line {
 }
 
 Some(Line {
-byte_idx: byte_offset + s.index(),
+byte_idx: byte_offset + s.cursor(),
 utf16_idx,
 })
 })
@@ -6,6 +6,7 @@ use std::path::Path;
 use std::sync::Arc;
 
 use tiny_skia as sk;
+use unscanny::Scanner;
 use walkdir::WalkDir;
 
 use typst::diag::Error;
@@ -15,7 +16,6 @@ use typst::geom::{Length, RgbaColor};
 use typst::library::layout::PageNode;
 use typst::library::text::{TextNode, TextSize};
 use typst::loading::FsLoader;
-use typst::parse::Scanner;
 use typst::source::SourceFile;
 use typst::syntax::Span;
 use typst::{bail, Context};
@@ -329,7 +329,7 @@ fn parse_metadata(source: &SourceFile) -> (Option<bool>, Vec<Error>) {
 };
 
 fn num(s: &mut Scanner) -> usize {
-s.eat_while(|c| c.is_numeric()).parse().unwrap()
+s.eat_while(char::is_numeric).parse().unwrap()
 }
 
 let comments =
@@ -348,7 +348,7 @@ fn parse_metadata(source: &SourceFile) -> (Option<bool>, Vec<Error>) {
 let end = if s.eat_if('-') { pos(&mut s) } else { start };
 let span = Span::new(source.id(), start, end);
 
-errors.push(Error::new(span, s.rest().trim()));
+errors.push(Error::new(span, s.after().trim()));
 }
 
 (compare_ref, errors)
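For reference, a minimal, self-contained sketch (not part of this commit) that exercises the unscanny Scanner calls the diff switches to. The method names are taken from the changed call sites above; the input string and assertions are made up for illustration.

    use unscanny::Scanner;

    fn main() {
        let mut s = Scanner::new("abc123 rest");
        let start = s.cursor();                              // was: s.index()
        let word = s.eat_while(char::is_ascii_alphanumeric); // fn item instead of closure
        assert_eq!(word, "abc123");
        assert_eq!(s.from(start), "abc123");                 // was: s.eaten_from(start)
        assert_eq!(s.before(), "abc123");                    // was: s.eaten()
        assert_eq!(s.after(), " rest");                      // was: s.rest()
        assert!(s.at(char::is_whitespace));                  // was: s.check_or(false, ...)
        assert_eq!(s.scout(-1), Some('3'));                  // was: s.prev(0)
        s.expect(' ');                                       // was: s.eat_assert(' ')
        assert!(!s.done());                                  // was: !s.eof()
    }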