New syntax features 👔

- Forced line breaks with backslash followed by whitespace
- (Multiline) raw text in backticks
- Set font class fallbacks with [font.family] (e.g. [font.family: monospace=("CMU Typewriter Text")])
- More sophisticated procedure to find end of function, which accounts for comments, strings, raw text and nested functions (this is a mix of a feature and a bug fix)
This commit is contained in:
Laurenz 2020-02-13 21:58:49 +01:00
parent 60099aed50
commit 1658b00282
7 changed files with 272 additions and 117 deletions

View File

@ -164,7 +164,8 @@ impl<'a> ModelLayouter<'a> {
match node {
Space => self.layout_space(),
Newline => self.layout_paragraph(),
Parbreak => self.layout_paragraph(),
Linebreak => self.layouter.finish_line(),
Text(text) => {
if self.style.text.variant.style == FontStyle::Italic {
@ -175,10 +176,6 @@ impl<'a> ModelLayouter<'a> {
decorate(self, Decoration::Bold);
}
if self.style.text.monospace {
decorate(self, Decoration::Monospace);
}
self.layout_text(text).await;
}
@ -192,12 +189,28 @@ impl<'a> ModelLayouter<'a> {
decorate(self, Decoration::Bold);
}
ToggleMonospace => {
self.style.text.monospace = !self.style.text.monospace;
decorate(self, Decoration::Monospace);
Raw(lines) => {
// TODO: Make this more efficient.
let fallback = self.style.text.fallback.clone();
self.style.text.fallback.list.insert(0, "monospace".to_string());
self.style.text.fallback.flatten();
// Layout the first line.
let mut iter = lines.iter();
if let Some(line) = iter.next() {
self.layout_text(line).await;
}
// Put a newline before each following line.
for line in iter {
self.layouter.finish_line();
self.layout_text(line).await;
}
self.style.text.fallback = fallback;
}
Node::Model(model) => {
Model(model) => {
self.layout(Spanned::new(model.as_ref(), *span)).await;
}
}

View File

@ -118,23 +118,13 @@ impl<'a> TextLayouter<'a> {
variant.weight.0 += 300;
}
let queried = if self.ctx.style.monospace {
loader.get(FontQuery {
// FIXME: This is a hack.
fallback: std::iter::once("source code pro")
.chain(self.ctx.style.fallback.iter()),
variant,
c,
}).await
} else {
loader.get(FontQuery {
fallback: self.ctx.style.fallback.iter(),
variant,
c,
}).await
let query = FontQuery {
fallback: self.ctx.style.fallback.iter(),
variant,
c,
};
if let Some((font, index)) = queried {
if let Some((font, index)) = loader.get(query).await {
// Determine the width of the char.
let header = font.read_table::<Header>().ok()?;
let font_unit_ratio = 1.0 / (header.units_per_em as f32);

View File

@ -9,21 +9,45 @@ function! {
pub struct FontFamilyFunc {
body: Option<SyntaxModel>,
list: Vec<String>,
classes: Vec<(String, Vec<String>)>,
}
parse(header, body, ctx, f) {
let list = header.args.pos.get_all::<StringLike>(&mut f.errors)
.map(|s| s.0.to_lowercase())
.collect();
let tuples: Vec<_> = header.args.key
.get_all::<String, Tuple>(&mut f.errors)
.collect();
let classes = tuples.into_iter()
.map(|(class, mut tuple)| {
let fallback = tuple.get_all::<StringLike>(&mut f.errors)
.map(|s| s.0.to_lowercase())
.collect();
(class.to_lowercase(), fallback)
})
.collect();
FontFamilyFunc {
body: body!(opt: body, ctx, f),
list: header.args.pos.get_all::<StringLike>(&mut f.errors)
.map(|s| s.0.to_lowercase())
.collect(),
list,
classes,
}
}
layout(self, ctx, errors) {
styled(&self.body, ctx, Some(&self.list),
|s, list| {
s.fallback.list = list.clone();
styled(&self.body, ctx, Some(()),
|s, _| {
if !self.list.is_empty() {
s.fallback.list = self.list.clone();
}
for (class, fallback) in &self.classes {
s.fallback.set_class_list(class.clone(), fallback.clone());
}
s.fallback.flatten();
})
}

View File

@ -24,8 +24,6 @@ pub struct TextStyle {
/// Whether the bolder toggle is active or inactive. This determines
/// whether the next `*` adds or removes font weight.
pub bolder: bool,
/// Whether the monospace toggle is active or inactive.
pub monospace: bool,
/// The base font size.
pub base_font_size: Size,
/// The font scale to apply on the base font size.
@ -79,7 +77,6 @@ impl Default for TextStyle {
weight: FontWeight(400),
},
bolder: false,
monospace: false,
base_font_size: Size::pt(11.0),
font_scale: 1.0,
word_spacing_scale: 0.25,

View File

@ -62,15 +62,17 @@ pub enum Node {
/// Whitespace containing less than two newlines.
Space,
/// Whitespace with more than two newlines.
Newline,
Parbreak,
/// A forced line break.
Linebreak,
/// Plain text.
Text(String),
/// Lines of raw text.
Raw(Vec<String>),
/// Italics were enabled / disabled.
ToggleItalic,
/// Bolder was enabled / disabled.
ToggleBolder,
/// Monospace was enabled / disabled.
ToggleMonospace,
/// A submodel, typically a function invocation.
Model(Box<dyn Model>),
}
@ -80,11 +82,12 @@ impl PartialEq for Node {
use Node::*;
match (self, other) {
(Space, Space) => true,
(Newline, Newline) => true,
(Parbreak, Parbreak) => true,
(Linebreak, Linebreak) => true,
(Text(a), Text(b)) => a == b,
(Raw(a), Raw(b)) => a == b,
(ToggleItalic, ToggleItalic) => true,
(ToggleBolder, ToggleBolder) => true,
(ToggleMonospace, ToggleMonospace) => true,
(Model(a), Model(b)) => a == b,
_ => false,
}
@ -107,6 +110,7 @@ pub enum Decoration {
/// ^^^^^^
/// ```
InvalidFuncName,
/// A key of a keyword argument:
/// ```typst
/// [box: width=5cm]
@ -119,12 +123,11 @@ pub enum Decoration {
/// ^^^^ ^^^^^
/// ```
ObjectKey,
/// An italic word.
Italic,
/// A bold word.
Bold,
/// A monospace word.
Monospace,
}
impl dyn Model {

View File

@ -33,10 +33,12 @@ pub fn parse(start: Position, src: &str, ctx: ParseContext) -> Pass<SyntaxModel>
let span = token.span;
let node = match token.v {
Token::LineComment(_) | Token::BlockComment(_) => continue,
// Only at least two newlines mean a _real_ newline indicating a
// paragraph break.
Token::Space(newlines) => if newlines >= 2 {
Node::Newline
Node::Parbreak
} else {
Node::Space
},
@ -55,10 +57,18 @@ pub fn parse(start: Position, src: &str, ctx: ParseContext) -> Pass<SyntaxModel>
Token::Star => Node::ToggleBolder,
Token::Underscore => Node::ToggleItalic,
Token::Backtick => Node::ToggleMonospace,
Token::Text(text) => Node::Text(text.to_string()),
Token::Backslash => Node::Linebreak,
Token::LineComment(_) | Token::BlockComment(_) => continue,
Token::Raw { raw, terminated } => {
if !terminated {
feedback.errors.push(err!(Span::at(span.end);
"expected backtick"));
}
Node::Raw(unescape_raw(raw))
}
Token::Text(text) => Node::Text(text.to_string()),
other => {
feedback.errors.push(err!(span; "unexpected {}", other.name()));
@ -219,7 +229,7 @@ impl<'s> FuncParser<'s> {
self.expected_at("quote", first.span.end);
}
take!(Expr::Str(unescape(string)))
take!(Expr::Str(unescape_string(string)))
}
Token::ExprNumber(n) => take!(Expr::Number(n)),
@ -433,36 +443,57 @@ impl<'s> FuncParser<'s> {
}
}
/// Unescape a string.
fn unescape(string: &str) -> String {
/// Unescape a string: `the string is \"this\"` => `the string is "this"`.
fn unescape_string(string: &str) -> String {
let mut s = String::with_capacity(string.len());
let mut escaped = false;
let mut iter = string.chars();
for c in string.chars() {
while let Some(c) = iter.next() {
if c == '\\' {
if escaped {
s.push('\\');
match iter.next() {
Some('\\') => s.push('\\'),
Some('"') => s.push('"'),
Some('n') => s.push('\n'),
Some('t') => s.push('\t'),
Some(c) => { s.push('\\'); s.push(c); }
None => s.push('\\'),
}
escaped = !escaped;
} else {
if escaped {
match c {
'"' => s.push('"'),
'n' => s.push('\n'),
't' => s.push('\t'),
c => { s.push('\\'); s.push(c); }
}
} else {
s.push(c);
}
escaped = false;
s.push(c);
}
}
s
}
/// Unescape raw markup into lines.
fn unescape_raw(raw: &str) -> Vec<String> {
let mut lines = Vec::new();
let mut s = String::new();
let mut iter = raw.chars().peekable();
while let Some(c) = iter.next() {
if c == '\\' {
match iter.next() {
Some('`') => s.push('`'),
Some(c) => { s.push('\\'); s.push(c); }
None => s.push('\\'),
}
} else if is_newline_char(c) {
if c == '\r' && iter.peek() == Some(&'\n') {
iter.next();
}
lines.push(std::mem::replace(&mut s, String::new()));
} else {
s.push(c);
}
}
lines.push(s);
lines
}
#[cfg(test)]
#[allow(non_snake_case)]
@ -474,8 +505,8 @@ mod tests {
use Decoration::*;
use Node::{
Space as S, Newline as N,
ToggleItalic as Italic, ToggleBolder as Bold, ToggleMonospace as Mono,
Space as S, ToggleItalic as Italic, ToggleBolder as Bold,
Parbreak, Linebreak,
};
use Expr::{Number as Num, Size as Sz, Bool};
@ -484,6 +515,13 @@ mod tests {
fn Pt(points: f32) -> Expr { Expr::Size(Size::pt(points)) }
fn T(text: &str) -> Node { Node::Text(text.to_string()) }
/// Build a `Node::Raw` from a comma-separated list of line expressions.
///
/// Each expression is converted to an owned `String`; a trailing comma is
/// accepted and an empty invocation yields a node without any lines.
macro_rules! raw {
    ($($text:expr),* $(,)?) => {
        Node::Raw(vec![$(ToString::to_string(&$text)),*])
    };
}
/// Create a tuple expression.
macro_rules! tuple {
($($items:expr),* $(,)?) => {
@ -568,7 +606,7 @@ mod tests {
#[test]
fn unescape_strings() {
fn test(string: &str, expected: &str) {
assert_eq!(unescape(string), expected.to_string());
assert_eq!(unescape_string(string), expected.to_string());
}
test(r#"hello world"#, "hello world");
@ -577,24 +615,49 @@ mod tests {
test(r#"a\\"#, "a\\");
test(r#"a\\\nbc"#, "a\\\nbc");
test(r#"a\tbc"#, "a\tbc");
test("🌎", "🌎");
test(r"🌎", "🌎");
test(r"🌎\", r"🌎\");
test(r"\🌎", r"\🌎");
}
#[test]
fn parse_flat_nodes() {
fn unescape_raws() {
fn test(raw: &str, expected: Node) {
let vec = if let Node::Raw(v) = expected { v } else { panic!() };
assert_eq!(unescape_raw(raw), vec);
}
test("raw\\`", raw!["raw`"]);
test("raw\ntext", raw!["raw", "text"]);
test("a\r\nb", raw!["a", "b"]);
test("a\n\nb", raw!["a", "", "b"]);
test("a\r\x0Bb", raw!["a", "", "b"]);
test("a\r\n\r\nb", raw!["a", "", "b"]);
test("raw\\a", raw!["raw\\a"]);
test("raw\\", raw!["raw\\"]);
}
#[test]
fn parse_basic_nodes() {
// Basic nodes
p!("" => []);
p!("hi" => [T("hi")]);
p!("*hi" => [Bold, T("hi")]);
p!("hi_" => [T("hi"), Italic]);
p!("`py`" => [Mono, T("py"), Mono]);
p!("hi you" => [T("hi"), S, T("you")]);
p!("hi// you\nw" => [T("hi"), S, T("w")]);
p!("\n\n\nhello" => [N, T("hello")]);
p!("\n\n\nhello" => [Parbreak, T("hello")]);
p!("first//\n//\nsecond" => [T("first"), S, S, T("second")]);
p!("first//\n \nsecond" => [T("first"), N, T("second")]);
p!("first//\n \nsecond" => [T("first"), Parbreak, T("second")]);
p!("first/*\n \n*/second" => [T("first"), T("second")]);
p!("💜\n\n 🌍" => [T("💜"), N, T("🌍")]);
p!(r"a\ b" => [T("a"), Linebreak, S, T("b")]);
p!("💜\n\n 🌍" => [T("💜"), Parbreak, T("🌍")]);
// Raw markup
p!("`py`" => [raw!["py"]]);
p!("[val][`hi]`]" => [func!("val"; [raw!["hi]"]])]);
p!("`hi\nyou" => [raw!["hi", "you"]], [(1:3, 1:3, "expected backtick")]);
p!("`hi\\`du`" => [raw!["hi`du"]]);
// Spanned nodes
p!("Hi" => [(0:0, 0:2, T("Hi"))]);
@ -924,7 +987,7 @@ mod tests {
// Newline before function
p!(" \n\r\n[val]" =>
[(0:0, 2:0, N), (2:0, 2:5, func!((0:1, 0:4, "val")))], [],
[(0:0, 2:0, Parbreak), (2:0, 2:5, func!((0:1, 0:4, "val")))], [],
[(2:1, 2:4, ValidFuncName)],
);

View File

@ -83,8 +83,17 @@ pub enum Token<'s> {
Star,
/// An underscore in body-text.
Underscore,
/// A backtick in body-text.
Backtick,
/// A backslash followed by whitespace in text.
Backslash,
/// Raw text.
Raw {
/// The raw text (not yet unescaped as for strings).
raw: &'s str,
/// Whether the closing backtick was present.
terminated: bool,
},
/// Any other consecutive string.
Text(&'s str),
@ -115,8 +124,9 @@ impl<'s> Token<'s> {
ExprBool(_) => "bool",
Star => "star",
Underscore => "underscore",
Backtick => "backtick",
Text(_) => "invalid identifier",
Backslash => "backslash",
Raw { .. } => "raw text",
Text(_) => "text",
Invalid("]") => "closing bracket",
Invalid("*/") => "end of block comment",
Invalid(_) => "invalid token",
@ -206,7 +216,7 @@ impl<'s> Iterator for Tokens<'s> {
// Style toggles.
'*' if self.mode == Body => Star,
'_' if self.mode == Body => Underscore,
'`' if self.mode == Body => Backtick,
'`' if self.mode == Body => self.parse_raw(),
// An escaped thing.
'\\' if self.mode == Body => self.parse_escaped(),
@ -281,7 +291,7 @@ impl<'s> Tokens<'s> {
}
fn parse_function(&mut self, start: Position) -> Token<'s> {
let (header, terminated) = self.read_function_part();
let (header, terminated) = self.read_function_part(Header);
self.eat();
if self.peek() != Some('[') {
@ -291,7 +301,7 @@ impl<'s> Tokens<'s> {
self.eat();
let body_start = self.pos() - start;
let (body, terminated) = self.read_function_part();
let (body, terminated) = self.read_function_part(Body);
let body_end = self.pos() - start;
let span = Span::new(body_start, body_end);
@ -300,60 +310,73 @@ impl<'s> Tokens<'s> {
Function { header, body: Some(Spanned { v: body, span }), terminated }
}
fn read_function_part(&mut self) -> (&'s str, bool) {
let mut escaped = false;
let mut in_string = false;
let mut depth = 0;
fn read_function_part(&mut self, mode: TokenizationMode) -> (&'s str, bool) {
let start = self.index();
let mut terminated = false;
self.read_string_until(|n| {
match n {
'"' if !escaped => in_string = !in_string,
'[' if !escaped && !in_string => depth += 1,
']' if !escaped && !in_string => {
if depth == 0 {
return true;
} else {
depth -= 1;
}
}
'\\' => escaped = !escaped,
_ => escaped = false,
while let Some(n) = self.peek() {
if n == ']' {
terminated = true;
break;
}
false
}, false, 0, 0)
self.eat();
match n {
'[' => { self.parse_function(Position::ZERO); }
'/' if self.peek() == Some('/') => { self.parse_line_comment(); }
'/' if self.peek() == Some('*') => { self.parse_block_comment(); }
'"' if mode == Header => { self.parse_string(); }
'`' if mode == Body => { self.parse_raw(); }
'\\' => { self.eat(); }
_ => {}
}
}
let end = self.index();
(&self.src[start .. end], terminated)
}
/// Tokenize a string expression.
///
/// Delegates to `read_until_unescaped('"')` and wraps the text it returns,
/// together with its `terminated` flag, in an `ExprStr` token.
fn parse_string(&mut self) -> Token<'s> {
let (string, terminated) = self.read_until_unescaped('"');
ExprStr { string, terminated }
}
/// Tokenize raw text.
///
/// Delegates to `read_until_unescaped('`')` and wraps the result in a `Raw`
/// token. The text is stored as found in the source, i.e. backslash escapes
/// are not resolved here.
fn parse_raw(&mut self) -> Token<'s> {
let (raw, terminated) = self.read_until_unescaped('`');
Raw { raw, terminated }
}
fn read_until_unescaped(&mut self, c: char) -> (&'s str, bool) {
let mut escaped = false;
let (string, terminated) = self.read_string_until(|n| {
self.read_string_until(|n| {
match n {
'"' if !escaped => return true,
n if n == c && !escaped => return true,
'\\' => escaped = !escaped,
_ => escaped = false,
}
false
}, true, 0, -1);
ExprStr { string, terminated }
}, true, 0, -1)
}
fn parse_escaped(&mut self) -> Token<'s> {
fn is_escapable(c: char) -> bool {
match c {
'[' | ']' | '\\' | '/' | '*' | '_' | '`' => true,
'[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' => true,
_ => false,
}
}
Text(match self.peek() {
match self.peek() {
Some(c) if is_escapable(c) => {
let index = self.index();
self.eat();
&self.src[index .. index + c.len_utf8()]
Text(&self.src[index .. index + c.len_utf8()])
}
_ => "\\"
})
Some(c) if c.is_whitespace() => Backslash,
Some(_) => Text("\\"),
None => Backslash,
}
}
fn parse_expr(&mut self, text: &'s str) -> Token<'s> {
@ -462,6 +485,7 @@ pub fn is_identifier(string: &str) -> bool {
true
}
#[cfg(test)]
mod tests {
use super::super::test::check;
@ -483,6 +507,11 @@ mod tests {
Token::ExprStr { string, terminated }
}
/// Shorthand for building a `Token::Raw` from its text and `terminated` flag.
///
/// The function carries the variant's name, which is why the snake-case lint
/// is silenced for it.
#[allow(non_snake_case)]
fn Raw(raw: &'static str, terminated: bool) -> Token<'static> {
Token::Raw { raw, terminated }
}
/// Test whether the given string tokenizes into the given list of tokens.
macro_rules! t {
($mode:expr, $source:expr => [$($tokens:tt)*]) => {
@ -540,10 +569,15 @@ mod tests {
#[test]
fn tokenize_body_only_tokens() {
t!(Body, "_*`" => [Underscore, Star, Backtick]);
t!(Body, "_*" => [Underscore, Star]);
t!(Body, "***" => [Star, Star, Star]);
t!(Body, "[func]*bold*" => [func!("func", None, true), Star, T("bold"), Star]);
t!(Body, "hi_you_ there" => [T("hi"), Underscore, T("you"), Underscore, S(0), T("there")]);
t!(Body, "`raw`" => [Raw("raw", true)]);
t!(Body, "`[func]`" => [Raw("[func]", true)]);
t!(Body, "`]" => [Raw("]", false)]);
t!(Body, "`\\``" => [Raw("\\`", true)]);
t!(Body, "\\ " => [Backslash, S(0)]);
t!(Header, "_*`" => [Invalid("_*`")]);
}
@ -598,15 +632,46 @@ mod tests {
t!(Header, "]" => [Invalid("]")]);
}
// Checks that the tokenizer picks the right closing bracket of a function
// when the header or body contains strings, raw text, comments or escaped
// brackets. The last argument of `func!` is the expected `terminated` flag.
#[test]
fn tokenize_correct_end_of_function() {
// End of function with strings and backticks in headers.
// A backtick only starts raw text in body mode, so in a header the
// following `]` still closes the function.
t!(Body, r#"[f: "]"# => [func!(r#"f: "]"#, None, false)]);
t!(Body, "[f: \"s\"]" => [func!("f: \"s\"", None, true)]);
t!(Body, r#"[f: \"\"\"]"# => [func!(r#"f: \"\"\""#, None, true)]);
t!(Body, "[f: `]" => [func!("f: `", None, true)]);
// End of function with strings and backticks in bodies.
// A double quote only starts a string in header mode, so in a body the
// following `]` still closes the function.
t!(Body, "[f][\"]" => [func!("f", Some((0:4, 0:5, "\"")), true)]);
t!(Body, r#"[f][\"]"# => [func!("f", Some((0:4, 0:6, r#"\""#)), true)]);
t!(Body, "[f][`]" => [func!("f", Some((0:4, 0:6, "`]")), false)]);
t!(Body, "[f][\\`]" => [func!("f", Some((0:4, 0:6, "\\`")), true)]);
t!(Body, "[f][`raw`]" => [func!("f", Some((0:4, 0:9, "`raw`")), true)]);
t!(Body, "[f][`raw]" => [func!("f", Some((0:4, 0:9, "`raw]")), false)]);
t!(Body, "[f][`raw]`]" => [func!("f", Some((0:4, 0:10, "`raw]`")), true)]);
t!(Body, "[f][`\\`]" => [func!("f", Some((0:4, 0:8, "`\\`]")), false)]);
t!(Body, "[f][`\\\\`]" => [func!("f", Some((0:4, 0:8, "`\\\\`")), true)]);
// End of function with comments
t!(Body, "[f][/*]" => [func!("f", Some((0:4, 0:7, "/*]")), false)]);
t!(Body, "[f][/*`*/]" => [func!("f", Some((0:4, 0:9, "/*`*/")), true)]);
t!(Body, "[f: //]\n]" => [func!("f: //]\n", None, true)]);
t!(Body, "[f: \"//]\n]" => [func!("f: \"//]\n]", None, false)]);
// End of function with escaped brackets
t!(Body, "[f][\\]]" => [func!("f", Some((0:4, 0:6, "\\]")), true)]);
t!(Body, "[f][\\[]" => [func!("f", Some((0:4, 0:6, "\\[")), true)]);
}
#[test]
fn tokenize_escaped_symbols() {
t!(Body, r"\\" => [T(r"\")]);
t!(Body, r"\[" => [T("[")]);
t!(Body, r"\]" => [T("]")]);
t!(Body, r"\*" => [T("*")]);
t!(Body, r"\_" => [T("_")]);
t!(Body, r"\`" => [T("`")]);
t!(Body, r"\/" => [T("/")]);
t!(Body, r"\\" => [T(r"\")]);
t!(Body, r"\[" => [T("[")]);
t!(Body, r"\]" => [T("]")]);
t!(Body, r"\*" => [T("*")]);
t!(Body, r"\_" => [T("_")]);
t!(Body, r"\`" => [T("`")]);
t!(Body, r"\/" => [T("/")]);
t!(Body, r#"\""# => [T("\"")]);
}
#[test]