This commit is contained in:
Laurenz 2022-04-16 22:42:49 +02:00
parent c5b3f8ee98
commit 4494b443bb
7 changed files with 23 additions and 5 deletions

View File

@ -214,6 +214,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| NodeKind::Shy
| NodeKind::EnDash
| NodeKind::EmDash
| NodeKind::Ellipsis
| NodeKind::Quote(_)
| NodeKind::Linebreak(_)
| NodeKind::Raw(_)

View File

@ -140,6 +140,7 @@ impl<'s> Tokens<'s> {
// Markup.
'~' => NodeKind::NonBreakingSpace,
'-' => self.hyph(),
'.' if self.s.eat_if("..") => NodeKind::Ellipsis,
'\'' => NodeKind::Quote(false),
'"' => NodeKind::Quote(true),
'*' if !self.in_word() => NodeKind::Star,
@ -216,7 +217,7 @@ impl<'s> Tokens<'s> {
// Comments, parentheses, code.
'/' | '[' | ']' | '{' | '}' | '#' |
// Markup
'~' | '\'' | '"' | '*' | '_' | '`' | '$' | '-' | '\\'
'~' | '-' | '.' | '\'' | '"' | '*' | '_' | '`' | '$' | '\\'
};
loop {
@ -224,12 +225,17 @@ impl<'s> Tokens<'s> {
TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
});
// Allow a single space, optionally preceded by `.` or `-`, if something
// alphanumeric follows directly. This leads to fewer text nodes,
// which is good for performance.
let mut s = self.s;
if !(s.eat_if(' ') && s.at(char::is_alphanumeric)) {
s.eat_if(['.', '-']);
s.eat_if(' ');
if !s.at(char::is_alphanumeric) {
break;
}
self.s.eat();
self.s = s;
}
NodeKind::Text(self.s.from(start).into())
@ -831,7 +837,7 @@ mod tests {
fn test_tokenize_text() {
// Test basic text.
t!(Markup[" /"]: "hello" => Text("hello"));
t!(Markup[" /"]: "hello-world" => Text("hello"), Minus, Text("world"));
t!(Markup[" /"]: "hello-world" => Text("hello-world"));
// Test code symbols in text.
t!(Markup[" /"]: "a():\"b" => Text("a():"), Quote(true), Text("b"));
@ -897,7 +903,7 @@ mod tests {
t!(Markup[" "]: "." => EnumNumbering(None));
t!(Markup[" "]: "1." => EnumNumbering(Some(1)));
t!(Markup[" "]: "1.a" => EnumNumbering(Some(1)), Text("a"));
t!(Markup[" /"]: "a1." => Text("a1."));
t!(Markup[" /"]: "a1." => Text("a1"), EnumNumbering(None));
}
#[test]

View File

@ -69,6 +69,7 @@ impl Markup {
NodeKind::Shy => Some(MarkupNode::Text('\u{00AD}'.into())),
NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())),
NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())),
NodeKind::Ellipsis => Some(MarkupNode::Text('\u{2026}'.into())),
NodeKind::Quote(d) => Some(MarkupNode::Quote(*d)),
NodeKind::Strong => node.cast().map(MarkupNode::Strong),
NodeKind::Emph => node.cast().map(MarkupNode::Emph),

View File

@ -131,6 +131,7 @@ impl Category {
NodeKind::Shy => Some(Category::Shortcut),
NodeKind::EnDash => Some(Category::Shortcut),
NodeKind::EmDash => Some(Category::Shortcut),
NodeKind::Ellipsis => Some(Category::Shortcut),
NodeKind::Escape(_) => Some(Category::Escape),
NodeKind::Not => Some(Category::Keyword),
NodeKind::And => Some(Category::Keyword),

View File

@ -599,6 +599,8 @@ pub enum NodeKind {
EnDash,
/// An em-dash: `---`.
EmDash,
/// An ellipsis: `...`.
Ellipsis,
/// A smart quote: `'` (`false`) or `"` (`true`).
Quote(bool),
/// A slash and the letter "u" followed by a hexadecimal unicode entity
@ -774,6 +776,7 @@ impl NodeKind {
| Self::NonBreakingSpace
| Self::EnDash
| Self::EmDash
| Self::Ellipsis
| Self::Quote(_)
| Self::Escape(_)
| Self::Strong
@ -869,6 +872,7 @@ impl NodeKind {
Self::Shy => "soft hyphen",
Self::EnDash => "en dash",
Self::EmDash => "em dash",
Self::Ellipsis => "ellipsis",
Self::Quote(false) => "single quote",
Self::Quote(true) => "double quote",
Self::Escape(_) => "escape sequence",
@ -992,6 +996,7 @@ impl Hash for NodeKind {
Self::Shy => {}
Self::EnDash => {}
Self::EmDash => {}
Self::Ellipsis => {}
Self::Quote(d) => d.hash(state),
Self::Escape(c) => c.hash(state),
Self::Strong => {}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.7 KiB

After

Width:  |  Height:  |  Size: 4.3 KiB

View File

@ -6,3 +6,7 @@ The non-breaking~space does work.
---
- En dash: --
- Em dash: ---
---
#set text("Roboto")
A... vs {"A..."}