Do not parse special spaces as Space tokens (#3267)

This commit is contained in:
Peng Guanwen 2024-01-29 18:46:41 +08:00 committed by GitHub
parent 6207b3d9b0
commit 269860c571
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 24 additions and 4 deletions

View File

@ -90,7 +90,7 @@ impl Lexer<'_> {
self.error = None;
let start = self.s.cursor();
match self.s.eat() {
Some(c) if c.is_whitespace() => self.whitespace(start, c),
Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
Some('/') if self.s.eat_if('/') => self.line_comment(),
Some('/') if self.s.eat_if('*') => self.block_comment(),
Some('*') if self.s.eat_if('/') => {
@ -108,7 +108,7 @@ impl Lexer<'_> {
}
fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
let more = self.s.eat_while(char::is_whitespace);
let more = self.s.eat_while(|c| is_space(c, self.mode));
let newlines = match c {
' ' if more.is_empty() => 0,
_ => count_newlines(self.s.from(start)),
@ -628,6 +628,15 @@ fn keyword(ident: &str) -> Option<SyntaxKind> {
})
}
/// Decides whether `character` is lexed as part of a Space token.
///
/// In markup mode only the plain space, the tab, and characters accepted by
/// [`is_newline`] qualify. In every other mode, any character for which
/// [`char::is_whitespace`] holds qualifies.
#[inline]
fn is_space(character: char, mode: LexMode) -> bool {
    if let LexMode::Markup = mode {
        // Markup is deliberately narrow: other whitespace characters are not
        // treated as Space here.
        character == ' ' || character == '\t' || is_newline(character)
    } else {
        character.is_whitespace()
    }
}
/// Whether a character is interpreted as a newline by Typst.
#[inline]
pub fn is_newline(character: char) -> bool {

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.9 KiB

After

Width:  |  Height:  |  Size: 12 KiB

View File

@ -15,7 +15,7 @@
// Japanese typography is more complex, make sure it is at least a bit sensible.
#set page(width: auto)
#set par(justify: true)
#set text(lang: "jp", font: ("Linux Libertine", "Noto Serif CJK JP"))
#set text(lang: "ja", font: ("Linux Libertine", "Noto Serif CJK JP"))
#rect(inset: 0pt, width: 80pt, fill: rgb("eee"))[
ウィキペディア(英: Wikipediaは、世界中のボランティアの共同作業によって執筆及び作成されるフリーの多言語インターネット百科事典である。主に寄付に依って活動している非営利団体「ウィキメディア財団」が所有・運営している。

View File

@ -41,7 +41,7 @@
}
---
#set text(lang: "jp", font: ("Linux Libertine", "Noto Serif CJK JP"))
#set text(lang: "ja", font: ("Linux Libertine", "Noto Serif CJK JP"))
#for i in range(0, 4) {
numbering("イ", i)
[ (or ]

View File

@ -40,3 +40,14 @@ A#"\n" B
---
// Test that trailing space does not force a line break.
LLLLLLLLLLLLLLLLLL R _L_
---
// Test that ideographic spaces are preserved.
#set text(lang: "ja", font: "Noto Serif CJK JP")
だろうか? 何のために! 私は、
---
// Test that thin spaces are preserved.
| | U+0020 regular space \
|| U+2009 thin space