diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs index 4800c8def..a9ffb216a 100644 --- a/crates/typst-syntax/src/lexer.rs +++ b/crates/typst-syntax/src/lexer.rs @@ -90,7 +90,7 @@ impl Lexer<'_> { self.error = None; let start = self.s.cursor(); match self.s.eat() { - Some(c) if c.is_whitespace() => self.whitespace(start, c), + Some(c) if is_space(c, self.mode) => self.whitespace(start, c), Some('/') if self.s.eat_if('/') => self.line_comment(), Some('/') if self.s.eat_if('*') => self.block_comment(), Some('*') if self.s.eat_if('/') => { @@ -108,7 +108,7 @@ impl Lexer<'_> { } fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind { - let more = self.s.eat_while(char::is_whitespace); + let more = self.s.eat_while(|c| is_space(c, self.mode)); let newlines = match c { ' ' if more.is_empty() => 0, _ => count_newlines(self.s.from(start)), @@ -628,6 +628,15 @@ fn keyword(ident: &str) -> Option { }) } +/// Whether a character will become a Space token in Typst +#[inline] +fn is_space(character: char, mode: LexMode) -> bool { + match mode { + LexMode::Markup => matches!(character, ' ' | '\t') || is_newline(character), + _ => character.is_whitespace(), + } +} + /// Whether a character is interpreted as a newline by Typst. #[inline] pub fn is_newline(character: char) -> bool { diff --git a/tests/ref/text/space.png b/tests/ref/text/space.png index d2b9a5aab..bae0e0a83 100644 Binary files a/tests/ref/text/space.png and b/tests/ref/text/space.png differ diff --git a/tests/typ/layout/par-justify-cjk.typ b/tests/typ/layout/par-justify-cjk.typ index cd25fca17..51c86c7b9 100644 --- a/tests/typ/layout/par-justify-cjk.typ +++ b/tests/typ/layout/par-justify-cjk.typ @@ -15,7 +15,7 @@ // Japanese typography is more complex, make sure it is at least a bit sensible. #set page(width: auto) #set par(justify: true) -#set text(lang: "jp", font: ("Linux Libertine", "Noto Serif CJK JP")) +#set text(lang: "ja", font: ("Linux Libertine", "Noto Serif CJK JP")) #rect(inset: 0pt, width: 80pt, fill: rgb("eee"))[ ウィキペディア(英: Wikipedia)は、世界中のボランティアの共同作業によって執筆及び作成されるフリーの多言語インターネット百科事典である。主に寄付に依って活動している非営利団体「ウィキメディア財団」が所有・運営している。 diff --git a/tests/typ/meta/numbering.typ b/tests/typ/meta/numbering.typ index 171eca1a0..9c0c9b66d 100644 --- a/tests/typ/meta/numbering.typ +++ b/tests/typ/meta/numbering.typ @@ -41,7 +41,7 @@ } --- -#set text(lang: "jp", font: ("Linux Libertine", "Noto Serif CJK JP")) +#set text(lang: "ja", font: ("Linux Libertine", "Noto Serif CJK JP")) #for i in range(0, 4) { numbering("イ", i) [ (or ] diff --git a/tests/typ/text/space.typ b/tests/typ/text/space.typ index a3fd6c0a0..9d29f3473 100644 --- a/tests/typ/text/space.typ +++ b/tests/typ/text/space.typ @@ -40,3 +40,14 @@ A#"\n" B --- // Test that trailing space does not force a line break. LLLLLLLLLLLLLLLLLL R _L_ + +--- +// Test that ideographic spaces are preserved. +#set text(lang: "ja", font: "Noto Serif CJK JP") + +だろうか? 何のために! 私は、 + +--- +// Test that thin spaces are preserved. +| | U+0020 regular space \ +| | U+2009 thin space