Lexer change: Allow emphasis in CJK text without spaces (#2648)
This commit is contained in:
parent
50ea3b4f16
commit
f4a81091f7
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -3117,6 +3117,7 @@ dependencies = [
|
||||
"tracing",
|
||||
"unicode-ident",
|
||||
"unicode-math-class",
|
||||
"unicode-script",
|
||||
"unicode-segmentation",
|
||||
"unscanny",
|
||||
]
|
||||
|
BIN
assets/fonts/NotoSerifCJKsc-Bold.otf
Normal file
BIN
assets/fonts/NotoSerifCJKsc-Bold.otf
Normal file
Binary file not shown.
BIN
assets/fonts/NotoSerifCJKtc-Bold.otf
Normal file
BIN
assets/fonts/NotoSerifCJKtc-Bold.otf
Normal file
Binary file not shown.
@ -23,5 +23,6 @@ serde = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
unicode-ident = { workspace = true }
|
||||
unicode-math-class = { workspace = true }
|
||||
unicode-script = { workspace = true }
|
||||
unicode-segmentation = { workspace = true }
|
||||
unscanny = { workspace = true }
|
||||
|
@ -1,5 +1,6 @@
|
||||
use ecow::{eco_format, EcoString};
|
||||
use unicode_ident::{is_xid_continue, is_xid_start};
|
||||
use unicode_script::{Script, UnicodeScript};
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
use unscanny::Scanner;
|
||||
|
||||
@ -343,10 +344,18 @@ impl Lexer<'_> {
|
||||
}
|
||||
|
||||
fn in_word(&self) -> bool {
|
||||
let alphanum = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
|
||||
let wordy = |c: Option<char>| {
|
||||
c.map_or(false, |c| {
|
||||
c.is_alphanumeric()
|
||||
&& !matches!(
|
||||
c.script(),
|
||||
Script::Han | Script::Hiragana | Script::Katakana
|
||||
)
|
||||
})
|
||||
};
|
||||
let prev = self.s.scout(-2);
|
||||
let next = self.s.peek();
|
||||
alphanum(prev) && alphanum(next)
|
||||
wordy(prev) && wordy(next)
|
||||
}
|
||||
|
||||
fn space_or_end(&self) -> bool {
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 8.4 KiB After Width: | Height: | Size: 17 KiB |
Binary file not shown.
Before Width: | Height: | Size: 1.4 KiB After Width: | Height: | Size: 1.4 KiB |
@ -7,6 +7,13 @@ _Emphasized and *strong* words!_
|
||||
// Inside of a word it's a normal underscore or star.
|
||||
hello_world Nutzer*innen
|
||||
|
||||
// CJK text does not need spaces around emphasis markers.
|
||||
中文一般使用*粗体*或者_楷体_来表示强调。
|
||||
|
||||
日本語では、*太字*や_斜体_を使って強調します。
|
||||
|
||||
中文中混有*Strong*和_Emphasis_。
|
||||
|
||||
// Can contain paragraph in nested content block.
|
||||
_Still #[
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user