Rename is_cjk_script() to is_cj_script() (#2829)

2023-12-05 13:04:38 -05:00 · 2023-12-05 13:04:38 -05:00 · 058f24f95e
commit 058f24f95e
parent 8a69d3bfb6
2 changed files with 22 additions and 20 deletions
--- a/crates/typst/src/layout/inline/mod.rs
+++ b/crates/typst/src/layout/inline/mod.rs
@@ -7,7 +7,7 @@ use unicode_script::{Script, UnicodeScript};

 use self::linebreak::{breakpoints, Breakpoint};
 use self::shaping::{
-    is_gb_style, is_of_cjk_script, shape, ShapedGlyph, ShapedText, BEGIN_PUNCT_PAT,
+    is_gb_style, is_of_cj_script, shape, ShapedGlyph, ShapedText, BEGIN_PUNCT_PAT,
    END_PUNCT_PAT,
 };
 use crate::diag::{bail, SourceResult};
@@ -639,16 +639,16 @@ fn add_cjk_latin_spacing(items: &mut [Item]) {
                    .and_then(|shaped| shaped.glyphs.first())
            });

-            // Case 1: CJK followed by a Latin character
-            if glyph.is_cjk_script() && next.map_or(false, |g| g.is_letter_or_number()) {
+            // Case 1: CJ followed by a Latin character
+            if glyph.is_cj_script() && next.map_or(false, |g| g.is_letter_or_number()) {
                // The spacing is default to 1/4 em, and can be shrunk to 1/8 em.
                glyph.x_advance += Em::new(0.25);
                glyph.adjustability.shrinkability.1 += Em::new(0.125);
                text.width += Em::new(0.25).at(text.size);
            }

-            // Case 2: Latin followed by a CJK character
-            if glyph.is_cjk_script() && prev.map_or(false, |g| g.is_letter_or_number()) {
+            // Case 2: Latin followed by a CJ character
+            if glyph.is_cj_script() && prev.map_or(false, |g| g.is_letter_or_number()) {
                glyph.x_advance += Em::new(0.25);
                glyph.x_offset += Em::new(0.25);
                glyph.adjustability.shrinkability.0 += Em::new(0.125);
@@ -1028,7 +1028,7 @@ fn line<'a>(
        // Deal with CJK punctuation at line ends.
        let gb_style = is_gb_style(shaped.lang, shaped.region);
        let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
-            || (p.cjk_latin_spacing && trimmed.ends_with(is_of_cjk_script));
+            || (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script));

        // Usually, we don't want to shape an empty string because:
        // - We don't want the height of trimmed whitespace in a different
@@ -1056,7 +1056,7 @@ fn line<'a>(
                        punct.shrink_right(shrink_amount);
                        reshaped.width -= shrink_amount.at(reshaped.size);
                    } else if p.cjk_latin_spacing
-                        && last_glyph.is_cjk_script()
+                        && last_glyph.is_cj_script()
                        && (last_glyph.x_advance - last_glyph.x_offset) > Em::one()
                    {
                        // If the last glyph is a CJK character adjusted by [`add_cjk_latin_spacing`],
@@ -1078,10 +1078,10 @@ fn line<'a>(
        }
    }

-    // Deal with CJK characters at line starts.
+    // Deal with CJ characters at line starts.
    let text = &p.bidi.text[range.start..end];
    let maybe_adjust_first_glyph = text.starts_with(BEGIN_PUNCT_PAT)
-        || (p.cjk_latin_spacing && text.starts_with(is_of_cjk_script));
+        || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script));

    // Reshape the start item if it's split in half.
    let mut first = None;
@@ -1116,7 +1116,7 @@ fn line<'a>(
                    reshaped.width -= amount_abs;
                    width -= amount_abs;
                } else if p.cjk_latin_spacing
-                    && first_glyph.is_cjk_script()
+                    && first_glyph.is_cj_script()
                    && first_glyph.x_offset > Em::zero()
                {
                    // If the first glyph is a CJK character adjusted by [`add_cjk_latin_spacing`],
--- a/crates/typst/src/layout/inline/shaping.rs
+++ b/crates/typst/src/layout/inline/shaping.rs
@@ -107,9 +107,9 @@ impl ShapedGlyph {
        self.is_justifiable
    }

-    /// Whether the glyph is part of a CJK script.
-    pub fn is_cjk_script(&self) -> bool {
-        is_cjk_script(self.c, self.script)
+    /// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK).
+    pub fn is_cj_script(&self) -> bool {
+        is_cj_script(self.c, self.script)
    }

    pub fn is_cjk_punctuation(&self) -> bool {
@@ -360,7 +360,7 @@ impl<'a> ShapedText<'a> {
    pub fn cjk_justifiable_at_last(&self) -> bool {
        self.glyphs
            .last()
-            .map(|g| g.is_cjk_script() || g.is_cjk_punctuation())
+            .map(|g| g.is_cj_script() || g.is_cjk_punctuation())
            .unwrap_or(false)
    }

@@ -934,15 +934,17 @@ fn is_space(c: char) -> bool {
    matches!(c, ' ' | '\u{00A0}' | '　')
 }

-/// Whether the glyph is part of a CJK script.
+/// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK).
 #[inline]
-pub(super) fn is_of_cjk_script(c: char) -> bool {
-    is_cjk_script(c, c.script())
+pub(super) fn is_of_cj_script(c: char) -> bool {
+    is_cj_script(c, c.script())
 }

-/// Whether the glyph is part of a CJK script.
+/// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK).
+/// The function is dedicated to typesetting Chinese or Japanese, which do not
+/// have spaces between words, so K is not checked here.
 #[inline]
-fn is_cjk_script(c: char, script: Script) -> bool {
+fn is_cj_script(c: char, script: Script) -> bool {
    use Script::*;
    // U+30FC: Katakana-Hiragana Prolonged Sound Mark
    matches!(script, Hiragana | Katakana | Han) || c == '\u{30FC}'
@@ -1016,7 +1018,7 @@ fn is_justifiable(
 ) -> bool {
    // GB style is not relevant here.
    is_space(c)
-        || is_cjk_script(c, script)
+        || is_cj_script(c, script)
        || is_cjk_left_aligned_punctuation(c, x_advance, stretchability, true)
        || is_cjk_right_aligned_punctuation(c, x_advance, stretchability)
        || is_cjk_center_aligned_punctuation(c, true)