diff --git a/assets/fonts/NotoSerifCJKjp-Regular.otf b/assets/fonts/NotoSerifCJKjp-Regular.otf new file mode 100644 index 000000000..6daac2061 Binary files /dev/null and b/assets/fonts/NotoSerifCJKjp-Regular.otf differ diff --git a/assets/fonts/NotoSerifCJKsc-Regular.otf b/assets/fonts/NotoSerifCJKsc-Regular.otf index 4c5f715ba..4f8e54046 100644 Binary files a/assets/fonts/NotoSerifCJKsc-Regular.otf and b/assets/fonts/NotoSerifCJKsc-Regular.otf differ diff --git a/assets/fonts/NotoSerifCJKtc-Regular.otf b/assets/fonts/NotoSerifCJKtc-Regular.otf new file mode 100644 index 000000000..7f6da6f79 Binary files /dev/null and b/assets/fonts/NotoSerifCJKtc-Regular.otf differ diff --git a/library/src/layout/par.rs b/library/src/layout/par.rs index 2edbd93d7..96d45f2f4 100644 --- a/library/src/layout/par.rs +++ b/library/src/layout/par.rs @@ -8,7 +8,8 @@ use crate::layout::AlignElem; use crate::math::EquationElem; use crate::prelude::*; use crate::text::{ - shape, LinebreakElem, Quoter, Quotes, ShapedText, SmartQuoteElem, SpaceElem, TextElem, + is_gb_style, shape, LinebreakElem, Quoter, Quotes, ShapedText, SmartQuoteElem, + SpaceElem, TextElem, }; /// Arrange text, spacing and inline-level elements into a paragraph. @@ -354,6 +355,13 @@ impl<'a> Item<'a> { } } + fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> { + match self { + Self::Text(shaped) => Some(shaped), + _ => None, + } + } + /// The text length of the item. fn len(&self) -> usize { match self { @@ -715,9 +723,12 @@ fn shape_range<'a>( spans: &SpanMapper, styles: StyleChain<'a>, ) { + let lang = TextElem::lang_in(styles); + let region = TextElem::region_in(styles); let mut process = |range: Range, level: BidiLevel| { let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL }; - let shaped = shape(vt, range.start, &bidi.text[range], spans, styles, dir); + let shaped = + shape(vt, range.start, &bidi.text[range], spans, styles, dir, lang, region); items.push(Item::Text(shaped)); }; @@ -905,15 +916,11 @@ fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec 0.0 { - ratio += 1.0; - } else { - ratio -= 1.0; - } + if ratio > 1.0 { + // We should stretch the line above its stretchability. Now calculate the extra amount. + let extra_stretch = (delta - adjust) / attempt.justifiables() as f64; + // Normalize the amount by half Em size. + ratio = 1.0 + extra_stretch / (em / 2.0); } // Determine the cost of the line. @@ -1124,7 +1131,9 @@ fn line<'a>( let base = expanded.end - shaped.text.len(); let start = range.start.max(base); let text = &p.bidi.text[start..range.end]; - let trimmed = text.trim_end(); + // U+200B ZERO WIDTH SPACE is used to provide a line break opportunity, + // we want to trim it too. + let trimmed = text.trim_end().trim_end_matches('\u{200B}'); range.end = start + trimmed.len(); // Deal with hyphens, dashes and justification. @@ -1132,6 +1141,11 @@ fn line<'a>( dash = hyphen || shy || trimmed.ends_with(['-', '–', '—']); justify |= text.ends_with('\u{2028}'); + // Deal with CJK punctuation at line ends. + let gb_style = is_gb_style(shaped.lang, shaped.region); + let end_cjk_punct = trimmed + .ends_with(['”', '’', ',', '。', '、', ':', ';', '》', ')', '』', '」']); + // Usually, we don't want to shape an empty string because: // - We don't want the height of trimmed whitespace in a different // font to be considered for the line height. @@ -1141,12 +1155,21 @@ fn line<'a>( // need the shaped empty string to make the line the appropriate // height. That is the case exactly if the string is empty and there // are no other items in the line. - if hyphen || start + shaped.text.len() > range.end { + if hyphen || start + shaped.text.len() > range.end || end_cjk_punct { if hyphen || start < range.end || before.is_empty() { let mut reshaped = shaped.reshape(vt, &p.spans, start..range.end); if hyphen || shy { reshaped.push_hyphen(vt); } + let punct = reshaped.glyphs.last(); + if let Some(punct) = punct { + if punct.is_cjk_left_aligned_punctuation(gb_style) { + let shrink_amount = punct.shrinkability().1; + let punct = reshaped.glyphs.to_mut().last_mut().unwrap(); + punct.shrink_right(shrink_amount); + reshaped.width -= shrink_amount.at(reshaped.size); + } + } width += reshaped.width; last = Some(Item::Text(reshaped)); } @@ -1155,6 +1178,10 @@ fn line<'a>( } } + // Deal with CJK punctuation at line starts. + let text = &p.bidi.text[range.start..end]; + let start_cjk_punct = text.starts_with(['“', '‘', '《', '(', '『', '「']); + // Reshape the start item if it's split in half. let mut first = None; if let Some((Item::Text(shaped), after)) = inner.split_first() { @@ -1163,8 +1190,8 @@ fn line<'a>( let end = range.end.min(base + shaped.text.len()); // Reshape if necessary. - if range.start + shaped.text.len() > end { - if range.start < end { + if range.start + shaped.text.len() > end || start_cjk_punct { + if range.start < end || start_cjk_punct { let reshaped = shaped.reshape(vt, &p.spans, range.start..end); width += reshaped.width; first = Some(Item::Text(reshaped)); @@ -1174,6 +1201,22 @@ fn line<'a>( } } + if start_cjk_punct { + let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut); + if let Some(reshaped) = reshaped { + if let Some(punct) = reshaped.glyphs.first() { + if punct.is_cjk_right_aligned_punctuation() { + let shrink_amount = punct.shrinkability().0; + let punct = reshaped.glyphs.to_mut().first_mut().unwrap(); + punct.shrink_left(shrink_amount); + let amount_abs = shrink_amount.at(reshaped.size); + reshaped.width -= amount_abs; + width -= amount_abs; + } + } + } + } + // Measure the inner items. for item in inner { width += item.width(); diff --git a/library/src/text/shaping.rs b/library/src/text/shaping.rs index b0be8bf6d..d0c879feb 100644 --- a/library/src/text/shaping.rs +++ b/library/src/text/shaping.rs @@ -23,6 +23,10 @@ pub struct ShapedText<'a> { pub text: &'a str, /// The text direction. pub dir: Dir, + /// The text language. + pub lang: Lang, + /// The text region. + pub region: Option, /// The text's style properties. pub styles: StyleChain<'a>, /// The font variant. @@ -48,6 +52,8 @@ pub struct ShapedGlyph { pub x_offset: Em, /// The vertical offset of the glyph. pub y_offset: Em, + /// The adjustability of the glyph. + pub adjustability: Adjustability, /// The byte range of this glyph's cluster in the full paragraph. A cluster /// is a sequence of one or multiple glyphs that cannot be separated and /// must always be treated as a union. @@ -78,41 +84,67 @@ impl ShapedGlyph { /// Whether the glyph is justifiable. pub fn is_justifiable(&self) -> bool { + // GB style is not relevant here. self.is_space() - || self.is_cjk() - || self.is_cjk_left_aligned_punctuation() + || self.is_cjk_script() + || self.is_cjk_left_aligned_punctuation(true) || self.is_cjk_right_aligned_punctuation() + || self.is_cjk_center_aligned_punctuation(true) } - pub fn is_cjk(&self) -> bool { + pub fn is_cjk_script(&self) -> bool { use Script::*; // U+30FC: Katakana-Hiragana Prolonged Sound Mark matches!(self.c.script(), Hiragana | Katakana | Han) || self.c == '\u{30FC}' } + pub fn is_cjk_adjustable(&self) -> bool { + self.is_cjk_left_aligned_punctuation(true) + || self.is_cjk_right_aligned_punctuation() + || self.is_cjk_center_aligned_punctuation(true) + } + /// See - pub fn is_cjk_left_aligned_punctuation(&self) -> bool { + pub fn is_cjk_left_aligned_punctuation(&self, gb_style: bool) -> bool { // CJK quotation marks shares codepoints with latin quotation marks. // But only the CJK ones have full width. - if matches!(self.c, '”' | '’') && self.x_advance == Em::one() { + if matches!(self.c, '”' | '’') + && self.x_advance + self.stretchability().1 == Em::one() + { return true; } - matches!(self.c, ',' | '。' | '、' | ':' | ';' | '》' | ')' | '』' | '」') + if gb_style && matches!(self.c, ',' | '。' | '、' | ':' | ';') { + return true; + } + + matches!(self.c, '》' | ')' | '』' | '」') } /// See pub fn is_cjk_right_aligned_punctuation(&self) -> bool { // CJK quotation marks shares codepoints with latin quotation marks. // But only the CJK ones have full width. - if matches!(self.c, '“' | '‘') && self.x_advance == Em::one() { + if matches!(self.c, '“' | '‘') + && self.x_advance + self.stretchability().0 == Em::one() + { return true; } matches!(self.c, '《' | '(' | '『' | '「') } - pub fn adjustability(&self) -> Adjustability { + /// See https://www.w3.org/TR/clreq/#punctuation_width_adjustment + pub fn is_cjk_center_aligned_punctuation(&self, gb_style: bool) -> bool { + if !gb_style && matches!(self.c, ',' | '。' | '、' | ':' | ';') { + return true; + } + + // U+30FB: Katakana Middle Dot + matches!(self.c, '\u{30FB}') + } + + pub fn base_adjustability(&self, gb_style: bool) -> Adjustability { let width = self.x_advance; if self.is_space() { Adjustability { @@ -120,7 +152,7 @@ impl ShapedGlyph { stretchability: (Em::zero(), width / 2.0), shrinkability: (Em::zero(), width / 3.0), } - } else if self.is_cjk_left_aligned_punctuation() { + } else if self.is_cjk_left_aligned_punctuation(gb_style) { Adjustability { stretchability: (Em::zero(), Em::zero()), shrinkability: (Em::zero(), width / 2.0), @@ -130,6 +162,11 @@ impl ShapedGlyph { stretchability: (Em::zero(), Em::zero()), shrinkability: (width / 2.0, Em::zero()), } + } else if self.is_cjk_center_aligned_punctuation(gb_style) { + Adjustability { + stretchability: (Em::zero(), Em::zero()), + shrinkability: (width / 4.0, width / 4.0), + } } else { Adjustability::default() } @@ -137,12 +174,27 @@ impl ShapedGlyph { /// The stretchability of the character. pub fn stretchability(&self) -> (Em, Em) { - self.adjustability().stretchability + self.adjustability.stretchability } /// The shrinkability of the character. pub fn shrinkability(&self) -> (Em, Em) { - self.adjustability().shrinkability + self.adjustability.shrinkability + } + + /// Shrink the width of glyph on the left side. + pub fn shrink_left(&mut self, amount: Em) { + self.x_offset -= amount; + self.x_advance -= amount; + self.adjustability.shrinkability.0 -= amount; + self.adjustability.stretchability.0 += amount; + } + + /// Shrink the width of glyph on the right side. + pub fn shrink_right(&mut self, amount: Em) { + self.x_advance -= amount; + self.adjustability.shrinkability.1 -= amount; + self.adjustability.stretchability.1 += amount; } } @@ -301,7 +353,7 @@ impl<'a> ShapedText<'a> { pub fn cjk_justifiable_at_last(&self) -> bool { self.glyphs .last() - .map(|g| g.is_cjk() || g.is_cjk_left_aligned_punctuation()) + .map(|g| g.is_cjk_script() || g.is_cjk_adjustable()) .unwrap_or(false) } @@ -339,6 +391,8 @@ impl<'a> ShapedText<'a> { base: text_range.start, text, dir: self.dir, + lang: self.lang, + region: self.region, styles: self.styles, size: self.size, variant: self.variant, @@ -346,7 +400,16 @@ impl<'a> ShapedText<'a> { glyphs: Cow::Borrowed(glyphs), } } else { - shape(vt, text_range.start, text, spans, self.styles, self.dir) + shape( + vt, + text_range.start, + text, + spans, + self.styles, + self.dir, + self.lang, + self.region, + ) } } @@ -373,6 +436,7 @@ impl<'a> ShapedText<'a> { x_advance, x_offset: Em::zero(), y_offset: Em::zero(), + adjustability: Adjustability::default(), range, safe_to_break: true, c: '-', @@ -462,6 +526,7 @@ struct ShapingContext<'a, 'v> { } /// Shape text into [`ShapedText`]. +#[allow(clippy::too_many_arguments)] pub fn shape<'a>( vt: &Vt, base: usize, @@ -469,6 +534,8 @@ pub fn shape<'a>( spans: &SpanMapper, styles: StyleChain<'a>, dir: Dir, + lang: Lang, + region: Option, ) -> ShapedText<'a> { let size = TextElem::size_in(styles); let mut ctx = ShapingContext { @@ -489,11 +556,14 @@ pub fn shape<'a>( } track_and_space(&mut ctx); + calculate_adjustability(&mut ctx, lang, region); ShapedText { base, text, dir, + lang, + region, styles, variant: ctx.variant, size, @@ -581,6 +651,7 @@ fn shape_segment( x_advance: font.to_em(pos[i].x_advance), x_offset: font.to_em(pos[i].x_offset), y_offset: font.to_em(pos[i].y_offset), + adjustability: Adjustability::default(), range: start..end, safe_to_break: !info.unsafe_to_break(), c: text[cluster..].chars().next().unwrap(), @@ -645,6 +716,7 @@ fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) { x_advance, x_offset: Em::zero(), y_offset: Em::zero(), + adjustability: Adjustability::default(), range: start..end, safe_to_break: true, c, @@ -679,6 +751,43 @@ fn track_and_space(ctx: &mut ShapingContext) { } } +pub fn is_gb_style(lang: Lang, region: Option) -> bool { + // Most CJK variants, including zh-CN, ja-JP, zh-SG, zh-MY use GB-style punctuation, + // while zh-HK and zh-TW use alternative style. We default to use GB-style. + !(lang == Lang::CHINESE + && matches!(region.as_ref().map(Region::as_str), Some("TW" | "HK"))) +} + +/// Calculate stretchability and shrinkability of each glyph, +/// and CJK punctuation adjustments according to Chinese Layout Requirements. +fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option) { + let gb_style = is_gb_style(lang, region); + + let mut glyphs = ctx.glyphs.iter_mut().peekable(); + while let Some(glyph) = glyphs.next() { + glyph.adjustability = glyph.base_adjustability(gb_style); + + // Only GB style needs further adjustment. + if glyph.is_cjk_adjustable() && !gb_style { + continue; + } + + // Now we apply consecutive punctuation adjustment, specified in Chinese Layout + // Requirements, section 3.1.6.1 Punctuation Adjustment Space, and Japanese Layout + // Requirements, section 3.1 Line Composition Rules for Punctuation Marks + let Some(next) = glyphs.peek_mut() else { continue }; + let width = glyph.x_advance; + let delta = width / 2.0; + if next.is_cjk_adjustable() + && (glyph.shrinkability().1 + next.shrinkability().0) >= delta + { + let left_delta = glyph.shrinkability().1.min(delta); + glyph.shrink_right(left_delta); + next.shrink_left(delta - left_delta); + } + } +} + /// Difference between non-breaking and normal space. fn nbsp_delta(font: &Font) -> Option { let space = font.ttf().glyph_index(' ')?.0; diff --git a/tests/ref/layout/par-justify-cjk.png b/tests/ref/layout/par-justify-cjk.png index 5efcc1e1b..89a9af7d1 100644 Binary files a/tests/ref/layout/par-justify-cjk.png and b/tests/ref/layout/par-justify-cjk.png differ diff --git a/tests/ref/meta/numbering.png b/tests/ref/meta/numbering.png index 984cf04c9..8ddf3324a 100644 Binary files a/tests/ref/meta/numbering.png and b/tests/ref/meta/numbering.png differ diff --git a/tests/ref/text/chinese.png b/tests/ref/text/chinese.png index 89ee357b4..4762558d7 100644 Binary files a/tests/ref/text/chinese.png and b/tests/ref/text/chinese.png differ diff --git a/tests/typ/layout/par-justify-cjk.typ b/tests/typ/layout/par-justify-cjk.typ index d82e72cd7..8ceab65fa 100644 --- a/tests/typ/layout/par-justify-cjk.typ +++ b/tests/typ/layout/par-justify-cjk.typ @@ -5,7 +5,7 @@ // Most Chinese publications do not use hanging punctuation at line end. #set page(width: auto) #set par(justify: true) -#set text(overhang: false, lang: "zh") +#set text(font: "Noto Serif CJK SC", lang: "zh", overhang: false) #rect(inset: 0pt, width: 80pt, fill: rgb("eee"))[ 中文维基百科使用汉字书写,汉字是汉族或华人的共同文字,是中国大陆、新加坡、马来西亚、台湾、香港、澳门的唯一官方文字或官方文字之一。25.9%,而美国和荷兰则分別占13.7%及8.2%。近年來,中国大陆地区的维基百科编辑者正在迅速增加; @@ -33,4 +33,17 @@ 《书名》《测试》下一行 《书名》《测试》。 -] \ No newline at end of file +] + +--- +// Test Variants of Mainland China, Hong Kong, and Japan. + +// 17 characters a line. +#set page(width: 170pt + 10pt, margin: (x: 5pt)) +#set text(font: "Noto Serif CJK SC", lang: "zh", overhang: false) +#set par(justify: true) + +孔雀最早见于《山海经》中的《海内经》:\u{200b}“有孔雀。”东汉杨孚著《异物志》记载,岭南:“孔雀,其大如大雁而足高,毛皆有斑纹彩,捕而蓄之,拍手即舞。” + +#set text(font: "Noto Serif CJK TC", lang: "zh", region: "hk") +孔雀最早见于《山海经》中的《海内经》:「有孔雀。」东汉杨孚著《异物志》记载,岭南:「孔雀,其大如大雁而足高,毛皆有斑纹彩,捕而蓄之,拍手即舞。」