Fix JIS style punctuation (#3543)

This commit is contained in:
Peng Guanwen 2024-03-04 17:02:25 +08:00 committed by GitHub
parent decb4fd9b9
commit 086bca9576
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 56 additions and 28 deletions

View File

@ -7,7 +7,7 @@ use unicode_script::{Script, UnicodeScript};
use self::linebreak::{breakpoints, Breakpoint};
use self::shaping::{
is_gb_style, is_of_cj_script, shape, ShapedGlyph, ShapedText, BEGIN_PUNCT_PAT,
cjk_punct_style, is_of_cj_script, shape, ShapedGlyph, ShapedText, BEGIN_PUNCT_PAT,
END_PUNCT_PAT,
};
use crate::diag::{bail, SourceResult};
@ -1041,7 +1041,7 @@ fn line<'a>(
justify |= text.ends_with('\u{2028}');
// Deal with CJK punctuation at line ends.
let gb_style = is_gb_style(shaped.lang, shaped.region);
let gb_style = cjk_punct_style(shaped.lang, shaped.region);
let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
|| (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script));

View File

@ -114,18 +114,18 @@ impl ShapedGlyph {
}
pub fn is_cjk_punctuation(&self) -> bool {
self.is_cjk_left_aligned_punctuation(true)
self.is_cjk_left_aligned_punctuation(CjkPunctStyle::Gb)
|| self.is_cjk_right_aligned_punctuation()
|| self.is_cjk_center_aligned_punctuation(true)
|| self.is_cjk_center_aligned_punctuation(CjkPunctStyle::Gb)
}
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
pub fn is_cjk_left_aligned_punctuation(&self, gb_style: bool) -> bool {
pub fn is_cjk_left_aligned_punctuation(&self, style: CjkPunctStyle) -> bool {
is_cjk_left_aligned_punctuation(
self.c,
self.x_advance,
self.stretchability(),
gb_style,
style,
)
}
@ -135,8 +135,8 @@ impl ShapedGlyph {
}
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
pub fn is_cjk_center_aligned_punctuation(&self, gb_style: bool) -> bool {
is_cjk_center_aligned_punctuation(self.c, gb_style)
pub fn is_cjk_center_aligned_punctuation(&self, style: CjkPunctStyle) -> bool {
is_cjk_center_aligned_punctuation(self.c, style)
}
/// Whether the glyph is a western letter or number.
@ -146,7 +146,7 @@ impl ShapedGlyph {
|| self.c.is_ascii_digit()
}
pub fn base_adjustability(&self, gb_style: bool) -> Adjustability {
pub fn base_adjustability(&self, style: CjkPunctStyle) -> Adjustability {
let width = self.x_advance;
if self.is_space() {
Adjustability {
@ -154,7 +154,7 @@ impl ShapedGlyph {
stretchability: (Em::zero(), width / 2.0),
shrinkability: (Em::zero(), width / 3.0),
}
} else if self.is_cjk_left_aligned_punctuation(gb_style) {
} else if self.is_cjk_left_aligned_punctuation(style) {
Adjustability {
stretchability: (Em::zero(), Em::zero()),
shrinkability: (Em::zero(), width / 2.0),
@ -164,7 +164,7 @@ impl ShapedGlyph {
stretchability: (Em::zero(), Em::zero()),
shrinkability: (width / 2.0, Em::zero()),
}
} else if self.is_cjk_center_aligned_punctuation(gb_style) {
} else if self.is_cjk_center_aligned_punctuation(style) {
Adjustability {
stretchability: (Em::zero(), Em::zero()),
shrinkability: (width / 4.0, width / 4.0),
@ -883,16 +883,16 @@ fn track_and_space(ctx: &mut ShapingContext) {
/// Calculate stretchability and shrinkability of each glyph,
/// and CJK punctuation adjustments according to Chinese Layout Requirements.
fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option<Region>) {
let gb_style = is_gb_style(lang, region);
let style = cjk_punct_style(lang, region);
for glyph in &mut ctx.glyphs {
glyph.adjustability = glyph.base_adjustability(gb_style);
glyph.adjustability = glyph.base_adjustability(style);
}
let mut glyphs = ctx.glyphs.iter_mut().peekable();
while let Some(glyph) = glyphs.next() {
// Only GB style needs further adjustment.
if glyph.is_cjk_punctuation() && !gb_style {
// CNS style needs not further adjustment.
if glyph.is_cjk_punctuation() && matches!(style, CjkPunctStyle::Cns) {
continue;
}
@ -976,11 +976,23 @@ pub(super) const END_PUNCT_PAT: &[char] = &[
'〗', '', '', '', '', '',
];
pub(super) fn is_gb_style(lang: Lang, region: Option<Region>) -> bool {
// Most CJK variants, including zh-CN, ja-JP, zh-SG, zh-MY use GB-style punctuation,
// while zh-HK and zh-TW use alternative style. We default to use GB-style.
!(lang == Lang::CHINESE
&& matches!(region.as_ref().map(Region::as_str), Some("TW" | "HK")))
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum CjkPunctStyle {
/// Standard GB/T 15834-2011, used mostly in mainland China.
Gb,
/// Standard by Taiwan Ministry of Education, used in Taiwan and Hong Kong.
Cns,
/// Standard JIS X 4051, used in Japan.
Jis,
}
pub(super) fn cjk_punct_style(lang: Lang, region: Option<Region>) -> CjkPunctStyle {
match (lang, region.as_ref().map(Region::as_str)) {
(Lang::CHINESE, Some("TW" | "HK")) => CjkPunctStyle::Cns,
(Lang::JAPANESE, _) => CjkPunctStyle::Jis,
// zh-CN, zh-SG, zh-MY use GB-style punctuation,
_ => CjkPunctStyle::Gb,
}
}
/// Whether the glyph is a space.
@ -1007,16 +1019,22 @@ fn is_cjk_left_aligned_punctuation(
c: char,
x_advance: Em,
stretchability: (Em, Em),
gb_style: bool,
style: CjkPunctStyle,
) -> bool {
use CjkPunctStyle::*;
// CJK quotation marks shares codepoints with latin quotation marks.
// But only the CJK ones have full width.
if matches!(c, '”' | '') && x_advance + stretchability.1 == Em::one() {
return true;
}
if gb_style && matches!(c, '' | '。' | '' | '、' | '' | '' | '' | '')
if matches!(style, Gb | Jis) && matches!(c, '' | '。' | '' | '、' | '' | '')
{
return true;
}
if matches!(style, Gb) && matches!(c, '' | '') {
// In GB style, exclamations and question marks are also left aligned and can be adjusted.
// Note that they are not adjustable in other styles.
return true;
@ -1042,13 +1060,16 @@ fn is_cjk_right_aligned_punctuation(
}
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
fn is_cjk_center_aligned_punctuation(c: char, gb_style: bool) -> bool {
if !gb_style && matches!(c, '' | '。' | '' | '、' | '' | '') {
fn is_cjk_center_aligned_punctuation(c: char, style: CjkPunctStyle) -> bool {
if matches!(style, CjkPunctStyle::Cns)
&& matches!(c, '' | '。' | '' | '、' | '' | '')
{
return true;
}
// U+30FB: Katakana Middle Dot
matches!(c, '\u{30FB}')
// U+00B7: Middle Dot
matches!(c, '\u{30FB}' | '\u{00B7}')
}
/// Whether the glyph is justifiable.
@ -1064,10 +1085,11 @@ fn is_justifiable(
x_advance: Em,
stretchability: (Em, Em),
) -> bool {
// GB style is not relevant here.
// punctuation style is not relevant here.
let style = CjkPunctStyle::Gb;
is_space(c)
|| is_cj_script(c, script)
|| is_cjk_left_aligned_punctuation(c, x_advance, stretchability, true)
|| is_cjk_left_aligned_punctuation(c, x_advance, stretchability, style)
|| is_cjk_right_aligned_punctuation(c, x_advance, stretchability)
|| is_cjk_center_aligned_punctuation(c, true)
|| is_cjk_center_aligned_punctuation(c, style)
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

@ -9,6 +9,12 @@
// because zh-TW does not follow GB style
#set text(lang: "zh", region: "TW", font: "Noto Serif CJK TC")
原來,你也玩《原神》!
#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC")
「真的吗?」
#set text(lang: "ja", font: "Noto Serif CJK JP")
「本当に?」
---
#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC")