More Chinese punctuation adjustment (#836)

and also general linebreak improvements
This commit is contained in:
Peng Guanwen 2023-04-18 00:20:59 +08:00 committed by GitHub
parent 9bdc4a7de0
commit 480ad7670b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 109 additions and 36 deletions

View File

@ -906,7 +906,7 @@ fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<L
}
// Determine the cost of the line.
let min_ratio = if attempt.justify { MIN_RATIO } else { 0.0 };
let min_ratio = if p.justify { MIN_RATIO } else { 0.0 };
let mut cost = if ratio < min_ratio {
// The line is overfull. This is the case if
// - justification is on, but we'd need to shrink too much
@ -920,7 +920,9 @@ fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<L
// all breakpoints before this one become inactive since no line
// can span above the mandatory break.
active = k;
if attempt.justify {
// If ratio > 0, we need to stretch the line only when justify is needed.
// If ratio < 0, we always need to shrink the line.
if (ratio > 0.0 && attempt.justify) || ratio < 0.0 {
ratio.powi(3).abs()
} else {
0.0

View File

@ -63,6 +63,14 @@ pub struct ShapedGlyph {
pub offset: u16,
}
#[derive(Debug, Clone, Default)]
/// How much a glyph may be stretched or shrunk, tracked separately for its
/// left and its right side.
///
/// Both fields are `(left, right)` pairs.
pub struct Adjustability {
/// The left and right stretchability.
pub stretchability: (Em, Em),
/// The left and right shrinkability.
pub shrinkability: (Em, Em),
}
impl ShapedGlyph {
/// Whether the glyph is a space.
pub fn is_space(&self) -> bool {
@ -71,40 +79,71 @@ impl ShapedGlyph {
/// Whether the glyph is justifiable.
pub fn is_justifiable(&self) -> bool {
self.is_space() || self.is_cjk() || self.is_cjk_punctuation()
self.is_space()
|| self.is_cjk()
|| self.is_cjk_left_aligned_punctuation()
|| self.is_cjk_right_aligned_punctuation()
}
pub fn is_cjk(&self) -> bool {
use Script::*;
matches!(self.c.script(), Hiragana | Katakana | Han)
// U+30FC: Katakana-Hiragana Prolonged Sound Mark
matches!(self.c.script(), Hiragana | Katakana | Han) || self.c == '\u{30FC}'
}
pub fn is_cjk_punctuation(&self) -> bool {
matches!(self.c, ',' | '。' | '、' | ':' | ';')
/// Whether the glyph is CJK punctuation that is treated as left-aligned.
///
/// `adjustability` lets such glyphs shrink on their right side only.
///
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>.
pub fn is_cjk_left_aligned_punctuation(&self) -> bool {
    // CJK closing quotation marks (U+201D, U+2019) share code points with
    // the Latin ones, but only the CJK variants are a full em wide.
    if matches!(self.c, '”' | '’') && self.x_advance == Em::one() {
        return true;
    }

    // Fullwidth comma (U+FF0C), ideographic full stop (U+3002), ideographic
    // comma (U+3001), fullwidth colon (U+FF1A) and semicolon (U+FF1B),
    // followed by the closing brackets (U+300B, U+FF09, U+300F, U+300D).
    matches!(self.c, ',' | '。' | '、' | ':' | ';' | '》' | ')' | '』' | '」')
}
/// Whether the glyph is CJK punctuation that is treated as right-aligned.
///
/// `adjustability` lets such glyphs shrink on their left side only.
///
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>.
pub fn is_cjk_right_aligned_punctuation(&self) -> bool {
    // CJK opening quotation marks (U+201C, U+2018) share code points with
    // the Latin ones, but only the CJK variants are a full em wide.
    if matches!(self.c, '“' | '‘') && self.x_advance == Em::one() {
        return true;
    }

    // Opening brackets: U+300A, fullwidth left parenthesis (U+FF08),
    // U+300E, U+300C.
    matches!(self.c, '《' | '(' | '『' | '「')
}
/// The per-side stretch and shrink allowance of this glyph.
///
/// - Spaces stretch by half and shrink by a third of their advance, both on
///   the right side (the numbers are from the Knuth-Plass paper).
/// - Left-aligned CJK punctuation may shrink by half its advance on the right.
/// - Right-aligned CJK punctuation may shrink by half its advance on the left.
/// - Every other glyph gets the default `Adjustability`.
pub fn adjustability(&self) -> Adjustability {
    let advance = self.x_advance;
    let fixed = (Em::zero(), Em::zero());

    // Each arm yields `(stretchability, shrinkability)` as `(left, right)` pairs.
    let (stretchability, shrinkability) = if self.is_space() {
        ((Em::zero(), advance / 2.0), (Em::zero(), advance / 3.0))
    } else if self.is_cjk_left_aligned_punctuation() {
        (fixed, (Em::zero(), advance / 2.0))
    } else if self.is_cjk_right_aligned_punctuation() {
        (fixed, (advance / 2.0, Em::zero()))
    } else {
        return Adjustability::default();
    };

    Adjustability { stretchability, shrinkability }
}
/// The stretchability of the character.
pub fn stretchability(&self) -> Em {
let width = self.x_advance;
if self.is_space() {
// The number for spaces is from Knuth-Plass' paper
width / 2.0
} else {
Em::zero()
}
pub fn stretchability(&self) -> (Em, Em) {
self.adjustability().stretchability
}
/// The shrinkability of the character.
pub fn shrinkability(&self) -> Em {
let width = self.x_advance;
if self.is_space() {
// The number for spaces is from Knuth-Plass' paper
width / 3.0
} else if self.is_cjk_punctuation() {
width / 2.0
} else {
Em::zero()
}
pub fn shrinkability(&self) -> (Em, Em) {
self.adjustability().shrinkability
}
}
@ -146,20 +185,33 @@ impl<'a> ShapedText<'a> {
let glyphs = group
.iter()
.map(|glyph| {
let mut justification = Em::zero();
if justification_ratio < 0.0 {
justification += glyph.shrinkability() * justification_ratio
let adjustability_left = if justification_ratio < 0.0 {
glyph.shrinkability().0
} else {
justification += glyph.stretchability() * justification_ratio
}
glyph.stretchability().0
};
let adjustability_right = if justification_ratio < 0.0 {
glyph.shrinkability().1
} else {
glyph.stretchability().1
};
let justification_left = adjustability_left * justification_ratio;
let mut justification_right =
adjustability_right * justification_ratio;
if glyph.is_justifiable() {
justification += Em::from_length(extra_justification, self.size)
justification_right +=
Em::from_length(extra_justification, self.size)
}
frame.size_mut().x += justification.at(self.size);
frame.size_mut().x += justification_left.at(self.size)
+ justification_right.at(self.size);
Glyph {
id: glyph.glyph_id,
x_advance: glyph.x_advance + justification,
x_offset: glyph.x_offset,
x_advance: glyph.x_advance
+ justification_left
+ justification_right,
x_offset: glyph.x_offset + justification_left,
c: glyph.c,
span: glyph.span,
offset: glyph.offset,
@ -242,7 +294,7 @@ impl<'a> ShapedText<'a> {
pub fn cjk_justifiable_at_last(&self) -> bool {
self.glyphs
.last()
.map(|g| g.is_cjk() || g.is_cjk_punctuation())
.map(|g| g.is_cjk() || g.is_cjk_left_aligned_punctuation())
.unwrap_or(false)
}
@ -250,7 +302,7 @@ impl<'a> ShapedText<'a> {
pub fn stretchability(&self) -> Abs {
self.glyphs
.iter()
.map(|g| g.stretchability())
.map(|g| g.stretchability().0 + g.stretchability().1)
.sum::<Em>()
.at(self.size)
}
@ -259,7 +311,7 @@ impl<'a> ShapedText<'a> {
pub fn shrinkability(&self) -> Abs {
self.glyphs
.iter()
.map(|g| g.shrinkability())
.map(|g| g.shrinkability().0 + g.shrinkability().1)
.sum::<Em>()
.at(self.size)
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 43 KiB

After

Width:  |  Height:  |  Size: 118 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 41 KiB

View File

@ -20,4 +20,17 @@
ウィキペディア(英: Wikipedia)は、世界中のボランティアの共同作業によって執筆及び作成されるフリーの多言語インターネット百科事典である。主に寄付に依って活動している非営利団体「ウィキメディア財団」が所有・運営している。
専門家によるオンライン百科事典プロジェクトNupedia(ヌーペディア)を前身として、2001年1月、ラリー・サンガーとジミー・ウェールズ(英: Jimmy Donal "Jimbo" Wales)により英語でプロジェクトが開始された。
]
---
// Test punctuation whitespace adjustment
#set page(width: auto)
#set text(lang: "zh", font: "Noto Serif CJK SC", overhang: false)
#set par(justify: true)
#rect(inset: 0pt, width: 80pt, fill: rgb("eee"))[
“引号测试”,还,
《书名》《测试》下一行
《书名》《测试》。
]

View File

@ -25,3 +25,9 @@ D E F #linebreak(justify: true)
// basically empty paragraph.
#set par(justify: true)
#""
---
// Test that the last line can be shrunk
#set page(width: 155pt)
#set par(justify: true)
This text can be fitted in one line.