Segment by script
This commit is contained in:
parent
56968bc0d6
commit
c3a387b8f7
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -873,6 +873,7 @@ dependencies = [
|
||||
"typed-arena",
|
||||
"typst-macros",
|
||||
"unicode-bidi",
|
||||
"unicode-script",
|
||||
"unicode-segmentation",
|
||||
"unicode-xid",
|
||||
"usvg",
|
||||
|
@ -30,6 +30,7 @@ rustybuzz = "0.4"
|
||||
unicode-bidi = "0.3.5"
|
||||
unicode-segmentation = "1"
|
||||
unicode-xid = "0.2"
|
||||
unicode-script = "0.5"
|
||||
xi-unicode = "0.3"
|
||||
|
||||
# Raster and vector graphics handling
|
||||
|
BIN
fonts/IBMPlexSansDevanagari-Regular.ttf
Normal file
BIN
fonts/IBMPlexSansDevanagari-Regular.ttf
Normal file
Binary file not shown.
@ -1,13 +1,14 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use unicode_bidi::{BidiInfo, Level};
|
||||
use unicode_script::{Script, UnicodeScript};
|
||||
use xi_unicode::LineBreakIterator;
|
||||
|
||||
use super::{shape, Lang, ShapedText, TextNode};
|
||||
use crate::font::FontStore;
|
||||
use crate::library::layout::Spacing;
|
||||
use crate::library::prelude::*;
|
||||
use crate::util::{ArcExt, EcoString, SliceExt};
|
||||
use crate::util::{ArcExt, EcoString};
|
||||
|
||||
/// Arrange text, spacing and inline-level nodes into a paragraph.
|
||||
#[derive(Hash)]
|
||||
@ -437,23 +438,46 @@ fn prepare<'a>(
|
||||
_ => None,
|
||||
});
|
||||
|
||||
let mut items = vec![];
|
||||
let mut cursor = 0;
|
||||
let mut items = vec![];
|
||||
|
||||
// Layout the children and collect them into items.
|
||||
for (segment, styles) in segments {
|
||||
let end = cursor + segment.len();
|
||||
match segment {
|
||||
Segment::Text(len) => {
|
||||
// TODO: Also split by script.
|
||||
let mut start = cursor;
|
||||
for (level, count) in bidi.levels[cursor .. cursor + len].group() {
|
||||
let end = start + count;
|
||||
let text = &bidi.text[start .. end];
|
||||
Segment::Text(_) => {
|
||||
let mut process = |text, level: Level| {
|
||||
let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
|
||||
let shaped = shape(&mut ctx.fonts, text, styles, dir);
|
||||
items.push(Item::Text(shaped));
|
||||
start = end;
|
||||
};
|
||||
|
||||
let mut prev_level = Level::ltr();
|
||||
let mut prev_script = Script::Unknown;
|
||||
|
||||
// Group by embedding level and script.
|
||||
for i in cursor .. end {
|
||||
if !text.is_char_boundary(i) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let level = bidi.levels[i];
|
||||
let script =
|
||||
text[i ..].chars().next().map_or(Script::Unknown, |c| c.script());
|
||||
|
||||
if level != prev_level || !is_compatible(script, prev_script) {
|
||||
if cursor < i {
|
||||
process(&text[cursor .. i], prev_level);
|
||||
}
|
||||
cursor = i;
|
||||
prev_level = level;
|
||||
prev_script = script;
|
||||
} else if is_generic_script(prev_script) {
|
||||
prev_script = script;
|
||||
}
|
||||
}
|
||||
|
||||
process(&text[cursor .. end], prev_level);
|
||||
}
|
||||
Segment::Spacing(spacing) => match spacing {
|
||||
Spacing::Relative(v) => {
|
||||
@ -482,12 +506,22 @@ fn prepare<'a>(
|
||||
}
|
||||
}
|
||||
|
||||
cursor += segment.len();
|
||||
cursor = end;
|
||||
}
|
||||
|
||||
Ok(Preparation { bidi, items, styles, children: &par.0 })
|
||||
}
|
||||
|
||||
/// Whether this is a generic script (unknown, common or inherited) rather than a specific one.
|
||||
fn is_generic_script(script: Script) -> bool {
|
||||
matches!(script, Script::Unknown | Script::Common | Script::Inherited)
|
||||
}
|
||||
|
||||
/// Whether these scripts can be part of the same shape run.
|
||||
fn is_compatible(a: Script, b: Script) -> bool {
|
||||
is_generic_script(a) || is_generic_script(b) || a == b
|
||||
}
|
||||
|
||||
/// Find suitable linebreaks.
|
||||
fn linebreak<'a>(
|
||||
p: &'a Preparation<'a>,
|
||||
|
@ -12,7 +12,6 @@ use crate::util::SliceExt;
|
||||
/// This type contains owned or borrowed shaped text runs, which can be
|
||||
/// measured, used to reshape substrings more quickly and converted into a
|
||||
/// frame.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ShapedText<'a> {
|
||||
/// The text that was shaped.
|
||||
pub text: &'a str,
|
||||
@ -269,11 +268,13 @@ impl<'a> ShapedText<'a> {
|
||||
// RTL needs offset one because the left side of the range should be
|
||||
// exclusive and the right side inclusive, contrary to the normal
|
||||
// behaviour of ranges.
|
||||
if !ltr {
|
||||
idx += 1;
|
||||
}
|
||||
self.glyphs[idx].safe_to_break.then(|| idx + (!ltr) as usize)
|
||||
}
|
||||
}
|
||||
|
||||
self.glyphs[idx].safe_to_break.then(|| idx)
|
||||
impl Debug for ShapedText<'_> {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
self.text.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -103,12 +103,6 @@ where
|
||||
|
||||
/// Additional methods for slices.
|
||||
pub trait SliceExt<T> {
|
||||
/// Find consecutive runs of the same elements in a slice and yield for
|
||||
/// each such run the element and number of times it appears.
|
||||
fn group(&self) -> Group<'_, T>
|
||||
where
|
||||
T: PartialEq;
|
||||
|
||||
/// Split a slice into consecutive runs with the same key and yield for
|
||||
/// each such run the key and the slice of elements with that key.
|
||||
fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F>
|
||||
@ -118,35 +112,11 @@ pub trait SliceExt<T> {
|
||||
}
|
||||
|
||||
impl<T> SliceExt<T> for [T] {
|
||||
fn group(&self) -> Group<'_, T> {
|
||||
Group { slice: self }
|
||||
}
|
||||
|
||||
fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F> {
|
||||
GroupByKey { slice: self, f }
|
||||
}
|
||||
}
|
||||
|
||||
/// This struct is created by [`SliceExt::group`].
|
||||
pub struct Group<'a, T> {
|
||||
slice: &'a [T],
|
||||
}
|
||||
|
||||
impl<'a, T> Iterator for Group<'a, T>
|
||||
where
|
||||
T: PartialEq,
|
||||
{
|
||||
type Item = (&'a T, usize);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let mut iter = self.slice.iter();
|
||||
let first = iter.next()?;
|
||||
let count = 1 + iter.take_while(|&t| t == first).count();
|
||||
self.slice = &self.slice[count ..];
|
||||
Some((first, count))
|
||||
}
|
||||
}
|
||||
|
||||
/// This struct is created by [`SliceExt::group_by_key`].
|
||||
pub struct GroupByKey<'a, T, F> {
|
||||
slice: &'a [T],
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 2.6 KiB |
Binary file not shown.
Before Width: | Height: | Size: 6.3 KiB After Width: | Height: | Size: 6.3 KiB |
18
tests/typ/text/shaping.typ
Normal file
18
tests/typ/text/shaping.typ
Normal file
@ -0,0 +1,18 @@
|
||||
// Test shaping quirks.
|
||||
|
||||
---
|
||||
// Test separation by script.
|
||||
ABCअपार्टमेंट
|
||||
|
||||
// This is how it should look.
|
||||
अपार्टमेंट
|
||||
|
||||
// This (without the spaces) is how it would look
|
||||
// if we didn't separate by script.
|
||||
अ पा र् ट में ट
|
||||
|
||||
---
|
||||
// Test that RTL safe-to-break doesn't panic even though the newline
|
||||
// doesn't exist in shaping output.
|
||||
#set text(dir: rtl, "Noto Serif Hebrew")
|
||||
\ ט
|
Loading…
Reference in New Issue
Block a user