parent
bcc014c4e1
commit
ad347632ab
12
Cargo.lock
generated
12
Cargo.lock
generated
@ -116,6 +116,12 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "az"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b7e4c2464d97fe331d41de9d5db0def0a96f4d823b8b32a2efd503578988973"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.13.1"
|
||||
@ -1385,9 +1391,9 @@ checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd"
|
||||
|
||||
[[package]]
|
||||
name = "pdf-writer"
|
||||
version = "0.7.0"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "63f45f7c7538e67c58cb4977e4f97bbd75fbd3990d827d28d597ec746291f644"
|
||||
checksum = "30900f178ea696fc5d9637171f98aaa93d5aae54f0726726df68fc3e32810db6"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"itoa",
|
||||
@ -2306,6 +2312,7 @@ dependencies = [
|
||||
"tracing",
|
||||
"ttf-parser",
|
||||
"typst-macros",
|
||||
"unicode-general-category",
|
||||
"unicode-math-class",
|
||||
"unicode-segmentation",
|
||||
"unicode-xid",
|
||||
@ -2366,6 +2373,7 @@ dependencies = [
|
||||
name = "typst-library"
|
||||
version = "0.3.0"
|
||||
dependencies = [
|
||||
"az",
|
||||
"chinese-number",
|
||||
"comemo",
|
||||
"csv",
|
||||
|
@ -33,7 +33,7 @@ indexmap = "1.9.3"
|
||||
log = "0.4"
|
||||
miniz_oxide = "0.7"
|
||||
once_cell = "1"
|
||||
pdf-writer = "0.7"
|
||||
pdf-writer = "0.7.1"
|
||||
pixglyph = "0.1"
|
||||
regex = "1"
|
||||
resvg = { version = "0.32", default-features = false }
|
||||
@ -46,6 +46,7 @@ svg2pdf = { git = "https://github.com/typst/svg2pdf" }
|
||||
tiny-skia = "0.9.0"
|
||||
tracing = "0.1.37"
|
||||
ttf-parser = "0.18.1"
|
||||
unicode-general-category = "0.6"
|
||||
unicode-math-class = "0.1"
|
||||
unicode-segmentation = "1"
|
||||
unicode-xid = "0.2"
|
||||
|
BIN
assets/fonts/IBMPlexSansDevanagari-Regular.ttf
Normal file
BIN
assets/fonts/IBMPlexSansDevanagari-Regular.ttf
Normal file
Binary file not shown.
@ -159,7 +159,7 @@ construct: |
|
||||
data-loading: |
|
||||
Data loading from external files.
|
||||
|
||||
These functions help you with embedding data from experiments and APIs in your
|
||||
These functions help you with embedding data from experiments in your
|
||||
documents.
|
||||
|
||||
utility: |
|
||||
|
@ -16,6 +16,7 @@ bench = false
|
||||
|
||||
[dependencies]
|
||||
typst = { path = ".." }
|
||||
az = "1.2"
|
||||
chinese-number = { version = "0.7.2", default-features = false, features = ["number-to-chinese"] }
|
||||
comemo = "0.2.2"
|
||||
csv = "1"
|
||||
|
@ -1139,8 +1139,7 @@ fn line<'a>(
|
||||
// are no other items in the line.
|
||||
if hyphen || start + shaped.text.len() > range.end {
|
||||
if hyphen || start < range.end || before.is_empty() {
|
||||
let shifted = start - base..range.end - base;
|
||||
let mut reshaped = shaped.reshape(vt, &p.spans, shifted);
|
||||
let mut reshaped = shaped.reshape(vt, &p.spans, start..range.end);
|
||||
if hyphen || shy {
|
||||
reshaped.push_hyphen(vt);
|
||||
}
|
||||
@ -1162,8 +1161,7 @@ fn line<'a>(
|
||||
// Reshape if necessary.
|
||||
if range.start + shaped.text.len() > end {
|
||||
if range.start < end {
|
||||
let shifted = range.start - base..end - base;
|
||||
let reshaped = shaped.reshape(vt, &p.spans, shifted);
|
||||
let reshaped = shaped.reshape(vt, &p.spans, range.start..end);
|
||||
width += reshaped.width;
|
||||
first = Some(Item::Text(reshaped));
|
||||
}
|
||||
|
@ -222,13 +222,13 @@ impl GlyphFragment {
|
||||
size: self.font_size,
|
||||
fill: self.fill,
|
||||
lang: self.lang,
|
||||
text: self.c.into(),
|
||||
glyphs: vec![Glyph {
|
||||
id: self.id.0,
|
||||
c: self.c,
|
||||
x_advance: Em::from_length(self.width, self.font_size),
|
||||
x_offset: Em::zero(),
|
||||
span: self.span,
|
||||
offset: 0,
|
||||
range: 0..self.c.len_utf8() as u16,
|
||||
span: (self.span, 0),
|
||||
}],
|
||||
};
|
||||
let size = Size::new(self.width, self.ascent + self.descent);
|
||||
|
@ -1,6 +1,7 @@
|
||||
use std::ops::Range;
|
||||
use std::str::FromStr;
|
||||
|
||||
use az::SaturatingAs;
|
||||
use rustybuzz::{Feature, Tag, UnicodeBuffer};
|
||||
use typst::font::{Font, FontVariant};
|
||||
use typst::util::SliceExt;
|
||||
@ -47,20 +48,18 @@ pub struct ShapedGlyph {
|
||||
pub x_offset: Em,
|
||||
/// The vertical offset of the glyph.
|
||||
pub y_offset: Em,
|
||||
/// The byte index in the source text where this glyph's cluster starts. A
|
||||
/// cluster is a sequence of one or multiple glyphs that cannot be
|
||||
/// separated and must always be treated as a union.
|
||||
pub cluster: usize,
|
||||
/// The byte range of this glyph's cluster in the full paragraph. A cluster
|
||||
/// is a sequence of one or multiple glyphs that cannot be separated and
|
||||
/// must always be treated as a union.
|
||||
pub range: Range<usize>,
|
||||
/// Whether splitting the shaping result before this glyph would yield the
|
||||
/// same results as shaping the parts to both sides of `text_index`
|
||||
/// separately.
|
||||
pub safe_to_break: bool,
|
||||
/// The first char in this glyph's cluster.
|
||||
pub c: char,
|
||||
/// The source code location of the text.
|
||||
pub span: Span,
|
||||
/// The offset within the spanned text.
|
||||
pub offset: u16,
|
||||
/// The source code location of the glyph and its byte offset within it.
|
||||
pub span: (Span, u16),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
@ -181,6 +180,12 @@ impl<'a> ShapedText<'a> {
|
||||
for ((font, y_offset), group) in
|
||||
self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset))
|
||||
{
|
||||
let mut range = group[0].range.clone();
|
||||
for glyph in group {
|
||||
range.start = range.start.min(glyph.range.start);
|
||||
range.end = range.end.max(glyph.range.end);
|
||||
}
|
||||
|
||||
let pos = Point::new(offset, top + shift - y_offset.at(self.size));
|
||||
let glyphs = group
|
||||
.iter()
|
||||
@ -195,8 +200,8 @@ impl<'a> ShapedText<'a> {
|
||||
} else {
|
||||
glyph.stretchability().1
|
||||
};
|
||||
let justification_left = adjustability_left * justification_ratio;
|
||||
|
||||
let justification_left = adjustability_left * justification_ratio;
|
||||
let mut justification_right =
|
||||
adjustability_right * justification_ratio;
|
||||
if glyph.is_justifiable() {
|
||||
@ -206,15 +211,16 @@ impl<'a> ShapedText<'a> {
|
||||
|
||||
frame.size_mut().x += justification_left.at(self.size)
|
||||
+ justification_right.at(self.size);
|
||||
|
||||
Glyph {
|
||||
id: glyph.glyph_id,
|
||||
x_advance: glyph.x_advance
|
||||
+ justification_left
|
||||
+ justification_right,
|
||||
x_offset: glyph.x_offset + justification_left,
|
||||
c: glyph.c,
|
||||
range: (glyph.range.start - range.start).saturating_as()
|
||||
..(glyph.range.end - range.start).saturating_as(),
|
||||
span: glyph.span,
|
||||
offset: glyph.offset,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
@ -224,6 +230,7 @@ impl<'a> ShapedText<'a> {
|
||||
size: self.size,
|
||||
lang,
|
||||
fill: fill.clone(),
|
||||
text: self.text[range.start - self.base..range.end - self.base].into(),
|
||||
glyphs,
|
||||
};
|
||||
|
||||
@ -318,16 +325,19 @@ impl<'a> ShapedText<'a> {
|
||||
|
||||
/// Reshape a range of the shaped text, reusing information from this
|
||||
/// shaping process if possible.
|
||||
///
|
||||
/// The `text_range` is relative to the whole paragraph.
|
||||
pub fn reshape(
|
||||
&'a self,
|
||||
vt: &Vt,
|
||||
spans: &SpanMapper,
|
||||
text_range: Range<usize>,
|
||||
) -> ShapedText<'a> {
|
||||
let text = &self.text[text_range.start - self.base..text_range.end - self.base];
|
||||
if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) {
|
||||
Self {
|
||||
base: self.base + text_range.start,
|
||||
text: &self.text[text_range],
|
||||
base: text_range.start,
|
||||
text,
|
||||
dir: self.dir,
|
||||
styles: self.styles,
|
||||
size: self.size,
|
||||
@ -336,14 +346,7 @@ impl<'a> ShapedText<'a> {
|
||||
glyphs: Cow::Borrowed(glyphs),
|
||||
}
|
||||
} else {
|
||||
shape(
|
||||
vt,
|
||||
self.base + text_range.start,
|
||||
&self.text[text_range],
|
||||
spans,
|
||||
self.styles,
|
||||
self.dir,
|
||||
)
|
||||
shape(vt, text_range.start, text, spans, self.styles, self.dir)
|
||||
}
|
||||
}
|
||||
|
||||
@ -358,7 +361,11 @@ impl<'a> ShapedText<'a> {
|
||||
let ttf = font.ttf();
|
||||
let glyph_id = ttf.glyph_index('-')?;
|
||||
let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?);
|
||||
let cluster = self.glyphs.last().map(|g| g.cluster).unwrap_or_default();
|
||||
let range = self
|
||||
.glyphs
|
||||
.last()
|
||||
.map(|g| g.range.end..g.range.end)
|
||||
.unwrap_or_default();
|
||||
self.width += x_advance.at(self.size);
|
||||
self.glyphs.to_mut().push(ShapedGlyph {
|
||||
font,
|
||||
@ -366,11 +373,10 @@ impl<'a> ShapedText<'a> {
|
||||
x_advance,
|
||||
x_offset: Em::zero(),
|
||||
y_offset: Em::zero(),
|
||||
cluster,
|
||||
range,
|
||||
safe_to_break: true,
|
||||
c: '-',
|
||||
span: Span::detached(),
|
||||
offset: 0,
|
||||
span: (Span::detached(), 0),
|
||||
});
|
||||
Some(())
|
||||
});
|
||||
@ -396,9 +402,9 @@ impl<'a> ShapedText<'a> {
|
||||
|
||||
// Handle edge cases.
|
||||
let len = self.glyphs.len();
|
||||
if text_index == 0 {
|
||||
if text_index == self.base {
|
||||
return Some(if ltr { 0 } else { len });
|
||||
} else if text_index == self.text.len() {
|
||||
} else if text_index == self.base + self.text.len() {
|
||||
return Some(if ltr { len } else { 0 });
|
||||
}
|
||||
|
||||
@ -406,7 +412,7 @@ impl<'a> ShapedText<'a> {
|
||||
let mut idx = self
|
||||
.glyphs
|
||||
.binary_search_by(|g| {
|
||||
let ordering = g.cluster.cmp(&text_index);
|
||||
let ordering = g.range.start.cmp(&text_index);
|
||||
if ltr {
|
||||
ordering
|
||||
} else {
|
||||
@ -422,7 +428,7 @@ impl<'a> ShapedText<'a> {
|
||||
|
||||
// Search for the outermost glyph with the text index.
|
||||
while let Some(next) = next(idx, 1) {
|
||||
if self.glyphs.get(next).map_or(true, |g| g.cluster != text_index) {
|
||||
if self.glyphs.get(next).map_or(true, |g| g.range.start != text_index) {
|
||||
break;
|
||||
}
|
||||
idx = next;
|
||||
@ -444,7 +450,6 @@ impl Debug for ShapedText<'_> {
|
||||
/// Holds shaping results and metadata common to all shaped segments.
|
||||
struct ShapingContext<'a> {
|
||||
vt: &'a Vt<'a>,
|
||||
base: usize,
|
||||
spans: &'a SpanMapper,
|
||||
glyphs: Vec<ShapedGlyph>,
|
||||
used: Vec<Font>,
|
||||
@ -468,7 +473,6 @@ pub fn shape<'a>(
|
||||
let size = TextElem::size_in(styles);
|
||||
let mut ctx = ShapingContext {
|
||||
vt,
|
||||
base,
|
||||
spans,
|
||||
size,
|
||||
glyphs: vec![],
|
||||
@ -481,7 +485,7 @@ pub fn shape<'a>(
|
||||
};
|
||||
|
||||
if !text.is_empty() {
|
||||
shape_segment(&mut ctx, 0, text, families(styles));
|
||||
shape_segment(&mut ctx, base, text, families(styles));
|
||||
}
|
||||
|
||||
track_and_space(&mut ctx);
|
||||
@ -552,6 +556,7 @@ fn shape_segment(
|
||||
let buffer = rustybuzz::shape(font.rusty(), &ctx.tags, buffer);
|
||||
let infos = buffer.glyph_infos();
|
||||
let pos = buffer.glyph_positions();
|
||||
let ltr = ctx.dir.is_positive();
|
||||
|
||||
// Collect the shaped glyphs, doing fallback and shaping parts again with
|
||||
// the next font if necessary.
|
||||
@ -560,68 +565,66 @@ fn shape_segment(
|
||||
let info = &infos[i];
|
||||
let cluster = info.cluster as usize;
|
||||
|
||||
// Add the glyph to the shaped output.
|
||||
if info.glyph_id != 0 {
|
||||
// Add the glyph to the shaped output.
|
||||
// TODO: Don't ignore y_advance.
|
||||
let (span, offset) = ctx.spans.span_at(ctx.base + cluster);
|
||||
ctx.glyphs.push(ShapedGlyph {
|
||||
font: font.clone(),
|
||||
glyph_id: info.glyph_id as u16,
|
||||
x_advance: font.to_em(pos[i].x_advance),
|
||||
x_offset: font.to_em(pos[i].x_offset),
|
||||
y_offset: font.to_em(pos[i].y_offset),
|
||||
cluster: base + cluster,
|
||||
safe_to_break: !info.unsafe_to_break(),
|
||||
c: text[cluster..].chars().next().unwrap(),
|
||||
span,
|
||||
offset,
|
||||
});
|
||||
} else {
|
||||
// Determine the source text range for the tofu sequence.
|
||||
let range = {
|
||||
// First, search for the end of the tofu sequence.
|
||||
let k = i;
|
||||
while infos.get(i + 1).map_or(false, |info| info.glyph_id == 0) {
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// Then, determine the start and end text index.
|
||||
//
|
||||
// Examples:
|
||||
// Everything is shown in visual order. Tofus are written as "_".
|
||||
// We want to find out that the tofus span the text `2..6`.
|
||||
// Note that the clusters are longer than 1 char.
|
||||
//
|
||||
// Left-to-right:
|
||||
// Text: h a l i h a l l o
|
||||
// Glyphs: A _ _ C E
|
||||
// Clusters: 0 2 4 6 8
|
||||
// k=1 i=2
|
||||
//
|
||||
// Right-to-left:
|
||||
// Text: O L L A H I L A H
|
||||
// Glyphs: E C _ _ A
|
||||
// Clusters: 8 6 4 2 0
|
||||
// k=2 i=3
|
||||
let ltr = ctx.dir.is_positive();
|
||||
let first = if ltr { k } else { i };
|
||||
let start = infos[first].cluster as usize;
|
||||
let last = if ltr { i.checked_add(1) } else { k.checked_sub(1) };
|
||||
let end = last
|
||||
// Determine the text range of the glyph.
|
||||
let start = base + cluster;
|
||||
let end = base
|
||||
+ if ltr { i.checked_add(1) } else { i.checked_sub(1) }
|
||||
.and_then(|last| infos.get(last))
|
||||
.map_or(text.len(), |info| info.cluster as usize);
|
||||
|
||||
start..end
|
||||
};
|
||||
ctx.glyphs.push(ShapedGlyph {
|
||||
font: font.clone(),
|
||||
glyph_id: info.glyph_id as u16,
|
||||
// TODO: Don't ignore y_advance.
|
||||
x_advance: font.to_em(pos[i].x_advance),
|
||||
x_offset: font.to_em(pos[i].x_offset),
|
||||
y_offset: font.to_em(pos[i].y_offset),
|
||||
range: start..end,
|
||||
safe_to_break: !info.unsafe_to_break(),
|
||||
c: text[cluster..].chars().next().unwrap(),
|
||||
span: ctx.spans.span_at(start),
|
||||
});
|
||||
} else {
|
||||
// First, search for the end of the tofu sequence.
|
||||
let k = i;
|
||||
while infos.get(i + 1).map_or(false, |info| info.glyph_id == 0) {
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// Then, determine the start and end text index for the tofu
|
||||
// sequence.
|
||||
//
|
||||
// Examples:
|
||||
// Everything is shown in visual order. Tofus are written as "_".
|
||||
// We want to find out that the tofus span the text `2..6`.
|
||||
// Note that the clusters are longer than 1 char.
|
||||
//
|
||||
// Left-to-right:
|
||||
// Text: h a l i h a l l o
|
||||
// Glyphs: A _ _ C E
|
||||
// Clusters: 0 2 4 6 8
|
||||
// k=1 i=2
|
||||
//
|
||||
// Right-to-left:
|
||||
// Text: O L L A H I L A H
|
||||
// Glyphs: E C _ _ A
|
||||
// Clusters: 8 6 4 2 0
|
||||
// k=2 i=3
|
||||
let start = infos[if ltr { k } else { i }].cluster as usize;
|
||||
let end = if ltr { i.checked_add(1) } else { k.checked_sub(1) }
|
||||
.and_then(|last| infos.get(last))
|
||||
.map_or(text.len(), |info| info.cluster as usize);
|
||||
|
||||
// Trim half-baked cluster.
|
||||
let remove = base + range.start..base + range.end;
|
||||
while ctx.glyphs.last().map_or(false, |g| remove.contains(&g.cluster)) {
|
||||
let remove = base + start..base + end;
|
||||
while ctx.glyphs.last().map_or(false, |g| remove.contains(&g.range.start)) {
|
||||
ctx.glyphs.pop();
|
||||
}
|
||||
|
||||
// Recursively shape the tofu sequence with the next family.
|
||||
shape_segment(ctx, base + range.start, &text[range], families.clone());
|
||||
shape_segment(ctx, base + start, &text[start..end], families.clone());
|
||||
}
|
||||
|
||||
i += 1;
|
||||
@ -634,19 +637,18 @@ fn shape_segment(
|
||||
fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) {
|
||||
let x_advance = font.advance(0).unwrap_or_default();
|
||||
for (cluster, c) in text.char_indices() {
|
||||
let cluster = base + cluster;
|
||||
let (span, offset) = ctx.spans.span_at(ctx.base + cluster);
|
||||
let start = base + cluster;
|
||||
let end = start + c.len_utf8();
|
||||
ctx.glyphs.push(ShapedGlyph {
|
||||
font: font.clone(),
|
||||
glyph_id: 0,
|
||||
x_advance,
|
||||
x_offset: Em::zero(),
|
||||
y_offset: Em::zero(),
|
||||
cluster,
|
||||
range: start..end,
|
||||
safe_to_break: true,
|
||||
c,
|
||||
span,
|
||||
offset,
|
||||
span: ctx.spans.span_at(start),
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -668,7 +670,10 @@ fn track_and_space(ctx: &mut ShapingContext) {
|
||||
glyph.x_advance = spacing.relative_to(glyph.x_advance);
|
||||
}
|
||||
|
||||
if glyphs.peek().map_or(false, |next| glyph.cluster != next.cluster) {
|
||||
if glyphs
|
||||
.peek()
|
||||
.map_or(false, |next| glyph.range.start != next.range.start)
|
||||
{
|
||||
glyph.x_advance += tracking;
|
||||
}
|
||||
}
|
||||
|
50
src/doc.rs
50
src/doc.rs
@ -1,7 +1,8 @@
|
||||
//! Finished documents.
|
||||
|
||||
use std::fmt::{self, Debug, Formatter, Write};
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::num::NonZeroUsize;
|
||||
use std::ops::Range;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
@ -114,23 +115,6 @@ impl Frame {
|
||||
pub fn items(&self) -> std::slice::Iter<'_, (Point, FrameItem)> {
|
||||
self.items.iter()
|
||||
}
|
||||
|
||||
/// Approximately recover the text inside of the frame and its children.
|
||||
pub fn text(&self) -> EcoString {
|
||||
let mut text = EcoString::new();
|
||||
for (_, item) in self.items() {
|
||||
match item {
|
||||
FrameItem::Text(item) => {
|
||||
for glyph in &item.glyphs {
|
||||
text.push(glyph.c);
|
||||
}
|
||||
}
|
||||
FrameItem::Group(group) => text.push_str(&group.frame.text()),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
text
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert items and subframes.
|
||||
@ -476,6 +460,8 @@ pub struct TextItem {
|
||||
pub fill: Paint,
|
||||
/// The natural language of the text.
|
||||
pub lang: Lang,
|
||||
/// The item's plain text.
|
||||
pub text: EcoString,
|
||||
/// The glyphs.
|
||||
pub glyphs: Vec<Glyph>,
|
||||
}
|
||||
@ -489,19 +475,14 @@ impl TextItem {
|
||||
|
||||
impl Debug for TextItem {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
// This is only a rough approximation of the source text.
|
||||
f.write_str("Text(\"")?;
|
||||
for glyph in &self.glyphs {
|
||||
for c in glyph.c.escape_debug() {
|
||||
f.write_char(c)?;
|
||||
}
|
||||
}
|
||||
f.write_str("\")")
|
||||
f.write_str("Text(")?;
|
||||
self.text.fmt(f)?;
|
||||
f.write_str(")")
|
||||
}
|
||||
}
|
||||
|
||||
/// A glyph in a run of shaped text.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
|
||||
pub struct Glyph {
|
||||
/// The glyph's index in the font.
|
||||
pub id: u16,
|
||||
@ -509,12 +490,17 @@ pub struct Glyph {
|
||||
pub x_advance: Em,
|
||||
/// The horizontal offset of the glyph.
|
||||
pub x_offset: Em,
|
||||
/// The first character of the glyph's cluster.
|
||||
pub c: char,
|
||||
/// The range of the glyph in its item's text.
|
||||
pub range: Range<u16>,
|
||||
/// The source code location of the text.
|
||||
pub span: Span,
|
||||
/// The offset within the spanned text.
|
||||
pub offset: u16,
|
||||
pub span: (Span, u16),
|
||||
}
|
||||
|
||||
impl Glyph {
|
||||
/// The range of the glyph in its item's text.
|
||||
pub fn range(&self) -> Range<usize> {
|
||||
usize::from(self.range.start)..usize::from(self.range.end)
|
||||
}
|
||||
}
|
||||
|
||||
/// An identifier for a natural language.
|
||||
|
@ -1,13 +1,21 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use ecow::eco_format;
|
||||
use ecow::{eco_format, EcoString};
|
||||
use pdf_writer::types::{CidFontType, FontFlags, SystemInfo, UnicodeCmap};
|
||||
use pdf_writer::{Filter, Finish, Name, Rect, Str};
|
||||
use ttf_parser::{name_id, GlyphId, Tag};
|
||||
use unicode_general_category::GeneralCategory;
|
||||
|
||||
use super::{deflate, EmExt, PdfContext, RefExt};
|
||||
use crate::util::SliceExt;
|
||||
|
||||
const CMAP_NAME: Name = Name(b"Custom");
|
||||
const SYSTEM_INFO: SystemInfo = SystemInfo {
|
||||
registry: Str(b"Adobe"),
|
||||
ordering: Str(b"Identity"),
|
||||
supplement: 0,
|
||||
};
|
||||
|
||||
/// Embed all used fonts into the PDF.
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub fn write_fonts(ctx: &mut PdfContext) {
|
||||
@ -19,7 +27,7 @@ pub fn write_fonts(ctx: &mut PdfContext) {
|
||||
let data_ref = ctx.alloc.bump();
|
||||
ctx.font_refs.push(type0_ref);
|
||||
|
||||
let glyphs = &ctx.glyph_sets[font];
|
||||
let glyph_set = ctx.glyph_sets.get_mut(font).unwrap();
|
||||
let metrics = font.metrics();
|
||||
let ttf = font.ttf();
|
||||
|
||||
@ -29,12 +37,6 @@ pub fn write_fonts(ctx: &mut PdfContext) {
|
||||
|
||||
let base_font = eco_format!("ABCDEF+{}", postscript_name);
|
||||
let base_font = Name(base_font.as_bytes());
|
||||
let cmap_name = Name(b"Custom");
|
||||
let system_info = SystemInfo {
|
||||
registry: Str(b"Adobe"),
|
||||
ordering: Str(b"Identity"),
|
||||
supplement: 0,
|
||||
};
|
||||
|
||||
// Write the base font object referencing the CID font.
|
||||
ctx.writer
|
||||
@ -59,7 +61,7 @@ pub fn write_fonts(ctx: &mut PdfContext) {
|
||||
let mut cid = ctx.writer.cid_font(cid_ref);
|
||||
cid.subtype(subtype);
|
||||
cid.base_font(base_font);
|
||||
cid.system_info(system_info);
|
||||
cid.system_info(SYSTEM_INFO);
|
||||
cid.font_descriptor(descriptor_ref);
|
||||
cid.default_width(0.0);
|
||||
|
||||
@ -70,7 +72,7 @@ pub fn write_fonts(ctx: &mut PdfContext) {
|
||||
// Extract the widths of all glyphs.
|
||||
let num_glyphs = ttf.number_of_glyphs();
|
||||
let mut widths = vec![0.0; num_glyphs as usize];
|
||||
for &g in glyphs {
|
||||
for &g in glyph_set.keys() {
|
||||
let x = ttf.glyph_hor_advance(GlyphId(g)).unwrap_or(0);
|
||||
widths[g as usize] = font.to_em(x).to_font_units();
|
||||
}
|
||||
@ -130,42 +132,15 @@ pub fn write_fonts(ctx: &mut PdfContext) {
|
||||
|
||||
font_descriptor.finish();
|
||||
|
||||
// Compute a reverse mapping from glyphs to unicode.
|
||||
let cmap = {
|
||||
let mut mapping = BTreeMap::new();
|
||||
for subtable in
|
||||
ttf.tables().cmap.into_iter().flat_map(|table| table.subtables)
|
||||
{
|
||||
if subtable.is_unicode() {
|
||||
subtable.codepoints(|n| {
|
||||
if let Some(c) = std::char::from_u32(n) {
|
||||
if let Some(GlyphId(g)) = ttf.glyph_index(c) {
|
||||
if glyphs.contains(&g) {
|
||||
mapping.insert(g, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut cmap = UnicodeCmap::new(cmap_name, system_info);
|
||||
for (g, c) in mapping {
|
||||
cmap.pair(g, c);
|
||||
}
|
||||
cmap
|
||||
};
|
||||
|
||||
// Write the /ToUnicode character map, which maps glyph ids back to
|
||||
// unicode codepoints to enable copying out of the PDF.
|
||||
ctx.writer
|
||||
.cmap(cmap_ref, &deflate(&cmap.finish()))
|
||||
.filter(Filter::FlateDecode);
|
||||
let cmap = create_cmap(ttf, glyph_set);
|
||||
ctx.writer.cmap(cmap_ref, &cmap.finish());
|
||||
|
||||
// Subset and write the font's bytes.
|
||||
let data = font.data();
|
||||
let subsetted = {
|
||||
let glyphs: Vec<_> = glyphs.iter().copied().collect();
|
||||
let glyphs: Vec<_> = glyph_set.keys().copied().collect();
|
||||
let profile = subsetter::Profile::pdf(&glyphs);
|
||||
subsetter::subset(data, font.index(), profile)
|
||||
};
|
||||
@ -183,3 +158,44 @@ pub fn write_fonts(ctx: &mut PdfContext) {
|
||||
stream.finish();
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a /ToUnicode CMap.
|
||||
fn create_cmap(
|
||||
ttf: &ttf_parser::Face,
|
||||
glyph_set: &mut BTreeMap<u16, EcoString>,
|
||||
) -> UnicodeCmap {
|
||||
// For glyphs that have codepoints mapping to them in the font's cmap table, we
|
||||
// prefer them over pre-existing text mappings from the document. Only
|
||||
// things that don't have a corresponding codepoint (or only a private-use
|
||||
// one) like the "Th" in Linux Libertine get the text of their first
|
||||
// occurrences in the document instead.
|
||||
for subtable in ttf.tables().cmap.into_iter().flat_map(|table| table.subtables) {
|
||||
if !subtable.is_unicode() {
|
||||
continue;
|
||||
}
|
||||
|
||||
subtable.codepoints(|n| {
|
||||
let Some(c) = std::char::from_u32(n) else { return };
|
||||
if unicode_general_category::get_general_category(c)
|
||||
== GeneralCategory::PrivateUse
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
let Some(GlyphId(g)) = ttf.glyph_index(c) else { return };
|
||||
if glyph_set.contains_key(&g) {
|
||||
glyph_set.insert(g, c.into());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Produce a reverse mapping from glyphs to unicode strings.
|
||||
let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO);
|
||||
for (&g, text) in glyph_set.iter() {
|
||||
if !text.is_empty() {
|
||||
cmap.pair_with_multiple(g, text.chars());
|
||||
}
|
||||
}
|
||||
|
||||
cmap
|
||||
}
|
||||
|
@ -6,9 +6,10 @@ mod outline;
|
||||
mod page;
|
||||
|
||||
use std::cmp::Eq;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::hash::Hash;
|
||||
|
||||
use ecow::EcoString;
|
||||
use pdf_writer::types::Direction;
|
||||
use pdf_writer::{Finish, Name, PdfWriter, Ref, TextStr};
|
||||
use xmp_writer::{LangId, RenditionClass, XmpWriter};
|
||||
@ -52,7 +53,13 @@ pub struct PdfContext<'a> {
|
||||
page_refs: Vec<Ref>,
|
||||
font_map: Remapper<Font>,
|
||||
image_map: Remapper<Image>,
|
||||
glyph_sets: HashMap<Font, HashSet<u16>>,
|
||||
/// For each font a mapping from used glyphs to their text representation.
|
||||
/// May contain multiple chars in case of ligatures or similar things. If the
|
||||
/// same glyph has different text representations within one document, we
|
||||
/// just save the first one. The resulting strings are used for the
|
||||
/// PDF's /ToUnicode map for glyphs that don't have an entry in the font's
|
||||
/// cmap. This is important for copy-paste and searching.
|
||||
glyph_sets: HashMap<Font, BTreeMap<u16, EcoString>>,
|
||||
languages: HashMap<Lang, usize>,
|
||||
}
|
||||
|
||||
|
@ -364,11 +364,12 @@ fn write_group(ctx: &mut PageContext, pos: Point, group: &GroupItem) {
|
||||
/// Encode a text run into the content stream.
|
||||
fn write_text(ctx: &mut PageContext, x: f32, y: f32, text: &TextItem) {
|
||||
*ctx.parent.languages.entry(text.lang).or_insert(0) += text.glyphs.len();
|
||||
ctx.parent
|
||||
.glyph_sets
|
||||
.entry(text.font.clone())
|
||||
.or_default()
|
||||
.extend(text.glyphs.iter().map(|g| g.id));
|
||||
|
||||
let glyph_set = ctx.parent.glyph_sets.entry(text.font.clone()).or_default();
|
||||
for g in &text.glyphs {
|
||||
let segment = &text.text[g.range()];
|
||||
glyph_set.entry(g.id).or_insert_with(|| segment.into());
|
||||
}
|
||||
|
||||
ctx.set_fill(&text.fill);
|
||||
ctx.set_font(&text.font, text.size);
|
||||
|
@ -67,7 +67,8 @@ pub fn jump_from_click(
|
||||
|
||||
FrameItem::Text(text) => {
|
||||
for glyph in &text.glyphs {
|
||||
if glyph.span.is_detached() {
|
||||
let (span, span_offset) = glyph.span;
|
||||
if span.is_detached() {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -77,13 +78,13 @@ pub fn jump_from_click(
|
||||
Size::new(width, text.size),
|
||||
click,
|
||||
) {
|
||||
let source = world.source(glyph.span.source());
|
||||
let node = source.find(glyph.span)?;
|
||||
let source = world.source(span.source());
|
||||
let node = source.find(span)?;
|
||||
let pos = if node.kind() == SyntaxKind::Text {
|
||||
let range = node.range();
|
||||
let mut offset = range.start + usize::from(glyph.offset);
|
||||
let mut offset = range.start + usize::from(span_offset);
|
||||
if (click.x - pos.x) > width / 2.0 {
|
||||
offset += glyph.c.len_utf8();
|
||||
offset += glyph.range().len();
|
||||
}
|
||||
offset.min(range.end)
|
||||
} else {
|
||||
@ -150,7 +151,7 @@ fn find_in_frame(frame: &Frame, span: Span) -> Option<Point> {
|
||||
|
||||
if let FrameItem::Text(text) = item {
|
||||
for glyph in &text.glyphs {
|
||||
if glyph.span == span {
|
||||
if glyph.span.0 == span {
|
||||
return Some(pos);
|
||||
}
|
||||
pos.x += glyph.x_advance.at(text.size);
|
||||
|
BIN
tests/ref/text/copy-paste.png
Normal file
BIN
tests/ref/text/copy-paste.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.1 KiB |
Binary file not shown.
Before Width: | Height: | Size: 1.4 KiB After Width: | Height: | Size: 2.5 KiB |
@ -353,9 +353,18 @@ fn test(
|
||||
pdf_path: Option<&Path>,
|
||||
args: &Args,
|
||||
) -> bool {
|
||||
let name = src_path.strip_prefix(TYP_DIR).unwrap_or(src_path);
|
||||
struct PanicGuard<'a>(&'a Path);
|
||||
impl Drop for PanicGuard<'_> {
|
||||
fn drop(&mut self) {
|
||||
if std::thread::panicking() {
|
||||
println!("Panicked in {}", self.0.display());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let name = src_path.strip_prefix(TYP_DIR).unwrap_or(src_path);
|
||||
let text = fs::read_to_string(src_path).unwrap();
|
||||
let _guard = PanicGuard(name);
|
||||
|
||||
let mut output = String::new();
|
||||
let mut ok = true;
|
||||
@ -401,6 +410,7 @@ fn test(
|
||||
line,
|
||||
&mut rng,
|
||||
);
|
||||
|
||||
ok &= part_ok;
|
||||
compare_ever |= compare_here;
|
||||
frames.extend(part_frames);
|
||||
|
8
tests/typ/text/copy-paste.typ
Normal file
8
tests/typ/text/copy-paste.typ
Normal file
@ -0,0 +1,8 @@
|
||||
// Test copy-paste and search in PDF with ligatures
|
||||
// and Arabic text. Must be tested manually!
|
||||
|
||||
---
|
||||
The after fira 🏳️🌈!
|
||||
|
||||
#set text(lang: "ar", font: "Noto Sans Arabic")
|
||||
مرحبًا
|
Loading…
x
Reference in New Issue
Block a user