Make ligatures copyable and searchable

Fixes #479
Fixes #1040
This commit is contained in:
Laurenz 2023-05-03 10:33:18 +02:00
parent bcc014c4e1
commit ad347632ab
17 changed files with 229 additions and 187 deletions

12
Cargo.lock generated
View File

@ -116,6 +116,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "az"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b7e4c2464d97fe331d41de9d5db0def0a96f4d823b8b32a2efd503578988973"
[[package]]
name = "base64"
version = "0.13.1"
@ -1385,9 +1391,9 @@ checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd"
[[package]]
name = "pdf-writer"
version = "0.7.0"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63f45f7c7538e67c58cb4977e4f97bbd75fbd3990d827d28d597ec746291f644"
checksum = "30900f178ea696fc5d9637171f98aaa93d5aae54f0726726df68fc3e32810db6"
dependencies = [
"bitflags 1.3.2",
"itoa",
@ -2306,6 +2312,7 @@ dependencies = [
"tracing",
"ttf-parser",
"typst-macros",
"unicode-general-category",
"unicode-math-class",
"unicode-segmentation",
"unicode-xid",
@ -2366,6 +2373,7 @@ dependencies = [
name = "typst-library"
version = "0.3.0"
dependencies = [
"az",
"chinese-number",
"comemo",
"csv",

View File

@ -33,7 +33,7 @@ indexmap = "1.9.3"
log = "0.4"
miniz_oxide = "0.7"
once_cell = "1"
pdf-writer = "0.7"
pdf-writer = "0.7.1"
pixglyph = "0.1"
regex = "1"
resvg = { version = "0.32", default-features = false }
@ -46,6 +46,7 @@ svg2pdf = { git = "https://github.com/typst/svg2pdf" }
tiny-skia = "0.9.0"
tracing = "0.1.37"
ttf-parser = "0.18.1"
unicode-general-category = "0.6"
unicode-math-class = "0.1"
unicode-segmentation = "1"
unicode-xid = "0.2"

Binary file not shown.

View File

@ -159,7 +159,7 @@ construct: |
data-loading: |
Data loading from external files.
These functions help you with embedding data from experiments and APIs in your
These functions help you with embedding data from experiments in your
documents.
utility: |

View File

@ -16,6 +16,7 @@ bench = false
[dependencies]
typst = { path = ".." }
az = "1.2"
chinese-number = { version = "0.7.2", default-features = false, features = ["number-to-chinese"] }
comemo = "0.2.2"
csv = "1"

View File

@ -1139,8 +1139,7 @@ fn line<'a>(
// are no other items in the line.
if hyphen || start + shaped.text.len() > range.end {
if hyphen || start < range.end || before.is_empty() {
let shifted = start - base..range.end - base;
let mut reshaped = shaped.reshape(vt, &p.spans, shifted);
let mut reshaped = shaped.reshape(vt, &p.spans, start..range.end);
if hyphen || shy {
reshaped.push_hyphen(vt);
}
@ -1162,8 +1161,7 @@ fn line<'a>(
// Reshape if necessary.
if range.start + shaped.text.len() > end {
if range.start < end {
let shifted = range.start - base..end - base;
let reshaped = shaped.reshape(vt, &p.spans, shifted);
let reshaped = shaped.reshape(vt, &p.spans, range.start..end);
width += reshaped.width;
first = Some(Item::Text(reshaped));
}

View File

@ -222,13 +222,13 @@ impl GlyphFragment {
size: self.font_size,
fill: self.fill,
lang: self.lang,
text: self.c.into(),
glyphs: vec![Glyph {
id: self.id.0,
c: self.c,
x_advance: Em::from_length(self.width, self.font_size),
x_offset: Em::zero(),
span: self.span,
offset: 0,
range: 0..self.c.len_utf8() as u16,
span: (self.span, 0),
}],
};
let size = Size::new(self.width, self.ascent + self.descent);

View File

@ -1,6 +1,7 @@
use std::ops::Range;
use std::str::FromStr;
use az::SaturatingAs;
use rustybuzz::{Feature, Tag, UnicodeBuffer};
use typst::font::{Font, FontVariant};
use typst::util::SliceExt;
@ -47,20 +48,18 @@ pub struct ShapedGlyph {
pub x_offset: Em,
/// The vertical offset of the glyph.
pub y_offset: Em,
/// The byte index in the source text where this glyph's cluster starts. A
/// cluster is a sequence of one or multiple glyphs that cannot be
/// separated and must always be treated as a union.
pub cluster: usize,
/// The byte range of this glyph's cluster in the full paragraph. A cluster
/// is a sequence of one or multiple glyphs that cannot be separated and
/// must always be treated as a union.
pub range: Range<usize>,
/// Whether splitting the shaping result before this glyph would yield the
/// same results as shaping the parts to both sides of `text_index`
/// separately.
pub safe_to_break: bool,
/// The first char in this glyph's cluster.
pub c: char,
/// The source code location of the text.
pub span: Span,
/// The offset within the spanned text.
pub offset: u16,
/// The source code location of the glyph and its byte offset within it.
pub span: (Span, u16),
}
#[derive(Debug, Clone, Default)]
@ -181,6 +180,12 @@ impl<'a> ShapedText<'a> {
for ((font, y_offset), group) in
self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset))
{
let mut range = group[0].range.clone();
for glyph in group {
range.start = range.start.min(glyph.range.start);
range.end = range.end.max(glyph.range.end);
}
let pos = Point::new(offset, top + shift - y_offset.at(self.size));
let glyphs = group
.iter()
@ -195,8 +200,8 @@ impl<'a> ShapedText<'a> {
} else {
glyph.stretchability().1
};
let justification_left = adjustability_left * justification_ratio;
let justification_left = adjustability_left * justification_ratio;
let mut justification_right =
adjustability_right * justification_ratio;
if glyph.is_justifiable() {
@ -206,15 +211,16 @@ impl<'a> ShapedText<'a> {
frame.size_mut().x += justification_left.at(self.size)
+ justification_right.at(self.size);
Glyph {
id: glyph.glyph_id,
x_advance: glyph.x_advance
+ justification_left
+ justification_right,
x_offset: glyph.x_offset + justification_left,
c: glyph.c,
range: (glyph.range.start - range.start).saturating_as()
..(glyph.range.end - range.start).saturating_as(),
span: glyph.span,
offset: glyph.offset,
}
})
.collect();
@ -224,6 +230,7 @@ impl<'a> ShapedText<'a> {
size: self.size,
lang,
fill: fill.clone(),
text: self.text[range.start - self.base..range.end - self.base].into(),
glyphs,
};
@ -318,16 +325,19 @@ impl<'a> ShapedText<'a> {
/// Reshape a range of the shaped text, reusing information from this
/// shaping process if possible.
///
/// The `text_range` is relative to the whole paragraph.
pub fn reshape(
&'a self,
vt: &Vt,
spans: &SpanMapper,
text_range: Range<usize>,
) -> ShapedText<'a> {
let text = &self.text[text_range.start - self.base..text_range.end - self.base];
if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) {
Self {
base: self.base + text_range.start,
text: &self.text[text_range],
base: text_range.start,
text,
dir: self.dir,
styles: self.styles,
size: self.size,
@ -336,14 +346,7 @@ impl<'a> ShapedText<'a> {
glyphs: Cow::Borrowed(glyphs),
}
} else {
shape(
vt,
self.base + text_range.start,
&self.text[text_range],
spans,
self.styles,
self.dir,
)
shape(vt, text_range.start, text, spans, self.styles, self.dir)
}
}
@ -358,7 +361,11 @@ impl<'a> ShapedText<'a> {
let ttf = font.ttf();
let glyph_id = ttf.glyph_index('-')?;
let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?);
let cluster = self.glyphs.last().map(|g| g.cluster).unwrap_or_default();
let range = self
.glyphs
.last()
.map(|g| g.range.end..g.range.end)
.unwrap_or_default();
self.width += x_advance.at(self.size);
self.glyphs.to_mut().push(ShapedGlyph {
font,
@ -366,11 +373,10 @@ impl<'a> ShapedText<'a> {
x_advance,
x_offset: Em::zero(),
y_offset: Em::zero(),
cluster,
range,
safe_to_break: true,
c: '-',
span: Span::detached(),
offset: 0,
span: (Span::detached(), 0),
});
Some(())
});
@ -396,9 +402,9 @@ impl<'a> ShapedText<'a> {
// Handle edge cases.
let len = self.glyphs.len();
if text_index == 0 {
if text_index == self.base {
return Some(if ltr { 0 } else { len });
} else if text_index == self.text.len() {
} else if text_index == self.base + self.text.len() {
return Some(if ltr { len } else { 0 });
}
@ -406,7 +412,7 @@ impl<'a> ShapedText<'a> {
let mut idx = self
.glyphs
.binary_search_by(|g| {
let ordering = g.cluster.cmp(&text_index);
let ordering = g.range.start.cmp(&text_index);
if ltr {
ordering
} else {
@ -422,7 +428,7 @@ impl<'a> ShapedText<'a> {
// Search for the outermost glyph with the text index.
while let Some(next) = next(idx, 1) {
if self.glyphs.get(next).map_or(true, |g| g.cluster != text_index) {
if self.glyphs.get(next).map_or(true, |g| g.range.start != text_index) {
break;
}
idx = next;
@ -444,7 +450,6 @@ impl Debug for ShapedText<'_> {
/// Holds shaping results and metadata common to all shaped segments.
struct ShapingContext<'a> {
vt: &'a Vt<'a>,
base: usize,
spans: &'a SpanMapper,
glyphs: Vec<ShapedGlyph>,
used: Vec<Font>,
@ -468,7 +473,6 @@ pub fn shape<'a>(
let size = TextElem::size_in(styles);
let mut ctx = ShapingContext {
vt,
base,
spans,
size,
glyphs: vec![],
@ -481,7 +485,7 @@ pub fn shape<'a>(
};
if !text.is_empty() {
shape_segment(&mut ctx, 0, text, families(styles));
shape_segment(&mut ctx, base, text, families(styles));
}
track_and_space(&mut ctx);
@ -552,6 +556,7 @@ fn shape_segment(
let buffer = rustybuzz::shape(font.rusty(), &ctx.tags, buffer);
let infos = buffer.glyph_infos();
let pos = buffer.glyph_positions();
let ltr = ctx.dir.is_positive();
// Collect the shaped glyphs, doing fallback and shaping parts again with
// the next font if necessary.
@ -560,68 +565,66 @@ fn shape_segment(
let info = &infos[i];
let cluster = info.cluster as usize;
// Add the glyph to the shaped output.
if info.glyph_id != 0 {
// Add the glyph to the shaped output.
// TODO: Don't ignore y_advance.
let (span, offset) = ctx.spans.span_at(ctx.base + cluster);
ctx.glyphs.push(ShapedGlyph {
font: font.clone(),
glyph_id: info.glyph_id as u16,
x_advance: font.to_em(pos[i].x_advance),
x_offset: font.to_em(pos[i].x_offset),
y_offset: font.to_em(pos[i].y_offset),
cluster: base + cluster,
safe_to_break: !info.unsafe_to_break(),
c: text[cluster..].chars().next().unwrap(),
span,
offset,
});
} else {
// Determine the source text range for the tofu sequence.
let range = {
// First, search for the end of the tofu sequence.
let k = i;
while infos.get(i + 1).map_or(false, |info| info.glyph_id == 0) {
i += 1;
}
// Then, determine the start and end text index.
//
// Examples:
// Everything is shown in visual order. Tofus are written as "_".
// We want to find out that the tofus span the text `2..6`.
// Note that the clusters are longer than 1 char.
//
// Left-to-right:
// Text: h a l i h a l l o
// Glyphs: A _ _ C E
// Clusters: 0 2 4 6 8
// k=1 i=2
//
// Right-to-left:
// Text: O L L A H I L A H
// Glyphs: E C _ _ A
// Clusters: 8 6 4 2 0
// k=2 i=3
let ltr = ctx.dir.is_positive();
let first = if ltr { k } else { i };
let start = infos[first].cluster as usize;
let last = if ltr { i.checked_add(1) } else { k.checked_sub(1) };
let end = last
// Determine the text range of the glyph.
let start = base + cluster;
let end = base
+ if ltr { i.checked_add(1) } else { i.checked_sub(1) }
.and_then(|last| infos.get(last))
.map_or(text.len(), |info| info.cluster as usize);
start..end
};
ctx.glyphs.push(ShapedGlyph {
font: font.clone(),
glyph_id: info.glyph_id as u16,
// TODO: Don't ignore y_advance.
x_advance: font.to_em(pos[i].x_advance),
x_offset: font.to_em(pos[i].x_offset),
y_offset: font.to_em(pos[i].y_offset),
range: start..end,
safe_to_break: !info.unsafe_to_break(),
c: text[cluster..].chars().next().unwrap(),
span: ctx.spans.span_at(start),
});
} else {
// First, search for the end of the tofu sequence.
let k = i;
while infos.get(i + 1).map_or(false, |info| info.glyph_id == 0) {
i += 1;
}
// Then, determine the start and end text index for the tofu
// sequence.
//
// Examples:
// Everything is shown in visual order. Tofus are written as "_".
// We want to find out that the tofus span the text `2..6`.
// Note that the clusters are longer than 1 char.
//
// Left-to-right:
// Text: h a l i h a l l o
// Glyphs: A _ _ C E
// Clusters: 0 2 4 6 8
// k=1 i=2
//
// Right-to-left:
// Text: O L L A H I L A H
// Glyphs: E C _ _ A
// Clusters: 8 6 4 2 0
// k=2 i=3
let start = infos[if ltr { k } else { i }].cluster as usize;
let end = if ltr { i.checked_add(1) } else { k.checked_sub(1) }
.and_then(|last| infos.get(last))
.map_or(text.len(), |info| info.cluster as usize);
// Trim half-baked cluster.
let remove = base + range.start..base + range.end;
while ctx.glyphs.last().map_or(false, |g| remove.contains(&g.cluster)) {
let remove = base + start..base + end;
while ctx.glyphs.last().map_or(false, |g| remove.contains(&g.range.start)) {
ctx.glyphs.pop();
}
// Recursively shape the tofu sequence with the next family.
shape_segment(ctx, base + range.start, &text[range], families.clone());
shape_segment(ctx, base + start, &text[start..end], families.clone());
}
i += 1;
@ -634,19 +637,18 @@ fn shape_segment(
fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) {
let x_advance = font.advance(0).unwrap_or_default();
for (cluster, c) in text.char_indices() {
let cluster = base + cluster;
let (span, offset) = ctx.spans.span_at(ctx.base + cluster);
let start = base + cluster;
let end = start + c.len_utf8();
ctx.glyphs.push(ShapedGlyph {
font: font.clone(),
glyph_id: 0,
x_advance,
x_offset: Em::zero(),
y_offset: Em::zero(),
cluster,
range: start..end,
safe_to_break: true,
c,
span,
offset,
span: ctx.spans.span_at(start),
});
}
}
@ -668,7 +670,10 @@ fn track_and_space(ctx: &mut ShapingContext) {
glyph.x_advance = spacing.relative_to(glyph.x_advance);
}
if glyphs.peek().map_or(false, |next| glyph.cluster != next.cluster) {
if glyphs
.peek()
.map_or(false, |next| glyph.range.start != next.range.start)
{
glyph.x_advance += tracking;
}
}

View File

@ -1,7 +1,8 @@
//! Finished documents.
use std::fmt::{self, Debug, Formatter, Write};
use std::fmt::{self, Debug, Formatter};
use std::num::NonZeroUsize;
use std::ops::Range;
use std::str::FromStr;
use std::sync::Arc;
@ -114,23 +115,6 @@ impl Frame {
pub fn items(&self) -> std::slice::Iter<'_, (Point, FrameItem)> {
self.items.iter()
}
/// Approximately recover the text inside of the frame and its children.
pub fn text(&self) -> EcoString {
let mut text = EcoString::new();
for (_, item) in self.items() {
match item {
FrameItem::Text(item) => {
for glyph in &item.glyphs {
text.push(glyph.c);
}
}
FrameItem::Group(group) => text.push_str(&group.frame.text()),
_ => {}
}
}
text
}
}
/// Insert items and subframes.
@ -476,6 +460,8 @@ pub struct TextItem {
pub fill: Paint,
/// The natural language of the text.
pub lang: Lang,
/// The item's plain text.
pub text: EcoString,
/// The glyphs.
pub glyphs: Vec<Glyph>,
}
@ -489,19 +475,14 @@ impl TextItem {
impl Debug for TextItem {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
// This is only a rough approximation of the source text.
f.write_str("Text(\"")?;
for glyph in &self.glyphs {
for c in glyph.c.escape_debug() {
f.write_char(c)?;
}
}
f.write_str("\")")
f.write_str("Text(")?;
self.text.fmt(f)?;
f.write_str(")")
}
}
/// A glyph in a run of shaped text.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Glyph {
/// The glyph's index in the font.
pub id: u16,
@ -509,12 +490,17 @@ pub struct Glyph {
pub x_advance: Em,
/// The horizontal offset of the glyph.
pub x_offset: Em,
/// The first character of the glyph's cluster.
pub c: char,
/// The range of the glyph in its item's text.
pub range: Range<u16>,
/// The source code location of the text.
pub span: Span,
/// The offset within the spanned text.
pub offset: u16,
pub span: (Span, u16),
}
impl Glyph {
    /// The glyph's range in its item's text, widened from `u16` to `usize`
    /// bounds so that it can be used directly to slice the text.
    pub fn range(&self) -> Range<usize> {
        let start = usize::from(self.range.start);
        let end = usize::from(self.range.end);
        start..end
    }
}
/// An identifier for a natural language.

View File

@ -1,13 +1,21 @@
use std::collections::BTreeMap;
use ecow::eco_format;
use ecow::{eco_format, EcoString};
use pdf_writer::types::{CidFontType, FontFlags, SystemInfo, UnicodeCmap};
use pdf_writer::{Filter, Finish, Name, Rect, Str};
use ttf_parser::{name_id, GlyphId, Tag};
use unicode_general_category::GeneralCategory;
use super::{deflate, EmExt, PdfContext, RefExt};
use crate::util::SliceExt;
const CMAP_NAME: Name = Name(b"Custom");
const SYSTEM_INFO: SystemInfo = SystemInfo {
registry: Str(b"Adobe"),
ordering: Str(b"Identity"),
supplement: 0,
};
/// Embed all used fonts into the PDF.
#[tracing::instrument(skip_all)]
pub fn write_fonts(ctx: &mut PdfContext) {
@ -19,7 +27,7 @@ pub fn write_fonts(ctx: &mut PdfContext) {
let data_ref = ctx.alloc.bump();
ctx.font_refs.push(type0_ref);
let glyphs = &ctx.glyph_sets[font];
let glyph_set = ctx.glyph_sets.get_mut(font).unwrap();
let metrics = font.metrics();
let ttf = font.ttf();
@ -29,12 +37,6 @@ pub fn write_fonts(ctx: &mut PdfContext) {
let base_font = eco_format!("ABCDEF+{}", postscript_name);
let base_font = Name(base_font.as_bytes());
let cmap_name = Name(b"Custom");
let system_info = SystemInfo {
registry: Str(b"Adobe"),
ordering: Str(b"Identity"),
supplement: 0,
};
// Write the base font object referencing the CID font.
ctx.writer
@ -59,7 +61,7 @@ pub fn write_fonts(ctx: &mut PdfContext) {
let mut cid = ctx.writer.cid_font(cid_ref);
cid.subtype(subtype);
cid.base_font(base_font);
cid.system_info(system_info);
cid.system_info(SYSTEM_INFO);
cid.font_descriptor(descriptor_ref);
cid.default_width(0.0);
@ -70,7 +72,7 @@ pub fn write_fonts(ctx: &mut PdfContext) {
// Extract the widths of all glyphs.
let num_glyphs = ttf.number_of_glyphs();
let mut widths = vec![0.0; num_glyphs as usize];
for &g in glyphs {
for &g in glyph_set.keys() {
let x = ttf.glyph_hor_advance(GlyphId(g)).unwrap_or(0);
widths[g as usize] = font.to_em(x).to_font_units();
}
@ -130,42 +132,15 @@ pub fn write_fonts(ctx: &mut PdfContext) {
font_descriptor.finish();
// Compute a reverse mapping from glyphs to unicode.
let cmap = {
let mut mapping = BTreeMap::new();
for subtable in
ttf.tables().cmap.into_iter().flat_map(|table| table.subtables)
{
if subtable.is_unicode() {
subtable.codepoints(|n| {
if let Some(c) = std::char::from_u32(n) {
if let Some(GlyphId(g)) = ttf.glyph_index(c) {
if glyphs.contains(&g) {
mapping.insert(g, c);
}
}
}
});
}
}
let mut cmap = UnicodeCmap::new(cmap_name, system_info);
for (g, c) in mapping {
cmap.pair(g, c);
}
cmap
};
// Write the /ToUnicode character map, which maps glyph ids back to
// unicode codepoints to enable copying out of the PDF.
ctx.writer
.cmap(cmap_ref, &deflate(&cmap.finish()))
.filter(Filter::FlateDecode);
let cmap = create_cmap(ttf, glyph_set);
ctx.writer.cmap(cmap_ref, &cmap.finish());
// Subset and write the font's bytes.
let data = font.data();
let subsetted = {
let glyphs: Vec<_> = glyphs.iter().copied().collect();
let glyphs: Vec<_> = glyph_set.keys().copied().collect();
let profile = subsetter::Profile::pdf(&glyphs);
subsetter::subset(data, font.index(), profile)
};
@ -183,3 +158,44 @@ pub fn write_fonts(ctx: &mut PdfContext) {
stream.finish();
}
}
/// Create a /ToUnicode CMap.
/// Create a /ToUnicode CMap.
///
/// Entries of `glyph_set` are overwritten in place for every used glyph that
/// a non-private-use codepoint in one of the font's Unicode cmap subtables
/// maps to. The returned CMap pairs each glyph with the chars of its
/// (non-empty) text.
fn create_cmap(
    ttf: &ttf_parser::Face,
    glyph_set: &mut BTreeMap<u16, EcoString>,
) -> UnicodeCmap {
    // For glyphs that have a codepoint mapping to them in the font's cmap
    // table, we prefer that codepoint over the pre-existing text mapping from
    // the document. Only things that don't have a corresponding codepoint (or
    // only a private-use one), like the "Th" in Linux Libertine, keep the
    // text of their first occurrence in the document instead.
    let unicode_subtables = ttf
        .tables()
        .cmap
        .into_iter()
        .flat_map(|table| table.subtables)
        .filter(|subtable| subtable.is_unicode());

    for subtable in unicode_subtables {
        subtable.codepoints(|n| {
            let Some(c) = std::char::from_u32(n) else { return };

            let category = unicode_general_category::get_general_category(c);
            if category == GeneralCategory::PrivateUse {
                return;
            }

            let Some(GlyphId(g)) = ttf.glyph_index(c) else { return };

            // Only glyphs that are actually used get their text replaced.
            if let Some(text) = glyph_set.get_mut(&g) {
                *text = c.into();
            }
        });
    }

    // Produce a reverse mapping from glyphs to unicode strings, leaving out
    // glyphs for which no text is known.
    let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO);
    for (&g, text) in glyph_set.iter().filter(|(_, text)| !text.is_empty()) {
        cmap.pair_with_multiple(g, text.chars());
    }

    cmap
}

View File

@ -6,9 +6,10 @@ mod outline;
mod page;
use std::cmp::Eq;
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeMap, HashMap};
use std::hash::Hash;
use ecow::EcoString;
use pdf_writer::types::Direction;
use pdf_writer::{Finish, Name, PdfWriter, Ref, TextStr};
use xmp_writer::{LangId, RenditionClass, XmpWriter};
@ -52,7 +53,13 @@ pub struct PdfContext<'a> {
page_refs: Vec<Ref>,
font_map: Remapper<Font>,
image_map: Remapper<Image>,
glyph_sets: HashMap<Font, HashSet<u16>>,
/// For each font, a mapping from used glyphs to their text representation.
/// The text may contain multiple chars in case of ligatures or similar
/// things. The same glyph can have different text representations within
/// one document; in that case, we just save the first one. The resulting
/// strings are used for the PDF's /ToUnicode map for glyphs that don't have
/// an entry in the font's cmap. This is important for copy-paste and
/// searching.
glyph_sets: HashMap<Font, BTreeMap<u16, EcoString>>,
languages: HashMap<Lang, usize>,
}

View File

@ -364,11 +364,12 @@ fn write_group(ctx: &mut PageContext, pos: Point, group: &GroupItem) {
/// Encode a text run into the content stream.
fn write_text(ctx: &mut PageContext, x: f32, y: f32, text: &TextItem) {
*ctx.parent.languages.entry(text.lang).or_insert(0) += text.glyphs.len();
ctx.parent
.glyph_sets
.entry(text.font.clone())
.or_default()
.extend(text.glyphs.iter().map(|g| g.id));
let glyph_set = ctx.parent.glyph_sets.entry(text.font.clone()).or_default();
for g in &text.glyphs {
let segment = &text.text[g.range()];
glyph_set.entry(g.id).or_insert_with(|| segment.into());
}
ctx.set_fill(&text.fill);
ctx.set_font(&text.font, text.size);

View File

@ -67,7 +67,8 @@ pub fn jump_from_click(
FrameItem::Text(text) => {
for glyph in &text.glyphs {
if glyph.span.is_detached() {
let (span, span_offset) = glyph.span;
if span.is_detached() {
continue;
}
@ -77,13 +78,13 @@ pub fn jump_from_click(
Size::new(width, text.size),
click,
) {
let source = world.source(glyph.span.source());
let node = source.find(glyph.span)?;
let source = world.source(span.source());
let node = source.find(span)?;
let pos = if node.kind() == SyntaxKind::Text {
let range = node.range();
let mut offset = range.start + usize::from(glyph.offset);
let mut offset = range.start + usize::from(span_offset);
if (click.x - pos.x) > width / 2.0 {
offset += glyph.c.len_utf8();
offset += glyph.range().len();
}
offset.min(range.end)
} else {
@ -150,7 +151,7 @@ fn find_in_frame(frame: &Frame, span: Span) -> Option<Point> {
if let FrameItem::Text(text) = item {
for glyph in &text.glyphs {
if glyph.span == span {
if glyph.span.0 == span {
return Some(pos);
}
pos.x += glyph.x_advance.at(text.size);

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

After

Width:  |  Height:  |  Size: 2.5 KiB

View File

@ -353,9 +353,18 @@ fn test(
pdf_path: Option<&Path>,
args: &Args,
) -> bool {
let name = src_path.strip_prefix(TYP_DIR).unwrap_or(src_path);
/// Prints the wrapped path when it is dropped while the current thread is
/// panicking, so that a panic can be attributed to a file.
struct PanicGuard<'a>(&'a Path);

impl Drop for PanicGuard<'_> {
    fn drop(&mut self) {
        // A drop without a panic in flight stays silent.
        if !std::thread::panicking() {
            return;
        }

        let Self(path) = self;
        println!("Panicked in {}", path.display());
    }
}
let name = src_path.strip_prefix(TYP_DIR).unwrap_or(src_path);
let text = fs::read_to_string(src_path).unwrap();
let _guard = PanicGuard(name);
let mut output = String::new();
let mut ok = true;
@ -401,6 +410,7 @@ fn test(
line,
&mut rng,
);
ok &= part_ok;
compare_ever |= compare_here;
frames.extend(part_frames);

View File

@ -0,0 +1,8 @@
// Test copy-paste and search in PDF with ligatures
// and Arabic text. Must be tested manually!
---
The after fira 🏳️‍🌈!
#set text(lang: "ar", font: "Noto Sans Arabic")
مرحبًا