Fix embedding of CID-keyed fonts into PDF

Fixes #2032
Fixes #573
This commit is contained in:
Laurenz 2023-09-13 14:58:51 +02:00
parent f9feea3f29
commit dad7c88576
3 changed files with 52 additions and 21 deletions

View File

@ -37,14 +37,8 @@ pub fn write_fonts(ctx: &mut PdfContext) {
// Do we have a TrueType or CFF font?
//
// FIXME 1: CFF2 must be handled differently and requires PDF 2.0
// FIXME: CFF2 must be handled differently and requires PDF 2.0
// (or we have to convert it to CFF).
//
// FIXME 2: CFF fonts that have a Top DICT that uses CIDFont operators
// may not have an identity CID-GID encoding. These are currently not
// handled correctly. See also:
// - PDF Spec, Section 9.7.4.2
// - https://stackoverflow.com/questions/74165171/embedded-opentype-cff-font-in-a-pdf-shows-strange-behaviour-in-some-viewers
let is_cff = ttf
.raw_face()
.table(CFF)
@ -83,11 +77,15 @@ pub fn write_fonts(ctx: &mut PdfContext) {
}
// Extract the widths of all glyphs.
let num_glyphs = ttf.number_of_glyphs();
let mut widths = vec![0.0; num_glyphs as usize];
for g in std::iter::once(0).chain(glyph_set.keys().copied()) {
let x = ttf.glyph_hor_advance(GlyphId(g)).unwrap_or(0);
widths[g as usize] = font.to_em(x).to_font_units();
// Collect the advance width of every used glyph (plus glyph 0), indexed
// by CID rather than by glyph ID.
let mut widths = vec![];
for gid in std::iter::once(0).chain(glyph_set.keys().copied()) {
    let width = ttf.glyph_hor_advance(GlyphId(gid)).unwrap_or(0);
    let units = font.to_em(width).to_font_units();
    let cid = usize::from(glyph_cid(font, gid));

    // Grow the table on demand so that `cid` is a valid index.
    if cid >= widths.len() {
        widths.resize(cid + 1, 0.0);
    }

    // Always store the width, not only when the table had to grow: when
    // the GID-to-CID mapping is not the identity, CIDs need not arrive in
    // increasing order, and a CID below the current length would
    // otherwise keep its 0.0 placeholder.
    widths[cid] = units;
}
// Write all non-zero glyph widths.
@ -203,8 +201,8 @@ fn create_cmap(
ttf: &ttf_parser::Face,
glyph_set: &mut BTreeMap<u16, EcoString>,
) -> UnicodeCmap {
// For glyphs that have codepoints mapping to in the font's cmap table, we
// prefer them over pre-existing text mappings from the document. Only
// For glyphs that have codepoints mapping to them in the font's cmap table,
// we prefer them over pre-existing text mappings from the document. Only
// things that don't have a corresponding codepoint (or only a private-use
// one) like the "Th" in Linux Libertine get the text of their first
// occurrences in the document instead.
@ -236,3 +234,35 @@ fn create_cmap(
cmap
}
/// Translate a glyph ID (GID) into the CID (character ID) that must be
/// written into the PDF.
///
/// Text in a PDF content stream addresses glyphs by CID, not by GID. In most
/// fonts the two coincide. For TrueType fonts this is guaranteed by the
/// identity CID-to-GID map requested further up in this file:
/// ```ignore
/// cid.cid_to_gid_map_predefined(Name(b"Identity"));
/// ```
///
/// CID-keyed CFF fonts are the exception: their charset may define a
/// non-identity mapping, so glyph IDs coming from a `TextItem` have to be
/// translated. A CFF charset normally maps glyphs to SIDs (string IDs) that
/// name the glyph, but in a CID-keyed font those entries are really CIDs. The
/// CFF spec puts it like this:
///
/// > The charset data, although in the same format as non-CIDFonts, will
/// > represent CIDs rather than SIDs, [...]
///
/// The lookup falls back to returning `glyph_id` unchanged whenever the font
/// has no CFF table or the table yields no CID for the glyph, so the function
/// is safe to call for fonts that are not CID-keyed.
pub(super) fn glyph_cid(font: &Font, glyph_id: u16) -> u16 {
    match font.ttf().tables().cff {
        // A CFF table exists: ask it for the CID, keeping the GID when it
        // has no answer.
        Some(cff) => cff
            .glyph_cid(ttf_parser::GlyphId(glyph_id))
            .unwrap_or(glyph_id),
        // No CFF table: CID and GID are the same.
        None => glyph_id,
    }
}

View File

@ -253,8 +253,8 @@ where
});
}
fn map(&self, item: T) -> usize {
self.to_pdf[&item]
fn map(&self, item: &T) -> usize {
self.to_pdf[item]
}
fn pdf_indices<'a>(

View File

@ -236,7 +236,7 @@ impl PageContext<'_, '_> {
let current_state = self.state.external_graphics_state.as_ref();
if current_state != Some(graphics_state) {
self.parent.ext_gs_map.insert(*graphics_state);
let name = eco_format!("Gs{}", self.parent.ext_gs_map.map(*graphics_state));
let name = eco_format!("Gs{}", self.parent.ext_gs_map.map(graphics_state));
self.content.set_parameters(Name(name.as_bytes()));
if graphics_state.uses_opacities() {
@ -288,7 +288,7 @@ impl PageContext<'_, '_> {
fn set_font(&mut self, font: &Font, size: Abs) {
if self.state.font.as_ref().map(|(f, s)| (f, *s)) != Some((font, size)) {
self.parent.font_map.insert(font.clone());
let name = eco_format!("F{}", self.parent.font_map.map(font.clone()));
let name = eco_format!("F{}", self.parent.font_map.map(font));
self.content.set_font(Name(name.as_bytes()), size.to_f32());
self.state.font = Some((font.clone(), size));
}
@ -472,8 +472,9 @@ fn write_text(ctx: &mut PageContext, x: f32, y: f32, text: &TextItem) {
adjustment = Em::zero();
}
encoded.push((glyph.id >> 8) as u8);
encoded.push((glyph.id & 0xff) as u8);
let cid = super::font::glyph_cid(&text.font, glyph.id);
encoded.push((cid >> 8) as u8);
encoded.push((cid & 0xff) as u8);
if let Some(advance) = text.font.advance(glyph.id) {
adjustment += glyph.x_advance - advance;
@ -568,7 +569,7 @@ fn write_path(ctx: &mut PageContext, x: f32, y: f32, path: &geom::Path) {
/// Encode a vector or raster image into the content stream.
fn write_image(ctx: &mut PageContext, x: f32, y: f32, image: &Image, size: Size) {
ctx.parent.image_map.insert(image.clone());
let name = eco_format!("Im{}", ctx.parent.image_map.map(image.clone()));
let name = eco_format!("Im{}", ctx.parent.image_map.map(image));
let w = size.x.to_f32();
let h = size.y.to_f32();
ctx.content.save_state();