Fix subsetting for composite glyphs 🔨

This commit is contained in:
Laurenz 2019-03-03 18:36:56 +01:00
parent 06101492dc
commit d217d4f02a
2 changed files with 131 additions and 37 deletions

View File

@ -1,7 +1,7 @@
//! Font utility and subsetting.
use std::fmt;
use std::io::{self, Cursor};
use std::io::{self, Cursor, Seek, SeekFrom};
use std::collections::HashMap;
use byteorder::{BE, ReadBytesExt, WriteBytesExt};
use opentype::{OpenTypeReader, Outlines, TableRecord, Tag};
@ -39,7 +39,6 @@ impl Font {
{
let mut chars: Vec<char> = chars.into_iter().collect();
chars.sort();
let mut cursor = Cursor::new(&self.program);
let mut reader = OpenTypeReader::new(&mut cursor);
let outlines = reader.outlines()?;
@ -54,6 +53,7 @@ impl Font {
cmap: None,
hmtx: None,
loca: None,
glyphs: Vec::with_capacity(chars.len()),
chars,
records: Vec::new(),
body: Vec::new(),
@ -70,6 +70,7 @@ struct Subsetter<'p> {
cmap: Option<CharMap>,
hmtx: Option<HorizontalMetrics>,
loca: Option<Vec<u32>>,
glyphs: Vec<u16>,
// Subsetted font
chars: Vec<char>,
@ -84,6 +85,10 @@ impl<'p> Subsetter<'p> {
I1: IntoIterator<Item=S1>, S1: AsRef<str>,
I2: IntoIterator<Item=S2>, S2: AsRef<str>
{
// Find out which glyphs to include based on which characters we want
// and which glyphs are used by composition.
self.build_glyphs()?;
// Iterate through the needed tables first
for table in needed_tables.into_iter() {
let table = table.as_ref();
@ -116,6 +121,64 @@ impl<'p> Subsetter<'p> {
Ok((self.body, mapping))
}
/// Collects the ids of all glyphs the subsetted font has to contain.
///
/// The list starts with one glyph per requested character, in the order of
/// `self.chars`. If the font has a `glyf` table, every glyph in the list is
/// then inspected; components referenced by composite glyphs are appended
/// when they are not in the list yet. Appended glyphs are inspected as well,
/// so nested composites are resolved, too.
///
/// # Errors
/// - `MissingCharacter` if the character map has no glyph for a character.
/// - `InvalidFont` if a glyph has no `loca` entries or its byte range lies
///   outside of the `glyf` table.
/// - Any error produced while reading the glyph data (e.g. truncated
///   component records) is propagated through `?`.
fn build_glyphs(&mut self) -> SubsetResult<()> {
    // Flags of a composite glyph component record (OpenType `glyf` table).
    const ARG_1_AND_2_ARE_WORDS: u16 = 0x0001;
    const WE_HAVE_A_SCALE: u16 = 0x0008;
    const MORE_COMPONENTS: u16 = 0x0020;
    const WE_HAVE_AN_X_AND_Y_SCALE: u16 = 0x0040;
    const WE_HAVE_A_TWO_BY_TWO: u16 = 0x0080;

    self.read_cmap()?;
    let cmap = self.cmap.as_ref().unwrap();

    for &c in &self.chars {
        self.glyphs.push(take_char(cmap.get(c), c)?)
    }

    // Composite glyphs may reference glyphs that are not in our list yet.
    // So we walk through the glyf table and add every referenced component.
    if self.contains("glyf".parse().unwrap()) {
        self.read_loca()?;
        let loca = self.loca.as_ref().unwrap();
        let table = self.get_table_data("glyf".parse().unwrap())?;

        // The list grows while we iterate, hence the index-based loop.
        let mut i = 0;
        while i < self.glyphs.len() {
            let glyph = self.glyphs[i];

            let start = *take_invalid(loca.get(glyph as usize))? as usize;
            let end = *take_invalid(loca.get(glyph as usize + 1))? as usize;
            let data = table.get(start..end).ok_or(SubsettingError::InvalidFont)?;

            // Glyphs without an outline occupy zero bytes and have no header.
            if end > start {
                let mut cursor = Cursor::new(data);
                let num_contours = cursor.read_i16::<BE>()?;

                // A negative contour count marks a composite glyph.
                if num_contours < 0 {
                    // Skip the bounding box (xMin, yMin, xMax, yMax).
                    cursor.seek(SeekFrom::Current(8))?;

                    loop {
                        let flags = cursor.read_u16::<BE>()?;
                        let glyph_index = cursor.read_u16::<BE>()?;

                        if !self.glyphs.contains(&glyph_index) {
                            self.glyphs.push(glyph_index);
                        }

                        // This was the last component.
                        if flags & MORE_COMPONENTS == 0 {
                            break;
                        }

                        // The two arguments are either words or bytes.
                        let args_len: i64 = if flags & ARG_1_AND_2_ARE_WORDS != 0 {
                            4
                        } else {
                            2
                        };

                        // An optional transformation follows the arguments and
                        // has to be skipped as well to reach the next record.
                        // NOTE: any other code walking component records has
                        // to account for the same sizes.
                        let transform_len: i64 = if flags & WE_HAVE_A_SCALE != 0 {
                            2
                        } else if flags & WE_HAVE_AN_X_AND_Y_SCALE != 0 {
                            4
                        } else if flags & WE_HAVE_A_TWO_BY_TWO != 0 {
                            8
                        } else {
                            0
                        };

                        cursor.seek(SeekFrom::Current(args_len + transform_len))?;
                    }
                }
            }

            i += 1;
        }
    }

    Ok(())
}
fn write_header(&mut self) -> SubsetResult<()> {
// Create an output buffer
let header_len = 12 + self.records.len() * 16;
@ -165,7 +228,7 @@ impl<'p> Subsetter<'p> {
},
b"hhea" => {
let table = self.get_table_data(tag)?;
let glyph_count = self.chars.len() as u16;
let glyph_count = self.glyphs.len() as u16;
self.write_table_body(tag, |this| {
this.body.extend(&table[..table.len() - 2]);
Ok(this.body.write_u16::<BE>(glyph_count)?)
@ -173,7 +236,7 @@ impl<'p> Subsetter<'p> {
},
b"maxp" => {
let table = self.get_table_data(tag)?;
let glyph_count = self.chars.len() as u16;
let glyph_count = self.glyphs.len() as u16;
self.write_table_body(tag, |this| {
this.body.extend(&table[..4]);
this.body.write_u16::<BE>(glyph_count)?;
@ -182,14 +245,11 @@ impl<'p> Subsetter<'p> {
},
b"hmtx" => {
self.write_table_body(tag, |this| {
this.read_cmap()?;
this.read_hmtx()?;
let cmap = this.cmap.as_ref().unwrap();
let metrics = this.hmtx.as_ref().unwrap();
for &c in &this.chars {
let glyph_id = take(cmap.get(c), c)?;
let metrics = take(metrics.get(glyph_id), c)?;
for &glyph in &this.glyphs {
let metrics = take_invalid(metrics.get(glyph))?;
this.body.write_i16::<BE>(metrics.advance_width)?;
this.body.write_i16::<BE>(metrics.left_side_bearing)?;
@ -199,40 +259,70 @@ impl<'p> Subsetter<'p> {
},
b"loca" => {
self.write_table_body(tag, |this| {
this.read_cmap()?;
this.read_loca()?;
let cmap = this.cmap.as_ref().unwrap();
let loca = this.loca.as_ref().unwrap();
let mut offset = 0;
for &c in &this.chars {
for &glyph in &this.glyphs {
this.body.write_u32::<BE>(offset)?;
let glyph = take(cmap.get(c), c)? as usize;
let len = take(loca.get(glyph + 1), c)? - take(loca.get(glyph), c)?;
let len = take_invalid(loca.get(glyph as usize + 1))?
- take_invalid(loca.get(glyph as usize))?;
offset += len;
}
this.body.write_u32::<BE>(offset)?;
Ok(())
})
},
b"glyf" => {
self.write_table_body(tag, |this| {
let table = this.get_table_data(tag)?;
this.read_cmap()?;
this.read_loca()?;
let cmap = this.cmap.as_ref().unwrap();
let loca = this.loca.as_ref().unwrap();
let table = this.get_table_data(tag)?;
for &c in &this.chars {
let glyph = take(cmap.get(c), c)? as usize;
let start = *take(loca.get(glyph), c)? as usize;
let end = *take(loca.get(glyph + 1), c)? as usize;
let shapes = table.get(start..end).ok_or(SubsettingError::InvalidFont)?;
this.body.extend(shapes);
for &glyph in &this.glyphs {
let start = *take_invalid(loca.get(glyph as usize))? as usize;
let end = *take_invalid(loca.get(glyph as usize + 1))? as usize;
let mut data = table.get(start..end)
.ok_or(SubsettingError::InvalidFont)?.to_vec();
if end > start {
let mut cursor = Cursor::new(&mut data);
let num_contours = cursor.read_i16::<BE>()?;
// This is a composite glyph
if num_contours < 0 {
cursor.seek(SeekFrom::Current(8))?;
loop {
let flags = cursor.read_u16::<BE>()?;
let glyph_index = cursor.read_u16::<BE>()?;
let new_glyph_index = this.glyphs.iter()
.position(|&g| g == glyph_index)
.ok_or(SubsettingError::InvalidFont)? as u16;
cursor.seek(SeekFrom::Current(-2))?;
cursor.write_u16::<BE>(new_glyph_index)?;
// This was the last component
if flags & 0x0020 == 0 {
break;
}
let args_len = if flags & 0x0001 == 1 { 4 } else { 2 };
cursor.seek(SeekFrom::Current(args_len))?;
}
}
}
this.body.extend(data);
}
Ok(())
})
},
b"cmap" => {
// Always uses format 12 for simplicity
self.write_table_body(tag, |this| {
@ -365,21 +455,33 @@ fn calculate_check_sum(data: &[u8]) -> u32 {
}
/// Returns an error about a missing character or the wrapped data.
fn take<T>(opt: Option<T>, c: char) -> SubsetResult<T> {
opt.ok_or(SubsettingError::MissingCharacter(c))
fn take_char<T>(opt: Option<T>, character: char) -> SubsetResult<T> {
    // An empty option means that `character` could not be resolved.
    if let Some(value) = opt {
        Ok(value)
    } else {
        Err(SubsettingError::MissingCharacter(character))
    }
}
/// Returns the wrapped data or an `InvalidFont` error if the option is empty.
fn take_invalid<T>(opt: Option<T>) -> SubsetResult<T> {
    match opt {
        Some(value) => Ok(value),
        None => Err(SubsettingError::InvalidFont),
    }
}
type SubsetResult<T> = Result<T, SubsettingError>;
/// A failure when subsetting a font.
///
/// Errors of the underlying font parser and plain I/O errors are wrapped in
/// their own variants (`FontError` and `IoError`).
#[derive(Debug)]
pub enum SubsettingError {
/// A requested table was not present in the source font.
/// The string presumably names the table (TODO confirm against the code
/// constructing this variant).
MissingTable(String),
/// The table is unknown to the engine (unimplemented or invalid).
/// The string presumably names the table (TODO confirm).
UnsupportedTable(String),
/// A requested character was not present in the source.
/// Carries the character for which no glyph could be found.
MissingCharacter(char),
/// The font is invalid, e.g. a glyph has no entry in the location table
/// or its byte range lies outside of the glyph table.
InvalidFont,
/// There was an error while parsing the font file.
FontError(opentype::Error),
/// A general I/O error.
IoError(io::Error),
}

View File

@ -346,16 +346,8 @@ mod pdf_tests {
");
}
// #[test]
// fn pdf_fix_1() {
// use unicode_normalization::UnicodeNormalization;
// let text = "Hello World! from Typeset‼";
// let chars = text.nfd().collect::<HashSet<char>>();
// // Create a subsetted pdf font.
// let data = std::fs::read("../fonts/NotoSans-Regular.ttf").unwrap();
// let font = PdfFont::new("NotoSans-Regular", data, chars).unwrap();
// std::fs::write("../target/NotoTest.ttf", font.data).unwrap();
// }
#[test]
fn pdf_composite_glyph() {
    // Runs the shared `test` helper on a text containing `‼`, which is
    // presumably drawn as a composite glyph in the test font (TODO confirm).
    let name = "composite-glyph";
    let text = "Composite character‼";
    test(name, text);
}
}