Improve subsetter testing ✔

This commit is contained in:
Laurenz Mädje 2019-07-27 20:21:44 +02:00
parent 7eec0b8dd7
commit 6c8b5caa9f
7 changed files with 257 additions and 190 deletions

View File

@ -1,7 +1,7 @@
use std::env;
use std::error::Error;
use std::fs::File;
use std::io::Read;
use std::io::{Read, BufWriter};
use std::path::{Path, PathBuf};
use std::process;
@ -19,15 +19,11 @@ fn main() {
/// The actual main function.
fn run() -> Result<(), Box<Error>> {
// Check the command line arguments.
let args: Vec<String> = env::args().collect();
if args.len() < 2 || args.len() > 3 {
help_and_quit();
}
// Open the input file.
let mut file = File::open(&args[1]).map_err(|_| "failed to open source file")?;
let source_path = Path::new(&args[1]);
// Compute the output filename from the input filename by replacing the extension.
@ -39,14 +35,13 @@ fn run() -> Result<(), Box<Error>> {
PathBuf::from(&args[2])
};
// We do not want to overwrite the source file.
if dest_path == source_path {
return Err("source and destination path are the same".into());
}
// Read the input file.
let mut src = String::new();
file.read_to_string(&mut src).map_err(|_| "failed to read from source file")?;
let mut source_file = File::open(source_path).map_err(|_| "failed to open source file")?;
source_file.read_to_string(&mut src).map_err(|_| "failed to read from source file")?;
// Create a typesetter with a font provider that provides the default fonts.
let mut typesetter = Typesetter::new();
@ -71,15 +66,15 @@ fn run() -> Result<(), Box<Error>> {
// Export the document into a PDF file.
let exporter = PdfExporter::new();
let output_file = File::create(&dest_path)?;
exporter.export(&document, output_file)?;
let dest_file = File::create(&dest_path)?;
exporter.export(&document, BufWriter::new(dest_file))?;
Ok(())
}
/// Print a usage message and quit.
fn help_and_quit() {
let name = env::args().next().unwrap_or("typeset".to_string());
let name = env::args().next().unwrap_or("typst".to_string());
println!("usage: {} source [destination]", name);
process::exit(0);
}

View File

@ -42,7 +42,7 @@ struct PdfEngine<'d, W: Write> {
}
/// Offsets for the various groups of ids.
#[derive(Debug, Copy, Clone)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct Offsets {
catalog: Ref,
page_tree: Ref,
@ -67,8 +67,7 @@ impl<'d, W: Write> PdfEngine<'d, W> {
let mut font = 0usize;
let mut chars = vec![HashSet::new(); doc.fonts.len()];
// Iterate through every text object on every page and find out which characters they
// use.
// Find out which characters are used for each font.
for page in &doc.pages {
for action in &page.actions {
match action {
@ -141,16 +140,14 @@ impl<'d, W: Write> PdfEngine<'d, W> {
/// Write the content of a page.
fn write_page(&mut self, id: u32, page: &DocPage) -> PdfResult<()> {
// The currently used font.
let mut text = Text::new();
let mut active_font = (std::usize::MAX, 0.0);
// The last set position and font, these get flushed when content is written.
// The last set position and font,
// these only get flushed lazily when content is written.
let mut next_pos = Some(Size2D::zero());
let mut next_font = None;
// The output text.
let mut text = Text::new();
for action in &page.actions {
match action {
LayoutAction::MoveAbsolute(pos) => next_pos = Some(*pos),
@ -174,7 +171,7 @@ impl<'d, W: Write> PdfEngine<'d, W> {
}
// Write the text.
text.tj(self.fonts[active_font.0].encode(&string));
text.tj(self.fonts[active_font.0].encode_text(&string));
},
}
}
@ -227,7 +224,7 @@ impl<'d, W: Write> PdfEngine<'d, W> {
.font_file_2(id + 4)
)?;
// The CMap, which maps glyphs to unicode codepoints.
// Write the CMap, which maps glyphs to unicode codepoints.
let mapping = font.font.mapping.iter().map(|(&c, &cid)| (cid, c));
self.writer.write_obj(id + 3, &CMap::new("Custom", system_info, mapping))?;
@ -261,15 +258,14 @@ struct PdfFont {
}
impl PdfFont {
/// Create a subsetted version of the font and calculate some information needed for creating the
/// _PDF_.
/// Create a subsetted version of the font and calculate some information
/// needed for creating the _PDF_.
fn new(font: &Font, chars: &HashSet<char>) -> PdfResult<PdfFont> {
/// Convert a size into a _PDF_ glyph unit.
fn size_to_glyph_unit(size: Size) -> GlyphUnit {
(1000.0 * size.to_pt()).round() as GlyphUnit
}
// Subset the font using the selected characters.
let subset_result = font.subsetted(
chars.iter().cloned(),
&["head", "hhea", "hmtx", "maxp", "cmap", "cvt ", "fpgm", "prep", "loca", "glyf"][..]
@ -283,7 +279,6 @@ impl PdfFont {
Err(err) => return Err(err.into()),
};
// Specify flags for the font.
let mut flags = FontFlags::empty();
flags.set(FontFlags::FIXED_PITCH, font.metrics.monospace);
flags.set(FontFlags::SERIF, font.name.contains("Serif"));
@ -291,7 +286,6 @@ impl PdfFont {
flags.set(FontFlags::ITALIC, font.metrics.italic);
flags.insert(FontFlags::SMALL_CAP);
// Transform the widths.
let widths = subsetted.widths.iter().map(|&x| size_to_glyph_unit(x)).collect();
Ok(PdfFont {

View File

@ -1,4 +1,4 @@
//! Loads fonts matching queries.
//! Loading of fonts matching queries.
use std::cell::{RefCell, Ref};
use std::collections::HashMap;
@ -12,7 +12,7 @@ pub struct FontLoader<'p> {
/// The font providers.
providers: Vec<&'p (dyn FontProvider + 'p)>,
/// The fonts available from each provider (indexed like `providers`).
provider_fonts: Vec<&'p [FontInfo]>,
infos: Vec<&'p [FontInfo]>,
/// The internal state. Uses interior mutability because the loader works behind
/// an immutable reference to ease usage.
state: RefCell<FontLoaderState<'p>>,
@ -20,29 +20,29 @@ pub struct FontLoader<'p> {
/// Internal state of the font loader (separated to wrap it in a `RefCell`).
struct FontLoaderState<'p> {
/// The loaded fonts alongside their external indices. Some fonts may not have external indices
/// because they were loaded but did not contain the required character. However, these are
/// still stored because they may be needed later. The index is just set to `None` then.
/// The loaded fonts alongside their external indices. Some fonts may not
/// have external indices because they were loaded but did not contain the
/// required character. However, these are still stored because they may
/// be needed later. The index is just set to `None` then.
fonts: Vec<(Option<usize>, Font)>,
/// Allows to retrieve a font (index) quickly if a query was submitted before.
query_cache: HashMap<FontQuery, usize>,
/// Allows to re-retrieve loaded fonts by their info instead of loading them again.
info_cache: HashMap<&'p FontInfo, usize>,
/// Indexed by external indices (the ones inside the tuples in the `fonts` vector) and maps to
/// internal indices (the actual indices into the vector).
/// Indexed by external indices (the ones inside the tuples in the `fonts` vector)
/// and maps to internal indices (the actual indices into the vector).
inner_index: Vec<usize>,
}
impl<'p> FontLoader<'p> {
/// Create a new font loader using a set of providers.
#[inline]
pub fn new<P: 'p>(providers: &'p [P]) -> FontLoader<'p> where P: AsRef<dyn FontProvider + 'p> {
let providers: Vec<_> = providers.iter().map(|p| p.as_ref()).collect();
let provider_fonts = providers.iter().map(|prov| prov.available()).collect();
let infos = providers.iter().map(|prov| prov.available()).collect();
FontLoader {
providers,
provider_fonts,
infos,
state: RefCell::new(FontLoaderState {
query_cache: HashMap::new(),
info_cache: HashMap::new(),
@ -66,26 +66,24 @@ impl<'p> FontLoader<'p> {
}
drop(state);
// The outermost loop goes over the fallbacks because we want to serve the font that matches
// the first possible class.
// The outermost loop goes over the fallbacks because we want to serve the
// font that matches the first possible class.
for class in &query.fallback {
// For each class now go over all font infos from all font providers.
for (provider, infos) in self.providers.iter().zip(&self.provider_fonts) {
// For each class now go over all fonts from all font providers.
for (provider, infos) in self.providers.iter().zip(&self.infos) {
for info in infos.iter() {
let matches = info.classes.contains(class)
&& query.classes.iter().all(|class| info.classes.contains(class));
let viable = info.classes.contains(class);
let matches = viable && query.classes.iter()
.all(|class| info.classes.contains(class));
// Proceed only if this font matches the query up to now.
if matches {
let mut state = self.state.borrow_mut();
// Check if we have already loaded this font before, otherwise, we will load
// it from the provider. Anyway, have it stored and find out its internal
// index.
// Check if we have already loaded this font before, otherwise,
// we will load it from the provider.
let index = if let Some(&index) = state.info_cache.get(info) {
index
} else if let Some(mut source) = provider.get(info) {
// Read the font program into a vector and parse it.
let mut program = Vec::new();
source.read_to_end(&mut program).ok()?;
let font = Font::new(program).ok()?;
@ -107,8 +105,8 @@ impl<'p> FontLoader<'p> {
// This font is suitable, thus we cache the query result.
state.query_cache.insert(query, index);
// Now we have to find out the external index of it or assign a new one
// if it has none.
// Now we have to find out the external index of it or assign
// a new one if it has none.
let external_index = state.fonts[index].0.unwrap_or_else(|| {
// We have to assign an external index before serving.
let new_index = state.inner_index.len();
@ -133,7 +131,8 @@ impl<'p> FontLoader<'p> {
None
}
/// Return the font previously loaded at this index. Panics if the index is not assigned.
/// Return the font previously loaded at this index.
/// Panics if the index is not assigned.
#[inline]
pub fn get_with_index(&self, index: usize) -> Ref<Font> {
let state = self.state.borrow();
@ -143,9 +142,9 @@ impl<'p> FontLoader<'p> {
/// Move the whole list of fonts out.
pub fn into_fonts(self) -> Vec<Font> {
// Sort the fonts by external index so that they are in the correct order. All fonts that
// were cached but not used by the outside are sorted to the back and are removed in the
// next step.
// Sort the fonts by external index so that they are in the correct order.
// All fonts that were cached but not used by the outside are sorted to the back
// and are removed in the next step.
let mut fonts = self.state.into_inner().fonts;
fonts.sort_by_key(|&(maybe_index, _)| match maybe_index {
Some(index) => index,
@ -164,7 +163,7 @@ impl Debug for FontLoader<'_> {
let state = self.state.borrow();
f.debug_struct("FontLoader")
.field("providers", &self.providers.len())
.field("provider_fonts", &self.provider_fonts)
.field("infos", &self.infos)
.field("fonts", &state.fonts)
.field("query_cache", &state.query_cache)
.field("info_cache", &state.info_cache)
@ -180,7 +179,6 @@ pub struct FontQuery {
pub character: char,
/// Which classes the font has to be part of.
pub classes: Vec<FontClass>,
/// A sequence of classes. The font matching the leftmost class in this sequence
/// should be returned.
/// The font matching the leftmost class in this sequence should be returned.
pub fallback: Vec<FontClass>,
}

View File

@ -1,7 +1,7 @@
//! Font loading and transforming.
//! Font loading and subsetting.
//!
//! # Font handling
//! To do the typesetting, the typesetting engine needs font data. To be highly portable the engine
//! To do the typesetting, the engine needs font data. However, to be highly portable the engine
//! itself assumes nothing about the environment. To still work with fonts, the consumer of this
//! library has to add _font providers_ to their typesetting instance. These can be queried for font
//! data given flexible font filters specifying required font families and styles. A font provider
@ -19,28 +19,29 @@ use opentype::{Error as OpentypeError, OpenTypeReader};
use opentype::tables::{Header, Name, CharMap, HorizontalMetrics, Post, OS2};
use opentype::types::{MacStyleFlags, NameEntry};
pub use self::loader::{FontLoader, FontQuery};
use self::subset::Subsetter;
use crate::size::Size;
mod loader;
mod subset;
pub use loader::{FontLoader, FontQuery};
/// A loaded and parsed font program.
/// A parsed _OpenType_ font program.
#[derive(Debug, Clone)]
pub struct Font {
/// The base name of the font.
/// The name of the font.
pub name: String,
/// The raw bytes of the font program.
/// The complete, raw bytes of the font program.
pub program: Vec<u8>,
/// A mapping from character codes to glyph ids.
/// The mapping from character codes to glyph ids.
pub mapping: HashMap<char, u16>,
/// The widths of the glyphs indexed by glyph id.
pub widths: Vec<Size>,
/// The fallback glyph.
/// The id of the fallback glyph.
pub default_glyph: u16,
/// The typesetting-relevant metrics of this font.
/// The typesetting or exporting-relevant metrics of this font.
pub metrics: FontMetrics,
}
@ -51,32 +52,31 @@ pub struct FontMetrics {
pub italic: bool,
/// Whether font is monospace.
pub monospace: bool,
/// The angle of text in italics.
/// The angle of text in italics (in counter-clockwise degrees from vertical).
pub italic_angle: f32,
/// The glyph bounding box: [x_min, y_min, x_max, y_max],
/// The extremal values [x_min, y_min, x_max, y_max] for all glyph bounding boxes.
pub bounding_box: [Size; 4],
/// The typographics ascender.
/// The typographic ascender.
pub ascender: Size,
/// The typographics descender.
/// The typographic descender.
pub descender: Size,
/// The approximate height of capital letters.
pub cap_height: Size,
/// The weight class of the font.
/// The weight class of the font (from 100 for thin to 900 for heavy).
pub weight_class: u16,
}
impl Font {
/// Create a new font from a raw font program.
/// Create a `Font` from a raw font program.
pub fn new(program: Vec<u8>) -> FontResult<Font> {
// Create an OpentypeReader to parse the font tables.
let cursor = Cursor::new(&program);
let mut reader = OpenTypeReader::new(cursor);
// Read the relevant tables
// (all of these are required by the OpenType specification, so we expect them).
// All of these tables are required by the OpenType specification,
// so we do not really have to handle the case that they are missing.
let head = reader.read_table::<Header>()?;
let name = reader.read_table::<Name>()?;
let os2 = reader.read_table::<OS2>()?;
let os2 = reader.read_table::<OS2>()?;
let cmap = reader.read_table::<CharMap>()?;
let hmtx = reader.read_table::<HorizontalMetrics>()?;
let post = reader.read_table::<Post>()?;
@ -85,15 +85,13 @@ impl Font {
let font_unit_ratio = 1.0 / (head.units_per_em as f32);
let font_unit_to_size = |x| Size::pt(font_unit_ratio * x);
// Find out the name of the font.
let font_name = name.get_decoded(NameEntry::PostScriptName)
let font_name = name
.get_decoded(NameEntry::PostScriptName)
.unwrap_or_else(|| "unknown".to_owned());
// Convert the widths from font units to sizes.
let widths = hmtx.metrics.iter()
.map(|m| font_unit_to_size(m.advance_width as f32)).collect();
// Calculate the typesetting-relevant metrics.
let metrics = FontMetrics {
italic: head.mac_style.contains(MacStyleFlags::ITALIC),
monospace: post.is_fixed_pitch,
@ -120,51 +118,82 @@ impl Font {
})
}
/// Map a character to its glyph index.
/// Encode a character into its glyph id.
#[inline]
pub fn map(&self, c: char) -> u16 {
self.mapping.get(&c).map(|&g| g).unwrap_or(self.default_glyph)
pub fn encode(&self, character: char) -> u16 {
self.mapping.get(&character).map(|&g| g).unwrap_or(self.default_glyph)
}
/// Encode the given text for this font (into glyph ids).
/// Encode the given text into a vector of glyph ids.
#[inline]
pub fn encode(&self, text: &str) -> Vec<u8> {
// Each glyph id takes two bytes that we encode in big endian.
let mut bytes = Vec::with_capacity(2 * text.len());
for glyph in text.chars().map(|c| self.map(c)) {
pub fn encode_text(&self, text: &str) -> Vec<u8> {
const BYTES_PER_GLYPH: usize = 2;
let mut bytes = Vec::with_capacity(BYTES_PER_GLYPH * text.len());
for c in text.chars() {
let glyph = self.encode(c);
bytes.push((glyph >> 8) as u8);
bytes.push((glyph & 0xff) as u8);
}
bytes
}
/// Generate a subsetted version of this font including only the chars listed in `chars`.
/// Generate a subsetted version of this font.
///
/// The filter functions decides which tables to keep and which not based on their tag.
/// This version includes only the given `chars` and _OpenType_ `tables`.
#[inline]
pub fn subsetted<C, I, S>(&self, chars: C, tables: I) -> Result<Font, FontError>
where C: IntoIterator<Item=char>, I: IntoIterator<Item=S>, S: AsRef<str> {
where
C: IntoIterator<Item=char>,
I: IntoIterator<Item=S>,
S: AsRef<str>
{
Subsetter::subset(self, chars, tables)
}
}
/// Categorizes a font.
/// A type that provides fonts.
pub trait FontProvider {
/// Returns a font with the given info if this provider has one.
fn get(&self, info: &FontInfo) -> Option<Box<dyn FontData>>;
/// The available fonts this provider can serve. While these should generally
/// be retrievable through the `get` method, this is not guaranteed.
fn available<'p>(&'p self) -> &'p [FontInfo];
}
/// A wrapper trait around `Read + Seek`.
///
/// Can be constructed conveniently with the [`font`] macro.
/// This type is needed because currently you can't make a trait object with two traits, like
/// `Box<dyn Read + Seek>`. Automatically implemented for all types that are [`Read`] and [`Seek`].
pub trait FontData: Read + Seek {}
impl<T> FontData for T where T: Read + Seek {}
/// Classifies a font by listing the font classes it is part of.
///
/// All fonts with the same [`FontInfo`] are part of the same intersection
/// of [font classes](FontClass).
///
/// This structure can be constructed conveniently through the [`font`] macro.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct FontInfo {
/// The font families this font is part of.
/// The font classes this font is part of.
pub classes: Vec<FontClass>,
}
impl FontInfo {
/// Create a new font info from an iterator of classes.
/// Create a new font info from a collection of classes.
#[inline]
pub fn new<I>(classes: I) -> FontInfo where I: IntoIterator<Item=FontClass> {
FontInfo { classes: classes.into_iter().collect() }
FontInfo {
classes: classes.into_iter().collect()
}
}
}
/// A class of fonts.
///
/// The set of all fonts can be classified into subsets of font classes like
/// _serif_ or _bold_. This enum lists such subclasses.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub enum FontClass {
Serif,
@ -183,27 +212,18 @@ pub enum FontClass {
/// into custom `Family`-variants and others can be named directly.
///
/// # Examples
/// The font _Noto Sans_ in regular typeface.
/// ```
/// # use typeset::font;
/// // Noto Sans in regular typeface.
/// font!["NotoSans", "Noto", Regular, SansSerif];
/// ```
///
/// The font _Noto Serif_ in italics and boldface.
/// ```
/// # use typeset::font;
/// // Noto Serif in italics and boldface.
/// font!["NotoSerif", "Noto", Bold, Italic, Serif];
/// ```
///
/// The font _Arial_ in italics.
/// ```
/// # use typeset::font;
/// // Arial in italics.
/// font!["Arial", Italic, SansSerif];
/// ```
///
/// The font _Noto Emoji_, which works with all base families. 🙂
/// ```
/// # use typeset::font;
/// // Noto Emoji, which works in sans-serif and serif contexts.
/// font!["NotoEmoji", "Noto", Regular, SansSerif, Serif, Monospace];
/// ```
#[macro_export]
@ -229,37 +249,21 @@ macro_rules! font {
}};
}
/// A type that provides fonts.
pub trait FontProvider {
/// Returns a font with the given info if this provider has one.
fn get(&self, info: &FontInfo) -> Option<Box<dyn FontData>>;
/// The available fonts this provider can serve. While these should generally be retrievable
/// through the `get` method, it does not have to be guaranteed that a font info, that is
/// contained, here yields a `Some` value when passed into `get`.
fn available<'p>(&'p self) -> &'p [FontInfo];
}
/// A wrapper trait around `Read + Seek`.
///
/// This type is needed because currently you can't make a trait object with two traits, like
/// `Box<dyn Read + Seek>`. Automatically implemented for all types that are [`Read`] and [`Seek`].
pub trait FontData: Read + Seek {}
impl<T> FontData for T where T: Read + Seek {}
/// A font provider serving fonts from a folder on the local file system.
#[derive(Debug)]
pub struct FileSystemFontProvider {
/// The root folder.
/// The base folder all other paths are relative to.
base: PathBuf,
/// Paths of the fonts relative to the `base` path.
paths: Vec<PathBuf>,
/// The information for the font with the same index in `paths`.
/// The info for the font with the same index in `paths`.
infos: Vec<FontInfo>,
}
impl FileSystemFontProvider {
/// Create a new provider from a folder and an iterator of pairs of font paths and font infos.
/// Create a new provider serving fonts from a base path. The `fonts` iterator
/// should contain paths of fonts relative to the base alongside matching
/// infos for these fonts.
///
/// # Example
/// Serve the two fonts `NotoSans-Regular` and `NotoSans-Italic` from the local folder
@ -271,21 +275,20 @@ impl FileSystemFontProvider {
/// ("NotoSans-Italic.ttf", font!["NotoSans", Italic, SansSerif]),
/// ]);
/// ```
#[inline]
pub fn new<B, I, P>(base: B, infos: I) -> FileSystemFontProvider
pub fn new<B, I, P>(base: B, fonts: I) -> FileSystemFontProvider
where
B: Into<PathBuf>,
I: IntoIterator<Item = (P, FontInfo)>,
P: Into<PathBuf>,
{
// Find out how long the iterator is at least, to reserve the correct capacity for the
// vectors.
let iter = infos.into_iter();
let min = iter.size_hint().0;
let iter = fonts.into_iter();
// Split the iterator into two separate vectors.
// Find out how long the iterator is at least, to reserve the correct
// capacity for the vectors.
let min = iter.size_hint().0;
let mut paths = Vec::with_capacity(min);
let mut infos = Vec::with_capacity(min);
for (path, info) in iter {
paths.push(path.into());
infos.push(info);
@ -302,12 +305,10 @@ impl FileSystemFontProvider {
impl FontProvider for FileSystemFontProvider {
#[inline]
fn get(&self, info: &FontInfo) -> Option<Box<dyn FontData>> {
// Find the index of the font in both arrays (early exit if there is no match).
let index = self.infos.iter().position(|i| i == info)?;
// Open the file and return a boxed reader operating on it.
let index = self.infos.iter().position(|c| c == info)?;
let path = &self.paths[index];
let file = File::open(self.base.join(path)).ok()?;
let full_path = self.base.join(path);
let file = File::open(full_path).ok()?;
Some(Box::new(BufReader::new(file)) as Box<FontData>)
}
@ -317,13 +318,14 @@ impl FontProvider for FileSystemFontProvider {
}
}
/// The error type for font operations.
pub enum FontError {
/// The font file is incorrect.
InvalidFont(String),
/// A character requested for subsetting was not present in the source font.
MissingCharacter(char),
/// A requested table was not present.
/// A requested or required table was not present.
MissingTable(String),
/// The table is unknown to the subsetting engine.
UnsupportedTable(String),

View File

@ -30,13 +30,15 @@ pub struct Subsetter<'a> {
impl<'a> Subsetter<'a> {
/// Subset a font. See [`Font::subsetted`] for more details.
pub fn subset<C, I, S>(font: &Font, chars: C, tables: I) -> Result<Font, FontError>
where C: IntoIterator<Item=char>, I: IntoIterator<Item=S>, S: AsRef<str> {
// Parse some header information.
where
C: IntoIterator<Item=char>,
I: IntoIterator<Item=S>,
S: AsRef<str>
{
let mut reader = OpenTypeReader::from_slice(&font.program);
let outlines = reader.outlines()?;
let table_records = reader.tables()?.to_vec();
// Store all chars we want in a vector.
let chars: Vec<_> = chars.into_iter().collect();
let subsetter = Subsetter {
@ -64,7 +66,7 @@ impl<'a> Subsetter<'a> {
// which glyphs are additionally used by composite glyphs.
self.find_glyphs()?;
// Write all the tables the callee wants.
// Copy/subset all the tables the caller wants.
for table in tables.into_iter() {
let tag = table.as_ref().parse()
.map_err(|_| FontError::UnsupportedTable(table.as_ref().to_string()))?;
@ -91,20 +93,19 @@ impl<'a> Subsetter<'a> {
/// Store all glyphs the subset shall contain into `self.glyphs`.
fn find_glyphs(&mut self) -> FontResult<()> {
if self.outlines == Outlines::TrueType {
// Parse the necessary information.
let char_map = self.read_table::<CharMap>()?;
let glyf = self.read_table::<Glyphs>()?;
// Add the default glyph at index 0 in any case.
// The default glyph should always be at index 0.
self.glyphs.push(self.font.default_glyph);
// Add all the glyphs for the chars requested.
for &c in &self.chars {
let glyph = char_map.get(c).ok_or_else(|| FontError::MissingCharacter(c))?;
self.glyphs.push(glyph);
}
// Collect the composite glyphs.
// Also collect the glyphs that are not mapped from any requested character
// but are referenced by composite glyphs.
let mut i = 0;
while i < self.glyphs.len() as u16 {
let glyph_id = self.glyphs[i as usize];
@ -115,6 +116,7 @@ impl<'a> Subsetter<'a> {
self.glyphs.push(composite);
}
}
i += 1;
}
} else {
@ -127,13 +129,13 @@ impl<'a> Subsetter<'a> {
/// Prepend the new header to the constructed body.
fn write_header(&mut self) -> FontResult<()> {
// Create an output buffer
let header_len = 12 + self.records.len() * 16;
const BASE_HEADER_LEN: usize = 12;
const TABLE_RECORD_LEN: usize = 16;
let header_len = BASE_HEADER_LEN + self.records.len() * TABLE_RECORD_LEN;
let mut header = Vec::with_capacity(header_len);
// Compute the first four header entries.
let num_tables = self.records.len() as u16;
// The highest power of two that is less than or equal to the table count.
let mut max_power = 1u16;
while max_power * 2 <= num_tables {
max_power *= 2;
@ -144,7 +146,7 @@ impl<'a> Subsetter<'a> {
let entry_selector = (max_power as f32).log2() as u16;
let range_shift = num_tables * 16 - search_range;
// Write the base header
// Write the base OpenType header
header.write_u32::<BE>(match self.outlines {
Outlines::TrueType => 0x00010000,
Outlines::CFF => 0x4f54544f,
@ -169,7 +171,7 @@ impl<'a> Subsetter<'a> {
Ok(())
}
/// Compute the new widths.
/// Compute the new subsetted widths vector.
fn compute_widths(&self) -> FontResult<Vec<Size>> {
let mut widths = Vec::with_capacity(self.glyphs.len());
for &glyph in &self.glyphs {
@ -180,11 +182,12 @@ impl<'a> Subsetter<'a> {
Ok(widths)
}
/// Compute the new mapping.
/// Compute the new character to glyph id mapping.
fn compute_mapping(&self) -> HashMap<char, u16> {
// The mapping is basically just the index in the char vector, but we add one
// The mapping is basically just the index into the char vector, but we add one
// to each index here because we added the default glyph to the front.
self.chars.iter().enumerate().map(|(i, &c)| (c, 1 + i as u16))
self.chars.iter().enumerate()
.map(|(i, &c)| (c, 1 + i as u16))
.collect::<HashMap<char, u16>>()
}
@ -192,13 +195,14 @@ impl<'a> Subsetter<'a> {
fn subset_table(&mut self, tag: Tag) -> FontResult<()> {
match tag.value() {
// These tables can just be copied.
b"head" | b"name" | b"OS/2" | b"post" |
b"head" | b"name" | b"OS/2" |
b"cvt " | b"fpgm" | b"prep" | b"gasp" => self.copy_table(tag),
// These tables have more complex subsetting routines.
b"hhea" => self.subset_hhea(),
b"hmtx" => self.subset_hmtx(),
b"maxp" => self.subset_maxp(),
b"post" => self.subset_post(),
b"cmap" => self.subset_cmap(),
b"glyf" => self.subset_glyf(),
b"loca" => self.subset_loca(),
@ -253,11 +257,21 @@ impl<'a> Subsetter<'a> {
})
}
/// Subset the `cmap` table by
/// Subset the `post` table by removing all name information.
///
/// The new table starts with the version value `0x00030000` (the `post`
/// version that carries no glyph names) followed by bytes `4..32` of the
/// original table, i.e. the remainder of the fixed-size header.
///
/// # Errors
/// Returns an error if the table data cannot be read or if the original
/// table is shorter than the 32 header bytes that are copied.
fn subset_post(&mut self) -> FontResult<()> {
    let tag = "post".parse().unwrap();
    let post = self.read_table_data(tag)?;
    self.write_table_body(tag, |this| {
        this.body.write_u32::<BE>(0x00030000)?;

        // Use a checked slice so that a truncated table in a malformed font
        // yields an invalid-font error instead of a panic.
        let header_rest = post.get(4..32).take_invalid("missing post table data")?;
        this.body.extend(header_rest);

        Ok(())
    })
}
/// Subset the `cmap` table by only including the selected characters.
/// Always uses format 12 for simplicity.
fn subset_cmap(&mut self) -> FontResult<()> {
let tag = "cmap".parse().unwrap();
// Always uses format 12 for simplicity.
self.write_table_body(tag, |this| {
let mut groups = Vec::new();
@ -281,7 +295,7 @@ impl<'a> Subsetter<'a> {
this.body.write_u16::<BE>(0)?;
this.body.write_u16::<BE>(1)?;
this.body.write_u16::<BE>(3)?;
this.body.write_u16::<BE>(1)?;
this.body.write_u16::<BE>(10)?;
this.body.write_u32::<BE>(12)?;
// Write the subtable header.
@ -319,27 +333,23 @@ impl<'a> Subsetter<'a> {
continue;
}
// Extract the glyph data.
let mut glyph_data = glyf.get(start as usize .. end as usize)
.take_invalid("missing glyph data")?.to_vec();
// Construct a cursor to operate on the data.
let mut cursor = Cursor::new(&mut glyph_data);
let num_contours = cursor.read_i16::<BE>()?;
// This is a composite glyph
let num_contours = cursor.read_i16::<BE>()?;
if num_contours < 0 {
cursor.seek(SeekFrom::Current(8))?;
loop {
let flags = cursor.read_u16::<BE>()?;
// Read the old glyph index.
let glyph_index = cursor.read_u16::<BE>()?;
let old_glyph_index = cursor.read_u16::<BE>()?;
// Compute the new glyph index by searching for its index
// in the glyph vector.
let new_glyph_index = this.glyphs.iter()
.position(|&g| g == glyph_index)
.position(|&g| g == old_glyph_index)
.take_invalid("invalid composite glyph")? as u16;
// Overwrite the old index with the new one.
@ -386,7 +396,14 @@ impl<'a> Subsetter<'a> {
let len = loca.length(glyph).take_invalid("missing loca entry")?;
offset += len;
}
this.body.write_u32::<BE>(offset)?;
// Write the final offset (so that it is known how long the last glyph is).
if format == 0 {
this.body.write_u16::<BE>((offset / 2) as u16)?;
} else {
this.body.write_u32::<BE>(offset)?;
}
Ok(())
})
}
@ -399,7 +416,7 @@ impl<'a> Subsetter<'a> {
writer(self)?;
let end = self.body.len();
// Pad with zeroes.
// Pad with zeros.
while (self.body.len() - start) % 4 != 0 {
self.body.push(0);
}
@ -412,6 +429,11 @@ impl<'a> Subsetter<'a> {
}))
}
/// Whether this font contains a given table.
///
/// Looks the tag up in `self.tables` with a binary search keyed on each
/// record's `tag`.
// NOTE(review): a binary search only gives correct answers if `self.tables`
// is sorted by tag — confirm that the table records are stored in tag order.
fn contains_table(&self, tag: Tag) -> bool {
self.tables.binary_search_by_key(&tag, |r| r.tag).is_ok()
}
/// Read a table with the opentype reader.
fn read_table<T: Table>(&mut self) -> FontResult<T> {
self.reader.read_table::<T>().map_err(Into::into)
@ -428,15 +450,10 @@ impl<'a> Subsetter<'a> {
.get(record.offset as usize .. (record.offset + record.length) as usize)
.take_invalid("missing table data")
}
/// Whether this font contains a given table.
fn contains_table(&self, tag: Tag) -> bool {
self.tables.binary_search_by_key(&tag, |r| r.tag).is_ok()
}
}
/// Calculate a checksum over the sliced data as sum of u32's. The data length has to be a multiple
/// of four.
/// Calculate a checksum over the sliced data as sum of u32's. The data
/// length has to be a multiple of four.
fn calculate_check_sum(data: &[u8]) -> u32 {
let mut sum = 0u32;
data.chunks_exact(4).for_each(|c| {
@ -452,7 +469,8 @@ fn calculate_check_sum(data: &[u8]) -> u32 {
/// Helper trait to create subsetting errors more easily.
trait TakeInvalid<T>: Sized {
/// Pull the type out of the option, returning an invalid font error if self was not valid.
/// Pull the type out of self, returning an invalid font
/// error if self was not valid.
fn take_invalid<S: Into<String>>(self, message: S) -> FontResult<T>;
}
@ -465,19 +483,80 @@ impl<T> TakeInvalid<T> for Option<T> {
#[cfg(test)]
mod tests {
use std::fs;
use crate::font::Font;
use opentype::{OpenTypeReader, TableRecord};
use opentype::tables::{CharMap, Locations};
#[test]
fn subset() {
let program = std::fs::read("../fonts/SourceSansPro-Regular.ttf").unwrap();
const ALPHABET: &str = "abcdefghijklmnopqrstuvwxyz";
/// Stores some tables for inspections.
///
/// Borrows from the font it was loaded from: `glyf_data` is a slice into the
/// font's `program` bytes.
struct Tables<'a> {
/// The parsed `cmap` table, used to look up the glyph for a character.
cmap: CharMap,
/// The parsed `loca` table, used to look up the offsets of a glyph.
loca: Locations,
/// The raw bytes of the `glyf` table.
glyf_data: &'a [u8],
}
impl<'a> Tables<'a> {
/// Load the tables from the font.
///
/// Panics if the `cmap` or `loca` table cannot be read, if there is no
/// record for the `glyf` table, or if that record's range lies outside of
/// the font program.
fn new(font: &'a Font) -> Tables<'a> {
let mut reader = OpenTypeReader::from_slice(&font.program);
let cmap = reader.read_table::<CharMap>().unwrap();
let loca = reader.read_table::<Locations>().unwrap();
// Slice the raw `glyf` bytes out of the program using the table record.
let &TableRecord { offset, length, .. } = reader.get_table_record("glyf").unwrap();
let glyf_data = &font.program[offset as usize .. (offset + length) as usize];
Tables { cmap, loca, glyf_data }
}
/// Return the glyph data for the given character.
///
/// Returns `None` if the `cmap` lookup for the character fails or if the
/// `loca` table has no offset for the glyph or for the glyph following it.
/// Panics if the resulting range lies outside of the `glyf` data.
fn glyph_data(&self, character: char) -> Option<&'a [u8]> {
let glyph = self.cmap.get(character)?;
// The data of a glyph spans from its own offset up to the offset of
// the next glyph.
let start = self.loca.offset(glyph)?;
let end = self.loca.offset(glyph + 1)?;
Some(&self.glyf_data[start as usize .. end as usize])
}
}
/// Return the original and subsetted version of a font with the characters
/// included that are given as the chars of the string.
fn subset(font: &str, chars: &str) -> (Font, Font) {
let program = fs::read(format!("../fonts/{}", font)).unwrap();
let font = Font::new(program).unwrap();
let subsetted = font.subsetted(
"abcdefghijklmnopqrstuvwxyz‼".chars(),
chars.chars(),
&["name", "OS/2", "post", "head", "hhea", "hmtx", "maxp", "cmap",
"cvt ", "fpgm", "prep", "loca", "glyf"][..]
"cvt ", "fpgm", "prep", "gasp", "loca", "glyf"][..]
).unwrap();
std::fs::write("../target/SourceSansPro-Subsetted.ttf", &subsetted.program).unwrap();
(font, subsetted)
}
/// A test that writes subsetted fonts into the `target` directory
/// for manual inspection.
#[test]
fn manual_files() {
    // Pairs of source font file name and output path of the subsetted font.
    let jobs = [
        ("SourceSansPro-Regular.ttf", "../target/SourceSansPro-Subsetted.ttf"),
        ("NotoSans-Regular.ttf", "../target/NotoSans-Subsetted.ttf"),
    ];

    for &(source, target) in jobs.iter() {
        let (_, subsetted) = subset(source, ALPHABET);
        fs::write(target, &subsetted.program).unwrap();
    }
}
/// Checks that the glyph data of individual glyphs is identical in the
/// original font and in its subsetted version.
#[test]
fn glyph_data() {
    let (original, subsetted) = subset("SourceSansPro-Regular.ttf", ALPHABET);
    let original_tables = Tables::new(&original);
    let subsetted_tables = Tables::new(&subsetted);

    // The composite glyphs are excluded from the comparison.
    let composites = ['i', 'j'];
    let simple_chars = ALPHABET.chars().filter(|c| !composites.contains(c));

    for character in simple_chars {
        let expected = original_tables.glyph_data(character);
        let actual = subsetted_tables.glyph_data(character);
        assert_eq!(expected, actual);
    }
}
}

View File

@ -32,7 +32,7 @@ pub fn layout(text: &str, ctx: TextContext) -> LayoutResult<BoxLayout> {
}).ok_or_else(|| LayoutError::NoSuitableFont(character))?;
// Add the char width to the total box width.
let char_width = font.widths[font.map(character) as usize] * ctx.style.font_size;
let char_width = font.widths[font.encode(character) as usize] * ctx.style.font_size;
width += char_width;
// Change the font if necessary.

View File

@ -30,10 +30,9 @@
//! ("CMU-Serif-Italic.ttf", font!["Computer Modern", Italic, Serif]),
//! ("NotoEmoji-Regular.ttf", font!["Noto", Regular, Serif, SansSerif, Monospace]),
//! ]));
//! // Typeset the source code into a document.
//! let document = typesetter.typeset(src).unwrap();
//!
//! // Export the document into a PDF file.
//! // Typeset the document and export it into a PDF file.
//! let document = typesetter.typeset(src).unwrap();
//! # /*
//! let file = File::create("hello-typeset.pdf").unwrap();
//! # */