Improve subsetter testing ✔
This commit is contained in:
parent
7eec0b8dd7
commit
6c8b5caa9f
@ -1,7 +1,7 @@
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
use std::io::{Read, BufWriter};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process;
|
||||
|
||||
@ -19,15 +19,11 @@ fn main() {
|
||||
|
||||
/// The actual main function.
|
||||
fn run() -> Result<(), Box<Error>> {
|
||||
// Check the command line arguments.
|
||||
let args: Vec<String> = env::args().collect();
|
||||
if args.len() < 2 || args.len() > 3 {
|
||||
help_and_quit();
|
||||
}
|
||||
|
||||
// Open the input file.
|
||||
let mut file = File::open(&args[1]).map_err(|_| "failed to open source file")?;
|
||||
|
||||
let source_path = Path::new(&args[1]);
|
||||
|
||||
// Compute the output filename from the input filename by replacing the extension.
|
||||
@ -39,14 +35,13 @@ fn run() -> Result<(), Box<Error>> {
|
||||
PathBuf::from(&args[2])
|
||||
};
|
||||
|
||||
// We do not want to overwrite the source file.
|
||||
if dest_path == source_path {
|
||||
return Err("source and destination path are the same".into());
|
||||
}
|
||||
|
||||
// Read the input file.
|
||||
let mut src = String::new();
|
||||
file.read_to_string(&mut src).map_err(|_| "failed to read from source file")?;
|
||||
let mut source_file = File::open(source_path).map_err(|_| "failed to open source file")?;
|
||||
source_file.read_to_string(&mut src).map_err(|_| "failed to read from source file")?;
|
||||
|
||||
// Create a typesetter with a font provider that provides the default fonts.
|
||||
let mut typesetter = Typesetter::new();
|
||||
@ -71,15 +66,15 @@ fn run() -> Result<(), Box<Error>> {
|
||||
|
||||
// Export the document into a PDF file.
|
||||
let exporter = PdfExporter::new();
|
||||
let output_file = File::create(&dest_path)?;
|
||||
exporter.export(&document, output_file)?;
|
||||
let dest_file = File::create(&dest_path)?;
|
||||
exporter.export(&document, BufWriter::new(dest_file))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Print a usage message and quit.
|
||||
fn help_and_quit() {
|
||||
let name = env::args().next().unwrap_or("typeset".to_string());
|
||||
let name = env::args().next().unwrap_or("typst".to_string());
|
||||
println!("usage: {} source [destination]", name);
|
||||
process::exit(0);
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ struct PdfEngine<'d, W: Write> {
|
||||
}
|
||||
|
||||
/// Offsets for the various groups of ids.
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
struct Offsets {
|
||||
catalog: Ref,
|
||||
page_tree: Ref,
|
||||
@ -67,8 +67,7 @@ impl<'d, W: Write> PdfEngine<'d, W> {
|
||||
let mut font = 0usize;
|
||||
let mut chars = vec![HashSet::new(); doc.fonts.len()];
|
||||
|
||||
// Iterate through every text object on every page and find out which characters they
|
||||
// use.
|
||||
// Find out which characters are used for each font.
|
||||
for page in &doc.pages {
|
||||
for action in &page.actions {
|
||||
match action {
|
||||
@ -141,16 +140,14 @@ impl<'d, W: Write> PdfEngine<'d, W> {
|
||||
|
||||
/// Write the content of a page.
|
||||
fn write_page(&mut self, id: u32, page: &DocPage) -> PdfResult<()> {
|
||||
// The currently used font.
|
||||
let mut text = Text::new();
|
||||
let mut active_font = (std::usize::MAX, 0.0);
|
||||
|
||||
// The last set position and font, these get flushed when content is written.
|
||||
// The last set position and font,
|
||||
// these only get flushed lazily when content is written.
|
||||
let mut next_pos = Some(Size2D::zero());
|
||||
let mut next_font = None;
|
||||
|
||||
// The output text.
|
||||
let mut text = Text::new();
|
||||
|
||||
for action in &page.actions {
|
||||
match action {
|
||||
LayoutAction::MoveAbsolute(pos) => next_pos = Some(*pos),
|
||||
@ -174,7 +171,7 @@ impl<'d, W: Write> PdfEngine<'d, W> {
|
||||
}
|
||||
|
||||
// Write the text.
|
||||
text.tj(self.fonts[active_font.0].encode(&string));
|
||||
text.tj(self.fonts[active_font.0].encode_text(&string));
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -227,7 +224,7 @@ impl<'d, W: Write> PdfEngine<'d, W> {
|
||||
.font_file_2(id + 4)
|
||||
)?;
|
||||
|
||||
// The CMap, which maps glyphs to unicode codepoints.
|
||||
// Write the CMap, which maps glyphs to unicode codepoints.
|
||||
let mapping = font.font.mapping.iter().map(|(&c, &cid)| (cid, c));
|
||||
self.writer.write_obj(id + 3, &CMap::new("Custom", system_info, mapping))?;
|
||||
|
||||
@ -261,15 +258,14 @@ struct PdfFont {
|
||||
}
|
||||
|
||||
impl PdfFont {
|
||||
/// Create a subetted version of the font and calculate some information needed for creating the
|
||||
/// _PDF_.
|
||||
/// Create a subsetted version of the font and calculate some information
|
||||
/// needed for creating the _PDF_.
|
||||
fn new(font: &Font, chars: &HashSet<char>) -> PdfResult<PdfFont> {
|
||||
/// Convert a size into a _PDF_ glyph unit.
|
||||
fn size_to_glyph_unit(size: Size) -> GlyphUnit {
|
||||
(1000.0 * size.to_pt()).round() as GlyphUnit
|
||||
}
|
||||
|
||||
// Subset the font using the selected characters.
|
||||
let subset_result = font.subsetted(
|
||||
chars.iter().cloned(),
|
||||
&["head", "hhea", "hmtx", "maxp", "cmap", "cvt ", "fpgm", "prep", "loca", "glyf"][..]
|
||||
@ -283,7 +279,6 @@ impl PdfFont {
|
||||
Err(err) => return Err(err.into()),
|
||||
};
|
||||
|
||||
// Specify flags for the font.
|
||||
let mut flags = FontFlags::empty();
|
||||
flags.set(FontFlags::FIXED_PITCH, font.metrics.monospace);
|
||||
flags.set(FontFlags::SERIF, font.name.contains("Serif"));
|
||||
@ -291,7 +286,6 @@ impl PdfFont {
|
||||
flags.set(FontFlags::ITALIC, font.metrics.italic);
|
||||
flags.insert(FontFlags::SMALL_CAP);
|
||||
|
||||
// Transform the widths.
|
||||
let widths = subsetted.widths.iter().map(|&x| size_to_glyph_unit(x)).collect();
|
||||
|
||||
Ok(PdfFont {
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! Loads fonts matching queries.
|
||||
//! Loading of fonts matching queries.
|
||||
|
||||
use std::cell::{RefCell, Ref};
|
||||
use std::collections::HashMap;
|
||||
@ -12,7 +12,7 @@ pub struct FontLoader<'p> {
|
||||
/// The font providers.
|
||||
providers: Vec<&'p (dyn FontProvider + 'p)>,
|
||||
/// The fonts available from each provider (indexed like `providers`).
|
||||
provider_fonts: Vec<&'p [FontInfo]>,
|
||||
infos: Vec<&'p [FontInfo]>,
|
||||
/// The internal state. Uses interior mutability because the loader works behind
|
||||
/// an immutable reference to ease usage.
|
||||
state: RefCell<FontLoaderState<'p>>,
|
||||
@ -20,29 +20,29 @@ pub struct FontLoader<'p> {
|
||||
|
||||
/// Internal state of the font loader (separated to wrap it in a `RefCell`).
|
||||
struct FontLoaderState<'p> {
|
||||
/// The loaded fonts alongside their external indices. Some fonts may not have external indices
|
||||
/// because they were loaded but did not contain the required character. However, these are
|
||||
/// still stored because they may be needed later. The index is just set to `None` then.
|
||||
/// The loaded fonts alongside their external indices. Some fonts may not
|
||||
/// have external indices because they were loaded but did not contain the
|
||||
/// required character. However, these are still stored because they may
|
||||
/// be needed later. The index is just set to `None` then.
|
||||
fonts: Vec<(Option<usize>, Font)>,
|
||||
/// Allows to retrieve a font (index) quickly if a query was submitted before.
|
||||
query_cache: HashMap<FontQuery, usize>,
|
||||
/// Allows to re-retrieve loaded fonts by their info instead of loading them again.
|
||||
info_cache: HashMap<&'p FontInfo, usize>,
|
||||
/// Indexed by external indices (the ones inside the tuples in the `fonts` vector) and maps to
|
||||
/// internal indices (the actual indices into the vector).
|
||||
/// Indexed by external indices (the ones inside the tuples in the `fonts` vector)
|
||||
/// and maps to internal indices (the actual indices into the vector).
|
||||
inner_index: Vec<usize>,
|
||||
}
|
||||
|
||||
impl<'p> FontLoader<'p> {
|
||||
/// Create a new font loader using a set of providers.
|
||||
#[inline]
|
||||
pub fn new<P: 'p>(providers: &'p [P]) -> FontLoader<'p> where P: AsRef<dyn FontProvider + 'p> {
|
||||
let providers: Vec<_> = providers.iter().map(|p| p.as_ref()).collect();
|
||||
let provider_fonts = providers.iter().map(|prov| prov.available()).collect();
|
||||
let infos = providers.iter().map(|prov| prov.available()).collect();
|
||||
|
||||
FontLoader {
|
||||
providers,
|
||||
provider_fonts,
|
||||
infos,
|
||||
state: RefCell::new(FontLoaderState {
|
||||
query_cache: HashMap::new(),
|
||||
info_cache: HashMap::new(),
|
||||
@ -66,26 +66,24 @@ impl<'p> FontLoader<'p> {
|
||||
}
|
||||
drop(state);
|
||||
|
||||
// The outermost loop goes over the fallbacks because we want to serve the font that matches
|
||||
// the first possible class.
|
||||
// The outermost loop goes over the fallbacks because we want to serve the
|
||||
// font that matches the first possible class.
|
||||
for class in &query.fallback {
|
||||
// For each class now go over all font infos from all font providers.
|
||||
for (provider, infos) in self.providers.iter().zip(&self.provider_fonts) {
|
||||
// For each class now go over all fonts from all font providers.
|
||||
for (provider, infos) in self.providers.iter().zip(&self.infos) {
|
||||
for info in infos.iter() {
|
||||
let matches = info.classes.contains(class)
|
||||
&& query.classes.iter().all(|class| info.classes.contains(class));
|
||||
let viable = info.classes.contains(class);
|
||||
let matches = viable && query.classes.iter()
|
||||
.all(|class| info.classes.contains(class));
|
||||
|
||||
// Proceed only if this font matches the query up to now.
|
||||
if matches {
|
||||
let mut state = self.state.borrow_mut();
|
||||
|
||||
// Check if we have already loaded this font before, otherwise, we will load
|
||||
// it from the provider. Anyway, have it stored and find out its internal
|
||||
// index.
|
||||
// Check if we have already loaded this font before, otherwise,
|
||||
// we will load it from the provider.
|
||||
let index = if let Some(&index) = state.info_cache.get(info) {
|
||||
index
|
||||
} else if let Some(mut source) = provider.get(info) {
|
||||
// Read the font program into a vector and parse it.
|
||||
let mut program = Vec::new();
|
||||
source.read_to_end(&mut program).ok()?;
|
||||
let font = Font::new(program).ok()?;
|
||||
@ -107,8 +105,8 @@ impl<'p> FontLoader<'p> {
|
||||
// This font is suitable, thus we cache the query result.
|
||||
state.query_cache.insert(query, index);
|
||||
|
||||
// Now we have to find out the external index of it or assign a new one
|
||||
// if it has none.
|
||||
// Now we have to find out the external index of it or assign
|
||||
// a new one if it has none.
|
||||
let external_index = state.fonts[index].0.unwrap_or_else(|| {
|
||||
// We have to assign an external index before serving.
|
||||
let new_index = state.inner_index.len();
|
||||
@ -133,7 +131,8 @@ impl<'p> FontLoader<'p> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Return the font previously loaded at this index. Panics if the index is not assigned.
|
||||
/// Return the font previously loaded at this index.
|
||||
/// Panics if the index is not assigned.
|
||||
#[inline]
|
||||
pub fn get_with_index(&self, index: usize) -> Ref<Font> {
|
||||
let state = self.state.borrow();
|
||||
@ -143,9 +142,9 @@ impl<'p> FontLoader<'p> {
|
||||
|
||||
/// Move the whole list of fonts out.
|
||||
pub fn into_fonts(self) -> Vec<Font> {
|
||||
// Sort the fonts by external index so that they are in the correct order. All fonts that
|
||||
// were cached but not used by the outside are sorted to the back and are removed in the
|
||||
// next step.
|
||||
// Sort the fonts by external index so that they are in the correct order.
|
||||
// All fonts that were cached but not used by the outside are sorted to the back
|
||||
// and are removed in the next step.
|
||||
let mut fonts = self.state.into_inner().fonts;
|
||||
fonts.sort_by_key(|&(maybe_index, _)| match maybe_index {
|
||||
Some(index) => index,
|
||||
@ -164,7 +163,7 @@ impl Debug for FontLoader<'_> {
|
||||
let state = self.state.borrow();
|
||||
f.debug_struct("FontLoader")
|
||||
.field("providers", &self.providers.len())
|
||||
.field("provider_fonts", &self.provider_fonts)
|
||||
.field("infos", &self.infos)
|
||||
.field("fonts", &state.fonts)
|
||||
.field("query_cache", &state.query_cache)
|
||||
.field("info_cache", &state.info_cache)
|
||||
@ -180,7 +179,6 @@ pub struct FontQuery {
|
||||
pub character: char,
|
||||
/// Which classes the font has to be part of.
|
||||
pub classes: Vec<FontClass>,
|
||||
/// A sequence of classes. The font matching the leftmost class in this sequence
|
||||
/// should be returned.
|
||||
/// The font matching the leftmost class in this sequence should be returned.
|
||||
pub fallback: Vec<FontClass>,
|
||||
}
|
||||
|
174
src/font/mod.rs
174
src/font/mod.rs
@ -1,7 +1,7 @@
|
||||
//! Font loading and transforming.
|
||||
//! Font loading and subsetting.
|
||||
//!
|
||||
//! # Font handling
|
||||
//! To do the typesetting, the typesetting engine needs font data. To be highly portable the engine
|
||||
//! To do the typesetting, the engine needs font data. However, to be highly portable the engine
|
||||
//! itself assumes nothing about the environment. To still work with fonts, the consumer of this
|
||||
//! library has to add _font providers_ to their typesetting instance. These can be queried for font
|
||||
//! data given flexible font filters specifying required font families and styles. A font provider
|
||||
@ -19,28 +19,29 @@ use opentype::{Error as OpentypeError, OpenTypeReader};
|
||||
use opentype::tables::{Header, Name, CharMap, HorizontalMetrics, Post, OS2};
|
||||
use opentype::types::{MacStyleFlags, NameEntry};
|
||||
|
||||
pub use self::loader::{FontLoader, FontQuery};
|
||||
use self::subset::Subsetter;
|
||||
use crate::size::Size;
|
||||
|
||||
mod loader;
|
||||
mod subset;
|
||||
|
||||
pub use loader::{FontLoader, FontQuery};
|
||||
|
||||
/// A loaded and parsed font program.
|
||||
|
||||
/// A parsed _OpenType_ font program.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Font {
|
||||
/// The base name of the font.
|
||||
/// The name of the font.
|
||||
pub name: String,
|
||||
/// The raw bytes of the font program.
|
||||
/// The complete, raw bytes of the font program.
|
||||
pub program: Vec<u8>,
|
||||
/// A mapping from character codes to glyph ids.
|
||||
/// The mapping from character codes to glyph ids.
|
||||
pub mapping: HashMap<char, u16>,
|
||||
/// The widths of the glyphs indexed by glyph id.
|
||||
pub widths: Vec<Size>,
|
||||
/// The fallback glyph.
|
||||
/// The id of the fallback glyph.
|
||||
pub default_glyph: u16,
|
||||
/// The typesetting-relevant metrics of this font.
|
||||
/// The typesetting or exporting-relevant metrics of this font.
|
||||
pub metrics: FontMetrics,
|
||||
}
|
||||
|
||||
@ -51,32 +52,31 @@ pub struct FontMetrics {
|
||||
pub italic: bool,
|
||||
/// Whether font is monospace.
|
||||
pub monospace: bool,
|
||||
/// The angle of text in italics.
|
||||
/// The angle of text in italics (in counter-clockwise degrees from vertical).
|
||||
pub italic_angle: f32,
|
||||
/// The glyph bounding box: [x_min, y_min, x_max, y_max],
|
||||
/// The extremal values [x_min, y_min, x_max, y_max] for all glyph bounding boxes.
|
||||
pub bounding_box: [Size; 4],
|
||||
/// The typographics ascender.
|
||||
/// The typographic ascender.
|
||||
pub ascender: Size,
|
||||
/// The typographics descender.
|
||||
/// The typographic descender.
|
||||
pub descender: Size,
|
||||
/// The approximate height of capital letters.
|
||||
pub cap_height: Size,
|
||||
/// The weight class of the font.
|
||||
/// The weight class of the font (from 100 for thin to 900 for heavy).
|
||||
pub weight_class: u16,
|
||||
}
|
||||
|
||||
impl Font {
|
||||
/// Create a new font from a raw font program.
|
||||
/// Create a `Font` from a raw font program.
|
||||
pub fn new(program: Vec<u8>) -> FontResult<Font> {
|
||||
// Create an OpentypeReader to parse the font tables.
|
||||
let cursor = Cursor::new(&program);
|
||||
let mut reader = OpenTypeReader::new(cursor);
|
||||
|
||||
// Read the relevant tables
|
||||
// (all of these are required by the OpenType specification, so we expect them).
|
||||
// All of these tables are required by the OpenType specification,
|
||||
// so we do not really have to handle the case that they are missing.
|
||||
let head = reader.read_table::<Header>()?;
|
||||
let name = reader.read_table::<Name>()?;
|
||||
let os2 = reader.read_table::<OS2>()?;
|
||||
let os2 = reader.read_table::<OS2>()?;
|
||||
let cmap = reader.read_table::<CharMap>()?;
|
||||
let hmtx = reader.read_table::<HorizontalMetrics>()?;
|
||||
let post = reader.read_table::<Post>()?;
|
||||
@ -85,15 +85,13 @@ impl Font {
|
||||
let font_unit_ratio = 1.0 / (head.units_per_em as f32);
|
||||
let font_unit_to_size = |x| Size::pt(font_unit_ratio * x);
|
||||
|
||||
// Find out the name of the font.
|
||||
let font_name = name.get_decoded(NameEntry::PostScriptName)
|
||||
let font_name = name
|
||||
.get_decoded(NameEntry::PostScriptName)
|
||||
.unwrap_or_else(|| "unknown".to_owned());
|
||||
|
||||
// Convert the widths from font units to sizes.
|
||||
let widths = hmtx.metrics.iter()
|
||||
.map(|m| font_unit_to_size(m.advance_width as f32)).collect();
|
||||
|
||||
// Calculate the typesetting-relevant metrics.
|
||||
let metrics = FontMetrics {
|
||||
italic: head.mac_style.contains(MacStyleFlags::ITALIC),
|
||||
monospace: post.is_fixed_pitch,
|
||||
@ -120,51 +118,82 @@ impl Font {
|
||||
})
|
||||
}
|
||||
|
||||
/// Map a character to it's glyph index.
|
||||
/// Encode a character into its glyph id.
|
||||
#[inline]
|
||||
pub fn map(&self, c: char) -> u16 {
|
||||
self.mapping.get(&c).map(|&g| g).unwrap_or(self.default_glyph)
|
||||
pub fn encode(&self, character: char) -> u16 {
|
||||
self.mapping.get(&character).map(|&g| g).unwrap_or(self.default_glyph)
|
||||
}
|
||||
|
||||
/// Encode the given text for this font (into glyph ids).
|
||||
/// Encode the given text into a vector of glyph ids.
|
||||
#[inline]
|
||||
pub fn encode(&self, text: &str) -> Vec<u8> {
|
||||
// Each glyph id takes two bytes that we encode in big endian.
|
||||
let mut bytes = Vec::with_capacity(2 * text.len());
|
||||
for glyph in text.chars().map(|c| self.map(c)) {
|
||||
pub fn encode_text(&self, text: &str) -> Vec<u8> {
|
||||
const BYTES_PER_GLYPH: usize = 2;
|
||||
let mut bytes = Vec::with_capacity(BYTES_PER_GLYPH * text.len());
|
||||
for c in text.chars() {
|
||||
let glyph = self.encode(c);
|
||||
bytes.push((glyph >> 8) as u8);
|
||||
bytes.push((glyph & 0xff) as u8);
|
||||
}
|
||||
bytes
|
||||
}
|
||||
|
||||
/// Generate a subsetted version of this font including only the chars listed in `chars`.
|
||||
/// Generate a subsetted version of this font.
|
||||
///
|
||||
/// The filter functions decides which tables to keep and which not based on their tag.
|
||||
/// This version includes only the given `chars` and _OpenType_ `tables`.
|
||||
#[inline]
|
||||
pub fn subsetted<C, I, S>(&self, chars: C, tables: I) -> Result<Font, FontError>
|
||||
where C: IntoIterator<Item=char>, I: IntoIterator<Item=S>, S: AsRef<str> {
|
||||
where
|
||||
C: IntoIterator<Item=char>,
|
||||
I: IntoIterator<Item=S>,
|
||||
S: AsRef<str>
|
||||
{
|
||||
Subsetter::subset(self, chars, tables)
|
||||
}
|
||||
}
|
||||
|
||||
/// Categorizes a font.
|
||||
/// A type that provides fonts.
|
||||
pub trait FontProvider {
|
||||
/// Returns a font with the given info if this provider has one.
|
||||
fn get(&self, info: &FontInfo) -> Option<Box<dyn FontData>>;
|
||||
|
||||
/// The available fonts this provider can serve. While these should generally
|
||||
/// be retrievable through the `get` method, this is not guaranteed.
|
||||
fn available<'p>(&'p self) -> &'p [FontInfo];
|
||||
}
|
||||
|
||||
/// A wrapper trait around `Read + Seek`.
|
||||
///
|
||||
/// Can be constructed conveniently with the [`font`] macro.
|
||||
/// This type is needed because currently you can't make a trait object with two traits, like
|
||||
/// `Box<dyn Read + Seek>`. Automatically implemented for all types that are [`Read`] and [`Seek`].
|
||||
pub trait FontData: Read + Seek {}
|
||||
impl<T> FontData for T where T: Read + Seek {}
|
||||
|
||||
/// Classifies a font by listing the font classes it is part of.
|
||||
///
|
||||
/// All fonts with the same [`FontInfo`] are part of the same intersection
|
||||
/// of [font classes](FontClass).
|
||||
///
|
||||
/// This structure can be constructed conveniently through the [`font`] macro.
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
|
||||
pub struct FontInfo {
|
||||
/// The font families this font is part of.
|
||||
/// The font classes this font is part of.
|
||||
pub classes: Vec<FontClass>,
|
||||
}
|
||||
|
||||
impl FontInfo {
|
||||
/// Create a new font info from an iterator of classes.
|
||||
/// Create a new font info from a collection of classes.
|
||||
#[inline]
|
||||
pub fn new<I>(classes: I) -> FontInfo where I: IntoIterator<Item=FontClass> {
|
||||
FontInfo { classes: classes.into_iter().collect() }
|
||||
FontInfo {
|
||||
classes: classes.into_iter().collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A class of fonts.
|
||||
///
|
||||
/// The set of all fonts can be classified into subsets of font classes like
|
||||
/// _serif_ or _bold_. This enum lists such subclasses.
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
|
||||
pub enum FontClass {
|
||||
Serif,
|
||||
@ -183,27 +212,18 @@ pub enum FontClass {
|
||||
/// into custom `Family`-variants and others can be named directly.
|
||||
///
|
||||
/// # Examples
|
||||
/// The font _Noto Sans_ in regular typeface.
|
||||
/// ```
|
||||
/// # use typeset::font;
|
||||
/// // Noto Sans in regular typeface.
|
||||
/// font!["NotoSans", "Noto", Regular, SansSerif];
|
||||
/// ```
|
||||
///
|
||||
/// The font _Noto Serif_ in italics and boldface.
|
||||
/// ```
|
||||
/// # use typeset::font;
|
||||
/// // Noto Serif in italics and boldface.
|
||||
/// font!["NotoSerif", "Noto", Bold, Italic, Serif];
|
||||
/// ```
|
||||
///
|
||||
/// The font _Arial_ in italics.
|
||||
/// ```
|
||||
/// # use typeset::font;
|
||||
/// // Arial in italics.
|
||||
/// font!["Arial", Italic, SansSerif];
|
||||
/// ```
|
||||
///
|
||||
/// The font _Noto Emoji_, which works with all base families. 🙂
|
||||
/// ```
|
||||
/// # use typeset::font;
|
||||
/// // Noto Emoji, which works in sans-serif and serif contexts.
|
||||
/// font!["NotoEmoji", "Noto", Regular, SansSerif, Serif, Monospace];
|
||||
/// ```
|
||||
#[macro_export]
|
||||
@ -229,37 +249,21 @@ macro_rules! font {
|
||||
}};
|
||||
}
|
||||
|
||||
/// A type that provides fonts.
|
||||
pub trait FontProvider {
|
||||
/// Returns a font with the given info if this provider has one.
|
||||
fn get(&self, info: &FontInfo) -> Option<Box<dyn FontData>>;
|
||||
|
||||
/// The available fonts this provider can serve. While these should generally be retrievable
|
||||
/// through the `get` method, it does not have to be guaranteed that a font info, that is
|
||||
/// contained, here yields a `Some` value when passed into `get`.
|
||||
fn available<'p>(&'p self) -> &'p [FontInfo];
|
||||
}
|
||||
|
||||
/// A wrapper trait around `Read + Seek`.
|
||||
///
|
||||
/// This type is needed because currently you can't make a trait object with two traits, like
|
||||
/// `Box<dyn Read + Seek>`. Automatically implemented for all types that are [`Read`] and [`Seek`].
|
||||
pub trait FontData: Read + Seek {}
|
||||
impl<T> FontData for T where T: Read + Seek {}
|
||||
|
||||
/// A font provider serving fonts from a folder on the local file system.
|
||||
#[derive(Debug)]
|
||||
pub struct FileSystemFontProvider {
|
||||
/// The root folder.
|
||||
/// The base folder all other paths are relative to.
|
||||
base: PathBuf,
|
||||
/// Paths of the fonts relative to the `base` path.
|
||||
paths: Vec<PathBuf>,
|
||||
/// The information for the font with the same index in `paths`.
|
||||
/// The info for the font with the same index in `paths`.
|
||||
infos: Vec<FontInfo>,
|
||||
}
|
||||
|
||||
impl FileSystemFontProvider {
|
||||
/// Create a new provider from a folder and an iterator of pairs of font paths and font infos.
|
||||
/// Create a new provider serving fonts from a base path. The `fonts` iterator
|
||||
/// should contain paths of fonts relative to the base alongside matching
|
||||
/// infos for these fonts.
|
||||
///
|
||||
/// # Example
|
||||
/// Serve the two fonts `NotoSans-Regular` and `NotoSans-Italic` from the local folder
|
||||
@ -271,21 +275,20 @@ impl FileSystemFontProvider {
|
||||
/// ("NotoSans-Italic.ttf", font!["NotoSans", Italic, SansSerif]),
|
||||
/// ]);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn new<B, I, P>(base: B, infos: I) -> FileSystemFontProvider
|
||||
pub fn new<B, I, P>(base: B, fonts: I) -> FileSystemFontProvider
|
||||
where
|
||||
B: Into<PathBuf>,
|
||||
I: IntoIterator<Item = (P, FontInfo)>,
|
||||
P: Into<PathBuf>,
|
||||
{
|
||||
// Find out how long the iterator is at least, to reserve the correct capacity for the
|
||||
// vectors.
|
||||
let iter = infos.into_iter();
|
||||
let min = iter.size_hint().0;
|
||||
let iter = fonts.into_iter();
|
||||
|
||||
// Split the iterator into two separate vectors.
|
||||
// Find out how long the iterator is at least, to reserve the correct
|
||||
// capacity for the vectors.
|
||||
let min = iter.size_hint().0;
|
||||
let mut paths = Vec::with_capacity(min);
|
||||
let mut infos = Vec::with_capacity(min);
|
||||
|
||||
for (path, info) in iter {
|
||||
paths.push(path.into());
|
||||
infos.push(info);
|
||||
@ -302,12 +305,10 @@ impl FileSystemFontProvider {
|
||||
impl FontProvider for FileSystemFontProvider {
|
||||
#[inline]
|
||||
fn get(&self, info: &FontInfo) -> Option<Box<dyn FontData>> {
|
||||
// Find the index of the font in both arrays (early exit if there is no match).
|
||||
let index = self.infos.iter().position(|i| i == info)?;
|
||||
|
||||
// Open the file and return a boxed reader operating on it.
|
||||
let index = self.infos.iter().position(|c| c == info)?;
|
||||
let path = &self.paths[index];
|
||||
let file = File::open(self.base.join(path)).ok()?;
|
||||
let full_path = self.base.join(path);
|
||||
let file = File::open(full_path).ok()?;
|
||||
Some(Box::new(BufReader::new(file)) as Box<FontData>)
|
||||
}
|
||||
|
||||
@ -317,13 +318,14 @@ impl FontProvider for FileSystemFontProvider {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// The error type for font operations.
|
||||
pub enum FontError {
|
||||
/// The font file is incorrect.
|
||||
InvalidFont(String),
|
||||
/// A character requested for subsetting was not present in the source font.
|
||||
MissingCharacter(char),
|
||||
/// A requested table was not present.
|
||||
/// A requested or required table was not present.
|
||||
MissingTable(String),
|
||||
/// The table is unknown to the subsetting engine.
|
||||
UnsupportedTable(String),
|
||||
|
@ -30,13 +30,15 @@ pub struct Subsetter<'a> {
|
||||
impl<'a> Subsetter<'a> {
|
||||
/// Subset a font. See [`Font::subsetted`] for more details.
|
||||
pub fn subset<C, I, S>(font: &Font, chars: C, tables: I) -> Result<Font, FontError>
|
||||
where C: IntoIterator<Item=char>, I: IntoIterator<Item=S>, S: AsRef<str> {
|
||||
// Parse some header information.
|
||||
where
|
||||
C: IntoIterator<Item=char>,
|
||||
I: IntoIterator<Item=S>,
|
||||
S: AsRef<str>
|
||||
{
|
||||
let mut reader = OpenTypeReader::from_slice(&font.program);
|
||||
|
||||
let outlines = reader.outlines()?;
|
||||
let table_records = reader.tables()?.to_vec();
|
||||
|
||||
// Store all chars we want in a vector.
|
||||
let chars: Vec<_> = chars.into_iter().collect();
|
||||
|
||||
let subsetter = Subsetter {
|
||||
@ -64,7 +66,7 @@ impl<'a> Subsetter<'a> {
|
||||
// which glyphs are additionally used by composite glyphs.
|
||||
self.find_glyphs()?;
|
||||
|
||||
// Write all the tables the callee wants.
|
||||
// Copy/subset all the tables the caller wants.
|
||||
for table in tables.into_iter() {
|
||||
let tag = table.as_ref().parse()
|
||||
.map_err(|_| FontError::UnsupportedTable(table.as_ref().to_string()))?;
|
||||
@ -91,20 +93,19 @@ impl<'a> Subsetter<'a> {
|
||||
/// Store all glyphs the subset shall contain into `self.glyphs`.
|
||||
fn find_glyphs(&mut self) -> FontResult<()> {
|
||||
if self.outlines == Outlines::TrueType {
|
||||
// Parse the necessary information.
|
||||
let char_map = self.read_table::<CharMap>()?;
|
||||
let glyf = self.read_table::<Glyphs>()?;
|
||||
|
||||
// Add the default glyph at index 0 in any case.
|
||||
// The default glyph should always be at index 0.
|
||||
self.glyphs.push(self.font.default_glyph);
|
||||
|
||||
// Add all the glyphs for the chars requested.
|
||||
for &c in &self.chars {
|
||||
let glyph = char_map.get(c).ok_or_else(|| FontError::MissingCharacter(c))?;
|
||||
self.glyphs.push(glyph);
|
||||
}
|
||||
|
||||
// Collect the composite glyphs.
|
||||
// Collect the glyphs that are not mapped from characters but are used in
|
||||
// composite glyphs, too.
|
||||
let mut i = 0;
|
||||
while i < self.glyphs.len() as u16 {
|
||||
let glyph_id = self.glyphs[i as usize];
|
||||
@ -115,6 +116,7 @@ impl<'a> Subsetter<'a> {
|
||||
self.glyphs.push(composite);
|
||||
}
|
||||
}
|
||||
|
||||
i += 1;
|
||||
}
|
||||
} else {
|
||||
@ -127,13 +129,13 @@ impl<'a> Subsetter<'a> {
|
||||
/// Prepend the new header to the constructed body.
|
||||
fn write_header(&mut self) -> FontResult<()> {
|
||||
// Create an output buffer
|
||||
let header_len = 12 + self.records.len() * 16;
|
||||
const BASE_HEADER_LEN: usize = 12;
|
||||
const TABLE_RECORD_LEN: usize = 16;
|
||||
let header_len = BASE_HEADER_LEN + self.records.len() * TABLE_RECORD_LEN;
|
||||
let mut header = Vec::with_capacity(header_len);
|
||||
|
||||
// Compute the first four header entries.
|
||||
let num_tables = self.records.len() as u16;
|
||||
|
||||
// The highest power of two not exceeding the table count.
|
||||
let mut max_power = 1u16;
|
||||
while max_power * 2 <= num_tables {
|
||||
max_power *= 2;
|
||||
@ -144,7 +146,7 @@ impl<'a> Subsetter<'a> {
|
||||
let entry_selector = (max_power as f32).log2() as u16;
|
||||
let range_shift = num_tables * 16 - search_range;
|
||||
|
||||
// Write the base header
|
||||
// Write the base OpenType header
|
||||
header.write_u32::<BE>(match self.outlines {
|
||||
Outlines::TrueType => 0x00010000,
|
||||
Outlines::CFF => 0x4f54544f,
|
||||
@ -169,7 +171,7 @@ impl<'a> Subsetter<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compute the new widths.
|
||||
/// Compute the new subsetted widths vector.
|
||||
fn compute_widths(&self) -> FontResult<Vec<Size>> {
|
||||
let mut widths = Vec::with_capacity(self.glyphs.len());
|
||||
for &glyph in &self.glyphs {
|
||||
@ -180,11 +182,12 @@ impl<'a> Subsetter<'a> {
|
||||
Ok(widths)
|
||||
}
|
||||
|
||||
/// Compute the new mapping.
|
||||
/// Compute the new character to glyph id mapping.
|
||||
fn compute_mapping(&self) -> HashMap<char, u16> {
|
||||
// The mapping is basically just the index in the char vector, but we add one
|
||||
// The mapping is basically just the index into the char vector, but we add one
|
||||
// to each index here because we added the default glyph to the front.
|
||||
self.chars.iter().enumerate().map(|(i, &c)| (c, 1 + i as u16))
|
||||
self.chars.iter().enumerate()
|
||||
.map(|(i, &c)| (c, 1 + i as u16))
|
||||
.collect::<HashMap<char, u16>>()
|
||||
}
|
||||
|
||||
@ -192,13 +195,14 @@ impl<'a> Subsetter<'a> {
|
||||
fn subset_table(&mut self, tag: Tag) -> FontResult<()> {
|
||||
match tag.value() {
|
||||
// These tables can just be copied.
|
||||
b"head" | b"name" | b"OS/2" | b"post" |
|
||||
b"head" | b"name" | b"OS/2" |
|
||||
b"cvt " | b"fpgm" | b"prep" | b"gasp" => self.copy_table(tag),
|
||||
|
||||
// These tables have more complex subsetting routines.
|
||||
b"hhea" => self.subset_hhea(),
|
||||
b"hmtx" => self.subset_hmtx(),
|
||||
b"maxp" => self.subset_maxp(),
|
||||
b"post" => self.subset_post(),
|
||||
b"cmap" => self.subset_cmap(),
|
||||
b"glyf" => self.subset_glyf(),
|
||||
b"loca" => self.subset_loca(),
|
||||
@ -253,11 +257,21 @@ impl<'a> Subsetter<'a> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Subset the `cmap` table by
|
||||
/// Subset the `post` table by removing all name information.
|
||||
fn subset_post(&mut self) -> FontResult<()> {
|
||||
let tag = "post".parse().unwrap();
|
||||
let post = self.read_table_data(tag)?;
|
||||
self.write_table_body(tag, |this| {
|
||||
this.body.write_u32::<BE>(0x00030000)?;
|
||||
Ok(this.body.extend(&post[4..32]))
|
||||
})
|
||||
}
|
||||
|
||||
/// Subset the `cmap` table by only including the selected characters.
|
||||
/// Always uses format 12 for simplicity.
|
||||
fn subset_cmap(&mut self) -> FontResult<()> {
|
||||
let tag = "cmap".parse().unwrap();
|
||||
|
||||
// Always uses format 12 for simplicity.
|
||||
self.write_table_body(tag, |this| {
|
||||
let mut groups = Vec::new();
|
||||
|
||||
@ -281,7 +295,7 @@ impl<'a> Subsetter<'a> {
|
||||
this.body.write_u16::<BE>(0)?;
|
||||
this.body.write_u16::<BE>(1)?;
|
||||
this.body.write_u16::<BE>(3)?;
|
||||
this.body.write_u16::<BE>(1)?;
|
||||
this.body.write_u16::<BE>(10)?;
|
||||
this.body.write_u32::<BE>(12)?;
|
||||
|
||||
// Write the subtable header.
|
||||
@ -319,27 +333,23 @@ impl<'a> Subsetter<'a> {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Extract the glyph data.
|
||||
let mut glyph_data = glyf.get(start as usize .. end as usize)
|
||||
.take_invalid("missing glyph data")?.to_vec();
|
||||
|
||||
// Construct a cursor to operate on the data.
|
||||
let mut cursor = Cursor::new(&mut glyph_data);
|
||||
let num_contours = cursor.read_i16::<BE>()?;
|
||||
|
||||
// This is a composite glyph
|
||||
let num_contours = cursor.read_i16::<BE>()?;
|
||||
if num_contours < 0 {
|
||||
cursor.seek(SeekFrom::Current(8))?;
|
||||
loop {
|
||||
let flags = cursor.read_u16::<BE>()?;
|
||||
|
||||
// Read the old glyph index.
|
||||
let glyph_index = cursor.read_u16::<BE>()?;
|
||||
let old_glyph_index = cursor.read_u16::<BE>()?;
|
||||
|
||||
// Compute the new glyph index by searching for its index
|
||||
// in the glyph vector.
|
||||
let new_glyph_index = this.glyphs.iter()
|
||||
.position(|&g| g == glyph_index)
|
||||
.position(|&g| g == old_glyph_index)
|
||||
.take_invalid("invalid composite glyph")? as u16;
|
||||
|
||||
// Overwrite the old index with the new one.
|
||||
@ -386,7 +396,14 @@ impl<'a> Subsetter<'a> {
|
||||
let len = loca.length(glyph).take_invalid("missing loca entry")?;
|
||||
offset += len;
|
||||
}
|
||||
this.body.write_u32::<BE>(offset)?;
|
||||
|
||||
// Write the final offset (so that it is known how long the last glyph is).
|
||||
if format == 0 {
|
||||
this.body.write_u16::<BE>((offset / 2) as u16)?;
|
||||
} else {
|
||||
this.body.write_u32::<BE>(offset)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
@ -399,7 +416,7 @@ impl<'a> Subsetter<'a> {
|
||||
writer(self)?;
|
||||
let end = self.body.len();
|
||||
|
||||
// Pad with zeroes.
|
||||
// Pad with zeros.
|
||||
while (self.body.len() - start) % 4 != 0 {
|
||||
self.body.push(0);
|
||||
}
|
||||
@ -412,6 +429,11 @@ impl<'a> Subsetter<'a> {
|
||||
}))
|
||||
}
|
||||
|
||||
/// Whether this font contains a given table.
|
||||
fn contains_table(&self, tag: Tag) -> bool {
|
||||
self.tables.binary_search_by_key(&tag, |r| r.tag).is_ok()
|
||||
}
|
||||
|
||||
/// Read a table with the opentype reader.
|
||||
fn read_table<T: Table>(&mut self) -> FontResult<T> {
|
||||
self.reader.read_table::<T>().map_err(Into::into)
|
||||
@ -428,15 +450,10 @@ impl<'a> Subsetter<'a> {
|
||||
.get(record.offset as usize .. (record.offset + record.length) as usize)
|
||||
.take_invalid("missing table data")
|
||||
}
|
||||
|
||||
/// Whether this font contains a given table.
|
||||
fn contains_table(&self, tag: Tag) -> bool {
|
||||
self.tables.binary_search_by_key(&tag, |r| r.tag).is_ok()
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate a checksum over the sliced data as sum of u32's. The data length has to be a multiple
|
||||
/// of four.
|
||||
/// Calculate a checksum over the sliced data as sum of u32's. The data
|
||||
/// length has to be a multiple of four.
|
||||
fn calculate_check_sum(data: &[u8]) -> u32 {
|
||||
let mut sum = 0u32;
|
||||
data.chunks_exact(4).for_each(|c| {
|
||||
@ -452,7 +469,8 @@ fn calculate_check_sum(data: &[u8]) -> u32 {
|
||||
|
||||
/// Helper trait to create subsetting errors more easily.
|
||||
trait TakeInvalid<T>: Sized {
|
||||
/// Pull the type out of the option, returning an invalid font error if self was not valid.
|
||||
/// Pull the type out of self, returning an invalid font
|
||||
/// error if self was not valid.
|
||||
fn take_invalid<S: Into<String>>(self, message: S) -> FontResult<T>;
|
||||
}
|
||||
|
||||
@ -465,19 +483,80 @@ impl<T> TakeInvalid<T> for Option<T> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::fs;
|
||||
use crate::font::Font;
|
||||
use opentype::{OpenTypeReader, TableRecord};
|
||||
use opentype::tables::{CharMap, Locations};
|
||||
|
||||
#[test]
|
||||
fn subset() {
|
||||
let program = std::fs::read("../fonts/SourceSansPro-Regular.ttf").unwrap();
|
||||
const ALPHABET: &str = "abcdefghijklmnopqrstuvwxyz";
|
||||
|
||||
/// Stores some tables for inspections.
|
||||
struct Tables<'a> {
|
||||
cmap: CharMap,
|
||||
loca: Locations,
|
||||
glyf_data: &'a [u8],
|
||||
}
|
||||
|
||||
impl<'a> Tables<'a> {
|
||||
/// Load the tables from the font.
|
||||
fn new(font: &'a Font) -> Tables<'a> {
|
||||
let mut reader = OpenTypeReader::from_slice(&font.program);
|
||||
|
||||
let cmap = reader.read_table::<CharMap>().unwrap();
|
||||
let loca = reader.read_table::<Locations>().unwrap();
|
||||
|
||||
let &TableRecord { offset, length, .. } = reader.get_table_record("glyf").unwrap();
|
||||
let glyf_data = &font.program[offset as usize .. (offset + length) as usize];
|
||||
|
||||
Tables { cmap, loca, glyf_data }
|
||||
}
|
||||
|
||||
/// Return the glyph data for the given character.
|
||||
fn glyph_data(&self, character: char) -> Option<&'a [u8]> {
|
||||
let glyph = self.cmap.get(character)?;
|
||||
let start = self.loca.offset(glyph)?;
|
||||
let end = self.loca.offset(glyph + 1)?;
|
||||
Some(&self.glyf_data[start as usize .. end as usize])
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the original and subsetted version of a font with the characters
|
||||
/// included that are given as the chars of the string.
|
||||
fn subset(font: &str, chars: &str) -> (Font, Font) {
|
||||
let program = fs::read(format!("../fonts/{}", font)).unwrap();
|
||||
let font = Font::new(program).unwrap();
|
||||
|
||||
let subsetted = font.subsetted(
|
||||
"abcdefghijklmnopqrstuvwxyz‼".chars(),
|
||||
chars.chars(),
|
||||
&["name", "OS/2", "post", "head", "hhea", "hmtx", "maxp", "cmap",
|
||||
"cvt ", "fpgm", "prep", "loca", "glyf"][..]
|
||||
"cvt ", "fpgm", "prep", "gasp", "loca", "glyf"][..]
|
||||
).unwrap();
|
||||
|
||||
std::fs::write("../target/SourceSansPro-Subsetted.ttf", &subsetted.program).unwrap();
|
||||
(font, subsetted)
|
||||
}
|
||||
|
||||
/// Writes subsetted versions of two fonts into the `target` directory
/// so that they can be inspected manually.
#[test]
fn manual_files() {
    let (_, source_sans) = subset("SourceSansPro-Regular.ttf", ALPHABET);
    fs::write("../target/SourceSansPro-Subsetted.ttf", &source_sans.program).unwrap();

    let (_, noto_sans) = subset("NotoSans-Regular.ttf", ALPHABET);
    fs::write("../target/NotoSans-Subsetted.ttf", &noto_sans.program).unwrap();
}
|
||||
|
||||
/// Checks that subsetting leaves the glyph data of individual glyphs
/// untouched by comparing the original font with its subsetted version.
#[test]
fn glyph_data() {
    let (original, subsetted) = subset("SourceSansPro-Regular.ttf", ALPHABET);
    let original_tables = Tables::new(&original);
    let subsetted_tables = Tables::new(&subsetted);

    for character in ALPHABET.chars() {
        // The composite glyphs are skipped.
        if character == 'i' || character == 'j' {
            continue;
        }

        assert_eq!(
            original_tables.glyph_data(character),
            subsetted_tables.glyph_data(character)
        );
    }
}
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ pub fn layout(text: &str, ctx: TextContext) -> LayoutResult<BoxLayout> {
|
||||
}).ok_or_else(|| LayoutError::NoSuitableFont(character))?;
|
||||
|
||||
// Add the char width to the total box width.
|
||||
let char_width = font.widths[font.map(character) as usize] * ctx.style.font_size;
|
||||
let char_width = font.widths[font.encode(character) as usize] * ctx.style.font_size;
|
||||
width += char_width;
|
||||
|
||||
// Change the font if necessary.
|
||||
|
@ -30,10 +30,9 @@
|
||||
//! ("CMU-Serif-Italic.ttf", font!["Computer Modern", Italic, Serif]),
|
||||
//! ("NotoEmoji-Regular.ttf", font!["Noto", Regular, Serif, SansSerif, Monospace]),
|
||||
//! ]));
|
||||
//! // Typeset the source code into a document.
|
||||
//! let document = typesetter.typeset(src).unwrap();
|
||||
//!
|
||||
//! // Export the document into a PDF file.
|
||||
//! // Typeset the document and export it into a PDF file.
|
||||
//! let document = typesetter.typeset(src).unwrap();
|
||||
//! # /*
|
||||
//! let file = File::create("hello-typeset.pdf").unwrap();
|
||||
//! # */
|
||||
|
Loading…
x
Reference in New Issue
Block a user