diff --git a/src/bin/main.rs b/src/bin/main.rs index 4ed643d9c..612375807 100644 --- a/src/bin/main.rs +++ b/src/bin/main.rs @@ -1,7 +1,7 @@ use std::env; use std::error::Error; use std::fs::File; -use std::io::Read; +use std::io::{Read, BufWriter}; use std::path::{Path, PathBuf}; use std::process; @@ -19,15 +19,11 @@ fn main() { /// The actual main function. fn run() -> Result<(), Box> { - // Check the command line arguments. let args: Vec = env::args().collect(); if args.len() < 2 || args.len() > 3 { help_and_quit(); } - // Open the input file. - let mut file = File::open(&args[1]).map_err(|_| "failed to open source file")?; - let source_path = Path::new(&args[1]); // Compute the output filename from the input filename by replacing the extension. @@ -39,14 +35,13 @@ fn run() -> Result<(), Box> { PathBuf::from(&args[2]) }; - // We do not want to overwrite the source file. if dest_path == source_path { return Err("source and destination path are the same".into()); } - // Read the input file. let mut src = String::new(); - file.read_to_string(&mut src).map_err(|_| "failed to read from source file")?; + let mut source_file = File::open(source_path).map_err(|_| "failed to open source file")?; + source_file.read_to_string(&mut src).map_err(|_| "failed to read from source file")?; // Create a typesetter with a font provider that provides the default fonts. let mut typesetter = Typesetter::new(); @@ -71,15 +66,15 @@ fn run() -> Result<(), Box> { // Export the document into a PDF file. let exporter = PdfExporter::new(); - let output_file = File::create(&dest_path)?; - exporter.export(&document, output_file)?; + let dest_file = File::create(&dest_path)?; + exporter.export(&document, BufWriter::new(dest_file))?; Ok(()) } /// Print a usage message and quit. fn help_and_quit() { - let name = env::args().next().unwrap_or("typeset".to_string()); + let name = env::args().next().unwrap_or("typst".to_string()); println!("usage: {} source [destination]", name); process::exit(0); } diff --git a/src/export/pdf.rs b/src/export/pdf.rs index 639d18b93..aae624155 100644 --- a/src/export/pdf.rs +++ b/src/export/pdf.rs @@ -42,7 +42,7 @@ struct PdfEngine<'d, W: Write> { } /// Offsets for the various groups of ids. -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] struct Offsets { catalog: Ref, page_tree: Ref, @@ -67,8 +67,7 @@ impl<'d, W: Write> PdfEngine<'d, W> { let mut font = 0usize; let mut chars = vec![HashSet::new(); doc.fonts.len()]; - // Iterate through every text object on every page and find out which characters they - // use. + // Find out which characters are used for each font. for page in &doc.pages { for action in &page.actions { match action { @@ -141,16 +140,14 @@ impl<'d, W: Write> PdfEngine<'d, W> { /// Write the content of a page. fn write_page(&mut self, id: u32, page: &DocPage) -> PdfResult<()> { - // The currently used font. + let mut text = Text::new(); let mut active_font = (std::usize::MAX, 0.0); - // The last set position and font, these get flushed when content is written. + // The last set position and font, + // these only get flushed lazily when content is written. let mut next_pos = Some(Size2D::zero()); let mut next_font = None; - // The output text. - let mut text = Text::new(); - for action in &page.actions { match action { LayoutAction::MoveAbsolute(pos) => next_pos = Some(*pos), @@ -174,7 +171,7 @@ impl<'d, W: Write> PdfEngine<'d, W> { } // Write the text. - text.tj(self.fonts[active_font.0].encode(&string)); + text.tj(self.fonts[active_font.0].encode_text(&string)); }, } } @@ -227,7 +224,7 @@ impl<'d, W: Write> PdfEngine<'d, W> { .font_file_2(id + 4) )?; - // The CMap, which maps glyphs to unicode codepoints. + // Write the CMap, which maps glyphs to unicode codepoints. let mapping = font.font.mapping.iter().map(|(&c, &cid)| (cid, c)); self.writer.write_obj(id + 3, &CMap::new("Custom", system_info, mapping))?; @@ -261,15 +258,14 @@ struct PdfFont { } impl PdfFont { - /// Create a subetted version of the font and calculate some information needed for creating the - /// _PDF_. + /// Create a subetted version of the font and calculate some information + /// needed for creating the _PDF_. fn new(font: &Font, chars: &HashSet) -> PdfResult { /// Convert a size into a _PDF_ glyph unit. fn size_to_glyph_unit(size: Size) -> GlyphUnit { (1000.0 * size.to_pt()).round() as GlyphUnit } - // Subset the font using the selected characters. let subset_result = font.subsetted( chars.iter().cloned(), &["head", "hhea", "hmtx", "maxp", "cmap", "cvt ", "fpgm", "prep", "loca", "glyf"][..] @@ -283,7 +279,6 @@ impl PdfFont { Err(err) => return Err(err.into()), }; - // Specify flags for the font. let mut flags = FontFlags::empty(); flags.set(FontFlags::FIXED_PITCH, font.metrics.monospace); flags.set(FontFlags::SERIF, font.name.contains("Serif")); @@ -291,7 +286,6 @@ impl PdfFont { flags.set(FontFlags::ITALIC, font.metrics.italic); flags.insert(FontFlags::SMALL_CAP); - // Transform the widths. let widths = subsetted.widths.iter().map(|&x| size_to_glyph_unit(x)).collect(); Ok(PdfFont { diff --git a/src/font/loader.rs b/src/font/loader.rs index 37c52f09d..a7c3bf4ad 100644 --- a/src/font/loader.rs +++ b/src/font/loader.rs @@ -1,4 +1,4 @@ -//! Loads fonts matching queries. +//! Loading of fonts matching queries. use std::cell::{RefCell, Ref}; use std::collections::HashMap; @@ -12,7 +12,7 @@ pub struct FontLoader<'p> { /// The font providers. providers: Vec<&'p (dyn FontProvider + 'p)>, /// The fonts available from each provider (indexed like `providers`). - provider_fonts: Vec<&'p [FontInfo]>, + infos: Vec<&'p [FontInfo]>, /// The internal state. Uses interior mutability because the loader works behind /// an immutable reference to ease usage. state: RefCell>, @@ -20,29 +20,29 @@ pub struct FontLoader<'p> { /// Internal state of the font loader (seperated to wrap it in a `RefCell`). struct FontLoaderState<'p> { - /// The loaded fonts alongside their external indices. Some fonts may not have external indices - /// because they were loaded but did not contain the required character. However, these are - /// still stored because they may be needed later. The index is just set to `None` then. + /// The loaded fonts alongside their external indices. Some fonts may not + /// have external indices because they were loaded but did not contain the + /// required character. However, these are still stored because they may + /// be needed later. The index is just set to `None` then. fonts: Vec<(Option, Font)>, /// Allows to retrieve a font (index) quickly if a query was submitted before. query_cache: HashMap, /// Allows to re-retrieve loaded fonts by their info instead of loading them again. info_cache: HashMap<&'p FontInfo, usize>, - /// Indexed by external indices (the ones inside the tuples in the `fonts` vector) and maps to - /// internal indices (the actual indices into the vector). + /// Indexed by external indices (the ones inside the tuples in the `fonts` vector) + /// and maps to internal indices (the actual indices into the vector). inner_index: Vec, } impl<'p> FontLoader<'p> { /// Create a new font loader using a set of providers. - #[inline] pub fn new(providers: &'p [P]) -> FontLoader<'p> where P: AsRef { let providers: Vec<_> = providers.iter().map(|p| p.as_ref()).collect(); - let provider_fonts = providers.iter().map(|prov| prov.available()).collect(); + let infos = providers.iter().map(|prov| prov.available()).collect(); FontLoader { providers, - provider_fonts, + infos, state: RefCell::new(FontLoaderState { query_cache: HashMap::new(), info_cache: HashMap::new(), @@ -66,26 +66,24 @@ impl<'p> FontLoader<'p> { } drop(state); - // The outermost loop goes over the fallbacks because we want to serve the font that matches - // the first possible class. + // The outermost loop goes over the fallbacks because we want to serve the + // font that matches the first possible class. for class in &query.fallback { - // For each class now go over all font infos from all font providers. - for (provider, infos) in self.providers.iter().zip(&self.provider_fonts) { + // For each class now go over all fonts from all font providers. + for (provider, infos) in self.providers.iter().zip(&self.infos) { for info in infos.iter() { - let matches = info.classes.contains(class) - && query.classes.iter().all(|class| info.classes.contains(class)); + let viable = info.classes.contains(class); + let matches = viable && query.classes.iter() + .all(|class| info.classes.contains(class)); - // Proceed only if this font matches the query up to now. if matches { let mut state = self.state.borrow_mut(); - // Check if we have already loaded this font before, otherwise, we will load - // it from the provider. Anyway, have it stored and find out its internal - // index. + // Check if we have already loaded this font before, otherwise, + // we will load it from the provider. let index = if let Some(&index) = state.info_cache.get(info) { index } else if let Some(mut source) = provider.get(info) { - // Read the font program into a vector and parse it. let mut program = Vec::new(); source.read_to_end(&mut program).ok()?; let font = Font::new(program).ok()?; @@ -107,8 +105,8 @@ impl<'p> FontLoader<'p> { // This font is suitable, thus we cache the query result. state.query_cache.insert(query, index); - // Now we have to find out the external index of it or assign a new one - // if it has none. + // Now we have to find out the external index of it or assign + // a new one if it has none. let external_index = state.fonts[index].0.unwrap_or_else(|| { // We have to assign an external index before serving. let new_index = state.inner_index.len(); @@ -133,7 +131,8 @@ impl<'p> FontLoader<'p> { None } - /// Return the font previously loaded at this index. Panics if the index is not assigned. + /// Return the font previously loaded at this index. + /// Panics if the index is not assigned. #[inline] pub fn get_with_index(&self, index: usize) -> Ref { let state = self.state.borrow(); @@ -143,9 +142,9 @@ impl<'p> FontLoader<'p> { /// Move the whole list of fonts out. pub fn into_fonts(self) -> Vec { - // Sort the fonts by external index so that they are in the correct order. All fonts that - // were cached but not used by the outside are sorted to the back and are removed in the - // next step. + // Sort the fonts by external index so that they are in the correct order. + // All fonts that were cached but not used by the outside are sorted to the back + // and are removed in the next step. let mut fonts = self.state.into_inner().fonts; fonts.sort_by_key(|&(maybe_index, _)| match maybe_index { Some(index) => index, @@ -164,7 +163,7 @@ impl Debug for FontLoader<'_> { let state = self.state.borrow(); f.debug_struct("FontLoader") .field("providers", &self.providers.len()) - .field("provider_fonts", &self.provider_fonts) + .field("infos", &self.infos) .field("fonts", &state.fonts) .field("query_cache", &state.query_cache) .field("info_cache", &state.info_cache) @@ -180,7 +179,6 @@ pub struct FontQuery { pub character: char, /// Which classes the font has to be part of. pub classes: Vec, - /// A sequence of classes. The font matching the leftmost class in this sequence - /// should be returned. + /// The font matching the leftmost class in this sequence should be returned. pub fallback: Vec, } diff --git a/src/font/mod.rs b/src/font/mod.rs index 5d257a9a8..80d900dc6 100644 --- a/src/font/mod.rs +++ b/src/font/mod.rs @@ -1,7 +1,7 @@ -//! Font loading and transforming. +//! Font loading and subsetting. //! //! # Font handling -//! To do the typesetting, the typesetting engine needs font data. To be highly portable the engine +//! To do the typesetting, the engine needs font data. However, to be highly portable the engine //! itself assumes nothing about the environment. To still work with fonts, the consumer of this //! library has to add _font providers_ to their typesetting instance. These can be queried for font //! data given flexible font filters specifying required font families and styles. A font provider @@ -19,28 +19,29 @@ use opentype::{Error as OpentypeError, OpenTypeReader}; use opentype::tables::{Header, Name, CharMap, HorizontalMetrics, Post, OS2}; use opentype::types::{MacStyleFlags, NameEntry}; -pub use self::loader::{FontLoader, FontQuery}; use self::subset::Subsetter; use crate::size::Size; mod loader; mod subset; +pub use loader::{FontLoader, FontQuery}; -/// A loaded and parsed font program. + +/// A parsed _OpenType_ font program. #[derive(Debug, Clone)] pub struct Font { - /// The base name of the font. + /// The name of the font. pub name: String, - /// The raw bytes of the font program. + /// The complete, raw bytes of the font program. pub program: Vec, - /// A mapping from character codes to glyph ids. + /// The mapping from character codes to glyph ids. pub mapping: HashMap, /// The widths of the glyphs indexed by glyph id. pub widths: Vec, - /// The fallback glyph. + /// The id of the fallback glyph. pub default_glyph: u16, - /// The typesetting-relevant metrics of this font. + /// The typesetting or exporting-relevant metrics of this font. pub metrics: FontMetrics, } @@ -51,32 +52,31 @@ pub struct FontMetrics { pub italic: bool, /// Whether font is monospace. pub monospace: bool, - /// The angle of text in italics. + /// The angle of text in italics (in counter-clockwise degrees from vertical). pub italic_angle: f32, - /// The glyph bounding box: [x_min, y_min, x_max, y_max], + /// The extremal values [x_min, y_min, x_max, y_max] for all glyph bounding boxes. pub bounding_box: [Size; 4], - /// The typographics ascender. + /// The typographic ascender. pub ascender: Size, - /// The typographics descender. + /// The typographic descender. pub descender: Size, /// The approximate height of capital letters. pub cap_height: Size, - /// The weight class of the font. + /// The weight class of the font (from 100 for thin to 900 for heavy). pub weight_class: u16, } impl Font { - /// Create a new font from a raw font program. + /// Create a `Font` from a raw font program. pub fn new(program: Vec) -> FontResult { - // Create an OpentypeReader to parse the font tables. let cursor = Cursor::new(&program); let mut reader = OpenTypeReader::new(cursor); - // Read the relevant tables - // (all of these are required by the OpenType specification, so we expect them). + // All of these tables are required by the OpenType specification, + // so we do not really have to handle the case that they are missing. let head = reader.read_table::
()?; let name = reader.read_table::()?; - let os2 = reader.read_table::()?; + let os2 = reader.read_table::()?; let cmap = reader.read_table::()?; let hmtx = reader.read_table::()?; let post = reader.read_table::()?; @@ -85,15 +85,13 @@ impl Font { let font_unit_ratio = 1.0 / (head.units_per_em as f32); let font_unit_to_size = |x| Size::pt(font_unit_ratio * x); - // Find out the name of the font. - let font_name = name.get_decoded(NameEntry::PostScriptName) + let font_name = name + .get_decoded(NameEntry::PostScriptName) .unwrap_or_else(|| "unknown".to_owned()); - // Convert the widths from font units to sizes. let widths = hmtx.metrics.iter() .map(|m| font_unit_to_size(m.advance_width as f32)).collect(); - // Calculate the typesetting-relevant metrics. let metrics = FontMetrics { italic: head.mac_style.contains(MacStyleFlags::ITALIC), monospace: post.is_fixed_pitch, @@ -120,51 +118,82 @@ impl Font { }) } - /// Map a character to it's glyph index. + /// Encode a character into it's glyph id. #[inline] - pub fn map(&self, c: char) -> u16 { - self.mapping.get(&c).map(|&g| g).unwrap_or(self.default_glyph) + pub fn encode(&self, character: char) -> u16 { + self.mapping.get(&character).map(|&g| g).unwrap_or(self.default_glyph) } - /// Encode the given text for this font (into glyph ids). + /// Encode the given text into a vector of glyph ids. #[inline] - pub fn encode(&self, text: &str) -> Vec { - // Each glyph id takes two bytes that we encode in big endian. - let mut bytes = Vec::with_capacity(2 * text.len()); - for glyph in text.chars().map(|c| self.map(c)) { + pub fn encode_text(&self, text: &str) -> Vec { + const BYTES_PER_GLYPH: usize = 2; + let mut bytes = Vec::with_capacity(BYTES_PER_GLYPH * text.len()); + for c in text.chars() { + let glyph = self.encode(c); bytes.push((glyph >> 8) as u8); bytes.push((glyph & 0xff) as u8); } bytes } - /// Generate a subsetted version of this font including only the chars listed in `chars`. + /// Generate a subsetted version of this font. /// - /// The filter functions decides which tables to keep and which not based on their tag. + /// This version includes only the given `chars` and _OpenType_ `tables`. #[inline] pub fn subsetted(&self, chars: C, tables: I) -> Result - where C: IntoIterator, I: IntoIterator, S: AsRef { + where + C: IntoIterator, + I: IntoIterator, + S: AsRef + { Subsetter::subset(self, chars, tables) } } -/// Categorizes a font. +/// A type that provides fonts. +pub trait FontProvider { + /// Returns a font with the given info if this provider has one. + fn get(&self, info: &FontInfo) -> Option>; + + /// The available fonts this provider can serve. While these should generally + /// be retrievable through the `get` method, this is not guaranteed. + fn available<'p>(&'p self) -> &'p [FontInfo]; +} + +/// A wrapper trait around `Read + Seek`. /// -/// Can be constructed conveniently with the [`font`] macro. +/// This type is needed because currently you can't make a trait object with two traits, like +/// `Box`. Automatically implemented for all types that are [`Read`] and [`Seek`]. +pub trait FontData: Read + Seek {} +impl FontData for T where T: Read + Seek {} + +/// Classifies a font by listing the font classes it is part of. +/// +/// All fonts with the same [`FontInfo`] are part of the same intersection +/// of [font classes](FontClass). +/// +/// This structure can be constructed conveniently through the [`font`] macro. #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct FontInfo { - /// The font families this font is part of. + /// The font classes this font is part of. pub classes: Vec, } impl FontInfo { - /// Create a new font info from an iterator of classes. + /// Create a new font info from a collection of classes. + #[inline] pub fn new(classes: I) -> FontInfo where I: IntoIterator { - FontInfo { classes: classes.into_iter().collect() } + FontInfo { + classes: classes.into_iter().collect() + } } } /// A class of fonts. +/// +/// The set of all fonts can be classified into subsets of font classes like +/// _serif_ or _bold_. This enum lists such subclasses. #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub enum FontClass { Serif, @@ -183,27 +212,18 @@ pub enum FontClass { /// into custom `Family`-variants and others can be named directly. /// /// # Examples -/// The font _Noto Sans_ in regular typeface. /// ``` /// # use typeset::font; +/// // Noto Sans in regular typeface. /// font!["NotoSans", "Noto", Regular, SansSerif]; -/// ``` /// -/// The font _Noto Serif_ in italics and boldface. -/// ``` -/// # use typeset::font; +/// // Noto Serif in italics and boldface. /// font!["NotoSerif", "Noto", Bold, Italic, Serif]; -/// ``` /// -/// The font _Arial_ in italics. -/// ``` -/// # use typeset::font; +/// // Arial in italics. /// font!["Arial", Italic, SansSerif]; -/// ``` /// -/// The font _Noto Emoji_, which works with all base families. 🙂 -/// ``` -/// # use typeset::font; +/// // Noto Emoji, which works in sans-serif and serif contexts. /// font!["NotoEmoji", "Noto", Regular, SansSerif, Serif, Monospace]; /// ``` #[macro_export] @@ -229,37 +249,21 @@ macro_rules! font { }}; } -/// A type that provides fonts. -pub trait FontProvider { - /// Returns a font with the given info if this provider has one. - fn get(&self, info: &FontInfo) -> Option>; - - /// The available fonts this provider can serve. While these should generally be retrievable - /// through the `get` method, it does not have to be guaranteed that a font info, that is - /// contained, here yields a `Some` value when passed into `get`. - fn available<'p>(&'p self) -> &'p [FontInfo]; -} - -/// A wrapper trait around `Read + Seek`. -/// -/// This type is needed because currently you can't make a trait object with two traits, like -/// `Box`. Automatically implemented for all types that are [`Read`] and [`Seek`]. -pub trait FontData: Read + Seek {} -impl FontData for T where T: Read + Seek {} - /// A font provider serving fonts from a folder on the local file system. #[derive(Debug)] pub struct FileSystemFontProvider { - /// The root folder. + /// The base folder all other paths are relative to. base: PathBuf, /// Paths of the fonts relative to the `base` path. paths: Vec, - /// The information for the font with the same index in `paths`. + /// The info for the font with the same index in `paths`. infos: Vec, } impl FileSystemFontProvider { - /// Create a new provider from a folder and an iterator of pairs of font paths and font infos. + /// Create a new provider serving fonts from a base path. The `fonts` iterator + /// should contain paths of fonts relative to the base alongside matching + /// infos for these fonts. /// /// # Example /// Serve the two fonts `NotoSans-Regular` and `NotoSans-Italic` from the local folder @@ -271,21 +275,20 @@ impl FileSystemFontProvider { /// ("NotoSans-Italic.ttf", font!["NotoSans", Italic, SansSerif]), /// ]); /// ``` - #[inline] - pub fn new(base: B, infos: I) -> FileSystemFontProvider + pub fn new(base: B, fonts: I) -> FileSystemFontProvider where B: Into, I: IntoIterator, P: Into, { - // Find out how long the iterator is at least, to reserve the correct capacity for the - // vectors. - let iter = infos.into_iter(); - let min = iter.size_hint().0; + let iter = fonts.into_iter(); - // Split the iterator into two seperated vectors. + // Find out how long the iterator is at least, to reserve the correct + // capacity for the vectors. + let min = iter.size_hint().0; let mut paths = Vec::with_capacity(min); let mut infos = Vec::with_capacity(min); + for (path, info) in iter { paths.push(path.into()); infos.push(info); @@ -302,12 +305,10 @@ impl FileSystemFontProvider { impl FontProvider for FileSystemFontProvider { #[inline] fn get(&self, info: &FontInfo) -> Option> { - // Find the index of the font in both arrays (early exit if there is no match). - let index = self.infos.iter().position(|i| i == info)?; - - // Open the file and return a boxed reader operating on it. + let index = self.infos.iter().position(|c| c == info)?; let path = &self.paths[index]; - let file = File::open(self.base.join(path)).ok()?; + let full_path = self.base.join(path); + let file = File::open(full_path).ok()?; Some(Box::new(BufReader::new(file)) as Box) } @@ -317,13 +318,14 @@ impl FontProvider for FileSystemFontProvider { } } + /// The error type for font operations. pub enum FontError { /// The font file is incorrect. InvalidFont(String), /// A character requested for subsetting was not present in the source font. MissingCharacter(char), - /// A requested table was not present. + /// A requested or required table was not present. MissingTable(String), /// The table is unknown to the subsetting engine. UnsupportedTable(String), diff --git a/src/font/subset.rs b/src/font/subset.rs index 006c00e2a..840d9e6b4 100644 --- a/src/font/subset.rs +++ b/src/font/subset.rs @@ -30,13 +30,15 @@ pub struct Subsetter<'a> { impl<'a> Subsetter<'a> { /// Subset a font. See [`Font::subetted`] for more details. pub fn subset(font: &Font, chars: C, tables: I) -> Result - where C: IntoIterator, I: IntoIterator, S: AsRef { - // Parse some header information. + where + C: IntoIterator, + I: IntoIterator, + S: AsRef + { let mut reader = OpenTypeReader::from_slice(&font.program); + let outlines = reader.outlines()?; let table_records = reader.tables()?.to_vec(); - - // Store all chars we want in a vector. let chars: Vec<_> = chars.into_iter().collect(); let subsetter = Subsetter { @@ -64,7 +66,7 @@ impl<'a> Subsetter<'a> { // which glyphs are additionally used by composite glyphs. self.find_glyphs()?; - // Write all the tables the callee wants. + // Copy/subset all the tables the caller wants. for table in tables.into_iter() { let tag = table.as_ref().parse() .map_err(|_| FontError::UnsupportedTable(table.as_ref().to_string()))?; @@ -91,20 +93,19 @@ impl<'a> Subsetter<'a> { /// Store all glyphs the subset shall contain into `self.glyphs`. fn find_glyphs(&mut self) -> FontResult<()> { if self.outlines == Outlines::TrueType { - // Parse the necessary information. let char_map = self.read_table::()?; let glyf = self.read_table::()?; - // Add the default glyph at index 0 in any case. + // The default glyph should always be at index 0. self.glyphs.push(self.font.default_glyph); - // Add all the glyphs for the chars requested. for &c in &self.chars { let glyph = char_map.get(c).ok_or_else(|| FontError::MissingCharacter(c))?; self.glyphs.push(glyph); } - // Collect the composite glyphs. + // Collect the glyphs not used mapping from characters but used in + // composite glyphs, too. let mut i = 0; while i < self.glyphs.len() as u16 { let glyph_id = self.glyphs[i as usize]; @@ -115,6 +116,7 @@ impl<'a> Subsetter<'a> { self.glyphs.push(composite); } } + i += 1; } } else { @@ -127,13 +129,13 @@ impl<'a> Subsetter<'a> { /// Prepend the new header to the constructed body. fn write_header(&mut self) -> FontResult<()> { // Create an output buffer - let header_len = 12 + self.records.len() * 16; + const BASE_HEADER_LEN: usize = 12; + const TABLE_RECORD_LEN: usize = 16; + let header_len = BASE_HEADER_LEN + self.records.len() * TABLE_RECORD_LEN; let mut header = Vec::with_capacity(header_len); - // Compute the first four header entries. let num_tables = self.records.len() as u16; - // The highester power lower than the table count. let mut max_power = 1u16; while max_power * 2 <= num_tables { max_power *= 2; @@ -144,7 +146,7 @@ impl<'a> Subsetter<'a> { let entry_selector = (max_power as f32).log2() as u16; let range_shift = num_tables * 16 - search_range; - // Write the base header + // Write the base OpenType header header.write_u32::(match self.outlines { Outlines::TrueType => 0x00010000, Outlines::CFF => 0x4f54544f, @@ -169,7 +171,7 @@ impl<'a> Subsetter<'a> { Ok(()) } - /// Compute the new widths. + /// Compute the new subsetted widths vector. fn compute_widths(&self) -> FontResult> { let mut widths = Vec::with_capacity(self.glyphs.len()); for &glyph in &self.glyphs { @@ -180,11 +182,12 @@ impl<'a> Subsetter<'a> { Ok(widths) } - /// Compute the new mapping. + /// Compute the new character to glyph id mapping. fn compute_mapping(&self) -> HashMap { - // The mapping is basically just the index in the char vector, but we add one + // The mapping is basically just the index into the char vector, but we add one // to each index here because we added the default glyph to the front. - self.chars.iter().enumerate().map(|(i, &c)| (c, 1 + i as u16)) + self.chars.iter().enumerate() + .map(|(i, &c)| (c, 1 + i as u16)) .collect::>() } @@ -192,13 +195,14 @@ impl<'a> Subsetter<'a> { fn subset_table(&mut self, tag: Tag) -> FontResult<()> { match tag.value() { // These tables can just be copied. - b"head" | b"name" | b"OS/2" | b"post" | + b"head" | b"name" | b"OS/2" | b"cvt " | b"fpgm" | b"prep" | b"gasp" => self.copy_table(tag), // These tables have more complex subsetting routines. b"hhea" => self.subset_hhea(), b"hmtx" => self.subset_hmtx(), b"maxp" => self.subset_maxp(), + b"post" => self.subset_post(), b"cmap" => self.subset_cmap(), b"glyf" => self.subset_glyf(), b"loca" => self.subset_loca(), @@ -253,11 +257,21 @@ impl<'a> Subsetter<'a> { }) } - /// Subset the `cmap` table by + /// Subset the `post` table by removing all name information. + fn subset_post(&mut self) -> FontResult<()> { + let tag = "post".parse().unwrap(); + let post = self.read_table_data(tag)?; + self.write_table_body(tag, |this| { + this.body.write_u32::(0x00030000)?; + Ok(this.body.extend(&post[4..32])) + }) + } + + /// Subset the `cmap` table by only including the selected characters. + /// Always uses format 12 for simplicity. fn subset_cmap(&mut self) -> FontResult<()> { let tag = "cmap".parse().unwrap(); - // Always uses format 12 for simplicity. self.write_table_body(tag, |this| { let mut groups = Vec::new(); @@ -281,7 +295,7 @@ impl<'a> Subsetter<'a> { this.body.write_u16::(0)?; this.body.write_u16::(1)?; this.body.write_u16::(3)?; - this.body.write_u16::(1)?; + this.body.write_u16::(10)?; this.body.write_u32::(12)?; // Write the subtable header. @@ -319,27 +333,23 @@ impl<'a> Subsetter<'a> { continue; } - // Extract the glyph data. let mut glyph_data = glyf.get(start as usize .. end as usize) .take_invalid("missing glyph data")?.to_vec(); - - // Construct a cursor to operate on the data. let mut cursor = Cursor::new(&mut glyph_data); - let num_contours = cursor.read_i16::()?; // This is a composite glyph + let num_contours = cursor.read_i16::()?; if num_contours < 0 { cursor.seek(SeekFrom::Current(8))?; loop { let flags = cursor.read_u16::()?; - // Read the old glyph index. - let glyph_index = cursor.read_u16::()?; + let old_glyph_index = cursor.read_u16::()?; // Compute the new glyph index by searching for it's index // in the glyph vector. let new_glyph_index = this.glyphs.iter() - .position(|&g| g == glyph_index) + .position(|&g| g == old_glyph_index) .take_invalid("invalid composite glyph")? as u16; // Overwrite the old index with the new one. @@ -386,7 +396,14 @@ impl<'a> Subsetter<'a> { let len = loca.length(glyph).take_invalid("missing loca entry")?; offset += len; } - this.body.write_u32::(offset)?; + + // Write the final offset (so that it is known how long the last glyph is). + if format == 0 { + this.body.write_u16::((offset / 2) as u16)?; + } else { + this.body.write_u32::(offset)?; + } + Ok(()) }) } @@ -399,7 +416,7 @@ impl<'a> Subsetter<'a> { writer(self)?; let end = self.body.len(); - // Pad with zeroes. + // Pad with zeros. while (self.body.len() - start) % 4 != 0 { self.body.push(0); } @@ -412,6 +429,11 @@ impl<'a> Subsetter<'a> { })) } + /// Whether this font contains a given table. + fn contains_table(&self, tag: Tag) -> bool { + self.tables.binary_search_by_key(&tag, |r| r.tag).is_ok() + } + /// Read a table with the opentype reader. fn read_table(&mut self) -> FontResult { self.reader.read_table::().map_err(Into::into) @@ -428,15 +450,10 @@ impl<'a> Subsetter<'a> { .get(record.offset as usize .. (record.offset + record.length) as usize) .take_invalid("missing table data") } - - /// Whether this font contains a given table. - fn contains_table(&self, tag: Tag) -> bool { - self.tables.binary_search_by_key(&tag, |r| r.tag).is_ok() - } } -/// Calculate a checksum over the sliced data as sum of u32's. The data length has to be a multiple -/// of four. +/// Calculate a checksum over the sliced data as sum of u32's. The data +/// length has to be a multiple of four. fn calculate_check_sum(data: &[u8]) -> u32 { let mut sum = 0u32; data.chunks_exact(4).for_each(|c| { @@ -452,7 +469,8 @@ fn calculate_check_sum(data: &[u8]) -> u32 { /// Helper trait to create subsetting errors more easily. trait TakeInvalid: Sized { - /// Pull the type out of the option, returning an invalid font error if self was not valid. + /// Pull the type out of self, returning an invalid font + /// error if self was not valid. fn take_invalid>(self, message: S) -> FontResult; } @@ -465,19 +483,80 @@ impl TakeInvalid for Option { #[cfg(test)] mod tests { + use std::fs; use crate::font::Font; + use opentype::{OpenTypeReader, TableRecord}; + use opentype::tables::{CharMap, Locations}; - #[test] - fn subset() { - let program = std::fs::read("../fonts/SourceSansPro-Regular.ttf").unwrap(); + const ALPHABET: &str = "abcdefghijklmnopqrstuvwxyz"; + + /// Stores some tables for inspections. + struct Tables<'a> { + cmap: CharMap, + loca: Locations, + glyf_data: &'a [u8], + } + + impl<'a> Tables<'a> { + /// Load the tables from the font. + fn new(font: &'a Font) -> Tables<'a> { + let mut reader = OpenTypeReader::from_slice(&font.program); + + let cmap = reader.read_table::().unwrap(); + let loca = reader.read_table::().unwrap(); + + let &TableRecord { offset, length, .. } = reader.get_table_record("glyf").unwrap(); + let glyf_data = &font.program[offset as usize .. (offset + length) as usize]; + + Tables { cmap, loca, glyf_data } + } + + /// Return the glyph data for the given character. + fn glyph_data(&self, character: char) -> Option<&'a [u8]> { + let glyph = self.cmap.get(character)?; + let start = self.loca.offset(glyph)?; + let end = self.loca.offset(glyph + 1)?; + Some(&self.glyf_data[start as usize .. end as usize]) + } + } + + /// Return the original and subsetted version of a font with the characters + /// included that are given as the chars of the string. + fn subset(font: &str, chars: &str) -> (Font, Font) { + let program = fs::read(format!("../fonts/{}", font)).unwrap(); let font = Font::new(program).unwrap(); let subsetted = font.subsetted( - "abcdefghijklmnopqrstuvwxyz‼".chars(), + chars.chars(), &["name", "OS/2", "post", "head", "hhea", "hmtx", "maxp", "cmap", - "cvt ", "fpgm", "prep", "loca", "glyf"][..] + "cvt ", "fpgm", "prep", "gasp", "loca", "glyf"][..] ).unwrap(); - std::fs::write("../target/SourceSansPro-Subsetted.ttf", &subsetted.program).unwrap(); + (font, subsetted) + } + + /// A test that creates a subsetted fonts in the `target` directory + /// for manual inspection. + #[test] + fn manual_files() { + let subsetted = subset("SourceSansPro-Regular.ttf", ALPHABET).1; + fs::write("../target/SourceSansPro-Subsetted.ttf", &subsetted.program).unwrap(); + + let subsetted = subset("NotoSans-Regular.ttf", ALPHABET).1; + fs::write("../target/NotoSans-Subsetted.ttf", &subsetted.program).unwrap(); + } + + /// Tests whether the glyph data for specific glyphs match in the original + /// and subsetted version. + #[test] + fn glyph_data() { + let (font, subsetted) = subset("SourceSansPro-Regular.ttf", ALPHABET); + let font_tables = Tables::new(&font); + let subset_tables = Tables::new(&subsetted); + + // Go through all characters but skip the composite glyphs. + for c in ALPHABET.chars().filter(|&x| x != 'i' && x != 'j') { + assert_eq!(font_tables.glyph_data(c), subset_tables.glyph_data(c)); + } } } diff --git a/src/layout/text.rs b/src/layout/text.rs index 75a592542..6aa5ef719 100644 --- a/src/layout/text.rs +++ b/src/layout/text.rs @@ -32,7 +32,7 @@ pub fn layout(text: &str, ctx: TextContext) -> LayoutResult { }).ok_or_else(|| LayoutError::NoSuitableFont(character))?; // Add the char width to the total box width. - let char_width = font.widths[font.map(character) as usize] * ctx.style.font_size; + let char_width = font.widths[font.encode(character) as usize] * ctx.style.font_size; width += char_width; // Change the font if necessary. diff --git a/src/lib.rs b/src/lib.rs index d12e167a6..ed1e079ed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,10 +30,9 @@ //! ("CMU-Serif-Italic.ttf", font!["Computer Modern", Italic, Serif]), //! ("NotoEmoji-Regular.ttf", font!["Noto", Regular, Serif, SansSerif, Monospace]), //! ])); -//! // Typeset the source code into a document. -//! let document = typesetter.typeset(src).unwrap(); //! -//! // Export the document into a PDF file. +//! // Typeset the document and export it into a PDF file. +//! let document = typesetter.typeset(src).unwrap(); //! # /* //! let file = File::create("hello-typeset.pdf").unwrap(); //! # */