diff --git a/src/bin/main.rs b/src/bin/main.rs index 9e1403b2b..6ca1ba56e 100644 --- a/src/bin/main.rs +++ b/src/bin/main.rs @@ -1,16 +1,25 @@ use std::env; -use std::fs::File; use std::error::Error; -use std::process; +use std::fs::File; use std::io::Read; use std::path::{Path, PathBuf}; +use std::process; use typeset::Typesetter; use typeset::{font::FileSystemFontProvider, font_info}; use typeset::export::pdf::PdfExporter; +fn main() { + if let Err(err) = run() { + eprintln!("error: {}", err); + process::exit(1); + } +} + +/// The actual main function. fn run() -> Result<(), Box> { + // Check the command line arguments. let args: Vec = env::args().collect(); if args.len() < 2 || args.len() > 3 { help_and_quit(); @@ -19,9 +28,10 @@ fn run() -> Result<(), Box> { // Open the input file. let mut file = File::open(&args[1]).map_err(|_| "failed to open source file")?; - // The output file name. - let output_filename = if args.len() <= 2 { - let source_path = Path::new(&args[1]); + let source_path = Path::new(&args[1]); + + // Compute the output filename from the input filename by replacing the extension. + let dest_path = if args.len() <= 2 { let stem = source_path.file_stem().ok_or_else(|| "missing destation file name")?; let base = source_path.parent().ok_or_else(|| "missing destation folder")?; base.join(format!("{}.pdf", stem.to_string_lossy())) @@ -29,6 +39,11 @@ fn run() -> Result<(), Box> { PathBuf::from(&args[2]) }; + // We do not want to overwrite the source file. + if dest_path == source_path { + return Err("source and destination path are the same".into()); + } + // Read the input file. let mut src = String::new(); file.read_to_string(&mut src).map_err(|_| "failed to read from source file")?; @@ -50,22 +65,15 @@ fn run() -> Result<(), Box> { // Export the document into a PDF file. 
let exporter = PdfExporter::new(); - let output_file = File::create(&output_filename)?; + let output_file = File::create(&dest_path)?; exporter.export(&document, output_file)?; Ok(()) } -fn main() { - if let Err(err) = run() { - eprintln!("error: {}", err); - process::exit(1); - } -} - /// Print a usage message and quit. fn help_and_quit() { - let name = env::args().next().unwrap_or("help".to_string()); - println!("usage: {} []", name); + let name = env::args().next().unwrap_or("typeset".to_string()); + println!("usage: {} source [destination]", name); process::exit(0); } diff --git a/src/doc.rs b/src/doc.rs index 51583bdc8..4f87a7f2a 100644 --- a/src/doc.rs +++ b/src/doc.rs @@ -9,35 +9,28 @@ use crate::layout::Size; pub struct Document { /// The pages of the document. pub pages: Vec, - /// The fonts used in the document. + /// The fonts used (the page contents index into this). pub fonts: Vec, } -/// A page with text contents in a document. +/// A page of a document. #[derive(Debug, Clone)] pub struct Page { /// The width of the page. pub width: Size, /// The height of the page. pub height: Size, - /// Text content on the page. - pub text: Vec, + /// Text actions specifying how to draw text content on the page. + pub actions: Vec, } -/// A series of text command, that can be written on to a page. +/// A series of text layouting actions. #[derive(Debug, Clone)] -pub struct Text { - /// The text commands. - pub commands: Vec, -} - -/// Different commands for rendering text. -#[derive(Debug, Clone)] -pub enum TextCommand { - /// Writing of the text. - Text(String), - /// Moves from the *start* of the current line by an (x,y) offset. - Move(Size, Size), - /// Use the indexed font in the documents font list with a given font size. +pub enum TextAction { + /// Move from the _start_ of the current line by an (x, y) offset. + MoveNewline(Size, Size), + /// Write text starting at the current position. + WriteText(String), + /// Set the font by index and font size. 
SetFont(usize, f32), } diff --git a/src/error.rs b/src/error.rs index 442971d0a..514eb1a89 100644 --- a/src/error.rs +++ b/src/error.rs @@ -2,13 +2,19 @@ /// Create an error type. macro_rules! error_type { - ( + ( // The variable used instead of self in functions + // followed by the error type things are happening on. $var:ident: $err:ident, + // Optionally the name of a result type to generate. $(res: $res:ident,)* + // A `Display` and `Debug` implementation. show: $f:ident => $show:expr, + // Optionally a `source` function for the `std::error::Error` trait. $(source: $source:expr,)* + // Any number of `From` implementations. $(from: ($from:path, $conv:expr),)* ) => { + // Possibly create a result type. $(type $res = std::result::Result;)* impl std::fmt::Display for $err { @@ -25,12 +31,14 @@ macro_rules! error_type { } impl std::error::Error for $err { + // The source method is only generated if an implementation was given. $(fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { let $var = self; $source })* } + // Create any number of from implementations. $(impl From<$from> for $err { fn from($var: $from) -> $err { $conv diff --git a/src/export/pdf.rs b/src/export/pdf.rs index 55e00abbb..87b9d52c8 100644 --- a/src/export/pdf.rs +++ b/src/export/pdf.rs @@ -8,12 +8,13 @@ use pdf::doc::{Catalog, PageTree, Page, Resource, Text}; use pdf::font::{Type0Font, CIDFont, CIDFontType, CIDSystemInfo, FontDescriptor, FontFlags}; use pdf::font::{GlyphUnit, CMap, CMapEncoding, WidthRecord, FontStream}; -use crate::doc::{Document, Text as DocText, TextCommand}; +use crate::doc::{Document, TextAction}; use crate::font::{Font, FontError}; use crate::layout::Size; /// Exports documents into _PDFs_. +#[derive(Debug)] pub struct PdfExporter {} impl PdfExporter { @@ -32,6 +33,7 @@ impl PdfExporter { } /// Writes documents in the _PDF_ format. 
+#[derive(Debug)] struct PdfEngine<'d, W: Write> { writer: PdfWriter, doc: &'d Document, @@ -40,7 +42,7 @@ struct PdfEngine<'d, W: Write> { } /// Offsets for the various groups of ids. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[derive(Debug, Copy, Clone)] struct Offsets { catalog: Ref, page_tree: Ref, @@ -50,14 +52,13 @@ struct Offsets { } impl<'d, W: Write> PdfEngine<'d, W> { - /// Create a new _PDF_ Creator. + /// Create a new _PDF_ engine. fn new(doc: &'d Document, target: W) -> PdfResult> { // Calculate a unique id for all objects that will be written. let catalog = 1; let page_tree = catalog + 1; let pages = (page_tree + 1, page_tree + doc.pages.len() as Ref); - let content_count = doc.pages.iter().flat_map(|p| p.text.iter()).count() as Ref; - let contents = (pages.1 + 1, pages.1 + content_count); + let contents = (pages.1 + 1, pages.1 + doc.pages.len() as Ref); let fonts = (contents.1 + 1, contents.1 + 5 * doc.fonts.len() as Ref); let offsets = Offsets { catalog, page_tree, pages, contents, fonts }; @@ -66,13 +67,13 @@ impl<'d, W: Write> PdfEngine<'d, W> { let mut font = 0usize; let mut chars = vec![HashSet::new(); doc.fonts.len()]; - // Iterate through every text object on every page and find out - // which characters they use. - for text in doc.pages.iter().flat_map(|page| page.text.iter()) { - for command in &text.commands { - match command { - TextCommand::Text(string) => chars[font].extend(string.chars()), - TextCommand::SetFont(id, _) => font = *id, + // Iterate through every text object on every page and find out which characters they + // use. + for page in &doc.pages { + for action in &page.actions { + match action { + TextAction::WriteText(string) => chars[font].extend(string.chars()), + TextAction::SetFont(id, _) => font = *id, _ => {}, } } @@ -94,7 +95,6 @@ impl<'d, W: Write> PdfEngine<'d, W> { /// Write the complete document. fn write(&mut self) -> PdfResult { - // Write all the things! 
self.writer.write_header(&Version::new(1, 7))?; self.write_pages()?; self.write_contents()?; @@ -106,20 +106,20 @@ impl<'d, W: Write> PdfEngine<'d, W> { /// Write the document catalog and page tree. fn write_pages(&mut self) -> PdfResult<()> { - // The document catalog. + // The document catalog self.writer.write_obj(self.offsets.catalog, &Catalog::new(self.offsets.page_tree))?; - // The font resources. + // The font resources let fonts = (0 .. self.fonts.len()) .map(|i| Resource::Font((i + 1) as u32, self.offsets.fonts.0 + 5 * i as u32)); - // The root page tree. + // The root page tree self.writer.write_obj(self.offsets.page_tree, PageTree::new() .kids(ids(self.offsets.pages)) .resources(fonts) )?; - // The page objects. + // The page objects for (id, page) in ids(self.offsets.pages).zip(&self.doc.pages) { self.writer.write_obj(id, Page::new(self.offsets.page_tree) .media_box(Rect::new(0.0, 0.0, page.width.to_points(), page.height.to_points())) @@ -132,26 +132,22 @@ impl<'d, W: Write> PdfEngine<'d, W> { /// Write the contents of all pages. fn write_contents(&mut self) -> PdfResult<()> { - let mut id = self.offsets.contents.0; - for page in &self.doc.pages { - for text in &page.text { - self.write_text(id, text)?; - id += 1; - } + for (id, page) in ids(self.offsets.contents).zip(&self.doc.pages) { + self.write_text_actions(id, &page.actions)?; } Ok(()) } - /// Write one text object. - fn write_text(&mut self, id: u32, doc_text: &DocText) -> PdfResult<()> { + /// Write a series of text actions. 
+ fn write_text_actions(&mut self, id: u32, actions: &[TextAction]) -> PdfResult<()> { let mut font = 0; let mut text = Text::new(); - for command in &doc_text.commands { - match command { - TextCommand::Text(string) => { text.tj(self.fonts[font].encode(&string)); }, - TextCommand::Move(x, y) => { text.td(x.to_points(), y.to_points()); }, - TextCommand::SetFont(id, size) => { + for action in actions { + match action { + TextAction::MoveNewline(x, y) => { text.td(x.to_points(), y.to_points()); }, + TextAction::WriteText(string) => { text.tj(self.fonts[font].encode(&string)); }, + TextAction::SetFont(id, size) => { font = *id; text.tf(*id as u32 + 1, *size); }, @@ -220,7 +216,7 @@ impl<'d, W: Write> PdfEngine<'d, W> { } } -/// Create an iterator from reference pair. +/// Create an iterator from a reference pair. fn ids((start, end): (Ref, Ref)) -> impl Iterator { start ..= end } @@ -240,30 +236,30 @@ struct PdfFont { } impl PdfFont { - /// Create a subetted version of the font and calculate some information - /// needed for creating the _PDF_. + /// Create a subsetted version of the font and calculate some information needed for creating the + /// _PDF_. fn new(font: &Font, chars: &HashSet) -> PdfResult { /// Convert a size into a _PDF_ glyph unit. fn size_to_glyph_unit(size: Size) -> GlyphUnit { (1000.0 * size.to_points()).round() as GlyphUnit } - // Subset the font using the selected characters + // Subset the font using the selected characters. let subsetted = font.subsetted( chars.iter().cloned(), &["head", "hhea", "maxp", "hmtx", "loca", "glyf"][..], &["cvt ", "prep", "fpgm", /* "OS/2", "cmap", "name", "post" */][..], )?; - // Specify flags for the font + // Specify flags for the font.
let mut flags = FontFlags::empty(); - flags.set(FontFlags::FIXED_PITCH, font.metrics.is_fixed_pitch); + flags.set(FontFlags::FIXED_PITCH, font.metrics.monospace); flags.set(FontFlags::SERIF, font.name.contains("Serif")); flags.insert(FontFlags::SYMBOLIC); - flags.set(FontFlags::ITALIC, font.metrics.is_italic); + flags.set(FontFlags::ITALIC, font.metrics.italic); flags.insert(FontFlags::SMALL_CAP); - // Transform the widths + // Transform the widths. let widths = subsetted.widths.iter().map(|&x| size_to_glyph_unit(x)).collect(); Ok(PdfFont { diff --git a/src/font.rs b/src/font.rs index 173312343..1a31c6811 100644 --- a/src/font.rs +++ b/src/font.rs @@ -25,7 +25,7 @@ use opentype::global::{MacStyleFlags, NameEntry}; use crate::layout::Size; -/// A loaded font, containing relevant information for typesetting. +/// A loaded and parsed font program. #[derive(Debug, Clone)] pub struct Font { /// The base name of the font. @@ -38,19 +38,19 @@ pub struct Font { pub widths: Vec, /// The fallback glyph. pub default_glyph: u16, - /// The relevant metrics of this font. + /// The typesetting-relevant metrics of this font. pub metrics: FontMetrics, } impl Font { - /// Create a new font from a font program. + /// Create a new font from a raw font program. pub fn new(program: Vec) -> FontResult { - // Create opentype reader to parse font tables + // Create an OpenTypeReader to parse the font tables. let cursor = Cursor::new(&program); let mut reader = OpenTypeReader::new(cursor); // Read the relevant tables - // (all of these are required by the OpenType specification) + // (all of these are required by the OpenType specification, so we expect them). let head = reader.read_table::
()?; let name = reader.read_table::()?; let os2 = reader.read_table::()?; @@ -58,21 +58,21 @@ impl Font { let hmtx = reader.read_table::()?; let post = reader.read_table::()?; - // Create conversion function between font units and sizes + // Create a conversion function between font units and sizes. let font_unit_ratio = 1.0 / (head.units_per_em as f32); let font_unit_to_size = |x| Size::from_points(font_unit_ratio * x as f32); - // Find out the name of the font + // Find out the name of the font. let font_name = name.get_decoded(NameEntry::PostScriptName) .unwrap_or_else(|| "unknown".to_owned()); - // Convert the widths + // Convert the widths from font units to sizes. let widths = hmtx.metrics.iter().map(|m| font_unit_to_size(m.advance_width)).collect(); - // Calculate some metrics + // Calculate the typesetting-relevant metrics. let metrics = FontMetrics { - is_italic: head.mac_style.contains(MacStyleFlags::ITALIC), - is_fixed_pitch: post.is_fixed_pitch, + italic: head.mac_style.contains(MacStyleFlags::ITALIC), + monospace: post.is_fixed_pitch, italic_angle: post.italic_angle.to_f32(), bounding_box: [ font_unit_to_size(head.x_min), @@ -105,6 +105,7 @@ impl Font { /// Encode the given text for this font (into glyph ids). #[inline] pub fn encode(&self, text: &str) -> Vec { + // Each glyph id takes two bytes that we encode in big endian. let mut bytes = Vec::with_capacity(2 * text.len()); for glyph in text.chars().map(|c| self.map(c)) { bytes.push((glyph >> 8) as u8); @@ -113,62 +114,37 @@ impl Font { bytes } - /// Generate a subsetted version of this font including only the chars listed in - /// `chars`. + /// Generate a subsetted version of this font including only the chars listed in `chars`. /// - /// All needed tables will be included (returning an error if a table was not present - /// in the source font) and optional tables will be included if there were present - /// in the source font. All other tables will be dropped. 
- pub fn subsetted( - &self, - chars: C, - needed_tables: I, - optional_tables: I, - ) -> Result + /// All needed tables will be included (returning an error if a table was not present in the + /// source font) and optional tables will be included if they were present in the source font. + /// All other tables will be dropped. + #[inline] + pub fn subsetted(&self, chars: C, needed_tables: I, optional_tables: I) + -> Result where C: IntoIterator, I: IntoIterator, - S: AsRef, + S: AsRef { - let mut chars: Vec = chars.into_iter().collect(); - chars.sort(); - - let mut reader = OpenTypeReader::from_slice(&self.program); - let outlines = reader.outlines()?; - let tables = reader.tables()?.to_vec(); - - let subsetter = Subsetter { - font: &self, - reader, - outlines, - tables, - cmap: None, - hmtx: None, - loca: None, - glyphs: Vec::with_capacity(1 + chars.len()), - chars, - records: vec![], - body: vec![], - }; - - subsetter.subset(needed_tables, optional_tables) + Subsetter::subset(self, chars, needed_tables, optional_tables) } } -/// Font metrics relevant to the typesetting engine. +/// Font metrics relevant to the typesetting or exporting processes. #[derive(Debug, Copy, Clone)] pub struct FontMetrics { /// Whether the font is italic. - pub is_italic: bool, - /// Whether font is fixed pitch. - pub is_fixed_pitch: bool, - /// The angle of italics. + pub italic: bool, + /// Whether font is monospace. + pub monospace: bool, + /// The angle of text in italics. pub italic_angle: f32, /// The glyph bounding box: [x_min, y_min, x_max, y_max], pub bounding_box: [Size; 4], - /// The typographics ascender relevant for line spacing. + /// The typographics ascender. pub ascender: Size, - /// The typographics descender relevant for line spacing. + /// The typographics descender. pub descender: Size, /// The approximate height of capital letters. pub cap_height: Size, @@ -176,25 +152,34 @@ pub struct FontMetrics { pub weight_class: u16, } -/// Describes a font. 
+/// Categorizes a font. /// /// Can be constructed conveniently with the [`font_info`] macro. #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct FontInfo { /// The font families this font is part of. pub families: Vec, - /// Whether the font is in italics. + /// Whether the font is italic. pub italic: bool, - /// Whether the font is bold. + /// Whether the font is bold. pub bold: bool, } +/// A family of fonts. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub enum FontFamily { + Serif, + SansSerif, + Monospace, + /// A custom class like _Arial_ or _Times_. + Named(String), +} + /// A macro to create [FontInfos](crate::font::FontInfo) easily. /// -/// Accepts first a bracketed (ordered) list of font families. Allowed are string expressions -/// as well as the three base families `SansSerif`, `Serif` and `Monospace`. -/// -/// Then there may follow (separated by commas) the keywords `italic` and/or `bold`. +/// Accepts first a bracketed, ordered list of font families. Allowed are string expressions as well +/// as the three base families `SansSerif`, `Serif` and `Monospace`. Then there may follow +/// (separated by commas) the keywords `italic` and/or `bold`. /// /// # Examples /// The font _Noto Sans_ in regular typeface. @@ -259,48 +244,43 @@ macro_rules! font_info { (@__gen Monospace) => { $crate::font::FontFamily::Monospace }; } -/// A family of fonts (either generic or named). -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub enum FontFamily { - SansSerif, - Serif, - Monospace, - Named(String), -} +//------------------------------------------------------------------------------------------------// /// A type that provides fonts. pub trait FontProvider { - /// Returns the font with the given info if this provider has it. + /// Returns a font with the given info if this provider has one. fn get(&self, info: &FontInfo) -> Option>; /// The available fonts this provider can serve.
While these should generally be retrievable - /// through the `get` method, it is not guaranteed that a font info that is contained here - /// yields a `Some` value when passed into `get`. - fn available<'a>(&'a self) -> &'a [FontInfo]; + /// through the `get` method, it is not guaranteed that a font info that is + /// contained here yields a `Some` value when passed into `get`. + fn available<'p>(&'p self) -> &'p [FontInfo]; } /// A wrapper trait around `Read + Seek`. /// -/// This type is needed because currently you can't make a trait object -/// with two traits, like `Box`. -/// Automatically implemented for all types that are [`Read`] and [`Seek`]. +/// This type is needed because currently you can't make a trait object with two traits, like +/// `Box`. Automatically implemented for all types that are [`Read`] and [`Seek`]. pub trait FontData: Read + Seek {} impl FontData for T where T: Read + Seek {} /// A font provider serving fonts from a folder on the local file system. +#[derive(Debug)] pub struct FileSystemFontProvider { + /// The root folder. base: PathBuf, + /// Paths of the fonts relative to the `base` path. paths: Vec, + /// The information for the font with the same index in `paths`. infos: Vec, } impl FileSystemFontProvider { - /// Create a new provider from a folder and an iterator of pairs of - /// font paths and font infos. + /// Create a new provider from a folder and an iterator of pairs of font paths and font infos. /// /// # Example - /// Serve the two fonts `NotoSans-Regular` and `NotoSans-Italic` from the local - /// folder `../fonts`. + /// Serve the two fonts `NotoSans-Regular` and `NotoSans-Italic` from the local folder + /// `../fonts`.
/// ``` /// # use typeset::{font::FileSystemFontProvider, font_info}; /// FileSystemFontProvider::new("../fonts", vec![ @@ -315,8 +295,8 @@ impl FileSystemFontProvider { I: IntoIterator, P: Into, { - // Find out how long the iterator is at least, to reserve the correct - // capacity for the vectors. + // Find out how long the iterator is at least, to reserve the correct capacity for the + // vectors. let iter = infos.into_iter(); let min = iter.size_hint().0; @@ -339,42 +319,52 @@ impl FileSystemFontProvider { impl FontProvider for FileSystemFontProvider { #[inline] fn get(&self, info: &FontInfo) -> Option> { + // Find the index of the font in both arrays (early exit if there is no match). let index = self.infos.iter().position(|i| i == info)?; + + // Open the file and return a boxed reader operating on it. let path = &self.paths[index]; let file = File::open(self.base.join(path)).ok()?; Some(Box::new(BufReader::new(file)) as Box) } #[inline] - fn available<'a>(&'a self) -> &'a [FontInfo] { + fn available<'p>(&'p self) -> &'p [FontInfo] { &self.infos } } -/// Serves matching fonts given a query. +//------------------------------------------------------------------------------------------------// + +/// Serves fonts matching queries. pub struct FontLoader<'p> { /// The font providers. providers: Vec<&'p (dyn FontProvider + 'p)>, - /// All available fonts indexed by provider. + /// The fonts available from each provider (indexed like `providers`). provider_fonts: Vec<&'p [FontInfo]>, - /// The internal state. + /// The internal state. Uses interior mutability because the loader works behind + /// an immutable reference to ease usage. state: RefCell>, } -/// Internal state of the font loader (wrapped in a RefCell). +/// Internal state of the font loader (separated to wrap it in a `RefCell`). struct FontLoaderState<'p> { - /// The loaded fonts along with their external indices. + /// The loaded fonts alongside their external indices.
Some fonts may not have external indices + /// because they were loaded but did not contain the required character. However, these are + /// still stored because they may be needed later. The index is just set to `None` then. fonts: Vec<(Option, Font)>, - /// Allows to retrieve cached results for queries. - query_cache: HashMap, usize>, - /// Allows to lookup fonts by their infos. + /// Allows to retrieve a font (index) quickly if a query was submitted before. + query_cache: HashMap, + /// Allows to re-retrieve loaded fonts by their info instead of loading them again. info_cache: HashMap<&'p FontInfo, usize>, - /// Indexed by outside and indices maps to internal indices. + /// Indexed by external indices (the ones inside the tuples in the `fonts` vector) and maps to + /// internal indices (the actual indices into the vector). inner_index: Vec, } impl<'p> FontLoader<'p> { - /// Create a new font loader. + /// Create a new font loader using a set of providers. + #[inline] pub fn new(providers: &'p [P]) -> FontLoader<'p> where P: AsRef { let providers: Vec<_> = providers.iter().map(|p| p.as_ref()).collect(); let provider_fonts = providers.iter().map(|prov| prov.available()).collect(); @@ -391,12 +381,13 @@ impl<'p> FontLoader<'p> { } } - /// Return the best matching font and it's index (if there is any) given the query. - pub fn get(&self, query: FontQuery<'p>) -> Option<(usize, Ref)> { - // Check if we had the exact same query before. + /// Returns the font (and its index) best matching the query, if there is any. + pub fn get(&self, query: FontQuery) -> Option<(usize, Ref)> { + // Load results from the cache, if we had the exact same query before. let state = self.state.borrow(); if let Some(&index) = state.query_cache.get(&query) { - // That this is the query cache means it must has an index as we've served it before. + // The font must have an external index already because it is in the query cache. + // It has been served before. 
let extern_index = state.fonts[index].0.unwrap(); let font = Ref::map(state, |s| &s.fonts[index].1); @@ -404,94 +395,99 @@ impl<'p> FontLoader<'p> { } drop(state); - // Go over all font infos from all font providers that match the query. - for family in query.families { + // The outermost loop goes over the families because we want to serve + // the font that matches the first possible family. + for family in &query.families { + // For each family now go over all font infos from all font providers. for (provider, infos) in self.providers.iter().zip(&self.provider_fonts) { for info in infos.iter() { - // Check whether this info matches the query. - if Self::matches(query, family, info) { + // Proceed only if this font matches the query. + if Self::matches(&query, family, info) { let mut state = self.state.borrow_mut(); - // Check if we have already loaded this font before. - // Otherwise we'll fetch the font from the provider. + // Check if we have already loaded this font before, otherwise, we will + // load it from the provider. Anyway, have it stored and find out its + // internal index. let index = if let Some(&index) = state.info_cache.get(info) { index } else if let Some(mut source) = provider.get(info) { - // Read the font program into a vec. + // Read the font program into a vector and parse it. let mut program = Vec::new(); source.read_to_end(&mut program).ok()?; - - // Create a font from it. let font = Font::new(program).ok()?; - // Insert it into the storage. + // Insert it into the storage and cache it by its info. let index = state.fonts.len(); state.info_cache.insert(info, index); state.fonts.push((None, font)); index } else { + // Strangely, this provider lied and cannot give us the promised font. continue; }; - // Check whether this font has the character we need. + // Proceed if this font has the character we need. 
let has_char = state.fonts[index].1.mapping.contains_key(&query.character); if has_char { - // We can take this font, so we store the query. + // This font is suitable, thus we cache the query result. state.query_cache.insert(query, index); - // Now we have to find out the external index of it, or assign a new - // one if it has not already one. - let maybe_extern_index = state.fonts[index].0; - let extern_index = maybe_extern_index.unwrap_or_else(|| { + // Now we have to find out the external index of it or assign a new one + // if it has none. + let external_index = state.fonts[index].0.unwrap_or_else(|| { // We have to assign an external index before serving. - let extern_index = state.inner_index.len(); + let new_index = state.inner_index.len(); state.inner_index.push(index); - state.fonts[index].0 = Some(extern_index); - extern_index + state.fonts[index].0 = Some(new_index); + new_index }); - // Release the mutable borrow and borrow immutably. + // Release the mutable borrow to be allowed to borrow immutably. drop(state); - let font = Ref::map(self.state.borrow(), |s| &s.fonts[index].1); - // Finally we can return it. - return Some((extern_index, font)); + // Finally, get a reference to the actual font. + let font = Ref::map(self.state.borrow(), |s| &s.fonts[index].1); + return Some((external_index, font)); } } } } } + // Not a single match! None } - /// Return a loaded font at an index. Panics if the index is out of bounds. + /// Return the font previously loaded at this index. Panics if the index is not assigned. + #[inline] pub fn get_with_index(&self, index: usize) -> Ref { let state = self.state.borrow(); let internal = state.inner_index[index]; Ref::map(state, |s| &s.fonts[internal].1) } - /// Return the list of fonts. + /// Move the whole list of fonts out. pub fn into_fonts(self) -> Vec { - // Sort the fonts by external key so that they are in the correct order. + // Sort the fonts by external index so that they are in the correct order. 
+ // All fonts that were cached but not used by the outside are sorted to the back + // and are removed in the next step. let mut fonts = self.state.into_inner().fonts; fonts.sort_by_key(|&(maybe_index, _)| match maybe_index { - Some(index) => index as isize, - None => -1, + Some(index) => index, + None => std::usize::MAX, }); - // Remove the fonts that are not used from the outside + // Remove the fonts that are not used from the outside. fonts.into_iter().filter_map(|(maybe_index, font)| { - maybe_index.map(|_| font) + if maybe_index.is_some() { Some(font) } else { None } }).collect() } - /// Check whether the query and the current family match the info. - fn matches(query: FontQuery, family: &FontFamily, info: &FontInfo) -> bool { - info.families.contains(family) - && info.italic == query.italic && info.bold == query.bold + /// Checks whether the query and the family match the info. + fn matches(query: &FontQuery, family: &FontFamily, info: &FontInfo) -> bool { + info.italic == query.italic && info.bold == query.bold + && info.families.contains(family) } } @@ -510,21 +506,25 @@ impl Debug for FontLoader<'_> { } /// A query for a font with specific properties. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub struct FontQuery<'a> { - /// A fallback list of font families to accept. The first family in this list, that also - /// satisfies the other conditions, shall be returned. - pub families: &'a [FontFamily], - /// Whether the font shall be in italics. - pub italic: bool, - /// Whether the font shall be in boldface. - pub bold: bool, - /// Which character we need. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct FontQuery { + /// Which character is needed. pub character: char, + /// Whether the font should be in italics. + pub italic: bool, + /// Whether the font should be in boldface. + pub bold: bool, + /// A fallback list of font families to accept. 
The font matching the first possible family in + /// this list satisfying all other constraints should be returned. + pub families: Vec, } +//------------------------------------------------------------------------------------------------// + +/// Subsets a font. +#[derive(Debug)] struct Subsetter<'a> { - // Original font + // The original font font: &'a Font, reader: OpenTypeReader>, outlines: Outlines, @@ -534,17 +534,53 @@ struct Subsetter<'a> { loca: Option>, glyphs: Vec, - // Subsetted font + // The subsetted font chars: Vec, records: Vec, body: Vec, } impl<'a> Subsetter<'a> { - fn subset(mut self, needed_tables: I, optional_tables: I) -> FontResult + /// Subset a font. See [`Font::subsetted`] for more details. + pub fn subset( + font: &Font, + chars: C, + needed_tables: I, + optional_tables: I, + ) -> Result + where + C: IntoIterator, + I: IntoIterator, + S: AsRef + { + // Parse some header information and keep the reader around. + let mut reader = OpenTypeReader::from_slice(&font.program); + let outlines = reader.outlines()?; + let tables = reader.tables()?.to_vec(); + + let chars: Vec<_> = chars.into_iter().collect(); + + let subsetter = Subsetter { + font, + reader, + outlines, + tables, + cmap: None, + hmtx: None, + loca: None, + glyphs: Vec::with_capacity(1 + chars.len()), + chars, + records: vec![], + body: vec![], + }; + + subsetter.run(needed_tables, optional_tables) + } + + fn run(mut self, needed_tables: I, optional_tables: I) -> FontResult where I: IntoIterator, S: AsRef { - // Find out which glyphs to include based on which characters we want - // and which glyphs are used by composition. + // Find out which glyphs to include based on which characters we want and which glyphs are + // used by other composite glyphs.
self.build_glyphs()?; // Iterate through the needed tables first @@ -553,7 +589,7 @@ impl<'a> Subsetter<'a> { let tag: Tag = table.parse() .map_err(|_| FontError::UnsupportedTable(table.to_string()))?; - if self.contains(tag) { + if self.contains_table(tag) { self.write_table(tag)?; } else { return Err(FontError::MissingTable(tag.to_string())); @@ -566,7 +602,7 @@ impl<'a> Subsetter<'a> { let tag: Tag = table.parse() .map_err(|_| FontError::UnsupportedTable(table.to_string()))?; - if self.contains(tag) { + if self.contains_table(tag) { self.write_table(tag)?; } } @@ -598,16 +634,16 @@ impl<'a> Subsetter<'a> { self.read_cmap()?; let cmap = self.cmap.as_ref().unwrap(); - // The default glyph should be always present. + // The default glyph should be always present, others only if used. self.glyphs.push(self.font.default_glyph); for &c in &self.chars { - self.glyphs.push(cmap.get(c).ok_or_else(|| FontError::MissingCharacter(c))?) + let glyph = cmap.get(c).ok_or_else(|| FontError::MissingCharacter(c))?; + self.glyphs.push(glyph); } - // Composite glyphs may need additional glyphs we have not yet in our list. - // So now we have a look at the glyf table to check that and add glyphs - // we need additionally. - if self.contains("glyf".parse().unwrap()) { + // Composite glyphs may need additional glyphs we do not have in our list yet. So now we + // have a look at the `glyf` table to check that and add glyphs we need additionally. + if self.contains_table("glyf".parse().unwrap()) { self.read_loca()?; let loca = self.loca.as_ref().unwrap(); let table = self.get_table_data("glyf".parse().unwrap())?; @@ -880,7 +916,8 @@ impl<'a> Subsetter<'a> { .take_bytes() } - fn contains(&self, tag: Tag) -> bool { + /// Whether this font contains some table. + fn contains_table(&self, tag: Tag) -> bool { self.tables.binary_search_by_key(&tag, |r| r.tag).is_ok() } @@ -916,8 +953,8 @@ impl<'a> Subsetter<'a> { } } -/// Calculate a checksum over the sliced data as sum of u32's. 
-/// The data length has to be a multiple of four. +/// Calculate a checksum over the sliced data as sum of u32's. The data length has to be a multiple +/// of four. fn calculate_check_sum(data: &[u8]) -> u32 { let mut sum = 0u32; data.chunks_exact(4).for_each(|c| { @@ -931,13 +968,12 @@ fn calculate_check_sum(data: &[u8]) -> u32 { sum } +/// Helper trait to create subsetting errors more easily. trait TakeInvalid: Sized { - /// Pull the type out of the option, returning a subsetting error - /// about an invalid font wrong. + /// Pull the type out of the option, returning an invalid font error if self was not valid. fn take_invalid>(self, message: S) -> FontResult; - /// Pull the type out of the option, returning an error about missing - /// bytes if it is nothing. + /// Same as above with predefined message "expected more bytes". fn take_bytes(self) -> FontResult { self.take_invalid("expected more bytes") } @@ -949,15 +985,17 @@ impl TakeInvalid for Option { } } +//------------------------------------------------------------------------------------------------// + /// The error type for font operations. pub enum FontError { /// The font file is incorrect. InvalidFont(String), /// A requested table was not present in the source font. MissingTable(String), - /// The table is unknown to the subsetting engine (unimplemented or invalid). + /// The table is unknown to the subsetting engine. UnsupportedTable(String), - /// A requested character was not present in the source font. + /// A character requested for subsetting was not present in the source font. MissingCharacter(char), /// An I/O Error occured while reading the font program. 
Io(io::Error), diff --git a/src/func.rs b/src/func.rs index ef5120dfa..402bf6fa3 100644 --- a/src/func.rs +++ b/src/func.rs @@ -4,27 +4,27 @@ use std::any::Any; use std::collections::HashMap; use std::fmt::{self, Debug, Formatter}; -use crate::syntax::FuncHeader; -use crate::parsing::{ParseContext, ParseResult}; use crate::layout::{Layout, LayoutContext, LayoutResult}; +use crate::parsing::{ParseContext, ParseResult}; +use crate::syntax::FuncHeader; -/// Types that act as functions. +/// Typesetting function types. /// -/// These types have to be able to parse tokens into themselves and store the -/// relevant information from the parsing to do their role in typesetting later. +/// These types have to be able to parse tokens into themselves and store the relevant information +/// from the parsing to do their role in typesetting later. /// -/// The trait `FunctionBounds` is automatically implemented for types which can be -/// used as functions, that is they fulfill the bounds `Debug + PartialEq + 'static`. +/// The trait `FunctionBounds` is automatically implemented for types which can be used as +/// functions, that is they fulfill the bounds `Debug + PartialEq + 'static`. pub trait Function: FunctionBounds { - /// Parse the header and body into this function given this context. + /// Parse the header and body into this function given a context. fn parse(header: &FuncHeader, body: Option<&str>, ctx: &ParseContext) -> ParseResult where Self: Sized; /// Layout this function given a context. /// - /// Returns optionally the resulting layout and a if changes to the context - /// should be made new context. + /// Returns optionally the resulting layout and a new context if changes to the context should + /// be made. fn layout(&self, ctx: &LayoutContext) -> LayoutResult<(Option, Option)>; } @@ -37,8 +37,8 @@ impl PartialEq for dyn Function { /// A helper trait that describes requirements for types that can implement [`Function`]. 
/// -/// Automatically implemented for all types which fulfill to the bounds -/// `Debug + PartialEq + 'static`. There should be no need to implement this manually. +/// Automatically implemented for all types which fulfill to the bounds `Debug + PartialEq + +/// 'static`. There should be no need to implement this manually. pub trait FunctionBounds: Debug { /// Cast self into `Any`. fn help_cast_as_any(&self) -> &dyn Any; @@ -66,7 +66,7 @@ pub struct Scope { parsers: HashMap>, } -/// A function which transforms a parsing context into a boxed function. +/// A function which parses a function invocation into a function type. type ParseFunc = dyn Fn(&FuncHeader, Option<&str>, &ParseContext) -> ParseResult>; @@ -81,7 +81,7 @@ impl Scope { Scope::new() } - /// Add a function type to the scope with a given name. + /// Add a function type to the scope giving it a name. pub fn add(&mut self, name: &str) { self.parsers.insert( name.to_owned(), diff --git a/src/layout/mod.rs b/src/layout/mod.rs index 7bc62bd3f..f2ca30296 100644 --- a/src/layout/mod.rs +++ b/src/layout/mod.rs @@ -1,73 +1,73 @@ -//! Layouting engine. +//! The layouting engine. -use crate::doc::Document; +use crate::doc::{Document, Page, TextAction}; use crate::font::{Font, FontLoader, FontFamily, FontError}; use crate::syntax::SyntaxTree; mod size; + pub use size::Size; -/// Layout a syntax tree given a context. +/// Layout a syntax tree in a given context. #[allow(unused_variables)] pub fn layout(tree: &SyntaxTree, ctx: &LayoutContext) -> LayoutResult { - Ok(Layout {}) + Ok(Layout { + extent: LayoutDimensions { width: Size::zero(), height: Size::zero() }, + actions: vec![], + }) } /// A collection of layouted content. -pub struct Layout {} +#[derive(Debug, Clone)] +pub struct Layout { + /// The extent of this layout into all directions. + extent: LayoutDimensions, + /// Actions composing this layout. 
+ actions: Vec, +} impl Layout { /// Convert this layout into a document given the list of fonts referenced by it. pub fn into_document(self, fonts: Vec) -> Document { Document { - pages: vec![], + pages: vec![Page { + width: self.extent.width, + height: self.extent.height, + actions: self.actions, + }], fonts, } } } +/// Types supporting some kind of layouting. +pub trait Layouter { + /// Finishing the current layouting process and return a layout. + fn finish(self) -> LayoutResult; +} + /// The context for layouting. +#[derive(Debug, Clone)] pub struct LayoutContext<'a, 'p> { + /// Loads fonts matching queries. pub loader: &'a FontLoader<'p>, + /// The spacial constraints to layout in. + pub max_extent: LayoutDimensions, + /// Base style to set text with. + pub text_style: TextStyle, } -/// Default styles for pages. -#[derive(Debug, Clone, PartialEq)] -pub struct PageStyle { - /// The width of the paper. +#[derive(Debug, Clone)] +pub struct LayoutDimensions { + /// Horizontal extent. pub width: Size, - /// The height of the paper. + /// Vertical extent. pub height: Size, - - /// The left margin of the paper. - pub margin_left: Size, - /// The top margin of the paper. - pub margin_top: Size, - /// The right margin of the paper. - pub margin_right: Size, - /// The bottom margin of the paper. - pub margin_bottom: Size, } -impl Default for PageStyle { - fn default() -> PageStyle { - PageStyle { - // A4 paper. - width: Size::from_mm(210.0), - height: Size::from_mm(297.0), - - // Margins. A bit more on top and bottom. - margin_left: Size::from_cm(3.0), - margin_top: Size::from_cm(3.0), - margin_right: Size::from_cm(3.0), - margin_bottom: Size::from_cm(3.0), - } - } -} - -/// Default styles for texts. -#[derive(Debug, Clone, PartialEq)] +/// Default styles for text. +#[derive(Debug, Clone)] pub struct TextStyle { /// A fallback list of font families to use. 
pub font_families: Vec, @@ -75,7 +75,7 @@ pub struct TextStyle { pub font_size: f32, /// The line spacing (as a multiple of the font size). pub line_spacing: f32, - /// The spacing for paragraphs (as a multiple of the line spacing). + /// The paragraphs spacing (as a multiple of the line spacing). pub paragraph_spacing: f32, } @@ -92,10 +92,44 @@ impl Default for TextStyle { } } +/// Default styles for pages. +#[derive(Debug, Clone)] +pub struct PageStyle { + /// The width of the page. + pub width: Size, + /// The height of the page. + pub height: Size, + + /// The amount of white space on the left side. + pub margin_left: Size, + /// The amount of white space on the top side. + pub margin_top: Size, + /// The amount of white space on the right side. + pub margin_right: Size, + /// The amount of white space on the bottom side. + pub margin_bottom: Size, +} + +impl Default for PageStyle { + fn default() -> PageStyle { + PageStyle { + // A4 paper. + width: Size::from_mm(210.0), + height: Size::from_mm(297.0), + + // All the same margins. + margin_left: Size::from_cm(3.0), + margin_top: Size::from_cm(3.0), + margin_right: Size::from_cm(3.0), + margin_bottom: Size::from_cm(3.0), + } + } +} + /// The error type for layouting. pub enum LayoutError { /// There was no suitable font. - MissingFont, + NoSuitableFont, /// An error occured while gathering font data. Font(FontError), } @@ -106,7 +140,7 @@ pub type LayoutResult = Result; error_type! { err: LayoutError, show: f => match err { - LayoutError::MissingFont => write!(f, "missing font"), + LayoutError::NoSuitableFont => write!(f, "no suitable font"), LayoutError::Font(err) => write!(f, "font error: {}", err), }, source: match err { diff --git a/src/layout/size.rs b/src/layout/size.rs index bf79a3c42..d0b557d72 100644 --- a/src/layout/size.rs +++ b/src/layout/size.rs @@ -1,10 +1,12 @@ +//! A general spacing type. 
+ use std::cmp::Ordering; use std::fmt::{self, Display, Debug, Formatter}; use std::iter::Sum; use std::ops::*; -/// A general size (unit of length) type. +/// A general spacing type. #[derive(Copy, Clone, PartialEq, Default)] pub struct Size { /// The size in typographic points (1/72 inches). @@ -12,39 +14,39 @@ pub struct Size { } impl Size { - /// Create an zeroed size. + /// Create a zeroed size. #[inline] pub fn zero() -> Size { Size { points: 0.0 } } - /// Create a size from a number of points. + /// Create a size from an amount of points. #[inline] pub fn from_points(points: f32) -> Size { Size { points } } - /// Create a size from a number of inches. + /// Create a size from an amount of inches. #[inline] pub fn from_inches(inches: f32) -> Size { Size { points: 72.0 * inches } } - /// Create a size from a number of millimeters. + /// Create a size from an amount of millimeters. #[inline] pub fn from_mm(mm: f32) -> Size { Size { points: 2.83465 * mm } } - /// Create a size from a number of centimeters. + /// Create a size from an amount of centimeters. #[inline] pub fn from_cm(cm: f32) -> Size { Size { points: 28.3465 * cm } } - /// Create a size from a number of points. + /// Convert this size into points. #[inline] pub fn to_points(&self) -> f32 { self.points } - /// Create a size from a number of inches. + /// Convert this size into inches. #[inline] pub fn to_inches(&self) -> f32 { self.points * 0.0138889 } - /// Create a size from a number of millimeters. + /// Convert this size into millimeters. #[inline] pub fn to_mm(&self) -> f32 { self.points * 0.352778 } - /// Create a size from a number of centimeters. + /// Convert this size into centimeters. #[inline] pub fn to_cm(&self) -> f32 { self.points * 0.0352778 } } diff --git a/src/lib.rs b/src/lib.rs index 9a1082c6c..b088f633b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,13 @@ //! The compiler for the _Typeset_ typesetting language 📜. //! //! # Compilation -//! 
- **Parsing:** The parsing step first transforms a plain string into an -//! [iterator of tokens](crate::parsing::Tokens). Then the [parser](crate::parsing::Parser) -//! operates on that to construct a syntax tree. The structures describing the tree can be found -//! in the [syntax] module. -//! - **Layouting:** The next step is to transform the syntax tree into a portable representation -//! of the typesetted document. Types for these can be found in the [doc] and [layout] modules. -//! This representation contains already the finished layout. +//! - **Parsing:** The parsing step first transforms a plain string into an [iterator of +//! tokens](crate::parsing::Tokens). Then the [parser](crate::parsing::Parser) operates on that to +//! construct a syntax tree. The structures describing the tree can be found in the [syntax] +//! module. +//! - **Layouting:** The next step is to transform the syntax tree into a portable representation of +//! the typesetted document. Types for these can be found in the [doc] and [layout] modules. This +//! representation contains already the finished layout. //! - **Exporting:** The finished document can then be exported into supported formats. Submodules //! for the supported formats are located in the [export] module. Currently the only supported //! format is _PDF_. @@ -43,11 +43,13 @@ //! exporter.export(&document, file).unwrap(); //! 
``` +use std::fmt::{self, Debug, Formatter}; + use crate::doc::Document; -use crate::func::Scope; use crate::font::{Font, FontLoader, FontProvider}; -use crate::layout::{layout, Layout, LayoutContext, LayoutResult, LayoutError}; -use crate::layout::{PageStyle, TextStyle}; +use crate::func::Scope; +use crate::layout::{layout, Layout, LayoutContext, LayoutDimensions}; +use crate::layout::{PageStyle, TextStyle, LayoutResult, LayoutError}; use crate::parsing::{parse, ParseContext, ParseResult, ParseError}; use crate::syntax::SyntaxTree; @@ -65,12 +67,12 @@ pub mod syntax; /// Transforms source code into typesetted documents. /// -/// Holds the typesetting context, which can be configured through various methods. +/// Can be configured through various methods. pub struct Typesetter<'p> { /// The default page style. - base_page_style: PageStyle, + page_style: PageStyle, /// The default text style. - base_text_style: TextStyle, + text_style: TextStyle, /// Font providers. font_providers: Vec>, } @@ -80,8 +82,8 @@ impl<'p> Typesetter<'p> { #[inline] pub fn new() -> Typesetter<'p> { Typesetter { - base_page_style: PageStyle::default(), - base_text_style: TextStyle::default(), + page_style: PageStyle::default(), + text_style: TextStyle::default(), font_providers: vec![], } } @@ -89,13 +91,13 @@ impl<'p> Typesetter<'p> { /// Set the default page style for the document. #[inline] pub fn set_page_style(&mut self, style: PageStyle) { - self.base_page_style = style; + self.page_style = style; } /// Set the default text style for the document. #[inline] pub fn set_text_style(&mut self, style: TextStyle) { - self.base_text_style = style; + self.text_style = style; } /// Add a font provider to the context of this typesetter. @@ -112,12 +114,23 @@ impl<'p> Typesetter<'p> { parse(src, &ctx) } - /// Layout a parsed syntax tree and return the layout and the referenced font list. + /// Layout a syntax tree and return the layout and the referenced font list. 
#[inline] pub fn layout(&self, tree: &SyntaxTree) -> LayoutResult<(Layout, Vec)> { let loader = FontLoader::new(&self.font_providers); - let ctx = LayoutContext { loader: &loader }; + + let page = &self.page_style; + let ctx = LayoutContext { + loader: &loader, + text_style: self.text_style.clone(), + max_extent: LayoutDimensions { + width: page.width - page.margin_left - page.margin_right, + height: page.height - page.margin_top - page.margin_bottom, + }, + }; + let layout = layout(&tree, &ctx)?; + Ok((layout, loader.into_fonts())) } @@ -127,14 +140,25 @@ impl<'p> Typesetter<'p> { let tree = self.parse(src)?; let (layout, fonts) = self.layout(&tree)?; let document = layout.into_document(fonts); + println!("fonts = {}", document.fonts.len()); + println!("document = {:?}", document.pages); Ok(document) } } +impl Debug for Typesetter<'_> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.debug_struct("Typesetter") + .field("page_style", &self.page_style) + .field("text_style", &self.text_style) + .field("font_providers", &self.font_providers.len()) + .finish() + } +} + /// The general error type for typesetting. pub enum TypesetError { - /// An error that occured while transforming source code into - /// an abstract syntax tree. + /// An error that occured while parsing. Parse(ParseError), /// An error that occured while layouting. Layout(LayoutError), @@ -163,7 +187,7 @@ mod test { use crate::export::pdf::PdfExporter; use crate::font::FileSystemFontProvider; - /// Create a pdf with a name from the source code. + /// Create a _PDF_ with a name from the source code. fn test(name: &str, src: &str) { let mut typesetter = Typesetter::new(); typesetter.add_font_provider(FileSystemFontProvider::new("../fonts", vec![ @@ -175,10 +199,10 @@ mod test { ("NotoEmoji-Regular.ttf", font_info!(["NotoEmoji", "Noto", SansSerif, Serif, Monospace])), ])); - // Typeset into document + // Typeset into document. 
let document = typesetter.typeset(src).unwrap(); - // Write to file + // Write to file. let path = format!("../target/typeset-unit-{}.pdf", name); let file = BufWriter::new(File::create(path).unwrap()); let exporter = PdfExporter::new(); diff --git a/src/parsing.rs b/src/parsing.rs index 594077f76..5ee8b382d 100644 --- a/src/parsing.rs +++ b/src/parsing.rs @@ -1,14 +1,13 @@ //! Tokenization and parsing of source code into syntax trees. use std::collections::HashMap; -use std::mem::swap; use std::str::CharIndices; use smallvec::SmallVec; use unicode_xid::UnicodeXID; -use crate::syntax::*; use crate::func::{Function, Scope}; +use crate::syntax::*; /// Builds an iterator over the tokens of the source code. @@ -27,7 +26,7 @@ pub struct Tokens<'s> { } /// The state the tokenizer is in. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] enum TokensState { /// The base state if there is nothing special we are in. Body, @@ -55,9 +54,9 @@ impl<'s> Tokens<'s> { } /// Switch to the given state. - fn switch(&mut self, mut state: TokensState) { - swap(&mut state, &mut self.state); - self.stack.push(state); + fn switch(&mut self, state: TokensState) { + self.stack.push(self.state); + self.state = state; } /// Go back to the top-of-stack state. @@ -84,7 +83,7 @@ impl<'s> Iterator for Tokens<'s> { fn next(&mut self) -> Option> { use TokensState as TS; - // Function maybe has a body + // Go to the body state if the function has a body or return to the top-of-stack state. if self.state == TS::MaybeBody { if self.chars.peek()?.1 == '[' { self.state = TS::Body; @@ -244,7 +243,7 @@ fn is_newline_char(character: char) -> bool { } } -/// A index + char iterator with double lookahead. +/// A (index, char) iterator with double lookahead. 
#[derive(Debug, Clone)] struct PeekableChars<'s> { offset: usize, @@ -324,6 +323,8 @@ impl Iterator for PeekableChars<'_> { } } +//------------------------------------------------------------------------------------------------// + /// Parses source code into a syntax tree given a context. #[inline] pub fn parse(src: &str, ctx: &ParseContext) -> ParseResult { @@ -338,6 +339,7 @@ pub struct ParseContext<'a> { } /// Transforms token streams to syntax trees. +#[derive(Debug)] struct Parser<'s> { src: &'s str, tokens: PeekableTokens<'s>, @@ -358,7 +360,7 @@ enum ParserState { } impl<'s> Parser<'s> { - /// Create a new parser from a stream of tokens and a scope of functions. + /// Create a new parser from the source and the context. fn new(src: &'s str, ctx: &'s ParseContext) -> Parser<'s> { Parser { src, @@ -380,7 +382,7 @@ impl<'s> Parser<'s> { Ok(self.tree) } - /// Parse part of the body. + /// Parse the next part of the body. fn parse_body_part(&mut self) -> ParseResult<()> { if let Some(token) = self.tokens.peek() { match token { @@ -398,8 +400,8 @@ impl<'s> Parser<'s> { Token::Colon | Token::Equals => panic!("bad token for body: {:?}", token), - // The rest is handled elsewhere or should not happen, because Tokens does - // not yield colons or equals in the body, but their text equivalents instead. + // The rest is handled elsewhere or should not happen, because `Tokens` does not + // yield colons or equals in the body, but their text equivalents instead. _ => panic!("unexpected token: {:?}", token), } } @@ -526,6 +528,7 @@ impl<'s> Parser<'s> { } } } + Ok(()) } @@ -564,7 +567,7 @@ impl<'s> Parser<'s> { } } -/// Find the index of the first unbalanced (unescaped) closing bracket. +/// Find the index of the first unbalanced and unescaped closing bracket. 
fn find_closing_bracket(src: &str) -> Option { let mut parens = 0; let mut escaped = false; @@ -584,8 +587,8 @@ fn find_closing_bracket(src: &str) -> Option { None } -/// A peekable iterator for tokens which allows access to the original iterator -/// inside this module (which is needed by the parser). +/// A peekable iterator for tokens which allows access to the original iterator inside this module +/// (which is needed by the parser). #[derive(Debug, Clone)] struct PeekableTokens<'s> { tokens: Tokens<'s>, @@ -649,6 +652,8 @@ fn is_identifier(string: &str) -> bool { true } +//------------------------------------------------------------------------------------------------// + /// The error type for parsing. pub struct ParseError(String); @@ -693,7 +698,7 @@ mod token_tests { test("\n", vec![N]); } - /// This test looks if LF- and CRLF-style newlines get both identified correctly + /// This test looks if LF- and CRLF-style newlines get both identified correctly. #[test] fn tokenize_whitespace_newlines() { test(" \t", vec![S]); @@ -743,8 +748,8 @@ mod token_tests { ]); } - /// This test checks whether the colon and equals symbols get parsed correctly - /// depending on the context: Either in a function header or in a body. + /// This test checks whether the colon and equals symbols get parsed correctly depending on the + /// context: Either in a function header or in a body. #[test] fn tokenize_symbols_context() { test("[func: key=value][Answer: 7]", @@ -801,8 +806,8 @@ mod parse_tests { use Node::{Space as S, Newline as N, Func as F}; use funcs::*; - /// Two test functions, one which parses it's body as another syntax tree - /// and another one which does not expect a body. + /// Two test functions, one which parses it's body as another syntax tree and another one which + /// does not expect a body. 
mod funcs { use super::*; @@ -871,8 +876,7 @@ mod parse_tests { #[allow(non_snake_case)] fn T(s: &str) -> Node { Node::Text(s.to_owned()) } - /// Shortcut macro to create a syntax tree. - /// Is `vec`-like and the elements are the nodes. + /// Shortcut macro to create a syntax tree. Is `vec`-like and the elements are the nodes. macro_rules! tree { ($($x:expr),*) => ( SyntaxTree { nodes: vec![$($x),*] } diff --git a/src/syntax.rs b/src/syntax.rs index b3f1d04de..cfd417196 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -17,8 +17,8 @@ pub enum Token<'s> { RightBracket, /// A colon (`:`) indicating the beginning of function arguments. /// - /// If a colon occurs outside of a function header, it will be - /// tokenized as a [Word](Token::Word). + /// If a colon occurs outside of a function header, it will be tokenized as a + /// [Word](Token::Word). Colon, /// An equals (`=`) sign assigning a function argument a value. /// @@ -34,9 +34,8 @@ pub enum Token<'s> { LineComment(&'s str), /// A block comment. BlockComment(&'s str), - /// A star followed by a slash unexpectedly ending a block comment - /// (the comment was not started before, otherwise a - /// [BlockComment](Token::BlockComment) would be returned). + /// A star followed by a slash unexpectedly ending a block comment (the comment was not started + /// before, otherwise a [BlockComment](Token::BlockComment) would be returned). StarSlash, /// Everything else is just text. Text(&'s str), @@ -64,11 +63,11 @@ pub enum Node { Space, /// A line feed. Newline, - /// Indicates that italics were enabled/disabled. + /// Indicates that italics were enabled / disabled. ToggleItalics, - /// Indicates that boldface was enabled/disabled. + /// Indicates that boldface was enabled / disabled. ToggleBold, - /// Indicates that math mode was enabled/disabled. + /// Indicates that math mode was enabled / disabled. ToggleMath, /// Literal text. Text(String),