Move crate into workspace subfolder

This commit is contained in:
Laurenz 2019-02-12 21:31:35 +01:00
commit 5a600eb354
7 changed files with 1687 additions and 0 deletions

10
Cargo.toml Normal file
View File

@ -0,0 +1,10 @@
# Crate metadata for the typeset library.
[package]
name = "typeset"
version = "0.1.0"
authors = ["Laurenz Mädje <laurmaedje@gmail.com>"]
edition = "2018"
[dependencies]
# Unicode word-boundary segmentation (UAX #29), used by the tokenizer.
unicode-segmentation = "1.2"
# Unicode identifier properties — presumably backing `StrExt::is_identifier`
# in src/utility.rs (not shown); confirm against that module.
unicode-xid = "0.1.0"
# Endian-aware binary reading, used by the OpenType font reader.
byteorder = "1"

187
src/doc.rs Normal file
View File

@ -0,0 +1,187 @@
//! Generation of abstract documents from syntax trees.
use std::fmt;
use crate::parsing::{SyntaxTree, Node};
use crate::font::{Font, BuiltinFont};
/// Abstract representation of a complete typeset document.
///
/// This abstract thing can then be serialized into a specific format like PDF.
#[derive(Debug, Clone, PartialEq)]
pub struct Document {
    /// The pages of the document.
    pub pages: Vec<Page>,
    /// The fonts used by the document.
    pub fonts: Vec<DocumentFont>,
}
impl Document {
    /// Create a new document with no pages and no fonts.
    pub fn new() -> Document {
        Document {
            pages: Vec::new(),
            fonts: Vec::new(),
        }
    }
}
/// A page of a document.
#[derive(Debug, Clone, PartialEq)]
pub struct Page {
    /// The width and height of the page.
    pub size: [Size; 2],
    /// The contents of the page.
    pub contents: Vec<Text>,
}

/// Plain text (a newtype wrapper around `String`).
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Text(pub String);

/// A font (either built-in or external).
#[derive(Debug, Clone, PartialEq)]
pub enum DocumentFont {
    /// One of the 14 built-in fonts.
    Builtin(BuiltinFont),
    /// An externally loaded font.
    Loaded(Font),
}

/// A distance that can be created from different units of length.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct Size {
    /// The size in typographic points (1/72 inches).
    pub points: f32,
}
impl Size {
    /// Create a size from a number of typographic points.
    pub fn from_points(points: f32) -> Size {
        Size { points }
    }

    /// Create a size from a number of inches (1 inch = 72 points).
    pub fn from_inches(inches: f32) -> Size {
        // One inch is 72 points, so multiply. (The previous implementation
        // divided by 72, yielding inches-per-point instead of points.)
        Size { points: 72.0 * inches }
    }

    /// Create a size from a number of millimeters (1 mm ≈ 2.8345 points).
    pub fn from_mm(mm: f32) -> Size {
        Size { points: 2.8345 * mm }
    }

    /// Create a size from a number of centimeters (1 cm ≈ 28.345 points).
    pub fn from_cm(cm: f32) -> Size {
        // 1 cm = 10 mm, so ten times the millimeter factor. (The previous
        // factor 0.028345 was off by a factor of 1000.)
        Size { points: 28.345 * cm }
    }
}
/// A type that can be generated into a document.
pub trait Generate {
    /// Generate a document from self.
    fn generate(self) -> GenResult<Document>;
}

impl Generate for SyntaxTree<'_> {
    fn generate(self) -> GenResult<Document> {
        Generator::new(self).generate()
    }
}

/// Result type used for document generation.
type GenResult<T> = std::result::Result<T, GenerationError>;

/// A failure when generating.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct GenerationError {
    /// A message describing the error.
    pub message: String,
}

impl fmt::Display for GenerationError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "generation error: {}", self.message)
    }
}
/// Transforms an abstract syntax tree into a document.
#[derive(Debug, Clone)]
struct Generator<'s> {
    /// The syntax tree the document is generated from.
    tree: SyntaxTree<'s>,
}
impl<'s> Generator<'s> {
    /// Create a new generator from a syntax tree.
    fn new(tree: SyntaxTree<'s>) -> Generator<'s> {
        Generator { tree }
    }

    /// Generate the abstract document: all words flowed into one text
    /// block on a single A4 page, set in Helvetica.
    fn generate(&mut self) -> GenResult<Document> {
        let mut flowed = String::new();
        for node in &self.tree.nodes {
            match node {
                // A space is only flowed in once text has started, which
                // drops leading whitespace.
                Node::Space => {
                    if !flowed.is_empty() {
                        flowed.push(' ');
                    }
                }
                Node::Newline => {}
                Node::Word(word) => flowed.push_str(word),
                Node::ToggleItalics | Node::ToggleBold | Node::ToggleMath => unimplemented!(),
                Node::Func(_) => unimplemented!(),
            }
        }

        // One A4-sized page carrying the whole text.
        let pages = vec![Page {
            size: [Size::from_mm(210.0), Size::from_mm(297.0)],
            contents: vec![Text(flowed)],
        }];

        Ok(Document {
            pages,
            fonts: vec![DocumentFont::Builtin(BuiltinFont::Helvetica)],
        })
    }

    /// Gives a generation error with a message.
    #[inline]
    fn err<R, S: Into<String>>(&self, message: S) -> GenResult<R> {
        Err(GenerationError { message: message.into() })
    }
}
#[cfg(test)]
mod generator_tests {
    use super::*;
    use crate::parsing::{Tokenize, Parse};

    /// Test if the source gets generated into the document.
    fn test(src: &str, doc: Document) {
        assert_eq!(src.tokenize().parse().unwrap().generate(), Ok(doc));
    }

    /// Test if generation gives this error for the source code.
    // Currently unused; kept for upcoming error-path tests.
    fn test_err(src: &str, err: GenerationError) {
        assert_eq!(src.tokenize().parse().unwrap().generate(), Err(err));
    }

    /// A single sentence should end up as one text block on one A4 page.
    #[test]
    fn generator_simple() {
        test("This is an example of a sentence.", Document {
            pages: vec![
                Page {
                    size: [Size::from_mm(210.0), Size::from_mm(297.0)],
                    contents: vec![
                        Text("This is an example of a sentence.".to_owned()),
                    ]
                }
            ],
            fonts: vec![DocumentFont::Builtin(BuiltinFont::Helvetica)],
        });
    }
}

270
src/font.rs Normal file
View File

@ -0,0 +1,270 @@
//! Reading of metrics and font data from _OpenType_ and _TrueType_ font files.
#![allow(unused_variables)]
use std::fmt;
use std::io::{self, Read, Seek, SeekFrom};
use byteorder::{BE, ReadBytesExt};
/// A loaded opentype (or truetype) font.
#[derive(Debug, Clone, PartialEq)]
pub struct Font {
    /// The PostScript name of this font.
    pub name: String,
}

impl Font {
    /// Create a new font from a byte source (anything readable and seekable).
    pub fn new<R>(data: &mut R) -> FontResult<Font> where R: Read + Seek {
        OpenTypeReader::new(data).read()
    }
}
/// Built-in fonts.
///
// NOTE(review): `TimeItalic` / `TimeBoldItalic` presumably refer to the
// "Times" family — consider renaming to `TimesItalic` / `TimesBoldItalic`
// in a breaking release.
#[derive(Debug, Copy, Clone, PartialEq)]
#[allow(missing_docs)]
pub enum BuiltinFont {
    Courier,
    CourierBold,
    CourierOblique,
    CourierBoldOblique,
    Helvetica,
    HelveticaBold,
    HelveticaOblique,
    HelveticaBoldOblique,
    TimesRoman,
    TimesBold,
    TimeItalic,
    TimeBoldItalic,
    Symbol,
    ZapfDingbats,
}
impl BuiltinFont {
    /// The PostScript base-font name of the font.
    ///
    /// These strings must match the standard 14 font names of the PDF
    /// specification exactly, since viewers resolve built-in fonts purely
    /// by name.
    pub fn name(&self) -> &'static str {
        use BuiltinFont::*;
        match self {
            Courier => "Courier",
            CourierBold => "Courier-Bold",
            CourierOblique => "Courier-Oblique",
            CourierBoldOblique => "Courier-BoldOblique",
            Helvetica => "Helvetica",
            HelveticaBold => "Helvetica-Bold",
            HelveticaOblique => "Helvetica-Oblique",
            HelveticaBoldOblique => "Helvetica-BoldOblique",
            TimesRoman => "Times-Roman",
            TimesBold => "Times-Bold",
            // The standard names are "Times-Italic" / "Times-BoldItalic";
            // the previous "Time-…" strings are not valid base fonts. The
            // variant names keep their spelling to preserve the public API.
            TimeItalic => "Times-Italic",
            TimeBoldItalic => "Times-BoldItalic",
            Symbol => "Symbol",
            ZapfDingbats => "ZapfDingbats",
        }
    }
}
/// Result type used for font loading.
type FontResult<T> = std::result::Result<T, LoadingError>;

/// A failure when loading a font.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct LoadingError {
    /// A message describing the error.
    pub message: String,
}

impl From<io::Error> for LoadingError {
    // Wraps I/O failures from the byte source so `?` can be used freely.
    fn from(err: io::Error) -> LoadingError {
        LoadingError { message: format!("io error: {}", err) }
    }
}

impl fmt::Display for LoadingError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "font loading error: {}", self.message)
    }
}
/// Reads a font from a _OpenType_ or _TrueType_ font file.
struct OpenTypeReader<'r, R> where R: Read + Seek {
    /// The byte source the font is read from.
    data: &'r mut R,
    /// The font being assembled while reading.
    font: Font,
    /// The table records read from the file's offset table.
    table_records: Vec<TableRecord>,
}

/// Used to identify a table, design-variation axis, script,
/// language system, feature, or baseline.
#[derive(Clone, PartialEq)]
struct Tag(pub [u8; 4]);

impl PartialEq<&str> for Tag {
    // Allows comparing tags directly against literals like `"name"`.
    fn eq(&self, other: &&str) -> bool {
        other.as_bytes() == &self.0
    }
}

impl fmt::Debug for Tag {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "\"{}\"", self)
    }
}

impl fmt::Display for Tag {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // The four bytes are printed as their char values (tags are
        // conventionally ASCII).
        let a = self.0;
        write!(f, "{}{}{}{}", a[0] as char, a[1] as char, a[2] as char, a[3] as char)
    }
}

/// Stores information about one table.
#[derive(Debug, Clone, PartialEq)]
struct TableRecord {
    /// The tag identifying the table.
    table: Tag,
    /// The checksum of the table data.
    check_sum: u32,
    /// Byte offset of the table from the start of the file.
    offset: u32,
    /// Length of the table in bytes.
    length: u32,
}
impl<'r, R> OpenTypeReader<'r, R> where R: Read + Seek {
    /// Create a new reader from a byte source.
    pub fn new(data: &'r mut R) -> OpenTypeReader<'r, R> {
        OpenTypeReader {
            data,
            font: Font {
                name: String::new(),
            },
            table_records: vec![],
        }
    }

    /// Read the font from the byte source.
    pub fn read(mut self) -> FontResult<Font> {
        self.read_table_records()?;
        self.read_name_table()?;
        Ok(self.font)
    }

    /// Read the offset table listing all tables contained in the file.
    fn read_table_records(&mut self) -> FontResult<()> {
        let sfnt_version = self.data.read_u32::<BE>()?;
        let num_tables = self.data.read_u16::<BE>()?;
        // The binary-search helper fields are not needed here, but the
        // bytes still have to be consumed to stay aligned with the format.
        let _search_range = self.data.read_u16::<BE>()?;
        let _entry_selector = self.data.read_u16::<BE>()?;
        let _range_shift = self.data.read_u16::<BE>()?;

        // 0x00010000 marks TrueType outlines, 0x4F54544F ("OTTO") marks CFF.
        match sfnt_version {
            0x00010000 | 0x4F54544F => {},
            _ => return self.err("unsupported font outlines"),
        }

        for _ in 0 .. num_tables {
            let table = self.read_tag()?;
            let check_sum = self.data.read_u32::<BE>()?;
            let offset = self.data.read_u32::<BE>()?;
            let length = self.data.read_u32::<BE>()?;
            self.table_records.push(TableRecord {
                table,
                check_sum,
                offset,
                length,
            });
        }
        Ok(())
    }

    /// Read the name table (gives general information about the font).
    fn read_name_table(&mut self) -> FontResult<()> {
        let table = match self.table_records.iter().find(|record| record.table == "name") {
            Some(table) => table,
            None => return self.err("missing 'name' table"),
        };

        self.data.seek(SeekFrom::Start(table.offset as u64))?;
        let _format = self.data.read_u16::<BE>()?;
        let count = self.data.read_u16::<BE>()?;
        let string_offset = self.data.read_u16::<BE>()?;
        // Start of the string storage area, relative to the file start.
        let storage = (table.offset + string_offset as u32) as u64;

        let mut name = None;
        for _ in 0 .. count {
            let platform_id = self.data.read_u16::<BE>()?;
            let encoding_id = self.data.read_u16::<BE>()?;
            let _language_id = self.data.read_u16::<BE>()?;
            let name_id = self.data.read_u16::<BE>()?;
            let length = self.data.read_u16::<BE>()?;
            let offset = self.data.read_u16::<BE>()?;

            // The PostScript name (id 6) in Windows (3) / Unicode BMP (1)
            // encoding is what we are interested in.
            if name_id == 6 && platform_id == 3 && encoding_id == 1 {
                // UTF-16 strings must have an even byte length.
                if length % 2 != 0 {
                    return self.err("invalid encoded name");
                }
                self.data.seek(SeekFrom::Start(storage + offset as u64))?;
                let mut buffer = Vec::with_capacity(length as usize / 2);
                for _ in 0 .. length / 2 {
                    buffer.push(self.data.read_u16::<BE>()?);
                }
                name = match String::from_utf16(&buffer) {
                    Ok(string) => Some(string),
                    Err(_) => return self.err("invalid encoded name"),
                };
                break;
            }
        }

        self.font.name = match name {
            Some(name) => name,
            None => return self.err("missing postscript font name"),
        };
        Ok(())
    }

    /// Read a tag (array of four u8's).
    fn read_tag(&mut self) -> FontResult<Tag> {
        let mut tag = [0u8; 4];
        // `read` may fill the buffer only partially; `read_exact` guarantees
        // that all four bytes are read or an error is returned.
        self.data.read_exact(&mut tag)?;
        Ok(Tag(tag))
    }

    /// Gives a font loading error with a message.
    fn err<T, S: Into<String>>(&self, message: S) -> FontResult<T> {
        Err(LoadingError { message: message.into() })
    }
}
#[cfg(test)]
mod font_tests {
    use super::*;

    /// Test if the loaded font is the same as the expected font.
    // NOTE(review): the paths are resolved relative to the test runner's
    // working directory — confirm where the `fonts` directory lives.
    fn test(path: &str, font: Font) {
        let mut file = std::fs::File::open(path).unwrap();
        assert_eq!(Font::new(&mut file), Ok(font));
    }

    /// Loads two real font files and checks the extracted PostScript names.
    #[test]
    fn opentype() {
        test("../fonts/NotoSerif-Regular.ttf", Font {
            name: "NotoSerif".to_owned(),
        });
        test("../fonts/NotoSansMath-Regular.ttf", Font {
            name: "NotoSansMath-Regular".to_owned(),
        });
    }
}

11
src/lib.rs Normal file
View File

@ -0,0 +1,11 @@
//! Typeset is a library for compiling _plain-text_ strings written in the
//! corresponding typesetting language into a typeset document in a
//! file format like _PDF_.

#![allow(unused)]

// Tokenization and parsing of source code into syntax trees.
pub mod parsing;
// Abstract document representation generated from syntax trees.
pub mod doc;
// Reading of metrics and data from OpenType/TrueType font files.
pub mod font;
// Serialization of documents into the PDF format.
pub mod pdf;
// Shared helpers (`Spline` string splitting and `str` extensions).
pub mod utility;

696
src/parsing.rs Normal file
View File

@ -0,0 +1,696 @@
//! Parsing of source code into tokens and syntax trees.
use std::fmt;
use std::iter::Peekable;
use std::mem::swap;
use unicode_segmentation::{UnicodeSegmentation, UWordBounds};
use crate::utility::{Splinor, Spline, Splined, StrExt};
/// A logical unit of the incoming text stream.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Token<'s> {
    /// One or more whitespace (non-newline) codepoints.
    Space,
    /// A line feed (either `\n` or `\r\n`).
    Newline,
    /// A left bracket: `[`.
    LeftBracket,
    /// A right bracket: `]`.
    RightBracket,
    /// A colon (`:`) indicating the beginning of function arguments.
    ///
    /// If a colon occurs outside of the function header, it will be
    /// tokenized as a `Word`.
    Colon,
    /// An equals sign (`=`); same context rules as with `Colon`.
    Equals,
    /// Two underscores, indicating text in _italics_.
    DoubleUnderscore,
    /// Two stars, indicating **bold** text.
    DoubleStar,
    /// A dollar sign, indicating mathematical content.
    Dollar,
    /// A hashtag starting a comment.
    Hashtag,
    /// Everything else just is a literal word.
    Word(&'s str),
}

/// A type that is seperable into logical units (tokens).
pub trait Tokenize {
    /// Tokenize self into logical units.
    fn tokenize<'s>(&'s self) -> Tokens<'s>;
}

impl Tokenize for str {
    fn tokenize<'s>(&'s self) -> Tokens<'s> {
        Tokens::new(self)
    }
}
/// An iterator over the tokens of a text.
#[derive(Clone)]
pub struct Tokens<'s> {
    /// The complete source text.
    source: &'s str,
    /// Unicode word-boundary iterator over the source.
    words: Peekable<UWordBounds<'s>>,
    /// The current tokenizer state.
    state: TokensState<'s>,
    /// Previous states to return to when the current scope ends.
    stack: Vec<TokensState<'s>>,
}

impl fmt::Debug for Tokens<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // `UWordBounds` is not `Debug`, so a placeholder is printed instead.
        f.debug_struct("Tokens")
            .field("source", &self.source)
            .field("words", &"Peekable<UWordBounds>")
            .field("state", &self.state)
            .field("stack", &self.stack)
            .finish()
    }
}

/// The state the tokenizer is in.
#[derive(Debug, Clone)]
enum TokensState<'s> {
    /// The base state if there is nothing special we are in.
    Body,
    /// Inside a function header. Here colons and equal signs get parsed
    /// as distinct tokens rather than text.
    Function,
    /// We expect either the end of the function or the beginning of the body.
    MaybeBody,
    /// We are inside one unicode word that consists of multiple tokens,
    /// because it contains double underscores.
    DoubleUnderscore(Spline<'s, Token<'s>>),
}

impl PartialEq for TokensState<'_> {
    fn eq(&self, other: &TokensState) -> bool {
        use TokensState as TS;
        match (self, other) {
            (TS::Body, TS::Body) => true,
            (TS::Function, TS::Function) => true,
            (TS::MaybeBody, TS::MaybeBody) => true,
            // They are not necessarily different, but we don't care:
            // equality is only ever checked against the simple states.
            _ => false,
        }
    }
}
impl<'s> Iterator for Tokens<'s> {
    type Item = Token<'s>;

    /// Advance the iterator, return the next token or nothing.
    fn next(&mut self) -> Option<Token<'s>> {
        use TokensState as TS;

        // Return the remaining words and double underscores.
        if let TS::DoubleUnderscore(ref mut splinor) = self.state {
            loop {
                if let Some(splined) = splinor.next() {
                    return Some(match splined {
                        // Empty fragments (e.g. between two adjacent `__`)
                        // produce no token.
                        Splined::Value(word) if word != "" => Token::Word(word),
                        Splined::Splinor(s) => s,
                        _ => continue,
                    });
                } else {
                    self.unswitch();
                    break;
                }
            }
        }

        // Skip whitespace, but if at least one whitespace word existed,
        // remember that, because we return a space token.
        let mut whitespace = false;
        while let Some(word) = self.words.peek() {
            if !word.is_whitespace() {
                break;
            }
            whitespace = true;
            self.advance();
        }
        if whitespace {
            return Some(Token::Space);
        }

        // Function maybe has a body: only a directly following `[` opens
        // it; anything else falls back to the previous state.
        if self.state == TS::MaybeBody {
            match *self.words.peek()? {
                "[" => {
                    self.state = TS::Body;
                    return Some(self.consumed(Token::LeftBracket));
                },
                _ => self.unswitch(),
            }
        }

        // Now all special cases are handled and we can finally look at the
        // next words.
        let next = self.words.next()?;
        let afterwards = self.words.peek();

        Some(match next {
            // Special characters
            "[" => {
                self.switch(TS::Function);
                Token::LeftBracket
            },
            "]" => {
                if self.state == TS::Function {
                    self.state = TS::MaybeBody;
                }
                Token::RightBracket
            },
            "$" => Token::Dollar,
            "#" => Token::Hashtag,
            // Context sensitive operators
            ":" if self.state == TS::Function => Token::Colon,
            "=" if self.state == TS::Function => Token::Equals,
            // Double star/underscore
            "*" if afterwards == Some(&"*") => {
                self.consumed(Token::DoubleStar)
            },
            "__" => Token::DoubleUnderscore,
            // Newlines
            "\n" | "\r\n" => Token::Newline,
            // Escaping
            r"\" => {
                if let Some(next) = afterwards {
                    let escapable = match *next {
                        "[" | "]" | "$" | "#" | r"\" | ":" | "=" | "*" | "_" => true,
                        w if w.starts_with("__") => true,
                        _ => false,
                    };
                    if escapable {
                        let next = *next;
                        self.advance();
                        return Some(Token::Word(next));
                    }
                }
                // A backslash before a non-escapable word is a literal.
                Token::Word(r"\")
            },
            // Double underscores hidden in words: split the word and emit
            // the fragments one by one via the DoubleUnderscore state.
            word if word.contains("__") => {
                let spline = word.spline("__", Token::DoubleUnderscore);
                self.switch(TS::DoubleUnderscore(spline));
                return self.next();
            },
            // Now it seems like it's just a normal word.
            word => Token::Word(word),
        })
    }
}
impl<'s> Tokens<'s> {
    /// Create a new token stream from text.
    #[inline]
    pub fn new(source: &'s str) -> Tokens<'s> {
        let words = source.split_word_bounds().peekable();
        Tokens {
            source,
            words,
            state: TokensState::Body,
            stack: Vec::new(),
        }
    }

    /// Advance the word iterator by one step.
    #[inline]
    fn advance(&mut self) {
        let _ = self.words.next();
    }

    /// Switch to the given state, remembering the current one on the stack.
    #[inline]
    fn switch(&mut self, new_state: TokensState<'s>) {
        let previous = std::mem::replace(&mut self.state, new_state);
        self.stack.push(previous);
    }

    /// Go back to the state on top of the stack (or `Body` if it is empty).
    #[inline]
    fn unswitch(&mut self) {
        self.state = match self.stack.pop() {
            Some(state) => state,
            None => TokensState::Body,
        };
    }

    /// Advance past the current word and return the given token.
    #[inline]
    fn consumed(&mut self, token: Token<'s>) -> Token<'s> {
        self.advance();
        token
    }
}
/// A tree representation of the source.
#[derive(Debug, Clone, PartialEq)]
pub struct SyntaxTree<'s> {
    /// The children.
    pub nodes: Vec<Node<'s>>,
}

impl<'s> SyntaxTree<'s> {
    /// Create an empty syntax tree.
    pub fn new() -> SyntaxTree<'s> {
        SyntaxTree { nodes: vec![] }
    }
}

/// A node in the abstract syntax tree.
#[derive(Debug, Clone, PartialEq)]
pub enum Node<'s> {
    /// Whitespace between other nodes.
    Space,
    /// A line feed.
    Newline,
    /// Indicates that italics were enabled/disabled.
    ToggleItalics,
    /// Indicates that boldface was enabled/disabled.
    ToggleBold,
    /// Indicates that math mode was enabled/disabled.
    ToggleMath,
    /// A literal word.
    Word(&'s str),
    /// A function invocation.
    Func(Function<'s>),
}

/// A node representing a function invocation.
#[derive(Debug, Clone, PartialEq)]
pub struct Function<'s> {
    /// The name of the function.
    pub name: &'s str,
    /// Some syntax tree if the function had a body (second set of brackets),
    /// otherwise nothing.
    pub body: Option<SyntaxTree<'s>>,
}
/// A type that is parseable into a syntax tree.
pub trait Parse<'s> {
    /// Parse self into a syntax tree.
    fn parse(self) -> ParseResult<SyntaxTree<'s>>;
}

impl<'s> Parse<'s> for Tokens<'s> {
    fn parse(self) -> ParseResult<SyntaxTree<'s>> {
        Parser::new(self).parse()
    }
}

impl<'s> Parse<'s> for Vec<Token<'s>> {
    fn parse(self) -> ParseResult<SyntaxTree<'s>> {
        Parser::new(self.into_iter()).parse()
    }
}

/// Result type used for parsing.
type ParseResult<T> = std::result::Result<T, ParseError>;

/// A failure when parsing.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ParseError {
    /// A message describing the error.
    pub message: String,
}

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "parse error: {}", self.message)
    }
}
/// Parses a token stream into an abstract syntax tree.
#[derive(Debug, Clone)]
struct Parser<'s, T> where T: Iterator<Item = Token<'s>> {
    /// The remaining tokens.
    tokens: Peekable<T>,
    /// The current parser state.
    state: ParserState,
    /// Functions whose bodies are still open.
    stack: Vec<Function<'s>>,
    /// The finished part of the tree.
    tree: SyntaxTree<'s>,
}

/// The state the parser is in.
#[derive(Debug, Clone, PartialEq)]
enum ParserState {
    /// The base state of the parser.
    Body,
    /// Inside a function header.
    Function,
}
impl<'s, T> Parser<'s, T> where T: Iterator<Item = Token<'s>> {
    /// Create a new parser from a type that emits results of tokens.
    fn new(tokens: T) -> Parser<'s, T> {
        Parser {
            tokens: tokens.peekable(),
            state: ParserState::Body,
            stack: vec![],
            tree: SyntaxTree::new(),
        }
    }

    /// Parse into an abstract syntax tree.
    fn parse(mut self) -> ParseResult<SyntaxTree<'s>> {
        use ParserState as PS;

        while let Some(token) = self.tokens.next() {
            // Comments: skip everything up to and including the newline.
            if token == Token::Hashtag {
                self.skip_while(|t| *t != Token::Newline);
                self.advance();
                // Restart with the next token. Previously the hashtag fell
                // through into the state match below and hit the
                // `unreachable!()` arm, panicking on any comment.
                continue;
            }

            match self.state {
                PS::Body => match token {
                    // Whitespace
                    Token::Space => self.append(Node::Space),
                    Token::Newline => self.append(Node::Newline),
                    // Words
                    Token::Word(word) => self.append(Node::Word(word)),
                    // Functions
                    Token::LeftBracket => self.switch(PS::Function),
                    Token::RightBracket => {
                        // A closing bracket finishes the innermost open
                        // function body.
                        match self.stack.pop() {
                            Some(func) => self.append(Node::Func(func)),
                            None => return self.err("unexpected closing bracket"),
                        }
                    },
                    // Modifiers
                    Token::DoubleUnderscore => self.append(Node::ToggleItalics),
                    Token::DoubleStar => self.append(Node::ToggleBold),
                    Token::Dollar => self.append(Node::ToggleMath),
                    // The tokenizer only emits colons/equals inside function
                    // headers and hashtags are consumed above.
                    Token::Colon | Token::Equals | Token::Hashtag => unreachable!(),
                },

                PS::Function => {
                    let name = match token {
                        Token::Word(word) if word.is_identifier() => word,
                        _ => return self.err("expected identifier"),
                    };
                    if self.tokens.next() != Some(Token::RightBracket) {
                        return self.err("expected closing bracket");
                    }
                    let mut func = Function {
                        name,
                        body: None,
                    };
                    // A directly following left bracket opens the body.
                    if let Some(Token::LeftBracket) = self.tokens.peek() {
                        self.advance();
                        func.body = Some(SyntaxTree::new());
                        self.stack.push(func);
                    } else {
                        self.append(Node::Func(func));
                    }
                    self.switch(PS::Body);
                },
            }
        }

        // All opened function bodies must be closed by the end.
        if !self.stack.is_empty() {
            return self.err("expected closing bracket");
        }
        Ok(self.tree)
    }

    /// Advance the iterator by one step.
    #[inline]
    fn advance(&mut self) {
        self.tokens.next();
    }

    /// Skip tokens while the condition is met.
    #[inline]
    fn skip_while<F>(&mut self, f: F) where F: Fn(&Token) -> bool {
        while let Some(token) = self.tokens.peek() {
            if !f(token) {
                break;
            }
            self.advance();
        }
    }

    /// Switch the state.
    #[inline]
    fn switch(&mut self, state: ParserState) {
        self.state = state;
    }

    /// Append a node to the top-of-stack function or the main tree itself.
    #[inline]
    fn append(&mut self, node: Node<'s>) {
        let tree = match self.stack.last_mut() {
            Some(func) => func.body.get_or_insert_with(|| SyntaxTree::new()),
            None => &mut self.tree,
        };
        tree.nodes.push(node);
    }

    /// Gives a parsing error with a message.
    #[inline]
    fn err<R, S: Into<String>>(&self, message: S) -> ParseResult<R> {
        Err(ParseError { message: message.into() })
    }
}
#[cfg(test)]
mod token_tests {
    use super::*;
    use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
        Colon as C, Equals as E, DoubleUnderscore as DU, DoubleStar as DS,
        Dollar as D, Hashtag as H, Word as W};

    /// Test if the source code tokenizes to the tokens.
    fn test(src: &str, tokens: Vec<Token>) {
        assert_eq!(src.tokenize().collect::<Vec<_>>(), tokens);
    }

    /// Tokenizes the basic building blocks.
    #[test]
    fn tokenize_base() {
        test("", vec![]);
        test("Hallo", vec![W("Hallo")]);
        test("[", vec![L]);
        test("]", vec![R]);
        test("$", vec![D]);
        test("#", vec![H]);
        test("**", vec![DS]);
        test("__", vec![DU]);
        test("\n", vec![N]);
    }

    /// Tests if escaping with backslash works as it should.
    #[test]
    fn tokenize_escape() {
        test(r"\[", vec![W("[")]);
        test(r"\]", vec![W("]")]);
        test(r"\#", vec![W("#")]);
        test(r"\$", vec![W("$")]);
        test(r"\:", vec![W(":")]);
        test(r"\=", vec![W("=")]);
        test(r"\**", vec![W("*"), W("*")]);
        test(r"\*", vec![W("*")]);
        test(r"\__", vec![W("__")]);
        test(r"\_", vec![W("_")]);
        test(r"\hello", vec![W(r"\"), W("hello")]);
    }

    /// Tokenizes some more realistic examples.
    // Note: runs of whitespace collapse into a single `S` token, so the
    // exact indentation inside the raw strings does not matter.
    #[test]
    fn tokenize_examples() {
        test(r"
            [function][
                Test [italic][example]!
            ]
        ", vec![
            N, S, L, W("function"), R, L, N, S, W("Test"), S, L, W("italic"), R, L,
            W("example"), R, W("!"), N, S, R, N, S
        ]);
        test(r"
            [page: size=A4]
            [font: size=12pt]

            Das ist ein Beispielsatz mit **fetter** Schrift.
        ", vec![
            N, S, L, W("page"), C, S, W("size"), E, W("A4"), R, N, S,
            L, W("font"), C, S, W("size"), E, W("12pt"), R, N, N, S,
            W("Das"), S, W("ist"), S, W("ein"), S, W("Beispielsatz"), S, W("mit"), S,
            DS, W("fetter"), DS, S, W("Schrift"), W("."), N, S
        ]);
    }

    /// This test checks whether the colon and equals symbols get parsed correctly
    /// depending on the context: Either in a function header or in a body.
    #[test]
    fn tokenize_symbols_context() {
        test("[func: key=value][Answer: 7]",
            vec![L, W("func"), C, S, W("key"), E, W("value"), R, L,
                W("Answer"), W(":"), S, W("7"), R]);
        test("[[n: k=v]:x][:[=]]:=",
            vec![L, L, W("n"), C, S, W("k"), E, W("v"), R, C, W("x"), R,
                L, W(":"), L, E, R, R, W(":"), W("=")]);
        test("[func: __key__=value]",
            vec![L, W("func"), C, S, DU, W("key"), DU, E, W("value"), R]);
    }

    /// This test has a special look at the double underscore syntax, because
    /// per Unicode standard they are not seperate words and thus harder to parse
    /// than the stars.
    #[test]
    fn tokenize_double_underscore() {
        test("he__llo__world_ _ __ Now this_ is__ special!",
            vec![W("he"), DU, W("llo"), DU, W("world_"), S, W("_"), S, DU, S, W("Now"), S,
                W("this_"), S, W("is"), DU, S, W("special"), W("!")]);
    }

    /// This test is for checking if non-ASCII characters get parsed correctly.
    #[test]
    fn tokenize_unicode() {
        test("[document][Hello 🌍!]",
            vec![L, W("document"), R, L, W("Hello"), S, W("🌍"), W("!"), R]);
        test("[f]⺐.", vec![L, W("f"), R, W(""), W(".")]);
    }

    /// This test looks if LF- and CRLF-style newlines get both identified correctly.
    #[test]
    fn tokenize_whitespace_newlines() {
        test(" \t", vec![S]);
        test("First line\r\nSecond line\nThird line\n",
            vec![W("First"), S, W("line"), N, W("Second"), S, W("line"), N,
                W("Third"), S, W("line"), N]);
    }
}
#[cfg(test)]
mod parse_tests {
    use super::*;
    use Node::{Space as S, Newline as N, Word as W, Func as F};

    /// Test if the source code parses into the syntax tree.
    fn test(src: &str, tree: SyntaxTree) {
        assert_eq!(src.tokenize().parse(), Ok(tree));
    }

    /// Test if the source parses into the error.
    fn test_err(src: &str, err: ParseError) {
        assert_eq!(src.tokenize().parse(), Err(err));
    }

    /// Short cut macro to create a syntax tree.
    /// Is `vec`-like and the elements are the nodes.
    macro_rules! tree {
        ($($x:expr),*) => (
            SyntaxTree { nodes: vec![$($x),*] }
        );
        ($($x:expr,)*) => (tree![$($x),*])
    }

    /// Parse the basic cases.
    #[test]
    fn parse_base() {
        test("", tree! {});
        test("Hello World!", tree! { W("Hello"), S, W("World"), W("!")});
    }

    /// Parse things dealing with functions.
    #[test]
    fn parse_functions() {
        test("[test]", tree! { F(Function { name: "test", body: None }) });
        test("This is an [modifier][example] of a function invocation.", tree! {
            W("This"), S, W("is"), S, W("an"), S,
            F(Function { name: "modifier", body: Some(tree! { W("example") }) }), S,
            W("of"), S, W("a"), S, W("function"), S, W("invocation"), W(".")
        });
        test("[func][Hello][links][Here][end]", tree! {
            F(Function {
                name: "func",
                body: Some(tree! { W("Hello") }),
            }),
            F(Function {
                name: "links",
                body: Some(tree! { W("Here") }),
            }),
            F(Function {
                name: "end",
                body: None,
            }),
        });
        // An empty pair of body brackets yields an empty (but present) body.
        test("[bodyempty][]", tree! {
            F(Function {
                name: "bodyempty",
                body: Some(tree! {})
            })
        });
        test("[nested][[func][call]] outside", tree! {
            F(Function {
                name: "nested",
                body: Some(tree! { F(Function {
                    name: "func",
                    body: Some(tree! { W("call") }),
                }), }),
            }),
            S, W("outside")
        });
    }

    /// Tests if the parser handles non-ASCII stuff correctly.
    #[test]
    fn parse_unicode() {
        test("[lib_parse] ⺐.", tree! {
            F(Function {
                name: "lib_parse",
                body: None
            }),
            S, W(""), W(".")
        });
        test("[func123][Hello 🌍!]", tree! {
            F(Function {
                name: "func123",
                body: Some(tree! { W("Hello"), S, W("🌍"), W("!") }),
            })
        });
    }

    /// Tests whether errors get reported correctly.
    #[test]
    fn parse_errors() {
        test_err("No functions here]", ParseError {
            message: "unexpected closing bracket".to_owned(),
        });
        test_err("[hello][world", ParseError {
            message: "expected closing bracket".to_owned(),
        });
        test_err("[hello world", ParseError {
            message: "expected closing bracket".to_owned(),
        });
        test_err("[ no-name][Why?]", ParseError {
            message: "expected identifier".to_owned(),
        });
    }
}

375
src/pdf.rs Normal file
View File

@ -0,0 +1,375 @@
//! Writing of documents in the _PDF_ format.
use std::io::{self, Write};
use crate::doc::{Document, Text, DocumentFont, Size};
/// A type that is a sink for types that can be written conforming
/// to the _PDF_ format (that may be things like sizes, other objects
/// or whole documents).
pub trait WritePdf<T> {
    /// Write self into a byte sink, returning how many bytes were written.
    fn write_pdf(&mut self, object: &T) -> io::Result<usize>;
}

impl<W: Write> WritePdf<Document> for W {
    fn write_pdf(&mut self, document: &Document) -> io::Result<usize> {
        PdfWriter::new(document).write(self)
    }
}

impl<W: Write> WritePdf<Size> for W {
    // Sizes are written as their point value in decimal notation.
    fn write_pdf(&mut self, size: &Size) -> io::Result<usize> {
        self.write_str(size.points)
    }
}
/// A type that is a sink for types that can be converted to strings
/// and thus can be written string-like into a byte sink.
pub trait WriteByteString {
    /// Write the string-like type into self, returning how many
    /// bytes were written.
    fn write_str<S: ToString>(&mut self, string_like: S) -> io::Result<usize>;
}

impl<W: Write> WriteByteString for W {
    fn write_str<S: ToString>(&mut self, string_like: S) -> io::Result<usize> {
        let bytes = string_like.to_string().into_bytes();
        // `write` may perform a partial write and silently drop the rest;
        // `write_all` guarantees the whole string reaches the sink (or an
        // error is returned), which the byte-counting in PdfWriter relies on.
        self.write_all(&bytes)?;
        Ok(bytes.len())
    }
}
/// Writes an abstract document into a byte sink in the _PDF_ format.
#[derive(Debug, Clone)]
struct PdfWriter<'d> {
    /// The document being written.
    doc: &'d Document,
    /// Total number of bytes written so far.
    w: usize,
    /// Object id of the document catalog.
    catalog_id: u32,
    /// Object id of the page tree.
    page_tree_id: u32,
    /// First object id of the font resource objects.
    resources_start: u32,
    /// First object id of the page objects.
    pages_start: u32,
    /// First object id of the content stream objects.
    content_start: u32,
    /// Byte offsets of the written objects, collected for the
    /// cross-reference table.
    xref_table: Vec<u32>,
    // Presumably the byte offset of the xref table itself, set when it is
    // written (that code is further down) — TODO confirm.
    offset_xref: u32,
}
impl<'d> PdfWriter<'d> {
/// Create a new pdf writer from a document.
fn new(doc: &'d Document) -> PdfWriter<'d> {
// Calculate unique ids for each object
let catalog_id: u32 = 1;
let page_tree_id = catalog_id + 1;
let pages_start = page_tree_id + 1;
let resources_start = pages_start + doc.pages.len() as u32;
let content_start = resources_start + doc.fonts.len() as u32;
PdfWriter {
doc,
catalog_id,
page_tree_id,
resources_start,
pages_start,
content_start,
w: 0,
xref_table: vec![],
offset_xref: 0,
}
}
/// Write the document into a byte sink.
fn write<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
self.write_header(target)?;
self.write_document_catalog(target)?;
self.write_page_tree(target)?;
self.write_pages(target)?;
self.write_resources(target)?;
self.write_content(target)?;
// self.write_fonts(target)?;
self.write_xref_table(target)?;
self.write_trailer(target)?;
self.write_start_xref(target)?;
Ok(self.w)
}
/// Write the pdf header.
fn write_header<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
// Write the magic start
self.w += target.write(b"%PDF-1.7\n")?;
Ok(self.w)
}
/// Write the document catalog (contains general info about the document).
fn write_document_catalog<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
self.xref_table.push(self.w as u32);
self.w += target.write_str(self.catalog_id)?;
self.w += target.write(b" 0 obj\n")?;
self.w += target.write(b"<<\n")?;
self.w += target.write(b"/Type /Catalog\n")?;
self.w += target.write(b"/Pages ")?;
self.w += target.write_str(self.page_tree_id)?;
self.w += target.write(b" 0 R\n")?;
self.w += target.write(b">>\n")?;
self.w += target.write(b"endobj\n")?;
Ok(self.w)
}
/// Write the page tree (overview over the pages of a document).
fn write_page_tree<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
self.xref_table.push(self.w as u32);
// Create page tree
self.w += target.write_str(self.page_tree_id)?;
self.w += target.write(b" 0 obj\n")?;
self.w += target.write(b"<<\n")?;
self.w += target.write(b"/Type /Pages\n")?;
self.w += target.write(b"/Count ")?;
self.w += target.write_str(self.doc.pages.len())?;
self.w += target.write(b"\n")?;
self.w += target.write(b"/Kids [")?;
for id in self.pages_start .. self.pages_start + self.doc.pages.len() as u32 {
self.w += target.write_str(id)?;
self.w += target.write(b" 0 R ")?;
}
self.w += target.write(b"]\n")?;
self.w += target.write(b"/Resources\n")?;
self.w += target.write(b"<<\n")?;
self.w += target.write(b"/Font\n")?;
self.w += target.write(b"<<\n")?;
let mut font_id = self.resources_start;
for nr in 1 ..= self.doc.fonts.len() as u32 {
self.w += target.write(b"/F")?;
self.w += target.write_str(nr)?;
self.w += target.write(b" ")?;
self.w += target.write_str(font_id)?;
self.w += target.write(b" 0 R\n")?;
font_id += 1;
}
self.w += target.write(b">>\n")?;
self.w += target.write(b">>\n")?;
self.w += target.write(b">>\n")?;
self.w += target.write(b"endobj\n")?;
Ok(self.w)
}
/// Write the page descriptions.
///
/// Each page object points back to the page tree as its parent, carries
/// its `/MediaBox` dimensions and references its content stream objects.
fn write_pages<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    let mut page_ref = self.pages_start;
    let mut content_ref = self.content_start;

    for page in &self.doc.pages {
        self.xref_table.push(self.w as u32);

        self.w += target.write_str(page_ref)?;
        self.w += target.write(b" 0 obj\n<<\n/Type /Page\n/Parent ")?;
        self.w += target.write_str(self.page_tree_id)?;
        self.w += target.write(b" 0 R\n/MediaBox [0 0 ")?;
        self.w += target.write_pdf(&page.size[0])?;
        self.w += target.write(b" ")?;
        self.w += target.write_pdf(&page.size[1])?;
        self.w += target.write(b"]\n/Contents [")?;

        // One object reference per piece of content on this page.
        for _ in &page.contents {
            self.w += target.write_str(content_ref)?;
            self.w += target.write(b" 0 R ")?;
            content_ref += 1;
        }
        self.w += target.write(b"]\n>>\nendobj\n")?;

        page_ref += 1;
    }

    Ok(self.w)
}
/// Write the resources used by the file (fonts and friends).
///
/// Emits one font object per document font, numbered consecutively from
/// `resources_start`.
fn write_resources<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    let mut font_ref = self.resources_start;

    for font in &self.doc.fonts {
        self.xref_table.push(self.w as u32);

        self.w += target.write_str(font_ref)?;
        self.w += target.write(b" 0 obj\n<<\n/Type /Font\n")?;

        match font {
            // The built-in standard fonts are referenced by name only.
            DocumentFont::Builtin(builtin) => {
                self.w += target.write(b"/Subtype /Type1\n/BaseFont /")?;
                self.w += target.write_str(builtin.name())?;
                self.w += target.write(b"\n")?;
            },
            // Embedding externally loaded fonts is not implemented yet.
            DocumentFont::Loaded(font) => {
                self.w += target.write(b"/Subtype /TrueType\n/BaseFont /")?;
                self.w += target.write_str(font.name.as_str())?;
                self.w += target.write(b"\n")?;
                unimplemented!();
            },
        }

        self.w += target.write(b">>\nendobj\n")?;
        font_ref += 1;
    }

    Ok(self.w)
}
/// Write the page contents.
///
/// Each piece of text becomes its own content stream object that draws
/// the string at a fixed position with the first font.
fn write_content<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    let mut content_ref = self.content_start;

    for page in &self.doc.pages {
        for content in &page.contents {
            self.xref_table.push(self.w as u32);

            self.w += target.write_str(content_ref)?;
            self.w += target.write(b" 0 obj\n<<\n")?;

            // Assemble the stream in a buffer first because its exact byte
            // length has to appear in the /Length entry before the data.
            let mut buffer = Vec::new();
            // FIX: the begin-text operator is `BT`, not `BT/` — the stray
            // slash put an empty name object into the content stream.
            buffer.extend_from_slice(b"BT\n");
            buffer.extend_from_slice(b"/F1 13 Tf\n");
            buffer.extend_from_slice(b"108 734 Td\n");
            buffer.push(b'(');
            let Text(string) = content;
            // Escape the bytes that are special inside a PDF literal
            // string: backslash and (possibly unbalanced) parentheses.
            for &byte in string.as_bytes() {
                if byte == b'\\' || byte == b'(' || byte == b')' {
                    buffer.push(b'\\');
                }
                buffer.push(byte);
            }
            buffer.extend_from_slice(b") Tj\n");
            buffer.extend_from_slice(b"ET\n");

            self.w += target.write(b"/Length ")?;
            self.w += target.write_str(buffer.len())?;
            self.w += target.write(b"\n>>\nstream\n")?;
            self.w += target.write(&buffer)?;
            self.w += target.write(b"endstream\nendobj\n")?;

            content_ref += 1;
        }
    }

    Ok(self.w)
}
/// Write the cross-reference table.
///
/// The table lists the byte offset of every object in the file, preceded
/// by the mandatory free entry for object number zero.
fn write_xref_table<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    // The startxref section needs to know where this table begins.
    self.offset_xref = self.w as u32;

    self.w += target.write(b"xref\n0 ")?;
    // FIX: the subsection count must include the free entry for object
    // zero (len() + 1 entries follow, and the trailer's /Size already
    // uses len() + 1) — plain len() was one short.
    self.w += target.write_str(self.xref_table.len() + 1)?;
    self.w += target.write(b"\n")?;

    // The mandatory free entry for object number zero.
    self.w += target.write(b"0000000000 65535 f\r\n")?;

    // Each in-use entry is exactly 20 bytes: 10-digit offset, space,
    // 5-digit generation number, space, 'n', CRLF.
    for offset in &self.xref_table {
        self.w += target.write(format!("{:010} 00000 n\r\n", offset).as_bytes())?;
    }

    Ok(self.w)
}
/// Write the trailer (points to the root object).
fn write_trailer<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    self.w += target.write(b"trailer\n<<\n/Root ")?;
    self.w += target.write_str(self.catalog_id)?;
    self.w += target.write(b" 0 R\n/Size ")?;
    // Total object count: every written object plus the free entry zero.
    self.w += target.write_str(self.xref_table.len() + 1)?;
    self.w += target.write(b"\n>>\n")?;
    Ok(self.w)
}
/// Write where the cross-reference table starts.
///
/// Emits the `startxref` keyword followed by the byte offset that
/// `write_xref_table` stored in `self.offset_xref`.
fn write_start_xref<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    self.w += target.write(b"startxref\n")?;
    self.w += target.write_str(self.offset_xref)?;
    self.w += target.write(b"\n")?;
    Ok(self.w)
}
}
#[cfg(test)]
mod pdf_tests {
use super::*;
use crate::parsing::{Tokenize, Parse};
use crate::doc::Generate;
/// Create a pdf with a name from the source code.
///
/// Tokenizes and parses `src`, generates the abstract document and
/// serializes it as a pdf into the file at `name`. Panics on any
/// failure, which is acceptable inside a test.
fn test(name: &str, src: &str) {
let mut file = std::fs::File::create(name).unwrap();
let doc = src.tokenize()
.parse().unwrap()
.generate().unwrap();
file.write_pdf(&doc).unwrap();
}
#[test]
fn pdf_simple() {
// One short sentence, then a longer multi-line text.
// NOTE(review): the relative output paths assume the test binary runs
// from a workspace subfolder next to `target` -- confirm the cwd.
test("../target/write1.pdf", "This is an example of a sentence.");
test("../target/write2.pdf","
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed
diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd
gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor
sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut
labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et
justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est
Lorem ipsum dolor sit amet.
");
}
}

138
src/utility.rs Normal file
View File

@ -0,0 +1,138 @@
//! Utility functionality.
use std::str::Split;
use std::iter::Peekable;
use unicode_xid::UnicodeXID;
/// Types that can be splined.
pub trait Splinor {
    /// Returns an iterator over the substrings separated by `pat`,
    /// interleaved with a clone of `splinor` at every split point.
    ///
    /// # Example
    ///
    /// ```
    /// # use typeset::utility::*;
    /// #[derive(Debug, Copy, Clone, PartialEq)]
    /// struct Space;
    ///
    /// let v: Vec<Splined<Space>> = "My airplane flies!".spline(" ", Space).collect();
    /// assert_eq!(v, [
    ///     Splined::Value("My"),
    ///     Splined::Splinor(Space),
    ///     Splined::Value("airplane"),
    ///     Splined::Splinor(Space),
    ///     Splined::Value("flies!"),
    /// ]);
    /// ```
    fn spline<'s, T: Clone>(&'s self, pat: &'s str, splinor: T) -> Spline<'s, T>;
}

impl Splinor for str {
    fn spline<'s, T: Clone>(&'s self, pat: &'s str, splinor: T) -> Spline<'s, T> {
        let split = self.split(pat).peekable();
        Spline {
            splinor: Splined::Splinor(splinor),
            split,
            splinor_pending: false,
        }
    }
}

/// Iterator interleaving split substrings with splinors.
///
/// Created by the [`spline`](Splinor::spline) function.
#[derive(Debug, Clone)]
pub struct Spline<'s, T> {
    /// The ready-made splinor item handed out at every split point.
    splinor: Splined<'s, T>,
    /// The underlying split iterator; peeked to avoid a trailing splinor.
    split: Peekable<Split<'s, &'s str>>,
    /// Whether the next item should be the splinor rather than a substring.
    splinor_pending: bool,
}

/// Represents either a splitted substring or a splinor.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Splined<'s, T> {
    /// A substring between two split points.
    Value(&'s str),
    /// An intertwined splinor.
    Splinor(T),
}

impl<'s, T: Clone> Iterator for Spline<'s, T> {
    type Item = Splined<'s, T>;

    fn next(&mut self) -> Option<Splined<'s, T>> {
        // Alternate between substrings and splinors, but emit a splinor
        // only while more substrings are still to come.
        if self.splinor_pending && self.split.peek().is_some() {
            self.splinor_pending = false;
            Some(self.splinor.clone())
        } else {
            self.splinor_pending = true;
            self.split.next().map(Splined::Value)
        }
    }
}
/// More useful functions on `str`'s.
pub trait StrExt {
/// Whether self consists only of whitespace.
///
/// Newlines do not count: any `'\n'` makes this `false` (the empty
/// string vacuously counts as whitespace).
fn is_whitespace(&self) -> bool;
/// Whether this word is a valid unicode identifier.
///
/// Checked against the `XID_Start`/`XID_Continue` character properties;
/// the empty string is not an identifier.
fn is_identifier(&self) -> bool;
}
impl StrExt for str {
    #[inline]
    fn is_whitespace(&self) -> bool {
        // De Morgan of "every char is whitespace and none is a newline".
        !self.chars().any(|c| !c.is_whitespace() || c == '\n')
    }

    fn is_identifier(&self) -> bool {
        let mut chars = self.chars();
        match chars.next() {
            // The first char must be able to start an identifier; all
            // remaining chars must be able to continue one.
            Some(first) if UnicodeXID::is_xid_start(first) => {
                chars.all(UnicodeXID::is_xid_continue)
            }
            // Empty strings and invalid leading chars are no identifiers.
            _ => false,
        }
    }
}
#[cfg(test)]
mod splinor_tests {
    use super::*;
    use Splined::{Value as V, Splinor as S};

    #[derive(Debug, Copy, Clone, PartialEq)]
    enum Token { DoubleUnderscore }

    /// Assert that splining `src` by `pat` yields exactly `expected`.
    fn test<T>(src: &str, pat: &str, splinor: T, expected: Vec<Splined<T>>)
    where T: std::fmt::Debug + Clone + PartialEq {
        let actual: Vec<_> = src.spline(pat, splinor).collect();
        assert_eq!(actual, expected);
    }

    #[test]
    fn splinor() {
        let s = S(Token::DoubleUnderscore);

        // Delimiters at the edges produce empty substrings there.
        test("__he__llo__world__", "__", Token::DoubleUnderscore,
            vec![V(""), s, V("he"), s, V("llo"), s, V("world"), s, V("")]);
        test("__Italic__", "__", Token::DoubleUnderscore,
            vec![V(""), s, V("Italic"), s, V("")]);
        test("Key__Value", "__", Token::DoubleUnderscore,
            vec![V("Key"), s, V("Value")]);
        test("__Start__NoEnd", "__", Token::DoubleUnderscore,
            vec![V(""), s, V("Start"), s, V("NoEnd")]);
        test("NoStart__End__", "__", Token::DoubleUnderscore,
            vec![V("NoStart"), s, V("End"), s, V("")]);
    }
}