Create test runner which renders layouts to images 🗺

This commit is contained in:
Laurenz 2019-10-11 17:53:28 +02:00
parent 8f788f9a4f
commit c0e4fd55e6
12 changed files with 753 additions and 519 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
**/*.rs.bk
Cargo.lock
things
test-cache

View File

@ -1,5 +1,6 @@
//! Representation of typesetted documents.
use std::io::{self, Write};
use crate::size::{Size, Size2D};
@ -31,3 +32,15 @@ pub enum LayoutAction {
/// Write text starting at the current position.
WriteText(String),
}
impl LayoutAction {
    /// Serialize this layout action into a string representation.
    ///
    /// One action per line of output:
    /// - `m <x> <y>` — absolute move, coordinates in points with four decimals,
    /// - `f <index> <size>` — select font by index and size,
    /// - `w <text>` — write text at the current position.
    pub fn serialize<W: Write>(&self, f: &mut W) -> io::Result<()> {
        use LayoutAction::*;
        match self {
            MoveAbsolute(s) => write!(f, "m {:.4} {:.4}", s.x.to_pt(), s.y.to_pt()),
            SetFont(i, s) => write!(f, "f {} {}", i, s),
            // NOTE(review): text containing a newline would break the
            // line-oriented format — presumably actions never carry one;
            // confirm against the renderer (tests/render.py).
            WriteText(s) => write!(f, "w {}", s),
        }
    }
}

View File

@ -1,5 +1,6 @@
//! Block-style layouting of boxes.
use std::io::{self, Write};
use crate::doc::{Document, Page, LayoutAction};
use crate::size::{Size, Size2D};
use super::{ActionList, LayoutSpace, Alignment, LayoutResult, LayoutError};
@ -25,6 +26,16 @@ impl BoxLayout {
}],
}
}
/// Serialize this layout into a string representation.
///
/// The first output line carries the box dimensions in points (four decimal
/// places); each subsequent line is one serialized layout action.
pub fn serialize<W: Write>(&self, f: &mut W) -> io::Result<()> {
    let dims = &self.dimensions;
    writeln!(f, "{:.4} {:.4}", dims.x.to_pt(), dims.y.to_pt())?;
    self.actions.iter().try_for_each(|action| {
        action.serialize(f)?;
        writeln!(f)
    })
}
}
/// The context for layouting boxes.

View File

@ -157,6 +157,9 @@ impl FlexFinisher {
/// Layout the glue.
///
/// Glue is buffered rather than appended immediately: any previously
/// buffered glue is flushed into the layout first, then the new glue takes
/// its place in the buffer.
// NOTE(review): `take()` empties `self.glue` before `append` runs, so
// `append` observes no pending glue — presumably the finisher relies on
// this ordering; confirm before restructuring.
fn glue(&mut self, glue: BoxLayout) {
    if let Some(glue) = self.glue.take() {
        self.append(glue);
    }
    self.glue = Some(glue);
}

View File

@ -136,56 +136,3 @@ error_type! {
from: (ParseError, TypesetError::Parse(err)),
from: (LayoutError, TypesetError::Layout(err)),
}
#[cfg(test)]
mod test {
use std::fs::File;
use std::io::BufWriter;
use crate::Typesetter;
use crate::export::pdf::PdfExporter;
use toddle::query::FileSystemFontProvider;
/// Create a _PDF_ with a name from the source code.
fn test(name: &str, src: &str) {
let mut typesetter = Typesetter::new();
let provider = FileSystemFontProvider::from_listing("fonts/fonts.toml").unwrap();
typesetter.add_font_provider(provider);
// Typeset into document.
let document = typesetter.typeset(src).unwrap();
// Write to file.
let path = format!("../target/typeset-unit-{}.pdf", name);
let file = BufWriter::new(File::create(path).unwrap());
let exporter = PdfExporter::new();
exporter.export(&document, typesetter.loader(), file).unwrap();
}
#[test]
fn features() {
test("features", r"
*Features Test Page*
_Multiline:_
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy
eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet
clita kasd gubergren, no sea takimata sanctus est.
_Emoji:_ Hello World! 🌍
_Styles:_ This is made *bold*, that _italic_ and this one `monospace` using the
built-in syntax!
_Styles with functions:_ This [bold][word] is made bold and [italic][that] is italic
using the standard library functions [mono][bold] and `italic`!
");
}
#[test]
fn shakespeare() {
test("shakespeare", include_str!("../test/shakespeare.tps"));
test("shakespeare-right", &format!("[align:right][{}]", include_str!("../test/shakespeare.tps")));
}
}

View File

@ -1,346 +1,15 @@
//! Tokenization and parsing of source code into syntax trees.
//! Parsing of source code into token streams and syntax trees.
use std::collections::HashMap;
use std::str::CharIndices;
use smallvec::SmallVec;
use unicode_xid::UnicodeXID;
use crate::func::{Function, Scope};
use crate::syntax::*;
use crate::size::Size;
mod tokens;
pub use tokens::{tokenize, Tokens};
/// Builds an iterator over the tokens of the source code.
#[inline]
pub fn tokenize(src: &str) -> Tokens {
Tokens::new(src)
}
/// An iterator over the tokens of source code.
#[derive(Debug, Clone)]
pub struct Tokens<'s> {
src: &'s str,
chars: PeekableChars<'s>,
state: TokensState,
stack: SmallVec<[TokensState; 1]>,
}
/// The state the tokenizer is in.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum TokensState {
/// The base state if there is nothing special we are in.
Body,
/// Inside a function header. Here colons and equal signs get parsed
/// as distinct tokens rather than text.
Function,
/// We expect either the end of the function or the beginning of the body.
MaybeBody,
}
impl<'s> Tokens<'s> {
/// Create a new token stream from source code.
fn new(src: &'s str) -> Tokens<'s> {
Tokens {
src,
chars: PeekableChars::new(src),
state: TokensState::Body,
stack: SmallVec::new(),
}
}
/// Advance the iterator by one step.
fn advance(&mut self) {
self.chars.next();
}
/// Switch to the given state.
fn switch(&mut self, state: TokensState) {
self.stack.push(self.state);
self.state = state;
}
/// Go back to the top-of-stack state.
fn unswitch(&mut self) {
self.state = self.stack.pop().unwrap_or(TokensState::Body);
}
/// Advance and return the given token.
fn consumed(&mut self, token: Token<'s>) -> Token<'s> {
self.advance();
token
}
/// Returns a word containing the string bounded by the given indices.
fn text(&self, start: usize, end: usize) -> Token<'s> {
Token::Text(&self.src[start .. end])
}
}
impl<'s> Iterator for Tokens<'s> {
type Item = Token<'s>;
/// Advance the iterator, return the next token or nothing.
fn next(&mut self) -> Option<Token<'s>> {
use TokensState as TU;
// Go to the body state if the function has a body or return to the top-of-stack state.
if self.state == TU::MaybeBody {
if self.chars.peek()?.1 == '[' {
self.state = TU::Body;
return Some(self.consumed(Token::LeftBracket));
} else {
self.unswitch();
}
}
// Take the next char and peek at the one behind.
let (next_pos, next) = self.chars.next()?;
let afterwards = self.chars.peek().map(|p| p.1);
Some(match next {
// Functions
'[' => {
self.switch(TU::Function);
Token::LeftBracket
},
']' => {
if self.state == TU::Function {
self.state = TU::MaybeBody;
} else {
self.unswitch();
}
Token::RightBracket
},
// Line comment
'/' if afterwards == Some('/') => {
let mut end = self.chars.next().unwrap();
let start = end.0 + end.1.len_utf8();
while let Some((index, c)) = self.chars.peek() {
if is_newline_char(c) {
break;
}
self.advance();
end = (index, c);
}
let end = end.0 + end.1.len_utf8();
Token::LineComment(&self.src[start .. end])
},
// Block comment
'/' if afterwards == Some('*') => {
let mut end = self.chars.next().unwrap();
let start = end.0 + end.1.len_utf8();
let mut nested = 0;
while let Some((index, c)) = self.chars.next() {
let after = self.chars.peek().map(|p| p.1);
match (c, after) {
('*', Some('/')) if nested == 0 => { self.advance(); break },
('/', Some('*')) => { self.advance(); nested += 1 },
('*', Some('/')) => { self.advance(); nested -= 1 },
_ => {},
}
end = (index, c);
}
let end = end.0 + end.1.len_utf8();
Token::BlockComment(&self.src[start .. end])
},
// Unexpected end of block comment
'*' if afterwards == Some('/') => self.consumed(Token::StarSlash),
// Whitespace
' ' | '\t' => {
while let Some((_, c)) = self.chars.peek() {
match c {
' ' | '\t' => self.advance(),
_ => break,
}
}
Token::Space
}
// Newlines
'\r' if afterwards == Some('\n') => self.consumed(Token::Newline),
c if is_newline_char(c) => Token::Newline,
// Star/Underscore/Backtick in bodies
'*' if self.state == TU::Body => Token::Star,
'_' if self.state == TU::Body => Token::Underscore,
'`' if self.state == TU::Body => Token::Backtick,
// Context sensitive operators in headers
':' if self.state == TU::Function => Token::Colon,
'=' if self.state == TU::Function => Token::Equals,
',' if self.state == TU::Function => Token::Comma,
// A string value.
'"' if self.state == TU::Function => {
// Find out when the word ends.
let mut escaped = false;
let mut end = (next_pos, next);
while let Some((index, c)) = self.chars.next() {
if c == '"' && !escaped {
break;
}
escaped = c == '\\';
end = (index, c);
}
let end_pos = end.0 + end.1.len_utf8();
Token::Quoted(&self.src[next_pos + 1 .. end_pos])
}
// Escaping
'\\' => {
if let Some((index, c)) = self.chars.peek() {
let escapable = match c {
'[' | ']' | '\\' | '*' | '_' | '`' | ':' | '=' | '/' => true,
_ => false,
};
if escapable {
self.advance();
return Some(self.text(index, index + c.len_utf8()));
}
}
Token::Text("\\")
},
// Normal text
_ => {
// Find out when the word ends.
let mut end = (next_pos, next);
while let Some((index, c)) = self.chars.peek() {
let second = self.chars.peek_second().map(|p| p.1);
// Whether the next token is still from the text or not.
let continues = match c {
'[' | ']' | '\\' => false,
'*' | '_' | '`' if self.state == TU::Body => false,
':' | '=' | ',' | '"' if self.state == TU::Function => false,
'/' => second != Some('/') && second != Some('*'),
'*' => second != Some('/'),
' ' | '\t' => false,
c if is_newline_char(c) => false,
_ => true,
};
if !continues {
break;
}
end = (index, c);
self.advance();
}
let end_pos = end.0 + end.1.len_utf8();
self.text(next_pos, end_pos)
},
})
}
}
/// Whether this character is a newline (or starts one).
fn is_newline_char(character: char) -> bool {
match character {
'\n' | '\r' | '\u{000c}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
_ => false,
}
}
/// A (index, char) iterator with double lookahead.
#[derive(Debug, Clone)]
struct PeekableChars<'s> {
offset: usize,
string: &'s str,
chars: CharIndices<'s>,
peek1: Option<Option<(usize, char)>>,
peek2: Option<Option<(usize, char)>>,
}
impl<'s> PeekableChars<'s> {
/// Create a new iterator from a string.
fn new(string: &'s str) -> PeekableChars<'s> {
PeekableChars {
offset: 0,
string,
chars: string.char_indices(),
peek1: None,
peek2: None,
}
}
/// Peek at the next element.
fn peek(&mut self) -> Option<(usize, char)> {
match self.peek1 {
Some(peeked) => peeked,
None => {
let next = self.next_inner();
self.peek1 = Some(next);
next
}
}
}
/// Peek at the element after the next element.
fn peek_second(&mut self) -> Option<(usize, char)> {
match self.peek2 {
Some(peeked) => peeked,
None => {
self.peek();
let next = self.next_inner();
self.peek2 = Some(next);
next
}
}
}
/// Return the next value of the inner iterator mapped with the offset.
fn next_inner(&mut self) -> Option<(usize, char)> {
self.chars.next().map(|(i, c)| (i + self.offset, c))
}
/// The index of the first character of the next token in the source string.
fn current_index(&mut self) -> Option<usize> {
self.peek().map(|p| p.0)
}
/// Go to a new position in the underlying string.
fn goto(&mut self, index: usize) {
self.offset = index;
self.chars = self.string[index..].char_indices();
self.peek1 = None;
self.peek2 = None;
}
}
impl Iterator for PeekableChars<'_> {
type Item = (usize, char);
fn next(&mut self) -> Option<(usize, char)> {
match self.peek1.take() {
Some(value) => {
self.peek1 = self.peek2.take();
value
},
None => self.next_inner(),
}
}
}
//------------------------------------------------------------------------------------------------//
/// Parses source code into a syntax tree given a context.
#[inline]
@ -740,7 +409,6 @@ fn is_identifier(string: &str) -> bool {
true
}
//------------------------------------------------------------------------------------------------//
/// The error type for parsing.
pub struct ParseError(String);
@ -762,137 +430,7 @@ error_type! {
#[cfg(test)]
mod token_tests {
use super::*;
use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
Colon as C, Equals as E, Quoted as Q, Underscore as TU, Star as TS,
Backtick as TB, Text as T, LineComment as LC, BlockComment as BC,
StarSlash as SS};
/// Test if the source code tokenizes to the tokens.
fn test(src: &str, tokens: Vec<Token>) {
assert_eq!(Tokens::new(src).collect::<Vec<_>>(), tokens);
}
/// Tokenizes the basic building blocks.
#[test]
fn tokenize_base() {
test("", vec![]);
test("Hallo", vec![T("Hallo")]);
test("[", vec![L]);
test("]", vec![R]);
test("*", vec![TS]);
test("_", vec![TU]);
test("`", vec![TB]);
test("\n", vec![N]);
}
/// This test looks if LF- and CRLF-style newlines get both identified correctly.
#[test]
fn tokenize_whitespace_newlines() {
test(" \t", vec![S]);
test("First line\r\nSecond line\nThird line\n",
vec![T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
T("Third"), S, T("line"), N]);
test("Hello \n ", vec![T("Hello"), S, N, S]);
test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
}
/// Tests if escaping with backslash works as it should.
#[test]
fn tokenize_escape() {
test(r"\[", vec![T("[")]);
test(r"\]", vec![T("]")]);
test(r"\**", vec![T("*"), TS]);
test(r"\*", vec![T("*")]);
test(r"\__", vec![T("_"), TU]);
test(r"\_", vec![T("_")]);
test(r"\hello", vec![T("\\"), T("hello")]);
}
/// Tests if escaped strings work.
#[test]
fn tokenize_quoted() {
test(r#"[align: "hello\"world"]"#, vec![L, T("align"), C, S, Q(r#"hello\"world"#), R]);
}
/// Tokenizes some more realistic examples.
#[test]
fn tokenize_examples() {
test(r"
[function][
Test [italic][example]!
]
", vec![
N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
T("example"), R, T("!"), N, S, R, N, S
]);
test(r"
[page: size=A4]
[font: size=12pt]
Das ist ein Beispielsatz mit *fetter* Schrift.
", vec![
N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
TS, T("fetter"), TS, S, T("Schrift."), N, S
]);
}
/// This test checks whether the colon and equals symbols get parsed correctly depending on the
/// context: Either in a function header or in a body.
#[test]
fn tokenize_symbols_context() {
test("[func: key=value][Answer: 7]",
vec![L, T("func"), C, S, T("key"), E, T("value"), R, L,
T("Answer:"), S, T("7"), R]);
test("[[n: k=v]:x][:[=]]:=",
vec![L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
L, T(":"), L, E, R, R, T(":=")]);
test("[hi: k=[func][body] v=1][hello]",
vec![L, T("hi"), C, S, T("k"), E, L, T("func"), R, L, T("body"), R, S,
T("v"), E, T("1"), R, L, T("hello"), R]);
test("[func: __key__=value]",
vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
test("The /*[*/ answer: 7.",
vec![T("The"), S, BC("["), S, T("answer:"), S, T("7.")]);
}
/// Test if block and line comments get tokenized as expected.
#[test]
fn tokenize_comments() {
test("These // Line comments.",
vec![T("These"), S, LC(" Line comments.")]);
test("This /* is */ a comment.",
vec![T("This"), S, BC(" is "), S, T("a"), S, T("comment.")]);
test("[Head/*of*/][Body]", vec![L, T("Head"), BC("of"), R, L, T("Body"), R]);
test("/* Hey */ */", vec![BC(" Hey "), S, SS]);
test("Hey\n// Yoo /*\n*/", vec![T("Hey"), N, LC(" Yoo /*"), N, SS]);
test("/* My /* line // */ comment */", vec![BC(" My /* line // */ comment ")])
}
/// This test has a special look at the underscore syntax.
#[test]
fn tokenize_underscores() {
test("he_llo_world_ __ Now this_ is_ special!",
vec![T("he"), TU, T("llo"), TU, T("world"), TU, S, TU, TU, S, T("Now"), S,
T("this"), TU, S, T("is"), TU, S, T("special!")]);
}
/// This test is for checking if non-ASCII characters get parsed correctly.
#[test]
fn tokenize_unicode() {
test("[document][Hello 🌍!]",
vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
}
}
#[cfg(test)]
mod parse_tests {
mod tests {
use super::*;
use crate::func::{Function, Scope};
use crate::layout::{LayoutContext, LayoutResult, Layout};

465
src/parsing/tokens.rs Normal file
View File

@ -0,0 +1,465 @@
//! Tokenization of text.
use std::str::CharIndices;
use smallvec::SmallVec;
use crate::syntax::*;
/// Builds an iterator over the tokens of the source code.
///
/// The returned iterator borrows `src` and produces tokens lazily as it is
/// advanced.
#[inline]
pub fn tokenize(src: &str) -> Tokens {
    Tokens::new(src)
}
/// An iterator over the tokens of source code.
#[derive(Debug, Clone)]
pub struct Tokens<'s> {
    /// The complete source string; emitted tokens are slices of it.
    src: &'s str,
    /// The underlying character iterator with double lookahead.
    // NOTE(review): exposed to the parent parsing module via `pub(in super)`.
    pub(in super) chars: PeekableChars<'s>,
    /// The current tokenization state.
    state: TokensState,
    /// Enclosing states to return to once the current one ends.
    stack: SmallVec<[TokensState; 1]>,
}
/// The state the tokenizer is in.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum TokensState {
    /// The base state if there is nothing special we are in.
    Body,
    /// Inside a function header. Here colons and equal signs get parsed
    /// as distinct tokens rather than text.
    Function,
    /// We expect either the end of the function or the beginning of the body.
    /// Entered after the closing bracket of a function header.
    MaybeBody,
}
impl<'s> Tokens<'s> {
    /// Create a new token stream from source code, starting in body state.
    fn new(src: &'s str) -> Tokens<'s> {
        Tokens {
            src,
            chars: PeekableChars::new(src),
            state: TokensState::Body,
            stack: SmallVec::new(),
        }
    }

    /// Advance the iterator by one step.
    fn advance(&mut self) {
        self.chars.next();
    }

    /// Switch to the given state, remembering the current one on the stack.
    fn switch(&mut self, state: TokensState) {
        self.stack.push(self.state);
        self.state = state;
    }

    /// Go back to the top-of-stack state (or `Body` if the stack is empty).
    fn unswitch(&mut self) {
        self.state = self.stack.pop().unwrap_or(TokensState::Body);
    }

    /// Advance and return the given token.
    fn consumed(&mut self, token: Token<'s>) -> Token<'s> {
        self.advance();
        token
    }

    /// Returns a word containing the string bounded by the given indices.
    // NOTE(review): `start` and `end` must lie on character boundaries of
    // `src`, otherwise the slice panics.
    fn text(&self, start: usize, end: usize) -> Token<'s> {
        Token::Text(&self.src[start .. end])
    }
}
impl<'s> Iterator for Tokens<'s> {
    type Item = Token<'s>;

    /// Advance the iterator, return the next token or nothing.
    fn next(&mut self) -> Option<Token<'s>> {
        use TokensState as TU;

        // Go to the body state if the function has a body or return to the top-of-stack state.
        if self.state == TU::MaybeBody {
            if self.chars.peek()?.1 == '[' {
                self.state = TU::Body;
                return Some(self.consumed(Token::LeftBracket));
            } else {
                self.unswitch();
            }
        }

        // Take the next char and peek at the one behind.
        let (next_pos, next) = self.chars.next()?;
        let afterwards = self.chars.peek().map(|p| p.1);

        Some(match next {
            // Functions
            '[' => {
                self.switch(TU::Function);
                Token::LeftBracket
            },
            ']' => {
                if self.state == TU::Function {
                    self.state = TU::MaybeBody;
                } else {
                    self.unswitch();
                }
                Token::RightBracket
            },

            // Line comment
            '/' if afterwards == Some('/') => {
                // Consume the second slash; the comment text starts after it.
                let mut end = self.chars.next().unwrap();
                let start = end.0 + end.1.len_utf8();

                // Extend until (but excluding) the next newline.
                while let Some((index, c)) = self.chars.peek() {
                    if is_newline_char(c) {
                        break;
                    }
                    self.advance();
                    end = (index, c);
                }

                let end = end.0 + end.1.len_utf8();
                Token::LineComment(&self.src[start .. end])
            },

            // Block comment
            '/' if afterwards == Some('*') => {
                // Consume the star; the comment text starts after it.
                let mut end = self.chars.next().unwrap();
                let start = end.0 + end.1.len_utf8();

                // Block comments nest: only a `*/` at depth zero terminates.
                let mut nested = 0;

                while let Some((index, c)) = self.chars.next() {
                    let after = self.chars.peek().map(|p| p.1);
                    match (c, after) {
                        ('*', Some('/')) if nested == 0 => { self.advance(); break },
                        ('/', Some('*')) => { self.advance(); nested += 1 },
                        ('*', Some('/')) => { self.advance(); nested -= 1 },
                        _ => {},
                    }
                    end = (index, c);
                }

                let end = end.0 + end.1.len_utf8();
                Token::BlockComment(&self.src[start .. end])
            },

            // Unexpected end of block comment
            '*' if afterwards == Some('/') => self.consumed(Token::StarSlash),

            // Whitespace: any run of spaces and tabs collapses into one token.
            ' ' | '\t' => {
                while let Some((_, c)) = self.chars.peek() {
                    match c {
                        ' ' | '\t' => self.advance(),
                        _ => break,
                    }
                }
                Token::Space
            }

            // Newlines (a CRLF pair is a single newline token).
            '\r' if afterwards == Some('\n') => self.consumed(Token::Newline),
            c if is_newline_char(c) => Token::Newline,

            // Star/Underscore/Backtick in bodies
            '*' if self.state == TU::Body => Token::Star,
            '_' if self.state == TU::Body => Token::Underscore,
            '`' if self.state == TU::Body => Token::Backtick,

            // Context sensitive operators in headers
            ':' if self.state == TU::Function => Token::Colon,
            '=' if self.state == TU::Function => Token::Equals,
            ',' if self.state == TU::Function => Token::Comma,

            // A string value.
            '"' if self.state == TU::Function => {
                // Find out when the word ends.
                let mut escaped = false;
                let mut end = (next_pos, next);

                while let Some((index, c)) = self.chars.next() {
                    if c == '"' && !escaped {
                        break;
                    }
                    escaped = c == '\\';
                    end = (index, c);
                }

                let end_pos = end.0 + end.1.len_utf8();
                // The token holds the raw (still escaped) contents between the quotes.
                Token::Quoted(&self.src[next_pos + 1 .. end_pos])
            }

            // Escaping
            '\\' => {
                if let Some((index, c)) = self.chars.peek() {
                    let escapable = match c {
                        '[' | ']' | '\\' | '*' | '_' | '`' | ':' | '=' | '/' => true,
                        _ => false,
                    };

                    if escapable {
                        self.advance();
                        return Some(self.text(index, index + c.len_utf8()));
                    }
                }

                // A backslash before a non-escapable character is plain text.
                Token::Text("\\")
            },

            // Normal text
            _ => {
                // Find out when the word ends.
                let mut end = (next_pos, next);
                while let Some((index, c)) = self.chars.peek() {
                    let second = self.chars.peek_second().map(|p| p.1);

                    // Whether the next token is still from the text or not.
                    let continues = match c {
                        '[' | ']' | '\\' => false,
                        '*' | '_' | '`' if self.state == TU::Body => false,
                        ':' | '=' | ',' | '"' if self.state == TU::Function => false,

                        // Slashes/stars end the word only when they begin or
                        // end a comment.
                        '/' => second != Some('/') && second != Some('*'),
                        '*' => second != Some('/'),

                        ' ' | '\t' => false,
                        c if is_newline_char(c) => false,

                        _ => true,
                    };

                    if !continues {
                        break;
                    }

                    end = (index, c);
                    self.advance();
                }

                let end_pos = end.0 + end.1.len_utf8();
                self.text(next_pos, end_pos)
            },
        })
    }
}
/// Whether this character is a newline (or starts one).
///
/// Covers LF, CR, form feed, next line and the Unicode line/paragraph
/// separators.
fn is_newline_char(character: char) -> bool {
    matches!(character, '\n' | '\r' | '\u{000c}' | '\u{0085}' | '\u{2028}' | '\u{2029}')
}
/// A (index, char) iterator with double lookahead.
#[derive(Debug, Clone)]
pub struct PeekableChars<'s> {
    /// Added to every index the inner iterator yields (set by `goto`).
    offset: usize,
    /// The underlying string.
    string: &'s str,
    /// The inner char iterator.
    chars: CharIndices<'s>,
    /// One-slot lookahead buffers; `Some(None)` caches an exhausted stream.
    peek1: Option<Option<(usize, char)>>,
    peek2: Option<Option<(usize, char)>>,
}

impl<'s> PeekableChars<'s> {
    /// Create a new iterator from a string.
    pub fn new(source: &'s str) -> PeekableChars<'s> {
        PeekableChars {
            offset: 0,
            string: source,
            chars: source.char_indices(),
            peek1: None,
            peek2: None,
        }
    }

    /// Peek at the next element without consuming it.
    pub fn peek(&mut self) -> Option<(usize, char)> {
        if self.peek1.is_none() {
            let upcoming = self.next_inner();
            self.peek1 = Some(upcoming);
        }
        self.peek1.unwrap()
    }

    /// Peek at the element after the next element.
    pub fn peek_second(&mut self) -> Option<(usize, char)> {
        if self.peek2.is_none() {
            // Fill the first slot before the second so order is preserved.
            self.peek();
            let upcoming = self.next_inner();
            self.peek2 = Some(upcoming);
        }
        self.peek2.unwrap()
    }

    /// Return the next value of the inner iterator mapped with the offset.
    pub fn next_inner(&mut self) -> Option<(usize, char)> {
        let (index, character) = self.chars.next()?;
        Some((index + self.offset, character))
    }

    /// The index of the first character of the next token in the source string.
    pub fn current_index(&mut self) -> Option<usize> {
        Some(self.peek()?.0)
    }

    /// Go to a new position in the underlying string.
    pub fn goto(&mut self, index: usize) {
        self.chars = self.string[index..].char_indices();
        self.offset = index;
        self.peek1 = None;
        self.peek2 = None;
    }
}
impl Iterator for PeekableChars<'_> {
    type Item = (usize, char);

    /// Yield the buffered lookahead element if present, shifting the second
    /// buffer forward; otherwise pull straight from the inner iterator.
    fn next(&mut self) -> Option<(usize, char)> {
        if let Some(buffered) = self.peek1.take() {
            self.peek1 = self.peek2.take();
            buffered
        } else {
            self.next_inner()
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
Colon as C, Equals as E, Quoted as Q, Underscore as TU, Star as TS,
Backtick as TB, Text as T, LineComment as LC, BlockComment as BC,
StarSlash as SS};
/// Test if the source code tokenizes to the tokens.
fn test(src: &str, tokens: Vec<Token>) {
assert_eq!(Tokens::new(src).collect::<Vec<_>>(), tokens);
}
/// Tokenizes the basic building blocks.
#[test]
fn tokenize_base() {
test("", vec![]);
test("Hallo", vec![T("Hallo")]);
test("[", vec![L]);
test("]", vec![R]);
test("*", vec![TS]);
test("_", vec![TU]);
test("`", vec![TB]);
test("\n", vec![N]);
}
/// This test looks if LF- and CRLF-style newlines get both identified correctly.
#[test]
fn tokenize_whitespace_newlines() {
test(" \t", vec![S]);
test("First line\r\nSecond line\nThird line\n",
vec![T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
T("Third"), S, T("line"), N]);
test("Hello \n ", vec![T("Hello"), S, N, S]);
test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
}
/// Tests if escaping with backslash works as it should.
#[test]
fn tokenize_escape() {
test(r"\[", vec![T("[")]);
test(r"\]", vec![T("]")]);
test(r"\**", vec![T("*"), TS]);
test(r"\*", vec![T("*")]);
test(r"\__", vec![T("_"), TU]);
test(r"\_", vec![T("_")]);
test(r"\hello", vec![T("\\"), T("hello")]);
}
/// Tests if escaped strings work.
#[test]
fn tokenize_quoted() {
test(r#"[align: "hello\"world"]"#, vec![L, T("align"), C, S, Q(r#"hello\"world"#), R]);
}
/// Tokenizes some more realistic examples.
#[test]
fn tokenize_examples() {
test(r"
[function][
Test [italic][example]!
]
", vec![
N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
T("example"), R, T("!"), N, S, R, N, S
]);
test(r"
[page: size=A4]
[font: size=12pt]
Das ist ein Beispielsatz mit *fetter* Schrift.
", vec![
N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
TS, T("fetter"), TS, S, T("Schrift."), N, S
]);
}
/// This test checks whether the colon and equals symbols get parsed correctly depending on the
/// context: Either in a function header or in a body.
#[test]
fn tokenize_symbols_context() {
test("[func: key=value][Answer: 7]",
vec![L, T("func"), C, S, T("key"), E, T("value"), R, L,
T("Answer:"), S, T("7"), R]);
test("[[n: k=v]:x][:[=]]:=",
vec![L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
L, T(":"), L, E, R, R, T(":=")]);
test("[hi: k=[func][body] v=1][hello]",
vec![L, T("hi"), C, S, T("k"), E, L, T("func"), R, L, T("body"), R, S,
T("v"), E, T("1"), R, L, T("hello"), R]);
test("[func: __key__=value]",
vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
test("The /*[*/ answer: 7.",
vec![T("The"), S, BC("["), S, T("answer:"), S, T("7.")]);
}
/// Test if block and line comments get tokenized as expected.
#[test]
fn tokenize_comments() {
test("These // Line comments.",
vec![T("These"), S, LC(" Line comments.")]);
test("This /* is */ a comment.",
vec![T("This"), S, BC(" is "), S, T("a"), S, T("comment.")]);
test("[Head/*of*/][Body]", vec![L, T("Head"), BC("of"), R, L, T("Body"), R]);
test("/* Hey */ */", vec![BC(" Hey "), S, SS]);
test("Hey\n// Yoo /*\n*/", vec![T("Hey"), N, LC(" Yoo /*"), N, SS]);
test("/* My /* line // */ comment */", vec![BC(" My /* line // */ comment ")])
}
/// This test has a special look at the underscore syntax.
#[test]
fn tokenize_underscores() {
test("he_llo_world_ __ Now this_ is_ special!",
vec![T("he"), TU, T("llo"), TU, T("world"), TU, S, TU, TU, S, T("Now"), S,
T("this"), TU, S, T("is"), TU, S, T("special!")]);
}
/// This test is for checking if non-ASCII characters get parsed correctly.
#[test]
fn tokenize_unicode() {
test("[document][Hello 🌍!]",
vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
}
}

82
tests/layouting.rs Normal file
View File

@ -0,0 +1,82 @@
use std::fs::{self, File};
use std::io::{Write, Read, BufWriter};
use std::process::Command;
use typst::Typesetter;
use typst::toddle::query::FileSystemFontProvider;
use typst::export::pdf::PdfExporter;
use typst::doc::LayoutAction;
/// Directory into which serialized layouts, rendered images and PDFs are written.
const CACHE_DIR: &str = "test-cache";
/// Runs every source file in `tests/layouts/` through the full pipeline:
/// typeset, serialize, render to an image and export as a PDF.
#[test]
fn layouting() {
    // Make sure all output directories exist.
    fs::create_dir_all(format!("{}/serialized", CACHE_DIR)).unwrap();
    fs::create_dir_all(format!("{}/rendered", CACHE_DIR)).unwrap();
    fs::create_dir_all(format!("{}/pdf", CACHE_DIR)).unwrap();

    for entry in fs::read_dir("tests/layouts/").unwrap() {
        let path = entry.unwrap().path();

        let mut file = File::open(&path).unwrap();
        let mut src = String::new();
        file.read_to_string(&mut src).unwrap();

        // The file stem doubles as the test's name.
        let name = path
            .file_stem().unwrap()
            .to_str().unwrap();

        test(name, &src);
    }
}
/// Typeset the source code and, under the given name, write a serialized
/// layout, a PNG rendered by the Python helper and an exported PDF into the
/// cache directory.
fn test(name: &str, src: &str) {
    let mut typesetter = Typesetter::new();
    let provider = FileSystemFontProvider::from_listing("fonts/fonts.toml").unwrap();
    typesetter.add_font_provider(provider.clone());

    // Layout into box layout.
    let tree = typesetter.parse(src).unwrap();
    let layout = typesetter.layout(&tree).unwrap();

    // Write the serialized layout file.
    let path = format!("{}/serialized/{}.box", CACHE_DIR, name);
    let mut file = File::create(path).unwrap();

    // Find all used fonts and their filenames.
    let mut map = Vec::new();
    let mut loader = typesetter.loader().borrow_mut();
    for action in &layout.actions {
        if let LayoutAction::SetFont(index, _) = action {
            // Record each font index only once.
            if map.iter().find(|(i, _)| i == index).is_none() {
                let (_, provider_index) = loader.get_provider_and_index(*index);
                let filename = provider.get_path(provider_index).to_str().unwrap();
                map.push((*index, filename));
            }
        }
    }
    // Release the borrow so the loader can be used again by the exporter below.
    drop(loader);

    // Write the font mapping into the serialization file.
    writeln!(file, "{}", map.len()).unwrap();
    for (index, path) in map {
        writeln!(file, "{} {}", index, path).unwrap();
    }
    layout.serialize(&mut file).unwrap();

    // Render the layout into a PNG.
    // NOTE(review): `spawn` neither waits for the renderer nor checks its
    // exit status — presumably intentional (fire and forget), but confirm.
    Command::new("python")
        .arg("tests/render.py")
        .arg(name)
        .spawn()
        .expect("failed to run python-based renderer");

    // Write the PDF file.
    let path = format!("{}/pdf/{}.pdf", CACHE_DIR, name);
    let file = BufWriter::new(File::create(path).unwrap());
    let document = layout.into_doc();
    let exporter = PdfExporter::new();
    exporter.export(&document, typesetter.loader(), file).unwrap();
}

View File

@ -0,0 +1,88 @@
[align: right][
[bold][Scene 5: _The Tower of London_]
[italic][Enter Mortimer, brought in a chair, and Gaolers.]
*Mortimer.* Kind keepers of my weak decaying age,
Let dying Mortimer here rest himself.
Even like a man new haled from the rack,
So fare my limbs with long imprisonment;
And these grey locks, the pursuivants of death,
Nestor-like aged in an age of care,
Argue the end of Edmund Mortimer.
These eyes, like lamps whose wasting oil is spent,
Wax dim, as drawing to their exigent;
Weak shoulders, overborne with burdening grief,
And pithless arms, like to a withered vine
That droops his sapless branches to the ground.
Yet are these feet, whose strengthless stay is numb,
Unable to support this lump of clay,
Swift-winged with desire to get a grave,
As witting I no other comfort have.
But tell me, keeper, will my nephew come?
*First Keeper.* Richard Plantagenet, my lord, will come.
We sent unto the Temple, unto his chamber;
And answer was return'd that he will come.
*Mortimer.* Enough; my soul shall then be satisfied.
Poor gentleman! his wrong doth equal mine.
Since Henry Monmouth first began to reign,
Before whose glory I was great in arms,
This loathsome sequestration have I had;
And even since then hath Richard been obscur'd,
Depriv'd of honour and inheritance.
But now the arbitrator of despairs,
Just Death, kind umpire of men's miseries,
With sweet enlargement doth dismiss me hence.
I would his troubles likewise were expir'd,
That so he might recover what was lost.
[italic][Enter Richard Plantagenet]
*First Keeper.* My lord, your loving nephew now is come.
*Mortimer.* Richard Plantagenet, my friend, is he come?
*Plantagenet.* Ay, noble uncle, thus ignobly us'd,
Your nephew, late despised Richard, comes.
*Mortimer.* Direct mine arms I may embrace his neck
And in his bosom spend my latter gasp.
O, tell me when my lips do touch his cheeks,
That I may kindly give one fainting kiss.
And now declare, sweet stem from York's great stock,
Why didst thou say of late thou wert despis'd?
*Plantagenet.* First, lean thine aged back against mine arm;
And, in that ease, I'll tell thee my disease.
This day, in argument upon a case,
Some words there grew 'twixt Somerset and me;
Among which terms he us'd his lavish tongue
And did upbraid me with my father's death;
Which obloquy set bars before my tongue,
Else with the like I had requited him.
Therefore, good uncle, for my father's sake,
In honour of a true Plantagenet,
And for alliance sake, declare the cause
My father, Earl of Cambridge, lost his head.
*Mortimer.* That cause, fair nephew, that imprison'd me
And hath detain'd me all my flow'ring youth
Within a loathsome dungeon, there to pine,
Was cursed instrument of his decease.
*Plantagenet.* Discover more at large what cause that was,
For I am ignorant and cannot guess.
*Mortimer.* I will, if that my fading breath permit
And death approach not ere my tale be done.
Henry the Fourth, grandfather to this king,
Depos'd his nephew Richard, Edward's son,
The first-begotten and the lawful heir
Of Edward king, the third of that descent;
During whose reign the Percies of the north,
Finding his usurpation most unjust,
Endeavour'd my advancement to the throne ...
]

13
tests/layouts/styles.tps Normal file
View File

@ -0,0 +1,13 @@
_Multiline:_
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy
eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet
clita kasd gubergren, no sea takimata sanctus est.
_Emoji:_ Hello World! 🌍
_Styles:_ This is made *bold*, that _italic_ and this one `monospace` using the
built-in syntax!
_Styles with functions:_ This [bold][word] is made bold and [italic][that] is italic
using the standard library functions [mono][bold] and `italic`!

73
tests/render.py Normal file
View File

@ -0,0 +1,73 @@
import sys
import os
import pathlib
from PIL import Image, ImageDraw, ImageFont
# Directory of this script; paths below are resolved relative to it.
BASE = os.path.dirname(__file__)
# Cache directory shared with the Rust test runner (see tests/layouting.rs).
# Fixed: dropped the stray C-style semicolon terminator.
CACHE_DIR = os.path.join(BASE, "../test-cache/")
def main():
    """Render one serialized box layout from the cache into a PNG image.

    Expects the layout name as the single command-line argument and reads
    ``test-cache/serialized/<name>.box``.
    """
    assert len(sys.argv) == 2, "usage: python render.py <name>"
    name = sys.argv[1]

    filename = os.path.join(CACHE_DIR, f"serialized/{name}.box")
    with open(filename, encoding="utf-8") as file:
        # Strip the trailing newline from every line.
        lines = [line[:-1] for line in file.readlines()]

    # Header: first line is the font count, then one "<index> <path>" per font.
    fonts = {}
    font_count = int(lines[0])
    for i in range(font_count):
        parts = lines[1 + i].split(' ', 1)
        index = int(parts[0])
        path = parts[1]
        fonts[index] = os.path.join(BASE, "../fonts", path)

    # The line after the font table carries the box dimensions (in points).
    width, height = (float(s) for s in lines[font_count + 1].split())

    # Replay all remaining lines as drawing commands.
    renderer = Renderer(fonts, width, height)
    for command in lines[font_count + 2:]:
        renderer.execute(command)

    pathlib.Path(os.path.join(CACHE_DIR, "rendered")).mkdir(parents=True, exist_ok=True)
    renderer.export(name)
class Renderer:
    """Replays serialized layout commands onto a PIL image."""

    def __init__(self, fonts, width, height):
        # fonts: maps font index -> font file path (see main's font table).
        self.fonts = fonts
        self.img = Image.new("RGBA", (pix(width), pix(height)), (255, 255, 255))
        self.draw = ImageDraw.Draw(self.img)
        # Current text position in pixels.
        self.cursor = (0, 0)

    def execute(self, command):
        """Execute one serialized command: `m` (move), `f` (font) or `w` (write)."""
        cmd = command[0]
        parts = command.split()[1:]
        if cmd == 'm':
            # Move the cursor to an absolute position (given in points).
            x, y = (pix(float(s)) for s in parts)
            self.cursor = (x, y)
        elif cmd == 'f':
            # Select a font by index and size.
            # NOTE(review): `self.font` is only created here — a `w` command
            # arriving before any `f` would raise AttributeError; presumably
            # the serializer always emits a font first. Confirm.
            index = int(parts[0])
            size = pix(float(parts[1]))
            self.font = ImageFont.truetype(self.fonts[index], size)
        elif cmd == 'w':
            # Write text; slicing `command[2:]` (instead of using `parts`)
            # preserves the text's inner whitespace.
            text = command[2:]
            self.draw.text(self.cursor, text, (0, 0, 0), font=self.font)
        else:
            raise Exception("invalid command")

    def export(self, name):
        """Save the rendered image as a PNG into the cache directory."""
        self.img.save(CACHE_DIR + "rendered/" + name + ".png")
def pix(points):
    """Convert a size in points to output pixels (2 px per point, truncated)."""
    scaled = points * 2
    return int(scaled)
# Allow running the renderer as a standalone script.
if __name__ == "__main__":
    main()