Add logical numbering support for PDF export (#1933)

Co-authored-by: Laurenz <laurmaedje@gmail.com>
This commit is contained in:
Kevin K 2023-09-13 10:18:08 +02:00 committed by GitHub
parent c1a8ea68cb
commit 8927f3d572
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 335 additions and 40 deletions

View File

@ -4,7 +4,7 @@ use std::str::FromStr;
use typst::eval::AutoValue;
use super::{AlignElem, ColumnsElem};
use crate::meta::{Counter, CounterKey, Numbering};
use crate::meta::{Counter, CounterKey, ManualPageCounter, Numbering};
use crate::prelude::*;
use crate::text::TextElem;
@ -327,7 +327,7 @@ impl PageElem {
&self,
vt: &mut Vt,
styles: StyleChain,
mut number: NonZeroUsize,
page_counter: &mut ManualPageCounter,
) -> SourceResult<Fragment> {
tracing::info!("Page layout");
@ -378,7 +378,10 @@ impl PageElem {
let mut frames = child.layout(vt, styles, regions)?.into_frames();
// Align the child to the pagebreak's parity.
if self.clear_to(styles).is_some_and(|p| !p.matches(number.get())) {
if self
.clear_to(styles)
.is_some_and(|p| !p.matches(page_counter.physical().get()))
{
let size = area.map(Abs::is_finite).select(area, Size::zero());
frames.insert(0, Frame::new(size));
}
@ -389,6 +392,7 @@ impl PageElem {
let header_ascent = self.header_ascent(styles);
let footer_descent = self.footer_descent(styles);
let numbering = self.numbering(styles);
let numbering_meta = Meta::PageNumbering(numbering.clone().into_value());
let number_align = self.number_align(styles);
let mut header = self.header(styles);
let mut footer = self.footer(styles);
@ -418,12 +422,9 @@ impl PageElem {
footer = footer.or(numbering_marginal);
}
let numbering_meta =
FrameItem::Meta(Meta::PageNumbering(numbering.into_value()), Size::zero());
// Post-process pages.
for frame in frames.iter_mut() {
tracing::info!("Layouting page #{number}");
tracing::info!("Layouting page #{}", page_counter.physical());
// The padded width of the page's content without margins.
let pw = frame.width();
@ -432,14 +433,14 @@ impl PageElem {
// Thus, for left-bound pages, we want to swap on even pages and
// for right-bound pages, we want to swap on odd pages.
let mut margin = margin;
if two_sided && binding.swap(number) {
if two_sided && binding.swap(page_counter.physical()) {
std::mem::swap(&mut margin.left, &mut margin.right);
}
// Realize margins.
frame.set_size(frame.size() + margin.sum_by_axis());
frame.translate(Point::new(margin.left, margin.top));
frame.push(Point::zero(), numbering_meta.clone());
frame.push_positionless_meta(numbering_meta.clone());
// The page size with margins.
let size = frame.size();
@ -490,7 +491,16 @@ impl PageElem {
frame.fill(fill.clone());
}
number = number.saturating_add(1);
page_counter.visit(vt, frame)?;
// Add a PDF page label if there is a numbering.
if let Some(num) = &numbering {
if let Some(page_label) = num.apply_pdf(page_counter.logical()) {
frame.push_positionless_meta(Meta::PdfPageLabel(page_label));
}
}
page_counter.step();
}
Ok(Fragment::frames(frames))

View File

@ -233,7 +233,7 @@ impl Counter {
Ok(CounterState(smallvec![at_state.first(), final_state.first()]))
}
/// Produces the whole sequence of counter states.
/// Produce the whole sequence of counter states.
///
/// This has to happen just once for all counters, cutting down the number
/// of counter updates from quadratic to linear.
@ -268,11 +268,8 @@ impl Counter {
delayed,
tracer,
};
let mut state = CounterState(match &self.0 {
// special case, because pages always start at one.
CounterKey::Page => smallvec![1],
_ => smallvec![0],
});
let mut state = CounterState::init(&self.0);
let mut page = NonZeroUsize::ONE;
let mut stops = eco_vec![(state.clone(), page)];
@ -543,6 +540,15 @@ pub trait Count {
pub struct CounterState(pub SmallVec<[usize; 3]>);
impl CounterState {
/// Build the state a counter for `key` has before any update is applied.
///
/// The page counter begins at one; every other counter begins at zero.
pub fn init(key: &CounterKey) -> Self {
    // Pages are the one special case: the first page is page 1, not page 0.
    let start = if matches!(key, CounterKey::Page) { 1 } else { 0 };
    Self(smallvec![start])
}
/// Advance the counter and return the numbers for the given heading.
pub fn update(&mut self, vt: &mut Vt, update: CounterUpdate) -> SourceResult<()> {
match update {
@ -642,7 +648,7 @@ impl Show for DisplayElem {
}
}
/// Executes a display of a state.
/// Executes an update of a counter.
#[elem(Locatable, Show)]
struct UpdateElem {
/// The key that identifies the counter.
@ -660,3 +666,60 @@ impl Show for UpdateElem {
Ok(Content::empty())
}
}
/// A specialized handler of the page counter that tracks both the physical
/// and the logical page counter.
///
/// Both numbers start at 1 and advance together at every page boundary; only
/// the logical number is affected by page counter updates found on a page.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct ManualPageCounter {
/// The physical page number. Starts at 1 and is only changed by `step`.
physical: NonZeroUsize,
/// The logical page number. Starts at 1, is advanced by `step`, and is
/// overwritten by `visit` when a page contains page counter updates.
logical: usize,
}
impl ManualPageCounter {
    /// Create a new page counter whose physical and logical numbers both
    /// start at 1.
    pub fn new() -> Self {
        Self { physical: NonZeroUsize::ONE, logical: 1 }
    }

    /// Get the current physical page counter state.
    pub fn physical(&self) -> NonZeroUsize {
        self.physical
    }

    /// Get the current logical page counter state.
    pub fn logical(&self) -> usize {
        self.logical
    }

    /// Advance past a page.
    ///
    /// Walks all items of the frame, descending into groups, and applies
    /// every page counter update found to the logical number. The physical
    /// number is left untouched.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while applying a counter update.
    pub fn visit(&mut self, vt: &mut Vt, page: &Frame) -> SourceResult<()> {
        for (_, item) in page.items() {
            match item {
                // Updates may be nested inside grouped sub-frames.
                FrameItem::Group(group) => self.visit(vt, &group.frame)?,
                FrameItem::Meta(Meta::Elem(elem), _) => {
                    let Some(elem) = elem.to::<UpdateElem>() else { continue };
                    if elem.key() == CounterKey::Page {
                        // Run the update on a one-level state seeded with the
                        // current logical number and read the result back.
                        let mut state = CounterState(smallvec![self.logical]);
                        state.update(vt, elem.update())?;
                        self.logical = state.first();
                    }
                }
                _ => {}
            }
        }

        Ok(())
    }

    /// Step past a page _boundary._
    ///
    /// Both numbers saturate instead of overflowing.
    pub fn step(&mut self) {
        self.physical = self.physical.saturating_add(1);
        // A counter update can set the logical number to any `usize`, so a
        // plain `+= 1` could overflow here. Saturate like the physical number.
        self.logical = self.logical.saturating_add(1);
    }
}
impl Default for ManualPageCounter {
fn default() -> Self {
Self::new()
}
}

View File

@ -1,4 +1,5 @@
use crate::layout::{LayoutRoot, PageElem};
use crate::meta::ManualPageCounter;
use crate::prelude::*;
/// The root element of a document and its metadata.
@ -45,6 +46,7 @@ impl LayoutRoot for DocumentElem {
tracing::info!("Document layout");
let mut pages = vec![];
let mut page_counter = ManualPageCounter::new();
for mut child in &self.children() {
let outer = styles;
@ -55,8 +57,7 @@ impl LayoutRoot for DocumentElem {
}
if let Some(page) = child.to::<PageElem>() {
let number = NonZeroUsize::ONE.saturating_add(pages.len());
let fragment = page.layout(vt, styles, number)?;
let fragment = page.layout(vt, styles, &mut page_counter)?;
pages.extend(fragment);
} else {
bail!(child.span(), "unexpected document child");

View File

@ -2,6 +2,7 @@ use std::str::FromStr;
use chinese_number::{ChineseCase, ChineseCountMethod, ChineseVariant, NumberToChinese};
use ecow::EcoVec;
use typst::export::{PdfPageLabel, PdfPageLabelStyle};
use crate::prelude::*;
use crate::text::Case;
@ -96,6 +97,50 @@ impl Numbering {
})
}
/// Create a new `PdfPageLabel` from a `Numbering` applied to a page
/// number.
///
/// Returns `None` if the numbering is not a pattern or if the pattern has
/// no counting piece. Otherwise, the label either uses one of the PDF
/// numbering styles together with an offset, or carries the fully formatted
/// number in its prefix.
pub fn apply_pdf(&self, number: usize) -> Option<PdfPageLabel> {
    let Numbering::Pattern(pat) = self else {
        return None;
    };

    let (prefix, kind, case) = pat.pieces.first()?;

    // If there is a suffix, we cannot use the common style optimisation,
    // since PDF does not provide a suffix field. The same holds for page
    // number zero: a PDF label's start value must be at least 1, so a styled
    // label without an offset would show up as page 1 in the viewer.
    let style = if pat.suffix.is_empty() && number > 0 {
        use NumberingKind as Kind;
        use PdfPageLabelStyle as Style;
        match (kind, case) {
            (Kind::Arabic, _) => Some(Style::Arabic),
            (Kind::Roman, Case::Lower) => Some(Style::LowerRoman),
            (Kind::Roman, Case::Upper) => Some(Style::UpperRoman),
            // The style is only used for the first 26 letters; larger
            // numbers are spelled out in the prefix instead.
            (Kind::Letter, Case::Lower) if number <= 26 => Some(Style::LowerAlpha),
            (Kind::Letter, Case::Upper) if number <= 26 => Some(Style::UpperAlpha),
            _ => None,
        }
    } else {
        None
    };

    // Prefix and offset depend on the style: If it is supported by the PDF
    // spec, we use the given prefix and an offset. Otherwise, everything
    // goes into prefix.
    let prefix = match style {
        Some(_) => (!prefix.is_empty()).then(|| prefix.clone()),
        None => Some(pat.apply(&[number])),
    };

    let offset = style.and(NonZeroUsize::new(number));
    Some(PdfPageLabel { prefix, style, offset })
}
/// Trim the prefix suffix if this is a pattern.
pub fn trimmed(mut self) -> Self {
if let Self::Pattern(pattern) = &mut self {
@ -132,8 +177,8 @@ cast! {
/// - `(I)`
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct NumberingPattern {
pieces: EcoVec<(EcoString, NumberingKind, Case)>,
suffix: EcoString,
pub pieces: EcoVec<(EcoString, NumberingKind, Case)>,
pub suffix: EcoString,
trimmed: bool,
}
@ -242,7 +287,7 @@ cast! {
/// Different kinds of numberings.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
enum NumberingKind {
pub enum NumberingKind {
Arabic,
Letter,
Roman,

View File

@ -9,6 +9,7 @@ use std::sync::Arc;
use ecow::EcoString;
use crate::eval::{cast, dict, ty, Dict, Value};
use crate::export::PdfPageLabel;
use crate::font::Font;
use crate::geom::{
self, rounded_rect, Abs, Axes, Color, Corners, Dir, Em, FixedAlign, FixedStroke,
@ -143,6 +144,11 @@ impl Frame {
}
}
/// Attach metadata that occupies no area, anchored at the frame's origin.
pub fn push_positionless_meta(&mut self, meta: Meta) {
    let item = FrameItem::Meta(meta, Size::zero());
    self.push(Point::zero(), item);
}
/// Insert an item at the given layer in the frame.
///
/// This panics if the layer is greater than the number of layers present.
@ -668,6 +674,8 @@ pub enum Meta {
Elem(Content),
/// The numbering of the current page.
PageNumbering(Value),
/// A PDF page label of the current page.
PdfPageLabel(PdfPageLabel),
/// Indicates that content should be hidden. This variant doesn't appear
/// in the final frames as it is removed alongside the content that should
/// be hidden.
@ -684,6 +692,7 @@ impl Debug for Meta {
Self::Link(dest) => write!(f, "Link({dest:?})"),
Self::Elem(content) => write!(f, "Elem({:?})", content.func()),
Self::PageNumbering(value) => write!(f, "PageNumbering({value:?})"),
Self::PdfPageLabel(label) => write!(f, "PdfPageLabel({label:?})"),
Self::Hide => f.pad("Hide"),
}
}

View File

@ -82,11 +82,6 @@ impl Dict {
self.0.get(key).ok_or_else(|| missing_key(key))
}
/// Remove the value if the dictionary contains the given key.
pub fn take(&mut self, key: &str) -> StrResult<Value> {
Arc::make_mut(&mut self.0).remove(key).ok_or_else(|| missing_key(key))
}
/// Mutably borrow the value the given `key` maps to.
pub fn at_mut(&mut self, key: &str) -> StrResult<&mut Value> {
Arc::make_mut(&mut self.0)
@ -94,6 +89,11 @@ impl Dict {
.ok_or_else(|| missing_key_no_default(key))
}
/// Remove the value if the dictionary contains the given key.
pub fn take(&mut self, key: &str) -> StrResult<Value> {
Arc::make_mut(&mut self.0).remove(key).ok_or_else(|| missing_key(key))
}
/// Whether the dictionary contains a specific key.
pub fn contains(&self, key: &str) -> bool {
self.0.contains_key(key)

View File

@ -4,6 +4,6 @@ mod pdf;
mod render;
mod svg;
pub use self::pdf::pdf;
pub use self::pdf::{pdf, PdfPageLabel, PdfPageLabelStyle};
pub use self::render::{render, render_merged};
pub use self::svg::{svg, svg_merged};

View File

@ -6,12 +6,16 @@ mod image;
mod outline;
mod page;
pub use self::page::{PdfPageLabel, PdfPageLabelStyle};
use std::cmp::Eq;
use std::collections::{BTreeMap, HashMap};
use std::hash::Hash;
use std::num::NonZeroUsize;
use ecow::EcoString;
use pdf_writer::types::Direction;
use pdf_writer::writers::PageLabel;
use pdf_writer::{Finish, Name, PdfWriter, Ref, TextStr};
use xmp_writer::{LangId, RenditionClass, XmpWriter};
@ -112,6 +116,9 @@ fn write_catalog(ctx: &mut PdfContext) {
// Write the outline tree.
let outline_root_id = outline::write_outline(ctx);
// Write the page labels.
let page_labels = write_page_labels(ctx);
// Write the document information.
let mut info = ctx.writer.document_info(ctx.alloc.bump());
let mut xmp = XmpWriter::new();
@ -147,6 +154,15 @@ fn write_catalog(ctx: &mut PdfContext) {
catalog.viewer_preferences().direction(dir);
catalog.pair(Name(b"Metadata"), meta_ref);
// Insert the page labels.
if !page_labels.is_empty() {
let mut num_tree = catalog.page_labels();
let mut entries = num_tree.nums();
for (n, r) in &page_labels {
entries.insert(n.get() as i32 - 1, *r);
}
}
if let Some(outline_root_id) = outline_root_id {
catalog.outlines(outline_root_id);
}
@ -156,6 +172,55 @@ fn write_catalog(ctx: &mut PdfContext) {
}
}
/// Write one page label object per run of consecutively numbered pages.
///
/// Returns, for every label object written, the one-based number of the
/// page at which the run starts together with the reference of the object.
/// Pages without a label, or whose label has neither prefix nor style, are
/// passed over.
#[tracing::instrument(skip_all)]
fn write_page_labels(ctx: &mut PdfContext) -> Vec<(NonZeroUsize, Ref)> {
    let mut labels = Vec::new();
    let mut last: Option<&PdfPageLabel> = None;

    for (index, page) in ctx.pages.iter().enumerate() {
        let page_number = NonZeroUsize::new(1 + index).unwrap();
        let Some(label) = page.label.as_ref() else { continue };

        // Nothing to write when neither style nor prefix is given.
        if label.prefix.is_none() && label.style.is_none() {
            continue;
        }

        // A label that merely continues the previous one (same prefix, same
        // style, offset one higher) is already covered by the earlier entry.
        let continues_run = last.is_some_and(|pre| {
            label.prefix == pre.prefix
                && label.style == pre.style
                && label.offset == pre.offset.map(|n| n.saturating_add(1))
        });
        last = Some(label);
        if continues_run {
            continue;
        }

        let id = ctx.alloc.bump();
        let mut entry = ctx.writer.indirect(id).start::<PageLabel>();

        // Emit only the fields that are present, so that for example a
        // missing prefix does not turn into an empty prefix string.
        if let Some(prefix) = &label.prefix {
            entry.prefix(TextStr(prefix));
        }

        if let Some(style) = label.style {
            entry.style(style.into());
        }

        if let Some(offset) = label.offset {
            entry.offset(offset.get() as i32);
        }

        labels.push((page_number, id));
    }

    labels
}
/// Compress data with the DEFLATE algorithm.
#[tracing::instrument(skip_all)]
fn deflate(data: &[u8]) -> Vec<u8> {

View File

@ -1,6 +1,9 @@
use ecow::eco_format;
use std::num::NonZeroUsize;
use ecow::{eco_format, EcoString};
use pdf_writer::types::{
ActionType, AnnotationType, ColorSpaceOperand, LineCapStyle, LineJoinStyle,
NumberingStyle,
};
use pdf_writer::writers::ColorSpace;
use pdf_writer::{Content, Filter, Finish, Name, Rect, Ref, Str};
@ -33,6 +36,7 @@ pub fn construct_page(ctx: &mut PdfContext, frame: &Frame) {
let mut ctx = PageContext {
parent: ctx,
page_ref,
label: None,
uses_opacities: false,
content: Content::new(),
state: State::default(),
@ -59,10 +63,11 @@ pub fn construct_page(ctx: &mut PdfContext, frame: &Frame) {
let page = Page {
size,
content: ctx.content,
content: ctx.content.finish(),
id: ctx.page_ref,
uses_opacities: ctx.uses_opacities,
links: ctx.links,
label: ctx.label,
};
ctx.parent.pages.push(page);
@ -71,8 +76,8 @@ pub fn construct_page(ctx: &mut PdfContext, frame: &Frame) {
/// Write the page tree.
#[tracing::instrument(skip_all)]
pub fn write_page_tree(ctx: &mut PdfContext) {
for page in std::mem::take(&mut ctx.pages).into_iter() {
write_page(ctx, page);
for i in 0..ctx.pages.len() {
write_page(ctx, i);
}
let mut pages = ctx.writer.pages(ctx.page_tree_ref);
@ -115,7 +120,8 @@ pub fn write_page_tree(ctx: &mut PdfContext) {
/// Write a page tree node.
#[tracing::instrument(skip_all)]
fn write_page(ctx: &mut PdfContext, page: Page) {
fn write_page(ctx: &mut PdfContext, i: usize) {
let page = &ctx.pages[i];
let content_id = ctx.alloc.bump();
let mut page_writer = ctx.writer.page(page.id);
@ -137,9 +143,9 @@ fn write_page(ctx: &mut PdfContext, page: Page) {
}
let mut annotations = page_writer.annotations();
for (dest, rect) in page.links {
for (dest, rect) in &page.links {
let mut annotation = annotations.push();
annotation.subtype(AnnotationType::Link).rect(rect);
annotation.subtype(AnnotationType::Link).rect(*rect);
annotation.border(0.0, 0.0, 0.0, None);
let pos = match dest {
@ -150,8 +156,8 @@ fn write_page(ctx: &mut PdfContext, page: Page) {
.uri(Str(uri.as_bytes()));
continue;
}
Destination::Position(pos) => pos,
Destination::Location(loc) => ctx.introspector.position(loc),
Destination::Position(pos) => *pos,
Destination::Location(loc) => ctx.introspector.position(*loc),
};
let index = pos.page.get() - 1;
@ -169,8 +175,7 @@ fn write_page(ctx: &mut PdfContext, page: Page) {
annotations.finish();
page_writer.finish();
let data = page.content.finish();
let data = deflate(&data);
let data = deflate(&page.content);
ctx.writer.stream(content_id, &data).filter(Filter::FlateDecode);
}
@ -181,17 +186,20 @@ pub struct Page {
/// The page's dimensions.
pub size: Size,
/// The page's content stream.
pub content: Content,
pub content: Vec<u8>,
/// Whether the page uses opacities.
pub uses_opacities: bool,
/// Links in the PDF coordinate system.
pub links: Vec<(Destination, Rect)>,
/// The page's PDF label.
pub label: Option<PdfPageLabel>,
}
/// An exporter for the contents of a single PDF page.
struct PageContext<'a, 'b> {
parent: &'a mut PdfContext<'b>,
page_ref: Ref,
label: Option<PdfPageLabel>,
content: Content,
state: State,
saves: Vec<State>,
@ -398,6 +406,7 @@ fn write_frame(ctx: &mut PageContext, frame: &Frame) {
Meta::Elem(_) => {}
Meta::Hide => {}
Meta::PageNumbering(_) => {}
Meta::PdfPageLabel(label) => ctx.label = Some(label.clone()),
},
}
}
@ -631,3 +640,48 @@ impl From<&LineJoin> for LineJoinStyle {
}
}
}
/// Specification for a PDF page label.
#[derive(Debug, Clone, PartialEq, Hash, Default)]
pub struct PdfPageLabel {
/// Text that is prepended to the numbering style. Can be any string or
/// none.
pub prefix: Option<EcoString>,
/// The numbering style, based on the numbering pattern.
///
/// `None` when the pattern cannot be expressed through one of the PDF
/// styles; the formatted number is then carried by `prefix` instead.
pub style: Option<PdfPageLabelStyle>,
/// Offset for the page label start.
///
/// Describes where to start counting from when setting a style.
/// (Has to be greater than or equal to 1.)
pub offset: Option<NonZeroUsize>,
}
/// A PDF page label number style.
///
/// Each variant converts to the `NumberingStyle` variant of the same name.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum PdfPageLabelStyle {
/// Decimal arabic numerals (1, 2, 3).
Arabic,
/// Lowercase roman numerals (i, ii, iii).
LowerRoman,
/// Uppercase roman numerals (I, II, III).
UpperRoman,
/// Lowercase letters (`a` to `z` for the first 26 pages,
/// `aa` to `zz` and so on for the next).
LowerAlpha,
/// Uppercase letters (`A` to `Z` for the first 26 pages,
/// `AA` to `ZZ` and so on for the next).
UpperAlpha,
}
impl From<PdfPageLabelStyle> for NumberingStyle {
fn from(value: PdfPageLabelStyle) -> Self {
match value {
PdfPageLabelStyle::Arabic => Self::Arabic,
PdfPageLabelStyle::LowerRoman => Self::LowerRoman,
PdfPageLabelStyle::UpperRoman => Self::UpperRoman,
PdfPageLabelStyle::LowerAlpha => Self::LowerAlpha,
PdfPageLabelStyle::UpperAlpha => Self::UpperAlpha,
}
}
}

View File

@ -107,6 +107,7 @@ fn render_frame(
Meta::Link(_) => {}
Meta::Elem(_) => {}
Meta::PageNumbering(_) => {}
Meta::PdfPageLabel(_) => {}
Meta::Hide => {}
},
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 121 KiB

View File

@ -0,0 +1,47 @@
#set page(margin: (bottom: 20pt, rest: 10pt))
#let filler = lorem(20)
// (i) - (ii). No style opt. because of suffix.
#set page(numbering: "(i)")
#filler
#pagebreak()
#filler
// 3 - 4. Style opt. Page Label should use /D style.
#set page(numbering: "1")
#filler
#pagebreak()
#filler
// I - IV. Style opt. Page Label should use /R style and start at 1 again.
#set page(numbering: "I / I")
#counter(page).update(1)
#filler
#pagebreak()
#filler
#pagebreak()
#filler
#pagebreak()
#filler
// Pre: ほ, Pre: ろ, Pre: は, Pre: に. No style opt. Uses prefix field entirely.
// Counter update without numbering change.
#set page(numbering: "Pre: い")
#filler
#pagebreak()
#filler
#counter(page).update(2)
#filler
#pagebreak()
#filler
#pagebreak()
#filler
// aa & ba. Style opt only for values <= 26. Page Label uses lower alphabet style.
// Repeats letter each 26 pages or uses numbering directly as prefix.
#set page(numbering: "a")
#counter(page).update(27)
#filler
#pagebreak()
#counter(page).update(53)
#filler