Generate PDF ID automatically unless we really have a stable ID (#3591)

This commit is contained in:
Laurenz 2024-03-09 12:55:03 +01:00 committed by GitHub
parent 204c4ecfcb
commit 82617a6a3c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 40 additions and 38 deletions

View File

@ -9,7 +9,7 @@ use parking_lot::RwLock;
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
use typst::diag::{bail, At, Severity, SourceDiagnostic, StrResult};
use typst::eval::Tracer;
use typst::foundations::Datetime;
use typst::foundations::{Datetime, Smart};
use typst::layout::Frame;
use typst::model::Document;
use typst::syntax::{FileId, Source, Span};
@ -157,18 +157,13 @@ fn export(
OutputFormat::Svg => {
export_image(world, document, command, watching, ImageExportFormat::Svg)
}
OutputFormat::Pdf => export_pdf(document, command, world),
OutputFormat::Pdf => export_pdf(document, command),
}
}
/// Export to a PDF.
fn export_pdf(
document: &Document,
command: &CompileCommand,
world: &SystemWorld,
) -> StrResult<()> {
let ident = world.input().map(|i| i.to_string_lossy());
let buffer = typst_pdf::pdf(document, ident.as_deref(), now());
fn export_pdf(document: &Document, command: &CompileCommand) -> StrResult<()> {
let buffer = typst_pdf::pdf(document, Smart::Auto, now());
let output = command.output();
fs::write(output, buffer)
.map_err(|err| eco_format!("failed to write PDF file ({err})"))?;

View File

@ -29,8 +29,6 @@ static STDIN_ID: Lazy<FileId> =
pub struct SystemWorld {
/// The working directory.
workdir: Option<PathBuf>,
/// The canonical path to the input file.
input: Option<PathBuf>,
/// The root relative to which absolute paths are resolved.
root: PathBuf,
/// The input path.
@ -108,7 +106,6 @@ impl SystemWorld {
Ok(Self {
workdir: std::env::current_dir().ok(),
input,
root,
main,
library: Prehashed::new(library),
@ -152,11 +149,6 @@ impl SystemWorld {
self.now.take();
}
/// Return the canonical path to the input file.
pub fn input(&self) -> Option<&PathBuf> {
self.input.as_ref()
}
/// Lookup a source file by id.
#[track_caller]
pub fn lookup(&self, id: FileId) -> Source {

View File

@ -19,7 +19,7 @@ use ecow::{eco_format, EcoString};
use pdf_writer::types::Direction;
use pdf_writer::writers::Destination;
use pdf_writer::{Finish, Name, Pdf, Ref, Str, TextStr};
use typst::foundations::{Datetime, Label, NativeElement};
use typst::foundations::{Datetime, Label, NativeElement, Smart};
use typst::introspection::Location;
use typst::layout::{Abs, Dir, Em, Transform};
use typst::model::{Document, HeadingElem};
@ -39,12 +39,17 @@ use crate::pattern::PdfPattern;
///
/// Returns the raw bytes making up the PDF file.
///
/// The `ident` parameter shall be a string that uniquely and stably identifies
/// the document. It should not change between compilations of the same
/// document. Its hash will be used to create a PDF document identifier (the
/// identifier itself is not leaked). If `ident` is `None`, a hash of the
/// document is used instead (which means that it _will_ change across
/// compilations).
/// The `ident` parameter, if given, shall be a string that uniquely and stably
/// identifies the document. It should not change between compilations of the
/// same document. **If you cannot provide such a stable identifier, just pass
/// `Smart::Auto` rather than trying to come up with one.** The CLI, for
/// example, does not have a well-defined notion of a long-lived project and as
/// such just passes `Smart::Auto`.
///
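/// If an `ident` is given, the hash of it will be used to create a PDF document
/// identifier (the identifier itself is not leaked). If `ident` is `Auto`, a
/// hash of the document's title and author is used instead (which is reasonably
/// unique and stable). If the document has no title or no author, the document
/// identifier falls back to the instance identifier, a hash of the PDF's
/// contents, and will therefore change whenever the output changes.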
///
/// The `timestamp`, if given, is expected to be the creation date of the
/// document as a UTC datetime. It will only be used if `set document(date: ..)`
@ -52,7 +57,7 @@ use crate::pattern::PdfPattern;
#[typst_macros::time(name = "pdf")]
pub fn pdf(
document: &Document,
ident: Option<&str>,
ident: Smart<&str>,
timestamp: Option<Datetime>,
) -> Vec<u8> {
let mut ctx = PdfContext::new(document);
@ -158,7 +163,7 @@ impl<'a> PdfContext<'a> {
}
/// Write the document catalog.
fn write_catalog(ctx: &mut PdfContext, ident: Option<&str>, timestamp: Option<Datetime>) {
fn write_catalog(ctx: &mut PdfContext, ident: Smart<&str>, timestamp: Option<Datetime>) {
let lang = ctx.languages.iter().max_by_key(|(_, &count)| count).map(|(&l, _)| l);
let dir = if lang.map(Lang::dir) == Some(Dir::RTL) {
@ -236,18 +241,25 @@ fn write_catalog(ctx: &mut PdfContext, ident: Option<&str>, timestamp: Option<Da
// changes in the frames.
let instance_id = hash_base64(&ctx.pdf.as_bytes());
if let Some(ident) = ident {
// A unique ID for the document that stays stable across compilations.
let doc_id = hash_base64(&("PDF-1.7", ident));
xmp.document_id(&doc_id);
xmp.instance_id(&instance_id);
ctx.pdf
.set_file_id((doc_id.clone().into_bytes(), instance_id.into_bytes()));
// Determine the document's ID. It should be as stable as possible.
const PDF_VERSION: &str = "PDF-1.7";
let doc_id = if let Smart::Custom(ident) = ident {
// We were provided with a stable ID. Yay!
hash_base64(&(PDF_VERSION, ident))
} else if ctx.document.title.is_some() && !ctx.document.author.is_empty() {
// If not provided from the outside, but title and author were given, we
// compute a hash of them, which should be reasonably stable and unique.
hash_base64(&(PDF_VERSION, &ctx.document.title, &ctx.document.author))
} else {
// This is not spec-compliant, but some PDF readers really want an ID.
let bytes = instance_id.into_bytes();
ctx.pdf.set_file_id((bytes.clone(), bytes));
}
// The user provided no usable metadata that we can use as an `/ID`, so we
// fall back to the instance ID.
instance_id.clone()
};
// Write IDs.
xmp.document_id(&doc_id);
xmp.instance_id(&instance_id);
ctx.pdf
.set_file_id((doc_id.clone().into_bytes(), instance_id.into_bytes()));
xmp.rendition_class(RenditionClass::Proof);
xmp.pdf_version("1.7");

View File

@ -51,6 +51,9 @@ pub struct DocumentElem {
///
/// The year component must be at least zero in order to be embedded into a
/// PDF.
///
/// If you want to create byte-by-byte reproducible PDFs, set this to
/// something other than `{auto}`.
#[ghost]
pub date: Smart<Option<Datetime>>,

View File

@ -501,7 +501,7 @@ fn test(
if let Some(pdf_path) = pdf_path {
let pdf_data = typst_pdf::pdf(
&document,
Some(&format!("typst-test: {}", name.display())),
Smart::Custom(&format!("typst-test: {}", name.display())),
world.today(Some(0)),
);
fs::create_dir_all(pdf_path.parent().unwrap()).unwrap();