Parallel image encoding (#2693)

This commit is contained in:
Sébastien d'Herbais de Thun 2023-11-17 19:42:49 +01:00 committed by GitHub
parent b0e81d4b3f
commit f5b3af3c1b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 148 additions and 44 deletions

1
Cargo.lock generated
View File

@ -2903,6 +2903,7 @@ dependencies = [
"log",
"once_cell",
"palette",
"rayon",
"regex",
"roxmltree",
"rustybuzz",

View File

@ -1,42 +1,66 @@
use std::collections::HashMap;
use std::io::Cursor;
use std::sync::Arc;
use image::{DynamicImage, GenericImageView, Rgba};
use pdf_writer::{Chunk, Filter, Finish, Ref};
use typst::geom::ColorSpace;
use typst::image::{ImageKind, RasterFormat, RasterImage, SvgImage};
use typst::image::{Image, ImageKind, RasterFormat, RasterImage, SvgImage};
use typst::util::Deferred;
use crate::{deflate, PdfContext};
/// Creates a new PDF image from the given image.
///
/// Also starts the deferred encoding of the image: the returned handle wraps
/// a closure that encodes the image, and the result is obtained later by
/// calling `wait` on the handle.
///
/// - Raster images are encoded into their main data stream, an optional
///   deflated ICC profile, and an optional deflated alpha channel (only if
///   the image's color type has alpha).
/// - SVG images are converted into a chunk of PDF objects.
#[comemo::memoize]
pub fn deferred_image(image: Image) -> Deferred<EncodedImage> {
    // `image` is moved into the closure, so anything borrowed from it stays
    // valid for the whole encoding; no extra clone of the raster is needed.
    Deferred::new(move || match image.kind() {
        ImageKind::Raster(raster) => {
            let (width, height) = (image.width(), image.height());
            let (data, filter, has_color) = encode_raster_image(raster);
            let icc = raster.icc().map(deflate);

            // The alpha channel is encoded separately from the color data.
            let alpha =
                raster.dynamic().color().has_alpha().then(|| encode_alpha(raster));

            EncodedImage::Raster { data, filter, has_color, width, height, icc, alpha }
        }
        ImageKind::Svg(svg) => EncodedImage::Svg(encode_svg(svg)),
    })
}
/// Embed all used images into the PDF.
#[tracing::instrument(skip_all)]
pub(crate) fn write_images(ctx: &mut PdfContext) {
for image in ctx.image_map.items() {
// Add the primary image.
match image.kind() {
ImageKind::Raster(raster) => {
// TODO: Error if image could not be encoded.
let (data, filter, has_color) = encode_raster_image(raster);
let width = image.width();
let height = image.height();
for (i, _) in ctx.image_map.items().enumerate() {
let handle = ctx.image_deferred_map.get(&i).unwrap();
match handle.wait() {
EncodedImage::Raster {
data,
filter,
has_color,
width,
height,
icc,
alpha,
} => {
let image_ref = ctx.alloc.bump();
ctx.image_refs.push(image_ref);
let mut image = ctx.pdf.image_xobject(image_ref, &data);
image.filter(filter);
image.width(width as i32);
image.height(height as i32);
let mut image = ctx.pdf.image_xobject(image_ref, data);
image.filter(*filter);
image.width(*width as i32);
image.height(*height as i32);
image.bits_per_component(8);
let mut icc_ref = None;
let space = image.color_space();
if raster.icc().is_some() {
if icc.is_some() {
let id = ctx.alloc.bump();
space.icc_based(id);
icc_ref = Some(id);
} else if has_color {
} else if *has_color {
ctx.colors.write(ColorSpace::Srgb, space, &mut ctx.alloc);
} else {
ctx.colors.write(ColorSpace::D65Gray, space, &mut ctx.alloc);
@ -44,27 +68,25 @@ pub(crate) fn write_images(ctx: &mut PdfContext) {
// Add a second gray-scale image containing the alpha values if
// this image has an alpha channel.
if raster.dynamic().color().has_alpha() {
let (alpha_data, alpha_filter) = encode_alpha(raster);
if let Some((alpha_data, alpha_filter)) = alpha {
let mask_ref = ctx.alloc.bump();
image.s_mask(mask_ref);
image.finish();
let mut mask = ctx.pdf.image_xobject(mask_ref, &alpha_data);
mask.filter(alpha_filter);
mask.width(width as i32);
mask.height(height as i32);
let mut mask = ctx.pdf.image_xobject(mask_ref, alpha_data);
mask.filter(*alpha_filter);
mask.width(*width as i32);
mask.height(*height as i32);
mask.color_space().device_gray();
mask.bits_per_component(8);
} else {
image.finish();
}
if let (Some(icc), Some(icc_ref)) = (raster.icc(), icc_ref) {
let compressed = deflate(icc);
let mut stream = ctx.pdf.icc_profile(icc_ref, &compressed);
if let (Some(icc), Some(icc_ref)) = (icc, icc_ref) {
let mut stream = ctx.pdf.icc_profile(icc_ref, icc);
stream.filter(Filter::FlateDecode);
if has_color {
if *has_color {
stream.n(3);
stream.alternate().srgb();
} else {
@ -73,9 +95,7 @@ pub(crate) fn write_images(ctx: &mut PdfContext) {
}
}
}
ImageKind::Svg(svg) => {
let chunk = encode_svg(svg);
EncodedImage::Svg(chunk) => {
let mut map = HashMap::new();
chunk.renumber_into(&mut ctx.pdf, |old| {
*map.entry(old).or_insert_with(|| ctx.alloc.bump())
@ -90,23 +110,22 @@ pub(crate) fn write_images(ctx: &mut PdfContext) {
/// whether the image has color.
///
/// Skips the alpha channel as that's encoded separately.
#[comemo::memoize]
#[tracing::instrument(skip_all)]
fn encode_raster_image(image: &RasterImage) -> (Arc<Vec<u8>>, Filter, bool) {
fn encode_raster_image(image: &RasterImage) -> (Vec<u8>, Filter, bool) {
let dynamic = image.dynamic();
match (image.format(), dynamic) {
// 8-bit gray JPEG.
(RasterFormat::Jpg, DynamicImage::ImageLuma8(_)) => {
let mut data = Cursor::new(vec![]);
dynamic.write_to(&mut data, image::ImageFormat::Jpeg).unwrap();
(data.into_inner().into(), Filter::DctDecode, false)
(data.into_inner(), Filter::DctDecode, false)
}
// 8-bit RGB JPEG (CMYK JPEGs get converted to RGB earlier).
(RasterFormat::Jpg, DynamicImage::ImageRgb8(_)) => {
let mut data = Cursor::new(vec![]);
dynamic.write_to(&mut data, image::ImageFormat::Jpeg).unwrap();
(data.into_inner().into(), Filter::DctDecode, true)
(data.into_inner(), Filter::DctDecode, true)
}
// TODO: Encode flate streams with PNG-predictor?
@ -114,7 +133,7 @@ fn encode_raster_image(image: &RasterImage) -> (Arc<Vec<u8>>, Filter, bool) {
// 8-bit gray PNG.
(RasterFormat::Png, DynamicImage::ImageLuma8(luma)) => {
let data = deflate(luma.as_raw());
(data.into(), Filter::FlateDecode, false)
(data, Filter::FlateDecode, false)
}
// Anything else (including Rgb(a) PNGs).
@ -128,29 +147,27 @@ fn encode_raster_image(image: &RasterImage) -> (Arc<Vec<u8>>, Filter, bool) {
}
let data = deflate(&pixels);
(data.into(), Filter::FlateDecode, true)
(data, Filter::FlateDecode, true)
}
}
}
/// Encode an image's alpha channel if present.
#[comemo::memoize]
#[tracing::instrument(skip_all)]
fn encode_alpha(raster: &RasterImage) -> (Arc<Vec<u8>>, Filter) {
fn encode_alpha(raster: &RasterImage) -> (Vec<u8>, Filter) {
let pixels: Vec<_> = raster
.dynamic()
.pixels()
.map(|(_, _, Rgba([_, _, _, a]))| a)
.collect();
(Arc::new(deflate(&pixels)), Filter::FlateDecode)
(deflate(&pixels), Filter::FlateDecode)
}
/// Encode an SVG into a chunk of PDF objects.
///
/// The main XObject will have ID 1.
#[comemo::memoize]
#[tracing::instrument(skip_all)]
fn encode_svg(svg: &SvgImage) -> Arc<Chunk> {
fn encode_svg(svg: &SvgImage) -> Chunk {
let mut chunk = Chunk::new();
// Safety: We do not keep any references to tree nodes beyond the
@ -166,5 +183,30 @@ fn encode_svg(svg: &SvgImage) -> Arc<Chunk> {
});
}
Arc::new(chunk)
chunk
}
/// A pre-encoded image.
///
/// This is the result of the deferred encoding started by `deferred_image`
/// and is what `write_images` embeds into the PDF.
pub enum EncodedImage {
/// A pre-encoded rasterized image.
Raster {
/// The encoded image data, as produced by `encode_raster_image`: either
/// JPEG data or deflated pixel data, matching `filter`.
data: Vec<u8>,
/// The filter to use for the image.
///
/// `DctDecode` for JPEG data, `FlateDecode` for deflated pixel data.
filter: Filter,
/// Whether the image has color.
///
/// If `false`, the image is written as a grayscale image.
has_color: bool,
/// The image's width.
width: u32,
/// The image's height.
height: u32,
/// The image's ICC profile, pre-deflated, if any.
icc: Option<Vec<u8>>,
/// The alpha channel of the image, pre-deflated, if any, together with
/// the filter to use for it.
///
/// Written as a separate grayscale soft-mask image.
alpha: Option<(Vec<u8>, Filter)>,
},
/// A vector graphic.
///
/// The chunk is the SVG converted to PDF objects.
Svg(Chunk),
}

View File

@ -22,11 +22,13 @@ use typst::font::Font;
use typst::geom::{Abs, Dir, Em};
use typst::image::Image;
use typst::model::Introspector;
use typst::util::Deferred;
use xmp_writer::{DateTime, LangId, RenditionClass, Timezone, XmpWriter};
use crate::color::ColorSpaces;
use crate::extg::ExtGState;
use crate::gradient::PdfGradient;
use crate::image::EncodedImage;
use crate::page::Page;
/// Export a document into a PDF file.
@ -104,6 +106,8 @@ struct PdfContext<'a> {
font_map: Remapper<Font>,
/// Deduplicates images used across the document.
image_map: Remapper<Image>,
/// Handles to deferred image conversions.
image_deferred_map: HashMap<usize, Deferred<EncodedImage>>,
/// Deduplicates gradients used across the document.
gradient_map: Remapper<PdfGradient>,
/// Deduplicates external graphics states used across the document.
@ -131,6 +135,7 @@ impl<'a> PdfContext<'a> {
colors: ColorSpaces::default(),
font_map: Remapper::new(),
image_map: Remapper::new(),
image_deferred_map: HashMap::default(),
gradient_map: Remapper::new(),
extg_map: Remapper::new(),
}

View File

@ -21,6 +21,7 @@ use typst::image::Image;
use crate::color::PaintEncode;
use crate::extg::ExtGState;
use crate::image::deferred_image;
use crate::{deflate, AbsExt, EmExt, PdfContext};
/// Construct page objects.
@ -679,8 +680,13 @@ fn write_path(ctx: &mut PageContext, x: f32, y: f32, path: &geom::Path) {
/// Encode a vector or raster image into the content stream.
fn write_image(ctx: &mut PageContext, x: f32, y: f32, image: &Image, size: Size) {
ctx.parent.image_map.insert(image.clone());
let name = eco_format!("Im{}", ctx.parent.image_map.map(image));
let idx = ctx.parent.image_map.insert(image.clone());
ctx.parent
.image_deferred_map
.entry(idx)
.or_insert_with(|| deferred_image(image.clone()));
let name = eco_format!("Im{idx}");
let w = size.x.to_f32();
let h = size.y.to_f32();
ctx.content.save_state();

View File

@ -29,6 +29,7 @@ lasso = { workspace = true }
log = { workspace = true }
once_cell = { workspace = true }
palette = { workspace = true }
rayon = { workspace = true }
regex = { workspace = true }
roxmltree = { workspace = true }
rustybuzz = { workspace = true }

View File

@ -0,0 +1,47 @@
use std::sync::Arc;
use once_cell::sync::OnceCell;
/// A deferred value.
///
/// This is a value that is being computed in parallel and can be waited on.
///
/// Cloning a `Deferred` is cheap: all clones share the same underlying cell,
/// so the value is computed at most once and is visible through every clone.
pub struct Deferred<T>(Arc<OnceCell<T>>);

impl<T: Send + Sync + 'static> Deferred<T> {
    /// Creates a new deferred value.
    ///
    /// The closure is handed to `rayon::spawn` so that the value can be
    /// initialized in parallel with the caller. Use [`wait`](Self::wait) to
    /// obtain the result.
    pub fn new<F>(f: F) -> Self
    where
        F: FnOnce() -> T + Send + Sync + 'static,
    {
        let inner = Arc::new(OnceCell::new());
        let cloned = Arc::clone(&inner);
        rayon::spawn(move || {
            // Initialize the value if it hasn't been initialized yet.
            // We do this to avoid panicking in case it was set externally.
            cloned.get_or_init(f);
        });
        Self(inner)
    }

    /// Waits on the value to be initialized.
    ///
    /// If the value has already been initialized, this will return
    /// immediately. Otherwise, this will block until the value is
    /// initialized in another thread.
    pub fn wait(&self) -> &T {
        // Fast path if the value is already available. In that case there is
        // no reason to yield to rayon, which could otherwise make us execute
        // unrelated pending work before returning.
        if let Some(value) = self.0.get() {
            return value;
        }

        // Ensure that we yield to give the deferred value a chance to be
        // computed on single-threaded platforms (for WASM compatibility).
        while let Some(rayon::Yield::Executed) = rayon::yield_now() {}

        // NOTE(review): if the spawned closure panics, the cell presumably
        // stays empty and this call would block indefinitely; confirm
        // whether callers need to guard against that.
        self.0.wait()
    }
}

// Implemented by hand so that cloning does not require `T: Clone`; only the
// shared handle is cloned, never the value itself.
impl<T> Clone for Deferred<T> {
    fn clone(&self) -> Self {
        Self(Arc::clone(&self.0))
    }
}

View File

@ -1,7 +1,9 @@
//! Utilities.
mod deferred;
mod pico;
pub use self::deferred::Deferred;
pub use self::pico::PicoStr;
use std::fmt::{Debug, Formatter};