Compress images in PDFs ⚙

This commit is contained in:
Laurenz 2020-11-30 22:07:08 +01:00
parent 21857064db
commit fdc1b378a3
14 changed files with 158 additions and 62 deletions

View File

@ -14,6 +14,7 @@ fs = ["fontdock/fs"]
[dependencies]
fontdock = { path = "../fontdock", default-features = false }
pdf-writer = { path = "../pdf-writer" }
deflate = { version = "0.8.6" }
image = { version = "0.23", default-features = false, features = ["jpeg", "png"] }
itoa = "0.4"
ttf-parser = "0.8.2"

View File

@ -5,9 +5,13 @@ use std::cell::RefCell;
use std::collections::{hash_map::Entry, HashMap};
use std::fmt::{self, Debug, Formatter};
use std::fs;
use std::io::Cursor;
use std::path::{Path, PathBuf};
use std::rc::Rc;
use image::io::Reader as ImageReader;
use image::{DynamicImage, GenericImageView, ImageFormat};
use crate::font::FontLoader;
/// A reference-counted shared environment.
@ -48,11 +52,11 @@ impl ResourceLoader {
let id = match self.paths.entry(path.to_owned()) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
let id = *entry.insert(ResourceId(self.entries.len()));
let data = fs::read(path).ok()?;
let resource = parse(data)?;
let len = self.entries.len();
self.entries.push(Box::new(resource));
id
*entry.insert(ResourceId(len))
}
};
@ -63,6 +67,7 @@ impl ResourceLoader {
///
/// # Panics
/// This panics if no resource with this id was loaded.
#[track_caller]
pub fn get_loaded<R: 'static>(&self, id: ResourceId) -> &R {
    // Indexing panics for an id that was never handed out; the downcast
    // panics when the stored resource is not an `R`.
    let entry = &self.entries[id.0];
    entry.downcast_ref::<R>().expect("bad resource type")
}
@ -73,3 +78,32 @@ impl Debug for ResourceLoader {
f.debug_set().entries(self.paths.keys()).finish()
}
}
/// A loaded image resource.
///
/// Pairs the decoded pixel data with the format the image was originally
/// encoded in.
pub struct ImageResource {
/// The original format the image was encoded in.
///
/// `ImageResource::parse` fills this with the format guessed from the
/// raw data.
pub format: ImageFormat,
/// The decoded image.
pub buf: DynamicImage,
}
impl ImageResource {
    /// Parse an image resource from the raw bytes of an encoded image.
    ///
    /// The format is guessed from the data itself. Returns `None` when
    /// guessing the format fails, when no format could be determined, or
    /// when the image cannot be decoded.
    pub fn parse(data: Vec<u8>) -> Option<Self> {
        let cursor = Cursor::new(data);
        let reader = ImageReader::new(cursor).with_guessed_format().ok()?;
        let format = reader.format()?;
        reader.decode().ok().map(|buf| Self { format, buf })
    }
}
impl Debug for ImageResource {
    /// Formats a summary of the image (format, color type and dimensions)
    /// instead of dumping the pixel data.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let (width, height) = self.buf.dimensions();
        let color = self.buf.color();

        let mut repr = f.debug_struct("ImageResource");
        repr.field("format", &self.format);
        repr.field("color", &color);
        repr.field("width", &width);
        repr.field("height", &height);
        repr.finish()
    }
}

View File

@ -37,7 +37,7 @@ pub fn eval(tree: &SynTree, env: SharedEnv, state: State) -> Pass<Document> {
let mut ctx = EvalContext::new(env, state);
ctx.start_page_group(false);
tree.eval(&mut ctx);
ctx.end_page_group();
ctx.end_page_group(true);
ctx.finish()
}
@ -117,7 +117,8 @@ impl EvalContext {
/// Start a page group based on the active page state.
///
/// If `hard` is false, empty page runs will be omitted from the output.
/// If both this `hard` and the one in the matching call to `end_page_group`
/// are false, empty page runs will be omitted from the output.
///
/// This also starts an inner paragraph.
pub fn start_page_group(&mut self, hard: bool) {
@ -134,10 +135,10 @@ impl EvalContext {
/// End a page group and push it to the finished page runs.
///
/// This also ends an inner paragraph.
pub fn end_page_group(&mut self) {
pub fn end_page_group(&mut self, hard: bool) {
self.end_par_group();
let (group, children) = self.end_group::<PageGroup>();
if group.hard || !children.is_empty() {
if hard || group.hard || !children.is_empty() {
self.runs.push(Pages {
size: group.size,
child: LayoutNode::dynamic(Pad {
@ -208,6 +209,7 @@ impl EvalContext {
/// End a layouting group started with [`start_group`](Self::start_group).
///
/// This returns the stored metadata and the collected nodes.
#[track_caller]
fn end_group<T: 'static>(&mut self) -> (T, Vec<LayoutNode>) {
if let Some(&LayoutNode::Spacing(spacing)) = self.inner.last() {
if spacing.softness == Softness::Soft {

View File

@ -3,16 +3,19 @@
use std::cmp::Eq;
use std::collections::HashMap;
use std::hash::Hash;
use std::io::Write;
use deflate::write::ZlibEncoder;
use deflate::Compression;
use fontdock::FaceId;
use image::{DynamicImage, GenericImageView, Rgba};
use image::{DynamicImage, GenericImageView, ImageFormat, ImageResult, Luma, Rgba};
use pdf_writer::{
CidFontType, ColorSpace, Content, FontFlags, Name, PdfWriter, Rect, Ref, Str,
CidFontType, ColorSpace, Content, Filter, FontFlags, Name, PdfWriter, Rect, Ref, Str,
SystemInfo, UnicodeCmap,
};
use ttf_parser::{name_id, GlyphId};
use crate::env::{Env, ResourceId};
use crate::env::{Env, ImageResource, ResourceId};
use crate::geom::Length;
use crate::layout::{BoxLayout, LayoutElement};
@ -50,8 +53,8 @@ impl<'a> PdfExporter<'a> {
match element {
LayoutElement::Text(shaped) => fonts.insert(shaped.face),
LayoutElement::Image(image) => {
let buf = env.resources.get_loaded::<DynamicImage>(image.res);
if buf.color().has_alpha() {
let img = env.resources.get_loaded::<ImageResource>(image.res);
if img.buf.color().has_alpha() {
alpha_masks += 1;
}
images.insert(image.res);
@ -266,7 +269,7 @@ impl<'a> PdfExporter<'a> {
// Write the to-unicode character map, which maps glyph ids back to
// unicode codepoints to enable copying out of the PDF.
self.writer
.cmap_stream(refs.cmap, &{
.cmap(refs.cmap, &{
let mut cmap = UnicodeCmap::new(cmap_name, system_info);
for subtable in face.character_mapping_subtables() {
subtable.codepoints(|n| {
@ -288,39 +291,49 @@ impl<'a> PdfExporter<'a> {
}
fn write_images(&mut self) {
let mut mask = 0;
let mut masks_seen = 0;
for (id, resource) in self.refs.images().zip(self.images.layout_indices()) {
let buf = self.env.resources.get_loaded::<DynamicImage>(resource);
let data = buf.to_rgb8().into_raw();
let img = self.env.resources.get_loaded::<ImageResource>(resource);
let (width, height) = img.buf.dimensions();
let mut image = self.writer.image_stream(id, &data);
image.width(buf.width() as i32);
image.height(buf.height() as i32);
image.color_space(ColorSpace::DeviceRGB);
image.bits_per_component(8);
// Add the primary image.
if let Ok((data, filter, color_space)) = encode_image(img) {
let mut image = self.writer.image(id, &data);
image.inner().filter(filter);
image.width(width as i32);
image.height(height as i32);
image.color_space(color_space);
image.bits_per_component(8);
// Add a second gray-scale image containing the alpha values if this
// is image has an alpha channel.
if buf.color().has_alpha() {
let mask_id = self.refs.alpha_mask(mask);
// Add a second gray-scale image containing the alpha values if
// this image has an alpha channel.
if img.buf.color().has_alpha() {
if let Ok((alpha_data, alpha_filter)) = encode_alpha(img) {
let mask_id = self.refs.alpha_mask(masks_seen);
image.s_mask(mask_id);
drop(image);
image.s_mask(mask_id);
drop(image);
let mut mask = self.writer.image(mask_id, &alpha_data);
mask.inner().filter(alpha_filter);
mask.width(width as i32);
mask.height(height as i32);
mask.color_space(ColorSpace::DeviceGray);
mask.bits_per_component(8);
} else {
// TODO: Warn that alpha channel could not be encoded.
}
let mut samples = vec![];
for (_, _, Rgba([_, _, _, a])) in buf.pixels() {
samples.push(a);
masks_seen += 1;
}
} else {
// TODO: Warn that image could not be encoded.
self.writer
.image_stream(mask_id, &samples)
.width(buf.width() as i32)
.height(buf.height() as i32)
.image(id, &[])
.width(0)
.height(0)
.color_space(ColorSpace::DeviceGray)
.bits_per_component(8);
mask += 1;
.bits_per_component(1);
}
}
}
@ -446,3 +459,57 @@ where
self.to_layout.iter().copied()
}
}
/// Encode the color data of an image for embedding into a PDF stream.
///
/// Returns the encoded bytes together with the filter that decodes them and
/// the color space of the encoded samples. The alpha channel is not part of
/// the output since it is encoded separately (see `encode_alpha`).
fn encode_image(img: &ImageResource) -> ImageResult<(Vec<u8>, Filter, ColorSpace)> {
    let mut data = Vec::new();

    match (img.format, &img.buf) {
        // JPEG with 8-bit gray samples.
        (ImageFormat::Jpeg, DynamicImage::ImageLuma8(_)) => {
            img.buf.write_to(&mut data, img.format)?;
            Ok((data, Filter::DctDecode, ColorSpace::DeviceGray))
        }

        // JPEG with 8-bit Rgb samples (Cmyk JPEGs get converted to Rgb
        // earlier).
        (ImageFormat::Jpeg, DynamicImage::ImageRgb8(_)) => {
            img.buf.write_to(&mut data, img.format)?;
            Ok((data, Filter::DctDecode, ColorSpace::DeviceRgb))
        }

        // TODO: Encode flate streams with PNG-predictor?

        // PNG with 8-bit gray samples: deflate one byte per pixel.
        (ImageFormat::Png, DynamicImage::ImageLuma8(luma)) => {
            let mut enc = ZlibEncoder::new(&mut data, Compression::default());
            luma.pixels()
                .try_for_each(|&Luma([value])| enc.write_all(&[value]))?;
            enc.finish()?;
            Ok((data, Filter::FlateDecode, ColorSpace::DeviceGray))
        }

        // Everything else (including Rgb(a) PNGs): deflate the red, green
        // and blue components of each pixel and drop the alpha component.
        (_, buf) => {
            let mut enc = ZlibEncoder::new(&mut data, Compression::default());
            buf.pixels()
                .try_for_each(|(_, _, Rgba([r, g, b, _]))| enc.write_all(&[r, g, b]))?;
            enc.finish()?;
            Ok((data, Filter::FlateDecode, ColorSpace::DeviceRgb))
        }
    }
}
/// Encode the alpha channel of an image.
///
/// Writes the alpha component of every pixel as one byte into a
/// zlib-compressed buffer and returns that buffer together with the filter
/// that decodes it.
fn encode_alpha(img: &ImageResource) -> ImageResult<(Vec<u8>, Filter)> {
    let mut data = Vec::new();
    let mut enc = ZlibEncoder::new(&mut data, Compression::default());
    img.buf
        .pixels()
        .try_for_each(|(_, _, Rgba([_, _, _, a]))| enc.write_all(&[a]))?;
    enc.finish()?;
    Ok((data, Filter::FlateDecode))
}

View File

@ -1,9 +1,6 @@
use std::io::Cursor;
use image::io::Reader;
use image::GenericImageView;
use crate::env::ResourceId;
use crate::env::{ImageResource, ResourceId};
use crate::layout::*;
use crate::prelude::*;
@ -20,15 +17,10 @@ pub fn image(mut args: Args, ctx: &mut EvalContext) -> Value {
if let Some(path) = path {
let mut env = ctx.env.borrow_mut();
let loaded = env.resources.load(path.v, |data| {
Reader::new(Cursor::new(data))
.with_guessed_format()
.ok()
.and_then(|reader| reader.decode().ok())
});
let loaded = env.resources.load(path.v, ImageResource::parse);
if let Some((res, buf)) = loaded {
let dimensions = buf.dimensions();
if let Some((res, img)) = loaded {
let dimensions = img.buf.dimensions();
drop(env);
ctx.push(Image {
res,

View File

@ -316,13 +316,13 @@ pub fn page(mut args: Args, ctx: &mut EvalContext) -> Value {
args.done(ctx);
if let Some(body) = body {
ctx.end_page_group();
ctx.end_page_group(false);
ctx.start_page_group(true);
body.eval(ctx);
ctx.state = snapshot;
}
ctx.end_page_group();
ctx.end_page_group(false);
ctx.start_page_group(false);
Value::None
@ -331,7 +331,7 @@ pub fn page(mut args: Args, ctx: &mut EvalContext) -> Value {
/// `pagebreak`: Start a new page.
pub fn pagebreak(args: Args, ctx: &mut EvalContext) -> Value {
args.done(ctx);
ctx.end_page_group();
ctx.end_page_group(false);
ctx.start_page_group(true);
Value::None
}

View File

@ -3,6 +3,6 @@
- `typ`: Input files
- `pdf`: PDF files produced by tests
- `png`: PNG files produced by tests
- `ref`: Reference images which the PNGs are compared to byte-wise to determine
- `cmp`: Reference images which the PNGs are compared to byte-wise to determine
whether the test passed or failed
- `res`: Resource files used by tests

View File

Before

Width:  |  Height:  |  Size: 77 KiB

After

Width:  |  Height:  |  Size: 77 KiB

BIN
tests/cmp/image.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 275 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 292 KiB

BIN
tests/res/rhino.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 227 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 119 KiB

View File

@ -6,8 +6,8 @@
# Tiger
[image: "res/tiger.jpg", width=2cm]
[image: "res/tiger-alpha.png", width=1cm]
[image: "res/tiger-alpha.png", height=2cm]
[image: "res/rhino.png", width=1cm]
[image: "res/rhino.png", height=2cm]
[pagebreak]

View File

@ -6,7 +6,7 @@ use std::path::Path;
use std::rc::Rc;
use fontdock::fs::{FsIndex, FsSource};
use image::{DynamicImage, GenericImageView, Rgba};
use image::{GenericImageView, Rgba};
use memmap::Mmap;
use tiny_skia::{
Canvas, Color, ColorU8, FillRule, FilterQuality, Paint, PathBuilder, Pattern, Pixmap,
@ -15,7 +15,7 @@ use tiny_skia::{
use ttf_parser::OutlineBuilder;
use typst::diag::{Feedback, Pass};
use typst::env::{Env, ResourceLoader, SharedEnv};
use typst::env::{Env, ImageResource, ResourceLoader, SharedEnv};
use typst::eval::State;
use typst::export::pdf;
use typst::font::FontLoader;
@ -29,7 +29,7 @@ const FONT_DIR: &str = "../fonts";
const TYP_DIR: &str = "typ";
const PDF_DIR: &str = "pdf";
const PNG_DIR: &str = "png";
const REF_DIR: &str = "ref";
const CMP_DIR: &str = "cmp";
fn main() {
env::set_current_dir(env::current_dir().unwrap().join("tests")).unwrap();
@ -46,7 +46,7 @@ fn main() {
let name = src_path.file_stem().unwrap().to_string_lossy().to_string();
let pdf_path = Path::new(PDF_DIR).join(&name).with_extension("pdf");
let png_path = Path::new(PNG_DIR).join(&name).with_extension("png");
let ref_path = Path::new(REF_DIR).join(&name).with_extension("png");
let ref_path = Path::new(CMP_DIR).join(&name).with_extension("png");
if filter.matches(&name) {
filtered.push((name, src_path, pdf_path, png_path, ref_path));
@ -247,8 +247,8 @@ fn draw_text(canvas: &mut Canvas, pos: Point, env: &Env, shaped: &Shaped) {
}
}
fn draw_image(canvas: &mut Canvas, pos: Point, env: &Env, image: &ImageElement) {
let buf = env.resources.get_loaded::<DynamicImage>(image.res);
fn draw_image(canvas: &mut Canvas, pos: Point, env: &Env, img: &ImageElement) {
let buf = &env.resources.get_loaded::<ImageResource>(img.res).buf;
let mut pixmap = Pixmap::new(buf.width(), buf.height()).unwrap();
for ((_, _, src), dest) in buf.pixels().zip(pixmap.pixels_mut()) {
@ -256,8 +256,8 @@ fn draw_image(canvas: &mut Canvas, pos: Point, env: &Env, image: &ImageElement)
*dest = ColorU8::from_rgba(r, g, b, a).premultiply();
}
let view_width = image.size.width.to_pt() as f32;
let view_height = image.size.height.to_pt() as f32;
let view_width = img.size.width.to_pt() as f32;
let view_height = img.size.height.to_pt() as f32;
let x = pos.x.to_pt() as f32;
let y = pos.y.to_pt() as f32;