separate decode from string/bytes for all data functions; and encode for json, toml, yaml via serde (#1935)

This commit is contained in:
Beiri22 2023-08-25 14:31:03 +02:00 committed by GitHub
parent ea469c2df6
commit 22b5959608
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 387 additions and 103 deletions

View File

@ -3,6 +3,9 @@ integer = 42
float = 3.14
boolean = true
date_time = 2023-02-01T15:38:57Z
date_time2 = 2023-02-01T15:38:57
date = 2023-02-01
time = 15:38:57
array = [1, "string", 3.0, false]
inline_table = { first = "amazing", second = "greater" }

View File

@ -41,7 +41,7 @@ serde_yaml = "0.8"
smallvec = "1.10"
syntect = { version = "5", default-features = false, features = ["parsing", "regex-fancy", "plist-load", "yaml-load"] }
time = { version = "0.3.20", features = ["formatting"] }
toml = { version = "0.7.3", default-features = false, features = ["parse"] }
toml = { version = "0.7.4"}
tracing = "0.1.37"
ttf-parser = "0.18.1"
typed-arena = "2"

View File

@ -1,5 +1,5 @@
use typst::diag::{format_xml_like_error, FileError};
use typst::eval::{Bytes, Datetime};
use typst::eval::Bytes;
use crate::prelude::*;
@ -65,6 +65,15 @@ pub enum Readable {
Bytes(Bytes),
}
impl Readable {
fn as_slice(&self) -> &[u8] {
match self {
Readable::Bytes(v) => v,
Readable::Str(v) => v.as_bytes(),
}
}
}
cast! {
Readable,
self => match self {
@ -105,6 +114,10 @@ impl From<Readable> for Bytes {
/// Display: CSV
/// Category: data-loading
#[func]
#[scope(
scope.define("decode", csv_decode_func());
scope
)]
pub fn csv(
/// Path to a CSV file.
path: Spanned<EcoString>,
@ -119,11 +132,27 @@ pub fn csv(
let Spanned { v: path, span } = path;
let id = vm.location().join(&path).at(span)?;
let data = vm.world().file(id).at(span)?;
csv_decode(Spanned::new(Readable::Bytes(data), span), delimiter)
}
/// Reads structured data from a CSV string/bytes.
///
/// Display: CSV
/// Category: data-loading
#[func]
pub fn csv_decode(
/// CSV data.
data: Spanned<Readable>,
/// The delimiter that separates columns in the CSV file.
/// Must be a single ASCII character.
#[named]
#[default]
delimiter: Delimiter,
) -> SourceResult<Array> {
let Spanned { v: data, span } = data;
let mut builder = csv::ReaderBuilder::new();
builder.has_headers(false);
builder.delimiter(delimiter.0 as u8);
let mut reader = builder.from_reader(data.as_slice());
let mut array = Array::new();
@ -221,6 +250,11 @@ fn format_csv_error(error: csv::Error, line: usize) -> EcoString {
/// Display: JSON
/// Category: data-loading
#[func]
#[scope(
scope.define("decode", json_decode_func());
scope.define("encode", json_encode_func());
scope
)]
pub fn json(
/// Path to a JSON file.
path: Spanned<EcoString>,
@ -230,30 +264,48 @@ pub fn json(
let Spanned { v: path, span } = path;
let id = vm.location().join(&path).at(span)?;
let data = vm.world().file(id).at(span)?;
let value: serde_json::Value =
serde_json::from_slice(&data).map_err(format_json_error).at(span)?;
Ok(convert_json(value))
json_decode(Spanned::new(Readable::Bytes(data), span))
}
/// Convert a JSON value to a Typst value.
fn convert_json(value: serde_json::Value) -> Value {
match value {
serde_json::Value::Null => Value::None,
serde_json::Value::Bool(v) => v.into_value(),
serde_json::Value::Number(v) => match v.as_i64() {
Some(int) => int.into_value(),
None => v.as_f64().unwrap_or(f64::NAN).into_value(),
},
serde_json::Value::String(v) => v.into_value(),
serde_json::Value::Array(v) => {
v.into_iter().map(convert_json).collect::<Array>().into_value()
}
serde_json::Value::Object(v) => v
.into_iter()
.map(|(key, value)| (key.into(), convert_json(value)))
.collect::<Dict>()
.into_value(),
/// Reads structured data from a JSON string or from JSON bytes.
///
/// Display: JSON
/// Category: data-loading
#[func]
pub fn json_decode(
    /// JSON data.
    data: Spanned<Readable>,
) -> SourceResult<Value> {
    let Spanned { v: data, span } = data;
    // Deserialize straight into a value; parse failures are formatted for the
    // user and attached to the span of the argument.
    serde_json::from_slice::<Value>(data.as_slice())
        .map_err(format_json_error)
        .at(span)
}
/// Encodes structured data into a JSON string.
///
/// Display: JSON
/// Category: data-loading
#[func]
pub fn json_encode(
    /// Value to be encoded.
    value: Spanned<Value>,
    /// Whether to pretty print the JSON with newlines and indentation.
    #[named]
    #[default(true)]
    pretty: bool,
) -> SourceResult<Str> {
    let Spanned { v: value, span } = value;
    // Pick the serializer first, then share the conversion and error handling.
    let encoded = match pretty {
        true => serde_json::to_string_pretty(&value),
        false => serde_json::to_string(&value),
    };
    encoded
        .map(Into::into)
        .map_err(|e| eco_format!("failed to encode value as json: {e}"))
        .at(span)
}
/// Format the user-facing JSON error message.
@ -286,6 +338,11 @@ fn format_json_error(error: serde_json::Error) -> EcoString {
/// Display: TOML
/// Category: data-loading
#[func]
#[scope(
scope.define("decode", toml_decode_func());
scope.define("encode", toml_encode_func());
scope
)]
pub fn toml(
/// Path to a TOML file.
path: Spanned<EcoString>,
@ -296,48 +353,46 @@ pub fn toml(
let id = vm.location().join(&path).at(span)?;
let data = vm.world().file(id).at(span)?;
let raw = std::str::from_utf8(&data)
toml_decode(Spanned::new(Readable::Bytes(data), span))
}
/// Reads structured data from a TOML string/bytes.
///
/// Display: TOML
/// Category: data-loading
#[func]
pub fn toml_decode(
/// TOML data.
data: Spanned<Readable>,
) -> SourceResult<Value> {
let Spanned { v: data, span } = data;
let raw = std::str::from_utf8(data.as_slice())
.map_err(|_| "file is not valid utf-8")
.at(span)?;
let value: toml::Value = toml::from_str(raw).map_err(format_toml_error).at(span)?;
Ok(convert_toml(value))
let value: Value = toml::from_str(raw).map_err(format_toml_error).at(span)?;
Ok(value)
}
/// Convert a TOML value to a Typst value.
fn convert_toml(value: toml::Value) -> Value {
match value {
toml::Value::String(v) => v.into_value(),
toml::Value::Integer(v) => v.into_value(),
toml::Value::Float(v) => v.into_value(),
toml::Value::Boolean(v) => v.into_value(),
toml::Value::Array(v) => {
v.into_iter().map(convert_toml).collect::<Array>().into_value()
}
toml::Value::Table(v) => v
.into_iter()
.map(|(key, value)| (key.into(), convert_toml(value)))
.collect::<Dict>()
.into_value(),
toml::Value::Datetime(v) => match (v.date, v.time) {
(None, None) => Value::None,
(Some(date), None) => {
Datetime::from_ymd(date.year as i32, date.month, date.day).into_value()
}
(None, Some(time)) => {
Datetime::from_hms(time.hour, time.minute, time.second).into_value()
}
(Some(date), Some(time)) => Datetime::from_ymd_hms(
date.year as i32,
date.month,
date.day,
time.hour,
time.minute,
time.second,
)
.into_value(),
},
}
/// Encodes structured data into a TOML string.
///
/// Display: TOML
/// Category: data-loading
#[func]
pub fn toml_encode(
    /// Value to be encoded.
    value: Spanned<Value>,
    /// Apply a default pretty policy to the document.
    #[named]
    #[default(true)]
    pretty: bool,
) -> SourceResult<Str> {
    let Spanned { v: value, span } = value;
    // Pick the serializer first, then share the conversion and error handling.
    let encoded = match pretty {
        true => toml::to_string_pretty(&value),
        false => toml::to_string(&value),
    };
    encoded
        .map(Into::into)
        .map_err(|e| eco_format!("failed to encode value as toml: {e}"))
        .at(span)
}
/// Format the user-facing TOML error message.
@ -395,6 +450,11 @@ fn format_toml_error(error: toml::de::Error) -> EcoString {
/// Display: YAML
/// Category: data-loading
#[func]
#[scope(
scope.define("decode", yaml_decode_func());
scope.define("encode", yaml_encode_func());
scope
)]
pub fn yaml(
/// Path to a YAML file.
path: Spanned<EcoString>,
@ -404,41 +464,40 @@ pub fn yaml(
let Spanned { v: path, span } = path;
let id = vm.location().join(&path).at(span)?;
let data = vm.world().file(id).at(span)?;
let value: serde_yaml::Value =
serde_yaml::from_slice(&data).map_err(format_yaml_error).at(span)?;
Ok(convert_yaml(value))
yaml_decode(Spanned::new(Readable::Bytes(data), span))
}
/// Convert a YAML value to a Typst value.
fn convert_yaml(value: serde_yaml::Value) -> Value {
match value {
serde_yaml::Value::Null => Value::None,
serde_yaml::Value::Bool(v) => v.into_value(),
serde_yaml::Value::Number(v) => match v.as_i64() {
Some(int) => int.into_value(),
None => v.as_f64().unwrap_or(f64::NAN).into_value(),
},
serde_yaml::Value::String(v) => v.into_value(),
serde_yaml::Value::Sequence(v) => {
v.into_iter().map(convert_yaml).collect::<Array>().into_value()
}
serde_yaml::Value::Mapping(v) => v
.into_iter()
.map(|(key, value)| (convert_yaml_key(key), convert_yaml(value)))
.filter_map(|(key, value)| key.map(|key| (key, value)))
.collect::<Dict>()
.into_value(),
}
/// Reads structured data from a YAML string or from YAML bytes.
///
/// Display: YAML
/// Category: data-loading
#[func]
pub fn yaml_decode(
    /// YAML data.
    data: Spanned<Readable>,
) -> SourceResult<Value> {
    let Spanned { v: data, span } = data;
    // Deserialize straight into a value; parse failures are formatted for the
    // user and attached to the span of the argument.
    serde_yaml::from_slice::<Value>(data.as_slice())
        .map_err(format_yaml_error)
        .at(span)
}
/// Converts an arbitrary YAML mapping key into a Typst Dict Key.
/// Currently it only does so for strings, everything else
/// returns None
fn convert_yaml_key(key: serde_yaml::Value) -> Option<Str> {
match key {
serde_yaml::Value::String(v) => Some(Str::from(v)),
_ => None,
}
/// Encodes structured data into a YAML string.
///
/// Display: YAML
/// Category: data-loading
#[func]
pub fn yaml_encode(
    /// Value to be encoded.
    value: Spanned<Value>,
) -> SourceResult<Str> {
    let Spanned { v: value, span } = value;
    let encoded = serde_yaml::to_string(&value);
    // Convert the produced string and report failures at the argument's span.
    encoded
        .map(Into::into)
        .map_err(|e| eco_format!("failed to encode value as yaml: {e}"))
        .at(span)
}
/// Format the user-facing YAML error message.
@ -498,6 +557,10 @@ fn format_yaml_error(error: serde_yaml::Error) -> EcoString {
/// Display: XML
/// Category: data-loading
#[func]
#[scope(
scope.define("decode", xml_decode_func());
scope
)]
pub fn xml(
/// Path to an XML file.
path: Spanned<EcoString>,
@ -507,7 +570,22 @@ pub fn xml(
let Spanned { v: path, span } = path;
let id = vm.location().join(&path).at(span)?;
let data = vm.world().file(id).at(span)?;
let text = std::str::from_utf8(&data).map_err(FileError::from).at(span)?;
xml_decode(Spanned::new(Readable::Bytes(data), span))
}
/// Reads structured data from an XML string or from XML bytes.
///
/// Display: XML
/// Category: data-loading
#[func]
pub fn xml_decode(
    /// XML data.
    data: Spanned<Readable>,
) -> SourceResult<Value> {
    let Spanned { v: data, span } = data;
    // The XML parser works on text, so the input has to be valid UTF-8.
    let bytes = data.as_slice();
    let text = std::str::from_utf8(bytes).map_err(FileError::from).at(span)?;
    // Parse the document and convert it starting from its root node.
    roxmltree::Document::parse(text)
        .map_err(format_xml_error)
        .at(span)
        .map(|document| convert_xml(document.root()))
}

View File

@ -43,7 +43,7 @@ siphasher = "0.3"
subsetter = "0.1.1"
svg2pdf = "0.6"
tiny-skia = "0.9.0"
toml = { version = "0.7.3", default-features = false, features = ["parse"] }
toml = { version = "0.7.4", default-features = false, features = ["parse"] }
tracing = "0.1.37"
ttf-parser = "0.18.1"
unicode-general-category = "0.6"
@ -54,7 +54,7 @@ unscanny = "0.1"
usvg = { version = "0.32", default-features = false, features = ["text"] }
xmlwriter = "0.1.0"
xmp-writer = "0.1"
time = { version = "0.3.20", features = ["std", "formatting"] }
time = { version = "0.3.20", features = ["std", "formatting", "macros"] }
wasmi = "0.30.0"
xmlparser = "0.13.5"

View File

@ -3,7 +3,7 @@ use std::fmt::{self, Debug, Formatter};
use std::ops::{Add, AddAssign};
use ecow::{eco_format, EcoString, EcoVec};
use serde::Serialize;
use serde::{Deserialize, Serialize};
use super::{ops, Args, CastInfo, FromValue, Func, IntoValue, Reflect, Value, Vm};
use crate::diag::{At, SourceResult, StrResult};
@ -35,7 +35,8 @@ pub use crate::__array as array;
pub use ecow::eco_vec;
/// A reference counted array with value semantics.
#[derive(Default, Clone, PartialEq, Hash, Serialize)]
#[derive(Default, Clone, PartialEq, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct Array(EcoVec<Value>);
impl Array {

View File

@ -4,7 +4,7 @@ use std::ops::{Add, AddAssign};
use std::sync::Arc;
use ecow::{eco_format, EcoString};
use serde::{Serialize, Serializer};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use super::{array, Array, Str, Value};
use crate::diag::StrResult;
@ -198,6 +198,15 @@ impl Serialize for Dict {
}
}
impl<'de> Deserialize<'de> for Dict {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
Ok(IndexMap::<Str, Value>::deserialize(deserializer)?.into())
}
}
impl Extend<(Str, Value)> for Dict {
fn extend<T: IntoIterator<Item = (Str, Value)>>(&mut self, iter: T) {
Arc::make_mut(&mut self.0).extend(iter);

View File

@ -4,7 +4,7 @@ use std::hash::{Hash, Hasher};
use std::ops::{Add, AddAssign, Deref, Range};
use ecow::EcoString;
use serde::Serialize;
use serde::{Deserialize, Serialize};
use unicode_segmentation::UnicodeSegmentation;
use super::{cast, dict, Args, Array, Dict, Func, IntoValue, Value, Vm};
@ -26,7 +26,8 @@ pub use crate::__format_str as format_str;
pub use ecow::eco_format;
/// An immutable reference counted string.
#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize)]
#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct Str(EcoString);
impl Str {

View File

@ -5,14 +5,18 @@ use std::hash::{Hash, Hasher};
use std::sync::Arc;
use ecow::eco_format;
use serde::{Serialize, Serializer};
use serde::de::value::{MapAccessDeserializer, SeqAccessDeserializer};
use serde::de::{Error, MapAccess, SeqAccess, Visitor};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use siphasher::sip128::{Hasher128, SipHasher13};
use time::macros::format_description;
use super::{
cast, fields, format_str, ops, Args, Array, Bytes, CastInfo, Content, Dict,
FromValue, Func, IntoValue, Module, Reflect, Str, Symbol,
};
use crate::diag::StrResult;
use crate::eval::Datetime;
use crate::geom::{Abs, Angle, Color, Em, Fr, Length, Ratio, Rel};
use crate::model::{Label, Styles};
use crate::syntax::{ast, Span};
@ -274,6 +278,176 @@ impl Serialize for Value {
}
}
/// Tries to interpret a dictionary as a datetime.
///
/// Only a dictionary with exactly one entry, keyed
/// `$__toml_private_datetime` and holding a string, is considered. The string
/// is matched against the following formats, in this order:
///
/// 1. `[year]-[month]-[day]T[hour]:[minute]:[second]Z`
/// 2. `[year]-[month]-[day]T[hour]:[minute]:[second]`
/// 3. `[year]-[month]-[day]`
/// 4. `[hour]:[minute]:[second]`
///
/// Returns `None` if the dictionary has a different shape, if the string
/// matches none of the formats, or if the parsed components cannot be turned
/// into a `Datetime`.
fn parse_toml_date(dict: &Dict) -> Option<Datetime> {
    /// Key of the single entry that marks a dictionary as a datetime.
    const KEY: &str = "$__toml_private_datetime";

    if dict.len() != 1 || !dict.contains(KEY) {
        return None;
    }

    // Any failure to read the entry as a string means "not a datetime".
    let s = String::from_value(dict.at(KEY, None).ok()?).ok()?;

    // Date and time combined: the variant with the literal `Z` suffix is
    // tried first, then the one without. Both produce the same components.
    let with_z = format_description!("[year]-[month]-[day]T[hour]:[minute]:[second]Z");
    let without_z =
        format_description!("[year]-[month]-[day]T[hour]:[minute]:[second]");
    if let Ok(d) = time::PrimitiveDateTime::parse(&s, &with_z)
        .or_else(|_| time::PrimitiveDateTime::parse(&s, &without_z))
    {
        // The constructor's own result is returned instead of unwrapping it,
        // so a rejected component yields `None` rather than a panic.
        return Datetime::from_ymd_hms(
            d.year(),
            d.month() as u8,
            d.day(),
            d.hour(),
            d.minute(),
            d.second(),
        );
    }

    // Date only.
    if let Ok(d) = time::Date::parse(&s, &format_description!("[year]-[month]-[day]"))
    {
        return Datetime::from_ymd(d.year(), d.month() as u8, d.day());
    }

    // Time only; this is the last candidate, so a mismatch ends the search.
    let d = time::Time::parse(&s, &format_description!("[hour]:[minute]:[second]"))
        .ok()?;
    Datetime::from_hms(d.hour(), d.minute(), d.second())
}
struct ValueVisitor;
impl<'de> Visitor<'de> for ValueVisitor {
type Value = Value;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a typst value")
}
fn visit_bool<E: Error>(self, v: bool) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_i8<E: Error>(self, v: i8) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_i16<E: Error>(self, v: i16) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_i32<E: Error>(self, v: i32) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_i64<E: Error>(self, v: i64) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_u8<E: Error>(self, v: u8) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_u16<E: Error>(self, v: u16) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_u32<E: Error>(self, v: u32) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_u64<E: Error>(self, v: u64) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_f32<E: Error>(self, v: f32) -> Result<Self::Value, E> {
Ok((v as f64).into_value())
}
fn visit_f64<E: Error>(self, v: f64) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_char<E: Error>(self, v: char) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_str<E: Error>(self, v: &str) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_borrowed_str<E: Error>(self, v: &'de str) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_string<E: Error>(self, v: String) -> Result<Self::Value, E> {
Ok(v.into_value())
}
fn visit_bytes<E: Error>(self, v: &[u8]) -> Result<Self::Value, E> {
Ok(Bytes::from(v).into_value())
}
fn visit_borrowed_bytes<E: Error>(self, v: &'de [u8]) -> Result<Self::Value, E> {
Ok(Bytes::from(v).into_value())
}
fn visit_byte_buf<E: Error>(self, v: Vec<u8>) -> Result<Self::Value, E> {
Ok(Bytes::from(v).into_value())
}
fn visit_none<E: Error>(self) -> Result<Self::Value, E> {
Ok(Value::None)
}
fn visit_some<D: Deserializer<'de>>(
self,
deserializer: D,
) -> Result<Self::Value, D::Error> {
Value::deserialize(deserializer)
}
fn visit_unit<E: Error>(self) -> Result<Self::Value, E> {
Ok(Value::None)
}
fn visit_seq<A: SeqAccess<'de>>(self, seq: A) -> Result<Self::Value, A::Error> {
Ok(Array::deserialize(SeqAccessDeserializer::new(seq))?.into_value())
}
fn visit_map<A: MapAccess<'de>>(self, map: A) -> Result<Self::Value, A::Error> {
let dict = Dict::deserialize(MapAccessDeserializer::new(map))?;
Ok(match parse_toml_date(&dict) {
None => dict.into_value(),
Some(dt) => Value::dynamic(dt),
})
}
}
impl<'de> Deserialize<'de> for Value {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_any(ValueVisitor)
}
}
/// A dynamic value.
#[derive(Clone, Hash)]
#[allow(clippy::derived_hash_with_manual_eq)]

View File

@ -60,6 +60,24 @@
minute: 38,
second: 57,
))
#test(data.date_time2, datetime(
year: 2023,
month: 2,
day: 1,
hour: 15,
minute: 38,
second: 57,
))
#test(data.date, datetime(
year: 2023,
month: 2,
day: 1,
))
#test(data.time, datetime(
hour: 15,
minute: 38,
second: 57,
))
---
// Error: 7-24 failed to parse toml file: expected `.`, `=`, index 15-16
@ -68,7 +86,7 @@
---
// Test reading YAML data
#let data = yaml("/files/yaml-types.yaml")
#test(data.len(), 7)
#test(data.len(), 8)
#test(data.null_key, (none, none))
#test(data.string, "text")
#test(data.integer, 5)
@ -76,7 +94,7 @@
#test(data.mapping, ("1": "one", "2": "two"))
#test(data.seq, (1,2,3,4))
#test(data.bool, false)
#test(data.keys().contains("true"), false)
#test(data.keys().contains("true"), true)
---
---