Use/allow U+2212 MINUS SIGN instead of U+002D HYPHEN-MINUS everywhere (#2318)

2023-10-09 15:30:40 +02:00 · 2023-10-09 15:30:40 +02:00 · 0804a9e25d
commit 0804a9e25d
parent df49d3f0c6
16 changed files with 182 additions and 36 deletions
--- a/crates/typst-syntax/src/ast.rs
+++ b/crates/typst-syntax/src/ast.rs
@ -454,6 +454,7 @@ impl Shorthand<'_> {
    pub const MARKUP_LIST: &'static [(&'static str, char)] = &[
        ("...", '…'),
        ("~", '\u{00A0}'),
+        ("-", '\u{2212}'), // Only before a digit
        ("--", '\u{2013}'),
        ("---", '\u{2014}'),
        ("-?", '\u{00AD}'),
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@ -171,6 +171,7 @@ impl Lexer<'_> {
            '-' if self.s.eat_if("--") => SyntaxKind::Shorthand,
            '-' if self.s.eat_if('-') => SyntaxKind::Shorthand,
            '-' if self.s.eat_if('?') => SyntaxKind::Shorthand,
+            '-' if self.s.at(char::is_numeric) => SyntaxKind::Shorthand,
            '*' if !self.in_word() => SyntaxKind::Star,
            '_' if !self.in_word() => SyntaxKind::Underscore,

@ -480,7 +481,7 @@ impl Lexer<'_> {
            '<' if self.s.eat_if('=') => SyntaxKind::LtEq,
            '>' if self.s.eat_if('=') => SyntaxKind::GtEq,
            '+' if self.s.eat_if('=') => SyntaxKind::PlusEq,
-            '-' if self.s.eat_if('=') => SyntaxKind::HyphEq,
+            '-' | '\u{2212}' if self.s.eat_if('=') => SyntaxKind::HyphEq,
            '*' if self.s.eat_if('=') => SyntaxKind::StarEq,
            '/' if self.s.eat_if('=') => SyntaxKind::SlashEq,
            '.' if self.s.eat_if('.') => SyntaxKind::Dots,
@ -498,7 +499,7 @@ impl Lexer<'_> {
            ':' => SyntaxKind::Colon,
            '.' => SyntaxKind::Dot,
            '+' => SyntaxKind::Plus,
-            '-' => SyntaxKind::Minus,
+            '-' | '\u{2212}' => SyntaxKind::Minus,
            '*' => SyntaxKind::Star,
            '/' => SyntaxKind::Slash,
            '=' => SyntaxKind::Eq,
--- a/crates/typst/src/eval/float.rs
+++ b/crates/typst/src/eval/float.rs
@ -1,7 +1,10 @@
-use ecow::eco_format;
+use std::num::ParseFloatError;

-use super::{cast, func, scope, ty, Str};
+use ecow::{eco_format, EcoString};
+
+use super::{cast, func, scope, ty, Repr, Str};
 use crate::geom::Ratio;
+use crate::util::fmt::{format_float, MINUS_SIGN};

 /// A floating-point number.
 ///
@ -47,6 +50,12 @@ impl f64 {
    }
 }

+impl Repr for f64 {
+    /// Renders the float through `format_float`, passing `None` for the
+    /// precision and an empty suffix.
+    fn repr(&self) -> EcoString {
+        let value = *self;
+        format_float(value, None, "")
+    }
+}
+
 /// A value that can be cast to a float.
 pub struct ToFloat(f64);

@ -55,6 +64,13 @@ cast! {
    v: bool => Self(v as i64 as f64),
    v: i64 => Self(v as f64),
    v: Ratio => Self(v.get()),
-    v: Str => Self(v.parse().map_err(|_| eco_format!("invalid float: {}", v))?),
+    v: Str => Self(
+        parse_float(v.clone().into())
+            .map_err(|_| eco_format!("invalid float: {}", v))?
+    ),
    v: f64 => Self(v),
 }
+
+/// Parses a float from `s` after replacing every occurrence of `MINUS_SIGN`
+/// with an ASCII `-`, so both spellings of the minus are accepted.
+fn parse_float(s: EcoString) -> Result<f64, ParseFloatError> {
+    let ascii = s.replace(MINUS_SIGN, "-");
+    ascii.parse::<f64>()
+}
--- a/crates/typst/src/eval/int.rs
+++ b/crates/typst/src/eval/int.rs
@ -1,5 +1,6 @@
-use std::num::{NonZeroI64, NonZeroIsize, NonZeroU64, NonZeroUsize};
+use std::num::{NonZeroI64, NonZeroIsize, NonZeroU64, NonZeroUsize, ParseIntError};

+use crate::util::fmt::{format_int_with_base, MINUS_SIGN};
 use ecow::{eco_format, EcoString};

 use super::{cast, func, scope, ty, Repr, Str, Value};
@ -53,13 +54,7 @@ impl i64 {

 impl Repr for i64 {
    fn repr(&self) -> EcoString {
-        eco_format!("{self}")
-    }
-}
-
-impl Repr for f64 {
-    fn repr(&self) -> EcoString {
-        eco_format!("{self}")
+        format_int_with_base(*self, 10)
    }
 }

@ -70,10 +65,22 @@ cast! {
    ToInt,
    v: bool => Self(v as i64),
    v: f64 => Self(v as i64),
-    v: Str => Self(v.parse().map_err(|_| eco_format!("invalid integer: {}", v))?),
+    v: Str => Self(parse_int(&v).map_err(|_| eco_format!("invalid integer: {}", v))?),
    v: i64 => Self(v),
 }

+/// Parses a decimal integer from `s`, accepting `MINUS_SIGN` as well as the
+/// ASCII `-` as the leading negative sign.
+///
+/// At most one sign is accepted: inputs such as `--5` or `-+5` are errors
+/// rather than being silently interpreted as `5` or `-5`.
+fn parse_int(s: &str) -> Result<i64, ParseIntError> {
+    match s.strip_prefix(MINUS_SIGN) {
+        // Swap the Unicode minus for the ASCII one and let the standard
+        // parser do the rest, so `i64::MIN` and doubled signs need no
+        // special-casing here.
+        Some(rest) => format!("-{rest}").parse(),
+        // ASCII-signed and unsigned input is already in the form the
+        // standard parser expects.
+        None => s.parse(),
+    }
+}
+
 macro_rules! signed_int {
    ($($ty:ty)*) => {
        $(cast! {
--- a/crates/typst/src/eval/str.rs
+++ b/crates/typst/src/eval/str.rs
@ -15,7 +15,7 @@ use crate::diag::{bail, At, SourceResult, StrResult};
 use crate::geom::Align;
 use crate::model::Label;
 use crate::syntax::{Span, Spanned};
-use crate::util::fmt::format_int_with_base;
+use crate::util::fmt::{format_float, format_int_with_base};

 /// Create a new [`Str`] from a format string.
 #[macro_export]
@ -610,7 +610,7 @@ pub enum ToStr {
 cast! {
    ToStr,
    v: i64 => Self::Int(v),
-    v: f64 => Self::Str(format_str!("{}", v)),
+    v: f64 => Self::Str(format_float(v, None, "").into()),
    v: Version => Self::Str(format_str!("{}", v)),
    v: Bytes => Self::Str(
        std::str::from_utf8(&v)
@ -970,13 +970,13 @@ mod tests {
        );
        assert_eq!(
            &format_int_with_base(i64::MIN, 2),
-            "-1000000000000000000000000000000000000000000000000000000000000000"
+            "\u{2212}1000000000000000000000000000000000000000000000000000000000000000"
        );
        assert_eq!(&format_int_with_base(i64::MAX, 10), "9223372036854775807");
-        assert_eq!(&format_int_with_base(i64::MIN, 10), "-9223372036854775808");
+        assert_eq!(&format_int_with_base(i64::MIN, 10), "\u{2212}9223372036854775808");
        assert_eq!(&format_int_with_base(i64::MAX, 16), "7fffffffffffffff");
-        assert_eq!(&format_int_with_base(i64::MIN, 16), "-8000000000000000");
+        assert_eq!(&format_int_with_base(i64::MIN, 16), "\u{2212}8000000000000000");
        assert_eq!(&format_int_with_base(i64::MAX, 36), "1y2p0ij32e8e7");
-        assert_eq!(&format_int_with_base(i64::MIN, 36), "-1y2p0ij32e8e8");
+        assert_eq!(&format_int_with_base(i64::MIN, 36), "\u{2212}1y2p0ij32e8e8");
    }
 }
--- a/crates/typst/src/eval/value.rs
+++ b/crates/typst/src/eval/value.rs
@ -21,6 +21,7 @@ use crate::eval::Datetime;
 use crate::geom::{Abs, Angle, Color, Em, Fr, Gradient, Length, Ratio, Rel};
 use crate::model::{Label, Styles};
 use crate::syntax::{ast, Span};
+use crate::util::fmt::{format_float, format_int_with_base};

 /// A computational value.
 #[derive(Debug, Default, Clone)]
@ -198,8 +199,8 @@ impl Value {
    pub fn display(self) -> Content {
        match self {
            Self::None => Content::empty(),
-            Self::Int(v) => item!(text)(eco_format!("{v}")),
-            Self::Float(v) => item!(text)(eco_format!("{v}")),
+            Self::Int(v) => item!(text)(format_int_with_base(v, 10)),
+            Self::Float(v) => item!(text)(format_float(v, None, "")),
            Self::Str(v) => item!(text)(v.into()),
            Self::Version(v) => item!(text)(eco_format!("{v}")),
            Self::Symbol(v) => item!(text)(v.get().into()),
--- a/crates/typst/src/util/fmt.rs
+++ b/crates/typst/src/util/fmt.rs
@ -1,19 +1,16 @@
 use ecow::{eco_format, EcoString};

+pub const MINUS_SIGN: &str = "\u{2212}";
+
 /// Format an integer in a base.
 pub fn format_int_with_base(mut n: i64, base: i64) -> EcoString {
    if n == 0 {
        return "0".into();
    }

-    // In Rust, `format!("{:x}", -14i64)` is not `-e` but `fffffffffffffff2`.
-    // So we can only use the built-in for decimal, not bin/oct/hex.
-    if base == 10 {
-        return eco_format!("{n}");
-    }
-
-    // The largest output is `to_base(i64::MIN, 2)`, which is 65 chars long.
-    const SIZE: usize = 65;
+    // The largest output is `to_base(i64::MIN, 2)`, which is 64 bytes long,
+    // plus the length of the minus sign.
+    const SIZE: usize = 64 + MINUS_SIGN.len();
    let mut digits = [b'\0'; SIZE];
    let mut i = SIZE;

@ -32,8 +29,9 @@ pub fn format_int_with_base(mut n: i64, base: i64) -> EcoString {
    }

    if negative {
-        i -= 1;
-        digits[i] = b'-';
+        let prev = i;
+        i -= MINUS_SIGN.len();
+        digits[i..prev].copy_from_slice(MINUS_SIGN.as_bytes());
    }

    std::str::from_utf8(&digits[i..]).unwrap_or_default().into()
@ -46,7 +44,13 @@ pub fn format_float(mut value: f64, precision: Option<u8>, suffix: &str) -> EcoS
        let offset = 10_f64.powi(p as i32);
        value = (value * offset).round() / offset;
    }
-    eco_format!("{}{}", value, suffix)
+    if value.is_nan() {
+        "NaN".into()
+    } else if value.is_sign_negative() {
+        eco_format!("{}{}{}", MINUS_SIGN, value.abs(), suffix)
+    } else {
+        eco_format!("{}{}", value, suffix)
+    }
 }

 /// Format pieces separated with commas and a final "and" or "or".
--- a/tests/ref/compiler/array.png
+++ b/tests/ref/compiler/array.png
--- a/tests/ref/compiler/repr-color-gradient.png
+++ b/tests/ref/compiler/repr-color-gradient.png
--- a/tests/ref/layout/grid-2.png
+++ b/tests/ref/layout/grid-2.png
--- a/tests/ref/text/edge.png
+++ b/tests/ref/text/edge.png
--- a/tests/ref/text/numbers.png
+++ b/tests/ref/text/numbers.png
--- a/tests/typ/compiler/methods.typ
+++ b/tests/typ/compiler/methods.typ
@ -76,7 +76,7 @@
 #test((5em + 6in).abs.inches(), 6.0)

 ---
-// Error: 2-21 cannot convert a length with non-zero em units (`-6pt + 10.5em`) to pt
+// Error: 2-21 cannot convert a length with non-zero em units (`−6pt + 10.5em`) to pt
 // Hint: 2-21 use `length.abs.pt()` instead to ignore its em component
 #(10.5em - 6pt).pt()

@ -86,7 +86,7 @@
 #(3em).cm()

 ---
-// Error: 2-20 cannot convert a length with non-zero em units (`-226.77pt + 93em`) to mm
+// Error: 2-20 cannot convert a length with non-zero em units (`−226.77pt + 93em`) to mm
 // Hint: 2-20 use `length.abs.mm()` instead to ignore its em component
 #(93em - 80mm).mm()

--- a/tests/typ/compute/calc.typ
+++ b/tests/typ/compute/calc.typ
@ -7,10 +7,16 @@
 #test(int(true), 1)
 #test(int(10), 10)
 #test(int("150"), 150)
+#test(int("-834"), -834)
+#test(int("\u{2212}79"), -79)
 #test(int(10 / 3), 3)
 #test(float(10), 10.0)
 #test(float(50% * 30%), 0.15)
 #test(float("31.4e-1"), 3.14)
+#test(float("31.4e\u{2212}1"), 3.14)
+#test(float("3.1415"), 3.1415)
+#test(float("-7654.321"), -7654.321)
+#test(float("\u{2212}7654.321"), -7654.321)
 #test(type(float(10)), float)

 ---
--- a/tests/typ/compute/construct.typ
+++ b/tests/typ/compute/construct.typ
@ -162,7 +162,7 @@
 // Test conversion to string.
 #test(str(123), "123")
 #test(str(123, base: 3), "11120")
-#test(str(-123, base: 16), "-7b")
+#test(str(-123, base: 16), "−7b")
 #test(str(9223372036854775807, base: 36), "1y2p0ij32e8e7")
 #test(str(50.14), "50.14")
 #test(str(10 / 3).len() > 10, true)
--- a/tests/typ/text/numbers.typ
+++ b/tests/typ/text/numbers.typ
@ -0,0 +1,110 @@
+// Test how numbers are displayed.
+
+---
+// Test numbers in text mode.
+12 \
+12.0 \
+3.14 \
+1234567890 \
+0123456789 \
+0 \
+0.0 \
+0 \
+0.0 \
+-0 \
+-0.0 \
+-1 \
+-3.14 \
+-9876543210 \
+-0987654321 \
+٣٫١٤ \
+-٣٫١٤ \
+-¾ \
+#text(fractions: true)[-3/2] \
+2022 - 2023 \
+2022 -- 2023 \
+2022--2023 \
+2022-2023 \
+٢٠٢٢ - ٢٠٢٣ \
+٢٠٢٢ -- ٢٠٢٣ \
+٢٠٢٢--٢٠٢٣ \
+٢٠٢٢-٢٠٢٣ \
+-500 -- -400
+
+---
+// Test integers.
+#12 \
+#1234567890 \
+#0123456789 \
+#0 \
+#(-0) \
+#(-1) \
+#(-9876543210) \
+#(-0987654321) \
+#(4 - 8)
+
+---
+// Test floats.
+#12.0 \
+#3.14 \
+#1234567890.0 \
+#0123456789.0 \
+#0.0 \
+#(-0.0) \
+#(-1.0) \
+#(-9876543210.0) \
+#(-0987654321.0) \
+#(-3.14) \
+#(4.0 - 8.0)
+
+---
+// Test the `str` function with integers.
+#str(12) \
+#str(1234567890) \
+#str(0123456789) \
+#str(0) \
+#str(-0) \
+#str(-1) \
+#str(-9876543210) \
+#str(-0987654321) \
+#str(4 - 8)
+
+---
+// Test the `str` function with floats.
+#str(12.0) \
+#str(3.14) \
+#str(1234567890.0) \
+#str(0123456789.0) \
+#str(0.0) \
+#str(-0.0) \
+#str(-1.0) \
+#str(-9876543210.0) \
+#str(-0987654321.0) \
+#str(-3.14) \
+#str(4.0 - 8.0)
+
+---
+// Test the `repr` function with integers.
+#repr(12) \
+#repr(1234567890) \
+#repr(0123456789) \
+#repr(0) \
+#repr(-0) \
+#repr(-1) \
+#repr(-9876543210) \
+#repr(-0987654321) \
+#repr(4 - 8)
+
+---
+// Test the `repr` function with floats.
+#repr(12.0) \
+#repr(3.14) \
+#repr(1234567890.0) \
+#repr(0123456789.0) \
+#repr(0.0) \
+#repr(-0.0) \
+#repr(-1.0) \
+#repr(-9876543210.0) \
+#repr(-0987654321.0) \
+#repr(-3.14) \
+#repr(4.0 - 8.0)