Smart quotes
Co-Authored-By: Martin Haug <mhaug@live.de>
@ -3,12 +3,14 @@
|
||||
mod deco;
|
||||
mod link;
|
||||
mod par;
|
||||
mod quotes;
|
||||
mod raw;
|
||||
mod shaping;
|
||||
|
||||
pub use deco::*;
|
||||
pub use link::*;
|
||||
pub use par::*;
|
||||
pub use quotes::*;
|
||||
pub use raw::*;
|
||||
pub use shaping::*;
|
||||
|
||||
@ -72,6 +74,8 @@ impl TextNode {
|
||||
/// will be hyphenated if and only if justification is enabled.
|
||||
#[property(resolve)]
|
||||
pub const HYPHENATE: Smart<Hyphenate> = Smart::Auto;
|
||||
/// Whether to apply smart quotes.
|
||||
pub const SMART_QUOTES: bool = true;
|
||||
|
||||
/// Whether to apply kerning ("kern").
|
||||
pub const KERNING: bool = true;
|
||||
|
@ -4,7 +4,7 @@ use unicode_bidi::{BidiInfo, Level};
|
||||
use unicode_script::{Script, UnicodeScript};
|
||||
use xi_unicode::LineBreakIterator;
|
||||
|
||||
use super::{shape, Lang, ShapedText, TextNode};
|
||||
use super::{shape, Lang, Quoter, Quotes, ShapedText, TextNode};
|
||||
use crate::font::FontStore;
|
||||
use crate::library::layout::Spacing;
|
||||
use crate::library::prelude::*;
|
||||
@ -386,9 +386,11 @@ fn collect<'a>(
|
||||
styles: &'a StyleChain<'a>,
|
||||
) -> (String, Vec<(Segment<'a>, StyleChain<'a>)>) {
|
||||
let mut full = String::new();
|
||||
let mut quoter = Quoter::new();
|
||||
let mut segments = vec![];
|
||||
let mut iter = par.0.iter().peekable();
|
||||
|
||||
for (child, map) in par.0.iter() {
|
||||
while let Some((child, map)) = iter.next() {
|
||||
let styles = map.chain(&styles);
|
||||
let segment = match child {
|
||||
ParChild::Text(text) => {
|
||||
@ -402,7 +404,25 @@ fn collect<'a>(
|
||||
}
|
||||
ParChild::Quote(double) => {
|
||||
let prev = full.len();
|
||||
full.push(if *double { '"' } else { '\'' });
|
||||
if styles.get(TextNode::SMART_QUOTES) {
|
||||
// TODO: Also get region.
|
||||
let lang = styles.get(TextNode::LANG);
|
||||
let quotes = lang
|
||||
.as_ref()
|
||||
.map(|lang| Quotes::from_lang(lang.as_str(), ""))
|
||||
.unwrap_or_default();
|
||||
|
||||
let peeked = iter.peek().and_then(|(child, _)| match child {
|
||||
ParChild::Text(text) => text.chars().next(),
|
||||
ParChild::Quote(_) => Some('"'),
|
||||
ParChild::Spacing(_) => Some(SPACING_REPLACE),
|
||||
ParChild::Node(_) => Some(NODE_REPLACE),
|
||||
});
|
||||
|
||||
full.push_str(quoter.quote(&quotes, *double, peeked));
|
||||
} else {
|
||||
full.push(if *double { '"' } else { '\'' });
|
||||
}
|
||||
Segment::Text(full.len() - prev)
|
||||
}
|
||||
ParChild::Spacing(spacing) => {
|
||||
@ -415,6 +435,10 @@ fn collect<'a>(
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(last) = full.chars().last() {
|
||||
quoter.last(last);
|
||||
}
|
||||
|
||||
if let (Some((Segment::Text(last_len), last_styles)), Segment::Text(len)) =
|
||||
(segments.last_mut(), segment)
|
||||
{
|
||||
|
146
src/library/text/quotes.rs
Normal file
@ -0,0 +1,146 @@
|
||||
use crate::parse::is_newline;
|
||||
|
||||
/// State machine for smart quote substitution.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Quoter {
|
||||
/// How many quotes have been opened.
|
||||
quote_depth: usize,
|
||||
/// Whether an opening quote might follow.
|
||||
expect_opening: bool,
|
||||
/// Whether the last character was numeric.
|
||||
last_num: bool,
|
||||
}
|
||||
|
||||
impl Quoter {
|
||||
/// Start quoting.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
quote_depth: 0,
|
||||
expect_opening: true,
|
||||
last_num: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Process the last seen character.
|
||||
pub fn last(&mut self, c: char) {
|
||||
self.expect_opening = is_ignorable(c) || is_opening_bracket(c);
|
||||
self.last_num = c.is_numeric();
|
||||
}
|
||||
|
||||
/// Process and substitute a quote.
|
||||
pub fn quote<'a>(
|
||||
&mut self,
|
||||
quotes: &Quotes<'a>,
|
||||
double: bool,
|
||||
peeked: Option<char>,
|
||||
) -> &'a str {
|
||||
let peeked = peeked.unwrap_or(' ');
|
||||
if self.expect_opening {
|
||||
self.quote_depth += 1;
|
||||
quotes.open(double)
|
||||
} else if self.quote_depth > 0
|
||||
&& (peeked.is_ascii_punctuation() || is_ignorable(peeked))
|
||||
{
|
||||
self.quote_depth -= 1;
|
||||
quotes.close(double)
|
||||
} else if self.last_num {
|
||||
quotes.prime(double)
|
||||
} else {
|
||||
quotes.fallback(double)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Quoter {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
fn is_ignorable(c: char) -> bool {
|
||||
c.is_whitespace() || is_newline(c)
|
||||
}
|
||||
|
||||
fn is_opening_bracket(c: char) -> bool {
|
||||
matches!(c, '(' | '{' | '[')
|
||||
}
|
||||
|
||||
/// Decides which quotes to substitute smart quotes with.
|
||||
pub struct Quotes<'s> {
|
||||
/// The opening single quote.
|
||||
pub single_open: &'s str,
|
||||
/// The closing single quote.
|
||||
pub single_close: &'s str,
|
||||
/// The opening double quote.
|
||||
pub double_open: &'s str,
|
||||
/// The closing double quote.
|
||||
pub double_close: &'s str,
|
||||
}
|
||||
|
||||
impl<'s> Quotes<'s> {
|
||||
/// Create a new `Quotes` struct with the defaults for a language and
|
||||
/// region.
|
||||
///
|
||||
/// The language should be specified as an all-lowercase ISO 639-1 code, the
|
||||
/// region as an all-uppercase ISO 3166-1 alpha-2 code.
|
||||
///
|
||||
/// Currently, the supported languages are: English, Czech, Danish, German,
|
||||
/// Swiss / Liechtensteinian German, Estonian, Icelandic, Lithuanian,
|
||||
/// Latvian, Slovak, Slovenian, Bosnian, Finnish, Swedish, French,
|
||||
/// Hungarian, Polish, Romanian, Russian, and Norwegian. Japanese and
|
||||
/// Traditional Chinese have no entry yet and get the English quotes.
|
||||
///
|
||||
/// For unknown languages, the English quotes are used.
|
||||
pub fn from_lang(language: &str, region: &str) -> Self {
|
||||
let (single_open, single_close, double_open, double_close) = match language {
|
||||
"de" if matches!(region, "CH" | "LI") => ("‹", "›", "«", "»"),
|
||||
"cs" | "da" | "de" | "et" | "is" | "lt" | "lv" | "sk" | "sl" => {
|
||||
("‚", "‘", "„", "“")
|
||||
}
|
||||
"fr" => ("‹\u{00A0}", "\u{00A0}›", "«\u{00A0}", "\u{00A0}»"),
|
||||
"bs" | "fi" | "sv" => ("’", "’", "”", "”"),
|
||||
"hu" | "pl" | "ro" => ("’", "’", "„", "”"),
|
||||
"ru" | "no" | "nn" => ("’", "’", "«", "»"),
|
||||
_ => return Self::default(),
|
||||
};
|
||||
|
||||
Self {
|
||||
single_open,
|
||||
single_close,
|
||||
double_open,
|
||||
double_close,
|
||||
}
|
||||
}
|
||||
|
||||
/// The opening quote.
|
||||
fn open(&self, double: bool) -> &'s str {
|
||||
if double { self.double_open } else { self.single_open }
|
||||
}
|
||||
|
||||
/// The closing quote.
|
||||
fn close(&self, double: bool) -> &'s str {
|
||||
if double { self.double_close } else { self.single_close }
|
||||
}
|
||||
|
||||
/// Which character should be used as a prime.
|
||||
fn prime(&self, double: bool) -> &'static str {
|
||||
if double { "″" } else { "′" }
|
||||
}
|
||||
|
||||
/// Which character should be used as a fallback quote.
|
||||
fn fallback(&self, double: bool) -> &'static str {
|
||||
if double { "\"" } else { "’" }
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Quotes<'_> {
|
||||
/// Returns the English quotes as default.
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
single_open: "‘",
|
||||
single_close: "’",
|
||||
double_open: "“",
|
||||
double_close: "”",
|
||||
}
|
||||
}
|
||||
}
|
@ -100,6 +100,7 @@ impl Show for RawNode {
|
||||
let mut map = StyleMap::new();
|
||||
map.set(TextNode::OVERHANG, false);
|
||||
map.set(TextNode::HYPHENATE, Smart::Custom(Hyphenate(false)));
|
||||
map.set(TextNode::SMART_QUOTES, false);
|
||||
|
||||
if let Smart::Custom(family) = styles.get(Self::FAMILY) {
|
||||
map.set_family(family.clone(), styles);
|
||||
|
Before Width: | Height: | Size: 801 B After Width: | Height: | Size: 520 B |
Before Width: | Height: | Size: 47 KiB After Width: | Height: | Size: 47 KiB |
Before Width: | Height: | Size: 106 KiB After Width: | Height: | Size: 120 KiB |
Before Width: | Height: | Size: 58 KiB After Width: | Height: | Size: 57 KiB |
Before Width: | Height: | Size: 13 KiB After Width: | Height: | Size: 13 KiB |
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 20 KiB |
Before Width: | Height: | Size: 29 KiB After Width: | Height: | Size: 29 KiB |
BIN
tests/ref/text/quotes.png
Normal file
After Width: | Height: | Size: 58 KiB |
Before Width: | Height: | Size: 6.3 KiB After Width: | Height: | Size: 6.3 KiB |
@ -5,11 +5,10 @@
|
||||
// Don't parse closure directly in content.
|
||||
// Ref: true
|
||||
|
||||
#let x = "\"hi\""
|
||||
#let x = "x"
|
||||
|
||||
// Should output `"hi" => "bye"`.
|
||||
#set text(overhang: false)
|
||||
#x => "bye"
|
||||
// Should output `x => y`.
|
||||
#x => y
|
||||
|
||||
---
|
||||
// Basic closure without captures.
|
||||
|
@ -2,7 +2,8 @@
|
||||
|
||||
---
|
||||
// Escapable symbols.
|
||||
\\ \/ \[ \] \{ \} \# \* \_ \= \~ \` \$
|
||||
\\ \/ \[ \] \{ \} \# \* \_ \
|
||||
\= \~ \` \$ \" \'
|
||||
|
||||
// No need to escape.
|
||||
( ) ; < >
|
||||
|
54
tests/typ/text/quotes.typ
Normal file
@ -0,0 +1,54 @@
|
||||
// Test smart quotes.
|
||||
|
||||
---
|
||||
#set page(width: 200pt)
|
||||
|
||||
// Test simple quotations in various languages.
|
||||
#set text(lang: "en")
|
||||
"The horse eats no cucumber salad" was the first sentence ever uttered on the 'telephone.'
|
||||
|
||||
#set text(lang: "de")
|
||||
"Das Pferd frisst keinen Gurkensalat" war der erste jemals am 'Fernsprecher' gesagte Satz.
|
||||
|
||||
#set text(lang: "fr")
|
||||
"Le cheval ne mange pas de salade de concombres" est la première phrase jamais prononcée au 'téléphone'.
|
||||
|
||||
#set text(lang: "fi")
|
||||
"Hevonen ei syö kurkkusalaattia" oli ensimmäinen koskaan 'puhelimessa' lausuttu lause.
|
||||
|
||||
#set text(lang: "ro")
|
||||
"Calul nu mănâncă salată de castraveți" a fost prima propoziție rostită vreodată la 'telefon'.
|
||||
|
||||
#set text(lang: "ru")
|
||||
"Лошадь не ест салат из огурцов" - это была первая фраза, сказанная по 'телефону'.
|
||||
|
||||
---
|
||||
// Test single pair of quotes.
|
||||
#set text(lang: "en")
|
||||
""
|
||||
|
||||
---
|
||||
// Test sentences with numbers and apostrophes.
|
||||
#set text(lang: "en")
|
||||
The 5'11" 'quick' brown fox jumps over the "lazy" dog's ear.
|
||||
|
||||
He said "I'm a big fella."
|
||||
|
||||
---
|
||||
// Test escape sequences.
|
||||
The 5\'11\" 'quick\' brown fox jumps over the \"lazy" dog\'s ear.
|
||||
|
||||
---
|
||||
// Test turning smart quotes off.
|
||||
#set text(lang: "en")
|
||||
He's told some books contain questionable "example text".
|
||||
|
||||
#set text(smart-quotes: false)
|
||||
He's told some books contain questionable "example text".
|
||||
|
||||
---
|
||||
// Test changing properties within text.
|
||||
#set text(lang: "en")
|
||||
"She suddenly started speaking french: #text(lang: "fr")['Je suis une banane.']" Roman told me.
|
||||
|
||||
Some people's thought on this would be #text(smart-quotes: false)["strange."]
|