Smart quotes

Co-Authored-By: Martin Haug <mhaug@live.de>
This commit is contained in:
Laurenz 2022-04-13 13:07:45 +02:00
parent e8dd842c66
commit 2279c26543
16 changed files with 237 additions and 8 deletions

View File

@ -3,12 +3,14 @@
mod deco;
mod link;
mod par;
mod quotes;
mod raw;
mod shaping;
pub use deco::*;
pub use link::*;
pub use par::*;
pub use quotes::*;
pub use raw::*;
pub use shaping::*;
@ -72,6 +74,8 @@ impl TextNode {
/// will be hyphenated if and only if justification is enabled.
#[property(resolve)]
pub const HYPHENATE: Smart<Hyphenate> = Smart::Auto;
/// Whether to apply smart quotes.
pub const SMART_QUOTES: bool = true;
/// Whether to apply kerning ("kern").
pub const KERNING: bool = true;

View File

@ -4,7 +4,7 @@ use unicode_bidi::{BidiInfo, Level};
use unicode_script::{Script, UnicodeScript};
use xi_unicode::LineBreakIterator;
use super::{shape, Lang, ShapedText, TextNode};
use super::{shape, Lang, Quoter, Quotes, ShapedText, TextNode};
use crate::font::FontStore;
use crate::library::layout::Spacing;
use crate::library::prelude::*;
@ -386,9 +386,11 @@ fn collect<'a>(
styles: &'a StyleChain<'a>,
) -> (String, Vec<(Segment<'a>, StyleChain<'a>)>) {
let mut full = String::new();
let mut quoter = Quoter::new();
let mut segments = vec![];
let mut iter = par.0.iter().peekable();
for (child, map) in par.0.iter() {
while let Some((child, map)) = iter.next() {
let styles = map.chain(&styles);
let segment = match child {
ParChild::Text(text) => {
@ -402,7 +404,25 @@ fn collect<'a>(
}
ParChild::Quote(double) => {
let prev = full.len();
full.push(if *double { '"' } else { '\'' });
if styles.get(TextNode::SMART_QUOTES) {
// TODO: Also get region.
let lang = styles.get(TextNode::LANG);
let quotes = lang
.as_ref()
.map(|lang| Quotes::from_lang(lang.as_str(), ""))
.unwrap_or_default();
let peeked = iter.peek().and_then(|(child, _)| match child {
ParChild::Text(text) => text.chars().next(),
ParChild::Quote(_) => Some('"'),
ParChild::Spacing(_) => Some(SPACING_REPLACE),
ParChild::Node(_) => Some(NODE_REPLACE),
});
full.push_str(quoter.quote(&quotes, *double, peeked));
} else {
full.push(if *double { '"' } else { '\'' });
}
Segment::Text(full.len() - prev)
}
ParChild::Spacing(spacing) => {
@ -415,6 +435,10 @@ fn collect<'a>(
}
};
if let Some(last) = full.chars().last() {
quoter.last(last);
}
if let (Some((Segment::Text(last_len), last_styles)), Segment::Text(len)) =
(segments.last_mut(), segment)
{

146
src/library/text/quotes.rs Normal file
View File

@ -0,0 +1,146 @@
use crate::parse::is_newline;
/// State machine for smart quote substitution.
///
/// Feed every character that ends up in the output to [`Quoter::last`] and
/// ask [`Quoter::quote`] for the replacement each time a plain quote is seen.
#[derive(Debug, Clone)]
pub struct Quoter {
    /// How many quotes have been opened.
    quote_depth: usize,
    /// Whether an opening quote might follow.
    expect_opening: bool,
    /// Whether the last character was numeric.
    last_num: bool,
}

impl Quoter {
    /// Create a quoter in its initial state: nothing is open yet, an opening
    /// quote is expected and no digit has been seen.
    pub fn new() -> Self {
        Self { quote_depth: 0, expect_opening: true, last_num: false }
    }

    /// Record the most recently seen character.
    ///
    /// An opening quote is expected next only after an ignorable character or
    /// an opening bracket; a numeric character makes a following quote a
    /// candidate for a prime.
    pub fn last(&mut self, c: char) {
        self.last_num = c.is_numeric();
        self.expect_opening = is_ignorable(c) || is_opening_bracket(c);
    }

    /// Pick the substitute for a quote.
    ///
    /// `double` selects between the double and single variants and `peeked`
    /// is the character following the quote; a missing one is treated like a
    /// space.
    pub fn quote<'a>(
        &mut self,
        quotes: &Quotes<'a>,
        double: bool,
        peeked: Option<char>,
    ) -> &'a str {
        // An expected opening quote always wins and deepens the nesting.
        if self.expect_opening {
            self.quote_depth += 1;
            return quotes.open(double);
        }

        // Only close when something is open and the quote is followed by
        // ASCII punctuation or an ignorable character.
        let next = peeked.unwrap_or(' ');
        if self.quote_depth > 0 && (next.is_ascii_punctuation() || is_ignorable(next)) {
            self.quote_depth -= 1;
            return quotes.close(double);
        }

        // Neither opening nor closing: a prime after a number, otherwise the
        // fallback quote.
        if self.last_num {
            quotes.prime(double)
        } else {
            quotes.fallback(double)
        }
    }
}

impl Default for Quoter {
    /// Same as [`Quoter::new`].
    fn default() -> Self {
        Quoter::new()
    }
}
/// Whether the character is whitespace or is classified as a newline by
/// `is_newline`.
fn is_ignorable(c: char) -> bool {
    if c.is_whitespace() {
        return true;
    }
    is_newline(c)
}
/// Whether the character is one of the opening brackets `(`, `{` or `[`.
fn is_opening_bracket(c: char) -> bool {
    ['(', '{', '['].contains(&c)
}
/// Decides which quotes to substitute smart quotes with.
///
/// The typographic characters are written as `\u{...}` escapes throughout so
/// that they survive tooling which mangles non-ASCII punctuation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Quotes<'s> {
    /// The opening single quote.
    pub single_open: &'s str,
    /// The closing single quote.
    pub single_close: &'s str,
    /// The opening double quote.
    pub double_open: &'s str,
    /// The closing double quote.
    pub double_close: &'s str,
}

impl<'s> Quotes<'s> {
    /// Create a new `Quotes` struct with the defaults for a language and
    /// region.
    ///
    /// The language should be specified as an all-lowercase ISO 639-1 code, the
    /// region as an all-uppercase ISO 3166-alpha2 code.
    ///
    /// Languages with dedicated quotes are: Czech, Danish, German, Swiss /
    /// Liechtensteinian German, Estonian, Icelandic, Lithuanian, Latvian,
    /// Slovak, Slovenian, French, Bosnian, Finnish, Swedish, Hungarian,
    /// Polish, Romanian, Russian, and Norwegian (`no`, `nn`).
    ///
    /// Every other language code, including `en`, gets the English quotes
    /// returned by [`Default::default`].
    pub fn from_lang(language: &str, region: &str) -> Self {
        let (single_open, single_close, double_open, double_close) = match language {
            // Swiss / Liechtensteinian German: single and double guillemets.
            // This arm must stay ahead of the general "de" arm below.
            "de" if matches!(region, "CH" | "LI") => {
                ("\u{2039}", "\u{203A}", "\u{00AB}", "\u{00BB}")
            }
            // Low-9 opening quotes, left (6-shaped) closing quotes.
            "cs" | "da" | "de" | "et" | "is" | "lt" | "lv" | "sk" | "sl" => {
                ("\u{201A}", "\u{2018}", "\u{201E}", "\u{201C}")
            }
            // Guillemets separated from the quoted text by no-break spaces.
            "fr" => (
                "\u{2039}\u{00A0}",
                "\u{00A0}\u{203A}",
                "\u{00AB}\u{00A0}",
                "\u{00A0}\u{00BB}",
            ),
            // The same right (9-shaped) quote on both sides.
            "bs" | "fi" | "sv" => ("\u{2019}", "\u{2019}", "\u{201D}", "\u{201D}"),
            // Low-9 double opening quote, right double closing quote.
            "hu" | "pl" | "ro" => ("\u{2019}", "\u{2019}", "\u{201E}", "\u{201D}"),
            // Double guillemets, right single quotes.
            "ru" | "no" | "nn" => ("\u{2019}", "\u{2019}", "\u{00AB}", "\u{00BB}"),
            _ => return Self::default(),
        };

        Self {
            single_open,
            single_close,
            double_open,
            double_close,
        }
    }

    /// The opening quote.
    fn open(&self, double: bool) -> &'s str {
        if double { self.double_open } else { self.single_open }
    }

    /// The closing quote.
    fn close(&self, double: bool) -> &'s str {
        if double { self.double_close } else { self.single_close }
    }

    /// Which character should be used as a prime: U+2033 DOUBLE PRIME or
    /// U+2032 PRIME.
    fn prime(&self, double: bool) -> &'static str {
        if double { "\u{2033}" } else { "\u{2032}" }
    }

    /// Which character should be used as a fallback quote: a straight double
    /// quote, or U+2019 (which doubles as the typographic apostrophe).
    fn fallback(&self, double: bool) -> &'static str {
        if double { "\"" } else { "\u{2019}" }
    }
}

impl Default for Quotes<'_> {
    /// Returns the English quotes as default.
    fn default() -> Self {
        Self {
            single_open: "\u{2018}",
            single_close: "\u{2019}",
            double_open: "\u{201C}",
            double_close: "\u{201D}",
        }
    }
}

View File

@ -100,6 +100,7 @@ impl Show for RawNode {
let mut map = StyleMap::new();
map.set(TextNode::OVERHANG, false);
map.set(TextNode::HYPHENATE, Smart::Custom(Hyphenate(false)));
map.set(TextNode::SMART_QUOTES, false);
if let Smart::Custom(family) = styles.get(Self::FAMILY) {
map.set_family(family.clone(), styles);

Binary file not shown.

Before

Width:  |  Height:  |  Size: 801 B

After

Width:  |  Height:  |  Size: 520 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 47 KiB

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 106 KiB

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 58 KiB

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

After

Width:  |  Height:  |  Size: 29 KiB

BIN
tests/ref/text/quotes.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.3 KiB

After

Width:  |  Height:  |  Size: 6.3 KiB

View File

@ -5,11 +5,10 @@
// Don't parse closure directly in content.
// Ref: true
#let x = "\"hi\""
#let x = "x"
// Should output `"hi" => "bye"`.
#set text(overhang: false)
#x => "bye"
// Should output `x => y`.
#x => y
---
// Basic closure without captures.

View File

@ -2,7 +2,8 @@
---
// Escapable symbols.
\\ \/ \[ \] \{ \} \# \* \_ \= \~ \` \$
\\ \/ \[ \] \{ \} \# \* \_ \
\= \~ \` \$ \" \'
// No need to escape.
( ) ; < >

54
tests/typ/text/quotes.typ Normal file
View File

@ -0,0 +1,54 @@
// Test smart quotes.
---
#set page(width: 200pt)
// Test simple quotations in various languages.
#set text(lang: "en")
"The horse eats no cucumber salad" was the first sentence ever uttered on the 'telephone.'
#set text(lang: "de")
"Das Pferd frisst keinen Gurkensalat" war der erste jemals am 'Fernsprecher' gesagte Satz.
#set text(lang: "fr")
"Le cheval ne mange pas de salade de concombres" est la première phrase jamais prononcée au 'téléphone'.
#set text(lang: "fi")
"Hevonen ei syö kurkkusalaattia" oli ensimmäinen koskaan 'puhelimessa' lausuttu lause.
#set text(lang: "ro")
"Calul nu mănâncă salată de castraveți" a fost prima propoziție rostită vreodată la 'telefon'.
#set text(lang: "ru")
"Лошадь не ест салат из огурцов" - это была первая фраза, сказанная по 'телефону'.
---
// Test single pair of quotes.
#set text(lang: "en")
""
---
// Test sentences with numbers and apostrophes.
#set text(lang: "en")
The 5'11" 'quick' brown fox jumps over the "lazy" dog's ear.
He said "I'm a big fella."
---
// Test escape sequences.
The 5\'11\" 'quick\' brown fox jumps over the \"lazy" dog\'s ear.
---
// Test turning smart quotes off.
#set text(lang: "en")
He's told some books contain questionable "example text".
#set text(smart-quotes: false)
He's told some books contain questionable "example text".
---
// Test changing properties within text.
#set text(lang: "en")
"She suddenly started speaking french: #text(lang: "fr")['Je suis une banane.']" Roman told me.
Some people's thought on this would be #text(smart-quotes: false)["strange."]