Soft breaks and shy hyphens

This commit is contained in:
Laurenz 2022-04-13 14:48:19 +02:00
parent 2279c26543
commit 67e9313b91
13 changed files with 103 additions and 58 deletions

View File

@ -39,8 +39,9 @@ use crate::util::EcoString;
pub enum Content {
/// A word space.
Space,
/// A line break.
Linebreak,
/// A forced line break. If soft (`true`), the preceding line can still be
/// justified, if hard (`false`) not.
Linebreak(bool),
/// Horizontal spacing.
Horizontal(Spacing),
/// Plain text.
@ -213,10 +214,10 @@ impl Debug for Content {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self {
Self::Space => f.pad("Space"),
Self::Linebreak => f.pad("Linebreak"),
Self::Linebreak(soft) => write!(f, "Linebreak({soft})"),
Self::Horizontal(kind) => write!(f, "Horizontal({kind:?})"),
Self::Text(text) => write!(f, "Text({text:?})"),
Self::Quote(double) => write!(f, "Quote({double:?})"),
Self::Quote(double) => write!(f, "Quote({double})"),
Self::Inline(node) => {
f.write_str("Inline(")?;
node.fmt(f)?;
@ -376,8 +377,9 @@ impl<'a> Builder<'a> {
Content::Space => {
self.par.weak(ParChild::Text(' '.into()), 0, styles);
}
Content::Linebreak => {
self.par.destructive(ParChild::Text('\n'.into()), styles);
Content::Linebreak(soft) => {
let c = if *soft { '\u{2028}' } else { '\n' };
self.par.destructive(ParChild::Text(c.into()), styles);
}
Content::Horizontal(kind) => {
let child = ParChild::Spacing(*kind);

View File

@ -108,8 +108,8 @@ impl Eval for MarkupNode {
fn eval(&self, ctx: &mut Context, scp: &mut Scopes) -> EvalResult<Self::Output> {
Ok(match self {
Self::Space => Content::Space,
Self::Linebreak => Content::Linebreak,
Self::Parbreak => Content::Parbreak,
Self::Linebreak(soft) => Content::Linebreak(*soft),
Self::Text(text) => Content::Text(text.clone()),
Self::Quote(double) => Content::Quote(*double),
Self::Strong(strong) => strong.eval(ctx, scp)?,

View File

@ -167,8 +167,9 @@ pub struct LinebreakNode;
#[node]
impl LinebreakNode {
fn construct(_: &mut Context, _: &mut Args) -> TypResult<Content> {
Ok(Content::Linebreak)
fn construct(_: &mut Context, args: &mut Args) -> TypResult<Content> {
let soft = args.named("soft")?.unwrap_or(false);
Ok(Content::Linebreak(soft))
}
}
@ -315,8 +316,8 @@ struct Line<'a> {
last: Option<Item<'a>>,
/// The width of the line.
width: Length,
/// Whether the line ends at a mandatory break.
mandatory: bool,
/// Whether the line is allowed to be justified.
justify: bool,
/// Whether the line ends with a hyphen or dash, either naturally or through
/// hyphenation.
dash: bool,
@ -856,7 +857,7 @@ fn line<'a>(
items: &[],
last: None,
width: Length::zero(),
mandatory,
justify: !mandatory,
dash: false,
};
}
@ -879,15 +880,18 @@ fn line<'a>(
// Reshape the last item if it's split in half.
let mut last = None;
let mut dash = false;
let mut justify = !mandatory;
if let Some((Item::Text(shaped), before)) = items.split_last() {
// Compute the range we want to shape, trimming whitespace at the
// end of the line.
let base = last_offset;
let start = range.start.max(last_offset);
let end = range.end;
let trimmed = p.bidi.text[start .. end].trim_end();
let text = &p.bidi.text[start .. end];
let trimmed = text.trim_end();
let shy = trimmed.ends_with('\u{ad}');
dash = hyphen || shy || trimmed.ends_with(['-', '–', '—']);
justify |= text.ends_with('\u{2028}');
// Usually, we don't want to shape an empty string because:
// - We don't want the height of trimmed whitespace in a different
@ -947,7 +951,7 @@ fn line<'a>(
items,
last,
width,
mandatory,
justify,
dash,
}
}
@ -1050,7 +1054,7 @@ fn commit(
let mut justification = Length::zero();
if remaining < Length::zero()
|| (justify
&& !line.mandatory
&& line.justify
&& line.range.end < line.bidi.text.len()
&& fr.is_zero())
{

View File

@ -84,7 +84,7 @@ impl Show for RawNode {
let mut highlighter = HighlightLines::new(syntax, &THEME);
for (i, line) in self.text.lines().enumerate() {
if i != 0 {
seq.push(Content::Linebreak);
seq.push(Content::Linebreak(false));
}
for (style, piece) in highlighter.highlight(line, &SYNTAXES) {

View File

@ -213,10 +213,11 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
// Text and markup.
NodeKind::Text(_)
| NodeKind::NonBreakingSpace
| NodeKind::Shy
| NodeKind::EnDash
| NodeKind::EmDash
| NodeKind::Quote(_)
| NodeKind::Linebreak
| NodeKind::Linebreak(_)
| NodeKind::Raw(_)
| NodeKind::Math(_)
| NodeKind::Escape(_) => {

View File

@ -264,42 +264,52 @@ impl<'s> Tokens<'s> {
}
fn backslash(&mut self) -> NodeKind {
match self.s.peek() {
Some(c) => match c {
// Backslash and comments.
'\\' | '/' |
// Parenthesis and hashtag.
'[' | ']' | '{' | '}' | '#' |
// Markup.
'~' | '\'' | '"' | '*' | '_' | '`' | '$' | '=' | '-' | '.' => {
self.s.eat_assert(c) ;
NodeKind::Escape(c)
}
'u' if self.s.rest().starts_with("u{") => {
self.s.eat_assert('u');
self.s.eat_assert('{');
let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric());
if self.s.eat_if('}') {
if let Some(c) = resolve_hex(sequence) {
NodeKind::Escape(c)
} else {
NodeKind::Error(
ErrorPos::Full,
"invalid unicode escape sequence".into(),
)
}
let c = match self.s.peek() {
Some(c) => c,
None => return NodeKind::Linebreak(false),
};
match c {
// Backslash and comments.
'\\' | '/' |
// Parenthesis and hashtag.
'[' | ']' | '{' | '}' | '#' |
// Markup.
'~' | '\'' | '"' | '*' | '_' | '`' | '$' | '=' | '-' | '.' => {
self.s.eat_assert(c) ;
NodeKind::Escape(c)
}
'u' if self.s.rest().starts_with("u{") => {
self.s.eat_assert('u');
self.s.eat_assert('{');
let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric());
if self.s.eat_if('}') {
if let Some(c) = resolve_hex(sequence) {
NodeKind::Escape(c)
} else {
self.terminated = false;
NodeKind::Error(
ErrorPos::End,
"expected closing brace".into(),
ErrorPos::Full,
"invalid unicode escape sequence".into(),
)
}
} else {
self.terminated = false;
NodeKind::Error(
ErrorPos::End,
"expected closing brace".into(),
)
}
c if c.is_whitespace() => NodeKind::Linebreak,
_ => NodeKind::Text('\\'.into()),
},
None => NodeKind::Linebreak,
}
// Linebreaks.
c if c.is_whitespace() => NodeKind::Linebreak(false),
'+' => {
self.s.eat_assert(c);
NodeKind::Linebreak(true)
}
// Just the backslash.
_ => NodeKind::Text('\\'.into()),
}
}
@ -323,6 +333,8 @@ impl<'s> Tokens<'s> {
} else {
NodeKind::EnDash
}
} else if self.s.eat_if('?') {
NodeKind::Shy
} else {
NodeKind::Minus
}
@ -845,8 +857,10 @@ mod tests {
t!(Markup: "_" => Underscore);
t!(Markup[""]: "===" => Eq, Eq, Eq);
t!(Markup["a1/"]: "= " => Eq, Space(0));
t!(Markup[" "]: r"\" => Linebreak(false));
t!(Markup[" "]: r"\+" => Linebreak(true));
t!(Markup: "~" => NonBreakingSpace);
t!(Markup[" "]: r"\" => Linebreak);
t!(Markup["a1/"]: "-?" => Shy);
t!(Markup["a "]: r"a--" => Text("a"), EnDash);
t!(Markup["a1/"]: "- " => Minus, Space(0));
t!(Markup[" "]: "." => EnumNumbering(None));

View File

@ -62,10 +62,11 @@ impl Markup {
self.0.children().filter_map(|node| match node.kind() {
NodeKind::Space(2 ..) => Some(MarkupNode::Parbreak),
NodeKind::Space(_) => Some(MarkupNode::Space),
NodeKind::Linebreak => Some(MarkupNode::Linebreak),
NodeKind::Linebreak(s) => Some(MarkupNode::Linebreak(*s)),
NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())),
NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())),
NodeKind::NonBreakingSpace => Some(MarkupNode::Text('\u{00A0}'.into())),
NodeKind::Shy => Some(MarkupNode::Text('\u{00AD}'.into())),
NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())),
NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())),
NodeKind::Quote(d) => Some(MarkupNode::Quote(*d)),
@ -86,8 +87,9 @@ impl Markup {
pub enum MarkupNode {
/// Whitespace containing less than two newlines.
Space,
/// A forced line break: `\`.
Linebreak,
/// A forced line break. If soft (`\+`, `true`), the preceding line can still
/// be justified; if hard (`\`, `false`), it cannot.
Linebreak(bool),
/// A paragraph break: Two or more newlines.
Parbreak,
/// Plain text.

View File

@ -126,8 +126,9 @@ impl Category {
_ => Some(Category::Operator),
},
NodeKind::EnumNumbering(_) => Some(Category::List),
NodeKind::Linebreak => Some(Category::Shortcut),
NodeKind::Linebreak(_) => Some(Category::Shortcut),
NodeKind::NonBreakingSpace => Some(Category::Shortcut),
NodeKind::Shy => Some(Category::Shortcut),
NodeKind::EnDash => Some(Category::Shortcut),
NodeKind::EmDash => Some(Category::Shortcut),
NodeKind::Escape(_) => Some(Category::Escape),

View File

@ -586,12 +586,15 @@ pub enum NodeKind {
Markup(usize),
/// One or more whitespace characters.
Space(usize),
/// A forced line break: `\`.
Linebreak,
/// A consecutive non-markup string.
Text(EcoString),
/// A forced line break. If soft (`\+`, `true`), the preceding line can still
/// be justified; if hard (`\`, `false`), it cannot.
Linebreak(bool),
/// A non-breaking space: `~`.
NonBreakingSpace,
/// A soft hyphen: `-?`.
Shy,
/// An en-dash: `--`.
EnDash,
/// An em-dash: `---`.
@ -766,7 +769,7 @@ impl NodeKind {
pub fn only_in_mode(&self) -> Option<TokenMode> {
match self {
Self::Markup(_)
| Self::Linebreak
| Self::Linebreak(_)
| Self::Text(_)
| Self::NonBreakingSpace
| Self::EnDash
@ -859,9 +862,11 @@ impl NodeKind {
Self::Markup(_) => "markup",
Self::Space(2 ..) => "paragraph break",
Self::Space(_) => "space",
Self::Linebreak => "forced linebreak",
Self::Linebreak(false) => "hard linebreak",
Self::Linebreak(true) => "soft linebreak",
Self::Text(_) => "text",
Self::NonBreakingSpace => "non-breaking space",
Self::Shy => "soft hyphen",
Self::EnDash => "en dash",
Self::EmDash => "em dash",
Self::Quote(false) => "single quote",
@ -981,9 +986,10 @@ impl Hash for NodeKind {
Self::From => {}
Self::Markup(c) => c.hash(state),
Self::Space(n) => n.hash(state),
Self::Linebreak => {}
Self::Linebreak(s) => s.hash(state),
Self::Text(s) => s.hash(state),
Self::NonBreakingSpace => {}
Self::Shy => {}
Self::EnDash => {}
Self::EmDash => {}
Self::Quote(d) => d.hash(state),

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 18 KiB

View File

@ -30,6 +30,16 @@ Welcome to wo#text(hyphenate: true)[nd]erful experiences. \
#set text(lang: "en", hyphenate: true)
It's a #emph[Tree]beard.
---
// Test shy hyphens.
#set text(lang: "de", hyphenate: true)
#grid(
columns: 2 * (20pt,),
gutter: 20pt,
[Barankauf],
[Bar-?ankauf],
)
---
// This sequence would confuse hypher if we passed trailing / leading
// punctuation instead of just the words. So this tests that we don't

View File

@ -26,3 +26,8 @@ Two consecutive \ \ breaks and three \ \ \ more.
---
// Test forcing an empty trailing line.
Trailing break \ \
---
// Test soft breaks.
#set par(justify: true)
With a soft \+ break you can force a break without breaking justification.