diff --git a/src/eval/content.rs b/src/eval/content.rs index 274b64b0c..605abe516 100644 --- a/src/eval/content.rs +++ b/src/eval/content.rs @@ -39,8 +39,9 @@ use crate::util::EcoString; pub enum Content { /// A word space. Space, - /// A line break. - Linebreak, + /// A forced line break. If soft (`true`), the preceding line can still be + /// justified, if hard (`false`) not. + Linebreak(bool), /// Horizontal spacing. Horizontal(Spacing), /// Plain text. @@ -213,10 +214,10 @@ impl Debug for Content { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { Self::Space => f.pad("Space"), - Self::Linebreak => f.pad("Linebreak"), + Self::Linebreak(soft) => write!(f, "Linebreak({soft})"), Self::Horizontal(kind) => write!(f, "Horizontal({kind:?})"), Self::Text(text) => write!(f, "Text({text:?})"), - Self::Quote(double) => write!(f, "Quote({double:?})"), + Self::Quote(double) => write!(f, "Quote({double})"), Self::Inline(node) => { f.write_str("Inline(")?; node.fmt(f)?; @@ -376,8 +377,9 @@ impl<'a> Builder<'a> { Content::Space => { self.par.weak(ParChild::Text(' '.into()), 0, styles); } - Content::Linebreak => { - self.par.destructive(ParChild::Text('\n'.into()), styles); + Content::Linebreak(soft) => { + let c = if *soft { '\u{2028}' } else { '\n' }; + self.par.destructive(ParChild::Text(c.into()), styles); } Content::Horizontal(kind) => { let child = ParChild::Spacing(*kind); diff --git a/src/eval/mod.rs b/src/eval/mod.rs index f2c03c0f3..4a616b58b 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -108,8 +108,8 @@ impl Eval for MarkupNode { fn eval(&self, ctx: &mut Context, scp: &mut Scopes) -> EvalResult { Ok(match self { Self::Space => Content::Space, - Self::Linebreak => Content::Linebreak, Self::Parbreak => Content::Parbreak, + Self::Linebreak(soft) => Content::Linebreak(*soft), Self::Text(text) => Content::Text(text.clone()), Self::Quote(double) => Content::Quote(*double), Self::Strong(strong) => strong.eval(ctx, scp)?, diff --git 
a/src/library/text/par.rs b/src/library/text/par.rs index 8dcbfeb39..19ab10824 100644 --- a/src/library/text/par.rs +++ b/src/library/text/par.rs @@ -167,8 +167,9 @@ pub struct LinebreakNode; #[node] impl LinebreakNode { - fn construct(_: &mut Context, _: &mut Args) -> TypResult { - Ok(Content::Linebreak) + fn construct(_: &mut Context, args: &mut Args) -> TypResult { + let soft = args.named("soft")?.unwrap_or(false); + Ok(Content::Linebreak(soft)) } } @@ -315,8 +316,8 @@ struct Line<'a> { last: Option>, /// The width of the line. width: Length, - /// Whether the line ends at a mandatory break. - mandatory: bool, + /// Whether the line is allowed to be justified. + justify: bool, /// Whether the line ends with a hyphen or dash, either naturally or through /// hyphenation. dash: bool, @@ -856,7 +857,7 @@ fn line<'a>( items: &[], last: None, width: Length::zero(), - mandatory, + justify: !mandatory, dash: false, }; } @@ -879,15 +880,18 @@ fn line<'a>( // Reshape the last item if it's split in half. let mut last = None; let mut dash = false; + let mut justify = !mandatory; if let Some((Item::Text(shaped), before)) = items.split_last() { // Compute the range we want to shape, trimming whitespace at the // end of the line. let base = last_offset; let start = range.start.max(last_offset); let end = range.end; - let trimmed = p.bidi.text[start .. end].trim_end(); + let text = &p.bidi.text[start .. 
end]; + let trimmed = text.trim_end(); let shy = trimmed.ends_with('\u{ad}'); dash = hyphen || shy || trimmed.ends_with(['-', '–', '—']); + justify |= text.ends_with('\u{2028}'); // Usually, we don't want to shape an empty string because: // - We don't want the height of trimmed whitespace in a different @@ -947,7 +951,7 @@ fn line<'a>( items, last, width, - mandatory, + justify, dash, } } @@ -1050,7 +1054,7 @@ fn commit( let mut justification = Length::zero(); if remaining < Length::zero() || (justify - && !line.mandatory + && line.justify && line.range.end < line.bidi.text.len() && fr.is_zero()) { diff --git a/src/library/text/raw.rs b/src/library/text/raw.rs index 80b6ef2a2..db97da075 100644 --- a/src/library/text/raw.rs +++ b/src/library/text/raw.rs @@ -84,7 +84,7 @@ impl Show for RawNode { let mut highlighter = HighlightLines::new(syntax, &THEME); for (i, line) in self.text.lines().enumerate() { if i != 0 { - seq.push(Content::Linebreak); + seq.push(Content::Linebreak(false)); } for (style, piece) in highlighter.highlight(line, &SYNTAXES) { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 92e864507..47cba1119 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -213,10 +213,11 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { // Text and markup. NodeKind::Text(_) | NodeKind::NonBreakingSpace + | NodeKind::Shy | NodeKind::EnDash | NodeKind::EmDash | NodeKind::Quote(_) - | NodeKind::Linebreak + | NodeKind::Linebreak(_) | NodeKind::Raw(_) | NodeKind::Math(_) | NodeKind::Escape(_) => { diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index a98ef2649..053a7f61e 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -264,42 +264,52 @@ impl<'s> Tokens<'s> { } fn backslash(&mut self) -> NodeKind { - match self.s.peek() { - Some(c) => match c { - // Backslash and comments. - '\\' | '/' | - // Parenthesis and hashtag. - '[' | ']' | '{' | '}' | '#' | - // Markup. - '~' | '\'' | '"' | '*' | '_' | '`' | '$' | '=' | '-' | '.' 
=> { - self.s.eat_assert(c) ; - NodeKind::Escape(c) - } - 'u' if self.s.rest().starts_with("u{") => { - self.s.eat_assert('u'); - self.s.eat_assert('{'); - let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric()); - if self.s.eat_if('}') { - if let Some(c) = resolve_hex(sequence) { - NodeKind::Escape(c) - } else { - NodeKind::Error( - ErrorPos::Full, - "invalid unicode escape sequence".into(), - ) - } + let c = match self.s.peek() { + Some(c) => c, + None => return NodeKind::Linebreak(false), + }; + + match c { + // Backslash and comments. + '\\' | '/' | + // Parenthesis and hashtag. + '[' | ']' | '{' | '}' | '#' | + // Markup. + '~' | '\'' | '"' | '*' | '_' | '`' | '$' | '=' | '-' | '.' => { + self.s.eat_assert(c) ; + NodeKind::Escape(c) + } + 'u' if self.s.rest().starts_with("u{") => { + self.s.eat_assert('u'); + self.s.eat_assert('{'); + let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric()); + if self.s.eat_if('}') { + if let Some(c) = resolve_hex(sequence) { + NodeKind::Escape(c) } else { - self.terminated = false; NodeKind::Error( - ErrorPos::End, - "expected closing brace".into(), + ErrorPos::Full, + "invalid unicode escape sequence".into(), ) } + } else { + self.terminated = false; + NodeKind::Error( + ErrorPos::End, + "expected closing brace".into(), + ) } - c if c.is_whitespace() => NodeKind::Linebreak, - _ => NodeKind::Text('\\'.into()), - }, - None => NodeKind::Linebreak, + } + + // Linebreaks. + c if c.is_whitespace() => NodeKind::Linebreak(false), + '+' => { + self.s.eat_assert(c); + NodeKind::Linebreak(true) + } + + // Just the backslash. 
+ _ => NodeKind::Text('\\'.into()), } } @@ -323,6 +333,8 @@ impl<'s> Tokens<'s> { } else { NodeKind::EnDash } + } else if self.s.eat_if('?') { + NodeKind::Shy } else { NodeKind::Minus } @@ -845,8 +857,10 @@ mod tests { t!(Markup: "_" => Underscore); t!(Markup[""]: "===" => Eq, Eq, Eq); t!(Markup["a1/"]: "= " => Eq, Space(0)); + t!(Markup[" "]: r"\" => Linebreak(false)); + t!(Markup[" "]: r"\+" => Linebreak(true)); t!(Markup: "~" => NonBreakingSpace); - t!(Markup[" "]: r"\" => Linebreak); + t!(Markup["a1/"]: "-?" => Shy); t!(Markup["a "]: r"a--" => Text("a"), EnDash); t!(Markup["a1/"]: "- " => Minus, Space(0)); t!(Markup[" "]: "." => EnumNumbering(None)); diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index d629b1fdc..b01eeb47a 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -62,10 +62,11 @@ impl Markup { self.0.children().filter_map(|node| match node.kind() { NodeKind::Space(2 ..) => Some(MarkupNode::Parbreak), NodeKind::Space(_) => Some(MarkupNode::Space), - NodeKind::Linebreak => Some(MarkupNode::Linebreak), + NodeKind::Linebreak(s) => Some(MarkupNode::Linebreak(*s)), NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())), NodeKind::NonBreakingSpace => Some(MarkupNode::Text('\u{00A0}'.into())), + NodeKind::Shy => Some(MarkupNode::Text('\u{00AD}'.into())), NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())), NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())), NodeKind::Quote(d) => Some(MarkupNode::Quote(*d)), @@ -86,8 +87,9 @@ impl Markup { pub enum MarkupNode { /// Whitespace containing less than two newlines. Space, - /// A forced line break: `\`. - Linebreak, + /// A forced line break. If soft (`\+`, `true`), the preceding line can + /// still be justified, if hard (`\`, `false`) not. + Linebreak(bool), /// A paragraph break: Two or more newlines. Parbreak, /// Plain text.
diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 90f1c548c..34e5b4a70 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -126,8 +126,9 @@ impl Category { _ => Some(Category::Operator), }, NodeKind::EnumNumbering(_) => Some(Category::List), - NodeKind::Linebreak => Some(Category::Shortcut), + NodeKind::Linebreak(_) => Some(Category::Shortcut), NodeKind::NonBreakingSpace => Some(Category::Shortcut), + NodeKind::Shy => Some(Category::Shortcut), NodeKind::EnDash => Some(Category::Shortcut), NodeKind::EmDash => Some(Category::Shortcut), NodeKind::Escape(_) => Some(Category::Escape), diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index f0d3cdd4a..1f02217a1 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -586,12 +586,15 @@ pub enum NodeKind { Markup(usize), /// One or more whitespace characters. Space(usize), - /// A forced line break: `\`. - Linebreak, /// A consecutive non-markup string. Text(EcoString), + /// A forced line break. If soft (`\+`, `true`), the preceding line can + /// still be justified, if hard (`\`, `false`) not. + Linebreak(bool), /// A non-breaking space: `~`. NonBreakingSpace, + /// A soft hyphen: `-?`. + Shy, /// An en-dash: `--`. EnDash, /// An em-dash: `---`. @@ -766,7 +769,7 @@ impl NodeKind { pub fn only_in_mode(&self) -> Option { match self { Self::Markup(_) - | Self::Linebreak + | Self::Linebreak(_) | Self::Text(_) | Self::NonBreakingSpace | Self::EnDash @@ -859,9 +862,11 @@ impl NodeKind { Self::Markup(_) => "markup", Self::Space(2 ..)
=> "paragraph break", Self::Space(_) => "space", - Self::Linebreak => "forced linebreak", + Self::Linebreak(false) => "hard linebreak", + Self::Linebreak(true) => "soft linebreak", Self::Text(_) => "text", Self::NonBreakingSpace => "non-breaking space", + Self::Shy => "soft hyphen", Self::EnDash => "en dash", Self::EmDash => "em dash", Self::Quote(false) => "single quote", @@ -981,9 +986,10 @@ impl Hash for NodeKind { Self::From => {} Self::Markup(c) => c.hash(state), Self::Space(n) => n.hash(state), - Self::Linebreak => {} + Self::Linebreak(s) => s.hash(state), Self::Text(s) => s.hash(state), Self::NonBreakingSpace => {} + Self::Shy => {} Self::EnDash => {} Self::EmDash => {} Self::Quote(d) => d.hash(state), diff --git a/tests/ref/text/hyphenate.png b/tests/ref/text/hyphenate.png index 48338f586..47a8ffa54 100644 Binary files a/tests/ref/text/hyphenate.png and b/tests/ref/text/hyphenate.png differ diff --git a/tests/ref/text/linebreak.png b/tests/ref/text/linebreak.png index 1498a8453..43ac9c68a 100644 Binary files a/tests/ref/text/linebreak.png and b/tests/ref/text/linebreak.png differ diff --git a/tests/typ/text/hyphenate.typ b/tests/typ/text/hyphenate.typ index 02a332770..6bb87b13b 100644 --- a/tests/typ/text/hyphenate.typ +++ b/tests/typ/text/hyphenate.typ @@ -30,6 +30,16 @@ Welcome to wo#text(hyphenate: true)[nd]erful experiences. \ #set text(lang: "en", hyphenate: true) It's a #emph[Tree]beard. +--- +// Test shy hyphens. +#set text(lang: "de", hyphenate: true) +#grid( + columns: 2 * (20pt,), + gutter: 20pt, + [Barankauf], + [Bar-?ankauf], +) + --- // This sequence would confuse hypher if we passed trailing / leading // punctuation instead of just the words. So this tests that we don't diff --git a/tests/typ/text/linebreak.typ b/tests/typ/text/linebreak.typ index ff8559d6a..bee17c6bc 100644 --- a/tests/typ/text/linebreak.typ +++ b/tests/typ/text/linebreak.typ @@ -26,3 +26,8 @@ Two consecutive \ \ breaks and three \ \ \ more. 
--- // Test forcing an empty trailing line. Trailing break \ \ + +--- +// Test soft breaks. +#set par(justify: true) +With a soft \+ break you can force a break without breaking justification.