Soft breaks and shy hyphens
This commit is contained in:
parent
2279c26543
commit
67e9313b91
@ -39,8 +39,9 @@ use crate::util::EcoString;
|
||||
pub enum Content {
|
||||
/// A word space.
|
||||
Space,
|
||||
/// A line break.
|
||||
Linebreak,
|
||||
/// A forced line break. If soft (`true`), the preceding line can still be
|
||||
/// justified, if hard (`false`) not.
|
||||
Linebreak(bool),
|
||||
/// Horizontal spacing.
|
||||
Horizontal(Spacing),
|
||||
/// Plain text.
|
||||
@ -213,10 +214,10 @@ impl Debug for Content {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Self::Space => f.pad("Space"),
|
||||
Self::Linebreak => f.pad("Linebreak"),
|
||||
Self::Linebreak(soft) => write!(f, "Linebreak({soft})"),
|
||||
Self::Horizontal(kind) => write!(f, "Horizontal({kind:?})"),
|
||||
Self::Text(text) => write!(f, "Text({text:?})"),
|
||||
Self::Quote(double) => write!(f, "Quote({double:?})"),
|
||||
Self::Quote(double) => write!(f, "Quote({double})"),
|
||||
Self::Inline(node) => {
|
||||
f.write_str("Inline(")?;
|
||||
node.fmt(f)?;
|
||||
@ -376,8 +377,9 @@ impl<'a> Builder<'a> {
|
||||
Content::Space => {
|
||||
self.par.weak(ParChild::Text(' '.into()), 0, styles);
|
||||
}
|
||||
Content::Linebreak => {
|
||||
self.par.destructive(ParChild::Text('\n'.into()), styles);
|
||||
Content::Linebreak(soft) => {
|
||||
let c = if *soft { '\u{2028}' } else { '\n' };
|
||||
self.par.destructive(ParChild::Text(c.into()), styles);
|
||||
}
|
||||
Content::Horizontal(kind) => {
|
||||
let child = ParChild::Spacing(*kind);
|
||||
|
@ -108,8 +108,8 @@ impl Eval for MarkupNode {
|
||||
fn eval(&self, ctx: &mut Context, scp: &mut Scopes) -> EvalResult<Self::Output> {
|
||||
Ok(match self {
|
||||
Self::Space => Content::Space,
|
||||
Self::Linebreak => Content::Linebreak,
|
||||
Self::Parbreak => Content::Parbreak,
|
||||
Self::Linebreak(soft) => Content::Linebreak(*soft),
|
||||
Self::Text(text) => Content::Text(text.clone()),
|
||||
Self::Quote(double) => Content::Quote(*double),
|
||||
Self::Strong(strong) => strong.eval(ctx, scp)?,
|
||||
|
@ -167,8 +167,9 @@ pub struct LinebreakNode;
|
||||
|
||||
#[node]
|
||||
impl LinebreakNode {
|
||||
fn construct(_: &mut Context, _: &mut Args) -> TypResult<Content> {
|
||||
Ok(Content::Linebreak)
|
||||
fn construct(_: &mut Context, args: &mut Args) -> TypResult<Content> {
|
||||
let soft = args.named("soft")?.unwrap_or(false);
|
||||
Ok(Content::Linebreak(soft))
|
||||
}
|
||||
}
|
||||
|
||||
@ -315,8 +316,8 @@ struct Line<'a> {
|
||||
last: Option<Item<'a>>,
|
||||
/// The width of the line.
|
||||
width: Length,
|
||||
/// Whether the line ends at a mandatory break.
|
||||
mandatory: bool,
|
||||
/// Whether the line is allowed to be justified.
|
||||
justify: bool,
|
||||
/// Whether the line ends with a hyphen or dash, either naturally or through
|
||||
/// hyphenation.
|
||||
dash: bool,
|
||||
@ -856,7 +857,7 @@ fn line<'a>(
|
||||
items: &[],
|
||||
last: None,
|
||||
width: Length::zero(),
|
||||
mandatory,
|
||||
justify: !mandatory,
|
||||
dash: false,
|
||||
};
|
||||
}
|
||||
@ -879,15 +880,18 @@ fn line<'a>(
|
||||
// Reshape the last item if it's split in half.
|
||||
let mut last = None;
|
||||
let mut dash = false;
|
||||
let mut justify = !mandatory;
|
||||
if let Some((Item::Text(shaped), before)) = items.split_last() {
|
||||
// Compute the range we want to shape, trimming whitespace at the
|
||||
// end of the line.
|
||||
let base = last_offset;
|
||||
let start = range.start.max(last_offset);
|
||||
let end = range.end;
|
||||
let trimmed = p.bidi.text[start .. end].trim_end();
|
||||
let text = &p.bidi.text[start .. end];
|
||||
let trimmed = text.trim_end();
|
||||
let shy = trimmed.ends_with('\u{ad}');
|
||||
dash = hyphen || shy || trimmed.ends_with(['-', '–', '—']);
|
||||
justify |= text.ends_with('\u{2028}');
|
||||
|
||||
// Usually, we don't want to shape an empty string because:
|
||||
// - We don't want the height of trimmed whitespace in a different
|
||||
@ -947,7 +951,7 @@ fn line<'a>(
|
||||
items,
|
||||
last,
|
||||
width,
|
||||
mandatory,
|
||||
justify,
|
||||
dash,
|
||||
}
|
||||
}
|
||||
@ -1050,7 +1054,7 @@ fn commit(
|
||||
let mut justification = Length::zero();
|
||||
if remaining < Length::zero()
|
||||
|| (justify
|
||||
&& !line.mandatory
|
||||
&& line.justify
|
||||
&& line.range.end < line.bidi.text.len()
|
||||
&& fr.is_zero())
|
||||
{
|
||||
|
@ -84,7 +84,7 @@ impl Show for RawNode {
|
||||
let mut highlighter = HighlightLines::new(syntax, &THEME);
|
||||
for (i, line) in self.text.lines().enumerate() {
|
||||
if i != 0 {
|
||||
seq.push(Content::Linebreak);
|
||||
seq.push(Content::Linebreak(false));
|
||||
}
|
||||
|
||||
for (style, piece) in highlighter.highlight(line, &SYNTAXES) {
|
||||
|
@ -213,10 +213,11 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
|
||||
// Text and markup.
|
||||
NodeKind::Text(_)
|
||||
| NodeKind::NonBreakingSpace
|
||||
| NodeKind::Shy
|
||||
| NodeKind::EnDash
|
||||
| NodeKind::EmDash
|
||||
| NodeKind::Quote(_)
|
||||
| NodeKind::Linebreak
|
||||
| NodeKind::Linebreak(_)
|
||||
| NodeKind::Raw(_)
|
||||
| NodeKind::Math(_)
|
||||
| NodeKind::Escape(_) => {
|
||||
|
@ -264,42 +264,52 @@ impl<'s> Tokens<'s> {
|
||||
}
|
||||
|
||||
fn backslash(&mut self) -> NodeKind {
|
||||
match self.s.peek() {
|
||||
Some(c) => match c {
|
||||
// Backslash and comments.
|
||||
'\\' | '/' |
|
||||
// Parenthesis and hashtag.
|
||||
'[' | ']' | '{' | '}' | '#' |
|
||||
// Markup.
|
||||
'~' | '\'' | '"' | '*' | '_' | '`' | '$' | '=' | '-' | '.' => {
|
||||
self.s.eat_assert(c) ;
|
||||
NodeKind::Escape(c)
|
||||
}
|
||||
'u' if self.s.rest().starts_with("u{") => {
|
||||
self.s.eat_assert('u');
|
||||
self.s.eat_assert('{');
|
||||
let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric());
|
||||
if self.s.eat_if('}') {
|
||||
if let Some(c) = resolve_hex(sequence) {
|
||||
NodeKind::Escape(c)
|
||||
} else {
|
||||
NodeKind::Error(
|
||||
ErrorPos::Full,
|
||||
"invalid unicode escape sequence".into(),
|
||||
)
|
||||
}
|
||||
let c = match self.s.peek() {
|
||||
Some(c) => c,
|
||||
None => return NodeKind::Linebreak(false),
|
||||
};
|
||||
|
||||
match c {
|
||||
// Backslash and comments.
|
||||
'\\' | '/' |
|
||||
// Parenthesis and hashtag.
|
||||
'[' | ']' | '{' | '}' | '#' |
|
||||
// Markup.
|
||||
'~' | '\'' | '"' | '*' | '_' | '`' | '$' | '=' | '-' | '.' => {
|
||||
self.s.eat_assert(c) ;
|
||||
NodeKind::Escape(c)
|
||||
}
|
||||
'u' if self.s.rest().starts_with("u{") => {
|
||||
self.s.eat_assert('u');
|
||||
self.s.eat_assert('{');
|
||||
let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric());
|
||||
if self.s.eat_if('}') {
|
||||
if let Some(c) = resolve_hex(sequence) {
|
||||
NodeKind::Escape(c)
|
||||
} else {
|
||||
self.terminated = false;
|
||||
NodeKind::Error(
|
||||
ErrorPos::End,
|
||||
"expected closing brace".into(),
|
||||
ErrorPos::Full,
|
||||
"invalid unicode escape sequence".into(),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
self.terminated = false;
|
||||
NodeKind::Error(
|
||||
ErrorPos::End,
|
||||
"expected closing brace".into(),
|
||||
)
|
||||
}
|
||||
c if c.is_whitespace() => NodeKind::Linebreak,
|
||||
_ => NodeKind::Text('\\'.into()),
|
||||
},
|
||||
None => NodeKind::Linebreak,
|
||||
}
|
||||
|
||||
// Linebreaks.
|
||||
c if c.is_whitespace() => NodeKind::Linebreak(false),
|
||||
'+' => {
|
||||
self.s.eat_assert(c);
|
||||
NodeKind::Linebreak(true)
|
||||
}
|
||||
|
||||
// Just the backslash.
|
||||
_ => NodeKind::Text('\\'.into()),
|
||||
}
|
||||
}
|
||||
|
||||
@ -323,6 +333,8 @@ impl<'s> Tokens<'s> {
|
||||
} else {
|
||||
NodeKind::EnDash
|
||||
}
|
||||
} else if self.s.eat_if('?') {
|
||||
NodeKind::Shy
|
||||
} else {
|
||||
NodeKind::Minus
|
||||
}
|
||||
@ -845,8 +857,10 @@ mod tests {
|
||||
t!(Markup: "_" => Underscore);
|
||||
t!(Markup[""]: "===" => Eq, Eq, Eq);
|
||||
t!(Markup["a1/"]: "= " => Eq, Space(0));
|
||||
t!(Markup[" "]: r"\" => Linebreak(false));
|
||||
t!(Markup[" "]: r"\+" => Linebreak(true));
|
||||
t!(Markup: "~" => NonBreakingSpace);
|
||||
t!(Markup[" "]: r"\" => Linebreak);
|
||||
t!(Markup["a1/"]: "-?" => Shy);
|
||||
t!(Markup["a "]: r"a--" => Text("a"), EnDash);
|
||||
t!(Markup["a1/"]: "- " => Minus, Space(0));
|
||||
t!(Markup[" "]: "." => EnumNumbering(None));
|
||||
|
@ -62,10 +62,11 @@ impl Markup {
|
||||
self.0.children().filter_map(|node| match node.kind() {
|
||||
NodeKind::Space(2 ..) => Some(MarkupNode::Parbreak),
|
||||
NodeKind::Space(_) => Some(MarkupNode::Space),
|
||||
NodeKind::Linebreak => Some(MarkupNode::Linebreak),
|
||||
NodeKind::Linebreak(s) => Some(MarkupNode::Linebreak(*s)),
|
||||
NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())),
|
||||
NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())),
|
||||
NodeKind::NonBreakingSpace => Some(MarkupNode::Text('\u{00A0}'.into())),
|
||||
NodeKind::Shy => Some(MarkupNode::Text('\u{00AD}'.into())),
|
||||
NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())),
|
||||
NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())),
|
||||
NodeKind::Quote(d) => Some(MarkupNode::Quote(*d)),
|
||||
@ -86,8 +87,9 @@ impl Markup {
|
||||
pub enum MarkupNode {
|
||||
/// Whitespace containing less than two newlines.
|
||||
Space,
|
||||
/// A forced line break: `\`.
|
||||
Linebreak,
|
||||
/// A forced line break. If soft (`\+`, `true`), the preceding line can still
|
||||
/// be justified, if hard (`\`, `false`) not.
|
||||
Linebreak(bool),
|
||||
/// A paragraph break: Two or more newlines.
|
||||
Parbreak,
|
||||
/// Plain text.
|
||||
|
@ -126,8 +126,9 @@ impl Category {
|
||||
_ => Some(Category::Operator),
|
||||
},
|
||||
NodeKind::EnumNumbering(_) => Some(Category::List),
|
||||
NodeKind::Linebreak => Some(Category::Shortcut),
|
||||
NodeKind::Linebreak(_) => Some(Category::Shortcut),
|
||||
NodeKind::NonBreakingSpace => Some(Category::Shortcut),
|
||||
NodeKind::Shy => Some(Category::Shortcut),
|
||||
NodeKind::EnDash => Some(Category::Shortcut),
|
||||
NodeKind::EmDash => Some(Category::Shortcut),
|
||||
NodeKind::Escape(_) => Some(Category::Escape),
|
||||
|
@ -586,12 +586,15 @@ pub enum NodeKind {
|
||||
Markup(usize),
|
||||
/// One or more whitespace characters.
|
||||
Space(usize),
|
||||
/// A forced line break: `\`.
|
||||
Linebreak,
|
||||
/// A consecutive non-markup string.
|
||||
Text(EcoString),
|
||||
/// A forced line break. If soft (`\+`, `true`), the preceding line can still
|
||||
/// be justified, if hard (`\`, `false`) not.
|
||||
Linebreak(bool),
|
||||
/// A non-breaking space: `~`.
|
||||
NonBreakingSpace,
|
||||
/// A soft hyphen: `-?`.
|
||||
Shy,
|
||||
/// An en-dash: `--`.
|
||||
EnDash,
|
||||
/// An em-dash: `---`.
|
||||
@ -766,7 +769,7 @@ impl NodeKind {
|
||||
pub fn only_in_mode(&self) -> Option<TokenMode> {
|
||||
match self {
|
||||
Self::Markup(_)
|
||||
| Self::Linebreak
|
||||
| Self::Linebreak(_)
|
||||
| Self::Text(_)
|
||||
| Self::NonBreakingSpace
|
||||
| Self::EnDash
|
||||
@ -859,9 +862,11 @@ impl NodeKind {
|
||||
Self::Markup(_) => "markup",
|
||||
Self::Space(2 ..) => "paragraph break",
|
||||
Self::Space(_) => "space",
|
||||
Self::Linebreak => "forced linebreak",
|
||||
Self::Linebreak(false) => "hard linebreak",
|
||||
Self::Linebreak(true) => "soft linebreak",
|
||||
Self::Text(_) => "text",
|
||||
Self::NonBreakingSpace => "non-breaking space",
|
||||
Self::Shy => "soft hyphen",
|
||||
Self::EnDash => "en dash",
|
||||
Self::EmDash => "em dash",
|
||||
Self::Quote(false) => "single quote",
|
||||
@ -981,9 +986,10 @@ impl Hash for NodeKind {
|
||||
Self::From => {}
|
||||
Self::Markup(c) => c.hash(state),
|
||||
Self::Space(n) => n.hash(state),
|
||||
Self::Linebreak => {}
|
||||
Self::Linebreak(s) => s.hash(state),
|
||||
Self::Text(s) => s.hash(state),
|
||||
Self::NonBreakingSpace => {}
|
||||
Self::Shy => {}
|
||||
Self::EnDash => {}
|
||||
Self::EmDash => {}
|
||||
Self::Quote(d) => d.hash(state),
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 22 KiB |
Binary file not shown.
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 18 KiB |
@ -30,6 +30,16 @@ Welcome to wo#text(hyphenate: true)[nd]erful experiences. \
|
||||
#set text(lang: "en", hyphenate: true)
|
||||
It's a #emph[Tree]beard.
|
||||
|
||||
---
|
||||
// Test shy hyphens.
|
||||
#set text(lang: "de", hyphenate: true)
|
||||
#grid(
|
||||
columns: 2 * (20pt,),
|
||||
gutter: 20pt,
|
||||
[Barankauf],
|
||||
[Bar-?ankauf],
|
||||
)
|
||||
|
||||
---
|
||||
// This sequence would confuse hypher if we passed trailing / leading
|
||||
// punctuation instead of just the words. So this tests that we don't
|
||||
|
@ -26,3 +26,8 @@ Two consecutive \ \ breaks and three \ \ \ more.
|
||||
---
|
||||
// Test forcing an empty trailing line.
|
||||
Trailing break \ \
|
||||
|
||||
---
|
||||
// Test soft breaks.
|
||||
#set par(justify: true)
|
||||
With a soft \+ break you can force a break without breaking justification.
|
||||
|
Loading…
x
Reference in New Issue
Block a user