From 030041466b5b8453ca23e43a6385f4592f78a56c Mon Sep 17 00:00:00 2001 From: Myriad-Dreamin <35292584+Myriad-Dreamin@users.noreply.github.com> Date: Fri, 1 Mar 2024 17:17:41 +0800 Subject: [PATCH] Provide more fine-grained spans in raw blocks (#3257) Co-authored-by: Laurenz --- crates/typst-syntax/src/ast.rs | 100 ++++--------- crates/typst-syntax/src/highlight.rs | 3 + crates/typst-syntax/src/kind.rs | 9 ++ crates/typst-syntax/src/lexer.rs | 134 ++++++++++++++++- crates/typst-syntax/src/parser.rs | 22 ++- crates/typst-syntax/src/set.rs | 16 +- crates/typst/src/eval/markup.rs | 9 +- crates/typst/src/foundations/value.rs | 4 +- crates/typst/src/layout/inline/mod.rs | 2 +- crates/typst/src/layout/inline/shaping.rs | 9 +- crates/typst/src/text/mod.rs | 6 + crates/typst/src/text/raw.rs | 140 +++++++++++++----- tests/typ/compiler/raw.typ | 170 ++++++++++++++++++++++ 13 files changed, 500 insertions(+), 124 deletions(-) create mode 100644 tests/typ/compiler/raw.typ diff --git a/crates/typst-syntax/src/ast.rs b/crates/typst-syntax/src/ast.rs index 8f8eaac47..fc689a689 100644 --- a/crates/typst-syntax/src/ast.rs +++ b/crates/typst-syntax/src/ast.rs @@ -8,9 +8,7 @@ use std::ops::Deref; use ecow::EcoString; use unscanny::Scanner; -use crate::{ - is_id_continue, is_id_start, is_newline, split_newlines, Span, SyntaxKind, SyntaxNode, -}; +use crate::{is_newline, Span, SyntaxKind, SyntaxNode}; /// A typed AST node. pub trait AstNode<'a>: Sized { @@ -558,86 +556,50 @@ node! { } impl<'a> Raw<'a> { - /// The trimmed raw text. - pub fn text(self) -> EcoString { - let mut text = self.0.text().as_str(); - let blocky = text.starts_with("```"); - text = text.trim_matches('`'); - - // Trim tag, one space at the start, and one space at the end if the - // last non-whitespace char is a backtick. - if blocky { - let mut s = Scanner::new(text); - if s.eat_if(is_id_start) { - s.eat_while(is_id_continue); - } - text = s.after(); - text = text.strip_prefix(' ').unwrap_or(text); - if text.trim_end().ends_with('`') { - text = text.strip_suffix(' ').unwrap_or(text); - } - } - - // Split into lines. - let mut lines = split_newlines(text); - - if blocky { - let dedent = lines - .iter() - .skip(1) - .filter(|line| !line.chars().all(char::is_whitespace)) - // The line with the closing ``` is always taken into account - .chain(lines.last()) - .map(|line| line.chars().take_while(|c| c.is_whitespace()).count()) - .min() - .unwrap_or(0); - - // Dedent based on column, but not for the first line. - for line in lines.iter_mut().skip(1) { - let offset = line.chars().take(dedent).map(char::len_utf8).sum(); - *line = &line[offset..]; - } - - let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace); - - // Trims a sequence of whitespace followed by a newline at the start. - if lines.first().is_some_and(is_whitespace) { - lines.remove(0); - } - - // Trims a newline followed by a sequence of whitespace at the end. - if lines.last().is_some_and(is_whitespace) { - lines.pop(); - } - } - - lines.join("\n").into() + /// The lines in the raw block. + pub fn lines(self) -> impl DoubleEndedIterator> { + self.0.children().filter_map(SyntaxNode::cast) } /// An optional identifier specifying the language to syntax-highlight in. - pub fn lang(self) -> Option<&'a str> { - let text = self.0.text(); - + pub fn lang(self) -> Option> { // Only blocky literals are supposed to contain a language. - if !text.starts_with("```") { + let delim: RawDelim = self.0.cast_first_match()?; + if delim.0.len() < 3 { return Option::None; } - let inner = text.trim_start_matches('`'); - let mut s = Scanner::new(inner); - s.eat_if(is_id_start).then(|| { - s.eat_while(is_id_continue); - s.before() - }) + self.0.cast_first_match() } /// Whether the raw text should be displayed in a separate block. pub fn block(self) -> bool { - let text = self.0.text(); - text.starts_with("```") && text.chars().any(is_newline) + self.0 + .cast_first_match() + .is_some_and(|delim: RawDelim| delim.0.len() >= 3) + && self.0.children().any(|e| { + e.kind() == SyntaxKind::RawTrimmed && e.text().chars().any(is_newline) + }) } } +node! { + /// A language tag at the start of raw element: ``typ ``. + RawLang +} + +impl<'a> RawLang<'a> { + /// Get the language tag. + pub fn get(self) -> &'a EcoString { + self.0.text() + } +} + +node! { + /// A raw delimiter in single or 3+ backticks: `` ` ``. + RawDelim +} + node! { /// A hyperlink: `https://typst.org`. Link diff --git a/crates/typst-syntax/src/highlight.rs b/crates/typst-syntax/src/highlight.rs index 19d35d0ac..f1c8a298f 100644 --- a/crates/typst-syntax/src/highlight.rs +++ b/crates/typst-syntax/src/highlight.rs @@ -153,6 +153,9 @@ pub fn highlight(node: &LinkedNode) -> Option { SyntaxKind::Strong => Some(Tag::Strong), SyntaxKind::Emph => Some(Tag::Emph), SyntaxKind::Raw => Some(Tag::Raw), + SyntaxKind::RawLang => None, + SyntaxKind::RawTrimmed => None, + SyntaxKind::RawDelim => None, SyntaxKind::Link => Some(Tag::Link), SyntaxKind::Label => Some(Tag::Label), SyntaxKind::Ref => Some(Tag::Ref), diff --git a/crates/typst-syntax/src/kind.rs b/crates/typst-syntax/src/kind.rs index e5dd4e9bb..c34f60029 100644 --- a/crates/typst-syntax/src/kind.rs +++ b/crates/typst-syntax/src/kind.rs @@ -28,6 +28,12 @@ pub enum SyntaxKind { Emph, /// Raw text with optional syntax highlighting: `` `...` ``. Raw, + /// A language tag at the start of raw text: ``typ ``. + RawLang, + /// A raw delimiter consisting of 1 or 3+ backticks: `` ` ``. + RawDelim, + /// A sequence of whitespace to ignore in a raw block: ` `. + RawTrimmed, /// A hyperlink: `https://typst.org`. Link, /// A label: ``. @@ -369,6 +375,9 @@ impl SyntaxKind { Self::Strong => "strong content", Self::Emph => "emphasized content", Self::Raw => "raw block", + Self::RawLang => "raw language tag", + Self::RawTrimmed => "raw trimmed", + Self::RawDelim => "raw delimiter", Self::Link => "link", Self::Label => "label", Self::Ref => "reference", diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs index 300a83537..aacbee62e 100644 --- a/crates/typst-syntax/src/lexer.rs +++ b/crates/typst-syntax/src/lexer.rs @@ -16,6 +16,8 @@ pub(super) struct Lexer<'s> { mode: LexMode, /// Whether the last token contained a newline. newline: bool, + /// The state held by raw line lexing. + raw: Vec<(SyntaxKind, usize)>, /// An error for the last token. error: Option, } @@ -29,6 +31,8 @@ pub(super) enum LexMode { Math, /// Keywords, literals and operators. Code, + /// The contents of a raw block. + Raw, } impl<'s> Lexer<'s> { @@ -40,6 +44,7 @@ impl<'s> Lexer<'s> { mode, newline: false, error: None, + raw: Vec::new(), } } @@ -86,6 +91,14 @@ impl Lexer<'_> { /// Shared. impl Lexer<'_> { pub fn next(&mut self) -> SyntaxKind { + if self.mode == LexMode::Raw { + let Some((kind, end)) = self.raw.pop() else { + return SyntaxKind::Eof; + }; + self.s.jump(end); + return kind; + } + self.newline = false; self.error = None; let start = self.s.cursor(); @@ -101,6 +114,7 @@ impl Lexer<'_> { LexMode::Markup => self.markup(start, c), LexMode::Math => self.math(start, c), LexMode::Code => self.code(start, c), + LexMode::Raw => unreachable!(), }, None => SyntaxKind::Eof, @@ -224,15 +238,23 @@ impl Lexer<'_> { } fn raw(&mut self) -> SyntaxKind { + let start = self.s.cursor() - 1; + self.raw.clear(); + + // Determine number of opening backticks. let mut backticks = 1; while self.s.eat_if('`') { backticks += 1; } + // Special case for ``. if backticks == 2 { - return SyntaxKind::Raw; + self.push_raw(SyntaxKind::RawDelim); + self.s.jump(start + 1); + return SyntaxKind::RawDelim; } + // Find end of raw text. let mut found = 0; while found < backticks { match self.s.eat() { @@ -246,12 +268,99 @@ impl Lexer<'_> { return self.error("unclosed raw text"); } - SyntaxKind::Raw + let end = self.s.cursor(); + if backticks >= 3 { + self.blocky_raw(start, end, backticks); + } else { + // Single backtick needs no trimming or extra fancyness. + self.s.jump(end - backticks); + self.push_raw(SyntaxKind::Text); + self.s.jump(end); + } + + // Closing delimiter. + self.push_raw(SyntaxKind::RawDelim); + + // The saved tokens will be removed in reverse. + self.raw.reverse(); + + // Opening delimiter. + self.s.jump(start + backticks); + SyntaxKind::RawDelim + } + + fn blocky_raw(&mut self, start: usize, end: usize, backticks: usize) { + // Language tag. + self.s.jump(start + backticks); + if self.s.eat_if(is_id_start) { + self.s.eat_while(is_id_continue); + self.push_raw(SyntaxKind::RawLang); + } + + // Determine inner content between backticks and with trimmed + // single spaces (line trimming comes later). + self.s.eat_if(' '); + let mut inner = self.s.to(end - backticks); + if inner.trim_end().ends_with('`') { + inner = inner.strip_suffix(' ').unwrap_or(inner); + } + + // Determine dedent level. + let lines = split_newlines(inner); + let dedent = lines + .iter() + .skip(1) + .filter(|line| !line.chars().all(char::is_whitespace)) + // The line with the closing ``` is always taken into account + .chain(lines.last()) + .map(|line| line.chars().take_while(|c| c.is_whitespace()).count()) + .min() + .unwrap_or(0); + + let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace); + let starts_whitespace = lines.first().is_some_and(is_whitespace); + let ends_whitespace = lines.last().is_some_and(is_whitespace); + + let mut lines = lines.into_iter(); + let mut skipped = false; + + // Trim whitespace + newline at start. + if starts_whitespace { + self.s.advance(lines.next().unwrap().len()); + skipped = true; + } + // Trim whitespace + newline at end. + if ends_whitespace { + lines.next_back(); + } + + // Add lines. + for (i, line) in lines.enumerate() { + let dedent = if i == 0 && !skipped { 0 } else { dedent }; + let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum(); + self.s.eat_newline(); + self.s.advance(offset); + self.push_raw(SyntaxKind::RawTrimmed); + self.s.advance(line.len() - offset); + self.push_raw(SyntaxKind::Text); + } + + // Add final trimmed. + if self.s.cursor() < end - backticks { + self.s.jump(end - backticks); + self.push_raw(SyntaxKind::RawTrimmed); + } + self.s.jump(end); + } + + fn push_raw(&mut self, kind: SyntaxKind) { + let end = self.s.cursor(); + self.raw.push((kind, end)); } fn link(&mut self) -> SyntaxKind { let (link, balanced) = link_prefix(self.s.after()); - self.s.jump(self.s.cursor() + link.len()); + self.s.advance(link.len()); if !balanced { return self.error( @@ -632,6 +741,25 @@ fn keyword(ident: &str) -> Option { }) } +trait ScannerExt { + fn advance(&mut self, by: usize); + fn eat_newline(&mut self) -> bool; +} + +impl ScannerExt for Scanner<'_> { + fn advance(&mut self, by: usize) { + self.jump(self.cursor() + by); + } + + fn eat_newline(&mut self) -> bool { + let ate = self.eat_if(is_newline); + if ate && self.before().ends_with('\r') { + self.eat_if('\n'); + } + ate + } +} + /// Whether a character will become a Space token in Typst #[inline] fn is_space(character: char, mode: LexMode) -> bool { diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs index f4bb19e1c..4785b8a19 100644 --- a/crates/typst-syntax/src/parser.rs +++ b/crates/typst-syntax/src/parser.rs @@ -116,13 +116,13 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) { | SyntaxKind::Escape | SyntaxKind::Shorthand | SyntaxKind::SmartQuote - | SyntaxKind::Raw | SyntaxKind::Link | SyntaxKind::Label => p.eat(), SyntaxKind::Hash => embedded_code_expr(p), SyntaxKind::Star => strong(p), SyntaxKind::Underscore => emph(p), + SyntaxKind::RawDelim => raw(p), SyntaxKind::HeadingMarker if *at_start => heading(p), SyntaxKind::ListMarker if *at_start => list_item(p), SyntaxKind::EnumMarker if *at_start => enum_item(p), @@ -172,6 +172,22 @@ fn emph(p: &mut Parser) { p.wrap(m, SyntaxKind::Emph); } +/// Parses raw text with optional syntax highlighting: `` `...` ``. +fn raw(p: &mut Parser) { + let m = p.marker(); + p.enter(LexMode::Raw); + p.assert(SyntaxKind::RawDelim); + + // Eats until the closing delimiter. + while !p.eof() && !p.at(SyntaxKind::RawDelim) { + p.eat(); + } + + p.expect(SyntaxKind::RawDelim); + p.exit(); + p.wrap(m, SyntaxKind::Raw); +} + /// Parses a section heading: `= Introduction`. fn heading(p: &mut Parser) { const END: SyntaxSet = SyntaxSet::new() @@ -747,6 +763,7 @@ fn code_primary(p: &mut Parser, atomic: bool) { SyntaxKind::LeftBrace => code_block(p), SyntaxKind::LeftBracket => content_block(p), SyntaxKind::LeftParen => expr_with_paren(p, atomic), + SyntaxKind::RawDelim => raw(p), SyntaxKind::Dollar => equation(p), SyntaxKind::Let => let_binding(p), SyntaxKind::Set => set_rule(p), @@ -768,8 +785,7 @@ fn code_primary(p: &mut Parser, atomic: bool) { | SyntaxKind::Bool | SyntaxKind::Numeric | SyntaxKind::Str - | SyntaxKind::Label - | SyntaxKind::Raw => p.eat(), + | SyntaxKind::Label => p.eat(), _ => p.expected("expression"), } diff --git a/crates/typst-syntax/src/set.rs b/crates/typst-syntax/src/set.rs index 906d5fac5..39e64651b 100644 --- a/crates/typst-syntax/src/set.rs +++ b/crates/typst-syntax/src/set.rs @@ -15,7 +15,10 @@ impl SyntaxSet { } /// Insert a syntax kind into the set. + /// + /// You can only add kinds with discriminator < 128. pub const fn add(self, kind: SyntaxKind) -> Self { + assert!((kind as u8) < BITS); Self(self.0 | bit(kind)) } @@ -26,10 +29,12 @@ impl SyntaxSet { /// Whether the set contains the given syntax kind. pub const fn contains(&self, kind: SyntaxKind) -> bool { - (self.0 & bit(kind)) != 0 + (kind as u8) < BITS && (self.0 & bit(kind)) != 0 } } +const BITS: u8 = 128; + const fn bit(kind: SyntaxKind) -> u128 { 1 << (kind as usize) } @@ -54,7 +59,7 @@ pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new() .add(SyntaxKind::Escape) .add(SyntaxKind::Shorthand) .add(SyntaxKind::SmartQuote) - .add(SyntaxKind::Raw) + .add(SyntaxKind::RawDelim) .add(SyntaxKind::Link) .add(SyntaxKind::Label) .add(SyntaxKind::Hash) @@ -119,7 +124,7 @@ pub const ATOMIC_CODE_PRIMARY: SyntaxSet = SyntaxSet::new() .add(SyntaxKind::Numeric) .add(SyntaxKind::Str) .add(SyntaxKind::Label) - .add(SyntaxKind::Raw); + .add(SyntaxKind::RawDelim); /// Syntax kinds that are unary operators. pub const UNARY_OP: SyntaxSet = SyntaxSet::new() @@ -171,11 +176,6 @@ pub const PATTERN_LEAF: SyntaxSet = ATOMIC_CODE_EXPR; mod tests { use super::*; - #[test] - fn test_size() { - assert!((SyntaxKind::Eof as usize) < 128); - } - #[test] fn test_set() { let set = SyntaxSet::new().add(SyntaxKind::And).add(SyntaxKind::Or); diff --git a/crates/typst/src/eval/markup.rs b/crates/typst/src/eval/markup.rs index 1bb12d493..d43e44956 100644 --- a/crates/typst/src/eval/markup.rs +++ b/crates/typst/src/eval/markup.rs @@ -8,7 +8,9 @@ use crate::model::{ }; use crate::symbols::Symbol; use crate::syntax::ast::{self, AstNode}; -use crate::text::{LinebreakElem, RawElem, SmartQuoteElem, SpaceElem, TextElem}; +use crate::text::{ + LinebreakElem, RawContent, RawElem, SmartQuoteElem, SpaceElem, TextElem, +}; impl Eval for ast::Markup<'_> { type Output = Content; @@ -165,9 +167,10 @@ impl Eval for ast::Raw<'_> { type Output = Content; fn eval(self, _: &mut Vm) -> SourceResult { - let mut elem = RawElem::new(self.text()).with_block(self.block()); + let lines = self.lines().map(|line| (line.get().clone(), line.span())).collect(); + let mut elem = RawElem::new(RawContent::Lines(lines)).with_block(self.block()); if let Some(lang) = self.lang() { - elem.push_lang(Some(lang.into())); + elem.push_lang(Some(lang.get().clone())); } Ok(elem.pack()) } diff --git a/crates/typst/src/foundations/value.rs b/crates/typst/src/foundations/value.rs index b5f143d26..f661228af 100644 --- a/crates/typst/src/foundations/value.rs +++ b/crates/typst/src/foundations/value.rs @@ -19,7 +19,7 @@ use crate::foundations::{ use crate::layout::{Abs, Angle, Em, Fr, Length, Ratio, Rel}; use crate::symbols::Symbol; use crate::syntax::{ast, Span}; -use crate::text::{RawElem, TextElem}; +use crate::text::{RawContent, RawElem, TextElem}; use crate::util::ArcExt; use crate::visualize::{Color, Gradient, Pattern}; @@ -209,7 +209,7 @@ impl Value { Self::Symbol(v) => TextElem::packed(v.get()), Self::Content(v) => v, Self::Module(module) => module.content(), - _ => RawElem::new(self.repr()) + _ => RawElem::new(RawContent::Text(self.repr())) .with_lang(Some("typc".into())) .with_block(false) .pack(), diff --git a/crates/typst/src/layout/inline/mod.rs b/crates/typst/src/layout/inline/mod.rs index 2802bbcb2..6add43101 100644 --- a/crates/typst/src/layout/inline/mod.rs +++ b/crates/typst/src/layout/inline/mod.rs @@ -287,7 +287,7 @@ impl SpanMapper { fn span_at(&self, offset: usize) -> (Span, u16) { let mut cursor = 0; for &(len, span) in &self.0 { - if (cursor..=cursor + len).contains(&offset) { + if (cursor..cursor + len).contains(&offset) { return (span, u16::try_from(offset - cursor).unwrap_or(0)); } cursor += len; diff --git a/crates/typst/src/layout/inline/shaping.rs b/crates/typst/src/layout/inline/shaping.rs index b558d5adf..f914d347a 100644 --- a/crates/typst/src/layout/inline/shaping.rs +++ b/crates/typst/src/layout/inline/shaping.rs @@ -231,6 +231,7 @@ impl<'a> ShapedText<'a> { let decos = TextElem::deco_in(self.styles); let fill = TextElem::fill_in(self.styles); let stroke = TextElem::stroke_in(self.styles); + let span_offset = TextElem::span_offset_in(self.styles); for ((font, y_offset), group) in self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset)) @@ -267,6 +268,12 @@ impl<'a> ShapedText<'a> { frame.size_mut().x += justification_left.at(self.size) + justification_right.at(self.size); + // We may not be able to reach the offset completely if + // it exceeds u16, but better to have a roughly correct + // span offset than nothing. + let mut span = shaped.span; + span.1 = span.1.saturating_add(span_offset.saturating_as()); + // |<---- a Glyph ---->| // -->|ShapedGlyph|<-- // +---+-----------+---+ @@ -293,7 +300,7 @@ impl<'a> ShapedText<'a> { x_offset: shaped.x_offset + justification_left, range: (shaped.range.start - range.start).saturating_as() ..(shaped.range.end - range.start).saturating_as(), - span: shaped.span, + span, } }) .collect(); diff --git a/crates/typst/src/text/mod.rs b/crates/typst/src/text/mod.rs index 13049b12b..13193fe83 100644 --- a/crates/typst/src/text/mod.rs +++ b/crates/typst/src/text/mod.rs @@ -622,6 +622,12 @@ pub struct TextElem { #[required] pub text: EcoString, + /// The offset of the text in the text syntax node referenced by this + /// element's span. + #[internal] + #[ghost] + pub span_offset: usize, + /// A delta to apply on the font weight. #[internal] #[fold] diff --git a/crates/typst/src/text/raw.rs b/crates/typst/src/text/raw.rs index c71b16b1b..d47cd9475 100644 --- a/crates/typst/src/text/raw.rs +++ b/crates/typst/src/text/raw.rs @@ -17,7 +17,7 @@ use crate::foundations::{ }; use crate::layout::{BlockElem, Em, HAlignment}; use crate::model::Figurable; -use crate::syntax::{split_newlines, LinkedNode, Spanned}; +use crate::syntax::{split_newlines, LinkedNode, Span, Spanned}; use crate::text::{ FontFamily, FontList, Hyphenate, Lang, LinebreakElem, LocalName, Region, SmartQuoteElem, TextElem, TextSize, @@ -27,8 +27,9 @@ use crate::visualize::Color; use crate::{syntax, World}; // Shorthand for highlighter closures. -type StyleFn<'a> = &'a mut dyn FnMut(&LinkedNode, Range, synt::Style) -> Content; -type LineFn<'a> = &'a mut dyn FnMut(i64, Range, &mut Vec); +type StyleFn<'a> = + &'a mut dyn FnMut(usize, &LinkedNode, Range, synt::Style) -> Content; +type LineFn<'a> = &'a mut dyn FnMut(usize, Range, &mut Vec); /// Raw text with optional syntax highlighting. /// @@ -101,7 +102,7 @@ pub struct RawElem { /// ``` /// ```` #[required] - pub text: EcoString, + pub text: RawContent, /// Whether the raw text is displayed as a separate block. /// @@ -300,17 +301,24 @@ impl Packed { #[comemo::memoize] fn highlight(&self, styles: StyleChain) -> Vec> { let elem = self.as_ref(); - let span = self.span(); - let mut text = elem.text().clone(); - if text.contains('\t') { - let tab_size = RawElem::tab_size_in(styles); - text = align_tabs(&text, tab_size); - } + let text = elem.text(); + let lines = match text { + RawContent::Lines(lines) if !lines.iter().any(|(s, _)| s.contains('\t')) => { + lines.clone() + } + _ => { + let mut text = text.get(); + if text.contains('\t') { + let tab_size = RawElem::tab_size_in(styles); + text = align_tabs(&text, tab_size); + } + let lines = split_newlines(&text); + lines.into_iter().map(|line| (line.into(), self.span())).collect() + } + }; - let lines = split_newlines(&text); let count = lines.len() as i64; - let lang = elem .lang(styles) .as_ref() @@ -332,6 +340,7 @@ impl Packed { let mut seq = vec![]; if matches!(lang.as_deref(), Some("typ" | "typst" | "typc")) { + let text = text.get(); let root = match lang.as_deref() { Some("typc") => syntax::parse_code(&text), _ => syntax::parse(&text), @@ -341,16 +350,23 @@ impl Packed { &text, LinkedNode::new(&root), synt::Highlighter::new(theme), - &mut |_, range, style| styled(&text[range], foreground, style), + &mut |i, _, range, style| { + // Find start of line. + // Note: Dedent is already applied to the text + let span_offset = text[..range.start] + .rfind('\n') + .map_or(0, |i| range.start - (i + 1)); + styled(&text[range], foreground, style, lines[i].1, span_offset) + }, &mut |i, range, line| { seq.push( Packed::new(RawLine::new( - i + 1, + (i + 1) as i64, count, EcoString::from(&text[range]), Content::sequence(line.drain(..)), )) - .spanned(span), + .spanned(lines[i].1), ); }, ) @@ -366,33 +382,43 @@ impl Packed { }) }) { let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme); - for (i, line) in lines.into_iter().enumerate() { + for (i, (line, line_span)) in lines.into_iter().enumerate() { let mut line_content = vec![]; - for (style, piece) in - highlighter.highlight_line(line, syntax_set).into_iter().flatten() + let mut span_offset = 0; + for (style, piece) in highlighter + .highlight_line(line.as_str(), syntax_set) + .into_iter() + .flatten() { - line_content.push(styled(piece, foreground, style)); + line_content.push(styled( + piece, + foreground, + style, + line_span, + span_offset, + )); + span_offset += piece.len(); } seq.push( Packed::new(RawLine::new( i as i64 + 1, count, - EcoString::from(line), + line, Content::sequence(line_content), )) - .spanned(span), + .spanned(line_span), ); } } else { - seq.extend(lines.into_iter().enumerate().map(|(i, line)| { + seq.extend(lines.into_iter().enumerate().map(|(i, (line, line_span))| { Packed::new(RawLine::new( i as i64 + 1, count, - EcoString::from(line), - TextElem::packed(line), + line.clone(), + TextElem::packed(line).spanned(line_span), )) - .spanned(span) + .spanned(line_span) })); }; @@ -478,10 +504,42 @@ impl Figurable for Packed {} impl PlainText for Packed { fn plain_text(&self, text: &mut EcoString) { - text.push_str(self.text()); + text.push_str(&self.text().get()); } } +/// The content of the raw text. +#[derive(Debug, Clone, Hash, PartialEq)] +pub enum RawContent { + /// From a string. + Text(EcoString), + /// From lines of text. + Lines(EcoVec<(EcoString, Span)>), +} + +impl RawContent { + /// Returns or synthesizes the text content of the raw text. + fn get(&self) -> EcoString { + match self.clone() { + RawContent::Text(text) => text, + RawContent::Lines(lines) => { + let mut lines = lines.into_iter().map(|(s, _)| s); + if lines.len() <= 1 { + lines.next().unwrap_or_default() + } else { + lines.collect::>().join("\n").into() + } + } + } + } +} + +cast! { + RawContent, + self => self.get().into_value(), + v: EcoString => Self::Text(v), +} + /// A highlighted line of raw text. /// /// This is a helper element that is synthesized by [`raw`]($raw) elements. @@ -536,7 +594,7 @@ struct ThemedHighlighter<'a> { /// The range of the current line. range: Range, /// The current line number. - line: i64, + line: usize, /// The function to style a piece of text. style_fn: StyleFn<'a>, /// The function to append a line. @@ -597,8 +655,12 @@ impl<'a> ThemedHighlighter<'a> { let offset = self.node.range().start + len; let token_range = offset..(offset + line.len()); - self.current_line - .push((self.style_fn)(&self.node, token_range, style)); + self.current_line.push((self.style_fn)( + self.line, + &self.node, + token_range, + style, + )); len += line.len() + 1; } @@ -621,23 +683,33 @@ impl<'a> ThemedHighlighter<'a> { } /// Style a piece of text with a syntect style. -fn styled(piece: &str, foreground: synt::Color, style: synt::Style) -> Content { - let mut body = TextElem::packed(piece); +fn styled( + piece: &str, + foreground: synt::Color, + style: synt::Style, + span: Span, + span_offset: usize, +) -> Content { + let mut body = TextElem::packed(piece).spanned(span); + + if span_offset > 0 { + body = body.styled(TextElem::set_span_offset(span_offset)); + } if style.foreground != foreground { body = body.styled(TextElem::set_fill(to_typst(style.foreground).into())); } if style.font_style.contains(synt::FontStyle::BOLD) { - body = body.strong(); + body = body.strong().spanned(span); } if style.font_style.contains(synt::FontStyle::ITALIC) { - body = body.emph(); + body = body.emph().spanned(span); } if style.font_style.contains(synt::FontStyle::UNDERLINE) { - body = body.underlined(); + body = body.underlined().spanned(span); } body diff --git a/tests/typ/compiler/raw.typ b/tests/typ/compiler/raw.typ new file mode 100644 index 000000000..3084146da --- /dev/null +++ b/tests/typ/compiler/raw.typ @@ -0,0 +1,170 @@ +// Test new raw parser +// Ref: false + +--- +#let empty = ( + name: "empty", + input: ``, + text: "", +) + +#let backtick = ( + name: "backtick", + input: ``` ` ```, + text: "`", + block: false, +) + +#let lang-backtick = ( + name: "lang-backtick", + input: ```js ` ```, + lang: "js", + text: "`", + block: false, +) + +// The language tag stops on space +#let lang-space = ( + name: "lang-space", + input: ```js test ```, + lang: "js", + text: "test ", + block: false, +) + +// The language tag stops on newline +#let lang-newline = ( + name: "lang-newline", + input: ```js +test +```, + lang: "js", + text: "test", + block: true, +) + +// The first line and the last line are ignored +#let blocky = ( + name: "blocky", + input: { +``` +test +``` +}, + text: "test", + block: true, +) + +// A blocky raw should handle dedents +#let blocky-dedent = ( + name: "blocky-dedent", + input: { +``` + test + ``` + }, + text: "test", + block: true, +) + +// When there is content in the first line, it should exactly eat a whitespace char. +#let blocky-dedent-firstline = ( + name: "blocky-dedent-firstline", + input: ``` test + ```, + text: "test", + block: true, +) + +// When there is content in the first line, it should exactly eat a whitespace char. +#let blocky-dedent-firstline2 = ( + name: "blocky-dedent-firstline2", + input: ``` test +```, + text: "test", + block: true, +) + +// The first line is not affected by dedent, and the middle lines don't consider the whitespace prefix of the first line. +#let blocky-dedent-firstline3 = ( + name: "blocky-dedent-firstline3", + input: ``` test + test2 + ```, + text: "test\n test2", + block: true, +) + +// The first line is not affected by dedent, and the middle lines don't consider the whitespace prefix of the first line. +#let blocky-dedent-firstline4 = ( + name: "blocky-dedent-firstline4", + input: ``` test + test2 + ```, + text: " test\ntest2", + block: true, +) + +#let blocky-dedent-lastline = ( + name: "blocky-dedent-lastline", + input: ``` + test + ```, + text: " test", + block: true, +) + +#let blocky-dedent-lastline2 = ( + name: "blocky-dedent-lastline2", + input: ``` + test + ```, + text: "test", + block: true, +) + +#let blocky-tab = ( + name: "blocky-tab", + input: { +``` + test +``` +}, + text: "\ttest", + block: true, +) + +#let blocky-tab-dedent = ( + name: "blocky-tab-dedent", + input: { +``` + test + + ``` +}, + text: "test\n ", + block: true, +) + +#let cases = ( + empty, + backtick, + lang-backtick, + lang-space, + lang-newline, + blocky, + blocky-dedent, + blocky-dedent-firstline, + blocky-dedent-firstline2, + blocky-dedent-firstline3, + blocky-dedent-lastline, + blocky-dedent-lastline2, + blocky-tab, + blocky-tab-dedent, +) + +#for c in cases { + assert.eq(c.text, c.input.text, message: "in point " + c.name + ", expect " + repr(c.text) + ", got " + repr(c.input.text) + "") + let block = c.at("block", default: false) + assert.eq(block, c.input.block, message: "in point " + c.name + ", expect " + repr(block) + ", got " + repr(c.input.block) + "") +}