Provide more fine-grained spans in raw blocks (#3257)

Co-authored-by: Laurenz <laurmaedje@gmail.com>
2024-03-01 17:17:41 +08:00 · 2024-03-01 17:17:41 +08:00 · 030041466b
commit 030041466b
parent 57ab6d0924
13 changed files with 500 additions and 124 deletions
--- a/crates/typst-syntax/src/ast.rs
+++ b/crates/typst-syntax/src/ast.rs
@ -8,9 +8,7 @@ use std::ops::Deref;
 use ecow::EcoString;
 use unscanny::Scanner;

-use crate::{
-    is_id_continue, is_id_start, is_newline, split_newlines, Span, SyntaxKind, SyntaxNode,
-};
+use crate::{is_newline, Span, SyntaxKind, SyntaxNode};

 /// A typed AST node.
 pub trait AstNode<'a>: Sized {
@ -558,86 +556,50 @@ node! {
 }

 impl<'a> Raw<'a> {
-    /// The trimmed raw text.
-    pub fn text(self) -> EcoString {
-        let mut text = self.0.text().as_str();
-        let blocky = text.starts_with("```");
-        text = text.trim_matches('`');
-
-        // Trim tag, one space at the start, and one space at the end if the
-        // last non-whitespace char is a backtick.
-        if blocky {
-            let mut s = Scanner::new(text);
-            if s.eat_if(is_id_start) {
-                s.eat_while(is_id_continue);
-            }
-            text = s.after();
-            text = text.strip_prefix(' ').unwrap_or(text);
-            if text.trim_end().ends_with('`') {
-                text = text.strip_suffix(' ').unwrap_or(text);
-            }
-        }
-
-        // Split into lines.
-        let mut lines = split_newlines(text);
-
-        if blocky {
-            let dedent = lines
-                .iter()
-                .skip(1)
-                .filter(|line| !line.chars().all(char::is_whitespace))
-                // The line with the closing ``` is always taken into account
-                .chain(lines.last())
-                .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
-                .min()
-                .unwrap_or(0);
-
-            // Dedent based on column, but not for the first line.
-            for line in lines.iter_mut().skip(1) {
-                let offset = line.chars().take(dedent).map(char::len_utf8).sum();
-                *line = &line[offset..];
-            }
-
-            let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace);
-
-            // Trims a sequence of whitespace followed by a newline at the start.
-            if lines.first().is_some_and(is_whitespace) {
-                lines.remove(0);
-            }
-
-            // Trims a newline followed by a sequence of whitespace at the end.
-            if lines.last().is_some_and(is_whitespace) {
-                lines.pop();
-            }
-        }
-
-        lines.join("\n").into()
+    /// The lines in the raw block.
+    ///
+    /// Yields every child of the raw node that casts to a [`Text`] node, in
+    /// child order; children that do not cast to `Text` are skipped.
+    pub fn lines(self) -> impl DoubleEndedIterator<Item = Text<'a>> {
+        self.0.children().filter_map(SyntaxNode::cast)
    }

    /// An optional identifier specifying the language to syntax-highlight in.
+    ///
+    /// Returns `None` when the node has no child that casts to `RawDelim`,
+    /// when the first such delimiter has a length below 3, or when no child
+    /// casts to `RawLang`.
-    pub fn lang(self) -> Option<&'a str> {
-        let text = self.0.text();
-
+    pub fn lang(self) -> Option<RawLang<'a>> {
        // Only blocky literals are supposed to contain a language.
-        if !text.starts_with("```") {
+        let delim: RawDelim = self.0.cast_first_match()?;
+        if delim.0.len() < 3 {
            return Option::None;
        }

-        let inner = text.trim_start_matches('`');
-        let mut s = Scanner::new(inner);
-        s.eat_if(is_id_start).then(|| {
-            s.eat_while(is_id_continue);
-            s.before()
-        })
+        // The first child that casts to `RawLang`, if there is one.
+        self.0.cast_first_match()
    }

    /// Whether the raw text should be displayed in a separate block.
+    ///
+    /// True only if the first delimiter child has a length of at least 3 and
+    /// at least one `RawTrimmed` child contains a newline character.
    pub fn block(self) -> bool {
-        let text = self.0.text();
-        text.starts_with("```") && text.chars().any(is_newline)
+        self.0
+            .cast_first_match()
+            .is_some_and(|delim: RawDelim| delim.0.len() >= 3)
+            && self.0.children().any(|e| {
+                e.kind() == SyntaxKind::RawTrimmed && e.text().chars().any(is_newline)
+            })
    }
 }

+node! {
+    /// A language tag at the start of a raw element: ``typ ``.
+    RawLang
+}
+
+impl<'a> RawLang<'a> {
+    /// Get the language tag, i.e. the text of the underlying syntax node.
+    pub fn get(self) -> &'a EcoString {
+        self.0.text()
+    }
+}
+
+node! {
+    /// A raw delimiter consisting of 1 or 3+ backticks: `` ` ``.
+    RawDelim
+}
+
 node! {
    /// A hyperlink: `https://typst.org`.
    Link
--- a/crates/typst-syntax/src/highlight.rs
+++ b/crates/typst-syntax/src/highlight.rs
@ -153,6 +153,9 @@ pub fn highlight(node: &LinkedNode) -> Option<Tag> {
        SyntaxKind::Strong => Some(Tag::Strong),
        SyntaxKind::Emph => Some(Tag::Emph),
        SyntaxKind::Raw => Some(Tag::Raw),
+        SyntaxKind::RawLang => None,
+        SyntaxKind::RawTrimmed => None,
+        SyntaxKind::RawDelim => None,
        SyntaxKind::Link => Some(Tag::Link),
        SyntaxKind::Label => Some(Tag::Label),
        SyntaxKind::Ref => Some(Tag::Ref),
--- a/crates/typst-syntax/src/kind.rs
+++ b/crates/typst-syntax/src/kind.rs
@ -28,6 +28,12 @@ pub enum SyntaxKind {
    Emph,
    /// Raw text with optional syntax highlighting: `` `...` ``.
    Raw,
+    /// A language tag at the start of raw text: ``typ ``.
+    RawLang,
+    /// A raw delimiter consisting of 1 or 3+ backticks: `` ` ``.
+    RawDelim,
+    /// A sequence of whitespace to ignore in a raw block: `    `.
+    RawTrimmed,
    /// A hyperlink: `https://typst.org`.
    Link,
    /// A label: `<intro>`.
@ -369,6 +375,9 @@ impl SyntaxKind {
            Self::Strong => "strong content",
            Self::Emph => "emphasized content",
            Self::Raw => "raw block",
+            Self::RawLang => "raw language tag",
+            Self::RawTrimmed => "raw trimmed",
+            Self::RawDelim => "raw delimiter",
            Self::Link => "link",
            Self::Label => "label",
            Self::Ref => "reference",
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@ -16,6 +16,8 @@ pub(super) struct Lexer<'s> {
    mode: LexMode,
    /// Whether the last token contained a newline.
    newline: bool,
+    /// The state held by raw line lexing.
+    raw: Vec<(SyntaxKind, usize)>,
    /// An error for the last token.
    error: Option<EcoString>,
 }
@ -29,6 +31,8 @@ pub(super) enum LexMode {
    Math,
    /// Keywords, literals and operators.
    Code,
+    /// The contents of a raw block.
+    Raw,
 }

 impl<'s> Lexer<'s> {
@ -40,6 +44,7 @@ impl<'s> Lexer<'s> {
            mode,
            newline: false,
            error: None,
+            raw: Vec::new(),
        }
    }

@ -86,6 +91,14 @@ impl Lexer<'_> {
 /// Shared.
 impl Lexer<'_> {
    pub fn next(&mut self) -> SyntaxKind {
+        if self.mode == LexMode::Raw {
+            let Some((kind, end)) = self.raw.pop() else {
+                return SyntaxKind::Eof;
+            };
+            self.s.jump(end);
+            return kind;
+        }
+
        self.newline = false;
        self.error = None;
        let start = self.s.cursor();
@ -101,6 +114,7 @@ impl Lexer<'_> {
                LexMode::Markup => self.markup(start, c),
                LexMode::Math => self.math(start, c),
                LexMode::Code => self.code(start, c),
+                LexMode::Raw => unreachable!(),
            },

            None => SyntaxKind::Eof,
@ -224,15 +238,23 @@ impl Lexer<'_> {
    }

    fn raw(&mut self) -> SyntaxKind {
+        let start = self.s.cursor() - 1;
+        self.raw.clear();
+
+        // Determine number of opening backticks.
        let mut backticks = 1;
        while self.s.eat_if('`') {
            backticks += 1;
        }

+        // Special case for ``.
        if backticks == 2 {
-            return SyntaxKind::Raw;
+            self.push_raw(SyntaxKind::RawDelim);
+            self.s.jump(start + 1);
+            return SyntaxKind::RawDelim;
        }

+        // Find end of raw text.
        let mut found = 0;
        while found < backticks {
            match self.s.eat() {
@ -246,12 +268,99 @@ impl Lexer<'_> {
            return self.error("unclosed raw text");
        }

-        SyntaxKind::Raw
+        let end = self.s.cursor();
+        if backticks >= 3 {
+            self.blocky_raw(start, end, backticks);
+        } else {
+            // Single backtick needs no trimming or extra fanciness.
+            self.s.jump(end - backticks);
+            self.push_raw(SyntaxKind::Text);
+            self.s.jump(end);
+        }
+
+        // Closing delimiter.
+        self.push_raw(SyntaxKind::RawDelim);
+
+        // The saved tokens will be removed in reverse.
+        self.raw.reverse();
+
+        // Opening delimiter.
+        self.s.jump(start + backticks);
+        SyntaxKind::RawDelim
+    }
+
+    /// Queues the interior tokens of a raw literal that is delimited by three
+    /// or more backticks.
+    ///
+    /// `start` is the offset of the first opening backtick, `end` the offset
+    /// just past the closing delimiter, and `backticks` the length of each
+    /// delimiter. Tokens are pushed in source order via `push_raw`: an
+    /// optional `RawLang`, then one `RawTrimmed` + `Text` pair per kept line,
+    /// then an optional final `RawTrimmed`. Each queued token is identified by
+    /// its end offset only, so bytes skipped without a push become part of
+    /// the next queued token. The scanner is left at `end`.
+    fn blocky_raw(&mut self, start: usize, end: usize, backticks: usize) {
+        // Language tag.
+        self.s.jump(start + backticks);
+        if self.s.eat_if(is_id_start) {
+            self.s.eat_while(is_id_continue);
+            self.push_raw(SyntaxKind::RawLang);
+        }
+
+        // Determine inner content between backticks and with trimmed
+        // single spaces (line trimming comes later).
+        self.s.eat_if(' ');
+        let mut inner = self.s.to(end - backticks);
+        // A single trailing space is only dropped when the content (ignoring
+        // trailing whitespace) ends with a backtick.
+        if inner.trim_end().ends_with('`') {
+            inner = inner.strip_suffix(' ').unwrap_or(inner);
+        }
+
+        // Determine dedent level.
+        // It is the smallest number of leading whitespace characters among
+        // the non-blank lines after the first, and the last line.
+        let lines = split_newlines(inner);
+        let dedent = lines
+            .iter()
+            .skip(1)
+            .filter(|line| !line.chars().all(char::is_whitespace))
+            // The line with the closing ``` is always taken into account
+            .chain(lines.last())
+            .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
+            .min()
+            .unwrap_or(0);
+
+        let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace);
+        let starts_whitespace = lines.first().is_some_and(is_whitespace);
+        let ends_whitespace = lines.last().is_some_and(is_whitespace);
+
+        let mut lines = lines.into_iter();
+        let mut skipped = false;
+
+        // Trim whitespace + newline at start.
+        // Nothing is pushed here: the skipped bytes end up in the next token
+        // that gets queued.
+        if starts_whitespace {
+            self.s.advance(lines.next().unwrap().len());
+            skipped = true;
+        }
+        // Trim whitespace + newline at end.
+        // The dropped line is not walked over by the loop below, so the
+        // "final trimmed" step covers it.
+        if ends_whitespace {
+            lines.next_back();
+        }
+
+        // Add lines.
+        for (i, line) in lines.enumerate() {
+            // The line that directly follows the opening delimiter is never
+            // dedented; it is only iterated here if it was not skipped above.
+            let dedent = if i == 0 && !skipped { 0 } else { dedent };
+            // Byte length of the first `dedent` characters of the line.
+            let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
+            // Line break (if the scanner is at one) and dedented prefix form
+            // the trimmed token; the rest of the line is the text token.
+            self.s.eat_newline();
+            self.s.advance(offset);
+            self.push_raw(SyntaxKind::RawTrimmed);
+            self.s.advance(line.len() - offset);
+            self.push_raw(SyntaxKind::Text);
+        }
+
+        // Add final trimmed.
+        // Covers whatever is left before the closing delimiter, e.g. a
+        // dropped blank last line or the stripped trailing space.
+        if self.s.cursor() < end - backticks {
+            self.s.jump(end - backticks);
+            self.push_raw(SyntaxKind::RawTrimmed);
+        }
+        self.s.jump(end);
+    }
+
+    /// Queues a raw token of the given kind that ends at the scanner's
+    /// current cursor position.
+    fn push_raw(&mut self, kind: SyntaxKind) {
+        let end = self.s.cursor();
+        self.raw.push((kind, end));
    }

    fn link(&mut self) -> SyntaxKind {
        let (link, balanced) = link_prefix(self.s.after());
-        self.s.jump(self.s.cursor() + link.len());
+        self.s.advance(link.len());

        if !balanced {
            return self.error(
@ -632,6 +741,25 @@ fn keyword(ident: &str) -> Option<SyntaxKind> {
    })
 }

+/// Extra cursor helpers for `Scanner` used by the lexer.
+trait ScannerExt {
+    /// Moves the cursor forward by `by`, relative to its current position.
+    fn advance(&mut self, by: usize);
+    /// Consumes one newline if the scanner is at one and returns whether it
+    /// did; `\r\n` is consumed as a single newline.
+    fn eat_newline(&mut self) -> bool;
+}
+
+impl ScannerExt for Scanner<'_> {
+    // Relative counterpart to `jump`, which takes an absolute position.
+    fn advance(&mut self, by: usize) {
+        self.jump(self.cursor() + by);
+    }
+
+    // Eats a single newline character; if that character was `\r`, a directly
+    // following `\n` is eaten as well. Returns whether anything was eaten.
+    fn eat_newline(&mut self) -> bool {
+        let ate = self.eat_if(is_newline);
+        if ate && self.before().ends_with('\r') {
+            self.eat_if('\n');
+        }
+        ate
+    }
+}
+
 /// Whether a character will become a Space token in Typst
 #[inline]
 fn is_space(character: char, mode: LexMode) -> bool {
--- a/crates/typst-syntax/src/parser.rs
+++ b/crates/typst-syntax/src/parser.rs
@ -116,13 +116,13 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
        | SyntaxKind::Escape
        | SyntaxKind::Shorthand
        | SyntaxKind::SmartQuote
-        | SyntaxKind::Raw
        | SyntaxKind::Link
        | SyntaxKind::Label => p.eat(),

        SyntaxKind::Hash => embedded_code_expr(p),
        SyntaxKind::Star => strong(p),
        SyntaxKind::Underscore => emph(p),
+        SyntaxKind::RawDelim => raw(p),
        SyntaxKind::HeadingMarker if *at_start => heading(p),
        SyntaxKind::ListMarker if *at_start => list_item(p),
        SyntaxKind::EnumMarker if *at_start => enum_item(p),
@ -172,6 +172,22 @@ fn emph(p: &mut Parser) {
    p.wrap(m, SyntaxKind::Emph);
 }

+/// Parses raw text with optional syntax highlighting: `` `...` ``.
+///
+/// Expects the parser to be at the opening `RawDelim` and wraps everything up
+/// to and including the closing `RawDelim` into a `Raw` node.
+fn raw(p: &mut Parser) {
+    let m = p.marker();
+    // In raw mode the lexer hands out the tokens it queued while lexing the
+    // opening delimiter instead of scanning the source again.
+    p.enter(LexMode::Raw);
+    p.assert(SyntaxKind::RawDelim);
+
+    // Eats until the closing delimiter.
+    while !p.eof() && !p.at(SyntaxKind::RawDelim) {
+        p.eat();
+    }
+
+    p.expect(SyntaxKind::RawDelim);
+    p.exit();
+    p.wrap(m, SyntaxKind::Raw);
+}
+
 /// Parses a section heading: `= Introduction`.
 fn heading(p: &mut Parser) {
    const END: SyntaxSet = SyntaxSet::new()
@ -747,6 +763,7 @@ fn code_primary(p: &mut Parser, atomic: bool) {
        SyntaxKind::LeftBrace => code_block(p),
        SyntaxKind::LeftBracket => content_block(p),
        SyntaxKind::LeftParen => expr_with_paren(p, atomic),
+        SyntaxKind::RawDelim => raw(p),
        SyntaxKind::Dollar => equation(p),
        SyntaxKind::Let => let_binding(p),
        SyntaxKind::Set => set_rule(p),
@ -768,8 +785,7 @@ fn code_primary(p: &mut Parser, atomic: bool) {
        | SyntaxKind::Bool
        | SyntaxKind::Numeric
        | SyntaxKind::Str
-        | SyntaxKind::Label
-        | SyntaxKind::Raw => p.eat(),
+        | SyntaxKind::Label => p.eat(),

        _ => p.expected("expression"),
    }
--- a/crates/typst-syntax/src/set.rs
+++ b/crates/typst-syntax/src/set.rs
@ -15,7 +15,10 @@ impl SyntaxSet {
    }

    /// Insert a syntax kind into the set.
+    ///
+    /// You can only add kinds with discriminant < 128; the assertion below
+    /// fails for any other kind.
    pub const fn add(self, kind: SyntaxKind) -> Self {
+        assert!((kind as u8) < BITS);
        Self(self.0 | bit(kind))
    }

@ -26,10 +29,12 @@ impl SyntaxSet {

    /// Whether the set contains the given syntax kind.
    pub const fn contains(&self, kind: SyntaxKind) -> bool {
-        (self.0 & bit(kind)) != 0
+        // Kinds with a discriminant of `BITS` or more can never be in the
+        // set; checking first also avoids calling `bit` for them.
+        (kind as u8) < BITS && (self.0 & bit(kind)) != 0
    }
 }

+const BITS: u8 = 128;
+
 const fn bit(kind: SyntaxKind) -> u128 {
    1 << (kind as usize)
 }
@ -54,7 +59,7 @@ pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new()
    .add(SyntaxKind::Escape)
    .add(SyntaxKind::Shorthand)
    .add(SyntaxKind::SmartQuote)
-    .add(SyntaxKind::Raw)
+    .add(SyntaxKind::RawDelim)
    .add(SyntaxKind::Link)
    .add(SyntaxKind::Label)
    .add(SyntaxKind::Hash)
@ -119,7 +124,7 @@ pub const ATOMIC_CODE_PRIMARY: SyntaxSet = SyntaxSet::new()
    .add(SyntaxKind::Numeric)
    .add(SyntaxKind::Str)
    .add(SyntaxKind::Label)
-    .add(SyntaxKind::Raw);
+    .add(SyntaxKind::RawDelim);

 /// Syntax kinds that are unary operators.
 pub const UNARY_OP: SyntaxSet = SyntaxSet::new()
@ -171,11 +176,6 @@ pub const PATTERN_LEAF: SyntaxSet = ATOMIC_CODE_EXPR;
 mod tests {
    use super::*;

-    #[test]
-    fn test_size() {
-        assert!((SyntaxKind::Eof as usize) < 128);
-    }
-
    #[test]
    fn test_set() {
        let set = SyntaxSet::new().add(SyntaxKind::And).add(SyntaxKind::Or);
--- a/crates/typst/src/eval/markup.rs
+++ b/crates/typst/src/eval/markup.rs
@ -8,7 +8,9 @@ use crate::model::{
 };
 use crate::symbols::Symbol;
 use crate::syntax::ast::{self, AstNode};
-use crate::text::{LinebreakElem, RawElem, SmartQuoteElem, SpaceElem, TextElem};
+use crate::text::{
+    LinebreakElem, RawContent, RawElem, SmartQuoteElem, SpaceElem, TextElem,
+};

 impl Eval for ast::Markup<'_> {
    type Output = Content;
@ -165,9 +167,10 @@ impl Eval for ast::Raw<'_> {
    type Output = Content;

    fn eval(self, _: &mut Vm) -> SourceResult<Self::Output> {
-        let mut elem = RawElem::new(self.text()).with_block(self.block());
+        // Keep each line's text together with the span of its syntax node.
+        let lines = self.lines().map(|line| (line.get().clone(), line.span())).collect();
+        let mut elem = RawElem::new(RawContent::Lines(lines)).with_block(self.block());
        if let Some(lang) = self.lang() {
-            elem.push_lang(Some(lang.into()));
+            elem.push_lang(Some(lang.get().clone()));
        }
        Ok(elem.pack())
    }
--- a/crates/typst/src/foundations/value.rs
+++ b/crates/typst/src/foundations/value.rs
@ -19,7 +19,7 @@ use crate::foundations::{
 use crate::layout::{Abs, Angle, Em, Fr, Length, Ratio, Rel};
 use crate::symbols::Symbol;
 use crate::syntax::{ast, Span};
-use crate::text::{RawElem, TextElem};
+use crate::text::{RawContent, RawElem, TextElem};
 use crate::util::ArcExt;
 use crate::visualize::{Color, Gradient, Pattern};

@ -209,7 +209,7 @@ impl Value {
            Self::Symbol(v) => TextElem::packed(v.get()),
            Self::Content(v) => v,
            Self::Module(module) => module.content(),
-            _ => RawElem::new(self.repr())
+            _ => RawElem::new(RawContent::Text(self.repr()))
                .with_lang(Some("typc".into()))
                .with_block(false)
                .pack(),
--- a/crates/typst/src/layout/inline/mod.rs
+++ b/crates/typst/src/layout/inline/mod.rs
@ -287,7 +287,7 @@ impl SpanMapper {
    fn span_at(&self, offset: usize) -> (Span, u16) {
        let mut cursor = 0;
        for &(len, span) in &self.0 {
-            if (cursor..=cursor + len).contains(&offset) {
+            if (cursor..cursor + len).contains(&offset) {
                return (span, u16::try_from(offset - cursor).unwrap_or(0));
            }
            cursor += len;
--- a/crates/typst/src/layout/inline/shaping.rs
+++ b/crates/typst/src/layout/inline/shaping.rs
@ -231,6 +231,7 @@ impl<'a> ShapedText<'a> {
        let decos = TextElem::deco_in(self.styles);
        let fill = TextElem::fill_in(self.styles);
        let stroke = TextElem::stroke_in(self.styles);
+        let span_offset = TextElem::span_offset_in(self.styles);

        for ((font, y_offset), group) in
            self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset))
@ -267,6 +268,12 @@ impl<'a> ShapedText<'a> {
                    frame.size_mut().x += justification_left.at(self.size)
                        + justification_right.at(self.size);

+                    // We may not be able to reach the offset completely if
+                    // it exceeds u16, but better to have a roughly correct
+                    // span offset than nothing.
+                    let mut span = shaped.span;
+                    span.1 = span.1.saturating_add(span_offset.saturating_as());
+
                    // |<---- a Glyph ---->|
                    //  -->|ShapedGlyph|<--
                    // +---+-----------+---+
@ -293,7 +300,7 @@ impl<'a> ShapedText<'a> {
                        x_offset: shaped.x_offset + justification_left,
                        range: (shaped.range.start - range.start).saturating_as()
                            ..(shaped.range.end - range.start).saturating_as(),
-                        span: shaped.span,
+                        span,
                    }
                })
                .collect();
--- a/crates/typst/src/text/mod.rs
+++ b/crates/typst/src/text/mod.rs
@ -622,6 +622,12 @@ pub struct TextElem {
    #[required]
    pub text: EcoString,

+    /// The offset of the text in the text syntax node referenced by this
+    /// element's span.
+    #[internal]
+    #[ghost]
+    pub span_offset: usize,
+
    /// A delta to apply on the font weight.
    #[internal]
    #[fold]
--- a/crates/typst/src/text/raw.rs
+++ b/crates/typst/src/text/raw.rs
@ -17,7 +17,7 @@ use crate::foundations::{
 };
 use crate::layout::{BlockElem, Em, HAlignment};
 use crate::model::Figurable;
-use crate::syntax::{split_newlines, LinkedNode, Spanned};
+use crate::syntax::{split_newlines, LinkedNode, Span, Spanned};
 use crate::text::{
    FontFamily, FontList, Hyphenate, Lang, LinebreakElem, LocalName, Region,
    SmartQuoteElem, TextElem, TextSize,
@ -27,8 +27,9 @@ use crate::visualize::Color;
 use crate::{syntax, World};

 // Shorthand for highlighter closures.
-type StyleFn<'a> = &'a mut dyn FnMut(&LinkedNode, Range<usize>, synt::Style) -> Content;
-type LineFn<'a> = &'a mut dyn FnMut(i64, Range<usize>, &mut Vec<Content>);
+type StyleFn<'a> =
+    &'a mut dyn FnMut(usize, &LinkedNode, Range<usize>, synt::Style) -> Content;
+type LineFn<'a> = &'a mut dyn FnMut(usize, Range<usize>, &mut Vec<Content>);

 /// Raw text with optional syntax highlighting.
 ///
@ -101,7 +102,7 @@ pub struct RawElem {
    /// ```
    /// ````
    #[required]
-    pub text: EcoString,
+    pub text: RawContent,

    /// Whether the raw text is displayed as a separate block.
    ///
@ -300,17 +301,24 @@ impl Packed<RawElem> {
    #[comemo::memoize]
    fn highlight(&self, styles: StyleChain) -> Vec<Packed<RawLine>> {
        let elem = self.as_ref();
-        let span = self.span();

-        let mut text = elem.text().clone();
-        if text.contains('\t') {
-            let tab_size = RawElem::tab_size_in(styles);
-            text = align_tabs(&text, tab_size);
-        }
+        let text = elem.text();
+        let lines = match text {
+            RawContent::Lines(lines) if !lines.iter().any(|(s, _)| s.contains('\t')) => {
+                lines.clone()
+            }
+            _ => {
+                let mut text = text.get();
+                if text.contains('\t') {
+                    let tab_size = RawElem::tab_size_in(styles);
+                    text = align_tabs(&text, tab_size);
+                }
+                let lines = split_newlines(&text);
+                lines.into_iter().map(|line| (line.into(), self.span())).collect()
+            }
+        };

-        let lines = split_newlines(&text);
        let count = lines.len() as i64;
-
        let lang = elem
            .lang(styles)
            .as_ref()
@ -332,6 +340,7 @@ impl Packed<RawElem> {

        let mut seq = vec![];
        if matches!(lang.as_deref(), Some("typ" | "typst" | "typc")) {
+            let text = text.get();
            let root = match lang.as_deref() {
                Some("typc") => syntax::parse_code(&text),
                _ => syntax::parse(&text),
@ -341,16 +350,23 @@ impl Packed<RawElem> {
                &text,
                LinkedNode::new(&root),
                synt::Highlighter::new(theme),
-                &mut |_, range, style| styled(&text[range], foreground, style),
+                &mut |i, _, range, style| {
+                    // Find start of line.
+                    // Note: Dedent is already applied to the text
+                    let span_offset = text[..range.start]
+                        .rfind('\n')
+                        .map_or(0, |i| range.start - (i + 1));
+                    styled(&text[range], foreground, style, lines[i].1, span_offset)
+                },
                &mut |i, range, line| {
                    seq.push(
                        Packed::new(RawLine::new(
-                            i + 1,
+                            (i + 1) as i64,
                            count,
                            EcoString::from(&text[range]),
                            Content::sequence(line.drain(..)),
                        ))
-                        .spanned(span),
+                        .spanned(lines[i].1),
                    );
                },
            )
@ -366,33 +382,43 @@ impl Packed<RawElem> {
                })
        }) {
            let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
-            for (i, line) in lines.into_iter().enumerate() {
+            for (i, (line, line_span)) in lines.into_iter().enumerate() {
                let mut line_content = vec![];
-                for (style, piece) in
-                    highlighter.highlight_line(line, syntax_set).into_iter().flatten()
+                let mut span_offset = 0;
+                for (style, piece) in highlighter
+                    .highlight_line(line.as_str(), syntax_set)
+                    .into_iter()
+                    .flatten()
                {
-                    line_content.push(styled(piece, foreground, style));
+                    line_content.push(styled(
+                        piece,
+                        foreground,
+                        style,
+                        line_span,
+                        span_offset,
+                    ));
+                    span_offset += piece.len();
                }

                seq.push(
                    Packed::new(RawLine::new(
                        i as i64 + 1,
                        count,
-                        EcoString::from(line),
+                        line,
                        Content::sequence(line_content),
                    ))
-                    .spanned(span),
+                    .spanned(line_span),
                );
            }
        } else {
-            seq.extend(lines.into_iter().enumerate().map(|(i, line)| {
+            seq.extend(lines.into_iter().enumerate().map(|(i, (line, line_span))| {
                Packed::new(RawLine::new(
                    i as i64 + 1,
                    count,
-                    EcoString::from(line),
-                    TextElem::packed(line),
+                    line.clone(),
+                    TextElem::packed(line).spanned(line_span),
                ))
-                .spanned(span)
+                .spanned(line_span)
            }));
        };

@ -478,10 +504,42 @@ impl Figurable for Packed<RawElem> {}

 impl PlainText for Packed<RawElem> {
    fn plain_text(&self, text: &mut EcoString) {
-        text.push_str(self.text());
+        text.push_str(&self.text().get());
    }
 }

+/// The content of the raw text.
+///
+/// Either a single string or a list of lines that each carry their own span.
+#[derive(Debug, Clone, Hash, PartialEq)]
+pub enum RawContent {
+    /// From a string.
+    Text(EcoString),
+    /// From lines of text, each paired with a span.
+    Lines(EcoVec<(EcoString, Span)>),
+}
+
+impl RawContent {
+    /// Returns or synthesizes the text content of the raw text.
+    ///
+    /// For `Text`, this is the stored string. For `Lines`, the line strings
+    /// are joined with `\n`; the spans are dropped.
+    fn get(&self) -> EcoString {
+        match self.clone() {
+            RawContent::Text(text) => text,
+            RawContent::Lines(lines) => {
+                let mut lines = lines.into_iter().map(|(s, _)| s);
+                // Zero or one line: return it directly (empty string if
+                // there is none) instead of collecting and joining.
+                if lines.len() <= 1 {
+                    lines.next().unwrap_or_default()
+                } else {
+                    lines.collect::<Vec<_>>().join("\n").into()
+                }
+            }
+        }
+    }
+}
+
+// Converts to a value through the synthesized text (see `get`); an
+// `EcoString` converts back into the `Text` variant.
+cast! {
+    RawContent,
+    self => self.get().into_value(),
+    v: EcoString => Self::Text(v),
+}
+
 /// A highlighted line of raw text.
 ///
 /// This is a helper element that is synthesized by [`raw`]($raw) elements.
@ -536,7 +594,7 @@ struct ThemedHighlighter<'a> {
    /// The range of the current line.
    range: Range<usize>,
    /// The current line number.
-    line: i64,
+    line: usize,
    /// The function to style a piece of text.
    style_fn: StyleFn<'a>,
    /// The function to append a line.
@ -597,8 +655,12 @@ impl<'a> ThemedHighlighter<'a> {

                let offset = self.node.range().start + len;
                let token_range = offset..(offset + line.len());
-                self.current_line
-                    .push((self.style_fn)(&self.node, token_range, style));
+                self.current_line.push((self.style_fn)(
+                    self.line,
+                    &self.node,
+                    token_range,
+                    style,
+                ));

                len += line.len() + 1;
            }
@ -621,23 +683,33 @@ impl<'a> ThemedHighlighter<'a> {
 }

 /// Style a piece of text with a syntect style.
-fn styled(piece: &str, foreground: synt::Color, style: synt::Style) -> Content {
-    let mut body = TextElem::packed(piece);
+fn styled(
+    piece: &str,
+    foreground: synt::Color,
+    style: synt::Style,
+    span: Span,
+    span_offset: usize,
+) -> Content {
+    let mut body = TextElem::packed(piece).spanned(span);
+
+    if span_offset > 0 {
+        body = body.styled(TextElem::set_span_offset(span_offset));
+    }

    if style.foreground != foreground {
        body = body.styled(TextElem::set_fill(to_typst(style.foreground).into()));
    }

    if style.font_style.contains(synt::FontStyle::BOLD) {
-        body = body.strong();
+        body = body.strong().spanned(span);
    }

    if style.font_style.contains(synt::FontStyle::ITALIC) {
-        body = body.emph();
+        body = body.emph().spanned(span);
    }

    if style.font_style.contains(synt::FontStyle::UNDERLINE) {
-        body = body.underlined();
+        body = body.underlined().spanned(span);
    }

    body
--- a/tests/typ/compiler/raw.typ
+++ b/tests/typ/compiler/raw.typ
@ -0,0 +1,170 @@
+// Test new raw parser
+// Ref: false
+
+---
+#let empty = (
+  name: "empty",
+  input: ``,
+  text: "",
+)
+
+#let backtick = (
+  name: "backtick",
+  input: ``` ` ```,
+  text: "`",
+  block: false,
+)
+
+#let lang-backtick = (
+  name: "lang-backtick",
+  input: ```js ` ```,
+  lang: "js",
+  text: "`",
+  block: false,
+)
+
+// The language tag stops on space
+#let lang-space = (
+  name: "lang-space",
+  input: ```js test ```,
+  lang: "js",
+  text: "test ",
+  block: false,
+)
+
+// The language tag stops on newline
+#let lang-newline = (
+  name: "lang-newline",
+  input: ```js
+test
+```,
+  lang: "js",
+  text: "test",
+  block: true,
+)
+
+// The first line and the last line are ignored
+#let blocky = (
+  name: "blocky",
+  input: {
+```
+test
+```
+},
+  text: "test",
+  block: true,
+)
+
+// A blocky raw should handle dedents
+#let blocky-dedent = (
+  name: "blocky-dedent",
+  input: {
+```
+ test
+ ```
+  },
+  text: "test",
+  block: true,
+)
+
+// When there is content in the first line, it should exactly eat a whitespace char.
+#let blocky-dedent-firstline = (
+  name: "blocky-dedent-firstline",
+  input: ``` test
+  ```,
+  text: "test",
+  block: true,
+)
+
+// When there is content in the first line, it should exactly eat a whitespace char.
+#let blocky-dedent-firstline2 = (
+  name: "blocky-dedent-firstline2",
+  input: ``` test
+```,
+  text: "test",
+  block: true,
+)
+
+// The first line is not affected by dedent, and the middle lines don't consider the whitespace prefix of the first line.
+#let blocky-dedent-firstline3 = (
+  name: "blocky-dedent-firstline3",
+  input: ``` test
+     test2
+  ```,
+  text: "test\n   test2",
+  block: true,
+)
+
+// The first line is not affected by dedent, and the middle lines don't consider the whitespace prefix of the first line.
+#let blocky-dedent-firstline4 = (
+  name: "blocky-dedent-firstline4",
+  input: ```     test
+  test2
+  ```,
+  text: "    test\ntest2",
+  block: true,
+)
+
+#let blocky-dedent-lastline = (
+  name: "blocky-dedent-lastline",
+  input: ```
+  test
+ ```,
+  text: " test",
+  block: true,
+)
+
+#let blocky-dedent-lastline2 = (
+  name: "blocky-dedent-lastline2",
+  input: ```
+  test
+   ```,
+  text: "test",
+  block: true,
+)
+
+#let blocky-tab = (
+  name: "blocky-tab",
+  input: {
+```
+	test
+```
+},
+  text: "\ttest",
+  block: true,
+)
+
+#let blocky-tab-dedent = (
+  name: "blocky-tab-dedent",
+  input: {
+```
+	test
+  
+ ```
+},
+  text: "test\n ",
+  block: true,
+)
+
+// Every case defined above; each one is checked by the loop below.
+#let cases = (
+  empty,
+  backtick,
+  lang-backtick,
+  lang-space,
+  lang-newline,
+  blocky,
+  blocky-dedent,
+  blocky-dedent-firstline,
+  blocky-dedent-firstline2,
+  blocky-dedent-firstline3,
+  blocky-dedent-firstline4,
+  blocky-dedent-lastline,
+  blocky-dedent-lastline2,
+  blocky-tab,
+  blocky-tab-dedent,
+)
+
+#for c in cases {
+  assert.eq(c.text, c.input.text, message: "in point " + c.name + ", expect " + repr(c.text) + ", got " + repr(c.input.text) + "")
+  let block = c.at("block", default: false)
+  assert.eq(block, c.input.block, message: "in point " + c.name + ", expect " + repr(block) + ", got " + repr(c.input.block) + "")
+}