Provide more fine-grained spans in raw blocks (#3257)
Co-authored-by: Laurenz <laurmaedje@gmail.com>
This commit is contained in:
parent
57ab6d0924
commit
030041466b
@ -8,9 +8,7 @@ use std::ops::Deref;
|
||||
use ecow::EcoString;
|
||||
use unscanny::Scanner;
|
||||
|
||||
use crate::{
|
||||
is_id_continue, is_id_start, is_newline, split_newlines, Span, SyntaxKind, SyntaxNode,
|
||||
};
|
||||
use crate::{is_newline, Span, SyntaxKind, SyntaxNode};
|
||||
|
||||
/// A typed AST node.
|
||||
pub trait AstNode<'a>: Sized {
|
||||
@ -558,86 +556,50 @@ node! {
|
||||
}
|
||||
|
||||
impl<'a> Raw<'a> {
|
||||
/// The trimmed raw text.
|
||||
pub fn text(self) -> EcoString {
|
||||
let mut text = self.0.text().as_str();
|
||||
let blocky = text.starts_with("```");
|
||||
text = text.trim_matches('`');
|
||||
|
||||
// Trim tag, one space at the start, and one space at the end if the
|
||||
// last non-whitespace char is a backtick.
|
||||
if blocky {
|
||||
let mut s = Scanner::new(text);
|
||||
if s.eat_if(is_id_start) {
|
||||
s.eat_while(is_id_continue);
|
||||
}
|
||||
text = s.after();
|
||||
text = text.strip_prefix(' ').unwrap_or(text);
|
||||
if text.trim_end().ends_with('`') {
|
||||
text = text.strip_suffix(' ').unwrap_or(text);
|
||||
}
|
||||
}
|
||||
|
||||
// Split into lines.
|
||||
let mut lines = split_newlines(text);
|
||||
|
||||
if blocky {
|
||||
let dedent = lines
|
||||
.iter()
|
||||
.skip(1)
|
||||
.filter(|line| !line.chars().all(char::is_whitespace))
|
||||
// The line with the closing ``` is always taken into account
|
||||
.chain(lines.last())
|
||||
.map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
|
||||
.min()
|
||||
.unwrap_or(0);
|
||||
|
||||
// Dedent based on column, but not for the first line.
|
||||
for line in lines.iter_mut().skip(1) {
|
||||
let offset = line.chars().take(dedent).map(char::len_utf8).sum();
|
||||
*line = &line[offset..];
|
||||
}
|
||||
|
||||
let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace);
|
||||
|
||||
// Trims a sequence of whitespace followed by a newline at the start.
|
||||
if lines.first().is_some_and(is_whitespace) {
|
||||
lines.remove(0);
|
||||
}
|
||||
|
||||
// Trims a newline followed by a sequence of whitespace at the end.
|
||||
if lines.last().is_some_and(is_whitespace) {
|
||||
lines.pop();
|
||||
}
|
||||
}
|
||||
|
||||
lines.join("\n").into()
|
||||
/// The lines in the raw block.
|
||||
pub fn lines(self) -> impl DoubleEndedIterator<Item = Text<'a>> {
|
||||
self.0.children().filter_map(SyntaxNode::cast)
|
||||
}
|
||||
|
||||
/// An optional identifier specifying the language to syntax-highlight in.
|
||||
pub fn lang(self) -> Option<&'a str> {
|
||||
let text = self.0.text();
|
||||
|
||||
pub fn lang(self) -> Option<RawLang<'a>> {
|
||||
// Only blocky literals are supposed to contain a language.
|
||||
if !text.starts_with("```") {
|
||||
let delim: RawDelim = self.0.cast_first_match()?;
|
||||
if delim.0.len() < 3 {
|
||||
return Option::None;
|
||||
}
|
||||
|
||||
let inner = text.trim_start_matches('`');
|
||||
let mut s = Scanner::new(inner);
|
||||
s.eat_if(is_id_start).then(|| {
|
||||
s.eat_while(is_id_continue);
|
||||
s.before()
|
||||
})
|
||||
self.0.cast_first_match()
|
||||
}
|
||||
|
||||
/// Whether the raw text should be displayed in a separate block.
|
||||
pub fn block(self) -> bool {
|
||||
let text = self.0.text();
|
||||
text.starts_with("```") && text.chars().any(is_newline)
|
||||
self.0
|
||||
.cast_first_match()
|
||||
.is_some_and(|delim: RawDelim| delim.0.len() >= 3)
|
||||
&& self.0.children().any(|e| {
|
||||
e.kind() == SyntaxKind::RawTrimmed && e.text().chars().any(is_newline)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
node! {
|
||||
/// A language tag at the start of raw element: ``typ ``.
|
||||
RawLang
|
||||
}
|
||||
|
||||
impl<'a> RawLang<'a> {
|
||||
/// Get the language tag, i.e. the text of the underlying syntax node
/// (for example `typ`).
|
||||
pub fn get(self) -> &'a EcoString {
|
||||
self.0.text()
|
||||
}
|
||||
}
|
||||
|
||||
node! {
|
||||
/// A raw delimiter in single or 3+ backticks: `` ` ``.
|
||||
RawDelim
|
||||
}
|
||||
|
||||
node! {
|
||||
/// A hyperlink: `https://typst.org`.
|
||||
Link
|
||||
|
@ -153,6 +153,9 @@ pub fn highlight(node: &LinkedNode) -> Option<Tag> {
|
||||
SyntaxKind::Strong => Some(Tag::Strong),
|
||||
SyntaxKind::Emph => Some(Tag::Emph),
|
||||
SyntaxKind::Raw => Some(Tag::Raw),
|
||||
SyntaxKind::RawLang => None,
|
||||
SyntaxKind::RawTrimmed => None,
|
||||
SyntaxKind::RawDelim => None,
|
||||
SyntaxKind::Link => Some(Tag::Link),
|
||||
SyntaxKind::Label => Some(Tag::Label),
|
||||
SyntaxKind::Ref => Some(Tag::Ref),
|
||||
|
@ -28,6 +28,12 @@ pub enum SyntaxKind {
|
||||
Emph,
|
||||
/// Raw text with optional syntax highlighting: `` `...` ``.
|
||||
Raw,
|
||||
/// A language tag at the start of raw text: ``typ ``.
|
||||
RawLang,
|
||||
/// A raw delimiter consisting of 1 or 3+ backticks: `` ` ``.
|
||||
RawDelim,
|
||||
/// A sequence of whitespace to ignore in a raw block: ` `.
|
||||
RawTrimmed,
|
||||
/// A hyperlink: `https://typst.org`.
|
||||
Link,
|
||||
/// A label: `<intro>`.
|
||||
@ -369,6 +375,9 @@ impl SyntaxKind {
|
||||
Self::Strong => "strong content",
|
||||
Self::Emph => "emphasized content",
|
||||
Self::Raw => "raw block",
|
||||
Self::RawLang => "raw language tag",
|
||||
Self::RawTrimmed => "raw trimmed",
|
||||
Self::RawDelim => "raw delimiter",
|
||||
Self::Link => "link",
|
||||
Self::Label => "label",
|
||||
Self::Ref => "reference",
|
||||
|
@ -16,6 +16,8 @@ pub(super) struct Lexer<'s> {
|
||||
mode: LexMode,
|
||||
/// Whether the last token contained a newline.
|
||||
newline: bool,
|
||||
/// The state held by raw line lexing.
|
||||
raw: Vec<(SyntaxKind, usize)>,
|
||||
/// An error for the last token.
|
||||
error: Option<EcoString>,
|
||||
}
|
||||
@ -29,6 +31,8 @@ pub(super) enum LexMode {
|
||||
Math,
|
||||
/// Keywords, literals and operators.
|
||||
Code,
|
||||
/// The contents of a raw block.
|
||||
Raw,
|
||||
}
|
||||
|
||||
impl<'s> Lexer<'s> {
|
||||
@ -40,6 +44,7 @@ impl<'s> Lexer<'s> {
|
||||
mode,
|
||||
newline: false,
|
||||
error: None,
|
||||
raw: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -86,6 +91,14 @@ impl Lexer<'_> {
|
||||
/// Shared.
|
||||
impl Lexer<'_> {
|
||||
pub fn next(&mut self) -> SyntaxKind {
|
||||
if self.mode == LexMode::Raw {
|
||||
let Some((kind, end)) = self.raw.pop() else {
|
||||
return SyntaxKind::Eof;
|
||||
};
|
||||
self.s.jump(end);
|
||||
return kind;
|
||||
}
|
||||
|
||||
self.newline = false;
|
||||
self.error = None;
|
||||
let start = self.s.cursor();
|
||||
@ -101,6 +114,7 @@ impl Lexer<'_> {
|
||||
LexMode::Markup => self.markup(start, c),
|
||||
LexMode::Math => self.math(start, c),
|
||||
LexMode::Code => self.code(start, c),
|
||||
LexMode::Raw => unreachable!(),
|
||||
},
|
||||
|
||||
None => SyntaxKind::Eof,
|
||||
@ -224,15 +238,23 @@ impl Lexer<'_> {
|
||||
}
|
||||
|
||||
fn raw(&mut self) -> SyntaxKind {
|
||||
let start = self.s.cursor() - 1;
|
||||
self.raw.clear();
|
||||
|
||||
// Determine number of opening backticks.
|
||||
let mut backticks = 1;
|
||||
while self.s.eat_if('`') {
|
||||
backticks += 1;
|
||||
}
|
||||
|
||||
// Special case for ``.
|
||||
if backticks == 2 {
|
||||
return SyntaxKind::Raw;
|
||||
self.push_raw(SyntaxKind::RawDelim);
|
||||
self.s.jump(start + 1);
|
||||
return SyntaxKind::RawDelim;
|
||||
}
|
||||
|
||||
// Find end of raw text.
|
||||
let mut found = 0;
|
||||
while found < backticks {
|
||||
match self.s.eat() {
|
||||
@ -246,12 +268,99 @@ impl Lexer<'_> {
|
||||
return self.error("unclosed raw text");
|
||||
}
|
||||
|
||||
SyntaxKind::Raw
|
||||
let end = self.s.cursor();
|
||||
if backticks >= 3 {
|
||||
self.blocky_raw(start, end, backticks);
|
||||
} else {
|
||||
// Single backtick needs no trimming or extra fancyness.
|
||||
self.s.jump(end - backticks);
|
||||
self.push_raw(SyntaxKind::Text);
|
||||
self.s.jump(end);
|
||||
}
|
||||
|
||||
// Closing delimiter.
|
||||
self.push_raw(SyntaxKind::RawDelim);
|
||||
|
||||
// The saved tokens will be removed in reverse.
|
||||
self.raw.reverse();
|
||||
|
||||
// Opening delimiter.
|
||||
self.s.jump(start + backticks);
|
||||
SyntaxKind::RawDelim
|
||||
}
|
||||
|
||||
/// Splits the inside of a raw element delimited by 3+ backticks into
/// sub-tokens and records them through `push_raw`.
///
/// - `start`: byte offset of the first opening backtick.
/// - `end`: byte offset just past the closing delimiter.
/// - `backticks`: number of backticks in each delimiter.
///
/// Tokens are recorded in source order: an optional `RawLang`, then for each
/// kept line a `RawTrimmed` (whitespace to ignore) followed by a `Text`
/// (the line's content), and finally an optional trailing `RawTrimmed`.
/// Only token *end* offsets are stored, so any region the cursor skips over
/// without a push becomes part of the next pushed token.
///
/// On return the cursor sits at `end`; the caller is expected to push the
/// closing `RawDelim` itself.
fn blocky_raw(&mut self, start: usize, end: usize, backticks: usize) {
|
||||
// Language tag.
// It must start directly after the opening backticks with an identifier
// start character; otherwise no `RawLang` token is recorded.
|
||||
self.s.jump(start + backticks);
|
||||
if self.s.eat_if(is_id_start) {
|
||||
self.s.eat_while(is_id_continue);
|
||||
self.push_raw(SyntaxKind::RawLang);
|
||||
}
|
||||
|
||||
// Determine inner content between backticks and with trimmed
|
||||
// single spaces (line trimming comes later).
// One leading space is always dropped. One trailing space is dropped only
// when the content otherwise ends in a backtick, so that raw text can end
// with a backtick without it merging into the closing delimiter.
|
||||
self.s.eat_if(' ');
|
||||
let mut inner = self.s.to(end - backticks);
|
||||
if inner.trim_end().ends_with('`') {
|
||||
inner = inner.strip_suffix(' ').unwrap_or(inner);
|
||||
}
|
||||
|
||||
// Determine dedent level.
// This is the smallest number of leading whitespace *characters* over all
// lines after the first that are not whitespace-only.
|
||||
let lines = split_newlines(inner);
|
||||
let dedent = lines
|
||||
.iter()
|
||||
.skip(1)
|
||||
.filter(|line| !line.chars().all(char::is_whitespace))
|
||||
// The line with the closing ``` is always taken into account
|
||||
.chain(lines.last())
|
||||
.map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
|
||||
.min()
|
||||
.unwrap_or(0);
|
||||
|
||||
// Note: an empty line counts as whitespace-only (`all` on no chars is true).
let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace);
|
||||
let starts_whitespace = lines.first().is_some_and(is_whitespace);
|
||||
let ends_whitespace = lines.last().is_some_and(is_whitespace);
|
||||
|
||||
let mut lines = lines.into_iter();
|
||||
// Whether the first line was dropped; if so, the next line is dedented like
// any other line instead of being treated as the first line.
let mut skipped = false;
|
||||
|
||||
// Trim whitespace + newline at start.
// The cursor moves past the blank first line without pushing a token, so
// that region ends up inside the `RawTrimmed` pushed for the next line.
|
||||
if starts_whitespace {
|
||||
self.s.advance(lines.next().unwrap().len());
|
||||
skipped = true;
|
||||
}
|
||||
// Trim whitespace + newline at end.
// The dropped last line is covered by the final `RawTrimmed` below.
|
||||
if ends_whitespace {
|
||||
lines.next_back();
|
||||
}
|
||||
|
||||
// Add lines.
|
||||
for (i, line) in lines.enumerate() {
|
||||
// The first line is never dedented unless a blank first line was skipped.
let dedent = if i == 0 && !skipped { 0 } else { dedent };
|
||||
// Convert the dedent from a character count into a byte offset.
let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
|
||||
// Newline separating this line from the previous one (if any) plus the
// dedented indentation form the trimmed token.
self.s.eat_newline();
|
||||
self.s.advance(offset);
|
||||
self.push_raw(SyntaxKind::RawTrimmed);
|
||||
// The remainder of the line is the actual raw text.
self.s.advance(line.len() - offset);
|
||||
self.push_raw(SyntaxKind::Text);
|
||||
}
|
||||
|
||||
// Add final trimmed.
// Anything left before the closing delimiter (dropped last line, stripped
// trailing space) is recorded as one more trimmed token.
|
||||
if self.s.cursor() < end - backticks {
|
||||
self.s.jump(end - backticks);
|
||||
self.push_raw(SyntaxKind::RawTrimmed);
|
||||
}
|
||||
// Leave the cursor just past the closing delimiter.
self.s.jump(end);
|
||||
}
|
||||
|
||||
/// Records a raw sub-token of the given `kind` that ends at the scanner's
/// current cursor position.
///
/// Only the end offset is stored; the token implicitly starts where the
/// previously recorded one ended.
fn push_raw(&mut self, kind: SyntaxKind) {
    self.raw.push((kind, self.s.cursor()));
}
|
||||
|
||||
fn link(&mut self) -> SyntaxKind {
|
||||
let (link, balanced) = link_prefix(self.s.after());
|
||||
self.s.jump(self.s.cursor() + link.len());
|
||||
self.s.advance(link.len());
|
||||
|
||||
if !balanced {
|
||||
return self.error(
|
||||
@ -632,6 +741,25 @@ fn keyword(ident: &str) -> Option<SyntaxKind> {
|
||||
})
|
||||
}
|
||||
|
||||
trait ScannerExt {
|
||||
fn advance(&mut self, by: usize);
|
||||
fn eat_newline(&mut self) -> bool;
|
||||
}
|
||||
|
||||
impl ScannerExt for Scanner<'_> {
|
||||
fn advance(&mut self, by: usize) {
|
||||
self.jump(self.cursor() + by);
|
||||
}
|
||||
|
||||
fn eat_newline(&mut self) -> bool {
|
||||
let ate = self.eat_if(is_newline);
|
||||
if ate && self.before().ends_with('\r') {
|
||||
self.eat_if('\n');
|
||||
}
|
||||
ate
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether a character will become a Space token in Typst
|
||||
#[inline]
|
||||
fn is_space(character: char, mode: LexMode) -> bool {
|
||||
|
@ -116,13 +116,13 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
|
||||
| SyntaxKind::Escape
|
||||
| SyntaxKind::Shorthand
|
||||
| SyntaxKind::SmartQuote
|
||||
| SyntaxKind::Raw
|
||||
| SyntaxKind::Link
|
||||
| SyntaxKind::Label => p.eat(),
|
||||
|
||||
SyntaxKind::Hash => embedded_code_expr(p),
|
||||
SyntaxKind::Star => strong(p),
|
||||
SyntaxKind::Underscore => emph(p),
|
||||
SyntaxKind::RawDelim => raw(p),
|
||||
SyntaxKind::HeadingMarker if *at_start => heading(p),
|
||||
SyntaxKind::ListMarker if *at_start => list_item(p),
|
||||
SyntaxKind::EnumMarker if *at_start => enum_item(p),
|
||||
@ -172,6 +172,22 @@ fn emph(p: &mut Parser) {
|
||||
p.wrap(m, SyntaxKind::Emph);
|
||||
}
|
||||
|
||||
/// Parses raw text with optional syntax highlighting: `` `...` ``.
|
||||
fn raw(p: &mut Parser) {
|
||||
let m = p.marker();
|
||||
p.enter(LexMode::Raw);
|
||||
p.assert(SyntaxKind::RawDelim);
|
||||
|
||||
// Eats until the closing delimiter.
|
||||
while !p.eof() && !p.at(SyntaxKind::RawDelim) {
|
||||
p.eat();
|
||||
}
|
||||
|
||||
p.expect(SyntaxKind::RawDelim);
|
||||
p.exit();
|
||||
p.wrap(m, SyntaxKind::Raw);
|
||||
}
|
||||
|
||||
/// Parses a section heading: `= Introduction`.
|
||||
fn heading(p: &mut Parser) {
|
||||
const END: SyntaxSet = SyntaxSet::new()
|
||||
@ -747,6 +763,7 @@ fn code_primary(p: &mut Parser, atomic: bool) {
|
||||
SyntaxKind::LeftBrace => code_block(p),
|
||||
SyntaxKind::LeftBracket => content_block(p),
|
||||
SyntaxKind::LeftParen => expr_with_paren(p, atomic),
|
||||
SyntaxKind::RawDelim => raw(p),
|
||||
SyntaxKind::Dollar => equation(p),
|
||||
SyntaxKind::Let => let_binding(p),
|
||||
SyntaxKind::Set => set_rule(p),
|
||||
@ -768,8 +785,7 @@ fn code_primary(p: &mut Parser, atomic: bool) {
|
||||
| SyntaxKind::Bool
|
||||
| SyntaxKind::Numeric
|
||||
| SyntaxKind::Str
|
||||
| SyntaxKind::Label
|
||||
| SyntaxKind::Raw => p.eat(),
|
||||
| SyntaxKind::Label => p.eat(),
|
||||
|
||||
_ => p.expected("expression"),
|
||||
}
|
||||
|
@ -15,7 +15,10 @@ impl SyntaxSet {
|
||||
}
|
||||
|
||||
/// Insert a syntax kind into the set.
|
||||
///
|
||||
/// You can only add kinds with discriminator < 128.
|
||||
pub const fn add(self, kind: SyntaxKind) -> Self {
|
||||
assert!((kind as u8) < BITS);
|
||||
Self(self.0 | bit(kind))
|
||||
}
|
||||
|
||||
@ -26,10 +29,12 @@ impl SyntaxSet {
|
||||
|
||||
/// Whether the set contains the given syntax kind.
|
||||
pub const fn contains(&self, kind: SyntaxKind) -> bool {
|
||||
(self.0 & bit(kind)) != 0
|
||||
(kind as u8) < BITS && (self.0 & bit(kind)) != 0
|
||||
}
|
||||
}
|
||||
|
||||
const BITS: u8 = 128;
|
||||
|
||||
const fn bit(kind: SyntaxKind) -> u128 {
|
||||
1 << (kind as usize)
|
||||
}
|
||||
@ -54,7 +59,7 @@ pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new()
|
||||
.add(SyntaxKind::Escape)
|
||||
.add(SyntaxKind::Shorthand)
|
||||
.add(SyntaxKind::SmartQuote)
|
||||
.add(SyntaxKind::Raw)
|
||||
.add(SyntaxKind::RawDelim)
|
||||
.add(SyntaxKind::Link)
|
||||
.add(SyntaxKind::Label)
|
||||
.add(SyntaxKind::Hash)
|
||||
@ -119,7 +124,7 @@ pub const ATOMIC_CODE_PRIMARY: SyntaxSet = SyntaxSet::new()
|
||||
.add(SyntaxKind::Numeric)
|
||||
.add(SyntaxKind::Str)
|
||||
.add(SyntaxKind::Label)
|
||||
.add(SyntaxKind::Raw);
|
||||
.add(SyntaxKind::RawDelim);
|
||||
|
||||
/// Syntax kinds that are unary operators.
|
||||
pub const UNARY_OP: SyntaxSet = SyntaxSet::new()
|
||||
@ -171,11 +176,6 @@ pub const PATTERN_LEAF: SyntaxSet = ATOMIC_CODE_EXPR;
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_size() {
|
||||
assert!((SyntaxKind::Eof as usize) < 128);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set() {
|
||||
let set = SyntaxSet::new().add(SyntaxKind::And).add(SyntaxKind::Or);
|
||||
|
@ -8,7 +8,9 @@ use crate::model::{
|
||||
};
|
||||
use crate::symbols::Symbol;
|
||||
use crate::syntax::ast::{self, AstNode};
|
||||
use crate::text::{LinebreakElem, RawElem, SmartQuoteElem, SpaceElem, TextElem};
|
||||
use crate::text::{
|
||||
LinebreakElem, RawContent, RawElem, SmartQuoteElem, SpaceElem, TextElem,
|
||||
};
|
||||
|
||||
impl Eval for ast::Markup<'_> {
|
||||
type Output = Content;
|
||||
@ -165,9 +167,10 @@ impl Eval for ast::Raw<'_> {
|
||||
type Output = Content;
|
||||
|
||||
fn eval(self, _: &mut Vm) -> SourceResult<Self::Output> {
|
||||
let mut elem = RawElem::new(self.text()).with_block(self.block());
|
||||
let lines = self.lines().map(|line| (line.get().clone(), line.span())).collect();
|
||||
let mut elem = RawElem::new(RawContent::Lines(lines)).with_block(self.block());
|
||||
if let Some(lang) = self.lang() {
|
||||
elem.push_lang(Some(lang.into()));
|
||||
elem.push_lang(Some(lang.get().clone()));
|
||||
}
|
||||
Ok(elem.pack())
|
||||
}
|
||||
|
@ -19,7 +19,7 @@ use crate::foundations::{
|
||||
use crate::layout::{Abs, Angle, Em, Fr, Length, Ratio, Rel};
|
||||
use crate::symbols::Symbol;
|
||||
use crate::syntax::{ast, Span};
|
||||
use crate::text::{RawElem, TextElem};
|
||||
use crate::text::{RawContent, RawElem, TextElem};
|
||||
use crate::util::ArcExt;
|
||||
use crate::visualize::{Color, Gradient, Pattern};
|
||||
|
||||
@ -209,7 +209,7 @@ impl Value {
|
||||
Self::Symbol(v) => TextElem::packed(v.get()),
|
||||
Self::Content(v) => v,
|
||||
Self::Module(module) => module.content(),
|
||||
_ => RawElem::new(self.repr())
|
||||
_ => RawElem::new(RawContent::Text(self.repr()))
|
||||
.with_lang(Some("typc".into()))
|
||||
.with_block(false)
|
||||
.pack(),
|
||||
|
@ -287,7 +287,7 @@ impl SpanMapper {
|
||||
fn span_at(&self, offset: usize) -> (Span, u16) {
|
||||
let mut cursor = 0;
|
||||
for &(len, span) in &self.0 {
|
||||
if (cursor..=cursor + len).contains(&offset) {
|
||||
if (cursor..cursor + len).contains(&offset) {
|
||||
return (span, u16::try_from(offset - cursor).unwrap_or(0));
|
||||
}
|
||||
cursor += len;
|
||||
|
@ -231,6 +231,7 @@ impl<'a> ShapedText<'a> {
|
||||
let decos = TextElem::deco_in(self.styles);
|
||||
let fill = TextElem::fill_in(self.styles);
|
||||
let stroke = TextElem::stroke_in(self.styles);
|
||||
let span_offset = TextElem::span_offset_in(self.styles);
|
||||
|
||||
for ((font, y_offset), group) in
|
||||
self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset))
|
||||
@ -267,6 +268,12 @@ impl<'a> ShapedText<'a> {
|
||||
frame.size_mut().x += justification_left.at(self.size)
|
||||
+ justification_right.at(self.size);
|
||||
|
||||
// We may not be able to reach the offset completely if
|
||||
// it exceeds u16, but better to have a roughly correct
|
||||
// span offset than nothing.
|
||||
let mut span = shaped.span;
|
||||
span.1 = span.1.saturating_add(span_offset.saturating_as());
|
||||
|
||||
// |<---- a Glyph ---->|
|
||||
// -->|ShapedGlyph|<--
|
||||
// +---+-----------+---+
|
||||
@ -293,7 +300,7 @@ impl<'a> ShapedText<'a> {
|
||||
x_offset: shaped.x_offset + justification_left,
|
||||
range: (shaped.range.start - range.start).saturating_as()
|
||||
..(shaped.range.end - range.start).saturating_as(),
|
||||
span: shaped.span,
|
||||
span,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
@ -622,6 +622,12 @@ pub struct TextElem {
|
||||
#[required]
|
||||
pub text: EcoString,
|
||||
|
||||
/// The offset of the text in the text syntax node referenced by this
|
||||
/// element's span.
|
||||
#[internal]
|
||||
#[ghost]
|
||||
pub span_offset: usize,
|
||||
|
||||
/// A delta to apply on the font weight.
|
||||
#[internal]
|
||||
#[fold]
|
||||
|
@ -17,7 +17,7 @@ use crate::foundations::{
|
||||
};
|
||||
use crate::layout::{BlockElem, Em, HAlignment};
|
||||
use crate::model::Figurable;
|
||||
use crate::syntax::{split_newlines, LinkedNode, Spanned};
|
||||
use crate::syntax::{split_newlines, LinkedNode, Span, Spanned};
|
||||
use crate::text::{
|
||||
FontFamily, FontList, Hyphenate, Lang, LinebreakElem, LocalName, Region,
|
||||
SmartQuoteElem, TextElem, TextSize,
|
||||
@ -27,8 +27,9 @@ use crate::visualize::Color;
|
||||
use crate::{syntax, World};
|
||||
|
||||
// Shorthand for highlighter closures.
|
||||
type StyleFn<'a> = &'a mut dyn FnMut(&LinkedNode, Range<usize>, synt::Style) -> Content;
|
||||
type LineFn<'a> = &'a mut dyn FnMut(i64, Range<usize>, &mut Vec<Content>);
|
||||
type StyleFn<'a> =
|
||||
&'a mut dyn FnMut(usize, &LinkedNode, Range<usize>, synt::Style) -> Content;
|
||||
type LineFn<'a> = &'a mut dyn FnMut(usize, Range<usize>, &mut Vec<Content>);
|
||||
|
||||
/// Raw text with optional syntax highlighting.
|
||||
///
|
||||
@ -101,7 +102,7 @@ pub struct RawElem {
|
||||
/// ```
|
||||
/// ````
|
||||
#[required]
|
||||
pub text: EcoString,
|
||||
pub text: RawContent,
|
||||
|
||||
/// Whether the raw text is displayed as a separate block.
|
||||
///
|
||||
@ -300,17 +301,24 @@ impl Packed<RawElem> {
|
||||
#[comemo::memoize]
|
||||
fn highlight(&self, styles: StyleChain) -> Vec<Packed<RawLine>> {
|
||||
let elem = self.as_ref();
|
||||
let span = self.span();
|
||||
|
||||
let mut text = elem.text().clone();
|
||||
if text.contains('\t') {
|
||||
let tab_size = RawElem::tab_size_in(styles);
|
||||
text = align_tabs(&text, tab_size);
|
||||
}
|
||||
let text = elem.text();
|
||||
let lines = match text {
|
||||
RawContent::Lines(lines) if !lines.iter().any(|(s, _)| s.contains('\t')) => {
|
||||
lines.clone()
|
||||
}
|
||||
_ => {
|
||||
let mut text = text.get();
|
||||
if text.contains('\t') {
|
||||
let tab_size = RawElem::tab_size_in(styles);
|
||||
text = align_tabs(&text, tab_size);
|
||||
}
|
||||
let lines = split_newlines(&text);
|
||||
lines.into_iter().map(|line| (line.into(), self.span())).collect()
|
||||
}
|
||||
};
|
||||
|
||||
let lines = split_newlines(&text);
|
||||
let count = lines.len() as i64;
|
||||
|
||||
let lang = elem
|
||||
.lang(styles)
|
||||
.as_ref()
|
||||
@ -332,6 +340,7 @@ impl Packed<RawElem> {
|
||||
|
||||
let mut seq = vec![];
|
||||
if matches!(lang.as_deref(), Some("typ" | "typst" | "typc")) {
|
||||
let text = text.get();
|
||||
let root = match lang.as_deref() {
|
||||
Some("typc") => syntax::parse_code(&text),
|
||||
_ => syntax::parse(&text),
|
||||
@ -341,16 +350,23 @@ impl Packed<RawElem> {
|
||||
&text,
|
||||
LinkedNode::new(&root),
|
||||
synt::Highlighter::new(theme),
|
||||
&mut |_, range, style| styled(&text[range], foreground, style),
|
||||
&mut |i, _, range, style| {
|
||||
// Find start of line.
|
||||
// Note: Dedent is already applied to the text
|
||||
let span_offset = text[..range.start]
|
||||
.rfind('\n')
|
||||
.map_or(0, |i| range.start - (i + 1));
|
||||
styled(&text[range], foreground, style, lines[i].1, span_offset)
|
||||
},
|
||||
&mut |i, range, line| {
|
||||
seq.push(
|
||||
Packed::new(RawLine::new(
|
||||
i + 1,
|
||||
(i + 1) as i64,
|
||||
count,
|
||||
EcoString::from(&text[range]),
|
||||
Content::sequence(line.drain(..)),
|
||||
))
|
||||
.spanned(span),
|
||||
.spanned(lines[i].1),
|
||||
);
|
||||
},
|
||||
)
|
||||
@ -366,33 +382,43 @@ impl Packed<RawElem> {
|
||||
})
|
||||
}) {
|
||||
let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
|
||||
for (i, line) in lines.into_iter().enumerate() {
|
||||
for (i, (line, line_span)) in lines.into_iter().enumerate() {
|
||||
let mut line_content = vec![];
|
||||
for (style, piece) in
|
||||
highlighter.highlight_line(line, syntax_set).into_iter().flatten()
|
||||
let mut span_offset = 0;
|
||||
for (style, piece) in highlighter
|
||||
.highlight_line(line.as_str(), syntax_set)
|
||||
.into_iter()
|
||||
.flatten()
|
||||
{
|
||||
line_content.push(styled(piece, foreground, style));
|
||||
line_content.push(styled(
|
||||
piece,
|
||||
foreground,
|
||||
style,
|
||||
line_span,
|
||||
span_offset,
|
||||
));
|
||||
span_offset += piece.len();
|
||||
}
|
||||
|
||||
seq.push(
|
||||
Packed::new(RawLine::new(
|
||||
i as i64 + 1,
|
||||
count,
|
||||
EcoString::from(line),
|
||||
line,
|
||||
Content::sequence(line_content),
|
||||
))
|
||||
.spanned(span),
|
||||
.spanned(line_span),
|
||||
);
|
||||
}
|
||||
} else {
|
||||
seq.extend(lines.into_iter().enumerate().map(|(i, line)| {
|
||||
seq.extend(lines.into_iter().enumerate().map(|(i, (line, line_span))| {
|
||||
Packed::new(RawLine::new(
|
||||
i as i64 + 1,
|
||||
count,
|
||||
EcoString::from(line),
|
||||
TextElem::packed(line),
|
||||
line.clone(),
|
||||
TextElem::packed(line).spanned(line_span),
|
||||
))
|
||||
.spanned(span)
|
||||
.spanned(line_span)
|
||||
}));
|
||||
};
|
||||
|
||||
@ -478,10 +504,42 @@ impl Figurable for Packed<RawElem> {}
|
||||
|
||||
impl PlainText for Packed<RawElem> {
|
||||
fn plain_text(&self, text: &mut EcoString) {
|
||||
text.push_str(self.text());
|
||||
text.push_str(&self.text().get());
|
||||
}
|
||||
}
|
||||
|
||||
/// The content of the raw text.
|
||||
#[derive(Debug, Clone, Hash, PartialEq)]
|
||||
pub enum RawContent {
|
||||
/// From a string.
|
||||
Text(EcoString),
|
||||
/// From lines of text.
|
||||
Lines(EcoVec<(EcoString, Span)>),
|
||||
}
|
||||
|
||||
impl RawContent {
|
||||
/// Returns or synthesizes the text content of the raw text.
|
||||
fn get(&self) -> EcoString {
|
||||
match self.clone() {
|
||||
RawContent::Text(text) => text,
|
||||
RawContent::Lines(lines) => {
|
||||
let mut lines = lines.into_iter().map(|(s, _)| s);
|
||||
if lines.len() <= 1 {
|
||||
lines.next().unwrap_or_default()
|
||||
} else {
|
||||
lines.collect::<Vec<_>>().join("\n").into()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cast! {
|
||||
RawContent,
|
||||
self => self.get().into_value(),
|
||||
v: EcoString => Self::Text(v),
|
||||
}
|
||||
|
||||
/// A highlighted line of raw text.
|
||||
///
|
||||
/// This is a helper element that is synthesized by [`raw`]($raw) elements.
|
||||
@ -536,7 +594,7 @@ struct ThemedHighlighter<'a> {
|
||||
/// The range of the current line.
|
||||
range: Range<usize>,
|
||||
/// The current line number.
|
||||
line: i64,
|
||||
line: usize,
|
||||
/// The function to style a piece of text.
|
||||
style_fn: StyleFn<'a>,
|
||||
/// The function to append a line.
|
||||
@ -597,8 +655,12 @@ impl<'a> ThemedHighlighter<'a> {
|
||||
|
||||
let offset = self.node.range().start + len;
|
||||
let token_range = offset..(offset + line.len());
|
||||
self.current_line
|
||||
.push((self.style_fn)(&self.node, token_range, style));
|
||||
self.current_line.push((self.style_fn)(
|
||||
self.line,
|
||||
&self.node,
|
||||
token_range,
|
||||
style,
|
||||
));
|
||||
|
||||
len += line.len() + 1;
|
||||
}
|
||||
@ -621,23 +683,33 @@ impl<'a> ThemedHighlighter<'a> {
|
||||
}
|
||||
|
||||
/// Style a piece of text with a syntect style.
|
||||
fn styled(piece: &str, foreground: synt::Color, style: synt::Style) -> Content {
|
||||
let mut body = TextElem::packed(piece);
|
||||
fn styled(
|
||||
piece: &str,
|
||||
foreground: synt::Color,
|
||||
style: synt::Style,
|
||||
span: Span,
|
||||
span_offset: usize,
|
||||
) -> Content {
|
||||
let mut body = TextElem::packed(piece).spanned(span);
|
||||
|
||||
if span_offset > 0 {
|
||||
body = body.styled(TextElem::set_span_offset(span_offset));
|
||||
}
|
||||
|
||||
if style.foreground != foreground {
|
||||
body = body.styled(TextElem::set_fill(to_typst(style.foreground).into()));
|
||||
}
|
||||
|
||||
if style.font_style.contains(synt::FontStyle::BOLD) {
|
||||
body = body.strong();
|
||||
body = body.strong().spanned(span);
|
||||
}
|
||||
|
||||
if style.font_style.contains(synt::FontStyle::ITALIC) {
|
||||
body = body.emph();
|
||||
body = body.emph().spanned(span);
|
||||
}
|
||||
|
||||
if style.font_style.contains(synt::FontStyle::UNDERLINE) {
|
||||
body = body.underlined();
|
||||
body = body.underlined().spanned(span);
|
||||
}
|
||||
|
||||
body
|
||||
|
170
tests/typ/compiler/raw.typ
Normal file
170
tests/typ/compiler/raw.typ
Normal file
@ -0,0 +1,170 @@
|
||||
// Test new raw parser
|
||||
// Ref: false
|
||||
|
||||
---
|
||||
#let empty = (
|
||||
name: "empty",
|
||||
input: ``,
|
||||
text: "",
|
||||
)
|
||||
|
||||
#let backtick = (
|
||||
name: "backtick",
|
||||
input: ``` ` ```,
|
||||
text: "`",
|
||||
block: false,
|
||||
)
|
||||
|
||||
#let lang-backtick = (
|
||||
name: "lang-backtick",
|
||||
input: ```js ` ```,
|
||||
lang: "js",
|
||||
text: "`",
|
||||
block: false,
|
||||
)
|
||||
|
||||
// The language tag stops on space
|
||||
#let lang-space = (
|
||||
name: "lang-space",
|
||||
input: ```js test ```,
|
||||
lang: "js",
|
||||
text: "test ",
|
||||
block: false,
|
||||
)
|
||||
|
||||
// The language tag stops on newline
|
||||
#let lang-newline = (
|
||||
name: "lang-newline",
|
||||
input: ```js
|
||||
test
|
||||
```,
|
||||
lang: "js",
|
||||
text: "test",
|
||||
block: true,
|
||||
)
|
||||
|
||||
// The first line and the last line are ignored
|
||||
#let blocky = (
|
||||
name: "blocky",
|
||||
input: {
|
||||
```
|
||||
test
|
||||
```
|
||||
},
|
||||
text: "test",
|
||||
block: true,
|
||||
)
|
||||
|
||||
// A blocky raw should handle dedents
|
||||
#let blocky-dedent = (
|
||||
name: "blocky-dedent",
|
||||
input: {
|
||||
```
|
||||
test
|
||||
```
|
||||
},
|
||||
text: "test",
|
||||
block: true,
|
||||
)
|
||||
|
||||
// When there is content in the first line, it should exactly eat a whitespace char.
|
||||
#let blocky-dedent-firstline = (
|
||||
name: "blocky-dedent-firstline",
|
||||
input: ``` test
|
||||
```,
|
||||
text: "test",
|
||||
block: true,
|
||||
)
|
||||
|
||||
// When there is content in the first line, it should exactly eat a whitespace char.
|
||||
#let blocky-dedent-firstline2 = (
|
||||
name: "blocky-dedent-firstline2",
|
||||
input: ``` test
|
||||
```,
|
||||
text: "test",
|
||||
block: true,
|
||||
)
|
||||
|
||||
// The first line is not affected by dedent, and the middle lines don't consider the whitespace prefix of the first line.
|
||||
#let blocky-dedent-firstline3 = (
|
||||
name: "blocky-dedent-firstline3",
|
||||
input: ``` test
|
||||
test2
|
||||
```,
|
||||
text: "test\n test2",
|
||||
block: true,
|
||||
)
|
||||
|
||||
// The first line is not affected by dedent, and the middle lines don't consider the whitespace prefix of the first line.
|
||||
#let blocky-dedent-firstline4 = (
|
||||
name: "blocky-dedent-firstline4",
|
||||
input: ``` test
|
||||
test2
|
||||
```,
|
||||
text: " test\ntest2",
|
||||
block: true,
|
||||
)
|
||||
|
||||
#let blocky-dedent-lastline = (
|
||||
name: "blocky-dedent-lastline",
|
||||
input: ```
|
||||
test
|
||||
```,
|
||||
text: " test",
|
||||
block: true,
|
||||
)
|
||||
|
||||
#let blocky-dedent-lastline2 = (
|
||||
name: "blocky-dedent-lastline2",
|
||||
input: ```
|
||||
test
|
||||
```,
|
||||
text: "test",
|
||||
block: true,
|
||||
)
|
||||
|
||||
#let blocky-tab = (
|
||||
name: "blocky-tab",
|
||||
input: {
|
||||
```
|
||||
test
|
||||
```
|
||||
},
|
||||
text: "\ttest",
|
||||
block: true,
|
||||
)
|
||||
|
||||
#let blocky-tab-dedent = (
|
||||
name: "blocky-tab-dedent",
|
||||
input: {
|
||||
```
|
||||
test
|
||||
|
||||
```
|
||||
},
|
||||
text: "test\n ",
|
||||
block: true,
|
||||
)
|
||||
|
||||
// All cases checked by the assertion loop below. Every case dictionary
// defined above must be listed here, otherwise it is silently never tested.
#let cases = (
  empty,
  backtick,
  lang-backtick,
  lang-space,
  lang-newline,
  blocky,
  blocky-dedent,
  blocky-dedent-firstline,
  blocky-dedent-firstline2,
  blocky-dedent-firstline3,
  blocky-dedent-firstline4,
  blocky-dedent-lastline,
  blocky-dedent-lastline2,
  blocky-tab,
  blocky-tab-dedent,
)
|
||||
|
||||
#for c in cases {
|
||||
assert.eq(c.text, c.input.text, message: "in point " + c.name + ", expect " + repr(c.text) + ", got " + repr(c.input.text) + "")
|
||||
let block = c.at("block", default: false)
|
||||
assert.eq(block, c.input.block, message: "in point " + c.name + ", expect " + repr(block) + ", got " + repr(c.input.block) + "")
|
||||
}
|
Loading…
Reference in New Issue
Block a user