[Feature] Allow function as argument for string.replace (#944)

This commit is contained in:
dvdvgt 2023-05-03 11:41:50 +02:00 committed by GitHub
parent edc0632d8c
commit be3c4d7876
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 129 additions and 17 deletions

View File

@ -361,8 +361,8 @@ string and returns the resulting string.
- pattern: string or regex (positional, required)
The pattern to search for.
- replacement: string (positional, required)
The string to replace the matches with.
- replacement: string or function (positional, required)
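The string to replace the matches with, or a function that is called with a dictionary describing each match (its `start`, `end`, `text`, and `captures`) and returns the replacement string. For string patterns, `captures` is empty.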
- count: integer (named)
If given, only the first `count` matches of the pattern are replaced.
- returns: string

View File

@ -56,9 +56,9 @@ pub fn call(
"matches" => Value::Array(string.matches(args.expect("pattern")?)),
"replace" => {
let pattern = args.expect("pattern")?;
let with = args.expect("replacement string")?;
let with = args.expect("string or function")?;
let count = args.named("count")?;
Value::Str(string.replace(pattern, with, count))
Value::Str(string.replace(vm, pattern, with, count)?)
}
"trim" => {
let pattern = args.eat()?;

View File

@ -6,8 +6,9 @@ use std::ops::{Add, AddAssign, Deref};
use ecow::EcoString;
use unicode_segmentation::UnicodeSegmentation;
use super::{cast_from_value, dict, Array, Dict, Value};
use crate::diag::StrResult;
use super::{cast_from_value, dict, Array, Dict, Func, Value, Vm};
use crate::diag::{At, SourceResult, StrResult};
use crate::eval::Args;
use crate::geom::GenAlign;
/// Create a new [`Str`] from a format string.
@ -257,16 +258,71 @@ impl Str {
}
/// Replace at most `count` occurrences of the given pattern with a
/// replacement string (beginning from the start).
pub fn replace(&self, pattern: StrPattern, with: Self, count: Option<usize>) -> Self {
match pattern {
StrPattern::Str(pat) => match count {
Some(n) => self.0.replacen(pat.as_str(), &with, n).into(),
None => self.0.replace(pat.as_str(), &with).into(),
},
StrPattern::Regex(re) => match count {
Some(n) => re.replacen(self, n, with.as_str()).into(),
None => re.replace(self, with.as_str()).into(),
/// replacement string or function (beginning from the start). If no count is given,
/// all occurrences are replaced.
///
/// When `with` is a function, it is called once per match with a dictionary
/// built by `match_to_dict` (string patterns) or `captures_to_dict` (regex
/// patterns), and its result is inserted in place of the match.
///
/// A `count` of zero leaves the string unchanged for every combination of
/// pattern and replacement kind.
///
/// # Errors
///
/// Returns an error if the replacement function fails or if its result
/// cannot be cast to a string.
pub fn replace(
    &self,
    vm: &mut Vm,
    pattern: StrPattern,
    with: Replacement,
    count: Option<usize>,
) -> SourceResult<Self> {
    match with {
        Replacement::Func(func) => {
            // heuristic: assume the new string is about the same length as the current string
            let mut new = String::with_capacity(self.as_str().len());
            // Byte offset just past the previously handled match.
            let mut last_match = 0;
            match &pattern {
                StrPattern::Str(pat) => {
                    let matches = self
                        .0
                        .match_indices(pat.as_str())
                        .map(|(start, s)| (start, start + s.len(), s))
                        .take(count.unwrap_or(usize::MAX));
                    for (start, end, text) in matches {
                        // push everything until the match
                        new.push_str(&self.as_str()[last_match..start]);
                        let args = Args::new(
                            func.span(),
                            [match_to_dict((start, text)).into()],
                        );
                        let res =
                            func.call_vm(vm, args)?.cast::<Str>().at(func.span())?;
                        new.push_str(res.as_str());
                        last_match = end;
                    }
                }
                StrPattern::Regex(re) => {
                    let all_captures =
                        re.captures_iter(self).take(count.unwrap_or(usize::MAX));
                    for caps in all_captures {
                        // `caps.get(0)` returns the entire match over all capture groups.
                        // The bounds are read before `caps` is moved into the dictionary.
                        let (start, end) =
                            caps.get(0).map(|c| (c.start(), c.end())).unwrap();
                        // push everything until the match
                        new.push_str(&self.as_str()[last_match..start]);
                        let args =
                            Args::new(func.span(), [captures_to_dict(caps).into()]);
                        let res =
                            func.call_vm(vm, args)?.cast::<Str>().at(func.span())?;
                        new.push_str(res.as_str());
                        last_match = end;
                    }
                }
            }
            // push the remainder
            new.push_str(&self.as_str()[last_match..]);
            Ok(new.into())
        }
        Replacement::Str(s) => match pattern {
            StrPattern::Str(pat) => match count {
                Some(n) => Ok(self.0.replacen(pat.as_str(), &s, n).into()),
                None => Ok(self.0.replace(pat.as_str(), &s).into()),
            },
            StrPattern::Regex(re) => match count {
                // The regex crate's `replacen` treats a limit of 0 as
                // "replace all matches", whereas `str::replacen` (and the
                // function branch above) replace nothing. Handle zero
                // explicitly so `count: 0` always means "no replacements".
                Some(0) => Ok(self.as_str().to_owned().into()),
                Some(n) => Ok(re.replacen(self, n, s.as_str()).into()),
                None => Ok(re.replace_all(self, s.as_str()).into()),
            },
        },
    }
}
@ -521,3 +577,18 @@ cast_from_value! {
_ => Err("expected either `start` or `end`")?,
},
}
/// A replacement for a matched [`Str`].
///
/// Either a fixed string that is substituted for every match, or a function
/// that is called for each match to compute its substitution.
pub enum Replacement {
/// A string a match is replaced with.
Str(Str),
/// Function of type Dict -> Str (see `captures_to_dict` or `match_to_dict`)
/// whose output is inserted for the match.
Func(Func),
}
// Conversion into a `Replacement`: a string value maps to
// `Replacement::Str` and a function value maps to `Replacement::Func`.
// NOTE(review): the macro is defined elsewhere; presumably any other value
// type is rejected with an "expected string or function" error — confirm.
cast_from_value! {
Replacement,
text: Str => Self::Str(text),
func: Func => Self::Func(func)
}

View File

@ -111,7 +111,7 @@
#test(timesum("1:20, 2:10, 0:40"), "4:10")
---
// Test the `replace` method.
// Test the `replace` method with `Str` replacements.
#test("ABC".replace("", "-"), "-A-B-C-")
#test("Ok".replace("Ok", "Nope", count: 0), "Ok")
#test("to add?".replace("", "How ", count: 1), "How to add?")
@ -126,6 +126,47 @@
#test("123".replace(regex("\d$"), "_"), "12_")
#test("123".replace(regex("\d{1,2}$"), "__"), "1__")
---
// Test the `replace` method with `Func` replacements.
#test("abc".replace(regex("[a-z]"), m => {
str(m.start) + m.text + str(m.end)
}), "0a11b22c3")
#test("abcd, efgh".replace(regex("\w+"), m => {
upper(m.text)
}), "ABCD, EFGH")
#test("hello : world".replace(regex("^(.+)\s*(:)\s*(.+)$"), m => {
upper(m.captures.at(0)) + m.captures.at(1) + " " + upper(m.captures.at(2))
}), "HELLO : WORLD")
#test("hello world, lorem ipsum".replace(regex("(\w+) (\w+)"), m => {
m.captures.at(1) + " " + m.captures.at(0)
}), "world hello, ipsum lorem")
#test("hello world, lorem ipsum".replace(regex("(\w+) (\w+)"), count: 1, m => {
m.captures.at(1) + " " + m.captures.at(0)
}), "world hello, lorem ipsum")
#test("123 456".replace(regex("[a-z]+"), "a"), "123 456")
#test("abc".replace("", m => "-"), "-a-b-c-")
#test("abc".replace("", m => "-", count: 1), "-abc")
#test("123".replace("abc", m => ""), "123")
#test("123".replace("abc", m => "", count: 2), "123")
#test("a123b123c".replace("123", m => {
str(m.start) + "-" + str(m.end)
}), "a1-4b5-8c")
#test("halla warld".replace("a", m => {
if m.start == 1 { "e" }
else if m.start == 4 or m.start == 7 { "o" }
}), "hello world")
#test("aaa".replace("a", m => str(m.captures.len())), "000")
---
// Error: 23-24 expected string, found integer
#"123".replace("123", m => 1)
---
// Error: 23-32 expected string or function, found array
#"123".replace("123", (1, 2, 3))
---
// Test the `trim` method.
#let str = "Typst, LaTeX, Word, InDesign"