feat: multiple matches per line

This commit is contained in:
alexpasmantier 2024-07-26 13:54:18 +02:00
parent 2e5ad3692a
commit 65db10c1d2
3 changed files with 82 additions and 40 deletions

View File

@ -3,6 +3,7 @@ use clap::Parser;
use crossbeam::queue::ArrayQueue;
use ignore::DirEntry;
use printer::PrinterConfig;
use search::build_searcher;
use crate::cli::{process_cli_args, Cli};
use crate::fs::walk_builder;
@ -33,7 +34,8 @@ pub fn main() -> anyhow::Result<()> {
let file_type = entry.file_type().unwrap();
if !file_type.is_dir() {
let path = entry.path().to_path_buf();
match search_file(path, &matcher, cli_args.multiline) {
let mut searcher = build_searcher(cli_args.multiline);
match search_file(path, &matcher, &mut searcher) {
Ok(file_results) => {
if !file_results.is_empty() {
queue.push(file_results).unwrap();

View File

@ -121,30 +121,43 @@ impl Printer {
fn write_colored_search_results(&mut self, results: Vec<SearchResult>) -> Result<()> {
results.iter().try_for_each(|result| {
self.buffer
.set_color(&self.config.color_specs.line_numbers)?;
write!(&mut self.buffer, "{}:\t", result.line_number)?;
self.buffer.set_color(&self.config.color_specs.lines)?;
write!(
&mut self.buffer,
"{}",
result.line[..result.match_range.start].to_string()
)?;
self.buffer.set_color(&self.config.color_specs.matched)?;
write!(
&mut self.buffer,
"{}",
&result.line[result.match_range.start..result.match_range.end]
)?;
self.buffer.set_color(&self.config.color_specs.lines)?;
write!(
&mut self.buffer,
"{}",
&result.line[result.match_range.end..]
)
self.write_colored_line(result)?;
Ok(())
})
}
/// Writes one result: the line number followed by `":\t"` in the
/// `line_numbers` colour, then the line text with its matches highlighted.
fn write_colored_line(&mut self, result: &SearchResult) -> Result<()> {
    let number_spec = &self.config.color_specs.line_numbers;
    self.buffer.set_color(number_spec)?;
    write!(&mut self.buffer, "{}:\t", result.line_number)?;
    // The line body (plain text interleaved with highlighted matches) is
    // handled separately.
    self.write_colored_matches(result)
}
/// Writes the text of `result.line`, using the `matched` colour for every
/// range listed in `result.matches` and the `lines` colour for everything
/// in between.
///
/// The ranges are consumed in the order they are stored; each one is sliced
/// out of `result.line` by its `start`/`end` offsets.
fn write_colored_matches(&mut self, result: &SearchResult) -> Result<()> {
    self.buffer.set_color(&self.config.color_specs.lines)?;

    // Offset just past the previously written match (0 before the first one).
    let mut cursor = 0;
    for span in result.matches.iter() {
        // Unhighlighted text between the previous match and this one.
        write!(&mut self.buffer, "{}", &result.line[cursor..span.start])?;

        // The match itself, in the highlight colour.
        self.buffer.set_color(&self.config.color_specs.matched)?;
        write!(&mut self.buffer, "{}", &result.line[span.start..span.end])?;

        // Back to the regular line colour for whatever follows.
        self.buffer.set_color(&self.config.color_specs.lines)?;
        cursor = span.end;
    }

    // Remainder of the line after the last match (the whole line if there
    // were no matches).
    write!(&mut self.buffer, "{}", &result.line[cursor..])
}
/// Appends `text` followed by a newline to the output buffer.
fn writeln_to_buffer(&mut self, text: String) -> Result<()> {
    let buffer = &mut self.buffer;
    writeln!(buffer, "{}", text.as_str())
}

View File

@ -14,7 +14,7 @@ pub struct SearchResult {
pub line: String,
pub line_start: u64,
pub line_end: u64,
pub match_range: MatchRange,
pub matches: Vec<MatchRange>,
}
#[derive(Serialize, Debug, Clone)]
@ -175,37 +175,64 @@ impl<'a> IntoIterator for &'a FileResults {
}
}
/// A single match found on a line, recorded while the searcher is running.
///
/// One value is pushed per match, so a line with several matches produces
/// several of these; `search_file` later merges consecutive entries that
/// share a `line_number` into one `SearchResult`.
struct PartialSearchResult {
/// Line number passed to the sink callback for the matched line.
pub line_number: u64,
/// Owned copy of the line text the match was found in.
pub line: String,
/// Range of this match, built with `MatchRange::from_match`.
pub m: MatchRange,
}
pub fn search_file<'a>(
path: PathBuf,
matcher: &RegexMatcher,
multiline: bool,
searcher: &mut Searcher,
) -> anyhow::Result<FileResults> {
let mut matches: Vec<SearchResult> = Vec::new();
let mut partial_results: Vec<PartialSearchResult> = Vec::new();
let mut searcher = build_searcher(multiline);
// PERF: we could use search_file instead and handle IO ourselves
// this would allow us to:
// - search the file in parallel (chunking)
// - pre-allocate the results vector based on file size / number of lines
searcher.search_path(
&matcher,
&path,
UTF8(|lnum, line| match matcher.find(line.as_bytes()) {
Ok(Some(m)) => {
matches.push(SearchResult {
// TODO: use find_iter instead of find to find multiple matches per line
UTF8(|lnum, line| {
matcher.find_iter(line.as_bytes(), |m| {
partial_results.push(PartialSearchResult {
line_number: lnum,
line: line.to_string(),
line_start: lnum,
line_end: lnum + line.matches('\n').count() as u64 - 1,
match_range: MatchRange::from_match(m),
m: MatchRange::from_match(m),
});
Ok(true)
}
Ok(None) => Ok(false),
Err(err) => Err(err.into()),
true
})?;
Ok(true)
}),
)?;
Ok(FileResults {
path,
results: matches,
})
let mut results = vec![SearchResult {
line_number: partial_results[0].line_number,
line: partial_results[0].line.clone(),
line_start: partial_results[0].line_number,
line_end: partial_results[0].line_number,
matches: vec![partial_results[0].m.clone()],
}];
for partial_result in partial_results[1..].iter() {
let last_result = results.last_mut().unwrap();
if last_result.line_number != partial_result.line_number {
results.push(SearchResult {
line_number: partial_result.line_number,
line: partial_result.line.clone(),
line_start: partial_result.line_number,
line_end: partial_result.line_number,
matches: vec![partial_result.m.clone()],
});
} else {
last_result.matches.push(partial_result.m.clone());
last_result.line_end = partial_result.line_number;
}
}
Ok(FileResults { path, results })
}
pub fn build_matcher(patterns: &Vec<String>) -> anyhow::Result<RegexMatcher> {