support quote-word parsing for one-line format

so that parsing CD ROM repositories with spaces in the name works too.
But it's not limited to that, and should make one-line parsing rather
similar to what APT does (stanza parsing in APT doesn't use
ParseQuoteWord at all AFAICS).

Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
This commit is contained in:
Fabian Ebner 2021-07-01 15:46:34 +02:00 committed by Thomas Lamprecht
parent 8265d0ce33
commit ae7e2360b7
6 changed files with 186 additions and 38 deletions

View File

@ -1,7 +1,6 @@
use std::convert::TryInto;
use std::io::BufRead;
use std::iter::{Iterator, Peekable};
use std::str::SplitAsciiWhitespace;
use std::iter::Iterator;
use anyhow::{bail, format_err, Error};
@ -9,6 +8,78 @@ use crate::repositories::{APTRepository, APTRepositoryFileType, APTRepositoryOpt
use super::APTRepositoryParser;
// TODO convert %-escape characters. Also adapt printing back accordingly,
// because at least '%' needs to be re-escaped when printing.
/// Iterator splitting a line into words, see APT's ParseQuoteWord in contrib/strutl.cc
///
/// Doesn't split on whitespace when between `[]` or `""` and strips `"` from the word.
///
/// Currently, %-escaped characters are not interpreted, but passed along as is.
///
/// Each item is either the next word or an error if a `"` or `[` is never closed.
struct SplitQuoteWord {
    /// The complete input that is being split.
    rest: String,
    /// Byte offset into `rest` of the first character not consumed yet.
    position: usize,
}

impl SplitQuoteWord {
    /// Creates a splitter that starts at the beginning of `string`.
    pub fn new(string: String) -> Self {
        Self {
            rest: string,
            position: 0,
        }
    }
}

impl Iterator for SplitQuoteWord {
    type Item = Result<String, Error>;

    /// Returns the next word, an error for an unterminated `"`/`[`, or `None` when only
    /// whitespace (or nothing) is left.
    fn next(&mut self) -> Option<Self::Item> {
        let rest = &self.rest[self.position..];

        // byte offset (relative to `rest`) of the first character of the current word
        let mut start = None;
        // closing character that has to be seen before whitespace may end the word again
        let mut wait_for = None;

        // `char_indices` yields byte offsets, which is what slicing `rest` requires. Counting
        // characters instead breaks as soon as the input contains a multi-byte character.
        for (n, c) in rest.char_indices() {
            // keep `position` a byte offset on a character boundary
            self.position += c.len_utf8();

            if let Some(wait_for_char) = wait_for {
                if wait_for_char == c {
                    wait_for = None;
                }
                continue;
            }

            if c.is_ascii_whitespace() {
                if let Some(start) = start {
                    // the terminating whitespace was consumed above, but is not part of the word
                    return Some(Ok(rest[start..n].replace('"', "")));
                }
                // leading whitespace before a word
                continue;
            }

            if start.is_none() {
                start = Some(n);
            }

            if c == '"' {
                wait_for = Some('"');
            }

            if c == '[' {
                wait_for = Some(']');
            }
        }

        if let Some(wait_for) = wait_for {
            return Some(Err(format_err!("missing terminating '{}'", wait_for)));
        }

        // the last word is terminated by the end of the input rather than whitespace
        start.map(|start| Ok(rest[start..].replace('"', "")))
    }
}
pub struct APTListFileParser<R: BufRead> {
input: R,
line_nr: usize,
@ -31,24 +102,18 @@ impl<R: BufRead> APTListFileParser<R> {
/// Errors when options are invalid or not closed by `']'`.
fn parse_options(
options: &mut Vec<APTRepositoryOption>,
tokens: &mut Peekable<SplitAsciiWhitespace>,
tokens: &mut SplitQuoteWord,
) -> Result<(), Error> {
let mut option = match tokens.peek() {
Some(token) => {
match token.strip_prefix('[') {
Some(option) => option,
None => return Ok(()), // doesn't look like options
}
}
None => return Ok(()),
};
tokens.next(); // avoid reading the beginning twice
let mut finished = false;
loop {
let mut option = match tokens.next() {
Some(token) => token?,
None => bail!("options not closed by ']'"),
};
if let Some(stripped) = option.strip_suffix(']') {
option = stripped;
option = stripped.to_string();
if option.is_empty() {
break;
}
@ -83,11 +148,6 @@ impl<R: BufRead> APTListFileParser<R> {
if finished {
break;
}
option = match tokens.next() {
Some(option) => option,
None => bail!("options not closed by ']'"),
}
}
Ok(())
@ -122,24 +182,43 @@ impl<R: BufRead> APTListFileParser<R> {
line = line_start;
}
let mut tokens = line.split_ascii_whitespace().peekable();
match tokens.next() {
Some(package_type) => {
// e.g. quoted "deb" is not accepted by APT, so no need for quote word parsing here
line = match line.split_once(|c| char::is_ascii_whitespace(&c)) {
Some((package_type, rest)) => {
repo.types.push(package_type.try_into()?);
rest
}
None => return Ok(None), // empty line
};
line = line.trim_start_matches(|c| char::is_ascii_whitespace(&c));
let has_options = match line.strip_prefix('[') {
Some(rest) => {
// avoid the start of the options to be interpreted as the start of a quote word
line = rest;
true
}
None => false,
};
let mut tokens = SplitQuoteWord::new(line.to_string());
if has_options {
Self::parse_options(&mut repo.options, &mut tokens)?;
}
Self::parse_options(&mut repo.options, &mut tokens)?;
// the rest of the line is just '<uri> <suite> [<components>...]'
let mut tokens = tokens.map(str::to_string);
repo.uris
.push(tokens.next().ok_or_else(|| format_err!("missing URI"))?);
repo.suites
.push(tokens.next().ok_or_else(|| format_err!("missing suite"))?);
repo.components.extend(tokens);
.push(tokens.next().ok_or_else(|| format_err!("missing URI"))??);
repo.suites.push(
tokens
.next()
.ok_or_else(|| format_err!("missing suite"))??,
);
for token in tokens {
repo.components.push(token?);
}
repo.comment = std::mem::take(&mut self.comment);

View File

@ -433,6 +433,41 @@ fn suite_variant(suite: &str) -> (&str, &str) {
(suite, "")
}
/// Prepares `string` for being written as a single word in the one-line format.
///
/// Strips existing double quotes from the string first, and then adds double quotes at
/// the beginning and end if there is an ASCII whitespace in the `string`, which is not
/// escaped by `[]`, or if a `[` in the `string` is never closed by a `]`.
fn quote_for_one_line(string: &str) -> String {
    // easier to just quote the whole string, so ignore pre-existing quotes
    // currently, parsing removes them anyways, but being on the safe side is rather cheap
    let string = string.replace('"', "");

    let mut add_quotes = false;
    let mut in_brackets = false;

    for c in string.chars() {
        if in_brackets {
            // everything up to the closing bracket is protected from splitting
            in_brackets = c != ']';
        } else if c.is_ascii_whitespace() {
            add_quotes = true;
            break;
        } else if c == '[' {
            in_brackets = true;
        }
    }

    // An unquoted word with an unterminated '[' is rejected with "missing terminating ']'"
    // when split into quote words again, while the quoted form is read back as one word.
    if add_quotes || in_brackets {
        format!("\"{}\"", string)
    } else {
        string
    }
}
/// Writes a repository in one-line format followed by a blank line.
///
/// Expects that `repo.file_type == APTRepositoryFileType::List`.
@ -457,15 +492,26 @@ fn write_one_line(repo: &APTRepository, w: &mut dyn Write) -> Result<(), Error>
if !repo.options.is_empty() {
write!(w, "[ ")?;
repo.options
.iter()
.try_for_each(|option| write!(w, "{}={} ", option.key, option.values.join(",")))?;
for option in repo.options.iter() {
let option = quote_for_one_line(&format!("{}={}", option.key, option.values.join(",")));
write!(w, "{} ", option)?;
}
write!(w, "] ")?;
};
write!(w, "{} ", repo.uris[0])?;
write!(w, "{} ", repo.suites[0])?;
writeln!(w, "{}", repo.components.join(" "))?;
write!(w, "{} ", quote_for_one_line(&repo.uris[0]))?;
write!(w, "{} ", quote_for_one_line(&repo.suites[0]))?;
writeln!(
w,
"{}",
repo.components
.iter()
.map(|comp| quote_for_one_line(comp))
.collect::<Vec<String>>()
.join(" ")
)?;
writeln!(w)?;

View File

@ -0,0 +1,10 @@
# deb [ trusted=yes ] cdrom:[Proxmox VE 5.1]/ stretch pve
# deb [ trusted=yes ] cdrom:[Proxmox VE 5.1]/proxmox/packages/ /
deb [ trusted=yes ] cdrom:[Proxmox VE 7.0 BETA]/ bullseye pve
deb cdrom:[Proxmox VE 7.0 BETA]/proxmox/packages/ /
deb [ trusted=yes ] cdrom:[Debian GNU/Linux 10.6.0 _Buster_ - Official amd64 NETINST 20200926-10:16]/ buster main

View File

@ -0,0 +1,4 @@
deb [ trusted=yes ] "file:///some/spacey/mount point/" bullseye pve
deb [ lang=it ] "file:///some/spacey/mount point/proxmox/packages/" /

View File

@ -0,0 +1,7 @@
#deb [trusted=yes] cdrom:[Proxmox VE 5.1]/ stretch pve
#deb [trusted=yes] cdrom:[Proxmox VE 5.1]/proxmox/packages/ /
deb [trusted=yes] cdrom:[Proxmox VE 7.0 BETA]/ bullseye pve
deb cdrom:[Proxmox VE 7.0 BETA]/proxmox/packages/ /
deb [ "trusted=yes" ] cdrom:[Debian GNU/Linux 10.6.0 _Buster_ - Official amd64 NETINST 20200926-10:16]/ buster main

View File

@ -0,0 +1,2 @@
deb [trusted=yes] "file:///some/spacey/mount point/" bullseye pve
deb [lang="it"] file:///some/spacey/"mount point"/proxmox/packages/ /