From dfa043ae1f6a57f23137af8c03717fe19231b3f9 Mon Sep 17 00:00:00 2001
From: msizanoen1
Date: Wed, 1 Mar 2023 17:35:17 +0700
Subject: [PATCH] escape: Ensure that output is always valid UTF-8

This ensures that shell string escape operations will not produce output
with invalid UTF-8 from the input by escaping invalid UTF-8 data as if
they were single byte characters.

(cherry picked from commit 00f57157f32f6ed5a68d68986b013c203cd78c37)
(cherry picked from commit e906fd24214f53f1160918a5bb55a1d14368bfd8)
(cherry picked from commit e0a674f7f8ed934eb3b600f09b0ca75a9579293c)
---
 src/basic/escape.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/basic/escape.c b/src/basic/escape.c
index e04b435d5b..317e2786d1 100644
--- a/src/basic/escape.c
+++ b/src/basic/escape.c
@@ -474,14 +474,20 @@ char* octescape(const char *s, size_t len) {
 static char* strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
         assert(bad);
 
-        for (; *s; s++)
-                if (char_is_cc(*s))
-                        t += cescape_char(*s, t);
-                else {
+        while (*s) {
+                int l = utf8_encoded_valid_unichar(s, SIZE_MAX);
+
+                if (char_is_cc(*s) || l < 0)
+                        t += cescape_char(*(s++), t);
+                else if (l == 1) {
                         if (*s == '\\' || strchr(bad, *s))
                                 *(t++) = '\\';
-                        *(t++) = *s;
+                        *(t++) = *(s++);
+                } else {
+                        t = mempcpy(t, s, l);
+                        s += l;
                 }
+        }
 
         return t;
 }
@@ -510,11 +516,16 @@ char* shell_maybe_quote(const char *s, ShellEscapeFlags flags) {
         if (FLAGS_SET(flags, SHELL_ESCAPE_EMPTY) && isempty(s))
                 return strdup("\"\""); /* We don't use $'' here in the POSIX mode. "" is fine too. */
 
-        for (p = s; *p; p++)
-                if (char_is_cc(*p) ||
+        for (p = s; *p; ) {
+                int l = utf8_encoded_valid_unichar(p, SIZE_MAX);
+
+                if (char_is_cc(*p) || l < 0 ||
                     strchr(WHITESPACE SHELL_NEED_QUOTES, *p))
                         break;
 
+                p += l;
+        }
+
         if (!*p)
                 return strdup(s);
 