diff --git a/src/common.cpp b/src/common.cpp index fc1328849..1fdd12f5e 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -332,38 +332,13 @@ wcstring str2wcstring(const std::string &in, size_t len) { return str2wcs_internal(in.data(), len); } -/// This function is distinguished from wcs2str_internal in that it allows embedded null bytes. std::string wcs2string(const wcstring &input) { std::string result; result.reserve(input.size()); - - mbstate_t state = {}; - char converted[MB_LEN_MAX]; - - for (auto wc : input) { - if (wc == INTERNAL_SEPARATOR) { - // do nothing - } else if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) { - result.push_back(wc - ENCODE_DIRECT_BASE); - } else if (MB_CUR_MAX == 1) { // single-byte locale (C/POSIX/ISO-8859) - // If `wc` contains a wide character we emit a question-mark. - if (wc & ~0xFF) { - wc = '?'; - } - converted[0] = wc; - result.append(converted, 1); - } else { - std::memset(converted, 0, sizeof converted); - size_t len = std::wcrtomb(converted, wc, &state); - if (len == static_cast(-1)) { - FLOGF(char_encoding, L"Wide character U+%4X has no narrow representation", wc); - std::memset(&state, 0, sizeof(state)); - } else { - result.append(converted, len); - } - } - } - + wcs2string_callback(input.data(), input.size(), [&](const char *buff, size_t bufflen) { + result.append(buff, bufflen); + return true; + }); return result; } diff --git a/src/wcstringutil.cpp b/src/wcstringutil.cpp index 9a75132c2..a433f6fba 100644 --- a/src/wcstringutil.cpp +++ b/src/wcstringutil.cpp @@ -8,6 +8,7 @@ #include #include "common.h" +#include "flog.h" wcstring_range wcstring_tok(wcstring &str, const wcstring &needle, wcstring_range last) { using size_type = wcstring::size_type; @@ -196,3 +197,7 @@ wcstring join_strings(const wcstring_list_t &vals, wchar_t sep) { } return result; } + +void wcs2string_bad_char(wchar_t wc) { + FLOGF(char_encoding, L"Wide character U+%4X has no narrow representation", wc); +} diff --git a/src/wcstringutil.h b/src/wcstringutil.h index e18e6cee5..63dd91292 100644 --- a/src/wcstringutil.h +++ b/src/wcstringutil.h @@ -3,10 +3,12 @@ #define FISH_WCSTRINGUTIL_H #include +#include #include #include #include "common.h" +#include "expand.h" /// Test if a string prefixes another. Returns true if a is a prefix of b. bool string_prefixes_string(const wcstring &proposed_prefix, const wcstring &value); @@ -136,6 +138,47 @@ wcstring trim(wcstring input, const wchar_t *any_of); /// Converts a string to lowercase. wcstring wcstolower(wcstring input); +// Out-of-line helper for wcs2string_callback. +void wcs2string_bad_char(wchar_t); + +/// Implementation of wcs2string that accepts a callback. +/// This invokes \p func with (const char*, size_t) pairs. +/// If \p func returns false, it stops; otherwise it continues. +/// \return false if the callback returned false, otherwise true. +template +bool wcs2string_callback(const wchar_t *input, size_t len, const Func &func) { + mbstate_t state = {}; + char converted[MB_LEN_MAX]; + + for (size_t i = 0; i < len; i++) { + wchar_t wc = input[i]; + // TODO: this doesn't seem sound. + if (wc == INTERNAL_SEPARATOR) { + // do nothing + } else if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) { + converted[0] = wc - ENCODE_DIRECT_BASE; + if (!func(converted, 1)) return false; + } else if (MB_CUR_MAX == 1) { // single-byte locale (C/POSIX/ISO-8859) + // If `wc` contains a wide character we emit a question-mark. + if (wc & ~0xFF) { + wc = '?'; + } + converted[0] = wc; + if (!func(converted, 1)) return false; + } else { + std::memset(converted, 0, sizeof converted); + size_t len = std::wcrtomb(converted, wc, &state); + if (len == static_cast(-1)) { + wcs2string_bad_char(wc); + std::memset(&state, 0, sizeof(state)); + } else { + if (!func(converted, len)) return false; + } + } + } + return true; +} + /// Support for iterating over a newline-separated string. template class line_iterator_t {