From fb92ad946b6df18797ad55f66e5b05fefa932eb5 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 14 Feb 2021 13:15:29 -0800 Subject: [PATCH] Rework null terminated arrays Several functions including wgetopt and execve operate on null-terminated arrays of nul-terminated pointers: a list of pointers to C strings where the last pointer is null. Prior to this change, each process_t stored its argv in such an array. This had two problems: 1. It was awkward to work with this type, instead of using std::vector, etc. 2. The process's arguments would be rearranged by builtins which is surprising Our null terminated arrays were built around a fancy type that would copy input strings and also generate an array of pointers to them, in one big allocation. Switch to a new model where we construct an array of pointers over existing strings. So you can supply a `vector` and now `null_terminated_array_t` will just make a list of pointers to them. Now processes can just store their argv in a familiar wcstring_list_t. --- src/builtin.cpp | 22 +++--- src/builtin.h | 2 +- src/builtin_source.cpp | 8 ++- src/env.cpp | 15 ++-- src/env.h | 2 +- src/exec.cpp | 23 ++++--- src/fish_tests.cpp | 14 ++-- src/null_terminated_array.cpp | 77 ++------------------- src/null_terminated_array.h | 125 ++++++++++++++++------------------ src/parse_util.cpp | 4 +- src/parse_util.h | 2 +- src/proc.h | 42 ++++-------- 12 files changed, 127 insertions(+), 209 deletions(-) diff --git a/src/builtin.cpp b/src/builtin.cpp index c660defe2..3bfce8ed1 100644 --- a/src/builtin.cpp +++ b/src/builtin.cpp @@ -453,25 +453,25 @@ bool builtin_exists(const wcstring &cmd) { return static_cast(builtin_look /// Is the command a keyword we need to special-case the handling of `-h` and `--help`. static const wchar_t *const help_builtins[] = {L"for", L"while", L"function", L"if", L"end", L"switch", L"case"}; -static bool cmd_needs_help(const wchar_t *cmd) { return contains(help_builtins, cmd); } +static bool cmd_needs_help(const wcstring &cmd) { return contains(help_builtins, cmd); } /// Execute a builtin command -proc_status_t builtin_run(parser_t &parser, const wchar_t **argv, io_streams_t &streams) { - UNUSED(parser); - UNUSED(streams); - if (argv == nullptr || argv[0] == nullptr) - return proc_status_t::from_exit_code(STATUS_INVALID_ARGS); +proc_status_t builtin_run(parser_t &parser, const wcstring_list_t &argv, io_streams_t &streams) { + if (argv.empty()) return proc_status_t::from_exit_code(STATUS_INVALID_ARGS); + const wcstring &cmdname = argv.front(); // We can be handed a keyword by the parser as if it was a command. This happens when the user // follows the keyword by `-h` or `--help`. Since it isn't really a builtin command we need to // handle displaying help for it here. - if (argv[1] && !argv[2] && parse_util_argument_is_help(argv[1]) && cmd_needs_help(argv[0])) { - builtin_print_help(parser, streams, argv[0]); + if (argv.size() == 2 && parse_util_argument_is_help(argv[1]) && cmd_needs_help(cmdname)) { + builtin_print_help(parser, streams, cmdname.c_str()); return proc_status_t::from_exit_code(STATUS_CMD_OK); } - if (const builtin_data_t *data = builtin_lookup(argv[0])) { - maybe_t ret = data->func(parser, streams, argv); + if (const builtin_data_t *data = builtin_lookup(cmdname)) { + // Construct the permutable argv array which the builtin expects. + null_terminated_array_t argv_arr(argv); + maybe_t ret = data->func(parser, streams, argv_arr.get()); if (!ret) { return proc_status_t::empty(); } @@ -486,7 +486,7 @@ proc_status_t builtin_run(parser_t &parser, const wchar_t **argv, io_streams_t & return proc_status_t::from_exit_code(code); } - FLOGF(error, UNKNOWN_BUILTIN_ERR_MSG, argv[0]); + FLOGF(error, UNKNOWN_BUILTIN_ERR_MSG, cmdname.c_str()); return proc_status_t::from_exit_code(STATUS_CMD_ERROR); } diff --git a/src/builtin.h b/src/builtin.h index fece694aa..ef4520229 100644 --- a/src/builtin.h +++ b/src/builtin.h @@ -85,7 +85,7 @@ enum { COMMAND_NOT_BUILTIN, BUILTIN_REGULAR, BUILTIN_FUNCTION }; void builtin_init(); bool builtin_exists(const wcstring &cmd); -proc_status_t builtin_run(parser_t &parser, const wchar_t **argv, io_streams_t &streams); +proc_status_t builtin_run(parser_t &parser, const wcstring_list_t &argv, io_streams_t &streams); wcstring_list_t builtin_get_names(); void builtin_get_names(completion_list_t *list); diff --git a/src/builtin_source.cpp b/src/builtin_source.cpp index 2c2aa3c91..8b4bdce73 100644 --- a/src/builtin_source.cpp +++ b/src/builtin_source.cpp @@ -93,10 +93,14 @@ maybe_t builtin_source(parser_t &parser, io_streams_t &streams, const wchar auto &ld = parser.libdata(); scoped_push filename_push{&ld.current_filename, fn_intern}; + // Construct argv from our null-terminated list. // This is slightly subtle. If this is a bare `source` with no args then `argv + optind` already // points to the end of argv. Otherwise we want to skip the file name to get to the args if any. - wcstring_list_t argv_list = - null_terminated_array_t::to_list(argv + optind + (argc == optind ? 0 : 1)); + wcstring_list_t argv_list; + const wchar_t *const *remaining_args = argv + optind + (argc == optind ? 0 : 1); + for (size_t i = 0, len = null_terminated_array_length(remaining_args); i < len; i++) { + argv_list.push_back(remaining_args[i]); + } parser.vars().set_argv(std::move(argv_list)); retval = reader_read(parser, fd, streams.io_chain ? *streams.io_chain : io_chain_t()); diff --git a/src/env.cpp b/src/env.cpp index 6b2b2da7f..db6361241 100644 --- a/src/env.cpp +++ b/src/env.cpp @@ -534,7 +534,7 @@ class env_scoped_impl_t : public environment_t { ~env_scoped_impl_t() override = default; - std::shared_ptr> export_array(); + std::shared_ptr export_array(); env_scoped_impl_t(env_scoped_impl_t &&) = delete; env_scoped_impl_t(const env_scoped_impl_t &) = delete; @@ -552,7 +552,7 @@ class env_scoped_impl_t : public environment_t { perproc_data_t perproc_data_{}; // Exported variable array used by execv. - std::shared_ptr> export_array_{}; + std::shared_ptr export_array_{}; // Cached list of export generations corresponding to the above export_array_. // If this differs from the current export generations then we need to regenerate the array. @@ -584,7 +584,7 @@ class env_scoped_impl_t : public environment_t { bool export_array_needs_regeneration() const; /// \return a newly allocated export array. - std::shared_ptr> create_export_array() const; + std::shared_ptr create_export_array() const; }; /// Get the exported variables into a variable table. @@ -631,8 +631,7 @@ bool env_scoped_impl_t::export_array_needs_regeneration() const { return mismatch; } -std::shared_ptr> env_scoped_impl_t::create_export_array() - const { +std::shared_ptr env_scoped_impl_t::create_export_array() const { var_table_t table; FLOG(env_export, L"create_export_array() recalc"); @@ -665,10 +664,10 @@ std::shared_ptr> env_scoped_impl_t::create_e str.append(wcs2string(kv.second.as_string())); export_list.push_back(std::move(str)); } - return std::make_shared>(export_list); + return std::make_shared(std::move(export_list)); } -std::shared_ptr> env_scoped_impl_t::export_array() { +std::shared_ptr env_scoped_impl_t::export_array() { ASSERT_IS_NOT_FORKED_CHILD(); if (export_array_needs_regeneration()) { export_array_ = create_export_array(); @@ -1341,7 +1340,7 @@ int env_stack_t::remove(const wcstring &key, int mode, std::vector *out return ret.status; } -std::shared_ptr> env_stack_t::export_arr() { +std::shared_ptr env_stack_t::export_arr() { return acquire_impl()->export_array(); } diff --git a/src/env.h b/src/env.h index 263292bea..ea4586ad2 100644 --- a/src/env.h +++ b/src/env.h @@ -280,7 +280,7 @@ class env_stack_t final : public environment_t { bool universal_barrier(); /// Returns an array containing all exported variables in a format suitable for execv. - std::shared_ptr> export_arr(); + std::shared_ptr export_arr(); /// Snapshot this environment. This means returning a read-only copy. Local variables are copied /// but globals are shared (i.e. changes to global will be visible to this snapshot). This diff --git a/src/exec.cpp b/src/exec.cpp index 95af8b9e0..e3cf07110 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -184,17 +184,20 @@ bool is_thompson_shell_script(const char *path) { ASSERT_IS_MAIN_THREAD(); ASSERT_IS_NOT_FORKED_CHILD(); - null_terminated_array_t argv_array; - convert_wide_array_to_narrow(p->get_argv_array(), &argv_array); + // Construct argv. Ensure the strings stay alive for the duration of this function. + std::vector narrow_strings = wide_string_list_to_narrow(p->argv()); + null_terminated_array_t narrow_argv(narrow_strings); + const char **argv = narrow_argv.get(); + // Construct envp. auto export_vars = vars.export_arr(); - const char *const *envv = export_vars->get(); + const char **envp = export_vars->get(); std::string actual_cmd = wcs2string(p->actual_cmd); // Ensure the terminal modes are what they were before we changed them. restore_term_mode(); // Bounce to launch_process. This never returns. - safe_launch_process(p, actual_cmd.c_str(), argv_array.get(), envv); + safe_launch_process(p, actual_cmd.c_str(), argv, envp); } // Returns whether we can use posix spawn for a given process in a given job. @@ -495,7 +498,7 @@ static void exec_internal_builtin_proc(parser_t &parser, process_t *p, streams.io_chain = &proc_io_chain; // Note this call may block for a long time while the builtin runs. - p->status = builtin_run(parser, p->get_argv(), streams); + p->status = builtin_run(parser, p->argv(), streams); } /// \return an newly allocated output stream for the given fd, which is typically stdout or stderr. @@ -576,8 +579,8 @@ static launch_result_t exec_external_command(parser_t &parser, const std::shared process_t *p, const io_chain_t &proc_io_chain) { assert(p->type == process_type_t::external && "Process is not external"); // Get argv and envv before we fork. - null_terminated_array_t argv_array; - convert_wide_array_to_narrow(p->get_argv_array(), &argv_array); + const std::vector narrow_argv = wide_string_list_to_narrow(p->argv()); + null_terminated_array_t argv_array(narrow_argv); // Convert our IO chain to a dup2 sequence. auto dup2s = dup2_list_t::resolve_chain(proc_io_chain); @@ -716,11 +719,11 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job, FLOGF(error, _(L"Unknown function '%ls'"), p->argv0()); return proc_performer_t{}; } - auto argv = move_to_sharedptr(p->get_argv_array().to_list()); + const wcstring_list_t &argv = p->argv(); return [=](parser_t &parser) { // Pull out the job list from the function. const ast::job_list_t &body = props->func_node->jobs; - const block_t *fb = function_prepare_environment(parser, *argv, *props); + const block_t *fb = function_prepare_environment(parser, argv, *props); auto res = parser.eval_node(props->parsed_source, body, io_chain, job_group); function_restore_environment(parser, fb); @@ -817,7 +820,7 @@ static launch_result_t exec_process_in_job(parser_t &parser, process_t *p, // Maybe trace this process. // TODO: 'and' and 'or' will not show. if (trace_enabled(parser)) { - trace_argv(parser, nullptr, p->get_argv_array().to_list()); + trace_argv(parser, nullptr, p->argv()); } // The IO chain for this process. diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 2f560f985..5f143cd51 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -2814,16 +2814,16 @@ static void test_test_brackets() { null_output_stream_t null{}; io_streams_t streams(null, null); - null_terminated_array_t args; + wcstring_list_t args; - args.set({L"[", L"foo"}); - do_test(builtin_test(parser, streams, args.get()) != 0); + const wchar_t *args1[] = {L"[", L"foo", nullptr}; + do_test(builtin_test(parser, streams, args1) != 0); - args.set({L"[", L"foo", L"]"}); - do_test(builtin_test(parser, streams, args.get()) == 0); + const wchar_t *args2[] = {L"[", L"foo", L"]", nullptr}; + do_test(builtin_test(parser, streams, args2) == 0); - args.set({L"[", L"foo", L"]", L"bar"}); - do_test(builtin_test(parser, streams, args.get()) != 0); + const wchar_t *args3[] = {L"[", L"foo", L"]", L"bar", nullptr}; + do_test(builtin_test(parser, streams, args3) != 0); } static void test_test() { diff --git a/src/null_terminated_array.cpp b/src/null_terminated_array.cpp index 2e10a9e36..e3ec03839 100644 --- a/src/null_terminated_array.cpp +++ b/src/null_terminated_array.cpp @@ -1,75 +1,10 @@ #include "null_terminated_array.h" -template -static const CharT **make_null_terminated_array_helper( - const std::vector > &argv) { - size_t count = argv.size(); - - // We allocate everything in one giant block. First compute how much space we need. - // N + 1 pointers. - size_t pointers_allocation_len = (count + 1) * sizeof(CharT *); - - // In the very unlikely event that CharT has stricter alignment requirements than does a - // pointer, round us up to the size of a CharT. - pointers_allocation_len += sizeof(CharT) - 1; - pointers_allocation_len -= pointers_allocation_len % sizeof(CharT); - - // N null terminated strings. - size_t strings_allocation_len = 0; - for (size_t i = 0; i < count; i++) { - // The size of the string, plus a null terminator. - strings_allocation_len += (argv.at(i).size() + 1) * sizeof(CharT); +std::vector wide_string_list_to_narrow(const wcstring_list_t &strs) { + std::vector res; + res.reserve(strs.size()); + for (const wcstring &s : strs) { + res.push_back(wcs2string(s)); } - - // Now allocate their sum. - auto base = - static_cast(malloc(pointers_allocation_len + strings_allocation_len)); - if (!base) return nullptr; - - // Divvy it up into the pointers and strings. - auto pointers = reinterpret_cast(base); - auto strings = reinterpret_cast(base + pointers_allocation_len); - - // Start copying. - for (size_t i = 0; i < count; i++) { - const std::basic_string &str = argv.at(i); - *pointers++ = strings; // store the current string pointer into self - strings = std::copy(str.begin(), str.end(), strings); // copy the string into strings - *strings++ = static_cast(0); // each string needs a null terminator - } - *pointers++ = nullptr; // array of pointers needs a null terminator - - // Make sure we know what we're doing. - assert(reinterpret_cast(pointers) - base == - static_cast(pointers_allocation_len)); - assert(reinterpret_cast(strings) - - reinterpret_cast(pointers) == - static_cast(strings_allocation_len)); - assert(reinterpret_cast(strings) - base == - static_cast(pointers_allocation_len + strings_allocation_len)); - - return reinterpret_cast(base); -} - -const wchar_t **make_null_terminated_array(const wcstring_list_t &lst) { - return make_null_terminated_array_helper(lst); -} - -const char **make_null_terminated_array(const std::vector &lst) { - return make_null_terminated_array_helper(lst); -} - -void convert_wide_array_to_narrow(const null_terminated_array_t &wide_arr, - null_terminated_array_t *output) { - const wchar_t *const *arr = wide_arr.get(); - if (!arr) { - output->clear(); - return; - } - - std::vector list; - for (size_t i = 0; arr[i]; i++) { - list.push_back(wcs2string(arr[i])); - } - output->set(list); + return res; } diff --git a/src/null_terminated_array.h b/src/null_terminated_array.h index b2690ee25..406eb6d5d 100644 --- a/src/null_terminated_array.h +++ b/src/null_terminated_array.h @@ -4,87 +4,78 @@ #include "config.h" // IWYU pragma: keep +#include #include #include #include "common.h" -const wchar_t **make_null_terminated_array(const wcstring_list_t &lst); -const char **make_null_terminated_array(const std::vector &lst); - -/// \return the length of a null terminated array. -template -inline size_t null_terminated_array_length(const CharT *const *arr) { - if (!arr) return 0; - size_t len = 0; - while (arr[len] != nullptr) { - len++; - } - return len; -} - -// Helper class for managing a null-terminated array of null-terminated strings (of some char type). -template +/// This supports the null-terminated array of NUL-terminated strings consumed by exec. +/// Given a list of strings, construct a vector of pointers to those strings contents. +/// This is used for building null-terminated arrays of null-terminated strings. +/// *Important*: the vector stores pointers into the interior of the input strings, which may be +/// subject to the small-string optimization. This means that pointers will be left dangling if any +/// input string is deallocated *or moved*. This class should only be used in transient calls. +template class null_terminated_array_t { - using string_list_t = std::vector>; - - const CharT **array{nullptr}; - - // No assignment or copying. - void operator=(null_terminated_array_t rhs) = delete; - null_terminated_array_t(const null_terminated_array_t &) = delete; - - size_t size() const { return nullterm_array_length(array); } - - void free(void) { - ::free(reinterpret_cast(array)); - array = nullptr; - } - public: - null_terminated_array_t() = default; - - explicit null_terminated_array_t(const string_list_t &argv) - : array(make_null_terminated_array(argv)) {} - - ~null_terminated_array_t() { this->free(); } - - null_terminated_array_t(null_terminated_array_t &&rhs) : array(rhs.array) { - rhs.array = nullptr; + /// \return the list of pointers, appropriate for envp or argv. + /// Note this returns a mutable array of const strings. The caller may rearrange the strings but + /// not modify their contents. + const T **get() { + assert(!pointers_.empty() && pointers_.back() == nullptr && "Should have null terminator"); + return &pointers_[0]; } - null_terminated_array_t operator=(null_terminated_array_t &&rhs) { - free(); - array = rhs.array; - rhs.array = nullptr; - } - - void set(const string_list_t &argv) { - this->free(); - this->array = make_null_terminated_array(argv); - } - - /// Convert from a null terminated list to a vector of strings. - static string_list_t to_list(const CharT *const *arr) { - string_list_t result; - for (auto cursor = arr; cursor && *cursor; cursor++) { - result.push_back(*cursor); + // Construct from a list of strings (std::string or wcstring). + // This holds pointers into the strings. + explicit null_terminated_array_t(const std::vector> &strs) { + pointers_.reserve(strs.size() + 1); + for (const auto &s : strs) { + pointers_.push_back(s.c_str()); } - return result; + pointers_.push_back(nullptr); } - /// Instance method. - string_list_t to_list() const { return to_list(array); } + // Because this class holds unowned pointers, it should not be copied or moved. + null_terminated_array_t(const null_terminated_array_t &) = delete; + null_terminated_array_t(null_terminated_array_t &&) = delete; + void operator=(const null_terminated_array_t &) = delete; + void operator=(null_terminated_array_t &&) = delete; - const CharT *const *get() const { return array; } - const CharT **get() { return array; } - - void clear() { this->free(); } + private: + std::vector pointers_{}; }; -// Helper function to convert from a null_terminated_array_t to a -// null_terminated_array_t. -void convert_wide_array_to_narrow(const null_terminated_array_t &arr, - null_terminated_array_t *output); +/// A container which exposes a null-terminated array of pointers to strings that it owns. +/// This is useful for persisted null-terminated arrays, e.g. the exported environment variable +/// list. This assumes char, since we don't need this for wchar_t. +/// Note this class is not movable or copyable as it embeds a null_terminated_array_t. +class owning_null_terminated_array_t { + public: + // Access the null-terminated array of nul-terminated strings, appropriate for execv(). + const char **get() { return pointers_.get(); } + + // Construct, taking ownership of a list of strings. + explicit owning_null_terminated_array_t(std::vector &&strings) + : strings_(std::move(strings)), pointers_(strings_) {} + + private: + const std::vector strings_; + null_terminated_array_t pointers_; +}; + +/// Helper to convert a list of wcstring to a list of std::string. +std::vector wide_string_list_to_narrow(const wcstring_list_t &strs); + +/// \return the length of a null-terminated array of pointers to something. +template +size_t null_terminated_array_length(const T *const *arr) { + size_t idx = 0; + while (arr[idx] != nullptr) { + idx++; + } + return idx; +} #endif // FISH_NULL_TERMINATED_ARRAY_H diff --git a/src/parse_util.cpp b/src/parse_util.cpp index 32aa7fdb7..ee5da5204 100644 --- a/src/parse_util.cpp +++ b/src/parse_util.cpp @@ -805,9 +805,7 @@ static int parser_is_pipe_forbidden(const wcstring &word) { return contains(forbidden_pipe_commands, word); } -bool parse_util_argument_is_help(const wchar_t *s) { - return std::wcscmp(L"-h", s) == 0 || std::wcscmp(L"--help", s) == 0; -} +bool parse_util_argument_is_help(const wcstring &s) { return s == L"-h" || s == L"--help"; } // \return a pointer to the first argument node of an argument_or_redirection_list_t, or nullptr if // there are no arguments. diff --git a/src/parse_util.h b/src/parse_util.h index 85f7e13da..0ef2c5ef0 100644 --- a/src/parse_util.h +++ b/src/parse_util.h @@ -94,7 +94,7 @@ size_t parse_util_get_offset(const wcstring &str, int line, long line_offset); wcstring parse_util_unescape_wildcards(const wcstring &str); /// Checks if the specified string is a help option. -bool parse_util_argument_is_help(const wchar_t *s); +bool parse_util_argument_is_help(const wcstring &s); /// Calculates information on the parameter at the specified index. /// diff --git a/src/proc.h b/src/proc.h index e316c360b..e5f62c500 100644 --- a/src/proc.h +++ b/src/proc.h @@ -198,15 +198,6 @@ enum { INVALID_PID = -2 }; /// the name of the shellscript function. class parser_t; class process_t { - private: - null_terminated_array_t argv_array; - - redirection_spec_list_t proc_redirection_specs; - - // No copying. - process_t(const process_t &rhs) = delete; - void operator=(const process_t &rhs) = delete; - public: process_t(); @@ -230,30 +221,19 @@ class process_t { std::vector variable_assignments; /// Sets argv. - void set_argv(const wcstring_list_t &argv) { argv_array.set(argv); } + void set_argv(wcstring_list_t argv) { argv_ = std::move(argv); } /// Returns argv. - const wchar_t **get_argv() { return argv_array.get(); } - const wchar_t *const *get_argv() const { return argv_array.get(); } - const null_terminated_array_t &get_argv_array() const { return argv_array; } + const wcstring_list_t &argv() { return argv_; } - /// Returns argv[idx]. - const wchar_t *argv(size_t idx) const { - const wchar_t *const *argv = argv_array.get(); - assert(argv != nullptr); - return argv[idx]; - } - - /// Returns argv[0], or NULL. - const wchar_t *argv0() const { - const wchar_t *const *argv = argv_array.get(); - return argv ? argv[0] : nullptr; - } + /// Returns argv[0], or nullptr. + const wchar_t *argv0() const { return argv_.empty() ? nullptr : argv_.front().c_str(); } /// Redirection list getter and setter. - const redirection_spec_list_t &redirection_specs() const { return proc_redirection_specs; } + const redirection_spec_list_t &redirection_specs() const { return proc_redirection_specs_; } + void set_redirection_specs(redirection_spec_list_t specs) { - this->proc_redirection_specs = std::move(specs); + this->proc_redirection_specs_ = std::move(specs); } /// Store the current topic generations. That is, right before the process is launched, record @@ -297,6 +277,14 @@ class process_t { /// Number of jiffies spent in process at last cpu time check. unsigned long last_jiffies{0}; + + // No copying. + process_t(const process_t &rhs) = delete; + void operator=(const process_t &rhs) = delete; + + private: + wcstring_list_t argv_; + redirection_spec_list_t proc_redirection_specs_; }; typedef std::unique_ptr process_ptr_t;