Rework null terminated arrays

Several functions including wgetopt and execve operate on null-terminated
arrays of nul-terminated pointers: a list of pointers to C strings where
the last pointer is null. Prior to this change, each process_t stored its
argv in such an array. This had two problems:

1. It was awkward to work with this type, instead of using std::vector,
etc.
2. The process's arguments would be rearranged by builtins which is
surprising

Our null terminated arrays were built around a fancy type that would copy
input strings and also generate an array of pointers to them, in one big
allocation.

Switch to a new model where we construct an array of pointers over
existing strings. So you can supply a `vector<string>` and now
`null_terminated_array_t` will just make a list of pointers to them. Now
processes can just store their argv in a familiar wcstring_list_t.
This commit is contained in:
ridiculousfish 2021-02-14 13:15:29 -08:00
parent e0e4b11dbd
commit fb92ad946b
12 changed files with 127 additions and 209 deletions

View File

@ -453,25 +453,25 @@ bool builtin_exists(const wcstring &cmd) { return static_cast<bool>(builtin_look
/// Is the command a keyword we need to special-case the handling of `-h` and `--help`.
static const wchar_t *const help_builtins[] = {L"for", L"while", L"function", L"if",
L"end", L"switch", L"case"};
static bool cmd_needs_help(const wchar_t *cmd) { return contains(help_builtins, cmd); }
static bool cmd_needs_help(const wcstring &cmd) { return contains(help_builtins, cmd); }
/// Execute a builtin command
proc_status_t builtin_run(parser_t &parser, const wchar_t **argv, io_streams_t &streams) {
UNUSED(parser);
UNUSED(streams);
if (argv == nullptr || argv[0] == nullptr)
return proc_status_t::from_exit_code(STATUS_INVALID_ARGS);
proc_status_t builtin_run(parser_t &parser, const wcstring_list_t &argv, io_streams_t &streams) {
if (argv.empty()) return proc_status_t::from_exit_code(STATUS_INVALID_ARGS);
const wcstring &cmdname = argv.front();
// We can be handed a keyword by the parser as if it was a command. This happens when the user
// follows the keyword by `-h` or `--help`. Since it isn't really a builtin command we need to
// handle displaying help for it here.
if (argv[1] && !argv[2] && parse_util_argument_is_help(argv[1]) && cmd_needs_help(argv[0])) {
builtin_print_help(parser, streams, argv[0]);
if (argv.size() == 2 && parse_util_argument_is_help(argv[1]) && cmd_needs_help(cmdname)) {
builtin_print_help(parser, streams, cmdname.c_str());
return proc_status_t::from_exit_code(STATUS_CMD_OK);
}
if (const builtin_data_t *data = builtin_lookup(argv[0])) {
maybe_t<int> ret = data->func(parser, streams, argv);
if (const builtin_data_t *data = builtin_lookup(cmdname)) {
// Construct the permutable argv array which the builtin expects.
null_terminated_array_t<wchar_t> argv_arr(argv);
maybe_t<int> ret = data->func(parser, streams, argv_arr.get());
if (!ret) {
return proc_status_t::empty();
}
@ -486,7 +486,7 @@ proc_status_t builtin_run(parser_t &parser, const wchar_t **argv, io_streams_t &
return proc_status_t::from_exit_code(code);
}
FLOGF(error, UNKNOWN_BUILTIN_ERR_MSG, argv[0]);
FLOGF(error, UNKNOWN_BUILTIN_ERR_MSG, cmdname.c_str());
return proc_status_t::from_exit_code(STATUS_CMD_ERROR);
}

View File

@ -85,7 +85,7 @@ enum { COMMAND_NOT_BUILTIN, BUILTIN_REGULAR, BUILTIN_FUNCTION };
void builtin_init();
bool builtin_exists(const wcstring &cmd);
proc_status_t builtin_run(parser_t &parser, const wchar_t **argv, io_streams_t &streams);
proc_status_t builtin_run(parser_t &parser, const wcstring_list_t &argv, io_streams_t &streams);
wcstring_list_t builtin_get_names();
void builtin_get_names(completion_list_t *list);

View File

@ -93,10 +93,14 @@ maybe_t<int> builtin_source(parser_t &parser, io_streams_t &streams, const wchar
auto &ld = parser.libdata();
scoped_push<const wchar_t *> filename_push{&ld.current_filename, fn_intern};
// Construct argv from our null-terminated list.
// This is slightly subtle. If this is a bare `source` with no args then `argv + optind` already
// points to the end of argv. Otherwise we want to skip the file name to get to the args if any.
wcstring_list_t argv_list =
null_terminated_array_t<wchar_t>::to_list(argv + optind + (argc == optind ? 0 : 1));
wcstring_list_t argv_list;
const wchar_t *const *remaining_args = argv + optind + (argc == optind ? 0 : 1);
for (size_t i = 0, len = null_terminated_array_length(remaining_args); i < len; i++) {
argv_list.push_back(remaining_args[i]);
}
parser.vars().set_argv(std::move(argv_list));
retval = reader_read(parser, fd, streams.io_chain ? *streams.io_chain : io_chain_t());

View File

@ -534,7 +534,7 @@ class env_scoped_impl_t : public environment_t {
~env_scoped_impl_t() override = default;
std::shared_ptr<const null_terminated_array_t<char>> export_array();
std::shared_ptr<owning_null_terminated_array_t> export_array();
env_scoped_impl_t(env_scoped_impl_t &&) = delete;
env_scoped_impl_t(const env_scoped_impl_t &) = delete;
@ -552,7 +552,7 @@ class env_scoped_impl_t : public environment_t {
perproc_data_t perproc_data_{};
// Exported variable array used by execv.
std::shared_ptr<const null_terminated_array_t<char>> export_array_{};
std::shared_ptr<owning_null_terminated_array_t> export_array_{};
// Cached list of export generations corresponding to the above export_array_.
// If this differs from the current export generations then we need to regenerate the array.
@ -584,7 +584,7 @@ class env_scoped_impl_t : public environment_t {
bool export_array_needs_regeneration() const;
/// \return a newly allocated export array.
std::shared_ptr<const null_terminated_array_t<char>> create_export_array() const;
std::shared_ptr<owning_null_terminated_array_t> create_export_array() const;
};
/// Get the exported variables into a variable table.
@ -631,8 +631,7 @@ bool env_scoped_impl_t::export_array_needs_regeneration() const {
return mismatch;
}
std::shared_ptr<const null_terminated_array_t<char>> env_scoped_impl_t::create_export_array()
const {
std::shared_ptr<owning_null_terminated_array_t> env_scoped_impl_t::create_export_array() const {
var_table_t table;
FLOG(env_export, L"create_export_array() recalc");
@ -665,10 +664,10 @@ std::shared_ptr<const null_terminated_array_t<char>> env_scoped_impl_t::create_e
str.append(wcs2string(kv.second.as_string()));
export_list.push_back(std::move(str));
}
return std::make_shared<null_terminated_array_t<char>>(export_list);
return std::make_shared<owning_null_terminated_array_t>(std::move(export_list));
}
std::shared_ptr<const null_terminated_array_t<char>> env_scoped_impl_t::export_array() {
std::shared_ptr<owning_null_terminated_array_t> env_scoped_impl_t::export_array() {
ASSERT_IS_NOT_FORKED_CHILD();
if (export_array_needs_regeneration()) {
export_array_ = create_export_array();
@ -1341,7 +1340,7 @@ int env_stack_t::remove(const wcstring &key, int mode, std::vector<event_t> *out
return ret.status;
}
std::shared_ptr<const null_terminated_array_t<char>> env_stack_t::export_arr() {
std::shared_ptr<owning_null_terminated_array_t> env_stack_t::export_arr() {
return acquire_impl()->export_array();
}

View File

@ -280,7 +280,7 @@ class env_stack_t final : public environment_t {
bool universal_barrier();
/// Returns an array containing all exported variables in a format suitable for execv.
std::shared_ptr<const null_terminated_array_t<char>> export_arr();
std::shared_ptr<owning_null_terminated_array_t> export_arr();
/// Snapshot this environment. This means returning a read-only copy. Local variables are copied
/// but globals are shared (i.e. changes to global will be visible to this snapshot). This

View File

@ -184,17 +184,20 @@ bool is_thompson_shell_script(const char *path) {
ASSERT_IS_MAIN_THREAD();
ASSERT_IS_NOT_FORKED_CHILD();
null_terminated_array_t<char> argv_array;
convert_wide_array_to_narrow(p->get_argv_array(), &argv_array);
// Construct argv. Ensure the strings stay alive for the duration of this function.
std::vector<std::string> narrow_strings = wide_string_list_to_narrow(p->argv());
null_terminated_array_t<char> narrow_argv(narrow_strings);
const char **argv = narrow_argv.get();
// Construct envp.
auto export_vars = vars.export_arr();
const char *const *envv = export_vars->get();
const char **envp = export_vars->get();
std::string actual_cmd = wcs2string(p->actual_cmd);
// Ensure the terminal modes are what they were before we changed them.
restore_term_mode();
// Bounce to launch_process. This never returns.
safe_launch_process(p, actual_cmd.c_str(), argv_array.get(), envv);
safe_launch_process(p, actual_cmd.c_str(), argv, envp);
}
// Returns whether we can use posix spawn for a given process in a given job.
@ -495,7 +498,7 @@ static void exec_internal_builtin_proc(parser_t &parser, process_t *p,
streams.io_chain = &proc_io_chain;
// Note this call may block for a long time while the builtin runs.
p->status = builtin_run(parser, p->get_argv(), streams);
p->status = builtin_run(parser, p->argv(), streams);
}
/// \return an newly allocated output stream for the given fd, which is typically stdout or stderr.
@ -576,8 +579,8 @@ static launch_result_t exec_external_command(parser_t &parser, const std::shared
process_t *p, const io_chain_t &proc_io_chain) {
assert(p->type == process_type_t::external && "Process is not external");
// Get argv and envv before we fork.
null_terminated_array_t<char> argv_array;
convert_wide_array_to_narrow(p->get_argv_array(), &argv_array);
const std::vector<std::string> narrow_argv = wide_string_list_to_narrow(p->argv());
null_terminated_array_t<char> argv_array(narrow_argv);
// Convert our IO chain to a dup2 sequence.
auto dup2s = dup2_list_t::resolve_chain(proc_io_chain);
@ -716,11 +719,11 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job,
FLOGF(error, _(L"Unknown function '%ls'"), p->argv0());
return proc_performer_t{};
}
auto argv = move_to_sharedptr(p->get_argv_array().to_list());
const wcstring_list_t &argv = p->argv();
return [=](parser_t &parser) {
// Pull out the job list from the function.
const ast::job_list_t &body = props->func_node->jobs;
const block_t *fb = function_prepare_environment(parser, *argv, *props);
const block_t *fb = function_prepare_environment(parser, argv, *props);
auto res = parser.eval_node(props->parsed_source, body, io_chain, job_group);
function_restore_environment(parser, fb);
@ -817,7 +820,7 @@ static launch_result_t exec_process_in_job(parser_t &parser, process_t *p,
// Maybe trace this process.
// TODO: 'and' and 'or' will not show.
if (trace_enabled(parser)) {
trace_argv(parser, nullptr, p->get_argv_array().to_list());
trace_argv(parser, nullptr, p->argv());
}
// The IO chain for this process.

View File

@ -2814,16 +2814,16 @@ static void test_test_brackets() {
null_output_stream_t null{};
io_streams_t streams(null, null);
null_terminated_array_t<wchar_t> args;
wcstring_list_t args;
args.set({L"[", L"foo"});
do_test(builtin_test(parser, streams, args.get()) != 0);
const wchar_t *args1[] = {L"[", L"foo", nullptr};
do_test(builtin_test(parser, streams, args1) != 0);
args.set({L"[", L"foo", L"]"});
do_test(builtin_test(parser, streams, args.get()) == 0);
const wchar_t *args2[] = {L"[", L"foo", L"]", nullptr};
do_test(builtin_test(parser, streams, args2) == 0);
args.set({L"[", L"foo", L"]", L"bar"});
do_test(builtin_test(parser, streams, args.get()) != 0);
const wchar_t *args3[] = {L"[", L"foo", L"]", L"bar", nullptr};
do_test(builtin_test(parser, streams, args3) != 0);
}
static void test_test() {

View File

@ -1,75 +1,10 @@
#include "null_terminated_array.h"
template <typename CharT>
static const CharT **make_null_terminated_array_helper(
const std::vector<std::basic_string<CharT> > &argv) {
size_t count = argv.size();
// We allocate everything in one giant block. First compute how much space we need.
// N + 1 pointers.
size_t pointers_allocation_len = (count + 1) * sizeof(CharT *);
// In the very unlikely event that CharT has stricter alignment requirements than does a
// pointer, round us up to the size of a CharT.
pointers_allocation_len += sizeof(CharT) - 1;
pointers_allocation_len -= pointers_allocation_len % sizeof(CharT);
// N null terminated strings.
size_t strings_allocation_len = 0;
for (size_t i = 0; i < count; i++) {
// The size of the string, plus a null terminator.
strings_allocation_len += (argv.at(i).size() + 1) * sizeof(CharT);
std::vector<std::string> wide_string_list_to_narrow(const wcstring_list_t &strs) {
std::vector<std::string> res;
res.reserve(strs.size());
for (const wcstring &s : strs) {
res.push_back(wcs2string(s));
}
// Now allocate their sum.
auto base =
static_cast<unsigned char *>(malloc(pointers_allocation_len + strings_allocation_len));
if (!base) return nullptr;
// Divvy it up into the pointers and strings.
auto pointers = reinterpret_cast<CharT **>(base);
auto strings = reinterpret_cast<CharT *>(base + pointers_allocation_len);
// Start copying.
for (size_t i = 0; i < count; i++) {
const std::basic_string<CharT> &str = argv.at(i);
*pointers++ = strings; // store the current string pointer into self
strings = std::copy(str.begin(), str.end(), strings); // copy the string into strings
*strings++ = static_cast<CharT>(0); // each string needs a null terminator
}
*pointers++ = nullptr; // array of pointers needs a null terminator
// Make sure we know what we're doing.
assert(reinterpret_cast<unsigned char *>(pointers) - base ==
static_cast<std::ptrdiff_t>(pointers_allocation_len));
assert(reinterpret_cast<unsigned char *>(strings) -
reinterpret_cast<unsigned char *>(pointers) ==
static_cast<std::ptrdiff_t>(strings_allocation_len));
assert(reinterpret_cast<unsigned char *>(strings) - base ==
static_cast<std::ptrdiff_t>(pointers_allocation_len + strings_allocation_len));
return reinterpret_cast<const CharT **>(base);
}
const wchar_t **make_null_terminated_array(const wcstring_list_t &lst) {
return make_null_terminated_array_helper(lst);
}
const char **make_null_terminated_array(const std::vector<std::string> &lst) {
return make_null_terminated_array_helper(lst);
}
void convert_wide_array_to_narrow(const null_terminated_array_t<wchar_t> &wide_arr,
null_terminated_array_t<char> *output) {
const wchar_t *const *arr = wide_arr.get();
if (!arr) {
output->clear();
return;
}
std::vector<std::string> list;
for (size_t i = 0; arr[i]; i++) {
list.push_back(wcs2string(arr[i]));
}
output->set(list);
return res;
}

View File

@ -4,87 +4,78 @@
#include "config.h" // IWYU pragma: keep
#include <memory>
#include <string>
#include <vector>
#include "common.h"
const wchar_t **make_null_terminated_array(const wcstring_list_t &lst);
const char **make_null_terminated_array(const std::vector<std::string> &lst);
/// \return the length of a null terminated array.
template <typename CharT>
inline size_t null_terminated_array_length(const CharT *const *arr) {
if (!arr) return 0;
size_t len = 0;
while (arr[len] != nullptr) {
len++;
}
return len;
}
// Helper class for managing a null-terminated array of null-terminated strings (of some char type).
template <typename CharT>
/// This supports the null-terminated array of NUL-terminated strings consumed by exec.
/// Given a list of strings, construct a vector of pointers to those strings contents.
/// This is used for building null-terminated arrays of null-terminated strings.
/// *Important*: the vector stores pointers into the interior of the input strings, which may be
/// subject to the small-string optimization. This means that pointers will be left dangling if any
/// input string is deallocated *or moved*. This class should only be used in transient calls.
template <typename T>
class null_terminated_array_t {
using string_list_t = std::vector<std::basic_string<CharT>>;
const CharT **array{nullptr};
// No assignment or copying.
void operator=(null_terminated_array_t rhs) = delete;
null_terminated_array_t(const null_terminated_array_t &) = delete;
size_t size() const { return nullterm_array_length(array); }
void free(void) {
::free(reinterpret_cast<void *>(array));
array = nullptr;
}
public:
null_terminated_array_t() = default;
explicit null_terminated_array_t(const string_list_t &argv)
: array(make_null_terminated_array(argv)) {}
~null_terminated_array_t() { this->free(); }
null_terminated_array_t(null_terminated_array_t &&rhs) : array(rhs.array) {
rhs.array = nullptr;
/// \return the list of pointers, appropriate for envp or argv.
/// Note this returns a mutable array of const strings. The caller may rearrange the strings but
/// not modify their contents.
const T **get() {
assert(!pointers_.empty() && pointers_.back() == nullptr && "Should have null terminator");
return &pointers_[0];
}
null_terminated_array_t operator=(null_terminated_array_t &&rhs) {
free();
array = rhs.array;
rhs.array = nullptr;
}
void set(const string_list_t &argv) {
this->free();
this->array = make_null_terminated_array(argv);
}
/// Convert from a null terminated list to a vector of strings.
static string_list_t to_list(const CharT *const *arr) {
string_list_t result;
for (auto cursor = arr; cursor && *cursor; cursor++) {
result.push_back(*cursor);
// Construct from a list of strings (std::string or wcstring).
// This holds pointers into the strings.
explicit null_terminated_array_t(const std::vector<std::basic_string<T>> &strs) {
pointers_.reserve(strs.size() + 1);
for (const auto &s : strs) {
pointers_.push_back(s.c_str());
}
return result;
pointers_.push_back(nullptr);
}
/// Instance method.
string_list_t to_list() const { return to_list(array); }
// Because this class holds unowned pointers, it should not be copied or moved.
null_terminated_array_t(const null_terminated_array_t &) = delete;
null_terminated_array_t(null_terminated_array_t &&) = delete;
void operator=(const null_terminated_array_t &) = delete;
void operator=(null_terminated_array_t &&) = delete;
const CharT *const *get() const { return array; }
const CharT **get() { return array; }
void clear() { this->free(); }
private:
std::vector<const T *> pointers_{};
};
// Helper function to convert from a null_terminated_array_t<wchar_t> to a
// null_terminated_array_t<char_t>.
void convert_wide_array_to_narrow(const null_terminated_array_t<wchar_t> &arr,
null_terminated_array_t<char> *output);
/// A container which exposes a null-terminated array of pointers to strings that it owns.
/// This is useful for persisted null-terminated arrays, e.g. the exported environment variable
/// list. This assumes char, since we don't need this for wchar_t.
/// Note this class is not movable or copyable as it embeds a null_terminated_array_t.
class owning_null_terminated_array_t {
public:
// Access the null-terminated array of nul-terminated strings, appropriate for execv().
const char **get() { return pointers_.get(); }
// Construct, taking ownership of a list of strings.
explicit owning_null_terminated_array_t(std::vector<std::string> &&strings)
: strings_(std::move(strings)), pointers_(strings_) {}
private:
const std::vector<std::string> strings_;
null_terminated_array_t<char> pointers_;
};
/// Helper to convert a list of wcstring to a list of std::string.
std::vector<std::string> wide_string_list_to_narrow(const wcstring_list_t &strs);
/// \return the length of a null-terminated array of pointers to something.
template <typename T>
size_t null_terminated_array_length(const T *const *arr) {
size_t idx = 0;
while (arr[idx] != nullptr) {
idx++;
}
return idx;
}
#endif // FISH_NULL_TERMINATED_ARRAY_H

View File

@ -805,9 +805,7 @@ static int parser_is_pipe_forbidden(const wcstring &word) {
return contains(forbidden_pipe_commands, word);
}
bool parse_util_argument_is_help(const wchar_t *s) {
return std::wcscmp(L"-h", s) == 0 || std::wcscmp(L"--help", s) == 0;
}
bool parse_util_argument_is_help(const wcstring &s) { return s == L"-h" || s == L"--help"; }
// \return a pointer to the first argument node of an argument_or_redirection_list_t, or nullptr if
// there are no arguments.

View File

@ -94,7 +94,7 @@ size_t parse_util_get_offset(const wcstring &str, int line, long line_offset);
wcstring parse_util_unescape_wildcards(const wcstring &str);
/// Checks if the specified string is a help option.
bool parse_util_argument_is_help(const wchar_t *s);
bool parse_util_argument_is_help(const wcstring &s);
/// Calculates information on the parameter at the specified index.
///

View File

@ -198,15 +198,6 @@ enum { INVALID_PID = -2 };
/// the name of the shellscript function.
class parser_t;
class process_t {
private:
null_terminated_array_t<wchar_t> argv_array;
redirection_spec_list_t proc_redirection_specs;
// No copying.
process_t(const process_t &rhs) = delete;
void operator=(const process_t &rhs) = delete;
public:
process_t();
@ -230,30 +221,19 @@ class process_t {
std::vector<concrete_assignment> variable_assignments;
/// Sets argv.
void set_argv(const wcstring_list_t &argv) { argv_array.set(argv); }
void set_argv(wcstring_list_t argv) { argv_ = std::move(argv); }
/// Returns argv.
const wchar_t **get_argv() { return argv_array.get(); }
const wchar_t *const *get_argv() const { return argv_array.get(); }
const null_terminated_array_t<wchar_t> &get_argv_array() const { return argv_array; }
const wcstring_list_t &argv() { return argv_; }
/// Returns argv[idx].
const wchar_t *argv(size_t idx) const {
const wchar_t *const *argv = argv_array.get();
assert(argv != nullptr);
return argv[idx];
}
/// Returns argv[0], or NULL.
const wchar_t *argv0() const {
const wchar_t *const *argv = argv_array.get();
return argv ? argv[0] : nullptr;
}
/// Returns argv[0], or nullptr.
const wchar_t *argv0() const { return argv_.empty() ? nullptr : argv_.front().c_str(); }
/// Redirection list getter and setter.
const redirection_spec_list_t &redirection_specs() const { return proc_redirection_specs; }
const redirection_spec_list_t &redirection_specs() const { return proc_redirection_specs_; }
void set_redirection_specs(redirection_spec_list_t specs) {
this->proc_redirection_specs = std::move(specs);
this->proc_redirection_specs_ = std::move(specs);
}
/// Store the current topic generations. That is, right before the process is launched, record
@ -297,6 +277,14 @@ class process_t {
/// Number of jiffies spent in process at last cpu time check.
unsigned long last_jiffies{0};
// No copying.
process_t(const process_t &rhs) = delete;
void operator=(const process_t &rhs) = delete;
private:
wcstring_list_t argv_;
redirection_spec_list_t proc_redirection_specs_;
};
typedef std::unique_ptr<process_t> process_ptr_t;