Rationalize how the parser reports tokenizer errors

Remove the unnecessary SQUASH_ERROR flag and correctly report errors
generated from the tokenizer.
This commit is contained in:
ridiculousfish 2018-02-23 17:28:12 -08:00
parent 0950c35eb2
commit c4d903ff98
5 changed files with 42 additions and 49 deletions

View File

@@ -374,8 +374,8 @@ class parse_ll_t {
void parse_error_unexpected_token(const wchar_t *expected, parse_token_t token);
void parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *format, ...);
void parse_error_at_location(size_t location, parse_error_code_t code, const wchar_t *format,
...);
void parse_error_at_location(size_t source_start, size_t source_length, size_t error_location,
parse_error_code_t code, const wchar_t *format, ...);
void parse_error_failed_production(struct parse_stack_element_t &elem, parse_token_t token);
void parse_error_unbalancing_token(parse_token_t token);
@@ -608,7 +608,8 @@ void parse_ll_t::parse_error(parse_token_t token, parse_error_code_t code, const
}
}
void parse_ll_t::parse_error_at_location(size_t source_location, parse_error_code_t code,
void parse_ll_t::parse_error_at_location(size_t source_start, size_t source_length,
size_t error_location, parse_error_code_t code,
const wchar_t *fmt, ...) {
this->fatal_errored = true;
if (this->should_generate_error_messages) {
@@ -621,8 +622,8 @@ void parse_ll_t::parse_error_at_location(size_t source_location, parse_error_cod
err.code = code;
va_end(va);
err.source_start = source_location;
err.source_length = 0;
err.source_start = source_start;
err.source_length = source_length;
this->errors.push_back(err);
}
}
@@ -733,8 +734,10 @@ void parse_ll_t::report_tokenizer_error(const tokenizer_t &tokenizer, const tok_
break;
}
}
this->parse_error_at_location(tok.offset + tok.error_offset, parse_error_code, L"%ls",
tokenizer.text_of(tok).c_str());
this->parse_error_at_location(tok.offset, tok.length, tok.offset + tok.error_offset,
parse_error_code, L"%ls",
error_message_for_code(tok.error).c_str());
}
void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token) {
@@ -811,8 +814,9 @@ bool parse_ll_t::report_error_for_unclosed_block() {
}
if (cursor->source_start != NODE_OFFSET_INVALID) {
const wcstring node_desc = block_type_user_presentable_description(block_node->type);
this->parse_error_at_location(cursor->source_start, parse_error_generic,
L"Missing end to balance this %ls", node_desc.c_str());
this->parse_error_at_location(cursor->source_start, 0, cursor->source_start,
parse_error_generic, L"Missing end to balance this %ls",
node_desc.c_str());
reported_error = true;
}
return reported_error;
@@ -1098,8 +1102,6 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
if (parse_flags & parse_flag_show_blank_lines) tok_options |= TOK_SHOW_BLANK_LINES;
if (errors == NULL) tok_options |= TOK_SQUASH_ERRORS;
tokenizer_t tok(str.c_str(), tok_options);
// We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our

View File

@@ -371,7 +371,7 @@ void parse_util_token_extent(const wchar_t *buff, size_t cursor_pos, const wchar
const wcstring buffcpy = wcstring(cmdsubst_begin, cmdsubst_end - cmdsubst_begin);
tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED);
tok_t token;
while (tok.next(&token)) {
size_t tok_begin = token.offset;
@@ -474,7 +474,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_
size_t prev_pos = 0;
wchar_t last_quote = L'\0';
tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED);
tok_t token;
while (tok.next(&token)) {
if (token.offset > pos) break;

View File

@@ -2316,7 +2316,7 @@ static wchar_t unescaped_quote(const wcstring &str, size_t pos) {
/// Returns true if the last token is a comment.
static bool text_ends_in_comment(const wcstring &text) {
tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS);
tok_t token;
while (tok.next(&token)) {
; // pass

View File

@ -34,6 +34,26 @@
/// Error string for when trying to pipe from fd 0.
#define PIPE_ERROR _(L"Cannot use stdin (fd 0) as pipe output")
/// Return the user-visible error message corresponding to a tokenizer error code.
/// Asserts (and returns an empty string) if the code has no associated message.
wcstring error_message_for_code(tokenizer_error err) {
    if (err == TOK_UNTERMINATED_QUOTE) return QUOTE_ERROR;
    if (err == TOK_UNTERMINATED_SUBSHELL) return PARAN_ERROR;
    if (err == TOK_UNTERMINATED_SLICE) return SQUARE_BRACKET_ERROR;
    if (err == TOK_UNTERMINATED_ESCAPE) return UNTERMINATED_ESCAPE_ERROR;
    if (err == TOK_INVALID_REDIRECT) return REDIRECT_ERROR;
    if (err == TOK_INVALID_PIPE) return PIPE_ERROR;
    // Every known error code is handled above; anything else is a programmer bug.
    assert(0 && "Unknown error type");
    return {};
}
/// Return an error token and mark that we no longer have a next token.
tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *token_start,
const wchar_t *error_loc) {
@@ -49,30 +69,6 @@ tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *to
result.offset = token_start - this->start;
result.length = this->buff - token_start;
result.error_offset = error_loc - token_start;
if (!this->squash_errors) {
switch (error_type) {
case TOK_UNTERMINATED_QUOTE:
result.error_text = QUOTE_ERROR;
break;
case TOK_UNTERMINATED_SUBSHELL:
result.error_text = PARAN_ERROR;
break;
case TOK_UNTERMINATED_SLICE:
result.error_text = SQUARE_BRACKET_ERROR;
break;
case TOK_UNTERMINATED_ESCAPE:
result.error_text = UNTERMINATED_ESCAPE_ERROR;
break;
case TOK_INVALID_REDIRECT:
result.error_text = REDIRECT_ERROR;
break;
case TOK_INVALID_PIPE:
result.error_text = PIPE_ERROR;
break;
default:
assert(0 && "Unknown error type");
}
}
return result;
}
@@ -81,7 +77,6 @@ tokenizer_t::tokenizer_t(const wchar_t *start, tok_flags_t flags) : buff(start),
this->accept_unfinished = static_cast<bool>(flags & TOK_ACCEPT_UNFINISHED);
this->show_comments = static_cast<bool>(flags & TOK_SHOW_COMMENTS);
this->squash_errors = static_cast<bool>(flags & TOK_SQUASH_ERRORS);
this->show_blank_lines = static_cast<bool>(flags & TOK_SHOW_BLANK_LINES);
}
@@ -590,7 +585,7 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
}
wcstring tok_first(const wcstring &str) {
tokenizer_t t(str.c_str(), TOK_SQUASH_ERRORS);
tokenizer_t t(str.c_str(), 0);
tok_t token;
if (t.next(&token) && token.type == TOK_STRING) {
return t.text_of(token);

View File

@@ -46,13 +42,9 @@ enum class redirection_type_t {
/// Flag telling the tokenizer not to remove comments. Useful for syntax highlighting.
#define TOK_SHOW_COMMENTS 2
/// Flag telling the tokenizer to not generate error messages, which we need to do when tokenizing
/// off of the main thread (since wgettext is not thread safe).
#define TOK_SQUASH_ERRORS 4
/// Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon. This flag tells
/// the tokenizer to return each of them as a separate END.
#define TOK_SHOW_BLANK_LINES 8
#define TOK_SHOW_BLANK_LINES 4
typedef unsigned int tok_flags_t;
@@ -70,11 +66,10 @@ struct tok_t {
// If an error, this is the error code.
enum tokenizer_error error { TOK_ERROR_NONE };
// If an error, this is the offset of the error within the token. A value of 0 means it occurred
// at 'offset'.
size_t error_offset{size_t(-1)};
// If there is an error, the text of the error; otherwise empty.
wcstring error_text{};
tok_t() = default;
};
@@ -97,8 +92,6 @@ class tokenizer_t {
bool show_comments{false};
/// Whether all blank lines are returned.
bool show_blank_lines{false};
/// Whether we are squashing errors.
bool squash_errors{false};
/// Whether to continue the previous line after the comment.
bool continue_line_after_comment{false};
@@ -145,6 +138,9 @@ int fd_redirected_by_pipe(const wcstring &str);
/// Helper function to return oflags (as in open(2)) for a redirection type.
int oflags_for_redirection_type(redirection_type_t type);
/// Returns an error message for an error code.
wcstring error_message_for_code(tokenizer_error err);
enum move_word_style_t {
move_word_style_punctuation, // stop at punctuation
move_word_style_path_components, // stops at path components