Rationalize how the parser reports tokenizer errors
Remove the unnecessary TOK_SQUASH_ERRORS flag and correctly report errors generated by the tokenizer.
This commit is contained in:
parent
0950c35eb2
commit
c4d903ff98
@ -374,8 +374,8 @@ class parse_ll_t {
|
||||
|
||||
void parse_error_unexpected_token(const wchar_t *expected, parse_token_t token);
|
||||
void parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *format, ...);
|
||||
void parse_error_at_location(size_t location, parse_error_code_t code, const wchar_t *format,
|
||||
...);
|
||||
void parse_error_at_location(size_t source_start, size_t source_length, size_t error_location,
|
||||
parse_error_code_t code, const wchar_t *format, ...);
|
||||
void parse_error_failed_production(struct parse_stack_element_t &elem, parse_token_t token);
|
||||
void parse_error_unbalancing_token(parse_token_t token);
|
||||
|
||||
@ -608,7 +608,8 @@ void parse_ll_t::parse_error(parse_token_t token, parse_error_code_t code, const
|
||||
}
|
||||
}
|
||||
|
||||
void parse_ll_t::parse_error_at_location(size_t source_location, parse_error_code_t code,
|
||||
void parse_ll_t::parse_error_at_location(size_t source_start, size_t source_length,
|
||||
size_t error_location, parse_error_code_t code,
|
||||
const wchar_t *fmt, ...) {
|
||||
this->fatal_errored = true;
|
||||
if (this->should_generate_error_messages) {
|
||||
@ -621,8 +622,8 @@ void parse_ll_t::parse_error_at_location(size_t source_location, parse_error_cod
|
||||
err.code = code;
|
||||
va_end(va);
|
||||
|
||||
err.source_start = source_location;
|
||||
err.source_length = 0;
|
||||
err.source_start = source_start;
|
||||
err.source_length = source_length;
|
||||
this->errors.push_back(err);
|
||||
}
|
||||
}
|
||||
@ -733,8 +734,10 @@ void parse_ll_t::report_tokenizer_error(const tokenizer_t &tokenizer, const tok_
|
||||
break;
|
||||
}
|
||||
}
|
||||
this->parse_error_at_location(tok.offset + tok.error_offset, parse_error_code, L"%ls",
|
||||
tokenizer.text_of(tok).c_str());
|
||||
|
||||
this->parse_error_at_location(tok.offset, tok.length, tok.offset + tok.error_offset,
|
||||
parse_error_code, L"%ls",
|
||||
error_message_for_code(tok.error).c_str());
|
||||
}
|
||||
|
||||
void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token) {
|
||||
@ -811,8 +814,9 @@ bool parse_ll_t::report_error_for_unclosed_block() {
|
||||
}
|
||||
if (cursor->source_start != NODE_OFFSET_INVALID) {
|
||||
const wcstring node_desc = block_type_user_presentable_description(block_node->type);
|
||||
this->parse_error_at_location(cursor->source_start, parse_error_generic,
|
||||
L"Missing end to balance this %ls", node_desc.c_str());
|
||||
this->parse_error_at_location(cursor->source_start, 0, cursor->source_start,
|
||||
parse_error_generic, L"Missing end to balance this %ls",
|
||||
node_desc.c_str());
|
||||
reported_error = true;
|
||||
}
|
||||
return reported_error;
|
||||
@ -1098,8 +1102,6 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
|
||||
|
||||
if (parse_flags & parse_flag_show_blank_lines) tok_options |= TOK_SHOW_BLANK_LINES;
|
||||
|
||||
if (errors == NULL) tok_options |= TOK_SQUASH_ERRORS;
|
||||
|
||||
tokenizer_t tok(str.c_str(), tok_options);
|
||||
|
||||
// We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our
|
||||
|
@ -371,7 +371,7 @@ void parse_util_token_extent(const wchar_t *buff, size_t cursor_pos, const wchar
|
||||
|
||||
const wcstring buffcpy = wcstring(cmdsubst_begin, cmdsubst_end - cmdsubst_begin);
|
||||
|
||||
tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
||||
tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED);
|
||||
tok_t token;
|
||||
while (tok.next(&token)) {
|
||||
size_t tok_begin = token.offset;
|
||||
@ -474,7 +474,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_
|
||||
size_t prev_pos = 0;
|
||||
wchar_t last_quote = L'\0';
|
||||
|
||||
tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
||||
tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED);
|
||||
tok_t token;
|
||||
while (tok.next(&token)) {
|
||||
if (token.offset > pos) break;
|
||||
|
@ -2316,7 +2316,7 @@ static wchar_t unescaped_quote(const wcstring &str, size_t pos) {
|
||||
|
||||
/// Returns true if the last token is a comment.
|
||||
static bool text_ends_in_comment(const wcstring &text) {
|
||||
tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
|
||||
tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS);
|
||||
tok_t token;
|
||||
while (tok.next(&token)) {
|
||||
; // pass
|
||||
|
@ -34,6 +34,26 @@
|
||||
/// Error string for when trying to pipe from fd 0.
|
||||
#define PIPE_ERROR _(L"Cannot use stdin (fd 0) as pipe output")
|
||||
|
||||
/// Translates a tokenizer error code into the text of its error message.
/// An unrecognized code trips the assertion below; when assertions are compiled out, the
/// returned string is empty.
wcstring error_message_for_code(tokenizer_error err) {
    wcstring message;
    switch (err) {
        case TOK_UNTERMINATED_QUOTE: {
            message = QUOTE_ERROR;
            break;
        }
        case TOK_UNTERMINATED_SUBSHELL: {
            message = PARAN_ERROR;
            break;
        }
        case TOK_UNTERMINATED_SLICE: {
            message = SQUARE_BRACKET_ERROR;
            break;
        }
        case TOK_UNTERMINATED_ESCAPE: {
            message = UNTERMINATED_ESCAPE_ERROR;
            break;
        }
        case TOK_INVALID_REDIRECT: {
            message = REDIRECT_ERROR;
            break;
        }
        case TOK_INVALID_PIPE: {
            message = PIPE_ERROR;
            break;
        }
        default: {
            // Every known error code is handled above; reaching here is a programming error.
            assert(0 && "Unknown error type");
            break;
        }
    }
    return message;
}
|
||||
|
||||
/// Return an error token and mark that we no longer have a next token.
|
||||
tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *token_start,
|
||||
const wchar_t *error_loc) {
|
||||
@ -49,30 +69,6 @@ tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *to
|
||||
result.offset = token_start - this->start;
|
||||
result.length = this->buff - token_start;
|
||||
result.error_offset = error_loc - token_start;
|
||||
if (!this->squash_errors) {
|
||||
switch (error_type) {
|
||||
case TOK_UNTERMINATED_QUOTE:
|
||||
result.error_text = QUOTE_ERROR;
|
||||
break;
|
||||
case TOK_UNTERMINATED_SUBSHELL:
|
||||
result.error_text = PARAN_ERROR;
|
||||
break;
|
||||
case TOK_UNTERMINATED_SLICE:
|
||||
result.error_text = SQUARE_BRACKET_ERROR;
|
||||
break;
|
||||
case TOK_UNTERMINATED_ESCAPE:
|
||||
result.error_text = UNTERMINATED_ESCAPE_ERROR;
|
||||
break;
|
||||
case TOK_INVALID_REDIRECT:
|
||||
result.error_text = REDIRECT_ERROR;
|
||||
break;
|
||||
case TOK_INVALID_PIPE:
|
||||
result.error_text = PIPE_ERROR;
|
||||
break;
|
||||
default:
|
||||
assert(0 && "Unknown error type");
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -81,7 +77,6 @@ tokenizer_t::tokenizer_t(const wchar_t *start, tok_flags_t flags) : buff(start),
|
||||
|
||||
this->accept_unfinished = static_cast<bool>(flags & TOK_ACCEPT_UNFINISHED);
|
||||
this->show_comments = static_cast<bool>(flags & TOK_SHOW_COMMENTS);
|
||||
this->squash_errors = static_cast<bool>(flags & TOK_SQUASH_ERRORS);
|
||||
this->show_blank_lines = static_cast<bool>(flags & TOK_SHOW_BLANK_LINES);
|
||||
}
|
||||
|
||||
@ -590,7 +585,7 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
|
||||
}
|
||||
|
||||
wcstring tok_first(const wcstring &str) {
|
||||
tokenizer_t t(str.c_str(), TOK_SQUASH_ERRORS);
|
||||
tokenizer_t t(str.c_str(), 0);
|
||||
tok_t token;
|
||||
if (t.next(&token) && token.type == TOK_STRING) {
|
||||
return t.text_of(token);
|
||||
|
@ -46,13 +46,9 @@ enum class redirection_type_t {
|
||||
/// Flag telling the tokenizer not to remove comments. Useful for syntax highlighting.
|
||||
#define TOK_SHOW_COMMENTS 2
|
||||
|
||||
/// Flag telling the tokenizer to not generate error messages, which we need to do when tokenizing
|
||||
/// off of the main thread (since wgettext is not thread safe).
|
||||
#define TOK_SQUASH_ERRORS 4
|
||||
|
||||
/// Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon. This flag tells
|
||||
/// the tokenizer to return each of them as a separate END.
|
||||
#define TOK_SHOW_BLANK_LINES 8
|
||||
#define TOK_SHOW_BLANK_LINES 4
|
||||
|
||||
typedef unsigned int tok_flags_t;
|
||||
|
||||
@ -70,11 +66,10 @@ struct tok_t {
|
||||
|
||||
// If an error, this is the error code.
|
||||
enum tokenizer_error error { TOK_ERROR_NONE };
|
||||
|
||||
// If an error, this is the offset of the error within the token. A value of 0 means it occurred
|
||||
// at 'offset'.
|
||||
size_t error_offset{size_t(-1)};
|
||||
// If there is an error, the text of the error; otherwise empty.
|
||||
wcstring error_text{};
|
||||
|
||||
tok_t() = default;
|
||||
};
|
||||
@ -97,8 +92,6 @@ class tokenizer_t {
|
||||
bool show_comments{false};
|
||||
/// Whether all blank lines are returned.
|
||||
bool show_blank_lines{false};
|
||||
/// Whether we are squashing errors.
|
||||
bool squash_errors{false};
|
||||
/// Whether to continue the previous line after the comment.
|
||||
bool continue_line_after_comment{false};
|
||||
|
||||
@ -145,6 +138,9 @@ int fd_redirected_by_pipe(const wcstring &str);
|
||||
/// Helper function to return oflags (as in open(2)) for a redirection type.
|
||||
int oflags_for_redirection_type(redirection_type_t type);
|
||||
|
||||
/// Returns an error message for an error code.
|
||||
wcstring error_message_for_code(tokenizer_error err);
|
||||
|
||||
enum move_word_style_t {
|
||||
move_word_style_punctuation, // stop at punctuation
|
||||
move_word_style_path_components, // stops at path components
|
||||
|
Loading…
x
Reference in New Issue
Block a user