Work towards refactoring tokenizer to be a real object
This commit is contained in:
parent
e73be48d96
commit
f545fb2491
@ -143,7 +143,6 @@ static void write_part(const wchar_t *begin,
|
||||
int cut_at_cursor,
|
||||
int tokenize)
|
||||
{
|
||||
tokenizer tok;
|
||||
wcstring out;
|
||||
wchar_t *buff;
|
||||
size_t pos;
|
||||
@ -155,9 +154,8 @@ static void write_part(const wchar_t *begin,
|
||||
buff = wcsndup(begin, end-begin);
|
||||
// fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end );
|
||||
out.clear();
|
||||
|
||||
for (tok_init(&tok, buff, TOK_ACCEPT_UNFINISHED);
|
||||
tok_has_next(&tok);
|
||||
tokenizer_t tok(buff, TOK_ACCEPT_UNFINISHED);
|
||||
for (; tok_has_next(&tok);
|
||||
tok_next(&tok))
|
||||
{
|
||||
if ((cut_at_cursor) &&
|
||||
|
@ -1765,7 +1765,6 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, complete_ty
|
||||
completer_t completer(cmd, type);
|
||||
|
||||
const wchar_t *tok_begin, *tok_end, *cmdsubst_begin, *cmdsubst_end, *prev_begin, *prev_end;
|
||||
tokenizer tok;
|
||||
const wchar_t *current_token=0, *prev_token=0;
|
||||
wcstring current_command;
|
||||
int on_command=0;
|
||||
@ -1807,9 +1806,8 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, complete_ty
|
||||
|
||||
int had_cmd=0;
|
||||
int end_loop=0;
|
||||
|
||||
tok_init(&tok, buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
||||
|
||||
|
||||
tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
||||
while (tok_has_next(&tok) && !end_loop)
|
||||
{
|
||||
|
||||
|
@ -84,7 +84,6 @@ static void insert_tabs(wcstring &out, int indent)
|
||||
*/
|
||||
static int indent(wcstring &out, const wcstring &in, int flags)
|
||||
{
|
||||
tokenizer tok;
|
||||
int res=0;
|
||||
int is_command = 1;
|
||||
int indent = 0;
|
||||
@ -92,8 +91,7 @@ static int indent(wcstring &out, const wcstring &in, int flags)
|
||||
int prev_type = 0;
|
||||
int prev_prev_type = 0;
|
||||
|
||||
tok_init(&tok, in.c_str(), TOK_SHOW_COMMENTS);
|
||||
|
||||
tokenizer_t tok(in.c_str(), TOK_SHOW_COMMENTS);
|
||||
for (; tok_has_next(&tok); tok_next(&tok))
|
||||
{
|
||||
int type = tok_last_type(&tok);
|
||||
|
@ -289,13 +289,12 @@ static void test_convert()
|
||||
*/
|
||||
static void test_tok()
|
||||
{
|
||||
tokenizer t;
|
||||
|
||||
say(L"Testing tokenizer");
|
||||
|
||||
|
||||
say(L"Testing invalid input");
|
||||
tok_init(&t, 0, 0);
|
||||
tokenizer_t t(NULL, 0);
|
||||
|
||||
if (tok_last_type(&t) != TOK_ERROR)
|
||||
{
|
||||
@ -326,14 +325,12 @@ static void test_tok()
|
||||
const int types[] =
|
||||
{
|
||||
TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_END
|
||||
}
|
||||
;
|
||||
size_t i;
|
||||
};
|
||||
|
||||
say(L"Test correct tokenization");
|
||||
|
||||
for (i=0, tok_init(&t, str, 0); i<(sizeof(types)/sizeof(int)); i++,tok_next(&t))
|
||||
{
|
||||
|
||||
tokenizer_t t(str, 0);
|
||||
for (size_t i=0; i < sizeof types / sizeof *types; i++, tok_next(&t)) {
|
||||
if (types[i] != tok_last_type(&t))
|
||||
{
|
||||
err(L"Tokenization error:");
|
||||
|
@ -691,8 +691,8 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command
|
||||
int arg_pos = -1;
|
||||
|
||||
bool had_cmd = false;
|
||||
tokenizer tok;
|
||||
for (tok_init(&tok, str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tok_has_next(&tok); tok_next(&tok))
|
||||
tokenizer_t tok(str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
||||
for (; tok_has_next(&tok); tok_next(&tok))
|
||||
{
|
||||
int last_type = tok_last_type(&tok);
|
||||
|
||||
@ -955,10 +955,8 @@ static void tokenize(const wchar_t * const buff, std::vector<int> &color, const
|
||||
|
||||
std::fill(color.begin(), color.end(), -1);
|
||||
|
||||
tokenizer tok;
|
||||
for (tok_init(&tok, buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
|
||||
tok_has_next(&tok);
|
||||
tok_next(&tok))
|
||||
tokenizer_t tok(buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
|
||||
for (; tok_has_next(&tok); tok_next(&tok))
|
||||
{
|
||||
int last_type = tok_last_type(&tok);
|
||||
|
||||
|
@ -1415,10 +1415,8 @@ void history_t::add_with_file_detection(const wcstring &str)
|
||||
ASSERT_IS_MAIN_THREAD();
|
||||
path_list_t potential_paths;
|
||||
|
||||
tokenizer tokenizer;
|
||||
for (tok_init(&tokenizer, str.c_str(), TOK_SQUASH_ERRORS);
|
||||
tok_has_next(&tokenizer);
|
||||
tok_next(&tokenizer))
|
||||
tokenizer_t tokenizer(str.c_str(), TOK_SQUASH_ERRORS);
|
||||
for (; tok_has_next(&tokenizer); tok_next(&tokenizer))
|
||||
{
|
||||
int type = tok_last_type(&tokenizer);
|
||||
if (type == TOK_STRING)
|
||||
|
@ -326,8 +326,6 @@ static void job_or_process_extent(const wchar_t *buff,
|
||||
wchar_t *buffcpy;
|
||||
int finished=0;
|
||||
|
||||
tokenizer tok;
|
||||
|
||||
CHECK(buff,);
|
||||
|
||||
if (a)
|
||||
@ -365,9 +363,8 @@ static void job_or_process_extent(const wchar_t *buff,
|
||||
DIE_MEM();
|
||||
}
|
||||
|
||||
for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED);
|
||||
tok_has_next(&tok) && !finished;
|
||||
tok_next(&tok))
|
||||
tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED);
|
||||
for (; tok_has_next(&tok) && !finished; tok_next(&tok))
|
||||
{
|
||||
int tok_begin = tok_get_pos(&tok);
|
||||
|
||||
@ -440,8 +437,6 @@ void parse_util_token_extent(const wchar_t *buff,
|
||||
long pos;
|
||||
wchar_t *buffcpy;
|
||||
|
||||
tokenizer tok;
|
||||
|
||||
const wchar_t *a = NULL, *b = NULL, *pa = NULL, *pb = NULL;
|
||||
|
||||
CHECK(buff,);
|
||||
@ -474,9 +469,8 @@ void parse_util_token_extent(const wchar_t *buff,
|
||||
DIE_MEM();
|
||||
}
|
||||
|
||||
for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
||||
tok_has_next(&tok);
|
||||
tok_next(&tok))
|
||||
tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
||||
for (; tok_has_next(&tok); tok_next(&tok))
|
||||
{
|
||||
size_t tok_begin = tok_get_pos(&tok);
|
||||
size_t tok_end = tok_begin;
|
||||
@ -711,9 +705,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_
|
||||
wchar_t last_quote = '\0';
|
||||
int unfinished;
|
||||
|
||||
tokenizer tok;
|
||||
tok_init(&tok, cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
||||
|
||||
tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
||||
for (; tok_has_next(&tok); tok_next(&tok))
|
||||
{
|
||||
if (tok_get_pos(&tok) > pos)
|
||||
|
54
parser.cpp
54
parser.cpp
@ -525,7 +525,6 @@ static int parser_is_pipe_forbidden(const wcstring &word)
|
||||
*/
|
||||
static const wchar_t *parser_find_end(const wchar_t * buff)
|
||||
{
|
||||
tokenizer tok;
|
||||
int had_cmd=0;
|
||||
int count = 0;
|
||||
int error=0;
|
||||
@ -533,9 +532,8 @@ static const wchar_t *parser_find_end(const wchar_t * buff)
|
||||
|
||||
CHECK(buff, 0);
|
||||
|
||||
for (tok_init(&tok, buff, 0);
|
||||
tok_has_next(&tok) && !error;
|
||||
tok_next(&tok))
|
||||
tokenizer_t tok(buff, 0);
|
||||
for (; tok_has_next(&tok) && !error; tok_next(&tok))
|
||||
{
|
||||
int last_type = tok_last_type(&tok);
|
||||
switch (last_type)
|
||||
@ -796,7 +794,6 @@ void parser_t::print_errors_stderr()
|
||||
|
||||
int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
|
||||
{
|
||||
tokenizer tok;
|
||||
|
||||
expand_flags_t eflags = 0;
|
||||
if (! show_errors)
|
||||
@ -808,8 +805,8 @@ int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
|
||||
eval_args may be called while evaluating another command, so we
|
||||
save the previous tokenizer and restore it on exit
|
||||
*/
|
||||
tokenizer *previous_tokenizer=current_tokenizer;
|
||||
int previous_pos=current_tokenizer_pos;
|
||||
tokenizer_t * const previous_tokenizer = current_tokenizer;
|
||||
const int previous_pos = current_tokenizer_pos;
|
||||
int do_loop=1;
|
||||
|
||||
CHECK(line, 1);
|
||||
@ -819,10 +816,10 @@ int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
|
||||
if (this->parser_type == PARSER_TYPE_GENERAL)
|
||||
proc_push_interactive(0);
|
||||
|
||||
tokenizer_t tok(line, (show_errors ? 0 : TOK_SQUASH_ERRORS));
|
||||
current_tokenizer = &tok;
|
||||
current_tokenizer_pos = 0;
|
||||
|
||||
tok_init(&tok, line, (show_errors ? 0 : TOK_SQUASH_ERRORS));
|
||||
error_code=0;
|
||||
|
||||
for (; do_loop && tok_has_next(&tok) ; tok_next(&tok))
|
||||
@ -1319,7 +1316,7 @@ job_t *parser_t::job_get_from_pid(int pid)
|
||||
*/
|
||||
void parser_t::parse_job_argument_list(process_t *p,
|
||||
job_t *j,
|
||||
tokenizer *tok,
|
||||
tokenizer_t *tok,
|
||||
std::vector<completion_t> &args,
|
||||
bool unskip)
|
||||
{
|
||||
@ -1718,7 +1715,7 @@ f
|
||||
*/
|
||||
int parser_t::parse_job(process_t *p,
|
||||
job_t *j,
|
||||
tokenizer *tok)
|
||||
tokenizer_t *tok)
|
||||
{
|
||||
std::vector<completion_t> args; // The list that will become the argc array for the program
|
||||
int use_function = 1; // May functions be considered when checking what action this command represents
|
||||
@ -2185,7 +2182,6 @@ int parser_t::parse_job(process_t *p,
|
||||
|
||||
const wchar_t *end=parser_find_end(tok_string(tok) +
|
||||
current_tokenizer_pos);
|
||||
tokenizer subtok;
|
||||
int make_sub_block = j->first_process != p;
|
||||
|
||||
if (!end)
|
||||
@ -2202,9 +2198,8 @@ int parser_t::parse_job(process_t *p,
|
||||
{
|
||||
int done=0;
|
||||
|
||||
for (tok_init(&subtok, end, 0);
|
||||
!done && tok_has_next(&subtok);
|
||||
tok_next(&subtok))
|
||||
tokenizer_t subtok(end, 0);
|
||||
for (; ! done && tok_has_next(&subtok); tok_next(&subtok))
|
||||
{
|
||||
|
||||
switch (tok_last_type(&subtok))
|
||||
@ -2388,7 +2383,7 @@ static bool job_should_skip_elseif(const job_t *job, const block_t *current_bloc
|
||||
\param tok The tokenizer to read tokens from
|
||||
*/
|
||||
|
||||
void parser_t::eval_job(tokenizer *tok)
|
||||
void parser_t::eval_job(tokenizer_t *tok)
|
||||
{
|
||||
ASSERT_IS_MAIN_THREAD();
|
||||
job_t *j;
|
||||
@ -2630,7 +2625,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type
|
||||
const wchar_t * const cmd = cmdStr.c_str();
|
||||
size_t forbid_count;
|
||||
int code;
|
||||
tokenizer *previous_tokenizer=current_tokenizer;
|
||||
tokenizer_t *previous_tokenizer=current_tokenizer;
|
||||
block_t *start_current_block = current_block;
|
||||
|
||||
/* Record the current chain so we can put it back later */
|
||||
@ -2676,8 +2671,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type
|
||||
|
||||
this->push_block(new scope_block_t(block_type));
|
||||
|
||||
current_tokenizer = new tokenizer;
|
||||
tok_init(current_tokenizer, cmd, 0);
|
||||
current_tokenizer = new tokenizer_t(cmd, 0);
|
||||
|
||||
error_code = 0;
|
||||
|
||||
@ -2907,19 +2901,17 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha
|
||||
|
||||
int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *prefix)
|
||||
{
|
||||
tokenizer tok;
|
||||
tokenizer *previous_tokenizer = current_tokenizer;
|
||||
int previous_pos = current_tokenizer_pos;
|
||||
tokenizer_t *const previous_tokenizer = current_tokenizer;
|
||||
const int previous_pos = current_tokenizer_pos;
|
||||
int do_loop = 1;
|
||||
int err = 0;
|
||||
|
||||
CHECK(buff, 1);
|
||||
|
||||
current_tokenizer = &tok;
|
||||
|
||||
for (tok_init(&tok, buff, 0);
|
||||
do_loop && tok_has_next(&tok);
|
||||
tok_next(&tok))
|
||||
tokenizer_t tok(buff, 0);
|
||||
current_tokenizer = &tok;
|
||||
for (; do_loop && tok_has_next(&tok); tok_next(&tok))
|
||||
{
|
||||
current_tokenizer_pos = tok_get_pos(&tok);
|
||||
switch (tok_last_type(&tok))
|
||||
@ -2970,7 +2962,7 @@ int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *pre
|
||||
|
||||
tok_destroy(&tok);
|
||||
|
||||
current_tokenizer=previous_tokenizer;
|
||||
current_tokenizer = previous_tokenizer;
|
||||
current_tokenizer_pos = previous_pos;
|
||||
|
||||
error_code=0;
|
||||
@ -2985,7 +2977,6 @@ int parser_t::test(const wchar_t * buff,
|
||||
{
|
||||
ASSERT_IS_MAIN_THREAD();
|
||||
|
||||
tokenizer tok;
|
||||
/*
|
||||
Set to one if a command name has been given for the currently
|
||||
parsed process specification
|
||||
@ -2994,8 +2985,8 @@ int parser_t::test(const wchar_t * buff,
|
||||
int err=0;
|
||||
int unfinished = 0;
|
||||
|
||||
tokenizer *previous_tokenizer=current_tokenizer;
|
||||
int previous_pos=current_tokenizer_pos;
|
||||
tokenizer_t * const previous_tokenizer=current_tokenizer;
|
||||
const int previous_pos=current_tokenizer_pos;
|
||||
|
||||
int block_pos[BLOCK_MAX_COUNT] = {};
|
||||
block_type_t block_type[BLOCK_MAX_COUNT] = {};
|
||||
@ -3043,11 +3034,10 @@ int parser_t::test(const wchar_t * buff,
|
||||
|
||||
}
|
||||
|
||||
tokenizer_t tok(buff, 0);
|
||||
current_tokenizer = &tok;
|
||||
|
||||
for (tok_init(&tok, buff, 0);
|
||||
;
|
||||
tok_next(&tok))
|
||||
for (;; tok_next(&tok))
|
||||
{
|
||||
current_tokenizer_pos = tok_get_pos(&tok);
|
||||
|
||||
|
10
parser.h
10
parser.h
@ -295,7 +295,7 @@ struct profile_item_t
|
||||
wcstring cmd;
|
||||
};
|
||||
|
||||
struct tokenizer;
|
||||
struct tokenizer_t;
|
||||
|
||||
class parser_t
|
||||
{
|
||||
@ -316,7 +316,7 @@ private:
|
||||
wcstring err_buff;
|
||||
|
||||
/** Pointer to the current tokenizer */
|
||||
tokenizer *current_tokenizer;
|
||||
tokenizer_t *current_tokenizer;
|
||||
|
||||
/** String for representing the current line */
|
||||
wcstring lineinfo;
|
||||
@ -344,10 +344,10 @@ private:
|
||||
parser_t(const parser_t&);
|
||||
parser_t& operator=(const parser_t&);
|
||||
|
||||
void parse_job_argument_list(process_t *p, job_t *j, tokenizer *tok, std::vector<completion_t>&, bool);
|
||||
int parse_job(process_t *p, job_t *j, tokenizer *tok);
|
||||
void parse_job_argument_list(process_t *p, job_t *j, tokenizer_t *tok, std::vector<completion_t>&, bool);
|
||||
int parse_job(process_t *p, job_t *j, tokenizer_t *tok);
|
||||
void skipped_exec(job_t * j);
|
||||
void eval_job(tokenizer *tok);
|
||||
void eval_job(tokenizer_t *tok);
|
||||
int parser_test_argument(const wchar_t *arg, wcstring *out, const wchar_t *prefix, int offset);
|
||||
void print_errors(wcstring &target, const wchar_t *prefix);
|
||||
void print_errors_stderr();
|
||||
|
@ -1821,7 +1821,6 @@ static void handle_token_history(int forward, int reset)
|
||||
|
||||
const wchar_t *str=0;
|
||||
long current_pos;
|
||||
tokenizer tok;
|
||||
|
||||
if (reset)
|
||||
{
|
||||
@ -1895,10 +1894,8 @@ static void handle_token_history(int forward, int reset)
|
||||
{
|
||||
|
||||
//debug( 3, L"new '%ls'", data->token_history_buff.c_str() );
|
||||
|
||||
for (tok_init(&tok, data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED);
|
||||
tok_has_next(&tok);
|
||||
tok_next(&tok))
|
||||
tokenizer_t tok(data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED);
|
||||
for (; tok_has_next(&tok); tok_next(&tok))
|
||||
{
|
||||
switch (tok_last_type(&tok))
|
||||
{
|
||||
|
109
tokenizer.cpp
109
tokenizer.cpp
@ -83,7 +83,7 @@ static const wchar_t *tok_desc[] =
|
||||
|
||||
\return 0 if the system could not provide the memory needed, and 1 otherwise.
|
||||
*/
|
||||
static int check_size(tokenizer *tok, size_t len)
|
||||
static int check_size(tokenizer_t *tok, size_t len)
|
||||
{
|
||||
if (tok->last_len <= len)
|
||||
{
|
||||
@ -103,7 +103,7 @@ static int check_size(tokenizer *tok, size_t len)
|
||||
/**
|
||||
Set the latest tokens string to be the specified error message
|
||||
*/
|
||||
static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_message)
|
||||
static void tok_call_error(tokenizer_t *tok, int error_type, const wchar_t *error_message)
|
||||
{
|
||||
tok->last_type = TOK_ERROR;
|
||||
tok->error = error_type;
|
||||
@ -117,13 +117,13 @@ static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_
|
||||
wcscpy(tok->last, error_message);
|
||||
}
|
||||
|
||||
int tok_get_error(tokenizer *tok)
|
||||
int tok_get_error(tokenizer_t *tok)
|
||||
{
|
||||
return tok->error;
|
||||
}
|
||||
|
||||
|
||||
void tok_init(tokenizer *tok, const wchar_t *b, int flags)
|
||||
tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last(NULL), last_type(0), last_len(0), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), last_quote(0), error(0), squash_errors(false), cached_lineno_offset(0), cached_lineno_count(0)
|
||||
{
|
||||
|
||||
/* We can only generate error messages on the main thread due to wgettext() thread safety issues. */
|
||||
@ -132,33 +132,28 @@ void tok_init(tokenizer *tok, const wchar_t *b, int flags)
|
||||
ASSERT_IS_MAIN_THREAD();
|
||||
}
|
||||
|
||||
CHECK(tok,);
|
||||
|
||||
memset(tok, 0, sizeof(tokenizer));
|
||||
|
||||
CHECK(b,);
|
||||
|
||||
|
||||
tok->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED);
|
||||
tok->show_comments = !!(flags & TOK_SHOW_COMMENTS);
|
||||
tok->squash_errors = !!(flags & TOK_SQUASH_ERRORS);
|
||||
tok->has_next=true;
|
||||
this->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED);
|
||||
this->show_comments = !!(flags & TOK_SHOW_COMMENTS);
|
||||
this->squash_errors = !!(flags & TOK_SQUASH_ERRORS);
|
||||
|
||||
tok->has_next = (*b != L'\0');
|
||||
tok->orig_buff = tok->buff = b;
|
||||
tok->cached_lineno_offset = 0;
|
||||
tok->cached_lineno_count = 0;
|
||||
tok_next(tok);
|
||||
this->has_next = (*b != L'\0');
|
||||
this->orig_buff = this->buff = b;
|
||||
this->cached_lineno_offset = 0;
|
||||
this->cached_lineno_count = 0;
|
||||
tok_next(this);
|
||||
}
|
||||
|
||||
void tok_destroy(tokenizer *tok)
|
||||
void tok_destroy(tokenizer_t *tok)
|
||||
{
|
||||
CHECK(tok,);
|
||||
|
||||
free(tok->last);
|
||||
}
|
||||
|
||||
int tok_last_type(tokenizer *tok)
|
||||
int tok_last_type(tokenizer_t *tok)
|
||||
{
|
||||
CHECK(tok, TOK_ERROR);
|
||||
CHECK(tok->buff, TOK_ERROR);
|
||||
@ -166,14 +161,14 @@ int tok_last_type(tokenizer *tok)
|
||||
return tok->last_type;
|
||||
}
|
||||
|
||||
wchar_t *tok_last(tokenizer *tok)
|
||||
wchar_t *tok_last(tokenizer_t *tok)
|
||||
{
|
||||
CHECK(tok, 0);
|
||||
|
||||
return tok->last;
|
||||
}
|
||||
|
||||
int tok_has_next(tokenizer *tok)
|
||||
int tok_has_next(tokenizer_t *tok)
|
||||
{
|
||||
/*
|
||||
Return 1 on broken tokenizer
|
||||
@ -185,7 +180,7 @@ int tok_has_next(tokenizer *tok)
|
||||
return tok->has_next;
|
||||
}
|
||||
|
||||
int tokenizer::line_number_of_character_at_offset(size_t offset)
|
||||
int tokenizer_t::line_number_of_character_at_offset(size_t offset)
|
||||
{
|
||||
// we want to return (one plus) the number of newlines at offsets less than the given offset
|
||||
// cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset
|
||||
@ -265,24 +260,28 @@ static int myal(wchar_t c)
|
||||
/**
|
||||
Read the next token as a string
|
||||
*/
|
||||
static void read_string(tokenizer *tok)
|
||||
static void read_string(tokenizer_t *tok)
|
||||
{
|
||||
const wchar_t *start;
|
||||
long len;
|
||||
int mode=0;
|
||||
int do_loop=1;
|
||||
int paran_count=0;
|
||||
|
||||
start = tok->buff;
|
||||
bool is_first = true;
|
||||
|
||||
enum tok_mode_t {
|
||||
mode_regular_text = 0, // regular text
|
||||
mode_subshell = 1, // inside of subshell
|
||||
mode_array_brackets = 2, // inside of array brackets
|
||||
mode_array_brackets_and_subshell = 3 // inside of array brackets and subshell, like in '$foo[(ech'
|
||||
} mode = mode_regular_text;
|
||||
|
||||
while (1)
|
||||
{
|
||||
|
||||
if (!myal(*tok->buff))
|
||||
{
|
||||
// debug(1, L"%lc", *tok->buff );
|
||||
|
||||
if (*tok->buff == L'\\')
|
||||
{
|
||||
tok->buff++;
|
||||
@ -296,13 +295,13 @@ static void read_string(tokenizer *tok)
|
||||
else
|
||||
{
|
||||
/* Since we are about to increment tok->buff, decrement it first so the increment doesn't go past the end of the buffer. https://github.com/fish-shell/fish-shell/issues/389 */
|
||||
do_loop = 0;
|
||||
tok->buff--;
|
||||
do_loop = 0;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
else if (*tok->buff == L'\n' && mode == 0)
|
||||
else if (*tok->buff == L'\n' && mode == mode_regular_text)
|
||||
{
|
||||
tok->buff--;
|
||||
do_loop = 0;
|
||||
@ -312,33 +311,24 @@ static void read_string(tokenizer *tok)
|
||||
tok->buff++;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
The modes are as follows:
|
||||
|
||||
0: regular text
|
||||
1: inside of subshell
|
||||
2: inside of array brackets
|
||||
3: inside of array brackets and subshell, like in '$foo[(ech'
|
||||
*/
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case 0:
|
||||
case mode_regular_text:
|
||||
{
|
||||
switch (*tok->buff)
|
||||
{
|
||||
case L'(':
|
||||
{
|
||||
paran_count=1;
|
||||
mode = 1;
|
||||
mode = mode_subshell;
|
||||
break;
|
||||
}
|
||||
|
||||
case L'[':
|
||||
{
|
||||
if (tok->buff != start)
|
||||
mode=2;
|
||||
mode = mode_array_brackets;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -356,7 +346,7 @@ static void read_string(tokenizer *tok)
|
||||
{
|
||||
tok->buff += wcslen(tok->buff);
|
||||
|
||||
if ((!tok->accept_unfinished))
|
||||
if (! tok->accept_unfinished)
|
||||
{
|
||||
TOK_CALL_ERROR(tok, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR);
|
||||
return;
|
||||
@ -369,7 +359,7 @@ static void read_string(tokenizer *tok)
|
||||
|
||||
default:
|
||||
{
|
||||
if (!tok_is_string_character(*(tok->buff), is_first))
|
||||
if (! tok_is_string_character(*(tok->buff), is_first))
|
||||
{
|
||||
do_loop=0;
|
||||
}
|
||||
@ -378,8 +368,8 @@ static void read_string(tokenizer *tok)
|
||||
break;
|
||||
}
|
||||
|
||||
case 3:
|
||||
case 1:
|
||||
case mode_array_brackets_and_subshell:
|
||||
case mode_subshell:
|
||||
switch (*tok->buff)
|
||||
{
|
||||
case L'\'':
|
||||
@ -411,7 +401,7 @@ static void read_string(tokenizer *tok)
|
||||
paran_count--;
|
||||
if (paran_count == 0)
|
||||
{
|
||||
mode--;
|
||||
mode = (mode == mode_array_brackets_and_subshell ? mode_array_brackets : mode_regular_text);
|
||||
}
|
||||
break;
|
||||
case L'\0':
|
||||
@ -419,16 +409,17 @@ static void read_string(tokenizer *tok)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
|
||||
case mode_array_brackets:
|
||||
switch (*tok->buff)
|
||||
{
|
||||
case L'(':
|
||||
paran_count=1;
|
||||
mode = 3;
|
||||
mode = mode_array_brackets_and_subshell;
|
||||
break;
|
||||
|
||||
case L']':
|
||||
mode=0;
|
||||
mode = mode_regular_text;
|
||||
break;
|
||||
|
||||
case L'\0':
|
||||
@ -447,7 +438,7 @@ static void read_string(tokenizer *tok)
|
||||
is_first = false;
|
||||
}
|
||||
|
||||
if ((!tok->accept_unfinished) && (mode!=0))
|
||||
if ((!tok->accept_unfinished) && (mode != mode_regular_text))
|
||||
{
|
||||
TOK_CALL_ERROR(tok, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR);
|
||||
return;
|
||||
@ -467,7 +458,7 @@ static void read_string(tokenizer *tok)
|
||||
/**
|
||||
Read the next token as a comment.
|
||||
*/
|
||||
static void read_comment(tokenizer *tok)
|
||||
static void read_comment(tokenizer_t *tok)
|
||||
{
|
||||
const wchar_t *start;
|
||||
|
||||
@ -487,7 +478,7 @@ static void read_comment(tokenizer *tok)
|
||||
/**
|
||||
Read a FD redirection.
|
||||
*/
|
||||
static void read_redirect(tokenizer *tok, int fd)
|
||||
static void read_redirect(tokenizer_t *tok, int fd)
|
||||
{
|
||||
int mode = -1;
|
||||
|
||||
@ -552,7 +543,7 @@ static void read_redirect(tokenizer *tok, int fd)
|
||||
}
|
||||
}
|
||||
|
||||
wchar_t tok_last_quote(tokenizer *tok)
|
||||
wchar_t tok_last_quote(tokenizer_t *tok)
|
||||
{
|
||||
CHECK(tok, 0);
|
||||
|
||||
@ -582,7 +573,7 @@ const wchar_t *tok_get_desc(int type)
|
||||
}
|
||||
|
||||
|
||||
void tok_next(tokenizer *tok)
|
||||
void tok_next(tokenizer_t *tok)
|
||||
{
|
||||
|
||||
CHECK(tok,);
|
||||
@ -705,20 +696,18 @@ void tok_next(tokenizer *tok)
|
||||
|
||||
}
|
||||
|
||||
const wchar_t *tok_string(tokenizer *tok)
|
||||
const wchar_t *tok_string(tokenizer_t *tok)
|
||||
{
|
||||
return tok?tok->orig_buff:0;
|
||||
}
|
||||
|
||||
wchar_t *tok_first(const wchar_t *str)
|
||||
{
|
||||
tokenizer t;
|
||||
wchar_t *res=0;
|
||||
|
||||
CHECK(str, 0);
|
||||
|
||||
tok_init(&t, str, TOK_SQUASH_ERRORS);
|
||||
|
||||
tokenizer_t t(str, TOK_SQUASH_ERRORS);
|
||||
switch (tok_last_type(&t))
|
||||
{
|
||||
case TOK_STRING:
|
||||
@ -733,7 +722,7 @@ wchar_t *tok_first(const wchar_t *str)
|
||||
return res;
|
||||
}
|
||||
|
||||
int tok_get_pos(tokenizer *tok)
|
||||
int tok_get_pos(tokenizer_t *tok)
|
||||
{
|
||||
CHECK(tok, 0);
|
||||
|
||||
@ -741,7 +730,7 @@ int tok_get_pos(tokenizer *tok)
|
||||
}
|
||||
|
||||
|
||||
void tok_set_pos(tokenizer *tok, int pos)
|
||||
void tok_set_pos(tokenizer_t *tok, int pos)
|
||||
{
|
||||
CHECK(tok,);
|
||||
|
||||
|
49
tokenizer.h
49
tokenizer.h
@ -61,11 +61,12 @@ enum tokenizer_error
|
||||
*/
|
||||
#define TOK_SQUASH_ERRORS 4
|
||||
|
||||
typedef unsigned int tok_flags_t;
|
||||
|
||||
/**
|
||||
The tokenizer struct.
|
||||
*/
|
||||
struct tokenizer
|
||||
struct tokenizer_t
|
||||
{
|
||||
/** A pointer into the original string, showing where the next token begins */
|
||||
const wchar_t *buff;
|
||||
@ -100,62 +101,60 @@ struct tokenizer
|
||||
/** Return the line number of the character at the given offset */
|
||||
int line_number_of_character_at_offset(size_t offset);
|
||||
|
||||
/**
|
||||
Constructor for a tokenizer. b is the string that is to be
|
||||
tokenized. It is not copied, and should not be freed by the caller
|
||||
until after the tokenizer is destroyed.
|
||||
|
||||
\param b The string to tokenize
|
||||
\param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
|
||||
to accept incomplete tokens, such as a subshell without a closing
|
||||
parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
|
||||
|
||||
*/
|
||||
tokenizer_t(const wchar_t *b, tok_flags_t flags);
|
||||
};
|
||||
|
||||
/**
|
||||
Initialize the tokenizer. b is the string that is to be
|
||||
tokenized. It is not copied, and should not be freed by the caller
|
||||
until after the tokenizer is destroyed.
|
||||
|
||||
\param tok The tokenizer to initialize
|
||||
\param b The string to tokenize
|
||||
\param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
|
||||
to accept incomplete tokens, such as a subshell without a closing
|
||||
parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
|
||||
|
||||
*/
|
||||
void tok_init(tokenizer *tok, const wchar_t *b, int flags);
|
||||
|
||||
/**
|
||||
Jump to the next token.
|
||||
*/
|
||||
void tok_next(tokenizer *tok);
|
||||
void tok_next(tokenizer_t *tok);
|
||||
|
||||
/**
|
||||
Returns the type of the last token. Must be one of the values in the token_type enum.
|
||||
*/
|
||||
int tok_last_type(tokenizer *tok);
|
||||
int tok_last_type(tokenizer_t *tok);
|
||||
|
||||
/**
|
||||
Returns the last token string. The string should not be freed by the caller.
|
||||
*/
|
||||
wchar_t *tok_last(tokenizer *tok);
|
||||
wchar_t *tok_last(tokenizer_t *tok);
|
||||
|
||||
/**
|
||||
Returns the type of quote from the last TOK_QSTRING
|
||||
*/
|
||||
wchar_t tok_last_quote(tokenizer *tok);
|
||||
wchar_t tok_last_quote(tokenizer_t *tok);
|
||||
|
||||
/**
|
||||
Returns true as long as there are more tokens left
|
||||
*/
|
||||
int tok_has_next(tokenizer *tok);
|
||||
int tok_has_next(tokenizer_t *tok);
|
||||
|
||||
/**
|
||||
Returns the position of the beginning of the current token in the original string
|
||||
*/
|
||||
int tok_get_pos(tokenizer *tok);
|
||||
int tok_get_pos(tokenizer_t *tok);
|
||||
|
||||
/**
|
||||
Destroy the tokenizer and free associated memory
|
||||
*/
|
||||
void tok_destroy(tokenizer *tok);
|
||||
void tok_destroy(tokenizer_t *tok);
|
||||
|
||||
|
||||
/**
|
||||
Returns the original string that was passed to the tokenizer
|
||||
*/
|
||||
const wchar_t *tok_string(tokenizer *tok);
|
||||
const wchar_t *tok_string(tokenizer_t *tok);
|
||||
|
||||
|
||||
/**
|
||||
@ -178,7 +177,7 @@ bool tok_is_string_character(wchar_t c, bool is_first);
|
||||
/**
|
||||
Move tokenizer position
|
||||
*/
|
||||
void tok_set_pos(tokenizer *tok, int pos);
|
||||
void tok_set_pos(tokenizer_t *tok, int pos);
|
||||
|
||||
/**
|
||||
Returns a string description of the specified token type
|
||||
@ -188,7 +187,7 @@ const wchar_t *tok_get_desc(int type);
|
||||
/**
|
||||
Get tokenizer error type. Should only be called if tok_last_type returns TOK_ERROR.
|
||||
*/
|
||||
int tok_get_error(tokenizer *tok);
|
||||
int tok_get_error(tokenizer_t *tok);
|
||||
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user