Work towards refactoring tokenizer to be a real object

2012-11-21 17:48:35 -08:00 · 2012-11-21 17:48:35 -08:00 · f545fb2491
commit f545fb2491
parent e73be48d96
12 changed files with 123 additions and 169 deletions
--- a/builtin_commandline.cpp
+++ b/builtin_commandline.cpp
@ -143,7 +143,6 @@ static void write_part(const wchar_t *begin,
                       int cut_at_cursor,
                       int tokenize)
 {
-    tokenizer tok;
    wcstring out;
    wchar_t *buff;
    size_t pos;
@ -155,9 +154,8 @@ static void write_part(const wchar_t *begin,
        buff = wcsndup(begin, end-begin);
 //    fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end );
        out.clear();
-
-        for (tok_init(&tok, buff, TOK_ACCEPT_UNFINISHED);
-                tok_has_next(&tok);
+        tokenizer_t tok(buff, TOK_ACCEPT_UNFINISHED);
+        for (; tok_has_next(&tok);
                tok_next(&tok))
        {
            if ((cut_at_cursor) &&
--- a/complete.cpp
+++ b/complete.cpp
@ -1765,7 +1765,6 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, complete_ty
    completer_t completer(cmd, type);

    const wchar_t *tok_begin, *tok_end, *cmdsubst_begin, *cmdsubst_end, *prev_begin, *prev_end;
-    tokenizer tok;
    const wchar_t *current_token=0, *prev_token=0;
    wcstring current_command;
    int on_command=0;
@ -1807,9 +1806,8 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, complete_ty

        int had_cmd=0;
        int end_loop=0;
-
-        tok_init(&tok, buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
-
+        
+        tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
        while (tok_has_next(&tok) && !end_loop)
        {

--- a/fish_indent.cpp
+++ b/fish_indent.cpp
@ -84,7 +84,6 @@ static void insert_tabs(wcstring &out, int indent)
 */
 static int indent(wcstring &out, const wcstring &in, int flags)
 {
-    tokenizer tok;
    int res=0;
    int is_command = 1;
    int indent = 0;
@ -92,8 +91,7 @@ static int indent(wcstring &out, const wcstring &in, int flags)
    int prev_type = 0;
    int prev_prev_type = 0;

-    tok_init(&tok, in.c_str(), TOK_SHOW_COMMENTS);
-
+    tokenizer_t tok(in.c_str(), TOK_SHOW_COMMENTS);
    for (; tok_has_next(&tok); tok_next(&tok))
    {
        int type = tok_last_type(&tok);
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@ -289,13 +289,12 @@ static void test_convert()
 */
 static void test_tok()
 {
-    tokenizer t;

    say(L"Testing tokenizer");


    say(L"Testing invalid input");
-    tok_init(&t, 0, 0);
+    tokenizer_t t(NULL, 0);

    if (tok_last_type(&t) != TOK_ERROR)
    {
@ -326,14 +325,12 @@ static void test_tok()
        const int types[] =
        {
            TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_END
-        }
-        ;
-        size_t i;
+        };

        say(L"Test correct tokenization");
-
-        for (i=0, tok_init(&t, str, 0); i<(sizeof(types)/sizeof(int)); i++,tok_next(&t))
-        {
+        
+        tokenizer_t t(str, 0);
+        for (size_t i=0; i < sizeof types / sizeof *types; i++, tok_next(&t)) {
            if (types[i] != tok_last_type(&t))
            {
                err(L"Tokenization error:");
--- a/highlight.cpp
+++ b/highlight.cpp
@ -691,8 +691,8 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command
    int arg_pos = -1;

    bool had_cmd = false;
-    tokenizer tok;
-    for (tok_init(&tok, str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tok_has_next(&tok); tok_next(&tok))
+    tokenizer_t tok(str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
+    for (; tok_has_next(&tok); tok_next(&tok))
    {
        int last_type = tok_last_type(&tok);

@ -955,10 +955,8 @@ static void tokenize(const wchar_t * const buff, std::vector<int> &color, const

    std::fill(color.begin(), color.end(), -1);

-    tokenizer tok;
-    for (tok_init(&tok, buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
-            tok_has_next(&tok);
-            tok_next(&tok))
+    tokenizer_t tok(buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
+    for (; tok_has_next(&tok); tok_next(&tok))
    {
        int last_type = tok_last_type(&tok);

--- a/history.cpp
+++ b/history.cpp
@ -1415,10 +1415,8 @@ void history_t::add_with_file_detection(const wcstring &str)
    ASSERT_IS_MAIN_THREAD();
    path_list_t potential_paths;

-    tokenizer tokenizer;
-    for (tok_init(&tokenizer, str.c_str(), TOK_SQUASH_ERRORS);
-            tok_has_next(&tokenizer);
-            tok_next(&tokenizer))
+    tokenizer_t tokenizer(str.c_str(), TOK_SQUASH_ERRORS);
+    for (; tok_has_next(&tokenizer); tok_next(&tokenizer))
    {
        int type = tok_last_type(&tokenizer);
        if (type == TOK_STRING)
--- a/parse_util.cpp
+++ b/parse_util.cpp
@ -326,8 +326,6 @@ static void job_or_process_extent(const wchar_t *buff,
    wchar_t *buffcpy;
    int finished=0;

-    tokenizer tok;
-
    CHECK(buff,);

    if (a)
@ -365,9 +363,8 @@ static void job_or_process_extent(const wchar_t *buff,
        DIE_MEM();
    }

-    for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED);
-            tok_has_next(&tok) && !finished;
-            tok_next(&tok))
+    tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED);
+    for (; tok_has_next(&tok) && !finished; tok_next(&tok))
    {
        int tok_begin = tok_get_pos(&tok);

@ -440,8 +437,6 @@ void parse_util_token_extent(const wchar_t *buff,
    long pos;
    wchar_t *buffcpy;

-    tokenizer tok;
-
    const wchar_t *a = NULL, *b = NULL, *pa = NULL, *pb = NULL;

    CHECK(buff,);
@ -474,9 +469,8 @@ void parse_util_token_extent(const wchar_t *buff,
        DIE_MEM();
    }

-    for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
-            tok_has_next(&tok);
-            tok_next(&tok))
+    tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
+    for (; tok_has_next(&tok); tok_next(&tok))
    {
        size_t tok_begin = tok_get_pos(&tok);
        size_t tok_end = tok_begin;
@ -711,9 +705,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_
    wchar_t last_quote = '\0';
    int unfinished;

-    tokenizer tok;
-    tok_init(&tok, cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
-
+    tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
    for (; tok_has_next(&tok); tok_next(&tok))
    {
        if (tok_get_pos(&tok) > pos)
--- a/parser.cpp
+++ b/parser.cpp
@ -525,7 +525,6 @@ static int parser_is_pipe_forbidden(const wcstring &word)
 */
 static const wchar_t *parser_find_end(const wchar_t * buff)
 {
-    tokenizer tok;
    int had_cmd=0;
    int count = 0;
    int error=0;
@ -533,9 +532,8 @@ static const wchar_t *parser_find_end(const wchar_t * buff)

    CHECK(buff, 0);

-    for (tok_init(&tok, buff, 0);
-            tok_has_next(&tok) && !error;
-            tok_next(&tok))
+    tokenizer_t tok(buff, 0);
+    for (; tok_has_next(&tok) && !error; tok_next(&tok))
    {
        int last_type = tok_last_type(&tok);
        switch (last_type)
@ -796,7 +794,6 @@ void parser_t::print_errors_stderr()

 int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
 {
-    tokenizer tok;

    expand_flags_t eflags = 0;
    if (! show_errors)
@ -808,8 +805,8 @@ int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
      eval_args may be called while evaluating another command, so we
      save the previous tokenizer and restore it on exit
    */
-    tokenizer *previous_tokenizer=current_tokenizer;
-    int previous_pos=current_tokenizer_pos;
+    tokenizer_t * const previous_tokenizer = current_tokenizer;
+    const int previous_pos = current_tokenizer_pos;
    int do_loop=1;

    CHECK(line, 1);
@ -819,10 +816,10 @@ int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
    if (this->parser_type == PARSER_TYPE_GENERAL)
        proc_push_interactive(0);

+    tokenizer_t tok(line, (show_errors ? 0 : TOK_SQUASH_ERRORS));
    current_tokenizer = &tok;
    current_tokenizer_pos = 0;

-    tok_init(&tok, line, (show_errors ? 0 : TOK_SQUASH_ERRORS));
    error_code=0;

    for (; do_loop && tok_has_next(&tok) ; tok_next(&tok))
@ -1319,7 +1316,7 @@ job_t *parser_t::job_get_from_pid(int pid)
 */
 void parser_t::parse_job_argument_list(process_t *p,
                                       job_t *j,
-                                       tokenizer *tok,
+                                       tokenizer_t *tok,
                                       std::vector<completion_t> &args,
                                       bool unskip)
 {
@ -1718,7 +1715,7 @@ f
 */
 int parser_t::parse_job(process_t *p,
                        job_t *j,
-                        tokenizer *tok)
+                        tokenizer_t *tok)
 {
    std::vector<completion_t> args; // The list that will become the argc array for the program
    int use_function = 1;   // May functions be considered when checking what action this command represents
@ -2185,7 +2182,6 @@ int parser_t::parse_job(process_t *p,

        const wchar_t *end=parser_find_end(tok_string(tok) +
                                           current_tokenizer_pos);
-        tokenizer subtok;
        int make_sub_block = j->first_process != p;

        if (!end)
@ -2202,9 +2198,8 @@ int parser_t::parse_job(process_t *p,
            {
                int done=0;

-                for (tok_init(&subtok, end, 0);
-                        !done && tok_has_next(&subtok);
-                        tok_next(&subtok))
+                tokenizer_t subtok(end, 0);
+                for (; ! done && tok_has_next(&subtok); tok_next(&subtok))
                {

                    switch (tok_last_type(&subtok))
@ -2388,7 +2383,7 @@ static bool job_should_skip_elseif(const job_t *job, const block_t *current_bloc
   \param tok The tokenizer to read tokens from
 */

-void parser_t::eval_job(tokenizer *tok)
+void parser_t::eval_job(tokenizer_t *tok)
 {
    ASSERT_IS_MAIN_THREAD();
    job_t *j;
@ -2630,7 +2625,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type
    const wchar_t * const cmd = cmdStr.c_str();
    size_t forbid_count;
    int code;
-    tokenizer *previous_tokenizer=current_tokenizer;
+    tokenizer_t *previous_tokenizer=current_tokenizer;
    block_t *start_current_block = current_block;

    /* Record the current chain so we can put it back later */
@ -2676,8 +2671,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type

    this->push_block(new scope_block_t(block_type));

-    current_tokenizer = new tokenizer;
-    tok_init(current_tokenizer, cmd, 0);
+    current_tokenizer = new tokenizer_t(cmd, 0);

    error_code = 0;

@ -2907,19 +2901,17 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha

 int parser_t::test_args(const  wchar_t * buff, wcstring *out, const wchar_t *prefix)
 {
-    tokenizer tok;
-    tokenizer *previous_tokenizer = current_tokenizer;
-    int previous_pos = current_tokenizer_pos;
+    tokenizer_t *const previous_tokenizer = current_tokenizer;
+    const int previous_pos = current_tokenizer_pos;
    int do_loop = 1;
    int err = 0;

    CHECK(buff, 1);

-    current_tokenizer = &tok;

-    for (tok_init(&tok, buff, 0);
-            do_loop && tok_has_next(&tok);
-            tok_next(&tok))
+    tokenizer_t tok(buff, 0);
+    current_tokenizer = &tok;
+    for (; do_loop && tok_has_next(&tok); tok_next(&tok))
    {
        current_tokenizer_pos = tok_get_pos(&tok);
        switch (tok_last_type(&tok))
@ -2970,7 +2962,7 @@ int parser_t::test_args(const  wchar_t * buff, wcstring *out, const wchar_t *pre

    tok_destroy(&tok);

-    current_tokenizer=previous_tokenizer;
+    current_tokenizer = previous_tokenizer;
    current_tokenizer_pos = previous_pos;

    error_code=0;
@ -2985,7 +2977,6 @@ int parser_t::test(const  wchar_t * buff,
 {
    ASSERT_IS_MAIN_THREAD();

-    tokenizer tok;
    /*
       Set to one if a command name has been given for the currently
       parsed process specification
@ -2994,8 +2985,8 @@ int parser_t::test(const  wchar_t * buff,
    int err=0;
    int unfinished = 0;

-    tokenizer *previous_tokenizer=current_tokenizer;
-    int previous_pos=current_tokenizer_pos;
+    tokenizer_t * const previous_tokenizer=current_tokenizer;
+    const int previous_pos=current_tokenizer_pos;

    int block_pos[BLOCK_MAX_COUNT] = {};
    block_type_t block_type[BLOCK_MAX_COUNT] = {};
@ -3043,11 +3034,10 @@ int parser_t::test(const  wchar_t * buff,

    }

+    tokenizer_t tok(buff, 0);
    current_tokenizer = &tok;

-    for (tok_init(&tok, buff, 0);
-            ;
-            tok_next(&tok))
+    for (;; tok_next(&tok))
    {
        current_tokenizer_pos = tok_get_pos(&tok);

--- a/parser.h
+++ b/parser.h
@ -295,7 +295,7 @@ struct profile_item_t
    wcstring cmd;
 };

-struct tokenizer;
+struct tokenizer_t;

 class parser_t
 {
@ -316,7 +316,7 @@ private:
    wcstring err_buff;

    /** Pointer to the current tokenizer */
-    tokenizer *current_tokenizer;
+    tokenizer_t *current_tokenizer;

    /** String for representing the current line */
    wcstring lineinfo;
@ -344,10 +344,10 @@ private:
    parser_t(const parser_t&);
    parser_t& operator=(const parser_t&);

-    void parse_job_argument_list(process_t *p, job_t *j, tokenizer *tok, std::vector<completion_t>&, bool);
-    int parse_job(process_t *p, job_t *j, tokenizer *tok);
+    void parse_job_argument_list(process_t *p, job_t *j, tokenizer_t *tok, std::vector<completion_t>&, bool);
+    int parse_job(process_t *p, job_t *j, tokenizer_t *tok);
    void skipped_exec(job_t * j);
-    void eval_job(tokenizer *tok);
+    void eval_job(tokenizer_t *tok);
    int parser_test_argument(const wchar_t *arg, wcstring *out, const wchar_t *prefix, int offset);
    void print_errors(wcstring &target, const wchar_t *prefix);
    void print_errors_stderr();
--- a/reader.cpp
+++ b/reader.cpp
@ -1821,7 +1821,6 @@ static void handle_token_history(int forward, int reset)

    const wchar_t *str=0;
    long current_pos;
-    tokenizer tok;

    if (reset)
    {
@ -1895,10 +1894,8 @@ static void handle_token_history(int forward, int reset)
        {

            //debug( 3, L"new '%ls'", data->token_history_buff.c_str() );
-
-            for (tok_init(&tok, data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED);
-                    tok_has_next(&tok);
-                    tok_next(&tok))
+            tokenizer_t tok(data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED);
+            for (; tok_has_next(&tok); tok_next(&tok))
            {
                switch (tok_last_type(&tok))
                {
--- a/tokenizer.cpp
+++ b/tokenizer.cpp
@ -83,7 +83,7 @@ static const wchar_t *tok_desc[] =

   \return 0 if the system could not provide the memory needed, and 1 otherwise.
 */
-static int check_size(tokenizer *tok, size_t len)
+static int check_size(tokenizer_t *tok, size_t len)
 {
    if (tok->last_len <= len)
    {
@ -103,7 +103,7 @@ static int check_size(tokenizer *tok, size_t len)
 /**
   Set the latest tokens string to be the specified error message
 */
-static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_message)
+static void tok_call_error(tokenizer_t *tok, int error_type, const wchar_t *error_message)
 {
    tok->last_type = TOK_ERROR;
    tok->error = error_type;
@ -117,13 +117,13 @@ static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_
    wcscpy(tok->last, error_message);
 }

-int tok_get_error(tokenizer *tok)
+int tok_get_error(tokenizer_t *tok)
 {
    return tok->error;
 }


-void tok_init(tokenizer *tok, const wchar_t *b, int flags)
+tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last(NULL), last_type(0), last_len(0), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), last_quote(0), error(0), squash_errors(false), cached_lineno_offset(0), cached_lineno_count(0)
 {

    /* We can only generate error messages on the main thread due to wgettext() thread safety issues. */
@ -132,33 +132,28 @@ void tok_init(tokenizer *tok, const wchar_t *b, int flags)
        ASSERT_IS_MAIN_THREAD();
    }

-    CHECK(tok,);
-
-    memset(tok, 0, sizeof(tokenizer));
-
    CHECK(b,);


-    tok->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED);
-    tok->show_comments = !!(flags & TOK_SHOW_COMMENTS);
-    tok->squash_errors = !!(flags & TOK_SQUASH_ERRORS);
-    tok->has_next=true;
+    this->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED);
+    this->show_comments = !!(flags & TOK_SHOW_COMMENTS);
+    this->squash_errors = !!(flags & TOK_SQUASH_ERRORS);

-    tok->has_next = (*b != L'\0');
-    tok->orig_buff = tok->buff = b;
-    tok->cached_lineno_offset = 0;
-    tok->cached_lineno_count = 0;
-    tok_next(tok);
+    this->has_next = (*b != L'\0');
+    this->orig_buff = this->buff = b;
+    this->cached_lineno_offset = 0;
+    this->cached_lineno_count = 0;
+    tok_next(this);
 }

-void tok_destroy(tokenizer *tok)
+void tok_destroy(tokenizer_t *tok)
 {
    CHECK(tok,);

    free(tok->last);
 }

-int tok_last_type(tokenizer *tok)
+int tok_last_type(tokenizer_t *tok)
 {
    CHECK(tok, TOK_ERROR);
    CHECK(tok->buff, TOK_ERROR);
@ -166,14 +161,14 @@ int tok_last_type(tokenizer *tok)
    return tok->last_type;
 }

-wchar_t *tok_last(tokenizer *tok)
+wchar_t *tok_last(tokenizer_t *tok)
 {
    CHECK(tok, 0);

    return tok->last;
 }

-int tok_has_next(tokenizer *tok)
+int tok_has_next(tokenizer_t *tok)
 {
    /*
      Return 1 on broken tokenizer
@ -185,7 +180,7 @@ int tok_has_next(tokenizer *tok)
    return   tok->has_next;
 }

-int tokenizer::line_number_of_character_at_offset(size_t offset)
+int tokenizer_t::line_number_of_character_at_offset(size_t offset)
 {
    // we want to return (one plus) the number of newlines at offsets less than the given offset
    // cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset
@ -265,24 +260,28 @@ static int myal(wchar_t c)
 /**
   Read the next token as a string
 */
-static void read_string(tokenizer *tok)
+static void read_string(tokenizer_t *tok)
 {
    const wchar_t *start;
    long len;
-    int mode=0;
    int do_loop=1;
    int paran_count=0;

    start = tok->buff;
    bool is_first = true;

+    enum tok_mode_t {
+        mode_regular_text = 0, // regular text
+        mode_subshell = 1, // inside of subshell
+        mode_array_brackets = 2, // inside of array brackets
+        mode_array_brackets_and_subshell = 3 // inside of array brackets and subshell, like in '$foo[(ech'
+    } mode = mode_regular_text;
+
    while (1)
    {

        if (!myal(*tok->buff))
        {
-//      debug(1, L"%lc", *tok->buff );
-
            if (*tok->buff == L'\\')
            {
                tok->buff++;
@ -296,13 +295,13 @@ static void read_string(tokenizer *tok)
                    else
                    {
                        /* Since we are about to increment tok->buff, decrement it first so the increment doesn't go past the end of the buffer. https://github.com/fish-shell/fish-shell/issues/389 */
-                        do_loop = 0;
                        tok->buff--;
+                        do_loop = 0;
                    }


                }
-                else if (*tok->buff == L'\n' && mode == 0)
+                else if (*tok->buff == L'\n' && mode == mode_regular_text)
                {
                    tok->buff--;
                    do_loop = 0;
@ -312,33 +311,24 @@ static void read_string(tokenizer *tok)
                tok->buff++;
                continue;
            }
-
-
-            /*
-              The modes are as follows:
-
-              0: regular text
-              1: inside of subshell
-              2: inside of array brackets
-              3: inside of array brackets and subshell, like in '$foo[(ech'
-            */
+            
            switch (mode)
            {
-                case 0:
+                case mode_regular_text:
                {
                    switch (*tok->buff)
                    {
                        case L'(':
                        {
                            paran_count=1;
-                            mode = 1;
+                            mode = mode_subshell;
                            break;
                        }

                        case L'[':
                        {
                            if (tok->buff != start)
-                                mode=2;
+                                mode = mode_array_brackets;
                            break;
                        }

@ -356,7 +346,7 @@ static void read_string(tokenizer *tok)
                            {
                                tok->buff += wcslen(tok->buff);

-                                if ((!tok->accept_unfinished))
+                                if (! tok->accept_unfinished)
                                {
                                    TOK_CALL_ERROR(tok, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR);
                                    return;
@ -369,7 +359,7 @@ static void read_string(tokenizer *tok)

                        default:
                        {
-                            if (!tok_is_string_character(*(tok->buff), is_first))
+                            if (! tok_is_string_character(*(tok->buff), is_first))
                            {
                                do_loop=0;
                            }
@ -378,8 +368,8 @@ static void read_string(tokenizer *tok)
                    break;
                }

-                case 3:
-                case 1:
+                case mode_array_brackets_and_subshell:
+                case mode_subshell:
                    switch (*tok->buff)
                    {
                        case L'\'':
@ -411,7 +401,7 @@ static void read_string(tokenizer *tok)
                            paran_count--;
                            if (paran_count == 0)
                            {
-                                mode--;
+                                mode = (mode == mode_array_brackets_and_subshell ? mode_array_brackets : mode_regular_text);
                            }
                            break;
                        case L'\0':
@ -419,16 +409,17 @@ static void read_string(tokenizer *tok)
                            break;
                    }
                    break;
-                case 2:
+                    
+                case mode_array_brackets:
                    switch (*tok->buff)
                    {
                        case L'(':
                            paran_count=1;
-                            mode = 3;
+                            mode = mode_array_brackets_and_subshell;
                            break;

                        case L']':
-                            mode=0;
+                            mode = mode_regular_text;
                            break;

                        case L'\0':
@ -447,7 +438,7 @@ static void read_string(tokenizer *tok)
        is_first = false;
    }

-    if ((!tok->accept_unfinished) && (mode!=0))
+    if ((!tok->accept_unfinished) && (mode != mode_regular_text))
    {
        TOK_CALL_ERROR(tok, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR);
        return;
@ -467,7 +458,7 @@ static void read_string(tokenizer *tok)
 /**
   Read the next token as a comment.
 */
-static void read_comment(tokenizer *tok)
+static void read_comment(tokenizer_t *tok)
 {
    const wchar_t *start;

@ -487,7 +478,7 @@ static void read_comment(tokenizer *tok)
 /**
   Read a FD redirection.
 */
-static void read_redirect(tokenizer *tok, int fd)
+static void read_redirect(tokenizer_t *tok, int fd)
 {
    int mode = -1;

@ -552,7 +543,7 @@ static void read_redirect(tokenizer *tok, int fd)
    }
 }

-wchar_t tok_last_quote(tokenizer *tok)
+wchar_t tok_last_quote(tokenizer_t *tok)
 {
    CHECK(tok, 0);

@ -582,7 +573,7 @@ const wchar_t *tok_get_desc(int type)
 }


-void tok_next(tokenizer *tok)
+void tok_next(tokenizer_t *tok)
 {

    CHECK(tok,);
@ -705,20 +696,18 @@ void tok_next(tokenizer *tok)

 }

-const wchar_t *tok_string(tokenizer *tok)
+const wchar_t *tok_string(tokenizer_t *tok)
 {
    return tok?tok->orig_buff:0;
 }

 wchar_t *tok_first(const wchar_t *str)
 {
-    tokenizer t;
    wchar_t *res=0;

    CHECK(str, 0);

-    tok_init(&t, str, TOK_SQUASH_ERRORS);
-
+    tokenizer_t t(str, TOK_SQUASH_ERRORS);
    switch (tok_last_type(&t))
    {
        case TOK_STRING:
@ -733,7 +722,7 @@ wchar_t *tok_first(const wchar_t *str)
    return res;
 }

-int tok_get_pos(tokenizer *tok)
+int tok_get_pos(tokenizer_t *tok)
 {
    CHECK(tok, 0);

@ -741,7 +730,7 @@ int tok_get_pos(tokenizer *tok)
 }


-void tok_set_pos(tokenizer *tok, int pos)
+void tok_set_pos(tokenizer_t *tok, int pos)
 {
    CHECK(tok,);

--- a/tokenizer.h
+++ b/tokenizer.h
@ -61,11 +61,12 @@ enum tokenizer_error
 */
 #define TOK_SQUASH_ERRORS 4

+typedef unsigned int tok_flags_t;

 /**
   The tokenizer struct.
 */
-struct tokenizer
+struct tokenizer_t
 {
    /** A pointer into the original string, showing where the next token begins */
    const wchar_t *buff;
@ -100,62 +101,60 @@ struct tokenizer
    /** Return the line number of the character at the given offset */
    int line_number_of_character_at_offset(size_t offset);

+    /**
+      Constructor for a tokenizer. b is the string that is to be
+      tokenized. It is not copied, and should not be freed by the caller
+      until after the tokenizer is destroyed.
+
+      \param b The string to tokenize
+      \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
+      to accept incomplete tokens, such as a subshell without a closing
+      parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
+
+    */
+    tokenizer_t(const wchar_t *b, tok_flags_t flags);
 };

-/**
-  Initialize the tokenizer. b is the string that is to be
-  tokenized. It is not copied, and should not be freed by the caller
-  until after the tokenizer is destroyed.
-
-  \param tok The tokenizer to initialize
-  \param b The string to tokenize
-  \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
-  to accept incomplete tokens, such as a subshell without a closing
-  parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
-
-*/
-void tok_init(tokenizer *tok, const wchar_t *b, int flags);
-
 /**
  Jump to the next token.
 */
-void tok_next(tokenizer *tok);
+void tok_next(tokenizer_t *tok);

 /**
  Returns the type of the last token. Must be one of the values in the token_type enum.
 */
-int tok_last_type(tokenizer *tok);
+int tok_last_type(tokenizer_t *tok);

 /**
  Returns the last token string. The string should not be freed by the caller.
 */
-wchar_t *tok_last(tokenizer *tok);
+wchar_t *tok_last(tokenizer_t *tok);

 /**
  Returns the type of quote from the last TOK_QSTRING
 */
-wchar_t tok_last_quote(tokenizer *tok);
+wchar_t tok_last_quote(tokenizer_t *tok);

 /**
  Returns true as long as there are more tokens left
 */
-int tok_has_next(tokenizer *tok);
+int tok_has_next(tokenizer_t *tok);

 /**
  Returns the position of the beginning of the current token in the original string
 */
-int tok_get_pos(tokenizer *tok);
+int tok_get_pos(tokenizer_t *tok);

 /**
   Destroy the tokenizer and free associated memory
 */
-void tok_destroy(tokenizer *tok);
+void tok_destroy(tokenizer_t *tok);


 /**
   Returns the original string passed to the tokenizer
 */
-const wchar_t *tok_string(tokenizer *tok);
+const wchar_t *tok_string(tokenizer_t *tok);


 /**
@ -178,7 +177,7 @@ bool tok_is_string_character(wchar_t c, bool is_first);
 /**
   Move tokenizer position
 */
-void tok_set_pos(tokenizer *tok, int pos);
+void tok_set_pos(tokenizer_t *tok, int pos);

 /**
   Returns a string description of the specified token type
@ -188,7 +187,7 @@ const wchar_t *tok_get_desc(int type);
 /**
   Get tokenizer error type. Should only be called if tok_last_type returns TOK_ERROR.
 */
-int tok_get_error(tokenizer *tok);
+int tok_get_error(tokenizer_t *tok);


 #endif