2016-05-02 16:53:10 -07:00
// Various mostly unrelated utility functions related to parsing, loading and evaluating fish code.
//
// This library can be seen as a 'toolbox' for functions that are used in many places in fish and
// that are somehow related to parsing the code.
2016-05-18 22:30:21 +00:00
# include "config.h" // IWYU pragma: keep
2019-10-13 15:50:48 -07:00
# include "parse_util.h"
2016-05-02 16:53:10 -07:00
# include <stdarg.h>
# include <stdlib.h>
2017-02-10 18:47:02 -08:00
2019-10-13 15:50:48 -07:00
# include <cwchar>
2016-04-20 23:00:54 -07:00
# include <memory>
2016-05-02 16:53:10 -07:00
# include <string>
2017-02-10 18:47:02 -08:00
# include <type_traits>
2006-01-31 02:51:50 +10:00
2016-05-02 16:53:10 -07:00
# include "builtin.h"
2006-01-31 02:51:50 +10:00
# include "common.h"
2006-02-08 19:20:05 +10:00
# include "expand.h"
2016-05-02 16:53:10 -07:00
# include "fallback.h" // IWYU pragma: keep
2018-05-05 19:11:57 -07:00
# include "future_feature_flags.h"
2016-04-20 23:00:54 -07:00
# include "parse_constants.h"
2019-10-13 16:06:16 -07:00
# include "parse_util.h"
2018-09-10 22:29:52 -07:00
# include "parser.h"
2018-01-20 11:58:57 -08:00
# include "tnode.h"
2016-05-02 16:53:10 -07:00
# include "tokenizer.h"
2019-09-19 10:32:07 -07:00
# include "wcstringutil.h"
2016-05-02 16:53:10 -07:00
# include "wildcard.h"
# include "wutil.h" // IWYU pragma: keep
2006-02-08 19:20:05 +10:00
2016-05-02 16:53:10 -07:00
/// Error message for use of backgrounded commands before and/or.
/// The format string carries one '%ls' placeholder; presumably it is filled with the name of the
/// offending boolean command - confirm at the use site.
# define BOOL_AFTER_BACKGROUND_ERROR_MSG \
_ ( L " The '%ls' command can not be used immediately after a backgrounded job " )
2014-11-02 13:11:27 -08:00
2016-05-02 16:53:10 -07:00
/// Error message for backgrounded commands as conditionals.
/// Takes no format arguments.
# define BACKGROUND_IN_CONDITIONAL_ERROR_MSG \
_ ( L " Backgrounded commands can not be used as conditionals " )
2014-11-02 13:11:27 -08:00
2018-01-22 13:31:39 -08:00
/// Error message for arguments to 'end'.
/// Takes no format arguments.
# define END_ARG_ERR_MSG _(L"'end' does not take arguments. Did you forget a ';'?")
2019-09-19 10:32:07 -07:00
/// Maximum length of a variable name to show in error reports before truncation.
static constexpr int var_err_len = 16 ;
2016-05-02 16:53:10 -07:00
/// Return the 1-based line number reached after scanning the first \p offset characters of
/// \p str, i.e. one plus the number of newlines seen. Scanning stops early at the terminating NUL.
/// Returns 0 if \p str is null.
int parse_util_lineno(const wchar_t *str, size_t offset) {
    if (str == nullptr) return 0;

    int line = 1;
    size_t idx = 0;
    while (idx < offset && str[idx] != L'\0') {
        if (str[idx] == L'\n') line += 1;
        idx += 1;
    }
    return line;
}
2016-05-02 16:53:10 -07:00
int parse_util_get_line_from_offset ( const wcstring & str , size_t pos ) {
2012-02-06 00:57:43 -08:00
const wchar_t * buff = str . c_str ( ) ;
2012-11-18 16:30:30 -08:00
int count = 0 ;
2016-05-02 16:53:10 -07:00
for ( size_t i = 0 ; i < pos ; i + + ) {
if ( ! buff [ i ] ) {
2012-11-18 16:30:30 -08:00
return - 1 ;
}
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
if ( buff [ i ] = = L ' \n ' ) {
2012-11-18 16:30:30 -08:00
count + + ;
}
2012-11-18 11:23:22 +01:00
}
2012-11-18 16:30:30 -08:00
return count ;
2007-09-22 00:05:49 +10:00
}
2016-05-02 16:53:10 -07:00
size_t parse_util_get_offset_from_line ( const wcstring & str , int line ) {
2012-02-06 00:57:43 -08:00
const wchar_t * buff = str . c_str ( ) ;
2012-11-18 16:30:30 -08:00
size_t i ;
int count = 0 ;
2012-11-18 11:23:22 +01:00
2019-11-18 17:08:16 -08:00
if ( line < 0 ) return static_cast < size_t > ( - 1 ) ;
2016-05-02 16:53:10 -07:00
if ( line = = 0 ) return 0 ;
2012-11-18 16:30:30 -08:00
2016-05-02 16:53:10 -07:00
for ( i = 0 ; ; i + + ) {
2019-11-18 17:08:16 -08:00
if ( ! buff [ i ] ) return static_cast < size_t > ( - 1 ) ;
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
if ( buff [ i ] = = L ' \n ' ) {
2012-11-18 16:30:30 -08:00
count + + ;
2016-05-02 16:53:10 -07:00
if ( count = = line ) {
return ( i + 1 ) < str . size ( ) ? i + 1 : i ;
2012-11-18 16:30:30 -08:00
}
}
2012-11-18 11:23:22 +01:00
}
2007-09-22 00:05:49 +10:00
}
2016-05-02 16:53:10 -07:00
/// Convert a (line, column) pair into an absolute offset within \p str.
/// \p line is 0-based; \p line_offset is the column within that line. Negative columns are
/// treated as 0 and columns past the end of the line are clamped to the line's last position.
/// Returns (size_t)-1 if the requested line does not exist.
size_t parse_util_get_offset(const wcstring &str, int line, long line_offset) {
    const size_t invalid = static_cast<size_t>(-1);

    const size_t line_start = parse_util_get_offset_from_line(str, line);
    size_t next_line_start = parse_util_get_offset_from_line(str, line + 1);
    if (line_start == invalid) return invalid;

    // No following line: pretend it starts one past the end of the string.
    if (next_line_start == invalid) next_line_start = str.length() + 1;

    long column = line_offset < 0 ? 0 : line_offset;
    const size_t max_column = next_line_start - line_start - 1;
    if (static_cast<size_t>(column) >= max_column) {
        column = max_column;
    }
    return line_start + column;
}
2016-05-02 16:53:10 -07:00
static int parse_util_locate_brackets_of_type ( const wchar_t * in , wchar_t * * begin , wchar_t * * end ,
bool allow_incomplete , wchar_t open_type ,
wchar_t close_type ) {
// open_type is typically ( or [, and close type is the corresponding value.
2012-11-18 16:30:30 -08:00
wchar_t * pos ;
2016-05-02 16:53:10 -07:00
wchar_t prev = 0 ;
int syntax_error = 0 ;
int paran_count = 0 ;
2014-03-31 10:01:39 -07:00
2019-11-18 18:34:50 -08:00
wchar_t * paran_begin = nullptr , * paran_end = nullptr ;
2014-03-31 10:01:39 -07:00
2019-05-27 17:24:19 -07:00
assert ( in & & " null parameter " ) ;
2014-03-31 10:01:39 -07:00
2016-05-02 16:53:10 -07:00
for ( pos = const_cast < wchar_t * > ( in ) ; * pos ; pos + + ) {
if ( prev ! = ' \\ ' ) {
2019-03-12 14:06:01 -07:00
if ( std : : wcschr ( L " \' \" " , * pos ) ) {
2012-11-18 16:30:30 -08:00
wchar_t * q_end = quote_end ( pos ) ;
2016-05-02 16:53:10 -07:00
if ( q_end & & * q_end ) {
pos = q_end ;
} else {
2012-11-18 16:30:30 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
} else {
if ( * pos = = open_type ) {
2019-11-18 18:34:50 -08:00
if ( ( paran_count = = 0 ) & & ( paran_begin = = nullptr ) ) {
2012-11-18 16:30:30 -08:00
paran_begin = pos ;
}
2014-03-31 10:01:39 -07:00
2012-11-18 16:30:30 -08:00
paran_count + + ;
2016-05-02 16:53:10 -07:00
} else if ( * pos = = close_type ) {
2012-11-18 16:30:30 -08:00
paran_count - - ;
2014-03-31 10:01:39 -07:00
2019-11-18 18:34:50 -08:00
if ( ( paran_count = = 0 ) & & ( paran_end = = nullptr ) ) {
2012-11-18 16:30:30 -08:00
paran_end = pos ;
break ;
}
2014-03-31 10:01:39 -07:00
2016-05-02 16:53:10 -07:00
if ( paran_count < 0 ) {
2012-11-18 16:30:30 -08:00
syntax_error = 1 ;
break ;
}
}
}
}
prev = * pos ;
2012-11-18 11:23:22 +01:00
}
2014-03-31 10:01:39 -07:00
2012-11-18 16:30:30 -08:00
syntax_error | = ( paran_count < 0 ) ;
2016-05-02 16:53:10 -07:00
syntax_error | = ( ( paran_count > 0 ) & & ( ! allow_incomplete ) ) ;
2014-03-31 10:01:39 -07:00
2016-05-02 16:53:10 -07:00
if ( syntax_error ) {
2012-11-18 16:30:30 -08:00
return - 1 ;
}
2014-03-31 10:01:39 -07:00
2019-11-18 18:34:50 -08:00
if ( paran_begin = = nullptr ) {
2012-11-18 16:30:30 -08:00
return 0 ;
}
2014-03-31 10:01:39 -07:00
2016-05-02 16:53:10 -07:00
if ( begin ) {
2012-11-18 16:30:30 -08:00
* begin = paran_begin ;
}
2014-03-31 10:01:39 -07:00
2016-05-02 16:53:10 -07:00
if ( end ) {
2019-11-18 17:08:16 -08:00
* end = paran_count ? const_cast < wchar_t * > ( in ) + std : : wcslen ( in ) : paran_end ;
2012-11-18 16:30:30 -08:00
}
2014-03-31 10:01:39 -07:00
2012-11-18 16:30:30 -08:00
return 1 ;
2006-01-31 02:51:50 +10:00
}
2016-05-02 16:53:10 -07:00
int parse_util_locate_cmdsubst ( const wchar_t * in , wchar_t * * begin , wchar_t * * end ,
bool accept_incomplete ) {
2014-02-03 14:13:42 -08:00
return parse_util_locate_brackets_of_type ( in , begin , end , accept_incomplete , L ' ( ' , L ' ) ' ) ;
}
2016-05-02 16:53:10 -07:00
int parse_util_locate_slice ( const wchar_t * in , wchar_t * * begin , wchar_t * * end ,
bool accept_incomplete ) {
2014-02-03 14:13:42 -08:00
return parse_util_locate_brackets_of_type ( in , begin , end , accept_incomplete , L ' [ ' , L ' ] ' ) ;
}
2016-05-02 16:53:10 -07:00
static int parse_util_locate_brackets_range ( const wcstring & str , size_t * inout_cursor_offset ,
wcstring * out_contents , size_t * out_start ,
size_t * out_end , bool accept_incomplete ,
wchar_t open_type , wchar_t close_type ) {
// Clear the return values.
2019-04-11 14:28:27 -07:00
if ( out_contents ! = nullptr ) out_contents - > clear ( ) ;
2013-10-08 18:41:35 -07:00
* out_start = 0 ;
* out_end = str . size ( ) ;
2014-01-15 01:40:40 -08:00
2016-05-02 16:53:10 -07:00
// Nothing to do if the offset is at or past the end of the string.
if ( * inout_cursor_offset > = str . size ( ) ) return 0 ;
2014-01-15 01:40:40 -08:00
2016-05-02 16:53:10 -07:00
// Defer to the wonky version.
const wchar_t * const buff = str . c_str ( ) ;
const wchar_t * const valid_range_start = buff + * inout_cursor_offset ,
* valid_range_end = buff + str . size ( ) ;
2019-11-18 18:34:50 -08:00
wchar_t * bracket_range_begin = nullptr , * bracket_range_end = nullptr ;
2016-05-02 16:53:10 -07:00
int ret = parse_util_locate_brackets_of_type ( valid_range_start , & bracket_range_begin ,
& bracket_range_end , accept_incomplete , open_type ,
close_type ) ;
2016-10-30 21:05:27 -07:00
if ( ret < = 0 ) {
return ret ;
2013-10-08 18:41:35 -07:00
}
2016-10-30 21:05:27 -07:00
// The command substitutions must not be NULL and must be in the valid pointer range, and
// the end must be bigger than the beginning.
2019-11-18 18:34:50 -08:00
assert ( bracket_range_begin ! = nullptr & & bracket_range_begin > = valid_range_start & &
2016-12-03 20:12:53 -08:00
bracket_range_begin < = valid_range_end ) ;
2019-11-18 18:34:50 -08:00
assert ( bracket_range_end ! = nullptr & & bracket_range_end > bracket_range_begin & &
2016-12-03 20:12:53 -08:00
bracket_range_end > = valid_range_start & & bracket_range_end < = valid_range_end ) ;
2016-10-30 21:05:27 -07:00
// Assign the substring to the out_contents.
const wchar_t * interior_begin = bracket_range_begin + 1 ;
2019-04-11 14:28:27 -07:00
if ( out_contents ! = nullptr ) {
out_contents - > assign ( interior_begin , bracket_range_end - interior_begin ) ;
}
2016-10-30 21:05:27 -07:00
// Return the start and end.
* out_start = bracket_range_begin - buff ;
* out_end = bracket_range_end - buff ;
// Update the inout_cursor_offset. Note this may cause it to exceed str.size(), though
// overflow is not likely.
* inout_cursor_offset = 1 + * out_end ;
2013-10-08 18:41:35 -07:00
return ret ;
}
2016-05-02 16:53:10 -07:00
/// Find the next parenthesized range in \p str at or after *inout_cursor_offset.
/// See parse_util_locate_brackets_range() for the meaning of the output parameters and the
/// return value; this simply fixes the bracket pair to '(' and ')'.
int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset,
                                     wcstring *out_contents, size_t *out_start, size_t *out_end,
                                     bool accept_incomplete) {
    const wchar_t opener = L'(';
    const wchar_t closer = L')';
    return parse_util_locate_brackets_range(str, inout_cursor_offset, out_contents, out_start,
                                            out_end, accept_incomplete, opener, closer);
}
2016-05-02 16:53:10 -07:00
void parse_util_cmdsubst_extent ( const wchar_t * buff , size_t cursor_pos , const wchar_t * * a ,
const wchar_t * * b ) {
2019-05-27 17:24:19 -07:00
assert ( buff & & " Null buffer " ) ;
2016-05-02 16:53:10 -07:00
const wchar_t * const cursor = buff + cursor_pos ;
2012-11-18 11:23:22 +01:00
2019-03-12 14:06:01 -07:00
const size_t bufflen = std : : wcslen ( buff ) ;
2013-07-17 01:35:30 -07:00
assert ( cursor_pos < = bufflen ) ;
2013-07-22 18:26:15 -07:00
2019-11-25 20:03:25 +09:00
// ap and bp are the beginning and end of the tightest command substitution found so far.
2013-07-17 01:35:30 -07:00
const wchar_t * ap = buff , * bp = buff + bufflen ;
const wchar_t * pos = buff ;
2016-05-02 16:53:10 -07:00
for ( ; ; ) {
2019-11-18 18:34:50 -08:00
wchar_t * begin = nullptr , * end = nullptr ;
2016-05-02 16:53:10 -07:00
if ( parse_util_locate_cmdsubst ( pos , & begin , & end , true ) < = 0 ) {
// No subshell found, all done.
2012-11-18 16:30:30 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
// Interpret NULL to mean the end.
2019-11-18 18:34:50 -08:00
if ( end = = nullptr ) {
2013-07-17 01:35:30 -07:00
end = const_cast < wchar_t * > ( buff ) + bufflen ;
2012-11-18 16:30:30 -08:00
}
2013-07-22 18:26:15 -07:00
2016-05-02 16:53:10 -07:00
if ( begin < cursor & & end > = cursor ) {
// This command substitution surrounds the cursor, so it's a tighter fit.
2012-11-18 16:30:30 -08:00
begin + + ;
2013-07-17 01:35:30 -07:00
ap = begin ;
bp = end ;
2016-05-02 16:53:10 -07:00
// pos is where to begin looking for the next one. But if we reached the end there's no
// next one.
if ( begin > = end ) break ;
2013-07-17 01:35:30 -07:00
pos = begin + 1 ;
2016-05-02 16:53:10 -07:00
} else if ( begin > = cursor ) {
// This command substitution starts at or after the cursor. Since it was the first
// command substitution in the string, we're done.
2012-11-18 16:30:30 -08:00
break ;
2016-05-02 16:53:10 -07:00
} else {
// This command substitution ends before the cursor. Skip it.
2013-07-17 01:35:30 -07:00
assert ( end < cursor ) ;
pos = end + 1 ;
assert ( pos < = buff + bufflen ) ;
}
2012-11-18 16:30:30 -08:00
}
2013-07-22 18:26:15 -07:00
2019-11-18 18:34:50 -08:00
if ( a ! = nullptr ) * a = ap ;
if ( b ! = nullptr ) * b = bp ;
2006-01-31 02:51:50 +10:00
}
2016-05-02 16:53:10 -07:00
/// Get the beginning and end of the job or process definition under the cursor.
2019-10-29 13:32:26 +01:00
static void job_or_process_extent ( bool process , const wchar_t * buff , size_t cursor_pos ,
const wchar_t * * a , const wchar_t * * b ,
std : : vector < tok_t > * tokens ) {
2019-05-27 17:24:19 -07:00
assert ( buff & & " Null buffer " ) ;
2019-10-18 15:24:28 -07:00
const wchar_t * begin = nullptr , * end = nullptr ;
2016-05-02 16:53:10 -07:00
int finished = 0 ;
2012-11-18 11:23:22 +01:00
2019-10-18 15:24:28 -07:00
if ( a ) * a = nullptr ;
if ( b ) * b = nullptr ;
2012-11-18 16:30:30 -08:00
parse_util_cmdsubst_extent ( buff , cursor_pos , & begin , & end ) ;
2016-05-02 16:53:10 -07:00
if ( ! end | | ! begin ) {
2012-11-18 16:30:30 -08:00
return ;
}
2012-11-18 11:23:22 +01:00
2020-04-08 16:56:59 -07:00
assert ( cursor_pos > = static_cast < size_t > ( begin - buff ) ) ;
2015-07-26 00:12:36 -07:00
const size_t pos = cursor_pos - ( begin - buff ) ;
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
if ( a ) * a = begin ;
if ( b ) * b = end ;
2012-11-18 11:23:22 +01:00
2019-10-18 15:24:28 -07:00
const wcstring buffcpy ( begin , end ) ;
tokenizer_t tok ( buffcpy . c_str ( ) , TOK_ACCEPT_UNFINISHED ) ;
maybe_t < tok_t > token { } ;
while ( ( token = tok . next ( ) ) & & ! finished ) {
size_t tok_begin = token - > offset ;
2012-11-18 16:30:30 -08:00
2019-10-18 15:24:28 -07:00
switch ( token - > type ) {
case token_type_t : : pipe : {
if ( ! process ) {
2012-11-19 00:31:03 -08:00
break ;
}
2019-10-18 15:24:28 -07:00
}
/* FALLTHROUGH */
case token_type_t : : end :
case token_type_t : : background :
case token_type_t : : andand :
2019-10-29 13:32:26 +01:00
case token_type_t : : oror :
case token_type_t : : comment : {
2019-10-18 15:24:28 -07:00
if ( tok_begin > = pos ) {
finished = 1 ;
2019-11-18 17:08:16 -08:00
if ( b ) * b = const_cast < wchar_t * > ( begin ) + tok_begin ;
2019-10-18 15:24:28 -07:00
} else {
2019-10-29 13:32:26 +01:00
// Statement at cursor might start after this token.
2019-11-18 17:08:16 -08:00
if ( a ) * a = const_cast < wchar_t * > ( begin ) + tok_begin + token - > length ;
2019-10-29 13:32:26 +01:00
if ( tokens ) tokens - > clear ( ) ;
2012-11-18 16:30:30 -08:00
}
2019-10-29 13:32:26 +01:00
continue ; // Do not add this to tokens
2019-10-18 15:24:28 -07:00
}
default : {
break ;
2019-05-05 12:09:25 +02:00
}
2012-11-18 11:23:22 +01:00
}
2019-10-29 13:32:26 +01:00
if ( tokens ) tokens - > push_back ( * token ) ;
2019-10-18 15:24:28 -07:00
}
2006-01-31 02:51:50 +10:00
}
2016-05-02 16:53:10 -07:00
void parse_util_process_extent ( const wchar_t * buff , size_t pos , const wchar_t * * a ,
2019-10-29 13:32:26 +01:00
const wchar_t * * b , std : : vector < tok_t > * tokens ) {
job_or_process_extent ( true , buff , pos , a , b , tokens ) ;
2006-01-31 02:51:50 +10:00
}
2016-05-02 16:53:10 -07:00
void parse_util_job_extent ( const wchar_t * buff , size_t pos , const wchar_t * * a , const wchar_t * * b ) {
2019-10-29 13:32:26 +01:00
job_or_process_extent ( false , buff , pos , a , b , nullptr ) ;
2006-01-31 02:51:50 +10:00
}
2016-05-02 16:53:10 -07:00
void parse_util_token_extent ( const wchar_t * buff , size_t cursor_pos , const wchar_t * * tok_begin ,
const wchar_t * * tok_end , const wchar_t * * prev_begin ,
const wchar_t * * prev_end ) {
2019-05-27 17:24:19 -07:00
assert ( buff & & " Null buffer " ) ;
2019-11-18 18:34:50 -08:00
const wchar_t * a = nullptr , * b = nullptr , * pa = nullptr , * pb = nullptr ;
2012-11-18 11:23:22 +01:00
2013-09-21 16:38:57 -07:00
const wchar_t * cmdsubst_begin , * cmdsubst_end ;
parse_util_cmdsubst_extent ( buff , cursor_pos , & cmdsubst_begin , & cmdsubst_end ) ;
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
if ( ! cmdsubst_end | | ! cmdsubst_begin ) {
2012-11-18 16:30:30 -08:00
return ;
}
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
// pos is equivalent to cursor_pos within the range of the command substitution {begin, end}.
2016-10-09 14:36:08 -07:00
size_t offset_within_cmdsubst = cursor_pos - ( cmdsubst_begin - buff ) ;
2012-11-18 11:23:22 +01:00
2019-03-12 14:06:01 -07:00
size_t bufflen = std : : wcslen ( buff ) ;
2018-10-20 22:25:55 +02:00
2013-09-21 16:38:57 -07:00
a = cmdsubst_begin + offset_within_cmdsubst ;
2012-11-18 16:30:30 -08:00
b = a ;
2013-09-21 16:38:57 -07:00
pa = cmdsubst_begin + offset_within_cmdsubst ;
2012-11-18 16:30:30 -08:00
pb = pa ;
2012-11-18 11:23:22 +01:00
2013-09-21 16:38:57 -07:00
assert ( cmdsubst_begin > = buff ) ;
2018-10-20 22:25:55 +02:00
assert ( cmdsubst_begin < = ( buff + bufflen ) ) ;
2013-09-21 16:38:57 -07:00
assert ( cmdsubst_end > = cmdsubst_begin ) ;
2018-10-20 22:25:55 +02:00
assert ( cmdsubst_end < = ( buff + bufflen ) ) ;
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
const wcstring buffcpy = wcstring ( cmdsubst_begin , cmdsubst_end - cmdsubst_begin ) ;
2012-11-18 11:23:22 +01:00
2018-02-23 17:28:12 -08:00
tokenizer_t tok ( buffcpy . c_str ( ) , TOK_ACCEPT_UNFINISHED ) ;
2019-10-13 16:06:16 -07:00
while ( maybe_t < tok_t > token = tok . next ( ) ) {
size_t tok_begin = token - > offset ;
2012-11-18 16:30:30 -08:00
size_t tok_end = tok_begin ;
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
// Calculate end of token.
2019-10-13 16:06:16 -07:00
if ( token - > type = = token_type_t : : string ) {
tok_end + = token - > length ;
2012-11-18 16:30:30 -08:00
}
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
// Cursor was before beginning of this token, means that the cursor is between two tokens,
// so we set it to a zero element string and break.
if ( tok_begin > offset_within_cmdsubst ) {
2013-09-21 16:38:57 -07:00
a = b = cmdsubst_begin + offset_within_cmdsubst ;
2012-11-18 16:30:30 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
// If cursor is inside the token, this is the token we are looking for. If so, set a and b
// and break.
2019-10-13 16:06:16 -07:00
if ( token - > type = = token_type_t : : string & & tok_end > = offset_within_cmdsubst ) {
a = cmdsubst_begin + token - > offset ;
b = a + token - > length ;
2012-11-18 16:30:30 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
// Remember previous string token.
2019-10-13 16:06:16 -07:00
if ( token - > type = = token_type_t : : string ) {
pa = cmdsubst_begin + token - > offset ;
pb = pa + token - > length ;
2012-11-18 16:30:30 -08:00
}
2012-11-18 11:23:22 +01:00
}
2016-05-02 16:53:10 -07:00
if ( tok_begin ) * tok_begin = a ;
if ( tok_end ) * tok_end = b ;
if ( prev_begin ) * prev_begin = pa ;
if ( prev_end ) * prev_end = pb ;
2012-11-18 11:23:22 +01:00
2012-11-18 16:30:30 -08:00
assert ( pa > = buff ) ;
2018-10-20 22:25:55 +02:00
assert ( pa < = ( buff + bufflen ) ) ;
2012-11-18 16:30:30 -08:00
assert ( pb > = pa ) ;
2018-10-20 22:25:55 +02:00
assert ( pb < = ( buff + bufflen ) ) ;
2006-01-31 02:51:50 +10:00
}
2016-05-02 16:53:10 -07:00
/// Convert wildcard characters in \p str into their internal representation.
///
/// An unescaped '*' becomes ANY_STRING. An unescaped '?' becomes ANY_CHAR, unless the
/// qmark_noglob feature is enabled, in which case '?' is left alone. A backslash-escaped wildcard
/// ("\*", and "\?" when '?' is a wildcard) is replaced by the bare character. An escaped
/// backslash is copied through as two backslashes so that it cannot be mistaken for the start of
/// another escape. Everything else is copied unchanged.
wcstring parse_util_unescape_wildcards(const wcstring &str) {
    const bool unesc_qmark = !feature_test(features_t::qmark_noglob);

    wcstring result;
    result.reserve(str.size());

    const wchar_t *const cs = str.c_str();
    size_t i = 0;
    while (cs[i] != L'\0') {
        const wchar_t c = cs[i];
        if (c == L'\\') {
            const wchar_t next = cs[i + 1];
            if (next == L'*' || (next == L'?' && unesc_qmark)) {
                // Escaped wildcard: keep the literal character, drop the backslash.
                result.push_back(next);
                i += 1;
            } else if (next == L'\\') {
                // Not a wildcard, but ensure the next iteration doesn't see this escaped
                // backslash.
                result.append(L"\\\\");
                i += 1;
            } else {
                result.push_back(c);
            }
        } else if (c == L'*') {
            result.push_back(ANY_STRING);
        } else if (c == L'?' && unesc_qmark) {
            result.push_back(ANY_CHAR);
        } else {
            result.push_back(c);
        }
        i += 1;
    }
    return result;
}
2006-02-15 05:56:36 +10:00
2016-05-02 16:53:10 -07:00
/// Find the outermost quoting style of current token. Returns 0 if token is not quoted.
static wchar_t get_quote ( const wcstring & cmd_str , size_t len ) {
size_t i = 0 ;
wchar_t res = 0 ;
const wchar_t * const cmd = cmd_str . c_str ( ) ;
2012-11-18 11:23:22 +01:00
2019-11-25 16:36:13 -08:00
while ( true ) {
2016-05-02 16:53:10 -07:00
if ( ! cmd [ i ] ) break ;
2012-11-18 16:30:30 -08:00
2016-05-02 16:53:10 -07:00
if ( cmd [ i ] = = L ' \\ ' ) {
2012-11-18 16:30:30 -08:00
i + + ;
2016-05-02 16:53:10 -07:00
if ( ! cmd [ i ] ) break ;
2012-11-18 16:30:30 -08:00
i + + ;
2016-05-02 16:53:10 -07:00
} else {
if ( cmd [ i ] = = L ' \' ' | | cmd [ i ] = = L ' \" ' ) {
2012-11-18 16:30:30 -08:00
const wchar_t * end = quote_end ( & cmd [ i ] ) ;
2019-03-12 14:06:01 -07:00
// std::fwprintf( stderr, L"Jump %d\n", end-cmd );
2019-11-18 18:34:50 -08:00
if ( ( end = = nullptr ) | | ( ! * end ) | | ( end > cmd + len ) ) {
2012-11-18 16:30:30 -08:00
res = cmd [ i ] ;
break ;
}
2016-05-02 16:53:10 -07:00
i = end - cmd + 1 ;
} else
2012-11-18 16:30:30 -08:00
i + + ;
2012-11-18 11:23:22 +01:00
}
}
2012-11-18 16:30:30 -08:00
return res ;
2012-07-06 14:34:53 -07:00
}
2016-05-02 16:53:10 -07:00
void parse_util_get_parameter_info ( const wcstring & cmd , const size_t pos , wchar_t * quote ,
2019-10-13 16:06:16 -07:00
size_t * offset , token_type_t * out_type ) {
2016-05-02 16:53:10 -07:00
size_t prev_pos = 0 ;
2016-10-24 17:13:39 -07:00
wchar_t last_quote = L ' \0 ' ;
2012-11-18 11:23:22 +01:00
2018-02-23 17:28:12 -08:00
tokenizer_t tok ( cmd . c_str ( ) , TOK_ACCEPT_UNFINISHED ) ;
2019-10-13 16:06:16 -07:00
while ( auto token = tok . next ( ) ) {
if ( token - > offset > pos ) break ;
2012-07-06 14:34:53 -07:00
2019-10-13 16:06:16 -07:00
if ( token - > type = = token_type_t : : string )
last_quote = get_quote ( tok . text_of ( * token ) , pos - token - > offset ) ;
2012-07-06 14:34:53 -07:00
2019-11-18 18:34:50 -08:00
if ( out_type ! = nullptr ) * out_type = token - > type ;
2012-07-06 14:34:53 -07:00
2019-10-13 16:06:16 -07:00
prev_pos = token - > offset ;
2012-11-18 16:30:30 -08:00
}
2012-07-06 14:34:53 -07:00
wchar_t * cmd_tmp = wcsdup ( cmd . c_str ( ) ) ;
2016-05-02 16:53:10 -07:00
cmd_tmp [ pos ] = 0 ;
2018-10-20 22:25:55 +02:00
size_t cmdlen = pos ;
2016-10-24 17:13:39 -07:00
bool finished = cmdlen ! = 0 ;
2016-10-22 11:21:13 -07:00
if ( finished ) {
2019-11-18 18:34:50 -08:00
finished = ( quote = = nullptr ) ;
2019-03-12 14:06:01 -07:00
if ( finished & & std : : wcschr ( L " \t \n \r " , cmd_tmp [ cmdlen - 1 ] ) ) {
2016-10-24 17:13:39 -07:00
finished = cmdlen > 1 & & cmd_tmp [ cmdlen - 2 ] = = L ' \\ ' ;
2012-11-18 11:23:22 +01:00
}
}
2016-05-02 16:53:10 -07:00
if ( quote ) * quote = last_quote ;
2012-11-18 11:23:22 +01:00
2019-11-18 18:34:50 -08:00
if ( offset ! = nullptr ) {
2016-10-22 11:21:13 -07:00
if ( finished ) {
2019-11-18 18:34:50 -08:00
while ( ( cmd_tmp [ prev_pos ] ! = 0 ) & & ( std : : wcschr ( L " ;| " , cmd_tmp [ prev_pos ] ) ! = nullptr ) )
2019-05-05 12:09:25 +02:00
prev_pos + + ;
2012-11-18 16:30:30 -08:00
* offset = prev_pos ;
2016-05-02 16:53:10 -07:00
} else {
2012-11-18 16:30:30 -08:00
* offset = pos ;
}
2012-11-18 11:23:22 +01:00
}
2016-10-22 11:21:13 -07:00
2012-07-06 14:34:53 -07:00
free ( cmd_tmp ) ;
}
2018-02-17 14:36:43 -08:00
/// Escape \p cmd so that it can be inserted into a command line inside the given quoting style.
///
/// \param cmd the text to escape.
/// \param quote the quote character the text will be placed inside of, or 0 for unquoted text.
/// \param no_tilde for unquoted text, adds ESCAPE_NO_TILDE to the flags given to escape_string().
/// \return the escaped text. The surrounding quotes themselves are not added.
wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote, bool no_tilde) {
    if (quote == L'\0') {
        // Unquoted: defer entirely to the general-purpose escaper.
        escape_flags_t flags = ESCAPE_ALL | ESCAPE_NO_QUOTED | (no_tilde ? ESCAPE_NO_TILDE : 0);
        return escape_string(cmd, flags);
    }

    // Here we are going to escape a string with quotes.
    // A few characters cannot be represented inside quotes, e.g. newlines. In that case,
    // terminate the quote, emit a backslash escape, and then re-enter the quote.
    wcstring result;
    result.reserve(cmd.size());
    for (wchar_t c : cmd) {
        // Letter of the backslash escape for characters that must leave the quotes, else 0.
        wchar_t escape_letter = L'\0';
        switch (c) {
            case L'\n':
                escape_letter = L'n';
                break;
            case L'\t':
                escape_letter = L't';
                break;
            case L'\b':
                escape_letter = L'b';
                break;
            case L'\r':
                escape_letter = L'r';
                break;
            default:
                break;
        }

        if (escape_letter != L'\0') {
            result.append({quote, L'\\', escape_letter, quote});
        } else if (c == L'\\') {
            result.append({L'\\', L'\\'});
        } else if (c == L'$') {
            // A dollar sign only needs a backslash inside double quotes.
            if (quote == L'"') result.push_back(L'\\');
            result.push_back(L'$');
        } else {
            // The quote character itself must be escaped; anything else is literal.
            if (c == quote) result.push_back(L'\\');
            result.push_back(c);
        }
    }
    return result;
}
2013-12-08 13:41:12 -08:00
2016-05-02 16:53:10 -07:00
/// We are given a parse tree, the index of a node within the tree, its indent, and a vector of
/// indents the same size as the original source string. Set the indent correspdonding to the node's
/// source range, if appropriate.
///
/// trailing_indent is the indent for nodes with unrealized source, i.e. if I type 'if false <ret>'
/// then we have an if node with an empty job list (without source) but we want the last line to be
/// indented anyways.
///
/// switch statements also indent.
///
/// max_visited_node_idx is the largest index we visited.
static void compute_indents_recursive ( const parse_node_tree_t & tree , node_offset_t node_idx ,
int node_indent , parse_token_type_t parent_type ,
std : : vector < int > * indents , int * trailing_indent ,
node_offset_t * max_visited_node_idx ) {
// Guard against incomplete trees.
if ( node_idx > tree . size ( ) ) return ;
// Update max_visited_node_idx.
if ( node_idx > * max_visited_node_idx ) * max_visited_node_idx = node_idx ;
// We could implement this by utilizing the fish grammar. But there's an easy trick instead:
// almost everything that wraps a job list should be indented by 1. So just find all of the job
// lists. One exception is switch, which wraps a case_item_list instead of a job_list. The other
// exception is job_list itself: a job_list is a job and a job_list, and we want that child list
// to be indented the same as the parent. So just find all job_lists whose parent is not a
// job_list, and increment their indent by 1. We also want to treat andor_job_list like
// job_lists.
2013-12-08 13:41:12 -08:00
const parse_node_t & node = tree . at ( node_idx ) ;
const parse_token_type_t node_type = node . type ;
2014-01-15 01:40:40 -08:00
2016-05-02 16:53:10 -07:00
// Increment the indent if we are either a root job_list, or root case_item_list.
const bool is_root_job_list = node_type ! = parent_type & & ( node_type = = symbol_job_list | |
node_type = = symbol_andor_job_list ) ;
const bool is_root_case_item_list =
2016-05-03 21:31:32 -07:00
node_type = = symbol_case_item_list & & parent_type ! = symbol_case_item_list ;
2016-05-02 16:53:10 -07:00
if ( is_root_job_list | | is_root_case_item_list ) {
2013-12-08 13:41:12 -08:00
node_indent + = 1 ;
}
2014-01-15 01:40:40 -08:00
2016-05-02 16:53:10 -07:00
// If we have source, store the trailing indent unconditionally. If we do not have source, store
// the trailing indent only if ours is bigger; this prevents the trailing "run" of terminal job
// lists from affecting the trailing indent. For example, code like this:
//
// if foo
//
// will be parsed as this:
//
// job_list
// job
// if_statement
// job [if]
// job_list [empty]
// job_list [empty]
//
// There's two "terminal" job lists, and we want the innermost one.
//
// Note we are relying on the fact that nodes are in the same order as the source, i.e. an
// in-order traversal of the node tree also traverses the source from beginning to end.
if ( node . has_source ( ) | | node_indent > * trailing_indent ) {
2013-12-08 13:41:12 -08:00
* trailing_indent = node_indent ;
}
2016-05-02 16:53:10 -07:00
// Store the indent into the indent array.
if ( node . source_start ! = SOURCE_OFFSET_INVALID & & node . source_start < indents - > size ( ) ) {
if ( node . has_source ( ) ) {
// A normal non-empty node. Store the indent unconditionally.
2014-09-29 11:29:50 -07:00
indents - > at ( node . source_start ) = node_indent ;
2016-05-02 16:53:10 -07:00
} else {
// An empty node. We have a source offset but no source length. This can come about when
2019-09-19 10:32:07 -07:00
// a node is legitimately empty:
2016-05-02 16:53:10 -07:00
//
// while true; end
//
// The job_list inside the while loop is empty. It still has a source offset (at the end
// of the while statement) but no source extent. We still need to capture that indent,
// because there may be comments inside:
//
// while true
// # loop forever
// end
//
// The 'loop forever' comment must be indented, by virtue of storing the indent.
//
// Now consider what happens if we remove the end:
//
// while true
// # loop forever
//
// Now both the job_list and end_command are unmaterialized. However, we want the indent
// to be of the job_list and not the end_command. Therefore, we only store the indent
// if it's bigger.
if ( node_indent > indents - > at ( node . source_start ) ) {
2014-09-29 11:29:50 -07:00
indents - > at ( node . source_start ) = node_indent ;
}
}
2013-12-08 13:41:12 -08:00
}
2016-05-02 16:53:10 -07:00
// Recursive to all our children.
for ( node_offset_t idx = 0 ; idx < node . child_count ; idx + + ) {
// Note we pass our type to our child, which becomes its parent node type.
compute_indents_recursive ( tree , node . child_start + idx , node_indent , node_type , indents ,
trailing_indent , max_visited_node_idx ) ;
2013-12-08 13:41:12 -08:00
}
}
2016-05-02 16:53:10 -07:00
std : : vector < int > parse_util_compute_indents ( const wcstring & src ) {
// Make a vector the same size as the input string, which contains the indents. Initialize them
// to -1.
2013-12-08 13:41:12 -08:00
const size_t src_size = src . size ( ) ;
std : : vector < int > indents ( src_size , - 1 ) ;
2014-01-15 01:40:40 -08:00
2020-03-03 01:24:05 -08:00
// Simple trick: if our source does not contain a newline, then all indents are 0.
if ( src . find ( ' \n ' ) = = wcstring : : npos ) {
std : : fill ( indents . begin ( ) , indents . end ( ) , 0 ) ;
return indents ;
}
2016-05-02 16:53:10 -07:00
// Parse the string. We pass continue_after_error to produce a forest; the trailing indent of
// the last node we visited becomes the input indent of the next. I.e. in the case of 'switch
// foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it
// were a case item list.
2013-12-08 13:41:12 -08:00
parse_node_tree_t tree ;
2019-05-05 12:09:25 +02:00
parse_tree_from_string ( src ,
parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens ,
2019-11-18 18:34:50 -08:00
& tree , nullptr /* errors */ ) ;
2014-01-15 01:40:40 -08:00
2016-05-02 16:53:10 -07:00
// Start indenting at the first node. If we have a parse error, we'll have to start indenting
// from the top again.
2013-12-08 14:13:23 -08:00
node_offset_t start_node_idx = 0 ;
int last_trailing_indent = 0 ;
2014-01-15 01:40:40 -08:00
2016-05-02 16:53:10 -07:00
while ( start_node_idx < tree . size ( ) ) {
// The indent that we'll get for the last line.
2013-12-08 14:13:23 -08:00
int trailing_indent = 0 ;
2014-01-15 01:40:40 -08:00
2016-05-02 16:53:10 -07:00
// Biggest offset we visited.
2013-12-08 14:13:23 -08:00
node_offset_t max_visited_node_idx = 0 ;
2014-01-15 01:40:40 -08:00
2016-05-02 16:53:10 -07:00
// Invoke the recursive version. As a hack, pass job_list for the 'parent' token type, which
// will prevent the really-root job list from indenting.
compute_indents_recursive ( tree , start_node_idx , last_trailing_indent , symbol_job_list ,
& indents , & trailing_indent , & max_visited_node_idx ) ;
2014-01-15 01:40:40 -08:00
2016-05-02 16:53:10 -07:00
// We may have more to indent. The trailing indent becomes our current indent. Start at the
// node after the last we visited.
2013-12-08 14:13:23 -08:00
last_trailing_indent = trailing_indent ;
start_node_idx = max_visited_node_idx + 1 ;
}
2014-01-15 01:40:40 -08:00
2016-05-02 16:53:10 -07:00
// Handle comments. Each comment node has a parent (which is whatever the top of the symbol
// stack was when the comment was encountered). So the source range of the comment has the same
// indent as its parent.
2014-09-29 11:29:50 -07:00
const size_t tree_size = tree . size ( ) ;
2016-05-02 16:53:10 -07:00
for ( node_offset_t i = 0 ; i < tree_size ; i + + ) {
2014-09-29 11:29:50 -07:00
const parse_node_t & node = tree . at ( i ) ;
2016-05-02 16:53:10 -07:00
if ( node . type = = parse_special_type_comment & & node . has_source ( ) & &
node . parent < tree_size ) {
2014-09-29 11:29:50 -07:00
const parse_node_t & parent = tree . at ( node . parent ) ;
2016-05-02 16:53:10 -07:00
if ( parent . source_start ! = SOURCE_OFFSET_INVALID ) {
2014-09-29 11:29:50 -07:00
indents . at ( node . source_start ) = indents . at ( parent . source_start ) ;
}
}
}
2016-05-02 16:53:10 -07:00
// Now apply the indents. The indents array has -1 for places where the indent does not change,
// so start at each value and extend it along the run of -1s.
2013-12-08 13:41:12 -08:00
int last_indent = 0 ;
2016-05-02 16:53:10 -07:00
for ( size_t i = 0 ; i < src_size ; i + + ) {
2013-12-08 13:41:12 -08:00
int this_indent = indents . at ( i ) ;
2016-05-02 16:53:10 -07:00
if ( this_indent < 0 ) {
2013-12-08 13:41:12 -08:00
indents . at ( i ) = last_indent ;
2016-05-02 16:53:10 -07:00
} else {
// New indent level.
2013-12-08 13:41:12 -08:00
last_indent = this_indent ;
2016-05-02 16:53:10 -07:00
// Make all whitespace before a token have the new level. This avoid using the wrong
// indentation level if a new line starts with whitespace.
2013-12-08 13:41:12 -08:00
size_t prev_char_idx = i ;
2016-05-02 16:53:10 -07:00
while ( prev_char_idx - - ) {
2019-03-12 14:06:01 -07:00
if ( ! std : : wcschr ( L " \n \t \r " , src . at ( prev_char_idx ) ) ) break ;
2013-12-08 13:41:12 -08:00
indents . at ( prev_char_idx ) = last_indent ;
}
}
}
2016-05-02 16:53:10 -07:00
// Ensure trailing whitespace has the trailing indent. This makes sure a new line is correctly
// indented even if it is empty.
2013-12-08 13:41:12 -08:00
size_t suffix_idx = src_size ;
2016-05-02 16:53:10 -07:00
while ( suffix_idx - - ) {
2019-03-12 14:06:01 -07:00
if ( ! std : : wcschr ( L " \n \t \r " , src . at ( suffix_idx ) ) ) break ;
2013-12-08 14:13:23 -08:00
indents . at ( suffix_idx ) = last_trailing_indent ;
2013-12-08 13:41:12 -08:00
}
2014-01-15 01:40:40 -08:00
2013-12-08 13:41:12 -08:00
return indents ;
}
2013-12-15 16:05:37 -08:00
2016-05-02 16:53:10 -07:00
/// Append a syntax error to the given error list.
static bool append_syntax_error ( parse_error_list_t * errors , size_t source_location ,
const wchar_t * fmt , . . . ) {
2015-04-29 16:53:02 -07:00
parse_error_t error ;
error . source_start = source_location ;
error . source_length = 0 ;
error . code = parse_error_syntax ;
2016-05-02 16:53:10 -07:00
2015-04-29 16:53:02 -07:00
va_list va ;
va_start ( va , fmt ) ;
error . text = vformat_string ( fmt , va ) ;
va_end ( va ) ;
2016-05-02 16:53:10 -07:00
2015-04-29 16:53:02 -07:00
errors - > push_back ( error ) ;
return true ;
}
2016-05-02 16:53:10 -07:00
/// Returns 1 if the specified command is a builtin that may not be used in a pipeline.
2018-09-29 00:58:44 -04:00
static const wchar_t * const forbidden_pipe_commands [ ] = { L " exec " , L " case " , L " break " , L " return " ,
L " continue " } ;
2016-05-02 16:53:10 -07:00
static int parser_is_pipe_forbidden ( const wcstring & word ) {
2017-04-04 21:28:57 -07:00
return contains ( forbidden_pipe_commands , word ) ;
2013-12-15 16:05:37 -08:00
}
2017-06-18 22:07:48 -07:00
/// \return true if \p s is exactly "-h" or "--help".
bool parse_util_argument_is_help(const wchar_t *s) {
    if (std::wcscmp(L"-h", s) == 0) return true;
    return std::wcscmp(L"--help", s) == 0;
}
2016-05-02 16:53:10 -07:00
/// Check if the first argument under the given node is --help.
2018-01-15 22:13:37 -08:00
static bool first_argument_is_help ( tnode_t < grammar : : plain_statement > statement ,
2016-05-02 16:53:10 -07:00
const wcstring & src ) {
2013-12-15 16:05:37 -08:00
bool is_help = false ;
2018-01-15 22:13:37 -08:00
auto arg_nodes = get_argument_nodes ( statement . child < 1 > ( ) ) ;
2016-05-02 16:53:10 -07:00
if ( ! arg_nodes . empty ( ) ) {
// Check the first argument only.
2018-01-15 22:13:37 -08:00
wcstring first_arg_src = arg_nodes . front ( ) . get_source ( src ) ;
2017-06-18 22:07:48 -07:00
is_help = parse_util_argument_is_help ( first_arg_src . c_str ( ) ) ;
2013-12-15 16:05:37 -08:00
}
return is_help ;
}
2016-05-02 16:53:10 -07:00
/// Given a wide character immediately after a dollar sign, return the appropriate error message.
/// For example, if wc is @, then the variable name was $@ and we suggest $argv.
static const wchar_t * error_format_for_character ( wchar_t wc ) {
switch ( wc ) {
2016-05-03 16:23:30 -07:00
case L ' ? ' : {
2016-05-02 16:53:10 -07:00
return ERROR_NOT_STATUS ;
2016-05-03 16:23:30 -07:00
}
case L ' # ' : {
2016-05-02 16:53:10 -07:00
return ERROR_NOT_ARGV_COUNT ;
2016-05-03 16:23:30 -07:00
}
case L ' @ ' : {
2016-05-02 16:53:10 -07:00
return ERROR_NOT_ARGV_AT ;
2016-05-03 16:23:30 -07:00
}
case L ' * ' : {
2016-05-02 16:53:10 -07:00
return ERROR_NOT_ARGV_STAR ;
2016-05-03 16:23:30 -07:00
}
2015-04-29 16:53:02 -07:00
case L ' $ ' :
case VARIABLE_EXPAND :
case VARIABLE_EXPAND_SINGLE :
2016-05-03 16:23:30 -07:00
case VARIABLE_EXPAND_EMPTY : {
2015-04-29 16:53:02 -07:00
return ERROR_NOT_PID ;
2016-05-03 16:23:30 -07:00
}
2019-05-05 12:09:25 +02:00
default : {
return ERROR_BAD_VAR_CHAR1 ;
}
2015-04-29 16:53:02 -07:00
}
}
2016-05-02 16:53:10 -07:00
void parse_util_expand_variable_error ( const wcstring & token , size_t global_token_pos ,
size_t dollar_pos , parse_error_list_t * errors ) {
// Note that dollar_pos is probably VARIABLE_EXPAND or VARIABLE_EXPAND_SINGLE, not a literal
// dollar sign.
2019-11-18 18:34:50 -08:00
assert ( errors ! = nullptr ) ;
2015-04-29 16:53:02 -07:00
assert ( dollar_pos < token . size ( ) ) ;
2016-05-03 21:31:32 -07:00
const bool double_quotes = token . at ( dollar_pos ) = = VARIABLE_EXPAND_SINGLE ;
2015-04-29 16:53:02 -07:00
const size_t start_error_count = errors - > size ( ) ;
const size_t global_dollar_pos = global_token_pos + dollar_pos ;
const size_t global_after_dollar_pos = global_dollar_pos + 1 ;
2016-04-04 14:34:28 -07:00
wchar_t char_after_dollar = dollar_pos + 1 > = token . size ( ) ? 0 : token . at ( dollar_pos + 1 ) ;
2016-05-02 16:53:10 -07:00
switch ( char_after_dollar ) {
2018-03-10 13:16:07 -06:00
case BRACE_BEGIN :
2016-12-12 12:35:22 -08:00
case L ' { ' : {
2018-03-10 13:16:07 -06:00
// The BRACE_BEGIN is for unquoted, the { is for quoted. Anyways we have (possible
2016-05-02 16:53:10 -07:00
// quoted) ${. See if we have a }, and the stuff in between is variable material. If so,
// report a bracket error. Otherwise just complain about the ${.
2015-04-29 16:53:02 -07:00
bool looks_like_variable = false ;
2016-05-02 16:53:10 -07:00
size_t closing_bracket =
2018-03-10 13:16:07 -06:00
token . find ( char_after_dollar = = L ' { ' ? L ' } ' : wchar_t ( BRACE_END ) , dollar_pos + 2 ) ;
2015-04-29 16:53:02 -07:00
wcstring var_name ;
2016-05-02 16:53:10 -07:00
if ( closing_bracket ! = wcstring : : npos ) {
2015-04-29 16:53:02 -07:00
size_t var_start = dollar_pos + 2 , var_end = closing_bracket ;
var_name = wcstring ( token , var_start , var_end - var_start ) ;
2017-04-19 23:43:02 -07:00
looks_like_variable = valid_var_name ( var_name ) ;
2014-03-04 02:53:34 -08:00
}
2016-05-02 16:53:10 -07:00
if ( looks_like_variable ) {
append_syntax_error (
errors , global_after_dollar_pos ,
double_quotes ? ERROR_BRACKETED_VARIABLE_QUOTED1 : ERROR_BRACKETED_VARIABLE1 ,
2019-09-19 10:32:07 -07:00
truncate ( var_name , var_err_len ) . c_str ( ) ) ;
2016-05-02 16:53:10 -07:00
} else {
append_syntax_error ( errors , global_after_dollar_pos , ERROR_BAD_VAR_CHAR1 , L ' { ' ) ;
2014-03-04 02:53:34 -08:00
}
break ;
}
2016-05-02 16:53:10 -07:00
case INTERNAL_SEPARATOR : {
// e.g.: echo foo"$"baz
// These are only ever quotes, not command substitutions. Command substitutions are
// handled earlier.
append_syntax_error ( errors , global_dollar_pos , ERROR_NO_VAR_NAME ) ;
2014-03-04 02:53:34 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
case ' ( ' : {
2015-04-29 16:53:02 -07:00
// e.g.: 'echo "foo$(bar)baz"
// Try to determine what's in the parens.
wcstring token_after_parens ;
wcstring paren_text ;
size_t open_parens = dollar_pos + 1 , cmdsub_start = 0 , cmdsub_end = 0 ;
2016-05-02 16:53:10 -07:00
if ( parse_util_locate_cmdsubst_range ( token , & open_parens , & paren_text , & cmdsub_start ,
& cmdsub_end , true ) > 0 ) {
2015-07-26 00:58:32 -07:00
token_after_parens = tok_first ( paren_text ) ;
2015-04-29 16:53:02 -07:00
}
2016-05-02 16:53:10 -07:00
// Make sure we always show something.
if ( token_after_parens . empty ( ) ) {
2019-09-19 11:09:37 -07:00
token_after_parens = get_ellipsis_str ( ) ;
2015-04-29 16:53:02 -07:00
}
2016-05-02 16:53:10 -07:00
append_syntax_error ( errors , global_dollar_pos , ERROR_BAD_VAR_SUBCOMMAND1 ,
2019-09-19 10:32:07 -07:00
truncate ( token_after_parens , var_err_len ) . c_str ( ) ) ;
2015-04-29 16:53:02 -07:00
break ;
}
2016-05-02 16:53:10 -07:00
case L ' \0 ' : {
append_syntax_error ( errors , global_dollar_pos , ERROR_NO_VAR_NAME ) ;
2014-03-04 02:53:34 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
default : {
2015-04-29 16:53:02 -07:00
wchar_t token_stop_char = char_after_dollar ;
2016-05-02 16:53:10 -07:00
// Unescape (see issue #50).
2018-05-05 19:11:57 -07:00
if ( token_stop_char = = ANY_CHAR )
token_stop_char = L ' ? ' ;
else if ( token_stop_char = = ANY_STRING | | token_stop_char = = ANY_STRING_RECURSIVE )
2014-03-04 02:53:34 -08:00
token_stop_char = L ' * ' ;
2016-05-02 16:53:10 -07:00
// Determine which error message to use. The format string may not consume all the
// arguments we pass but that's harmless.
2015-04-29 16:53:02 -07:00
const wchar_t * error_fmt = error_format_for_character ( token_stop_char ) ;
2016-05-02 16:53:10 -07:00
append_syntax_error ( errors , global_after_dollar_pos , error_fmt , token_stop_char ) ;
2014-03-04 02:53:34 -08:00
break ;
}
}
2016-05-02 16:53:10 -07:00
// We should have appended exactly one error.
2015-04-29 16:53:02 -07:00
assert ( errors - > size ( ) = = start_error_count + 1 ) ;
2014-03-04 02:53:34 -08:00
}
2016-05-02 16:53:10 -07:00
/// Detect cases like $(abc). Given an arg like foo(bar), let arg_src be foo and cmdsubst_src be
/// bar. If arg_src, once unescaped, ends with VARIABLE_EXPAND, then report an error.
///
/// \param arg_src_offset offset added to the position of the dollar within \p arg_src to form the
///        reported error location.
/// \param out_errors if non-null, receives a description of the error.
/// \return PARSER_TEST_ERROR if the pattern was detected, otherwise 0. Also 0 if \p arg_src cannot
///         be unescaped or unescapes to nothing.
static parser_test_error_bits_t detect_dollar_cmdsub_errors(size_t arg_src_offset,
                                                            const wcstring &arg_src,
                                                            const wcstring &cmdsubst_src,
                                                            parse_error_list_t *out_errors) {
    parser_test_error_bits_t result_bits = 0;

    wcstring unescaped_arg_src;
    const bool did_unescape = unescape_string(arg_src, &unescaped_arg_src, UNESCAPE_SPECIAL);
    if (!did_unescape || unescaped_arg_src.empty()) {
        return result_bits;
    }

    // Only a trailing expansion marker is interesting.
    if (unescaped_arg_src.back() != VARIABLE_EXPAND) {
        return result_bits;
    }

    result_bits |= PARSER_TEST_ERROR;
    if (out_errors == nullptr) {
        return result_bits;
    }

    wcstring subcommand_first_token = tok_first(cmdsubst_src);
    if (subcommand_first_token.empty()) {
        // e.g. $(). Report something.
        subcommand_first_token = get_ellipsis_str();
    }
    const size_t global_dollar_pos = arg_src_offset + arg_src.size() - 1;
    append_syntax_error(out_errors, global_dollar_pos, ERROR_BAD_VAR_SUBCOMMAND1,
                        truncate(subcommand_first_token, var_err_len).c_str());
    return result_bits;
}
2014-03-04 02:53:34 -08:00
2016-05-02 16:53:10 -07:00
/// Test if this argument contains any errors. Detected errors include syntax errors in command
/// substitutions, improperly escaped characters and improper use of the variable expansion
/// operator.
2018-01-12 11:36:45 -08:00
parser_test_error_bits_t parse_util_detect_errors_in_argument ( tnode_t < grammar : : argument > node ,
2016-05-02 16:53:10 -07:00
const wcstring & arg_src ,
parse_error_list_t * out_errors ) {
2018-01-12 11:36:45 -08:00
assert ( node . has_source ( ) & & " argument has no source " ) ;
auto source_start = node . source_range ( ) - > start ;
2016-05-02 16:53:10 -07:00
int err = 0 ;
2014-03-04 02:53:34 -08:00
wchar_t * paran_begin , * paran_end ;
int do_loop = 1 ;
2014-10-05 15:40:46 -07:00
wcstring working_copy = arg_src ;
2014-03-04 02:53:34 -08:00
2016-05-02 16:53:10 -07:00
while ( do_loop ) {
2014-10-05 15:40:46 -07:00
const wchar_t * working_copy_cstr = working_copy . c_str ( ) ;
2016-05-02 16:53:10 -07:00
switch ( parse_util_locate_cmdsubst ( working_copy_cstr , & paran_begin , & paran_end , false ) ) {
case - 1 : {
err = 1 ;
if ( out_errors ) {
2018-01-12 11:36:45 -08:00
append_syntax_error ( out_errors , source_start , L " Mismatched parenthesis " ) ;
2014-03-04 02:53:34 -08:00
}
return err ;
}
2016-05-02 16:53:10 -07:00
case 0 : {
2014-03-04 02:53:34 -08:00
do_loop = 0 ;
break ;
}
2016-05-02 16:53:10 -07:00
case 1 : {
2014-03-04 02:53:34 -08:00
const wcstring subst ( paran_begin + 1 , paran_end ) ;
2016-05-02 16:53:10 -07:00
// Replace the command substitution with just INTERNAL_SEPARATOR.
2014-10-05 15:40:46 -07:00
size_t cmd_sub_start = paran_begin - working_copy_cstr ;
size_t cmd_sub_len = paran_end + 1 - paran_begin ;
working_copy . replace ( cmd_sub_start , cmd_sub_len , wcstring ( 1 , INTERNAL_SEPARATOR ) ) ;
2014-03-04 02:53:34 -08:00
2014-03-18 14:14:32 -07:00
parse_error_list_t subst_errors ;
2016-05-02 16:53:10 -07:00
err | = parse_util_detect_errors ( subst , & subst_errors ,
false /* do not accept incomplete */ ) ;
2014-03-18 14:14:32 -07:00
2016-05-02 16:53:10 -07:00
// Our command substitution produced error offsets relative to its source. Tweak the
// offsets of the errors in the command substitution to account for both its offset
// within the string, and the offset of the node.
2018-01-12 11:36:45 -08:00
size_t error_offset = cmd_sub_start + 1 + source_start ;
2014-03-21 17:13:33 -07:00
parse_error_offset_source_start ( & subst_errors , error_offset ) ;
2016-05-02 16:53:10 -07:00
2019-11-18 18:34:50 -08:00
if ( out_errors ! = nullptr ) {
2014-03-18 14:14:32 -07:00
out_errors - > insert ( out_errors - > end ( ) , subst_errors . begin ( ) , subst_errors . end ( ) ) ;
2016-05-02 16:53:10 -07:00
// Hackish. Take this opportunity to report $(...) errors. We do this because
// after we've replaced with internal separators, we can't distinguish between
// "" and (), and also we no longer have the source of the command substitution.
// As an optimization, this is only necessary if the last character is a $.
if ( cmd_sub_start > 0 & & working_copy . at ( cmd_sub_start - 1 ) = = L ' $ ' ) {
2018-01-12 11:36:45 -08:00
err | = detect_dollar_cmdsub_errors (
source_start , working_copy . substr ( 0 , cmd_sub_start ) , subst , out_errors ) ;
2015-04-29 16:53:02 -07:00
}
2014-03-04 02:53:34 -08:00
}
break ;
}
2016-10-29 17:25:48 -07:00
default : {
DIE ( " unexpected parse_util_locate_cmdsubst() return value " ) ;
}
2014-03-04 02:53:34 -08:00
}
}
wcstring unesc ;
2016-05-02 16:53:10 -07:00
if ( ! unescape_string ( working_copy , & unesc , UNESCAPE_SPECIAL ) ) {
if ( out_errors ) {
2018-01-12 11:36:45 -08:00
append_syntax_error ( out_errors , source_start , L " Invalid token '%ls' " ,
2016-05-02 16:53:10 -07:00
working_copy . c_str ( ) ) ;
2014-03-04 02:53:34 -08:00
}
return 1 ;
2016-05-04 15:19:47 -07:00
}
// Check for invalid variable expansions.
const size_t unesc_size = unesc . size ( ) ;
for ( size_t idx = 0 ; idx < unesc_size ; idx + + ) {
2016-10-30 21:05:27 -07:00
if ( unesc . at ( idx ) ! = VARIABLE_EXPAND & & unesc . at ( idx ) ! = VARIABLE_EXPAND_SINGLE ) {
continue ;
}
2016-10-22 20:32:25 -07:00
2016-10-30 21:05:27 -07:00
wchar_t next_char = idx + 1 < unesc_size ? unesc . at ( idx + 1 ) : L ' \0 ' ;
if ( next_char ! = VARIABLE_EXPAND & & next_char ! = VARIABLE_EXPAND_SINGLE & &
2017-04-19 23:43:02 -07:00
! valid_var_name_char ( next_char ) ) {
2016-10-30 21:05:27 -07:00
err = 1 ;
if ( out_errors ) {
// We have something like $$$^.... Back up until we reach the first $.
size_t first_dollar = idx ;
2016-12-03 20:12:53 -08:00
while ( first_dollar > 0 & & ( unesc . at ( first_dollar - 1 ) = = VARIABLE_EXPAND | |
unesc . at ( first_dollar - 1 ) = = VARIABLE_EXPAND_SINGLE ) ) {
2016-10-30 21:05:27 -07:00
first_dollar - - ;
2014-03-04 02:53:34 -08:00
}
2018-01-12 11:36:45 -08:00
parse_util_expand_variable_error ( unesc , source_start , first_dollar , out_errors ) ;
2014-03-04 02:53:34 -08:00
}
}
}
return err ;
}
2018-01-12 11:15:35 -08:00
/// Given that the job given by node should be backgrounded, return true if we detect any errors.
2018-01-20 14:05:34 -08:00
static bool detect_errors_in_backgrounded_job ( tnode_t < grammar : : job > job ,
2018-01-12 11:15:35 -08:00
parse_error_list_t * parse_errors ) {
2018-03-01 18:30:48 -08:00
namespace g = grammar ;
2018-01-13 16:24:21 -08:00
auto source_range = job . source_range ( ) ;
if ( ! source_range ) return false ;
2018-01-12 11:15:35 -08:00
bool errored = false ;
// Disallow background in the following cases:
// foo & ; and bar
// foo & ; or bar
// if foo & ; end
// while foo & ; end
2018-03-02 10:23:57 -08:00
auto job_conj = job . try_get_parent < g : : job_conjunction > ( ) ;
if ( job_conj . try_get_parent < g : : if_clause > ( ) ) {
2018-01-13 16:24:21 -08:00
errored = append_syntax_error ( parse_errors , source_range - > start ,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG ) ;
2018-03-02 10:23:57 -08:00
} else if ( job_conj . try_get_parent < g : : while_header > ( ) ) {
2018-01-13 16:24:21 -08:00
errored = append_syntax_error ( parse_errors , source_range - > start ,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG ) ;
2018-03-02 10:23:57 -08:00
} else if ( auto jlist = job_conj . try_get_parent < g : : job_list > ( ) ) {
2018-01-13 16:24:21 -08:00
// This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
2018-03-01 18:30:48 -08:00
// Fetch the job list and then advance it by one.
auto first_jconj = jlist . next_in_list < g : : job_conjunction > ( ) ;
assert ( first_jconj = = job . try_get_parent < g : : job_conjunction > ( ) & &
" Expected first job to be the node we found " ) ;
( void ) first_jconj ;
2018-03-02 18:09:16 -08:00
// Try getting the next job's decorator.
if ( auto next_job_dec = jlist . next_in_list < g : : job_decorator > ( ) ) {
2018-01-13 16:24:21 -08:00
// The next job is indeed a boolean statement.
2019-12-19 22:41:53 -06:00
parse_job_decoration_t bool_type = bool_statement_type ( next_job_dec ) ;
if ( bool_type = = parse_job_decoration_and ) {
2018-03-02 18:09:16 -08:00
errored = append_syntax_error ( parse_errors , next_job_dec . source_range ( ) - > start ,
2018-01-13 16:24:21 -08:00
BOOL_AFTER_BACKGROUND_ERROR_MSG , L " and " ) ;
2019-12-19 22:41:53 -06:00
} else if ( bool_type = = parse_job_decoration_or ) {
2018-03-02 18:09:16 -08:00
errored = append_syntax_error ( parse_errors , next_job_dec . source_range ( ) - > start ,
2018-01-13 16:24:21 -08:00
BOOL_AFTER_BACKGROUND_ERROR_MSG , L " or " ) ;
2018-01-12 11:15:35 -08:00
}
}
}
return errored ;
}
2018-02-18 13:00:46 -08:00
static bool detect_errors_in_plain_statement ( const wcstring & buff_src ,
const parse_node_tree_t & node_tree ,
tnode_t < grammar : : plain_statement > pst ,
parse_error_list_t * parse_errors ) {
using namespace grammar ;
bool errored = false ;
auto source_start = pst . source_range ( ) - > start ;
// In a few places below, we want to know if we are in a pipeline.
tnode_t < statement > st = pst . try_get_parent < decorated_statement > ( ) . try_get_parent < statement > ( ) ;
2018-03-02 18:09:16 -08:00
pipeline_position_t pipe_pos = get_pipeline_position ( st ) ;
bool is_in_pipeline = ( pipe_pos ! = pipeline_position_t : : none ) ;
2018-02-18 13:00:46 -08:00
// We need to know the decoration.
const enum parse_statement_decoration_t decoration = get_decoration ( pst ) ;
// Check that we don't try to pipe through exec.
if ( is_in_pipeline & & decoration = = parse_statement_decoration_exec ) {
errored = append_syntax_error ( parse_errors , source_start , EXEC_ERR_MSG , L " exec " ) ;
}
2018-03-02 18:09:16 -08:00
// This is a somewhat stale check that 'and' and 'or' are not in pipelines, except at the
// beginning. We can't disallow them as commands entirely because we need to support 'and
// --help', etc.
if ( pipe_pos = = pipeline_position_t : : subsequent ) {
// check if our command is 'and' or 'or'. This is very clumsy; we don't catch e.g. quoted
// commands.
wcstring command = pst . child < 0 > ( ) . get_source ( buff_src ) ;
if ( command = = L " and " | | command = = L " or " ) {
errored =
append_syntax_error ( parse_errors , source_start , EXEC_ERR_MSG , command . c_str ( ) ) ;
}
}
2018-08-26 01:41:45 -07:00
if ( maybe_t < wcstring > unexp_command = command_for_plain_statement ( pst , buff_src ) ) {
wcstring command ;
2018-02-18 13:00:46 -08:00
// Check that we can expand the command.
2020-01-15 17:14:47 -08:00
if ( expand_to_command_and_args ( * unexp_command , operation_context_t : : empty ( ) , & command ,
nullptr , parse_errors ) = = expand_result_t : : error ) {
2018-08-26 01:41:45 -07:00
errored = true ;
2019-04-11 14:28:27 -07:00
parse_error_offset_source_start ( parse_errors , source_start ) ;
2018-02-18 13:00:46 -08:00
}
// Check that pipes are sound.
if ( ! errored & & parser_is_pipe_forbidden ( command ) & & is_in_pipeline ) {
errored =
append_syntax_error ( parse_errors , source_start , EXEC_ERR_MSG , command . c_str ( ) ) ;
}
// Check that we don't return from outside a function. But we allow it if it's
// 'return --help'.
if ( ! errored & & command = = L " return " ) {
bool found_function = false ;
for ( const parse_node_t * ancestor = pst . node ( ) ; ancestor ! = nullptr ;
ancestor = node_tree . get_parent ( * ancestor ) ) {
auto fh = tnode_t < block_statement > : : try_create ( & node_tree , ancestor )
. child < 0 > ( )
. try_get_child < function_header , 0 > ( ) ;
if ( fh ) {
found_function = true ;
break ;
}
}
if ( ! found_function & & ! first_argument_is_help ( pst , buff_src ) ) {
errored = append_syntax_error ( parse_errors , source_start , INVALID_RETURN_ERR_MSG ) ;
}
}
// Check that we don't break or continue from outside a loop.
if ( ! errored & & ( command = = L " break " | | command = = L " continue " ) ) {
// Walk up until we hit a 'for' or 'while' loop. If we hit a function first,
// stop the search; we can't break an outer loop from inside a function.
// This is a little funny because we can't tell if it's a 'for' or 'while'
// loop from the ancestor alone; we need the header. That is, we hit a
// block_statement, and have to check its header.
bool found_loop = false ;
for ( const parse_node_t * ancestor = pst . node ( ) ; ancestor ! = nullptr ;
ancestor = node_tree . get_parent ( * ancestor ) ) {
tnode_t < block_header > bh =
tnode_t < block_statement > : : try_create ( & node_tree , ancestor ) . child < 0 > ( ) ;
if ( bh . try_get_child < while_header , 0 > ( ) | | bh . try_get_child < for_header , 0 > ( ) ) {
// This is a loop header, so we can break or continue.
found_loop = true ;
break ;
} else if ( bh . try_get_child < function_header , 0 > ( ) ) {
// This is a function header, so we cannot break or
// continue. We stop our search here.
found_loop = false ;
break ;
}
}
if ( ! found_loop & & ! first_argument_is_help ( pst , buff_src ) ) {
errored = append_syntax_error (
parse_errors , source_start ,
( command = = L " break " ? INVALID_BREAK_ERR_MSG : INVALID_CONTINUE_ERR_MSG ) ) ;
}
}
// Check that we don't do an invalid builtin (issue #1252).
if ( ! errored & & decoration = = parse_statement_decoration_builtin & &
2020-01-15 17:14:47 -08:00
expand_one ( * unexp_command , expand_flag : : skip_cmdsubst , operation_context_t : : empty ( ) ,
2019-05-04 19:16:26 -07:00
parse_errors ) & &
2019-02-10 14:14:48 -08:00
! builtin_exists ( * unexp_command ) ) {
2018-02-18 13:00:46 -08:00
errored = append_syntax_error ( parse_errors , source_start , UNKNOWN_BUILTIN_ERR_MSG ,
2019-02-10 14:14:48 -08:00
unexp_command - > c_str ( ) ) ;
2018-02-18 13:00:46 -08:00
}
}
return errored ;
}
2016-05-02 16:53:10 -07:00
/// Parse \p buff_src and report errors and incompleteness.
///
/// \param buff_src the source to check.
/// \param out_errors if non-null, receives the list of detected errors.
/// \param allow_incomplete if set, unterminated quotes and subshells are not treated as errors;
///        they mark the input as incomplete instead.
/// \param out_pstree if non-null, receives the parsed source (text plus parse tree).
/// \return a bitmask: PARSER_TEST_ERROR if an error was found, PARSER_TEST_INCOMPLETE if the input
///         has an unclosed block, pipe, quote or subshell.
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
                                                  parse_error_list_t *out_errors,
                                                  bool allow_incomplete,
                                                  parsed_source_ref_t *out_pstree) {
    namespace g = grammar;
    parse_node_tree_t node_tree;
    parse_error_list_t parse_errors;

    parser_test_error_bits_t res = 0;

    // Whether we encountered a parse error.
    bool errored = false;

    // Whether we encountered an unclosed block. We detect this via an 'end_command' block without
    // source.
    bool has_unclosed_block = false;

    // Whether we encounter a missing statement, i.e. a newline after a pipe. This is found by
    // detecting job_continuations that have source for pipes but not the statement.
    bool has_unclosed_pipe = false;

    // Whether there's an unclosed quote or subshell, and therefore unfinished. This is only set if
    // allow_incomplete is set.
    bool has_unclosed_quote_or_subshell = false;

    // Parse the input string into a parse tree. Some errors are detected here.
    bool parsed = parse_tree_from_string(
        buff_src, allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none, &node_tree,
        &parse_errors);

    if (allow_incomplete) {
        // Drop "unterminated" errors, since we don't consider them real errors here; just remember
        // that we saw one.
        for (auto iter = parse_errors.begin(); iter != parse_errors.end();) {
            const bool unterminated =
                iter->code == parse_error_tokenizer_unterminated_quote ||
                iter->code == parse_error_tokenizer_unterminated_subshell;
            if (unterminated) {
                has_unclosed_quote_or_subshell = true;
                iter = parse_errors.erase(iter);
            } else {
                ++iter;
            }
        }
    }

    // Issue #1238: If the only error was unterminated quote, then consider this to have parsed
    // successfully. A better fix would be to have parse_tree_from_string return this information
    // directly (but it would be a shame to munge up its nice bool return).
    if (has_unclosed_quote_or_subshell && parse_errors.empty()) {
        parsed = true;
    }
    errored = !parsed;

    // has_unclosed_quote_or_subshell may only be set if allow_incomplete is true.
    assert(!has_unclosed_quote_or_subshell || allow_incomplete);

    // Walk every node of a successfully parsed tree and run the per-node checks:
    //   - unclosed blocks and unterminated pipelines,
    //   - errors inside arguments,
    //   - misplaced backgrounded jobs,
    //   - arguments given to 'end',
    //   - errors in plain statements (expansion, pipes, return/break/continue placement).
    if (!errored) {
        for (const parse_node_t &node : node_tree) {
            switch (node.type) {
                case symbol_end_command: {
                    // An 'end' without source is an unclosed block.
                    if (!node.has_source()) has_unclosed_block = true;
                    break;
                }
                case symbol_statement: {
                    // Check for a statement without source in a pipeline, i.e. unterminated
                    // pipeline.
                    if (!node.has_source()) {
                        auto pipe_pos = get_pipeline_position({&node_tree, &node});
                        if (pipe_pos != pipeline_position_t::none) {
                            has_unclosed_pipe = true;
                        }
                    }
                    break;
                }
                case symbol_argument: {
                    tnode_t<g::argument> arg{&node_tree, &node};
                    const wcstring arg_src = node.get_source(buff_src);
                    res |= parse_util_detect_errors_in_argument(arg, arg_src, &parse_errors);
                    break;
                }
                case symbol_job: {
                    // Disallow background in the following cases:
                    //
                    // foo & ; and bar
                    // foo & ; or bar
                    // if foo & ; end
                    // while foo & ; end
                    //
                    // If it's not a background job, nothing to do.
                    auto job = tnode_t<g::job>{&node_tree, &node};
                    if (job_node_is_background(job)) {
                        errored |= detect_errors_in_backgrounded_job(job, &parse_errors);
                    }
                    break;
                }
                case symbol_arguments_or_redirections_list: {
                    // verify no arguments to the end command of if, switch, begin (#986).
                    auto list = tnode_t<g::arguments_or_redirections_list>{&node_tree, &node};
                    const bool follows_end = list.try_get_parent<g::if_statement>() ||
                                             list.try_get_parent<g::switch_statement>() ||
                                             list.try_get_parent<g::block_statement>();
                    if (follows_end) {
                        if (auto arg = list.next_in_list<g::argument>()) {
                            errored = append_syntax_error(&parse_errors, arg.source_range()->start,
                                                          END_ARG_ERR_MSG);
                        }
                    }
                    break;
                }
                case symbol_plain_statement: {
                    tnode_t<grammar::plain_statement> pst{&node_tree, &node};
                    errored |=
                        detect_errors_in_plain_statement(buff_src, node_tree, pst, &parse_errors);
                    break;
                }
                default: {
                    // Other node types need no checking.
                    break;
                }
            }
        }
    }

    if (errored) res |= PARSER_TEST_ERROR;

    if (has_unclosed_block || has_unclosed_quote_or_subshell || has_unclosed_pipe) {
        res |= PARSER_TEST_INCOMPLETE;
    }

    if (out_errors != nullptr) {
        *out_errors = std::move(parse_errors);
    }

    if (out_pstree != nullptr) {
        *out_pstree = std::make_shared<parsed_source_t>(buff_src, std::move(node_tree));
    }
    return res;
}
2019-08-04 14:49:56 -07:00
/// Check a string that is meant to be a freestanding argument list for errors.
///
/// The string is first parsed as an argument list; if that succeeds, every argument found in the
/// list is checked individually with parse_util_detect_errors_in_argument.
///
/// \param arg_list_src the source text to parse as an argument list.
/// \param prefix forwarded to describe_with_prefix when an error description is produced.
/// \return a description of the first error encountered, or none() if no error was detected.
maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_list_src,
                                                            const wcstring &prefix) {
    // Describe the first entry of an error list, relative to the source we were given.
    auto describe_first_error = [&arg_list_src, &prefix](const parse_error_list_t &errs) {
        assert(!errs.empty() && "Expected an error");
        const bool is_interactive = false;
        const bool skip_caret = false;
        return errs.at(0).describe_with_prefix(arg_list_src, prefix, is_interactive, skip_caret);
    };

    // Step one: the text as a whole must parse as an argument list.
    parse_error_list_t errors;
    parse_node_tree_t tree;
    const bool parsed = parse_tree_from_string(arg_list_src, parse_flag_none, &tree, &errors,
                                               symbol_freestanding_argument_list);
    if (!parsed) {
        return describe_first_error(errors);
    }

    // Step two: walk the arguments hanging off the root node and test each one in turn.
    assert(!tree.empty() && "Should have parsed a tree");
    tnode_t<grammar::freestanding_argument_list> root_list{&tree, &tree.at(0)};
    for (;;) {
        auto argument = root_list.next_in_list<grammar::argument>();
        if (!argument) {
            // The list is exhausted.
            break;
        }
        const wcstring argument_src = argument.get_source(arg_list_src);
        if (parse_util_detect_errors_in_argument(argument, argument_src, &errors)) {
            return describe_first_error(errors);
        }
    }

    // Nothing objectionable was found.
    return none();
}