2016-05-02 16:53:10 -07:00
// Various mostly unrelated utility functions related to parsing, loading and evaluating fish code.
//
// This library can be seen as a 'toolbox' for functions that are used in many places in fish and
// that are somehow related to parsing the code.
2016-05-18 22:30:21 +00:00
# include "config.h" // IWYU pragma: keep
2019-10-13 15:50:48 -07:00
# include "parse_util.h"
2016-05-02 16:53:10 -07:00
# include <stdarg.h>
# include <stdlib.h>
2017-02-10 18:47:02 -08:00
2019-10-13 15:50:48 -07:00
# include <cwchar>
2016-04-20 23:00:54 -07:00
# include <memory>
2016-05-02 16:53:10 -07:00
# include <string>
2017-02-10 18:47:02 -08:00
# include <type_traits>
2006-01-31 02:51:50 +10:00
2020-06-20 18:22:11 -07:00
# include "ast.h"
2016-05-02 16:53:10 -07:00
# include "builtin.h"
2006-01-31 02:51:50 +10:00
# include "common.h"
2006-02-08 19:20:05 +10:00
# include "expand.h"
2016-05-02 16:53:10 -07:00
# include "fallback.h" // IWYU pragma: keep
2018-05-05 19:11:57 -07:00
# include "future_feature_flags.h"
2016-04-20 23:00:54 -07:00
# include "parse_constants.h"
2019-10-13 16:06:16 -07:00
# include "parse_util.h"
2018-09-10 22:29:52 -07:00
# include "parser.h"
2016-05-02 16:53:10 -07:00
# include "tokenizer.h"
2019-09-19 10:32:07 -07:00
# include "wcstringutil.h"
2016-05-02 16:53:10 -07:00
# include "wildcard.h"
# include "wutil.h" // IWYU pragma: keep
2006-02-08 19:20:05 +10:00
2016-05-02 16:53:10 -07:00
/// Error message for use of backgrounded commands before and/or.
/// The text contains one '%ls' (wide string) placeholder; presumably it receives the name of the
/// offending command ('and'/'or') - confirm against the call site.
# define BOOL_AFTER_BACKGROUND_ERROR_MSG \
_ ( L " The '%ls' command can not be used immediately after a backgrounded job " )
2014-11-02 13:11:27 -08:00
2016-05-02 16:53:10 -07:00
/// Error message for backgrounded commands as conditionals.
/// Takes no format arguments.
# define BACKGROUND_IN_CONDITIONAL_ERROR_MSG \
_ ( L " Backgrounded commands can not be used as conditionals " )
2014-11-02 13:11:27 -08:00
2018-01-22 13:31:39 -08:00
/// Error message for arguments passed to 'end'. Takes no format arguments.
# define END_ARG_ERR_MSG _(L"'end' does not take arguments. Did you forget a ';'?")
2019-09-19 10:32:07 -07:00
/// Maximum length (in characters) of a variable name to show in error reports before truncation.
static constexpr int var_err_len = 16 ;
2016-05-02 16:53:10 -07:00
/// Compute the 1-based line number of the character at \p offset in \p str.
///
/// Every L'\n' found strictly before \p offset bumps the line number. Scanning stops early at the
/// terminating NUL, so an \p offset past the end of the string yields the number of the last line.
///
/// \param str NUL-terminated text to scan; may be null.
/// \param offset index of the character whose line is wanted.
/// \return the 1-based line number, or 0 if \p str is null.
int parse_util_lineno(const wchar_t *str, size_t offset) {
    if (str == nullptr) return 0;

    int line = 1;
    size_t idx = 0;
    while (idx < offset && str[idx] != L'\0') {
        if (str[idx] == L'\n') {
            line += 1;
        }
        idx += 1;
    }
    return line;
}
2016-05-02 16:53:10 -07:00
int parse_util_get_line_from_offset ( const wcstring & str , size_t pos ) {
2012-02-06 00:57:43 -08:00
const wchar_t * buff = str . c_str ( ) ;
2012-11-18 16:30:30 -08:00
int count = 0 ;
2016-05-02 16:53:10 -07:00
for ( size_t i = 0 ; i < pos ; i + + ) {
if ( ! buff [ i ] ) {
2012-11-18 16:30:30 -08:00
return - 1 ;
}
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
if ( buff [ i ] = = L ' \n ' ) {
2012-11-18 16:30:30 -08:00
count + + ;
}
2012-11-18 11:23:22 +01:00
}
2012-11-18 16:30:30 -08:00
return count ;
2007-09-22 00:05:49 +10:00
}
2016-05-02 16:53:10 -07:00
size_t parse_util_get_offset_from_line ( const wcstring & str , int line ) {
2012-02-06 00:57:43 -08:00
const wchar_t * buff = str . c_str ( ) ;
2012-11-18 16:30:30 -08:00
size_t i ;
int count = 0 ;
2012-11-18 11:23:22 +01:00
2019-11-18 17:08:16 -08:00
if ( line < 0 ) return static_cast < size_t > ( - 1 ) ;
2016-05-02 16:53:10 -07:00
if ( line = = 0 ) return 0 ;
2012-11-18 16:30:30 -08:00
2016-05-02 16:53:10 -07:00
for ( i = 0 ; ; i + + ) {
2019-11-18 17:08:16 -08:00
if ( ! buff [ i ] ) return static_cast < size_t > ( - 1 ) ;
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
if ( buff [ i ] = = L ' \n ' ) {
2012-11-18 16:30:30 -08:00
count + + ;
2016-05-02 16:53:10 -07:00
if ( count = = line ) {
2020-08-29 11:07:12 +02:00
return i + 1 ;
2012-11-18 16:30:30 -08:00
}
}
2012-11-18 11:23:22 +01:00
}
2007-09-22 00:05:49 +10:00
}
2016-05-02 16:53:10 -07:00
/// Translate a (line, column) pair into an absolute offset within \p str.
///
/// \param line zero-based line index.
/// \param line_offset column within the line; negative values are treated as 0 and values past
/// the end of the line are clamped to the end of the line.
/// \return the absolute offset, or static_cast<size_t>(-1) if \p line does not exist.
size_t parse_util_get_offset(const wcstring &str, int line, long line_offset) {
    const size_t invalid = static_cast<size_t>(-1);

    const size_t line_start = parse_util_get_offset_from_line(str, line);
    size_t next_line_start = parse_util_get_offset_from_line(str, line + 1);

    if (line_start == invalid) return invalid;
    // No following line: pretend it starts one past the end, so the subtraction below (which
    // discounts the separating newline) gives the length of the final line.
    if (next_line_start == invalid) next_line_start = str.length() + 1;

    const size_t line_length = next_line_start - line_start - 1;

    long column = (line_offset < 0) ? 0 : line_offset;
    if (static_cast<size_t>(column) >= line_length) {
        column = line_length;
    }
    return line_start + column;
}
2016-05-02 16:53:10 -07:00
static int parse_util_locate_brackets_of_type ( const wchar_t * in , wchar_t * * begin , wchar_t * * end ,
bool allow_incomplete , wchar_t open_type ,
wchar_t close_type ) {
// open_type is typically ( or [, and close type is the corresponding value.
2012-11-18 16:30:30 -08:00
wchar_t * pos ;
2021-02-05 22:00:31 +01:00
bool escaped = false ;
2020-09-24 17:21:49 +02:00
bool syntax_error = false ;
2016-05-02 16:53:10 -07:00
int paran_count = 0 ;
2014-03-31 10:01:39 -07:00
2019-11-18 18:34:50 -08:00
wchar_t * paran_begin = nullptr , * paran_end = nullptr ;
2014-03-31 10:01:39 -07:00
2019-05-27 17:24:19 -07:00
assert ( in & & " null parameter " ) ;
2014-03-31 10:01:39 -07:00
2016-05-02 16:53:10 -07:00
for ( pos = const_cast < wchar_t * > ( in ) ; * pos ; pos + + ) {
2021-02-05 22:00:31 +01:00
if ( ! escaped ) {
2021-06-15 01:14:49 +02:00
if ( std : : wcschr ( L " ' \" " , * pos ) ) {
2012-11-18 16:30:30 -08:00
wchar_t * q_end = quote_end ( pos ) ;
2016-05-02 16:53:10 -07:00
if ( q_end & & * q_end ) {
pos = q_end ;
} else {
2012-11-18 16:30:30 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
} else {
if ( * pos = = open_type ) {
2019-11-18 18:34:50 -08:00
if ( ( paran_count = = 0 ) & & ( paran_begin = = nullptr ) ) {
2012-11-18 16:30:30 -08:00
paran_begin = pos ;
}
2014-03-31 10:01:39 -07:00
2012-11-18 16:30:30 -08:00
paran_count + + ;
2016-05-02 16:53:10 -07:00
} else if ( * pos = = close_type ) {
2012-11-18 16:30:30 -08:00
paran_count - - ;
2014-03-31 10:01:39 -07:00
2019-11-18 18:34:50 -08:00
if ( ( paran_count = = 0 ) & & ( paran_end = = nullptr ) ) {
2012-11-18 16:30:30 -08:00
paran_end = pos ;
break ;
}
2014-03-31 10:01:39 -07:00
2016-05-02 16:53:10 -07:00
if ( paran_count < 0 ) {
2020-09-24 17:21:49 +02:00
syntax_error = true ;
2012-11-18 16:30:30 -08:00
break ;
}
}
}
}
2021-02-05 22:00:31 +01:00
if ( * pos = = ' \\ ' ) {
escaped = ! escaped ;
} else {
escaped = false ;
}
2012-11-18 11:23:22 +01:00
}
2014-03-31 10:01:39 -07:00
2012-11-18 16:30:30 -08:00
syntax_error | = ( paran_count < 0 ) ;
2016-05-02 16:53:10 -07:00
syntax_error | = ( ( paran_count > 0 ) & & ( ! allow_incomplete ) ) ;
2014-03-31 10:01:39 -07:00
2016-05-02 16:53:10 -07:00
if ( syntax_error ) {
2012-11-18 16:30:30 -08:00
return - 1 ;
}
2014-03-31 10:01:39 -07:00
2019-11-18 18:34:50 -08:00
if ( paran_begin = = nullptr ) {
2012-11-18 16:30:30 -08:00
return 0 ;
}
2014-03-31 10:01:39 -07:00
2016-05-02 16:53:10 -07:00
if ( begin ) {
2012-11-18 16:30:30 -08:00
* begin = paran_begin ;
}
2014-03-31 10:01:39 -07:00
2016-05-02 16:53:10 -07:00
if ( end ) {
2019-11-18 17:08:16 -08:00
* end = paran_count ? const_cast < wchar_t * > ( in ) + std : : wcslen ( in ) : paran_end ;
2012-11-18 16:30:30 -08:00
}
2014-03-31 10:01:39 -07:00
2012-11-18 16:30:30 -08:00
return 1 ;
2006-01-31 02:51:50 +10:00
}
2016-05-02 16:53:10 -07:00
int parse_util_locate_slice ( const wchar_t * in , wchar_t * * begin , wchar_t * * end ,
bool accept_incomplete ) {
2014-02-03 14:13:42 -08:00
return parse_util_locate_brackets_of_type ( in , begin , end , accept_incomplete , L ' [ ' , L ' ] ' ) ;
}
2016-05-02 16:53:10 -07:00
static int parse_util_locate_brackets_range ( const wcstring & str , size_t * inout_cursor_offset ,
wcstring * out_contents , size_t * out_start ,
size_t * out_end , bool accept_incomplete ,
wchar_t open_type , wchar_t close_type ) {
// Clear the return values.
2019-04-11 14:28:27 -07:00
if ( out_contents ! = nullptr ) out_contents - > clear ( ) ;
2013-10-08 18:41:35 -07:00
* out_start = 0 ;
* out_end = str . size ( ) ;
2014-01-15 01:40:40 -08:00
2016-05-02 16:53:10 -07:00
// Nothing to do if the offset is at or past the end of the string.
if ( * inout_cursor_offset > = str . size ( ) ) return 0 ;
2014-01-15 01:40:40 -08:00
2016-05-02 16:53:10 -07:00
// Defer to the wonky version.
const wchar_t * const buff = str . c_str ( ) ;
const wchar_t * const valid_range_start = buff + * inout_cursor_offset ,
* valid_range_end = buff + str . size ( ) ;
2019-11-18 18:34:50 -08:00
wchar_t * bracket_range_begin = nullptr , * bracket_range_end = nullptr ;
2016-05-02 16:53:10 -07:00
int ret = parse_util_locate_brackets_of_type ( valid_range_start , & bracket_range_begin ,
& bracket_range_end , accept_incomplete , open_type ,
close_type ) ;
2016-10-30 21:05:27 -07:00
if ( ret < = 0 ) {
return ret ;
2013-10-08 18:41:35 -07:00
}
2016-10-30 21:05:27 -07:00
// The command substitutions must not be NULL and must be in the valid pointer range, and
// the end must be bigger than the beginning.
2019-11-18 18:34:50 -08:00
assert ( bracket_range_begin ! = nullptr & & bracket_range_begin > = valid_range_start & &
2016-12-03 20:12:53 -08:00
bracket_range_begin < = valid_range_end ) ;
2019-11-18 18:34:50 -08:00
assert ( bracket_range_end ! = nullptr & & bracket_range_end > bracket_range_begin & &
2016-12-03 20:12:53 -08:00
bracket_range_end > = valid_range_start & & bracket_range_end < = valid_range_end ) ;
2016-10-30 21:05:27 -07:00
// Assign the substring to the out_contents.
const wchar_t * interior_begin = bracket_range_begin + 1 ;
2019-04-11 14:28:27 -07:00
if ( out_contents ! = nullptr ) {
out_contents - > assign ( interior_begin , bracket_range_end - interior_begin ) ;
}
2016-10-30 21:05:27 -07:00
// Return the start and end.
* out_start = bracket_range_begin - buff ;
* out_end = bracket_range_end - buff ;
// Update the inout_cursor_offset. Note this may cause it to exceed str.size(), though
// overflow is not likely.
* inout_cursor_offset = 1 + * out_end ;
2013-10-08 18:41:35 -07:00
return ret ;
}
2016-05-02 16:53:10 -07:00
/// Locate the next command substitution, i.e. ( ... ) region, in \p str.
/// Parameters and return value are those of parse_util_locate_brackets_range(), with the bracket
/// characters fixed to '(' and ')'.
int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset,
                                     wcstring *out_contents, size_t *out_start, size_t *out_end,
                                     bool accept_incomplete) {
    const wchar_t subst_open = L'(';
    const wchar_t subst_close = L')';
    return parse_util_locate_brackets_range(str, inout_cursor_offset, out_contents, out_start,
                                            out_end, accept_incomplete, subst_open, subst_close);
}
2016-05-02 16:53:10 -07:00
void parse_util_cmdsubst_extent ( const wchar_t * buff , size_t cursor_pos , const wchar_t * * a ,
const wchar_t * * b ) {
2019-05-27 17:24:19 -07:00
assert ( buff & & " Null buffer " ) ;
2016-05-02 16:53:10 -07:00
const wchar_t * const cursor = buff + cursor_pos ;
2012-11-18 11:23:22 +01:00
2019-03-12 14:06:01 -07:00
const size_t bufflen = std : : wcslen ( buff ) ;
2013-07-17 01:35:30 -07:00
assert ( cursor_pos < = bufflen ) ;
2013-07-22 18:26:15 -07:00
2019-11-25 20:03:25 +09:00
// ap and bp are the beginning and end of the tightest command substitution found so far.
2013-07-17 01:35:30 -07:00
const wchar_t * ap = buff , * bp = buff + bufflen ;
const wchar_t * pos = buff ;
2016-05-02 16:53:10 -07:00
for ( ; ; ) {
2019-11-18 18:34:50 -08:00
wchar_t * begin = nullptr , * end = nullptr ;
2021-02-09 17:36:39 +01:00
if ( parse_util_locate_brackets_of_type ( pos , & begin , & end , true , L ' ( ' , L ' ) ' ) < = 0 ) {
2016-05-02 16:53:10 -07:00
// No subshell found, all done.
2012-11-18 16:30:30 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
// Interpret NULL to mean the end.
2019-11-18 18:34:50 -08:00
if ( end = = nullptr ) {
2013-07-17 01:35:30 -07:00
end = const_cast < wchar_t * > ( buff ) + bufflen ;
2012-11-18 16:30:30 -08:00
}
2013-07-22 18:26:15 -07:00
2016-05-02 16:53:10 -07:00
if ( begin < cursor & & end > = cursor ) {
// This command substitution surrounds the cursor, so it's a tighter fit.
2012-11-18 16:30:30 -08:00
begin + + ;
2013-07-17 01:35:30 -07:00
ap = begin ;
bp = end ;
2016-05-02 16:53:10 -07:00
// pos is where to begin looking for the next one. But if we reached the end there's no
// next one.
if ( begin > = end ) break ;
2013-07-17 01:35:30 -07:00
pos = begin + 1 ;
2016-05-02 16:53:10 -07:00
} else if ( begin > = cursor ) {
// This command substitution starts at or after the cursor. Since it was the first
// command substitution in the string, we're done.
2012-11-18 16:30:30 -08:00
break ;
2016-05-02 16:53:10 -07:00
} else {
// This command substitution ends before the cursor. Skip it.
2013-07-17 01:35:30 -07:00
assert ( end < cursor ) ;
pos = end + 1 ;
assert ( pos < = buff + bufflen ) ;
}
2012-11-18 16:30:30 -08:00
}
2013-07-22 18:26:15 -07:00
2019-11-18 18:34:50 -08:00
if ( a ! = nullptr ) * a = ap ;
if ( b ! = nullptr ) * b = bp ;
2006-01-31 02:51:50 +10:00
}
2016-05-02 16:53:10 -07:00
/// Get the beginning and end of the job or process definition under the cursor.
2019-10-29 13:32:26 +01:00
static void job_or_process_extent ( bool process , const wchar_t * buff , size_t cursor_pos ,
const wchar_t * * a , const wchar_t * * b ,
std : : vector < tok_t > * tokens ) {
2019-05-27 17:24:19 -07:00
assert ( buff & & " Null buffer " ) ;
2019-10-18 15:24:28 -07:00
const wchar_t * begin = nullptr , * end = nullptr ;
2016-05-02 16:53:10 -07:00
int finished = 0 ;
2012-11-18 11:23:22 +01:00
2019-10-18 15:24:28 -07:00
if ( a ) * a = nullptr ;
if ( b ) * b = nullptr ;
2012-11-18 16:30:30 -08:00
parse_util_cmdsubst_extent ( buff , cursor_pos , & begin , & end ) ;
2016-05-02 16:53:10 -07:00
if ( ! end | | ! begin ) {
2012-11-18 16:30:30 -08:00
return ;
}
2012-11-18 11:23:22 +01:00
2020-04-08 16:56:59 -07:00
assert ( cursor_pos > = static_cast < size_t > ( begin - buff ) ) ;
2015-07-26 00:12:36 -07:00
const size_t pos = cursor_pos - ( begin - buff ) ;
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
if ( a ) * a = begin ;
if ( b ) * b = end ;
2012-11-18 11:23:22 +01:00
2019-10-18 15:24:28 -07:00
const wcstring buffcpy ( begin , end ) ;
tokenizer_t tok ( buffcpy . c_str ( ) , TOK_ACCEPT_UNFINISHED ) ;
maybe_t < tok_t > token { } ;
while ( ( token = tok . next ( ) ) & & ! finished ) {
size_t tok_begin = token - > offset ;
2012-11-18 16:30:30 -08:00
2019-10-18 15:24:28 -07:00
switch ( token - > type ) {
case token_type_t : : pipe : {
if ( ! process ) {
2012-11-19 00:31:03 -08:00
break ;
}
2019-10-18 15:24:28 -07:00
}
/* FALLTHROUGH */
case token_type_t : : end :
case token_type_t : : background :
case token_type_t : : andand :
2019-10-29 13:32:26 +01:00
case token_type_t : : oror :
case token_type_t : : comment : {
2019-10-18 15:24:28 -07:00
if ( tok_begin > = pos ) {
finished = 1 ;
2019-11-18 17:08:16 -08:00
if ( b ) * b = const_cast < wchar_t * > ( begin ) + tok_begin ;
2019-10-18 15:24:28 -07:00
} else {
2019-10-29 13:32:26 +01:00
// Statement at cursor might start after this token.
2019-11-18 17:08:16 -08:00
if ( a ) * a = const_cast < wchar_t * > ( begin ) + tok_begin + token - > length ;
2019-10-29 13:32:26 +01:00
if ( tokens ) tokens - > clear ( ) ;
2012-11-18 16:30:30 -08:00
}
2019-10-29 13:32:26 +01:00
continue ; // Do not add this to tokens
2019-10-18 15:24:28 -07:00
}
default : {
break ;
2019-05-05 12:09:25 +02:00
}
2012-11-18 11:23:22 +01:00
}
2019-10-29 13:32:26 +01:00
if ( tokens ) tokens - > push_back ( * token ) ;
2019-10-18 15:24:28 -07:00
}
2006-01-31 02:51:50 +10:00
}
2016-05-02 16:53:10 -07:00
void parse_util_process_extent ( const wchar_t * buff , size_t pos , const wchar_t * * a ,
2019-10-29 13:32:26 +01:00
const wchar_t * * b , std : : vector < tok_t > * tokens ) {
job_or_process_extent ( true , buff , pos , a , b , tokens ) ;
2006-01-31 02:51:50 +10:00
}
2016-05-02 16:53:10 -07:00
void parse_util_job_extent ( const wchar_t * buff , size_t pos , const wchar_t * * a , const wchar_t * * b ) {
2019-10-29 13:32:26 +01:00
job_or_process_extent ( false , buff , pos , a , b , nullptr ) ;
2006-01-31 02:51:50 +10:00
}
2016-05-02 16:53:10 -07:00
void parse_util_token_extent ( const wchar_t * buff , size_t cursor_pos , const wchar_t * * tok_begin ,
const wchar_t * * tok_end , const wchar_t * * prev_begin ,
const wchar_t * * prev_end ) {
2019-05-27 17:24:19 -07:00
assert ( buff & & " Null buffer " ) ;
2019-11-18 18:34:50 -08:00
const wchar_t * a = nullptr , * b = nullptr , * pa = nullptr , * pb = nullptr ;
2012-11-18 11:23:22 +01:00
2013-09-21 16:38:57 -07:00
const wchar_t * cmdsubst_begin , * cmdsubst_end ;
parse_util_cmdsubst_extent ( buff , cursor_pos , & cmdsubst_begin , & cmdsubst_end ) ;
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
if ( ! cmdsubst_end | | ! cmdsubst_begin ) {
2012-11-18 16:30:30 -08:00
return ;
}
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
// pos is equivalent to cursor_pos within the range of the command substitution {begin, end}.
2016-10-09 14:36:08 -07:00
size_t offset_within_cmdsubst = cursor_pos - ( cmdsubst_begin - buff ) ;
2012-11-18 11:23:22 +01:00
2019-03-12 14:06:01 -07:00
size_t bufflen = std : : wcslen ( buff ) ;
2018-10-20 22:25:55 +02:00
2013-09-21 16:38:57 -07:00
a = cmdsubst_begin + offset_within_cmdsubst ;
2012-11-18 16:30:30 -08:00
b = a ;
2013-09-21 16:38:57 -07:00
pa = cmdsubst_begin + offset_within_cmdsubst ;
2012-11-18 16:30:30 -08:00
pb = pa ;
2012-11-18 11:23:22 +01:00
2013-09-21 16:38:57 -07:00
assert ( cmdsubst_begin > = buff ) ;
2018-10-20 22:25:55 +02:00
assert ( cmdsubst_begin < = ( buff + bufflen ) ) ;
2013-09-21 16:38:57 -07:00
assert ( cmdsubst_end > = cmdsubst_begin ) ;
2018-10-20 22:25:55 +02:00
assert ( cmdsubst_end < = ( buff + bufflen ) ) ;
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
const wcstring buffcpy = wcstring ( cmdsubst_begin , cmdsubst_end - cmdsubst_begin ) ;
2012-11-18 11:23:22 +01:00
2018-02-23 17:28:12 -08:00
tokenizer_t tok ( buffcpy . c_str ( ) , TOK_ACCEPT_UNFINISHED ) ;
2019-10-13 16:06:16 -07:00
while ( maybe_t < tok_t > token = tok . next ( ) ) {
size_t tok_begin = token - > offset ;
2012-11-18 16:30:30 -08:00
size_t tok_end = tok_begin ;
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
// Calculate end of token.
2019-10-13 16:06:16 -07:00
if ( token - > type = = token_type_t : : string ) {
tok_end + = token - > length ;
2012-11-18 16:30:30 -08:00
}
2012-11-18 11:23:22 +01:00
2016-05-02 16:53:10 -07:00
// Cursor was before beginning of this token, means that the cursor is between two tokens,
// so we set it to a zero element string and break.
if ( tok_begin > offset_within_cmdsubst ) {
2013-09-21 16:38:57 -07:00
a = b = cmdsubst_begin + offset_within_cmdsubst ;
2012-11-18 16:30:30 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
// If cursor is inside the token, this is the token we are looking for. If so, set a and b
// and break.
2019-10-13 16:06:16 -07:00
if ( token - > type = = token_type_t : : string & & tok_end > = offset_within_cmdsubst ) {
a = cmdsubst_begin + token - > offset ;
b = a + token - > length ;
2012-11-18 16:30:30 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
// Remember previous string token.
2019-10-13 16:06:16 -07:00
if ( token - > type = = token_type_t : : string ) {
pa = cmdsubst_begin + token - > offset ;
pb = pa + token - > length ;
2012-11-18 16:30:30 -08:00
}
2012-11-18 11:23:22 +01:00
}
2016-05-02 16:53:10 -07:00
if ( tok_begin ) * tok_begin = a ;
if ( tok_end ) * tok_end = b ;
if ( prev_begin ) * prev_begin = pa ;
if ( prev_end ) * prev_end = pb ;
2012-11-18 11:23:22 +01:00
2012-11-18 16:30:30 -08:00
assert ( pa > = buff ) ;
2018-10-20 22:25:55 +02:00
assert ( pa < = ( buff + bufflen ) ) ;
2012-11-18 16:30:30 -08:00
assert ( pb > = pa ) ;
2018-10-20 22:25:55 +02:00
assert ( pb < = ( buff + bufflen ) ) ;
2006-01-31 02:51:50 +10:00
}
2016-05-02 16:53:10 -07:00
/// Convert textual wildcards in \p str into their internal marker characters.
///
/// - An unescaped '*' becomes ANY_STRING.
/// - An unescaped '?' becomes ANY_CHAR, unless the qmark_noglob feature is enabled.
/// - "\*" becomes a literal '*'; "\?" becomes a literal '?' (again only when '?' is a wildcard).
/// - "\\" is copied through unchanged as a pair, so the second backslash is not mistaken for the
///   start of another escape.
/// - Everything else is copied as-is.
///
/// Processing stops at the first NUL character in \p str.
wcstring parse_util_unescape_wildcards(const wcstring &str) {
    wcstring unescaped;
    unescaped.reserve(str.size());
    const bool qmark_is_wildcard = !feature_test(features_t::qmark_noglob);

    const wchar_t *p = str.c_str();
    while (*p != L'\0') {
        const wchar_t cur = p[0];
        // Safe to peek: cur is not the terminator, so p[1] is at worst the terminator itself.
        const wchar_t next = p[1];

        if (cur == L'*') {
            unescaped.push_back(ANY_STRING);
            p += 1;
        } else if (cur == L'?' && qmark_is_wildcard) {
            unescaped.push_back(ANY_CHAR);
            p += 1;
        } else if (cur == L'\\' && (next == L'*' || (next == L'?' && qmark_is_wildcard))) {
            // Escaped wildcard: keep only the literal character.
            unescaped.push_back(next);
            p += 2;
        } else if (cur == L'\\' && next == L'\\') {
            // Not a wildcard, but consume both so the second backslash is not re-examined.
            unescaped.append(L"\\\\");
            p += 2;
        } else {
            unescaped.push_back(cur);
            p += 1;
        }
    }
    return unescaped;
}
2006-02-15 05:56:36 +10:00
2016-05-02 16:53:10 -07:00
/// Find the outermost quoting style of current token. Returns 0 if token is not quoted.
static wchar_t get_quote ( const wcstring & cmd_str , size_t len ) {
size_t i = 0 ;
wchar_t res = 0 ;
const wchar_t * const cmd = cmd_str . c_str ( ) ;
2012-11-18 11:23:22 +01:00
2019-11-25 16:36:13 -08:00
while ( true ) {
2016-05-02 16:53:10 -07:00
if ( ! cmd [ i ] ) break ;
2012-11-18 16:30:30 -08:00
2016-05-02 16:53:10 -07:00
if ( cmd [ i ] = = L ' \\ ' ) {
2012-11-18 16:30:30 -08:00
i + + ;
2016-05-02 16:53:10 -07:00
if ( ! cmd [ i ] ) break ;
2012-11-18 16:30:30 -08:00
i + + ;
2016-05-02 16:53:10 -07:00
} else {
if ( cmd [ i ] = = L ' \' ' | | cmd [ i ] = = L ' \" ' ) {
2012-11-18 16:30:30 -08:00
const wchar_t * end = quote_end ( & cmd [ i ] ) ;
2019-03-12 14:06:01 -07:00
// std::fwprintf( stderr, L"Jump %d\n", end-cmd );
2019-11-18 18:34:50 -08:00
if ( ( end = = nullptr ) | | ( ! * end ) | | ( end > cmd + len ) ) {
2012-11-18 16:30:30 -08:00
res = cmd [ i ] ;
break ;
}
2016-05-02 16:53:10 -07:00
i = end - cmd + 1 ;
} else
2012-11-18 16:30:30 -08:00
i + + ;
2012-11-18 11:23:22 +01:00
}
}
2012-11-18 16:30:30 -08:00
return res ;
2012-07-06 14:34:53 -07:00
}
2016-05-02 16:53:10 -07:00
void parse_util_get_parameter_info ( const wcstring & cmd , const size_t pos , wchar_t * quote ,
2019-10-13 16:06:16 -07:00
size_t * offset , token_type_t * out_type ) {
2016-05-02 16:53:10 -07:00
size_t prev_pos = 0 ;
2016-10-24 17:13:39 -07:00
wchar_t last_quote = L ' \0 ' ;
2012-11-18 11:23:22 +01:00
2018-02-23 17:28:12 -08:00
tokenizer_t tok ( cmd . c_str ( ) , TOK_ACCEPT_UNFINISHED ) ;
2019-10-13 16:06:16 -07:00
while ( auto token = tok . next ( ) ) {
if ( token - > offset > pos ) break ;
2012-07-06 14:34:53 -07:00
2019-10-13 16:06:16 -07:00
if ( token - > type = = token_type_t : : string )
last_quote = get_quote ( tok . text_of ( * token ) , pos - token - > offset ) ;
2012-07-06 14:34:53 -07:00
2019-11-18 18:34:50 -08:00
if ( out_type ! = nullptr ) * out_type = token - > type ;
2012-07-06 14:34:53 -07:00
2019-10-13 16:06:16 -07:00
prev_pos = token - > offset ;
2012-11-18 16:30:30 -08:00
}
2012-07-06 14:34:53 -07:00
wchar_t * cmd_tmp = wcsdup ( cmd . c_str ( ) ) ;
2016-05-02 16:53:10 -07:00
cmd_tmp [ pos ] = 0 ;
2018-10-20 22:25:55 +02:00
size_t cmdlen = pos ;
2016-10-24 17:13:39 -07:00
bool finished = cmdlen ! = 0 ;
2016-10-22 11:21:13 -07:00
if ( finished ) {
2019-11-18 18:34:50 -08:00
finished = ( quote = = nullptr ) ;
2019-03-12 14:06:01 -07:00
if ( finished & & std : : wcschr ( L " \t \n \r " , cmd_tmp [ cmdlen - 1 ] ) ) {
2016-10-24 17:13:39 -07:00
finished = cmdlen > 1 & & cmd_tmp [ cmdlen - 2 ] = = L ' \\ ' ;
2012-11-18 11:23:22 +01:00
}
}
2016-05-02 16:53:10 -07:00
if ( quote ) * quote = last_quote ;
2012-11-18 11:23:22 +01:00
2019-11-18 18:34:50 -08:00
if ( offset ! = nullptr ) {
2016-10-22 11:21:13 -07:00
if ( finished ) {
2019-11-18 18:34:50 -08:00
while ( ( cmd_tmp [ prev_pos ] ! = 0 ) & & ( std : : wcschr ( L " ;| " , cmd_tmp [ prev_pos ] ) ! = nullptr ) )
2019-05-05 12:09:25 +02:00
prev_pos + + ;
2012-11-18 16:30:30 -08:00
* offset = prev_pos ;
2016-05-02 16:53:10 -07:00
} else {
2012-11-18 16:30:30 -08:00
* offset = pos ;
}
2012-11-18 11:23:22 +01:00
}
2016-10-22 11:21:13 -07:00
2012-07-06 14:34:53 -07:00
free ( cmd_tmp ) ;
}
2018-02-17 14:36:43 -08:00
/// Escape \p cmd so it can be inserted into a command line at a point where \p quote is the
/// currently open quote character.
///
/// \param cmd the text to escape.
/// \param quote the open quote (L'\'' or L'"'), or L'\0' if the text is not inside quotes.
/// \param no_tilde when \p quote is L'\0', adds ESCAPE_NO_TILDE to the flags passed to
/// escape_string(); it has no effect otherwise.
/// \return the escaped text.
wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote, bool no_tilde) {
    if (quote == L'\0') {
        // Not inside quotes: defer to the general escaping routine, without adding quotes.
        escape_flags_t flags = ESCAPE_ALL | ESCAPE_NO_QUOTED | (no_tilde ? ESCAPE_NO_TILDE : 0);
        return escape_string(cmd, flags);
    }

    // Here we are going to escape a string with quotes.
    // A few characters cannot be represented inside quotes, e.g. newlines. In that case,
    // terminate the quote, emit a backslash escape, and then re-enter the quote.
    wcstring result;
    result.reserve(cmd.size());
    auto append_outside_quote = [&result, quote](wchar_t letter) {
        result.append({quote, L'\\', letter, quote});
    };

    for (const wchar_t c : cmd) {
        switch (c) {
            case L'\n':
                append_outside_quote(L'n');
                break;
            case L'\t':
                append_outside_quote(L't');
                break;
            case L'\b':
                append_outside_quote(L'b');
                break;
            case L'\r':
                append_outside_quote(L'r');
                break;
            case L'\\':
                result.append({L'\\', L'\\'});
                break;
            case L'$':
                // A dollar sign is only escaped inside double quotes.
                if (quote == L'"') result.push_back(L'\\');
                result.push_back(L'$');
                break;
            default:
                // The quote character itself must be escaped; all else passes through.
                if (c == quote) result.push_back(L'\\');
                result.push_back(c);
                break;
        }
    }
    return result;
}
2013-12-08 13:41:12 -08:00
2016-05-02 16:53:10 -07:00
std : : vector < int > parse_util_compute_indents ( const wcstring & src ) {
// Make a vector the same size as the input string, which contains the indents. Initialize them
2020-06-20 18:22:11 -07:00
// to 0.
2013-12-08 13:41:12 -08:00
const size_t src_size = src . size ( ) ;
2020-06-20 18:22:11 -07:00
std : : vector < int > indents ( src_size , 0 ) ;
2014-01-15 01:40:40 -08:00
2020-03-03 01:24:05 -08:00
// Simple trick: if our source does not contain a newline, then all indents are 0.
if ( src . find ( ' \n ' ) = = wcstring : : npos ) {
return indents ;
}
2016-05-02 16:53:10 -07:00
// Parse the string. We pass continue_after_error to produce a forest; the trailing indent of
// the last node we visited becomes the input indent of the next. I.e. in the case of 'switch
// foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it
// were a case item list.
2020-06-20 18:22:11 -07:00
using namespace ast ;
auto ast =
ast_t : : parse ( src , parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated ) ;
// Visit all of our nodes. When we get a job_list or case_item_list, increment indent while
// visiting its children.
struct indent_visitor_t {
2020-08-09 12:15:14 -07:00
indent_visitor_t ( const wcstring & src , std : : vector < int > & indents )
: src ( src ) , indents ( indents ) { }
2020-06-20 18:22:11 -07:00
void visit ( const node_t & node ) {
int inc = 0 ;
int dec = 0 ;
switch ( node . type ) {
case type_t : : job_list :
case type_t : : andor_job_list :
// Job lists are never unwound.
inc = 1 ;
dec = 1 ;
break ;
// Increment indents for conditions in headers (#1665).
case type_t : : job_conjunction :
if ( node . parent - > type = = type_t : : while_header | |
node . parent - > type = = type_t : : if_clause ) {
inc = 1 ;
dec = 1 ;
}
break ;
2014-01-15 01:40:40 -08:00
2020-08-09 12:15:14 -07:00
// Increment indents for job_continuation_t if it contains a newline.
// This is a bit of a hack - it indents cases like:
// cmd1 |
// ....cmd2
// but avoids "double indenting" if there's no newline:
// cmd1 | while cmd2
// ....cmd3
// end
// See #7252.
case type_t : : job_continuation :
if ( has_newline ( node . as < job_continuation_t > ( ) - > newlines ) ) {
inc = 1 ;
dec = 1 ;
}
break ;
// Likewise for && and ||.
case type_t : : job_conjunction_continuation :
if ( has_newline ( node . as < job_conjunction_continuation_t > ( ) - > newlines ) ) {
2020-06-20 18:22:11 -07:00
inc = 1 ;
dec = 1 ;
}
break ;
case type_t : : case_item_list :
// Here's a hack. Consider:
// switch abc
// cas
//
// fish will see that 'cas' is not valid inside a switch statement because it is
// not "case". It will then unwind back to the top level job list, producing a
// parse tree like:
//
// job_list
// switch_job
// <err>
// normal_job
// cas
//
// And so we will think that the 'cas' job is at the same level as the switch.
// To address this, if we see that the switch statement was not closed, do not
// decrement the indent afterwards.
inc = 1 ;
dec = node . parent - > as < switch_statement_t > ( ) - > end . unsourced ? 0 : 1 ;
break ;
2021-02-08 04:42:24 +01:00
case type_t : : token_base : {
auto tok = node . as < token_base_t > ( ) ;
if ( node . parent - > type = = type_t : : begin_header & &
tok - > type = = parse_token_type_t : : end ) {
// The newline after "begin" is optional, so it is part of the header.
// The header is not in the indented block, so indent the newline here.
if ( node . source ( src ) = = L " \n " ) {
inc = 1 ;
dec = 1 ;
}
}
break ;
}
2020-06-20 18:22:11 -07:00
default :
break ;
2014-09-29 11:29:50 -07:00
}
2021-02-08 04:42:24 +01:00
auto range = node . source_range ( ) ;
if ( range . length > 0 & & node . category = = category_t : : leaf ) {
2021-02-08 05:01:17 +01:00
record_line_continuations_until ( range . start ) ;
2021-02-08 04:42:24 +01:00
std : : fill ( indents . begin ( ) + last_leaf_end , indents . begin ( ) + range . start ,
last_indent ) ;
}
2020-06-20 18:22:11 -07:00
indent + = inc ;
2014-09-29 11:29:50 -07:00
2020-06-20 18:22:11 -07:00
// If we increased the indentation, apply it to the remainder of the string, even if the
// list is empty. For example (where _ represents the cursor):
//
// if foo
// _
//
// we want to indent the newline.
if ( inc ) {
last_indent = indent ;
}
// If this is a leaf node, apply the current indentation.
2021-02-13 07:48:19 +01:00
if ( node . category = = category_t : : leaf & & range . length > 0 ) {
std : : fill ( indents . begin ( ) + range . start , indents . begin ( ) + range . end ( ) , indent ) ;
last_leaf_end = range . start + range . length ;
last_indent = indent ;
2013-12-08 13:41:12 -08:00
}
2020-06-20 18:22:11 -07:00
node_visitor ( * this ) . accept_children_of ( & node ) ;
indent - = dec ;
2013-12-08 13:41:12 -08:00
}
2020-08-09 12:15:14 -07:00
/// \return whether a maybe_newlines node contains at least one newline.
bool has_newline ( const maybe_newlines_t & nls ) const {
return nls . source ( src ) . find ( L ' \n ' ) ! = wcstring : : npos ;
}
2021-02-08 05:01:17 +01:00
void record_line_continuations_until ( size_t offset ) {
wcstring gap_text = src . substr ( last_leaf_end , offset - last_leaf_end ) ;
size_t escaped_nl = gap_text . find ( L " \\ \n " ) ;
if ( escaped_nl = = wcstring : : npos ) return ;
2021-02-16 18:16:05 +01:00
auto line_end = gap_text . begin ( ) + escaped_nl ;
if ( std : : find ( gap_text . begin ( ) , line_end , L ' # ' ) ! = line_end ) return ;
2021-02-08 05:01:17 +01:00
auto end = src . begin ( ) + offset ;
auto newline = src . begin ( ) + last_leaf_end + escaped_nl + 1 ;
// The gap text might contain multiple newlines if there are multiple lines that
// don't contain an AST node, for example, comment lines, or lines containing only
// the escaped newline.
do {
line_continuations . push_back ( newline - src . begin ( ) ) ;
newline = std : : find ( newline + 1 , end , L ' \n ' ) ;
} while ( newline ! = end ) ;
}
2020-06-20 18:22:11 -07:00
// The one-past-the-last index of the most recently encountered leaf node.
// We use this to populate the indents even if there's no tokens in the range.
size_t last_leaf_end { 0 } ;
// The last indent which we assigned.
int last_indent { - 1 } ;
2020-08-09 12:15:14 -07:00
// The source we are indenting.
const wcstring & src ;
2020-06-20 18:22:11 -07:00
// List of indents, which we populate.
std : : vector < int > & indents ;
2014-01-15 01:40:40 -08:00
2020-06-20 18:22:11 -07:00
// Initialize our starting indent to -1, as our top-level node is a job list which
// will immediately increment it.
int indent { - 1 } ;
2021-02-08 05:01:17 +01:00
// List of locations of escaped newline characters.
std : : vector < size_t > line_continuations ;
2020-06-20 18:22:11 -07:00
} ;
2020-08-09 12:15:14 -07:00
indent_visitor_t iv ( src , indents ) ;
2020-06-20 18:22:11 -07:00
node_visitor ( iv ) . accept ( ast . top ( ) ) ;
2021-02-08 05:01:17 +01:00
iv . record_line_continuations_until ( indents . size ( ) ) ;
2021-02-08 04:42:24 +01:00
std : : fill ( indents . begin ( ) + iv . last_leaf_end , indents . end ( ) , iv . last_indent ) ;
2020-06-20 18:22:11 -07:00
// All newlines now get the *next* indent.
// For example, in this code:
// if true
// stuff
// the newline "belongs" to the if statement as it ends its job.
// But when rendered, it visually belongs to the job list.
size_t idx = src_size ;
int next_indent = iv . last_indent ;
while ( idx - - ) {
if ( src . at ( idx ) = = L ' \n ' ) {
2021-02-08 04:37:50 +01:00
bool empty_middle_line = idx + 1 < src_size & & src . at ( idx + 1 ) = = L ' \n ' ;
if ( ! empty_middle_line ) {
indents . at ( idx ) = next_indent ;
}
2020-06-20 18:22:11 -07:00
} else {
next_indent = indents . at ( idx ) ;
}
}
2021-02-08 05:01:17 +01:00
// Add an extra level of indentation to continuation lines.
for ( size_t idx : iv . line_continuations ) {
do {
indents . at ( idx ) + + ;
} while ( + + idx < src_size & & src . at ( idx ) ! = L ' \n ' ) ;
}
2013-12-08 13:41:12 -08:00
return indents ;
}
2013-12-15 16:05:37 -08:00
2016-05-02 16:53:10 -07:00
/// Append a syntax error to the given error list.
static bool append_syntax_error ( parse_error_list_t * errors , size_t source_location ,
const wchar_t * fmt , . . . ) {
2020-07-12 13:55:51 -07:00
if ( ! errors ) return true ;
2015-04-29 16:53:02 -07:00
parse_error_t error ;
error . source_start = source_location ;
error . source_length = 0 ;
error . code = parse_error_syntax ;
2016-05-02 16:53:10 -07:00
2015-04-29 16:53:02 -07:00
va_list va ;
va_start ( va , fmt ) ;
error . text = vformat_string ( fmt , va ) ;
va_end ( va ) ;
2016-05-02 16:53:10 -07:00
2020-07-12 13:55:51 -07:00
errors - > push_back ( std : : move ( error ) ) ;
2015-04-29 16:53:02 -07:00
return true ;
}
2016-05-02 16:53:10 -07:00
/// Returns 1 if the specified command is a builtin that may not be used in a pipeline.
2018-09-29 00:58:44 -04:00
static const wchar_t * const forbidden_pipe_commands [ ] = { L " exec " , L " case " , L " break " , L " return " ,
L " continue " } ;
2016-05-02 16:53:10 -07:00
static int parser_is_pipe_forbidden ( const wcstring & word ) {
2017-04-04 21:28:57 -07:00
return contains ( forbidden_pipe_commands , word ) ;
2013-12-15 16:05:37 -08:00
}
2021-02-14 13:15:29 -08:00
/// \return true if \p s is exactly one of the help options "-h" or "--help".
bool parse_util_argument_is_help(const wcstring &s) {
    if (s == L"-h") return true;
    return s == L"--help";
}
2014-03-18 14:42:38 -07:00
2020-07-01 21:06:58 -07:00
// \return a pointer to the first argument node of an argument_or_redirection_list_t, or nullptr if
// there are no arguments.
2020-09-08 22:04:44 +02:00
static const ast : : argument_t * get_first_arg ( const ast : : argument_or_redirection_list_t & list ) {
2020-07-01 21:06:58 -07:00
for ( const ast : : argument_or_redirection_t & v : list ) {
if ( v . is_argument ( ) ) return & v . argument ( ) ;
2013-12-15 16:05:37 -08:00
}
2020-07-01 21:06:58 -07:00
return nullptr ;
2013-12-15 16:05:37 -08:00
}
2016-05-02 16:53:10 -07:00
/// Given a wide character immediately after a dollar sign, return the appropriate error message.
/// For example, if wc is @, then the variable name was $@ and we suggest $argv.
static const wchar_t * error_format_for_character ( wchar_t wc ) {
switch ( wc ) {
2016-05-03 16:23:30 -07:00
case L ' ? ' : {
2016-05-02 16:53:10 -07:00
return ERROR_NOT_STATUS ;
2016-05-03 16:23:30 -07:00
}
case L ' # ' : {
2016-05-02 16:53:10 -07:00
return ERROR_NOT_ARGV_COUNT ;
2016-05-03 16:23:30 -07:00
}
case L ' @ ' : {
2016-05-02 16:53:10 -07:00
return ERROR_NOT_ARGV_AT ;
2016-05-03 16:23:30 -07:00
}
case L ' * ' : {
2016-05-02 16:53:10 -07:00
return ERROR_NOT_ARGV_STAR ;
2016-05-03 16:23:30 -07:00
}
2015-04-29 16:53:02 -07:00
case L ' $ ' :
case VARIABLE_EXPAND :
case VARIABLE_EXPAND_SINGLE :
2016-05-03 16:23:30 -07:00
case VARIABLE_EXPAND_EMPTY : {
2015-04-29 16:53:02 -07:00
return ERROR_NOT_PID ;
2016-05-03 16:23:30 -07:00
}
2019-05-05 12:09:25 +02:00
default : {
return ERROR_BAD_VAR_CHAR1 ;
}
2015-04-29 16:53:02 -07:00
}
}
2016-05-02 16:53:10 -07:00
void parse_util_expand_variable_error ( const wcstring & token , size_t global_token_pos ,
size_t dollar_pos , parse_error_list_t * errors ) {
// Note that dollar_pos is probably VARIABLE_EXPAND or VARIABLE_EXPAND_SINGLE, not a literal
// dollar sign.
2019-11-18 18:34:50 -08:00
assert ( errors ! = nullptr ) ;
2015-04-29 16:53:02 -07:00
assert ( dollar_pos < token . size ( ) ) ;
2016-05-03 21:31:32 -07:00
const bool double_quotes = token . at ( dollar_pos ) = = VARIABLE_EXPAND_SINGLE ;
2015-04-29 16:53:02 -07:00
const size_t start_error_count = errors - > size ( ) ;
const size_t global_dollar_pos = global_token_pos + dollar_pos ;
const size_t global_after_dollar_pos = global_dollar_pos + 1 ;
2016-04-04 14:34:28 -07:00
wchar_t char_after_dollar = dollar_pos + 1 > = token . size ( ) ? 0 : token . at ( dollar_pos + 1 ) ;
2016-05-02 16:53:10 -07:00
switch ( char_after_dollar ) {
2018-03-10 13:16:07 -06:00
case BRACE_BEGIN :
2016-12-12 12:35:22 -08:00
case L ' { ' : {
2018-03-10 13:16:07 -06:00
// The BRACE_BEGIN is for unquoted, the { is for quoted. Anyways we have (possible
2016-05-02 16:53:10 -07:00
// quoted) ${. See if we have a }, and the stuff in between is variable material. If so,
// report a bracket error. Otherwise just complain about the ${.
2015-04-29 16:53:02 -07:00
bool looks_like_variable = false ;
2016-05-02 16:53:10 -07:00
size_t closing_bracket =
2018-03-10 13:16:07 -06:00
token . find ( char_after_dollar = = L ' { ' ? L ' } ' : wchar_t ( BRACE_END ) , dollar_pos + 2 ) ;
2015-04-29 16:53:02 -07:00
wcstring var_name ;
2016-05-02 16:53:10 -07:00
if ( closing_bracket ! = wcstring : : npos ) {
2015-04-29 16:53:02 -07:00
size_t var_start = dollar_pos + 2 , var_end = closing_bracket ;
var_name = wcstring ( token , var_start , var_end - var_start ) ;
2017-04-19 23:43:02 -07:00
looks_like_variable = valid_var_name ( var_name ) ;
2014-03-04 02:53:34 -08:00
}
2016-05-02 16:53:10 -07:00
if ( looks_like_variable ) {
append_syntax_error (
errors , global_after_dollar_pos ,
double_quotes ? ERROR_BRACKETED_VARIABLE_QUOTED1 : ERROR_BRACKETED_VARIABLE1 ,
2019-09-19 10:32:07 -07:00
truncate ( var_name , var_err_len ) . c_str ( ) ) ;
2016-05-02 16:53:10 -07:00
} else {
append_syntax_error ( errors , global_after_dollar_pos , ERROR_BAD_VAR_CHAR1 , L ' { ' ) ;
2014-03-04 02:53:34 -08:00
}
break ;
}
2016-05-02 16:53:10 -07:00
case INTERNAL_SEPARATOR : {
// e.g.: echo foo"$"baz
// These are only ever quotes, not command substitutions. Command substitutions are
// handled earlier.
append_syntax_error ( errors , global_dollar_pos , ERROR_NO_VAR_NAME ) ;
2014-03-04 02:53:34 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
case ' ( ' : {
2015-04-29 16:53:02 -07:00
// e.g.: 'echo "foo$(bar)baz"
// Try to determine what's in the parens.
wcstring token_after_parens ;
wcstring paren_text ;
size_t open_parens = dollar_pos + 1 , cmdsub_start = 0 , cmdsub_end = 0 ;
2016-05-02 16:53:10 -07:00
if ( parse_util_locate_cmdsubst_range ( token , & open_parens , & paren_text , & cmdsub_start ,
& cmdsub_end , true ) > 0 ) {
2015-07-26 00:58:32 -07:00
token_after_parens = tok_first ( paren_text ) ;
2015-04-29 16:53:02 -07:00
}
2016-05-02 16:53:10 -07:00
// Make sure we always show something.
if ( token_after_parens . empty ( ) ) {
2019-09-19 11:09:37 -07:00
token_after_parens = get_ellipsis_str ( ) ;
2015-04-29 16:53:02 -07:00
}
2016-05-02 16:53:10 -07:00
append_syntax_error ( errors , global_dollar_pos , ERROR_BAD_VAR_SUBCOMMAND1 ,
2019-09-19 10:32:07 -07:00
truncate ( token_after_parens , var_err_len ) . c_str ( ) ) ;
2015-04-29 16:53:02 -07:00
break ;
}
2016-05-02 16:53:10 -07:00
case L ' \0 ' : {
append_syntax_error ( errors , global_dollar_pos , ERROR_NO_VAR_NAME ) ;
2014-03-04 02:53:34 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
default : {
2015-04-29 16:53:02 -07:00
wchar_t token_stop_char = char_after_dollar ;
2016-05-02 16:53:10 -07:00
// Unescape (see issue #50).
2018-05-05 19:11:57 -07:00
if ( token_stop_char = = ANY_CHAR )
token_stop_char = L ' ? ' ;
else if ( token_stop_char = = ANY_STRING | | token_stop_char = = ANY_STRING_RECURSIVE )
2014-03-04 02:53:34 -08:00
token_stop_char = L ' * ' ;
2016-05-02 16:53:10 -07:00
// Determine which error message to use. The format string may not consume all the
// arguments we pass but that's harmless.
2015-04-29 16:53:02 -07:00
const wchar_t * error_fmt = error_format_for_character ( token_stop_char ) ;
2016-05-02 16:53:10 -07:00
append_syntax_error ( errors , global_after_dollar_pos , error_fmt , token_stop_char ) ;
2014-03-04 02:53:34 -08:00
break ;
}
}
2016-05-02 16:53:10 -07:00
// We should have appended exactly one error.
2015-04-29 16:53:02 -07:00
assert ( errors - > size ( ) = = start_error_count + 1 ) ;
2014-03-04 02:53:34 -08:00
}
2016-05-02 16:53:10 -07:00
/// Detect cases like $(abc). Given an arg like foo(bar), let arg_src be foo and cmdsubst_src be
/// bar. If the unescaped arg ends with VARIABLE_EXPAND, then report an error.
/// \param arg_src_offset offset added to positions within \p arg_src when reporting.
/// \param arg_src the argument text preceding the command substitution.
/// \param cmdsubst_src the text of the command substitution.
/// \param out_errors optional list receiving the error; may be null.
/// \return PARSER_TEST_ERROR if the problem was detected, otherwise 0.
static parser_test_error_bits_t detect_dollar_cmdsub_errors(size_t arg_src_offset,
                                                            const wcstring &arg_src,
                                                            const wcstring &cmdsubst_src,
                                                            parse_error_list_t *out_errors) {
    parser_test_error_bits_t bits = 0;

    wcstring unescaped;
    const bool unescape_ok = unescape_string(arg_src, &unescaped, UNESCAPE_SPECIAL);
    if (!unescape_ok || unescaped.empty()) return bits;

    // Only a trailing variable-expansion marker is of interest.
    if (unescaped.back() != VARIABLE_EXPAND) return bits;

    bits |= PARSER_TEST_ERROR;
    if (out_errors != nullptr) {
        wcstring first_token = tok_first(cmdsubst_src);
        if (first_token.empty()) {
            // e.g. $(). Report something.
            first_token = get_ellipsis_str();
        }
        const size_t dollar_global_pos = arg_src_offset + arg_src.size() - 1;
        append_syntax_error(out_errors, dollar_global_pos, ERROR_BAD_VAR_SUBCOMMAND1,
                            truncate(first_token, var_err_len).c_str());
    }
    return bits;
}
2014-03-04 02:53:34 -08:00
2016-05-02 16:53:10 -07:00
/// Test if this argument contains any errors. Detected errors include syntax errors in command
/// substitutions, improperly escaped characters and improper use of the variable expansion
/// operator.
2020-07-01 21:06:58 -07:00
parser_test_error_bits_t parse_util_detect_errors_in_argument ( const ast : : argument_t & arg ,
2016-05-02 16:53:10 -07:00
const wcstring & arg_src ,
parse_error_list_t * out_errors ) {
2020-07-01 21:06:58 -07:00
maybe_t < source_range_t > source_range = arg . try_source_range ( ) ;
if ( ! source_range . has_value ( ) ) return 0 ;
size_t source_start = source_range - > start ;
2020-07-14 15:51:12 -07:00
parser_test_error_bits_t err = 0 ;
2014-03-04 02:53:34 -08:00
2020-07-14 15:51:12 -07:00
size_t cursor = 0 ;
wcstring subst ;
bool do_loop = true ;
2016-05-02 16:53:10 -07:00
while ( do_loop ) {
2020-07-14 15:34:26 -07:00
size_t paren_begin = 0 ;
size_t paren_end = 0 ;
2020-07-14 15:51:12 -07:00
switch ( parse_util_locate_cmdsubst_range ( arg_src , & cursor , & subst , & paren_begin , & paren_end ,
false ) ) {
2016-05-02 16:53:10 -07:00
case - 1 : {
2020-07-14 15:51:12 -07:00
err | = PARSER_TEST_ERROR ;
2016-05-02 16:53:10 -07:00
if ( out_errors ) {
2018-01-12 11:36:45 -08:00
append_syntax_error ( out_errors , source_start , L " Mismatched parenthesis " ) ;
2014-03-04 02:53:34 -08:00
}
return err ;
}
2016-05-02 16:53:10 -07:00
case 0 : {
2020-07-14 15:51:12 -07:00
do_loop = false ;
2014-03-04 02:53:34 -08:00
break ;
}
2016-05-02 16:53:10 -07:00
case 1 : {
2020-07-14 15:34:26 -07:00
assert ( paren_begin < paren_end & & " Parens out of order? " ) ;
2014-03-18 14:14:32 -07:00
parse_error_list_t subst_errors ;
2020-07-12 13:55:51 -07:00
err | = parse_util_detect_errors ( subst , & subst_errors ) ;
2014-03-18 14:14:32 -07:00
2016-05-02 16:53:10 -07:00
// Our command substitution produced error offsets relative to its source. Tweak the
// offsets of the errors in the command substitution to account for both its offset
// within the string, and the offset of the node.
2020-07-14 15:34:26 -07:00
size_t error_offset = paren_begin + 1 + source_start ;
2014-03-21 17:13:33 -07:00
parse_error_offset_source_start ( & subst_errors , error_offset ) ;
2016-05-02 16:53:10 -07:00
2019-11-18 18:34:50 -08:00
if ( out_errors ! = nullptr ) {
2014-03-18 14:14:32 -07:00
out_errors - > insert ( out_errors - > end ( ) , subst_errors . begin ( ) , subst_errors . end ( ) ) ;
2016-05-02 16:53:10 -07:00
// Hackish. Take this opportunity to report $(...) errors. We do this because
// after we've replaced with internal separators, we can't distinguish between
// "" and (), and also we no longer have the source of the command substitution.
// As an optimization, this is only necessary if the last character is a $.
2020-07-14 15:51:12 -07:00
if ( paren_begin > 0 & & arg_src . at ( paren_begin - 1 ) = = L ' $ ' ) {
2018-01-12 11:36:45 -08:00
err | = detect_dollar_cmdsub_errors (
2020-07-14 15:51:12 -07:00
source_start , arg_src . substr ( 0 , paren_begin ) , subst , out_errors ) ;
2015-04-29 16:53:02 -07:00
}
2014-03-04 02:53:34 -08:00
}
break ;
}
2016-10-29 17:25:48 -07:00
default : {
DIE ( " unexpected parse_util_locate_cmdsubst() return value " ) ;
}
2014-03-04 02:53:34 -08:00
}
}
wcstring unesc ;
2020-07-14 15:51:12 -07:00
if ( ! unescape_string ( arg_src , & unesc , UNESCAPE_SPECIAL ) ) {
2016-05-02 16:53:10 -07:00
if ( out_errors ) {
2020-07-14 15:51:12 -07:00
append_syntax_error ( out_errors , source_start , L " Invalid token '%ls' " , arg_src . c_str ( ) ) ;
2014-03-04 02:53:34 -08:00
}
return 1 ;
2016-05-04 15:19:47 -07:00
}
// Check for invalid variable expansions.
const size_t unesc_size = unesc . size ( ) ;
for ( size_t idx = 0 ; idx < unesc_size ; idx + + ) {
2016-10-30 21:05:27 -07:00
if ( unesc . at ( idx ) ! = VARIABLE_EXPAND & & unesc . at ( idx ) ! = VARIABLE_EXPAND_SINGLE ) {
continue ;
}
2016-10-22 20:32:25 -07:00
2016-10-30 21:05:27 -07:00
wchar_t next_char = idx + 1 < unesc_size ? unesc . at ( idx + 1 ) : L ' \0 ' ;
if ( next_char ! = VARIABLE_EXPAND & & next_char ! = VARIABLE_EXPAND_SINGLE & &
2017-04-19 23:43:02 -07:00
! valid_var_name_char ( next_char ) ) {
2016-10-30 21:05:27 -07:00
err = 1 ;
if ( out_errors ) {
// We have something like $$$^.... Back up until we reach the first $.
size_t first_dollar = idx ;
2016-12-03 20:12:53 -08:00
while ( first_dollar > 0 & & ( unesc . at ( first_dollar - 1 ) = = VARIABLE_EXPAND | |
unesc . at ( first_dollar - 1 ) = = VARIABLE_EXPAND_SINGLE ) ) {
2016-10-30 21:05:27 -07:00
first_dollar - - ;
2014-03-04 02:53:34 -08:00
}
2018-01-12 11:36:45 -08:00
parse_util_expand_variable_error ( unesc , source_start , first_dollar , out_errors ) ;
2014-03-04 02:53:34 -08:00
}
}
}
return err ;
}
2018-01-12 11:15:35 -08:00
/// Given that the job given by node should be backgrounded, return true if we detect any errors.
2020-07-01 21:06:58 -07:00
static bool detect_errors_in_backgrounded_job ( const ast : : job_t & job ,
2018-01-12 11:15:35 -08:00
parse_error_list_t * parse_errors ) {
2020-07-01 21:06:58 -07:00
using namespace ast ;
auto source_range = job . try_source_range ( ) ;
2018-01-13 16:24:21 -08:00
if ( ! source_range ) return false ;
2018-01-12 11:15:35 -08:00
bool errored = false ;
// Disallow background in the following cases:
// foo & ; and bar
// foo & ; or bar
// if foo & ; end
// while foo & ; end
2020-07-01 21:06:58 -07:00
const job_conjunction_t * job_conj = job . parent - > try_as < job_conjunction_t > ( ) ;
if ( ! job_conj ) return false ;
if ( job_conj - > parent - > try_as < if_clause_t > ( ) ) {
2018-01-13 16:24:21 -08:00
errored = append_syntax_error ( parse_errors , source_range - > start ,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG ) ;
2020-07-01 21:06:58 -07:00
} else if ( job_conj - > parent - > try_as < while_header_t > ( ) ) {
2018-01-13 16:24:21 -08:00
errored = append_syntax_error ( parse_errors , source_range - > start ,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG ) ;
2020-07-01 21:06:58 -07:00
} else if ( const ast : : job_list_t * jlist = job_conj - > parent - > try_as < ast : : job_list_t > ( ) ) {
2018-01-13 16:24:21 -08:00
// This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
2020-07-01 21:06:58 -07:00
// Find the index of ourselves in the job list.
size_t index ;
for ( index = 0 ; index < jlist - > count ( ) ; index + + ) {
if ( jlist - > at ( index ) = = job_conj ) break ;
}
assert ( index < jlist - > count ( ) & & " Should have found the job in the list " ) ;
// Try getting the next job and check its decorator.
if ( const job_conjunction_t * next = jlist - > at ( index + 1 ) ) {
if ( const keyword_base_t * deco = next - > decorator . contents . get ( ) ) {
assert (
( deco - > kw = = parse_keyword_t : : kw_and | | deco - > kw = = parse_keyword_t : : kw_or ) & &
" Unexpected decorator keyword " ) ;
const wchar_t * deco_name = ( deco - > kw = = parse_keyword_t : : kw_and ? L " and " : L " or " ) ;
errored = append_syntax_error ( parse_errors , deco - > source_range ( ) . start ,
BOOL_AFTER_BACKGROUND_ERROR_MSG , deco_name ) ;
2018-01-12 11:15:35 -08:00
}
}
}
return errored ;
}
2020-07-12 12:51:17 -07:00
/// Given a source buffer \p buff_src and decorated statement \p dst within it, return true if there
/// is an error and false if not. \p storage may be used to reduce allocations.
2020-07-01 21:06:58 -07:00
static bool detect_errors_in_decorated_statement ( const wcstring & buff_src ,
const ast : : decorated_statement_t & dst ,
2020-07-12 12:51:17 -07:00
wcstring * storage ,
2020-07-01 21:06:58 -07:00
parse_error_list_t * parse_errors ) {
using namespace ast ;
2018-02-18 13:00:46 -08:00
bool errored = false ;
2020-07-01 21:06:58 -07:00
auto source_start = dst . source_range ( ) . start ;
2020-07-07 16:28:39 -07:00
const statement_decoration_t decoration = dst . decoration ( ) ;
2020-07-01 21:06:58 -07:00
// Determine if the first argument is help.
bool first_arg_is_help = false ;
if ( const auto * arg = get_first_arg ( dst . args_or_redirs ) ) {
2020-07-12 12:51:17 -07:00
const wcstring & arg_src = arg - > source ( buff_src , storage ) ;
2021-02-14 14:09:59 -08:00
first_arg_is_help = parse_util_argument_is_help ( arg_src ) ;
2020-07-01 21:06:58 -07:00
}
2018-02-18 13:00:46 -08:00
2020-07-01 21:06:58 -07:00
// Get the statement we are part of.
const statement_t * st = dst . parent - > as < statement_t > ( ) ;
2018-02-18 13:00:46 -08:00
2020-07-01 21:06:58 -07:00
// Walk up to the job.
const ast : : job_t * job = nullptr ;
for ( const node_t * cursor = st ; job = = nullptr ; cursor = cursor - > parent ) {
assert ( cursor & & " Reached root without finding a job " ) ;
job = cursor - > try_as < ast : : job_t > ( ) ;
}
assert ( job & & " Should have found the job " ) ;
// Check our pipeline position.
pipeline_position_t pipe_pos ;
if ( job - > continuation . empty ( ) ) {
pipe_pos = pipeline_position_t : : none ;
} else if ( & job - > statement = = st ) {
pipe_pos = pipeline_position_t : : first ;
} else {
pipe_pos = pipeline_position_t : : subsequent ;
}
2018-02-18 13:00:46 -08:00
// Check that we don't try to pipe through exec.
2020-07-01 21:06:58 -07:00
bool is_in_pipeline = ( pipe_pos ! = pipeline_position_t : : none ) ;
2020-07-07 16:28:39 -07:00
if ( is_in_pipeline & & decoration = = statement_decoration_t : : exec ) {
2018-02-18 13:00:46 -08:00
errored = append_syntax_error ( parse_errors , source_start , EXEC_ERR_MSG , L " exec " ) ;
}
2018-03-02 18:09:16 -08:00
// This is a somewhat stale check that 'and' and 'or' are not in pipelines, except at the
// beginning. We can't disallow them as commands entirely because we need to support 'and
// --help', etc.
if ( pipe_pos = = pipeline_position_t : : subsequent ) {
// check if our command is 'and' or 'or'. This is very clumsy; we don't catch e.g. quoted
// commands.
2020-07-12 12:51:17 -07:00
const wcstring & command = dst . command . source ( buff_src , storage ) ;
2018-03-02 18:09:16 -08:00
if ( command = = L " and " | | command = = L " or " ) {
errored =
append_syntax_error ( parse_errors , source_start , EXEC_ERR_MSG , command . c_str ( ) ) ;
}
}
2020-07-12 12:51:17 -07:00
const wcstring & unexp_command = dst . command . source ( buff_src , storage ) ;
if ( ! unexp_command . empty ( ) ) {
2018-08-26 01:41:45 -07:00
wcstring command ;
2018-02-18 13:00:46 -08:00
// Check that we can expand the command.
2020-07-12 12:51:17 -07:00
if ( expand_to_command_and_args ( unexp_command , operation_context_t : : empty ( ) , & command ,
2020-12-20 13:36:12 -08:00
nullptr , parse_errors ,
true /* skip wildcards */ ) = = expand_result_t : : error ) {
2018-08-26 01:41:45 -07:00
errored = true ;
2019-04-11 14:28:27 -07:00
parse_error_offset_source_start ( parse_errors , source_start ) ;
2018-02-18 13:00:46 -08:00
}
// Check that pipes are sound.
if ( ! errored & & parser_is_pipe_forbidden ( command ) & & is_in_pipeline ) {
errored =
append_syntax_error ( parse_errors , source_start , EXEC_ERR_MSG , command . c_str ( ) ) ;
}
// Check that we don't return from outside a function. But we allow it if it's
// 'return --help'.
2020-07-01 21:06:58 -07:00
if ( ! errored & & command = = L " return " & & ! first_arg_is_help ) {
// See if we are in a function.
2018-02-18 13:00:46 -08:00
bool found_function = false ;
2020-07-01 21:06:58 -07:00
for ( const node_t * cursor = & dst ; cursor ! = nullptr ; cursor = cursor - > parent ) {
if ( const auto * bs = cursor - > try_as < block_statement_t > ( ) ) {
if ( bs - > header - > type = = type_t : : function_header ) {
found_function = true ;
break ;
}
2018-02-18 13:00:46 -08:00
}
}
2020-07-01 21:06:58 -07:00
if ( ! found_function ) {
2018-02-18 13:00:46 -08:00
errored = append_syntax_error ( parse_errors , source_start , INVALID_RETURN_ERR_MSG ) ;
}
}
// Check that we don't break or continue from outside a loop.
2020-07-01 21:06:58 -07:00
if ( ! errored & & ( command = = L " break " | | command = = L " continue " ) & & ! first_arg_is_help ) {
2018-02-18 13:00:46 -08:00
// Walk up until we hit a 'for' or 'while' loop. If we hit a function first,
// stop the search; we can't break an outer loop from inside a function.
// This is a little funny because we can't tell if it's a 'for' or 'while'
// loop from the ancestor alone; we need the header. That is, we hit a
// block_statement, and have to check its header.
bool found_loop = false ;
2020-07-01 21:06:58 -07:00
for ( const node_t * ancestor = & dst ; ancestor ! = nullptr ; ancestor = ancestor - > parent ) {
const auto * block = ancestor - > try_as < block_statement_t > ( ) ;
if ( ! block ) continue ;
if ( block - > header - > type = = type_t : : for_header | |
block - > header - > type = = type_t : : while_header ) {
2018-02-18 13:00:46 -08:00
// This is a loop header, so we can break or continue.
found_loop = true ;
break ;
2020-07-01 21:06:58 -07:00
} else if ( block - > header - > type = = type_t : : function_header ) {
2018-02-18 13:00:46 -08:00
// This is a function header, so we cannot break or
// continue. We stop our search here.
found_loop = false ;
break ;
}
}
2020-07-01 21:06:58 -07:00
if ( ! found_loop ) {
2018-02-18 13:00:46 -08:00
errored = append_syntax_error (
parse_errors , source_start ,
( command = = L " break " ? INVALID_BREAK_ERR_MSG : INVALID_CONTINUE_ERR_MSG ) ) ;
}
}
// Check that we don't do an invalid builtin (issue #1252).
2020-07-12 12:51:17 -07:00
if ( ! errored & & decoration = = statement_decoration_t : : builtin ) {
wcstring command = unexp_command ;
if ( expand_one ( command , expand_flag : : skip_cmdsubst , operation_context_t : : empty ( ) ,
parse_errors ) & &
! builtin_exists ( unexp_command ) ) {
errored = append_syntax_error ( parse_errors , source_start , UNKNOWN_BUILTIN_ERR_MSG ,
unexp_command . c_str ( ) ) ;
}
2018-02-18 13:00:46 -08:00
}
}
return errored ;
}
2020-07-01 21:06:58 -07:00
// Given we have a trailing argument_or_redirection_list, like `begin; end > /dev/null`, verify that
// there are no arguments in the list.
static bool detect_errors_in_block_redirection_list (
const ast : : argument_or_redirection_list_t & args_or_redirs , parse_error_list_t * out_errors ) {
if ( const auto * first_arg = get_first_arg ( args_or_redirs ) ) {
return append_syntax_error ( out_errors , first_arg - > source_range ( ) . start ,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG ) ;
}
return false ;
}
2020-07-12 13:55:51 -07:00
/// Walk every node of an already-parsed \p ast and check it for errors.
/// The walk:
///   - checks each argument (command substitutions, escapes, variable expansions);
///   - checks backgrounded jobs used before and/or or as conditionals;
///   - checks each decorated statement (pipelines, return/break/continue placement, builtins);
///   - verifies that nothing but redirections follows the 'end' of a block, if or switch;
///   - notices incomplete input: a block without a sourced 'end', a pipe without a statement
///     after it, or a && / || without a job after it.
/// \param buff_src the source text the ast was parsed from.
/// \param out_errors optional list receiving error descriptions.
/// \return PARSER_TEST_ERROR and/or PARSER_TEST_INCOMPLETE bits, or 0 if nothing was found.
parser_test_error_bits_t parse_util_detect_errors(const ast::ast_t &ast, const wcstring &buff_src,
                                                  parse_error_list_t *out_errors) {
    using namespace ast;
    parser_test_error_bits_t res = 0;

    // Whether we encountered a parse error.
    bool errored = false;

    // Whether the input is unfinished: an unclosed block (its 'end' has no source), a pipe that
    // has source while the statement after it does not (e.g. a newline after a pipe), or a
    // conjunction operator (&& or ||) that has source while the job after it does not.
    bool incomplete = false;

    // Scratch string handed to source() calls.
    wcstring storage;

    for (const node_t &node : ast) {
        if (const job_continuation_t *jc = node.try_as<job_continuation_t>()) {
            // Somewhat clumsy way of checking for a statement without source in a pipeline.
            if (!jc->pipe.unsourced && !jc->statement.try_source_range().has_value()) {
                incomplete = true;
            }
            continue;
        }

        if (const auto *jcc = node.try_as<job_conjunction_continuation_t>()) {
            // Somewhat clumsy way of checking for a job without source in a conjunction.
            if (!jcc->conjunction.unsourced && !jcc->job.try_source_range().has_value()) {
                incomplete = true;
            }
            continue;
        }

        if (const argument_t *arg = node.try_as<argument_t>()) {
            const wcstring &arg_src = arg->source(buff_src, &storage);
            res |= parse_util_detect_errors_in_argument(*arg, arg_src, out_errors);
            continue;
        }

        if (const ast::job_t *job = node.try_as<ast::job_t>()) {
            // Only background jobs need checking; see detect_errors_in_backgrounded_job for the
            // disallowed forms.
            if (job->bg && detect_errors_in_backgrounded_job(*job, out_errors)) {
                errored = true;
            }
            continue;
        }

        if (const ast::decorated_statement_t *stmt = node.try_as<decorated_statement_t>()) {
            if (detect_errors_in_decorated_statement(buff_src, *stmt, &storage, out_errors)) {
                errored = true;
            }
            continue;
        }

        // The three block-like statements: a missing 'end' means unfinished input, and anything
        // trailing the 'end' must be a redirection.
        if (const auto *block = node.try_as<block_statement_t>()) {
            if (block->end.unsourced) incomplete = true;
            if (detect_errors_in_block_redirection_list(block->args_or_redirs, out_errors)) {
                errored = true;
            }
            continue;
        }

        if (const auto *ifs = node.try_as<if_statement_t>()) {
            if (ifs->end.unsourced) incomplete = true;
            if (detect_errors_in_block_redirection_list(ifs->args_or_redirs, out_errors)) {
                errored = true;
            }
            continue;
        }

        if (const auto *switchs = node.try_as<switch_statement_t>()) {
            if (switchs->end.unsourced) incomplete = true;
            if (detect_errors_in_block_redirection_list(switchs->args_or_redirs, out_errors)) {
                errored = true;
            }
            continue;
        }
    }

    if (errored) res |= PARSER_TEST_ERROR;
    if (incomplete) res |= PARSER_TEST_INCOMPLETE;
    return res;
}
/// Parse \p buff_src and check it for errors.
/// \param buff_src the source text to check.
/// \param out_errors optional list receiving error descriptions.
/// \param allow_incomplete if set, an unterminated quote or subshell is not treated as an error;
/// PARSER_TEST_INCOMPLETE is returned instead.
/// \return PARSER_TEST_INCOMPLETE for an unterminated quote or subshell (only when
/// \p allow_incomplete is set), PARSER_TEST_ERROR if parsing produced any other error, and
/// otherwise the result of checking the parsed tree.
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
                                                  parse_error_list_t *out_errors,
                                                  bool allow_incomplete) {
    using namespace ast;

    // Parse the input string into an ast. Some errors are detected here.
    const parse_tree_flags_t parse_flags =
        allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none;
    parse_error_list_t parse_errors;
    auto tree = ast_t::parse(buff_src, parse_flags, &parse_errors);

    // Whether there's an unclosed quote or subshell, and therefore unfinished input. This is only
    // ever set if allow_incomplete is set.
    bool unterminated = false;
    if (allow_incomplete) {
        // Issue #1238: unterminated quotes and subshells are not real errors here. Walk the list
        // backwards so that erasing an entry does not disturb the indices still to be visited.
        for (size_t idx = parse_errors.size(); idx-- > 0;) {
            const auto code = parse_errors.at(idx).code;
            if (code != parse_error_tokenizer_unterminated_quote &&
                code != parse_error_tokenizer_unterminated_subshell) {
                continue;
            }
            unterminated = true;
            parse_errors.erase(parse_errors.begin() + idx);
        }
    }

    assert(!unterminated || allow_incomplete);
    if (unterminated) {
        // We do not bother to validate the rest of the tree in this case.
        return PARSER_TEST_INCOMPLETE;
    }

    // Early parse error, stop here.
    if (!parse_errors.empty()) {
        if (out_errors) vec_append(*out_errors, std::move(parse_errors));
        return PARSER_TEST_ERROR;
    }

    // Defer to the tree-walking version.
    return parse_util_detect_errors(tree, buff_src, out_errors);
}
2019-08-04 14:49:56 -07:00
/// Parse \p arg_list_src as a freestanding argument list and check each argument for errors.
/// \param arg_list_src the source text of the argument list.
/// \param prefix passed through to the error description.
/// \return a description of the first error found, or none() if there is no error.
maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_list_src,
                                                            const wcstring &prefix) {
    using namespace ast;

    // Helper to return a description of the first error.
    auto describe_first_error = [&](const parse_error_list_t &errs) {
        assert(!errs.empty() && "Expected an error");
        return errs.at(0).describe_with_prefix(arg_list_src, prefix, false /* not interactive */,
                                               false /* don't skip caret */);
    };

    // Parse the string as a freestanding argument list.
    parse_error_list_t errors;
    auto tree = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &errors);
    if (!errors.empty()) {
        return describe_first_error(errors);
    }

    // Get the root argument list and test each of its arguments.
    const auto *arg_list = tree.top()->as<freestanding_argument_list_t>();
    for (const argument_t &arg : arg_list->arguments) {
        const wcstring arg_src = arg.source(arg_list_src);
        if (parse_util_detect_errors_in_argument(arg, arg_src, &errors)) {
            return describe_first_error(errors);
        }
    }
    return none();
}