/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Lexical analysis for genksyms.
 * Copyright 1996, 1997 Linux International.
 *
 * New implementation contributed by Richard Henderson <rth@tamu.edu>
 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
 *
 * Taken from Linux modutils 2.4.22.
 */
%{
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.

   YY_DECL overrides the prototype of the scanner function that flex
   generates: it becomes the file-local yylex1(), and the hand-written
   yylex() in the user-code section of this file calls it.  */
#define YY_DECL static int yylex1(void)
%}
/* Identifiers: letters, digits, '_' and '$'; may not start with a digit. */
IDENT [A-Za-z_\$][A-Za-z0-9_\$]*
/* Integer constants: octal (including a lone 0), decimal and hexadecimal. */
O_INT 0[0-7]*
D_INT [1-9][0-9]*
X_INT 0[Xx][0-9A-Fa-f]+
/* Integer suffixes recognized here: U, L, UL or LU, in either case. */
I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}?
/* Floating constants.  FRAC needs digits on at least one side of the
   decimal point; REAL is either a FRAC with an optional exponent or a
   digit string with a mandatory exponent, plus an optional F/L suffix. */
FRAC ([0-9]*\.[0-9]+)|([0-9]+\.)
EXP [Ee][+-]?[0-9]+
F_SUF [FfLl]
REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
/* String and character literals, optionally prefixed with L.  A backslash
   consumes the character that follows it (any character except newline),
   so an escaped quote does not terminate the literal. */
STRING L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR L?\'([^\\\']*\\.)*[^\\\']*\'
/* Multi-character operators: one of ~ % ^ & * + = | < > / - followed by
   '=', or one of && || -> << >>. */
MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
/* We don't do multiple input files.  */
%option noyywrap

/* Nothing in this file calls input(), so ask flex not to generate it.  */
%option noinput

%%

 /* NOTE: comments in the rules section must be indented; flex treats
    anything starting in column 0 here as a pattern.  */

 /* Keep track of our location in the original source files.  A line
    marker of the form  # <number> "<file>"  is handed to the second
    stage as FILENAME; any other '#' line and every newline just bumps
    the line counter.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n     return FILENAME;
^#.*\n                                  cur_line++;
\n                                      cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+                            ;

{STRING}                                return STRING;
{CHAR}                                  return CHAR;
{IDENT}                                 return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}                              return OTHER;
{INT}                                   return INT;
{REAL}                                  return REAL;

"..."                                   return DOTS;

 /* All other tokens are single characters.  */
.                                       return yytext[0];

%%
/* Bring in the keyword recognizer. */
#include "keywords.c"
/* Macros to append to our phrase collection list. */

/*
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case the only effect will be that the ABI checksums become
 * more volatile, which is acceptable.  Also, such collisions are quite rare;
 * so far this has only been observed in include/linux/telephony.h.
 */
/*
 * _APP(T, L): record token text T of length L as a string_list node.
 *
 * Expands in a scope that provides the variables `cur_node` and
 * `next_node` (see yylex()).  The node that was pre-allocated in
 * `next_node` becomes `cur_node` and is filled in; a fresh empty node is
 * then allocated into `next_node` with its `next` link pointing back at
 * the node just filled.  The list is therefore linked newest-first.
 *
 * L+1 bytes are copied, so T must have its terminating NUL at T[L].
 * The node is tagged SYM_ENUM_CONST when find_symbol() reports a match
 * for the text under that tag, and SYM_NORMAL otherwise; the current
 * `in_source_file` flag is stored in the node as well.
 *
 * APP is the common case: append the text flex just matched.
 */
#define _APP(T,L) do {                                                  \
    cur_node = next_node;                                               \
    next_node = xmalloc(sizeof(*next_node));                            \
    next_node->next = cur_node;                                         \
    cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1);              \
    cur_node->tag =                                                     \
      find_symbol(cur_node->string, SYM_ENUM_CONST, 1) ?                \
      SYM_ENUM_CONST : SYM_NORMAL;                                      \
    cur_node->in_source_file = in_source_file;                          \
  } while (0)

#define APP _APP(yytext, yyleng)

/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.

   Raw tokens come from yylex1().  Every raw token that is kept is
   appended to the string_list via APP.  Depending on `lexstate`, a raw
   token is either returned (possibly re-classified as a keyword or as
   TYPE), or collected together with the tokens that follow it until a
   bracket counter drops back to zero, at which point the whole run is
   returned as one *_PHRASE token.

   FILENAME tokens (preprocessor line markers) are never returned; they
   update cur_filename, cur_line and in_source_file and are skipped.

   Returns 0 when yylex1() reports end of input, otherwise a token code.
   Before returning a token, yylval is set to the address of the `next`
   link of the pre-allocated node, i.e. the link to the most recently
   appended string_list node.  */
int
yylex(void)
{
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
    ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
  } lexstate = ST_NOTSTARTED;

  /* Countdown counters, decremented at `fini` each time a token is
     returned.  While non-zero they disable the typedef lookup and the
     collapsing of '{' ... '}' into a BRACE_PHRASE, respectively.  */
  static int suppress_type_lookup, dont_want_brace_phrase;
  /* Pre-allocated, not yet filled node; see _APP.  */
  static struct string_list *next_node;
  /* Name taken from the first line marker seen.  */
  static char *source_file;

  /* `count` is the bracket nesting depth for the phrase states.  It is
     not static: it starts at 0 on every call and is only carried across
     the `goto repeat` iterations within one call.  */
  int token, count = 0;
  struct string_list *cur_node;

  if (lexstate == ST_NOTSTARTED)
    {
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;

      /* Save the filename and line number for later error messages.  */

      /* free(NULL) is a no-op, so no guard is needed.  */
      free(cur_filename);

      /* The FILENAME rule only matches text containing a pair of double
         quotes around a non-empty name, so both strchr() calls succeed.  */
      file = strchr(yytext, '\"')+1;
      e = strchr(file, '\"');
      *e = '\0';
      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
      /* Skip the '#' and the first blank; atoi() skips any further
         leading whitespace before the number.  */
      cur_line = atoi(yytext+2);

      if (!source_file) {
        source_file = xstrdup(cur_filename);
        in_source_file = 1;
      } else {
        in_source_file = (strcmp(cur_filename, source_file) == 0);
      }

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
        {
        case IDENT:
          APP;
          {
            int r = is_reserved_word(yytext, yyleng);
            if (r >= 0)
              {
                switch (token = r)
                  {
                  /* These keywords start a parenthesized phrase; the
                     keyword itself has already been appended above.  */
                  case ATTRIBUTE_KEYW:
                    lexstate = ST_ATTRIBUTE;
                    count = 0;
                    goto repeat;
                  case ASM_KEYW:
                    lexstate = ST_ASM;
                    count = 0;
                    goto repeat;
                  case TYPEOF_KEYW:
                    lexstate = ST_TYPEOF;
                    count = 0;
                    goto repeat;

                  case STRUCT_KEYW:
                  case UNION_KEYW:
                  case ENUM_KEYW:
                    /* Both counters are decremented once for the keyword
                       itself at `fini`, so the typedef lookup is skipped
                       for the next token and '{' is returned as a plain
                       token for the next two tokens.  */
                    dont_want_brace_phrase = 3;
                    suppress_type_lookup = 2;
                    goto fini;

                  case EXPORT_SYMBOL_KEYW:
                    goto fini;

                  case STATIC_ASSERT_KEYW:
                    lexstate = ST_STATIC_ASSERT;
                    count = 0;
                    goto repeat;
                  }
              }
            /* Reached for plain identifiers and for reserved words not
               handled above.  */
            if (!suppress_type_lookup)
              {
                if (find_symbol(yytext, SYM_TYPEDEF, 1))
                  token = TYPE;
              }
          }
          break;

        case '[':
          APP;
          lexstate = ST_BRACKET;
          count = 1;
          goto repeat;

        case '{':
          APP;
          if (dont_want_brace_phrase)
            break;
          lexstate = ST_BRACE;
          count = 1;
          goto repeat;

        case '=': case ':':
          /* The token is returned as is; what follows is collected as an
             EXPRESSION_PHRASE by the next call.  */
          APP;
          lexstate = ST_EXPRESSION;
          break;

        default:
          APP;
          break;
        }
      break;

    case ST_ATTRIBUTE:
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ATTRIBUTE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_ASM:
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ASM_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_TYPEOF_1:
      /* First token after the opening '(' of a typeof.  */
      if (token == IDENT)
        {
          if (is_reserved_word(yytext, yyleng) >= 0
              || find_symbol(yytext, SYM_TYPEDEF, 1))
            {
              /* Push the identifier and then the '(' back onto the input
                 and return just the keyword, so that the following
                 tokens go through ST_NORMAL.  */
              yyless(0);
              unput('(');
              lexstate = ST_NORMAL;
              token = TYPEOF_KEYW;
              break;
            }
          /* Otherwise record the '(' that was held back in ST_TYPEOF.  */
          _APP("(", 1);
        }
      /* NOTE(review): when the first token is not an identifier, no "("
         node is appended for the opening parenthesis -- confirm that
         this is intended.  */
      lexstate = ST_TYPEOF;
      /* FALLTHRU */

    case ST_TYPEOF:
      switch (token)
        {
        case '(':
          /* The outermost '(' is not appended yet; ST_TYPEOF_1 decides
             what to do with it after seeing the next token.  */
          if ( ++count == 1 )
            lexstate = ST_TYPEOF_1;
          else
            APP;
          goto repeat;
        case ')':
          APP;
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = TYPEOF_PHRASE;
              break;
            }
          goto repeat;
        default:
          APP;
          goto repeat;
        }
      break;

    case ST_BRACKET:
      APP;
      switch (token)
        {
        case '[':
          ++count;
          goto repeat;
        case ']':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACKET_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_BRACE:
      APP;
      switch (token)
        {
        case '{':
          ++count;
          goto repeat;
        case '}':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_EXPRESSION:
      /* Entered on a fresh call, so `count` starts at 0 here.  */
      switch (token)
        {
        case '(': case '[': case '{':
          ++count;
          APP;
          goto repeat;
        case '}':
          /* is this the last line of an enum declaration? */
          if (count == 0)
            {
              /* Put back the token we just read so's we can find it again
                 after registering the expression.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          /* FALLTHRU */
        case ')': case ']':
          --count;
          APP;
          goto repeat;
        case ',': case ';':
          if (count == 0)
            {
              /* Put back the token we just read so's we can find it again
                 after registering the expression.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          APP;
          goto repeat;
        default:
          APP;
          goto repeat;
        }
      break;

    case ST_STATIC_ASSERT:
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = STATIC_ASSERT_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    default:
      exit(1);
    }
fini:

  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  yylval = &next_node->next;

  return token;
}