samba-mirror/ejsLex.c at 31980e03faedaa44317f64d940c458d38a103627

mirror of https://github.com/samba-team/samba.git synced 2025-12-13 16:23:50 +03:00

Files

Derrell Lipman 917af234a8 r18925: Add current snapshot of the ejs-2.0 code. Tridge, will you be incorporating this?

2007-10-10 14:20:21 -05:00

1034 lines

23 KiB

C

Raw Blame History

 /*
  *	@file 	ejsLex.c
  *	@brief 	EJS Lexical Analyser
  *	@overview EJS lexical analyser. This implements a lexical analyser
  *		for a subset of the JavaScript language.
  */
 /********************************* Copyright **********************************/
 /*
  *	@copy	default.g
  *
  *	Copyright (c) Mbedthis Software LLC, 2003-2006. All Rights Reserved.
  *	Portions Copyright (c) GoAhead Software, 1995-2000. All Rights Reserved.
  *
  *	This software is distributed under commercial and open source licenses.
  *	You may use the GPL open source license described below or you may acquire
  *	a commercial license from Mbedthis Software. You agree to be fully bound
  *	by the terms of either license. Consult the LICENSE.TXT distributed with
  *	this software for full details.
  *
  *	This software is open source; you can redistribute it and/or modify it
  *	under the terms of the GNU General Public License as published by the
  *	Free Software Foundation; either version 2 of the License, or (at your
  *	option) any later version. See the GNU General Public License for more
  *	details at: http://www.mbedthis.com/downloads/gplLicense.html
  *
  *	This program is distributed WITHOUT ANY WARRANTY; without even the
  *	implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  *
  *	This GPL license does NOT permit incorporating this software into
  *	proprietary programs. If you are unable to comply with the GPL, you must
  *	acquire a commercial license to use this software. Commercial licenses
  *	for this software and support services are available from Mbedthis
  *	Software at http://www.mbedthis.com
  *
  *	@end
  */
 /********************************** Includes **********************************/
 #include	"ejs.h"
 #if BLD_FEATURE_EJS
 /****************************** Forward Declarations **************************/
 static int 		getLexicalToken(Ejs *ep, int state);
 static int 		tokenAddChar(Ejs *ep, int c);
 static int 		inputGetc(Ejs *ep);
 static void		inputPutback(Ejs *ep, int c);
 static int		charConvert(Ejs *ep, int base, int maxDig);
 static void 	parseNumber(Ejs *ep, EjsType type);
 /************************************* Code ***********************************/
 /*
  *	Open a new input script.
  *
  *	A zeroed EjsInput record is allocated, pushed onto the front of the
  *	ep->input list and positioned at the first character of "script". The
  *	script text is referenced, not copied.
  *
  *	Returns zero on success, or MPR_ERR_MEMORY if the record cannot be
  *	allocated.
  */
 int ejsLexOpenScript(Ejs *ep, const char *script)
 {
 	EjsInput	*input;

 	mprAssert(ep);
 	mprAssert(script);

 	input = mprAllocTypeZeroed(ep, EjsInput);
 	if (input == NULL) {
 		return MPR_ERR_MEMORY;
 	}

 	/*
 	 *	Remember where the script came from
 	 */
 	if (ep->proc) {
 		input->procName = ep->proc->procName;
 	} else {
 		input->procName = NULL;
 	}
 	input->fileName = ep->fileName;

 	/*
 	 *	Set up the script cursor and the (empty) parse token buffer
 	 */
 	input->script = script;
 	input->scriptServp = (char*) input->script;
 	input->scriptSize = strlen(script);
 	input->tokEndp = input->tokbuf;
 	input->tokServp = input->tokbuf;
 	input->putBackIndex = -1;
 	input->lineColumn = 0;
 	input->lineNumber = 1;

 	/*
 	 *	Make this the current input
 	 */
 	input->next = ep->input;
 	ep->input = input;
 	return 0;
 }
 /******************************************************************************/
 /*
  *	Close the current input script: pop the head of the ep->input list and
  *	free it.
  */
 void ejsLexCloseScript(Ejs *ep)
 {
 	EjsInput	*current;

 	mprAssert(ep);

 	current = ep->input;
 	mprAssert(current);

 	/*
 	 *	Unlink first so ep->input never refers to the freed record
 	 */
 	ep->input = current->next;
 	mprFree(current);
 }
 /******************************************************************************/
 /*
  *	Initialize an input state structure: clear every field and mark the
  *	token putback stack as empty. Always returns zero.
  */
 int ejsInitInputState(EjsInput *ip)
 {
 	mprAssert(ip);

 	memset(ip, 0, sizeof(EjsInput));

 	/*
 	 *	-1 means "no putback tokens"; zero would denote one stacked token
 	 */
 	ip->putBackIndex = -1;

 	return 0;
 }
 /******************************************************************************/
 /*
  *	Save the input state.
  *
  *	Copies the current input record (ep->input) into the caller supplied
  *	"state". The record is copied by structure assignment, so pointer members
  *	are copied by value and still refer to the original buffers. The putback
  *	tokens and the current line text are then copied explicitly, bounded by
  *	the size of the destination buffers.
  */
 void ejsLexSaveInputState(Ejs *ep, EjsInput *state)
 {
 	EjsInput	*ip;
 	int			i;

 	mprAssert(ep);
 	mprAssert(state);

 	ip = ep->input;
 	mprAssert(ip);

 	*state = *ip;

 	/*
 	 *	Only slots 0..putBackIndex hold live tokens
 	 */
 	for (i = 0; i <= ip->putBackIndex; i++) {
 		mprStrcpy(state->putBack[i].tokbuf, sizeof(state->putBack[i].tokbuf),
 			ip->putBack[i].tokbuf);
 		state->putBack[i].tid = ip->putBack[i].tid;
 	}

 	mprStrcpy(state->line, sizeof(state->line), ip->line);
 	state->lineColumn = ip->lineColumn;
 	state->lineNumber = ip->lineNumber;
 }
 /******************************************************************************/
 /*
  *	Restore the input state.
  *
  *	Copies the script cursor, token buffer, putback tokens and line tracking
  *	information from "state" back into the current input record (ep->input).
  */
 void ejsLexRestoreInputState(Ejs *ep, EjsInput *state)
 {
 	EjsInput	*input;
 	EjsToken	*dest;
 	int			slot;

 	mprAssert(ep);
 	mprAssert(state);

 	input = ep->input;
 	mprAssert(input);

 	/*
 	 *	Token buffer and its cursors
 	 */
 	mprStrcpy(input->tokbuf, sizeof(input->tokbuf), state->tokbuf);
 	input->tokServp = state->tokServp;
 	input->tokEndp = state->tokEndp;

 	/*
 	 *	Script text and read position
 	 */
 	input->script = state->script;
 	input->scriptServp = state->scriptServp;
 	input->scriptSize = state->scriptSize;

 	/*
 	 *	Putback stack: only slots 0..putBackIndex hold live tokens
 	 */
 	input->putBackIndex = state->putBackIndex;
 	slot = 0;
 	while (slot <= input->putBackIndex) {
 		dest = &input->putBack[slot];
 		dest->tid = state->putBack[slot].tid;
 		mprStrcpy(dest->tokbuf, sizeof(dest->tokbuf),
 			state->putBack[slot].tokbuf);
 		slot++;
 	}

 	/*
 	 *	Line tracking used for diagnostics
 	 */
 	mprStrcpy(input->line, sizeof(input->line), state->line);
 	input->lineColumn = state->lineColumn;
 	input->lineNumber = state->lineNumber;
 }
 /******************************************************************************/
 /*
  *	Free a saved input state. Nothing is deallocated here; the saved state is
  *	simply reset so it holds no putback tokens and no partial line.
  */
 void ejsLexFreeInputState(Ejs *ep, EjsInput *state)
 {
 	mprAssert(ep);
 	mprAssert(state);

 	state->lineColumn = 0;
 	state->putBackIndex = -1;
 }
 /******************************************************************************/
 /*
  *	Get the next EJS token. The token id is recorded in ep->tid and also
  *	returned to the caller.
  */
 int ejsLexGetToken(Ejs *ep, int state)
 {
 	int		tokenId;

 	mprAssert(ep);

 	tokenId = getLexicalToken(ep, state);
 	ep->tid = tokenId;
 	return tokenId;
 }
 /******************************************************************************/
 /*
  *	Check for reserved words "if", "else", "var", "for", "delete", "function",
  *	"class", "extends", "public", "private", "protected", "try", "catch",
  *	"finally", "throw", "return", "get", "set", "this", "module", "each"
  *
  *	The "new" and "in" reserved words are handled below. The "true", "false",
  *	"null" "typeof" and "undefined" reserved words are handled as global
  *	objects.
  *
  *	Other reserved words not supported:
  *		"break", "case", "continue", "default", "do",
  *		"instanceof", "switch", "while", "with"
  *
  *	ECMA extensions reserved words (not supported):
  *		"abstract", "boolean", "byte", "char", "const",
  *		"debugger", "double", "enum", "export",
  *		"final", "float", "goto", "implements", "import", "int",
  *		"interface", "long", "native", "package",
  *		"short", "static", "super", "synchronized", "transient", "volatile"
  *
  *	FUTURE -- use a hash lookup
  */
 static int checkReservedWord(Ejs *ep, int state, int c, int tid)
 {
 	/*	FUTURE -- probably should return for all tokens != EJS_TOK_ID */
 	/*	FUTURE -- Should have a hash for this. MUCH faster. */
 	if (!isalpha(ep->token[0]) || tid == EJS_TOK_LITERAL) {
 		return tid;
 	}
 	if (state == EJS_STATE_STMT) {
 		/*	FUTURE OPT -- convert to hash lookup */
 		if (strcmp(ep->token, "if") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_IF;
 		} else if (strcmp(ep->token, "else") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_ELSE;
 		} else if (strcmp(ep->token, "var") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_VAR;
 		} else if (strcmp(ep->token, "new") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_NEW;
 		} else if (strcmp(ep->token, "for") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_FOR;
 		} else if (strcmp(ep->token, "delete") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_DELETE;
 		} else if (strcmp(ep->token, "function") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_FUNCTION;
 		} else if (strcmp(ep->token, "class") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_CLASS;
 		} else if (strcmp(ep->token, "module") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_MODULE;
 		} else if (strcmp(ep->token, "extends") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_EXTENDS;
 		} else if (strcmp(ep->token, "try") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_TRY;
 		} else if (strcmp(ep->token, "catch") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_CATCH;
 		} else if (strcmp(ep->token, "finally") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_FINALLY;
 		} else if (strcmp(ep->token, "throw") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_THROW;
 		} else if (strcmp(ep->token, "public") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_PUBLIC;
 		} else if (strcmp(ep->token, "protected") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_PROTECTED;
 		} else if (strcmp(ep->token, "private") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_PRIVATE;
 		} else if (strcmp(ep->token, "get") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_GET;
 		} else if (strcmp(ep->token, "set") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_SET;
 		} else if (strcmp(ep->token, "extends") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_EXTENDS;
 		} else if (strcmp(ep->token, "try") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_TRY;
 		} else if (strcmp(ep->token, "catch") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_CATCH;
 		} else if (strcmp(ep->token, "finally") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_FINALLY;
 		} else if (strcmp(ep->token, "throw") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_THROW;
 		} else if (strcmp(ep->token, "public") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_PUBLIC;
 		} else if (strcmp(ep->token, "protected") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_PROTECTED;
 		} else if (strcmp(ep->token, "private") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_PRIVATE;
 		} else if (strcmp(ep->token, "get") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_GET;
 		} else if (strcmp(ep->token, "set") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_SET;
 		} else if (strcmp(ep->token, "each") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_EACH;
 		} else if (strcmp(ep->token, "return") == 0) {
 			if ((c == ';') || (c == '(')) {
 				inputPutback(ep, c);
 			}
 			return EJS_TOK_RETURN;
 		}
 	} else if (state == EJS_STATE_EXPR) {
 		if (strcmp(ep->token, "new") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_NEW;
 		} else if (strcmp(ep->token, "in") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_IN;
 		} else if (strcmp(ep->token, "function") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_FUNCTION;
 		}
 	} else if (state == EJS_STATE_DEC) {
 		if (strcmp(ep->token, "extends") == 0) {
 			inputPutback(ep, c);
 			return EJS_TOK_EXTENDS;
 		}
 	}
 	return tid;
 }
 /******************************************************************************/
 /*
  *	Get the next EJS token.
  *
  *	Tokens pushed back with ejsLexPutbackToken are returned first (most
  *	recently pushed first) after being re-checked against the reserved words
  *	for "state". Otherwise characters are read from the current input script.
  *	The token text is left in ep->token and, for EJS_TOK_NUMBER, the value is
  *	stored in ep->tokenNumber.
  *
  *	Returns the token id, EJS_TOK_EOF at the end of the input, or EJS_TOK_ERR
  *	after a lexical error has been reported.
  */
 static int getLexicalToken(Ejs *ep, int state)
 {
 	EjsType		type;
 	EjsInput	*ip;
 	int			done, tid, c, quote, style, idx, isHex;
 #if BLD_FEATURE_FLOATING_POINT
 	int			lc, prev;
 #endif

 	mprAssert(ep);
 	ip = ep->input;
 	mprAssert(ip);

 	ep->tid = -1;
 	tid = -1;
 	type = BLD_FEATURE_NUM_TYPE_ID;
 	isHex = 0;

 	/*
 	 *	Use putback tokens first. Don't free strings as caller needs access.
 	 */
 	if (ip->putBackIndex >= 0) {
 		idx = ip->putBackIndex;
 		tid = ip->putBack[idx].tid;
 		ep->token = (char*) ip->putBack[idx].tokbuf;
 		tid = checkReservedWord(ep, state, 0, tid);
 		ip->putBackIndex--;
 		return tid;
 	}

 	ep->token = ip->tokServp = ip->tokEndp = ip->tokbuf;
 	*ip->tokServp = '\0';

 	if ((c = inputGetc(ep)) < 0) {
 		return EJS_TOK_EOF;
 	}

 	/*
 	 *	Main lexical analyser
 	 */
 	for (done = 0; !done; ) {
 		switch (c) {
 		case -1:
 			return EJS_TOK_EOF;

 		case ' ':
 		case '\t':
 		case '\r':
 			do {
 				if ((c = inputGetc(ep)) < 0)
 					break;
 			} while (c == ' ' || c == '\t' || c == '\r');
 			break;

 		case '\n':
 			return EJS_TOK_NEWLINE;

 		case '(':
 			tokenAddChar(ep, c);
 			return EJS_TOK_LPAREN;

 		case ')':
 			tokenAddChar(ep, c);
 			return EJS_TOK_RPAREN;

 		case '[':
 			tokenAddChar(ep, c);
 			return EJS_TOK_LBRACKET;

 		case ']':
 			tokenAddChar(ep, c);
 			return EJS_TOK_RBRACKET;

 		case '.':
 			tokenAddChar(ep, c);
 			return EJS_TOK_PERIOD;

 		case '{':
 			tokenAddChar(ep, c);
 			return EJS_TOK_LBRACE;

 		case '}':
 			tokenAddChar(ep, c);
 			return EJS_TOK_RBRACE;

 		case '+':
 			if ((c = inputGetc(ep)) < 0) {
 				ejsSyntaxError(ep, 0);
 				return EJS_TOK_ERR;
 			}
 			if (c != '+' ) {
 				inputPutback(ep, c);
 				tokenAddChar(ep, EJS_EXPR_PLUS);
 				return EJS_TOK_EXPR;
 			}
 			tokenAddChar(ep, EJS_EXPR_INC);
 			return EJS_TOK_INC_DEC;

 		case '-':
 			if ((c = inputGetc(ep)) < 0) {
 				ejsSyntaxError(ep, 0);
 				return EJS_TOK_ERR;
 			}
 			if (c != '-' ) {
 				inputPutback(ep, c);
 				tokenAddChar(ep, EJS_EXPR_MINUS);
 				return EJS_TOK_EXPR;
 			}
 			tokenAddChar(ep, EJS_EXPR_DEC);
 			return EJS_TOK_INC_DEC;

 		case '*':
 			tokenAddChar(ep, EJS_EXPR_MUL);
 			return EJS_TOK_EXPR;

 		case '%':
 			tokenAddChar(ep, EJS_EXPR_MOD);
 			return EJS_TOK_EXPR;

 		case '/':
 			/*
 			 *	Handle the division operator and comments
 			 */
 			if ((c = inputGetc(ep)) < 0) {
 				ejsSyntaxError(ep, 0);
 				return EJS_TOK_ERR;
 			}
 			if (c != '*' && c != '/') {
 				inputPutback(ep, c);
 				tokenAddChar(ep, EJS_EXPR_DIV);
 				return EJS_TOK_EXPR;
 			}
 			style = c;

 			/*
 			 *	Eat comments. Both C and C++ comment styles are supported.
 			 */
 			if (style == '/') {
 				/*
 				 *	C++ style: runs to the end of the line. Reaching the end
 				 *	of the input inside the comment is not an error.
 				 */
 				do {
 					if ((c = inputGetc(ep)) < 0) {
 						return EJS_TOK_EOF;
 					}
 				} while (c != '\n');

 			} else {
 				/*
 				 *	C style: runs to the first star-slash pair. After a '*'
 				 *	the following character is examined on the next pass
 				 *	rather than discarded, so a comment closed by a run of
 				 *	stars and a '/' is recognised. An unterminated comment
 				 *	is a syntax error.
 				 */
 				c = inputGetc(ep);
 				while (1) {
 					if (c < 0) {
 						ejsSyntaxError(ep, 0);
 						return EJS_TOK_ERR;
 					}
 					if (c != '*') {
 						c = inputGetc(ep);
 						continue;
 					}
 					c = inputGetc(ep);
 					if (c == '/') {
 						break;
 					}
 				}
 			}

 			/*
 			 *	Continue looking for a token, so get the next character
 			 */
 			if ((c = inputGetc(ep)) < 0) {
 				return EJS_TOK_EOF;
 			}
 			break;

 		case '<':									/* <, << and <= */
 			if ((c = inputGetc(ep)) < 0) {
 				ejsSyntaxError(ep, 0);
 				return EJS_TOK_ERR;
 			}
 			if (c == '<') {
 				tokenAddChar(ep, EJS_EXPR_LSHIFT);
 				return EJS_TOK_EXPR;

 			} else if (c == '=') {
 				tokenAddChar(ep, EJS_EXPR_LESSEQ);
 				return EJS_TOK_EXPR;
 			}
 			tokenAddChar(ep, EJS_EXPR_LESS);
 			inputPutback(ep, c);
 			return EJS_TOK_EXPR;

 		case '>':									/* >, >> and >= */
 			if ((c = inputGetc(ep)) < 0) {
 				ejsSyntaxError(ep, 0);
 				return EJS_TOK_ERR;
 			}
 			if (c == '>') {
 				tokenAddChar(ep, EJS_EXPR_RSHIFT);
 				return EJS_TOK_EXPR;

 			} else if (c == '=') {
 				tokenAddChar(ep, EJS_EXPR_GREATEREQ);
 				return EJS_TOK_EXPR;
 			}
 			tokenAddChar(ep, EJS_EXPR_GREATER);
 			inputPutback(ep, c);
 			return EJS_TOK_EXPR;

 		case '=':									/* "==" or "=" */
 			if ((c = inputGetc(ep)) < 0) {
 				ejsSyntaxError(ep, 0);
 				return EJS_TOK_ERR;
 			}
 			if (c == '=') {
 				tokenAddChar(ep, EJS_EXPR_EQ);
 				return EJS_TOK_EXPR;
 			}
 			inputPutback(ep, c);
 			return EJS_TOK_ASSIGNMENT;

 		case '!':									/* "!=" or "!"*/
 			if ((c = inputGetc(ep)) < 0) {
 				ejsSyntaxError(ep, 0);
 				return EJS_TOK_ERR;
 			}
 			if (c == '=') {
 				tokenAddChar(ep, EJS_EXPR_NOTEQ);
 				return EJS_TOK_EXPR;
 			}
 			inputPutback(ep, c);
 			tokenAddChar(ep, EJS_EXPR_BOOL_COMP);
 			return EJS_TOK_EXPR;

 		case ';':
 			tokenAddChar(ep, c);
 			return EJS_TOK_SEMI;

 		case ',':
 			tokenAddChar(ep, c);
 			return EJS_TOK_COMMA;

 		case ':':
 			tokenAddChar(ep, c);
 			return EJS_TOK_COLON;

 		case '|':									/* "||" */
 			if ((c = inputGetc(ep)) < 0 || c != '|') {
 				ejsSyntaxError(ep, 0);
 				return EJS_TOK_ERR;
 			}
 			tokenAddChar(ep, EJS_COND_OR);
 			return EJS_TOK_LOGICAL;

 		case '&':									/* "&&" */
 			if ((c = inputGetc(ep)) < 0 || c != '&') {
 				ejsSyntaxError(ep, 0);
 				return EJS_TOK_ERR;
 			}
 			tokenAddChar(ep, EJS_COND_AND);
 			return EJS_TOK_LOGICAL;

 		case '\"':									/* String quote */
 		case '\'':
 			quote = c;
 			if ((c = inputGetc(ep)) < 0) {
 				ejsSyntaxError(ep, 0);
 				return EJS_TOK_ERR;
 			}

 			while (c != quote) {
 				/*
 				 *	Check for escape sequence characters
 				 */
 				if (c == '\\') {
 					if ((c = inputGetc(ep)) < 0) {
 						/*
 						 *	Input ended straight after the backslash
 						 */
 						ejsSyntaxError(ep, "Unmatched Quote");
 						return EJS_TOK_ERR;
 					}
 					if (isdigit(c)) {
 						/*
 						 *	Octal support, \101 maps to 65 = 'A'. Put first
 						 *	char back so converter will work properly.
 						 */
 						inputPutback(ep, c);
 						c = charConvert(ep, 8, 3);

 					} else {
 						switch (c) {
 						case 'n':
 							c = '\n'; break;
 						case 'b':
 							c = '\b'; break;
 						case 'f':
 							c = '\f'; break;
 						case 'r':
 							c = '\r'; break;
 						case 't':
 							c = '\t'; break;
 						case 'x':
 							/*
 							 *	Hex support, \x41 maps to 65 = 'A'
 							 */
 							c = charConvert(ep, 16, 2);
 							break;
 						case 'u':
 							/*
 							 *	Unicode support, \u0041 maps to 65 = 'A'.
 							 *	Up to four hex digits form one value.
 							 *	NOTE(review): the value is stored through
 							 *	tokenAddChar; presumably the token buffer
 							 *	holds narrow chars, so code points above
 							 *	0xFF would be truncated -- confirm.
 							 */
 							c = charConvert(ep, 16, 4);
 							break;
 						case '\'':
 						case '\"':
 						case '\\':
 							break;
 						default:
 							/*
 							 *	Unknown escape: keep the backslash
 							 */
 							if (tokenAddChar(ep, '\\') < 0) {
 								return EJS_TOK_ERR;
 							}
 						}
 					}
 					if (tokenAddChar(ep, c) < 0) {
 						return EJS_TOK_ERR;
 					}
 				} else {
 					if (tokenAddChar(ep, c) < 0) {
 						return EJS_TOK_ERR;
 					}
 				}
 				if ((c = inputGetc(ep)) < 0) {
 					ejsSyntaxError(ep, "Unmatched Quote");
 					return EJS_TOK_ERR;
 				}
 			}
 			return EJS_TOK_LITERAL;

 		case '0':
 			if (tokenAddChar(ep, c) < 0) {
 				return EJS_TOK_ERR;
 			}
 			if ((c = inputGetc(ep)) < 0) {
 				/*
 				 *	"0" was the last character of the script. It is still a
 				 *	number and must not be dropped in favour of EOF.
 				 */
 				parseNumber(ep, type);
 				return EJS_TOK_NUMBER;
 			}
 			if (tolower(c) == 'x') {
 				if (tokenAddChar(ep, c) < 0) {
 					return EJS_TOK_ERR;
 				}
 				if ((c = inputGetc(ep)) < 0) {
 					/*
 					 *	Same treatment as "0x" followed by a non-hex char
 					 */
 					parseNumber(ep, type);
 					return EJS_TOK_NUMBER;
 				}
 				isHex = 1;
 				if (! isxdigit(c)) {
 					parseNumber(ep, type);
 					inputPutback(ep, c);
 					return EJS_TOK_NUMBER;
 				}
 			} else if (! isdigit(c)) {
 #if BLD_FEATURE_FLOATING_POINT
 				/*
 				 *	A '+' or '-' directly after the zero is an operator, not
 				 *	part of the literal, so only '.' and an exponent marker
 				 *	continue the number.
 				 */
 				if (c == '.' || tolower(c) == 'e') {
 					/* Fall through */
 					type = EJS_TYPE_FLOAT;
 				} else
 #endif
 				{
 					parseNumber(ep, type);
 					inputPutback(ep, c);
 					return EJS_TOK_NUMBER;
 				}
 			}
 			/* Fall through to get more digits */

 		case '1': case '2': case '3': case '4':
 		case '5': case '6': case '7': case '8': case '9':
 			if (isHex) {
 				do {
 					if (tokenAddChar(ep, c) < 0) {
 						return EJS_TOK_ERR;
 					}
 					if ((c = inputGetc(ep)) < 0) {
 						break;
 					}
 				} while (isxdigit(c));

 			} else {
 #if BLD_FEATURE_FLOATING_POINT
 				/*
 				 *	"c" always holds the character exactly as read so that
 				 *	the look-ahead can be put back unmodified. "lc" is the
 				 *	form appended to the token: the first character as is,
 				 *	later ones lower-cased. "prev" is the lower-cased
 				 *	previous character. A sign is only part of the number
 				 *	when it directly follows the exponent marker.
 				 */
 				lc = c;
 				while (1) {
 					if (tokenAddChar(ep, lc) < 0) {
 						return EJS_TOK_ERR;
 					}
 					prev = tolower(lc);
 					if ((c = inputGetc(ep)) < 0) {
 						break;
 					}
 					lc = tolower(c);
 					if (lc == '.' || lc == 'e' || lc == 'f') {
 						type = EJS_TYPE_FLOAT;
 					} else if (!isdigit(lc) &&
 							!(prev == 'e' && (lc == '+' || lc == '-'))) {
 						break;
 					}
 				}
 #else
 				do {
 					if (tokenAddChar(ep, c) < 0) {
 						return EJS_TOK_ERR;
 					}
 					if ((c = inputGetc(ep)) < 0) {
 						break;
 					}
 				} while (isdigit(c));
 #endif
 			}

 			parseNumber(ep, type);
 			inputPutback(ep, c);
 			return EJS_TOK_NUMBER;

 		default:
 			/*
 			 *	Identifiers or function names
 			 */
 			while (1) {
 				if (c == '\\') {
 					if ((c = inputGetc(ep)) < 0) {
 						break;
 					}
 					if (c == '\n' || c == '\r') {
 						break;
 					}
 				} else if (tokenAddChar(ep, c) < 0) {
 						break;
 				}
 				if ((c = inputGetc(ep)) < 0) {
 					break;
 				}
 				if (!isalnum(c) && c != '$' && c != '_' &&
 						c != '\\' && c != '@') {
 					break;
 				}
 			}
 			if (*ep->token == '\0') {
 				c = inputGetc(ep);
 				break;
 			}

 			/*
 			 *	The cast keeps isalpha() defined for bytes with the top bit
 			 *	set.
 			 */
 			if (! isalpha((unsigned char) *ep->token) && *ep->token != '$' &&
 					*ep->token != '_' && *ep->token != '@') {
 				ejsError(ep, EJS_SYNTAX_ERROR, "Invalid identifier %s",
 					ep->token);
 				return EJS_TOK_ERR;
 			}

 			tid = checkReservedWord(ep, state, c, EJS_TOK_ID);
 			if (tid != EJS_TOK_ID) {
 				return tid;
 			}

 			/*
 			 *	Skip white space after token to find out whether this is
 			 * 	a function or not.
 			 */
 			while (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
 				if ((c = inputGetc(ep)) < 0)
 					break;
 			}

 			tid = EJS_TOK_ID;
 			if ((strlen(ep->token) + 1) >= EJS_MAX_ID) {
 				ejsError(ep, EJS_SYNTAX_ERROR,
 					"Identifier too big. Max is %d letters.", EJS_MAX_ID);
 				return EJS_TOK_ERR;
 			}
 			done++;
 		}
 	}

 	/*
 	 *	Putback the last extra character for next time
 	 */
 	inputPutback(ep, c);
 	return tid;
 }
 /******************************************************************************/
 /*
  *	Convert the text in ep->token to a number of the requested type and store
  *	both the value and the type in ep->tokenNumber. Types not compiled into
  *	this build leave ep->tokenNumber untouched.
  */
 static void parseNumber(Ejs *ep, EjsType type)
 {
 	if (type == EJS_TYPE_INT) {
 		ep->tokenNumber.type = type;
 		ep->tokenNumber.integer = ejsParseInteger(ep->token);
 	}
 #if BLD_FEATURE_FLOATING_POINT
 	else if (type == EJS_TYPE_FLOAT) {
 		ep->tokenNumber.type = type;
 		ep->tokenNumber.floating = atof(ep->token);
 	}
 #endif
 #if BLD_FEATURE_INT64
 	else if (type == EJS_TYPE_INT64) {
 		ep->tokenNumber.type = type;
 		ep->tokenNumber.integer64 = ejsParseInteger64(ep->token);
 	}
 #endif
 }
 /******************************************************************************/
 /*
  *	Read up to "maxDig" digits in the given base (8 or 16 as used by the
  *	callers) from the input and return their combined value. Conversion stops
  *	at the end of the input or at the first character that is not a digit of
  *	the base; that character is pushed back onto the input. Zero is returned
  *	if no digit could be read.
  */
 static int charConvert(Ejs *ep, int base, int maxDig)
 {
 	int		count, ch, digit, value;

 	value = 0;
 	for (count = 0; count < maxDig; count++) {
 		ch = inputGetc(ep);
 		if (ch < 0) {
 			break;
 		}

 		if (isdigit(ch)) {
 			digit = ch - '0';
 		} else if ('a' <= ch && ch <= 'f') {
 			digit = 10 + (ch - 'a');
 		} else if ('A' <= ch && ch <= 'F') {
 			digit = 10 + (ch - 'A');
 		} else {
 			/*
 			 *	Not a digit at all: use an out of range value
 			 */
 			digit = base;
 		}

 		/*
 		 *	Unexpected character (also catches 8 and 9 in octal): return it
 		 *	to the input and stop.
 		 */
 		if (digit >= base) {
 			inputPutback(ep, ch);
 			break;
 		}
 		value = value * base + digit;
 	}
 	return value;
 }
 /******************************************************************************/
 /*
  *	Put back a token so that it is returned by the next call to the lexer.
  *
  *	Tokens are kept on a small stack of EJS_TOKEN_STACK entries; the most
  *	recently pushed token is returned first. "tid" is the token id and
  *	"string" the token text, which is copied (bounded by the size of the
  *	slot's buffer).
  *
  *	If the stack is already full the request is rejected: the assertion fires
  *	in builds where mprAssert is active and the token is otherwise dropped
  *	rather than written past the end of the stack.
  */
 void ejsLexPutbackToken(Ejs *ep, int tid, char *string)
 {
 	EjsInput	*ip;
 	EjsToken	*tp;

 	mprAssert(ep);
 	ip = ep->input;
 	mprAssert(ip);

 	if ((ip->putBackIndex + 1) >= EJS_TOKEN_STACK) {
 		mprAssert((ip->putBackIndex + 1) < EJS_TOKEN_STACK);
 		return;
 	}

 	ip->putBackIndex += 1;
 	tp = &ip->putBack[ip->putBackIndex];
 	tp->tid = tid;
 	mprStrcpy(tp->tokbuf, sizeof(tp->tokbuf), string);
 }
 /******************************************************************************/
 /*
  *	Append a character to the token buffer, keeping it null terminated.
  *	Returns zero on success. If the buffer is full, a "Token too big" syntax
  *	error is raised and -1 is returned.
  */
 static int tokenAddChar(Ejs *ep, int c)
 {
 	EjsInput	*input;

 	mprAssert(ep);
 	input = ep->input;
 	mprAssert(input);

 	/*
 	 *	The last byte of the buffer is reserved for the terminator
 	 */
 	if (input->tokEndp < &input->tokbuf[sizeof(input->tokbuf) - 1]) {
 		*input->tokEndp = c;
 		input->tokEndp++;
 		*input->tokEndp = '\0';
 		return 0;
 	}

 	ejsSyntaxError(ep, "Token too big");
 	return -1;
 }
 /******************************************************************************/
 /*
  *	Get another input character. Returns the next character of the script as
  *	a non-negative value, or -1 when the script is exhausted.
  */
 static int inputGetc(Ejs *ep)
 {
 	EjsInput	*input;
 	int			ch;

 	mprAssert(ep);
 	input = ep->input;

 	if (input->scriptSize <= 0) {
 		return -1;
 	}

 	ch = (uchar) *input->scriptServp;
 	input->scriptServp++;
 	input->scriptSize--;

 	/*
 	 *	For debugging, accumulate the line number and the currently parsed
 	 *	line. Characters beyond the capacity of the line buffer are not
 	 *	recorded.
 	 */
 	if (ch != '\n') {
 		if ((input->lineColumn + 2) < sizeof(input->line)) {
 			input->line[input->lineColumn] = ch;
 			input->lineColumn++;
 			input->line[input->lineColumn] = '\0';
 		}
 		return ch;
 	}

 #if 0 && BLD_DEBUG
 	if (input->lineColumn > 0) {
 		printf("PARSED: %s\n", input->line);
 	}
 #endif
 	input->lineNumber++;
 	input->lineColumn = 0;
 	return ch;
 }
 /******************************************************************************/
 /*
  *	Put back a character onto the input queue.
  *
  *	"c" must be the character most recently returned by inputGetc, which is
  *	how every caller in this file uses it. Values <= 0 (end of input, or "no
  *	look-ahead") are ignored.
  *
  *	The script text itself is never written to: it was supplied as a
  *	"const char *" and may live in read-only storage. Because the character
  *	being put back is the one just read, stepping the read position back is
  *	sufficient.
  */
 static void inputPutback(Ejs *ep, int c)
 {
 	EjsInput	*ip;

 	mprAssert(ep);

 	if (c <= 0) {
 		return;
 	}
 	ip = ep->input;
 	mprAssert(ip);

 	/*
 	 *	Never step back before the start of the script
 	 */
 	if (ip->scriptServp <= (char*) ip->script) {
 		mprAssert(ip->scriptServp > (char*) ip->script);
 		return;
 	}
 	ip->scriptServp--;
 	ip->scriptSize++;

 	if (c == '\n') {
 		/*
 		 *	inputGetc counted this newline and will count it again when it
 		 *	is re-read, so undo the increment. Reading the newline did not
 		 *	append to the saved line text, so there is nothing to remove.
 		 */
 		ip->lineNumber--;
 		return;
 	}

 	/*
 	 *	Drop the character from the saved line text
 	 */
 	if (--(ip->lineColumn) < 0) {
 		ip->lineColumn = 0;
 	}
 	mprAssert(ip->lineColumn >= 0);
 	mprAssert(ip->lineColumn < sizeof(ip->line));
 	ip->line[ip->lineColumn] = '\0';
 }
 /******************************************************************************/
 #else
 /*
  *	Empty function compiled only when BLD_FEATURE_EJS is disabled.
  *	NOTE(review): presumably present so this file still defines a symbol in
  *	that configuration -- confirm.
  */
 void ejsLexDummy() {}
 /******************************************************************************/
 #endif /* BLD_FEATURE_EJS */
 /*
  * Local variables:
  * tab-width: 4
  * c-basic-offset: 4
  * End:
  * vim:tw=78
  * vim600: sw=4 ts=4 fdm=marker
  * vim<600: sw=4 ts=4
  */

1034 lines 23 KiB C Raw Blame History

1034 lines

23 KiB

C

Raw Blame History