diff options
author | Lua Team <team@lua.org> | 2010-01-08 12:00:00 +0000 |
---|---|---|
committer | repogen <> | 2010-01-08 12:00:00 +0000 |
commit | 22912c77c80f8de8f7accd3319c726f7c5349fd3 (patch) | |
tree | caf064ecca31cd2ef1c919c585ee6b3d5e6d25d6 /src/llex.c | |
parent | 300cd56eb905be061aa75bb665549b3b85109bbe (diff) | |
download | lua-github-5.2.0-work1.tar.gz |
Lua 5.2.0-work15.2.0-work1
Diffstat (limited to 'src/llex.c')
-rw-r--r-- | src/llex.c | 191 |
1 files changed, 109 insertions, 82 deletions
@@ -1,11 +1,10 @@ /* -** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $ +** $Id: llex.c,v 2.34 2009/11/17 16:33:38 roberto Exp $ ** Lexical Analyzer ** See Copyright Notice in lua.h */ -#include <ctype.h> #include <locale.h> #include <string.h> @@ -14,6 +13,7 @@ #include "lua.h" +#include "lctype.h" #include "ldo.h" #include "llex.h" #include "lobject.h" @@ -29,35 +29,36 @@ - #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') /* ORDER RESERVED */ -const char *const luaX_tokens [] = { +static const char *const luaX_tokens [] = { "and", "break", "do", "else", "elseif", "end", "false", "for", "function", "if", "in", "local", "nil", "not", "or", "repeat", "return", "then", "true", "until", "while", - "..", "...", "==", ">=", "<=", "~=", - "<number>", "<name>", "<string>", "<eof>", - NULL + "..", "...", "==", ">=", "<=", "~=", "<eof>", + "<number>", "<name>", "<string>" }; #define save_and_next(ls) (save(ls, ls->current), next(ls)) +static void lexerror (LexState *ls, const char *msg, int token); + + static void save (LexState *ls, int c) { Mbuffer *b = ls->buff; - if (b->n + 1 > b->buffsize) { + if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) { size_t newsize; - if (b->buffsize >= MAX_SIZET/2) - luaX_lexerror(ls, "lexical element too long", 0); - newsize = b->buffsize * 2; + if (luaZ_sizebuffer(b) >= MAX_SIZET/2) + lexerror(ls, "lexical element too long", 0); + newsize = luaZ_sizebuffer(b) * 2; luaZ_resizebuffer(ls->L, b, newsize); } - b->buffer[b->n++] = cast(char, c); + b->buffer[luaZ_bufflen(b)++] = cast(char, c); } @@ -72,17 +73,19 @@ void luaX_init (lua_State *L) { } -#define MAXSRC 80 - - const char *luaX_token2str (LexState *ls, int token) { if (token < FIRST_RESERVED) { lua_assert(token == cast(unsigned char, token)); - return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) : - luaO_pushfstring(ls->L, "%c", token); + return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) : + luaO_pushfstring(ls->L, "char(%d)", token); + } + else { + const char *s = luaX_tokens[token - FIRST_RESERVED]; + if (token < TK_EOS) + return luaO_pushfstring(ls->L, LUA_QS, s); + else + return s; } - else - return luaX_tokens[token-FIRST_RESERVED]; } @@ -92,34 +95,39 @@ static const char *txtToken (LexState *ls, int token) { case TK_STRING: case TK_NUMBER: save(ls, '\0'); - return luaZ_buffer(ls->buff); + return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff)); default: return luaX_token2str(ls, token); } } -void luaX_lexerror (LexState *ls, const char *msg, int token) { - char buff[MAXSRC]; - luaO_chunkid(buff, getstr(ls->source), MAXSRC); +static void lexerror (LexState *ls, const char *msg, int token) { + char buff[LUA_IDSIZE]; + luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE); msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); if (token) - luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token)); + luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token)); luaD_throw(ls->L, LUA_ERRSYNTAX); } void luaX_syntaxerror (LexState *ls, const char *msg) { - luaX_lexerror(ls, msg, ls->t.token); + lexerror(ls, msg, ls->t.token); } TString *luaX_newstring (LexState *ls, const char *str, size_t l) { lua_State *L = ls->L; + TValue *o; /* entry for `str' */ TString *ts = luaS_newlstr(L, str, l); - TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */ - if (ttisnil(o)) + setsvalue2s(L, L->top++, ts); /* anchor string */ + o = luaH_setstr(L, ls->fs->h, ts); + if (ttisnil(o)) { setbvalue(o, 1); /* make sure `str' will not be collected */ + luaC_checkGC(L); + } + L->top--; return ts; } @@ -159,7 +167,7 @@ void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { static int check_next (LexState *ls, const char *set) { - if (!strchr(set, ls->current)) + if (ls->current == '\0' || !strchr(set, ls->current)) return 0; save_and_next(ls); return 1; @@ -174,29 +182,32 @@ static void buffreplace (LexState *ls, char from, char to) { } +#if !defined(getlocaledecpoint) +#define getlocaledecpoint() (localeconv()->decimal_point[0]) +#endif + static void trydecpoint (LexState *ls, SemInfo *seminfo) { /* format error: try to update decimal point separator */ - struct lconv *cv = localeconv(); char old = ls->decpoint; - ls->decpoint = (cv ? cv->decimal_point[0] : '.'); + ls->decpoint = getlocaledecpoint(); buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */ if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) { /* format error with correct decimal point: no more options */ buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ - luaX_lexerror(ls, "malformed number", TK_NUMBER); + lexerror(ls, "malformed number", TK_NUMBER); } } /* LUA_NUMBER */ static void read_numeral (LexState *ls, SemInfo *seminfo) { - lua_assert(isdigit(ls->current)); + lua_assert(lisdigit(ls->current)); do { save_and_next(ls); - } while (isdigit(ls->current) || ls->current == '.'); + } while (lisdigit(ls->current) || ls->current == '.'); if (check_next(ls, "Ee")) /* `E'? */ check_next(ls, "+-"); /* optional exponent sign */ - while (isalnum(ls->current) || ls->current == '_') + while (lislalnum(ls->current)) save_and_next(ls); save(ls, '\0'); buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ @@ -219,37 +230,18 @@ static int skip_sep (LexState *ls) { static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { - int cont = 0; - (void)(cont); /* avoid warnings when `cont' is not used */ save_and_next(ls); /* skip 2nd `[' */ if (currIsNewline(ls)) /* string starts with a newline? */ inclinenumber(ls); /* skip it */ for (;;) { switch (ls->current) { case EOZ: - luaX_lexerror(ls, (seminfo) ? "unfinished long string" : - "unfinished long comment", TK_EOS); + lexerror(ls, (seminfo) ? "unfinished long string" : + "unfinished long comment", TK_EOS); break; /* to avoid warnings */ -#if defined(LUA_COMPAT_LSTR) - case '[': { - if (skip_sep(ls) == sep) { - save_and_next(ls); /* skip 2nd `[' */ - cont++; -#if LUA_COMPAT_LSTR == 1 - if (sep == 0) - luaX_lexerror(ls, "nesting of [[...]] is deprecated", '['); -#endif - } - break; - } -#endif case ']': { if (skip_sep(ls) == sep) { save_and_next(ls); /* skip 2nd `]' */ -#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2 - cont--; - if (sep == 0 && cont >= 0) break; -#endif goto endloop; } break; @@ -273,19 +265,61 @@ static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { } +static int hexavalue (int c) { + if (lisdigit(c)) return c - '0'; + else if (lisupper(c)) return c - 'A' + 10; + else return c - 'a' + 10; +} + + +static int readhexaesc (LexState *ls) { + int c1, c2 = EOZ; + if (!lisxdigit(c1 = next(ls)) || !lisxdigit(c2 = next(ls))) { + luaZ_resetbuffer(ls->buff); /* prepare error message */ + save(ls, '\\'); save(ls, 'x'); + if (c1 != EOZ) save(ls, c1); + if (c2 != EOZ) save(ls, c2); + lexerror(ls, "hexadecimal digit expected", TK_STRING); + } + return (hexavalue(c1) << 4) + hexavalue(c2); +} + + +static int readdecesc (LexState *ls) { + int c1 = ls->current, c2, c3; + int c = c1 - '0'; + if (lisdigit(c2 = next(ls))) { + c = 10*c + c2 - '0'; + if (lisdigit(c3 = next(ls))) { + c = 10*c + c3 - '0'; + if (c > UCHAR_MAX) { + luaZ_resetbuffer(ls->buff); /* prepare error message */ + save(ls, '\\'); + save(ls, c1); save(ls, c2); save(ls, c3); + lexerror(ls, "decimal escape too large", TK_STRING); + } + return c; + } + } + /* else, has read one character that was not a digit */ + zungetc(ls->z); /* return it to input stream */ + return c; +} + + static void read_string (LexState *ls, int del, SemInfo *seminfo) { save_and_next(ls); while (ls->current != del) { switch (ls->current) { case EOZ: - luaX_lexerror(ls, "unfinished string", TK_EOS); + lexerror(ls, "unfinished string", TK_EOS); continue; /* to avoid warnings */ case '\n': case '\r': - luaX_lexerror(ls, "unfinished string", TK_STRING); + lexerror(ls, "unfinished string", TK_STRING); continue; /* to avoid warnings */ - case '\\': { - int c; + case '\\': { /* escape sequences */ + int c; /* final character to be saved */ next(ls); /* do not save the `\' */ switch (ls->current) { case 'a': c = '\a'; break; @@ -295,28 +329,20 @@ static void read_string (LexState *ls, int del, SemInfo *seminfo) { case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; - case '\n': /* go through */ + case 'x': c = readhexaesc(ls); break; + case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; case EOZ: continue; /* will raise an error next loop */ default: { - if (!isdigit(ls->current)) - save_and_next(ls); /* handles \\, \", \', and \? */ - else { /* \xxx */ - int i = 0; - c = 0; - do { - c = 10*c + (ls->current-'0'); - next(ls); - } while (++i<3 && isdigit(ls->current)); - if (c > UCHAR_MAX) - luaX_lexerror(ls, "escape sequence too large", TK_STRING); - save(ls, c); - } - continue; + if (!lisdigit(ls->current)) + c = ls->current; /* handles \\, \", \', and \? */ + else /* digital escape \ddd */ + c = readdecesc(ls); + break; } } - save(ls, c); next(ls); + save(ls, c); continue; } default: @@ -364,7 +390,7 @@ static int llex (LexState *ls, SemInfo *seminfo) { return TK_STRING; } else if (sep == -1) return '['; - else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); + else lexerror(ls, "invalid long string delimiter", TK_STRING); } case '=': { next(ls); @@ -398,7 +424,7 @@ static int llex (LexState *ls, SemInfo *seminfo) { return TK_DOTS; /* ... */ else return TK_CONCAT; /* .. */ } - else if (!isdigit(ls->current)) return '.'; + else if (!lisdigit(ls->current)) return '.'; else { read_numeral(ls, seminfo); return TK_NUMBER; @@ -408,21 +434,21 @@ static int llex (LexState *ls, SemInfo *seminfo) { return TK_EOS; } default: { - if (isspace(ls->current)) { + if (lisspace(ls->current)) { lua_assert(!currIsNewline(ls)); next(ls); continue; } - else if (isdigit(ls->current)) { + else if (lisdigit(ls->current)) { read_numeral(ls, seminfo); return TK_NUMBER; } - else if (isalpha(ls->current) || ls->current == '_') { + else if (lislalpha(ls->current)) { /* identifier or reserved word */ TString *ts; do { save_and_next(ls); - } while (isalnum(ls->current) || ls->current == '_'); + } while (lislalnum(ls->current)); ts = luaX_newstring(ls, luaZ_buffer(ls->buff), luaZ_bufflen(ls->buff)); if (ts->tsv.reserved > 0) /* reserved word? */ @@ -454,8 +480,9 @@ void luaX_next (LexState *ls) { } -void luaX_lookahead (LexState *ls) { +int luaX_lookahead (LexState *ls) { lua_assert(ls->lookahead.token == TK_EOS); ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); + return ls->lookahead.token; } |