diff options
author | Lua Team <team@lua.org> | 2004-03-24 12:00:00 +0000 |
---|---|---|
committer | repogen <> | 2004-03-24 12:00:00 +0000 |
commit | ced7bbbe7a257ce6de94069d5dbf6672aeafd4d9 (patch) | |
tree | 2a01a79e6a4f451dccd247c70310ad957204cefa /src/llex.c | |
parent | e7731a8fb8a317aa5c444ef073bfad82fa5baa54 (diff) | |
download | lua-github-5.1-work0.tar.gz |
Lua 5.1-work0 (tag: 5.1-work0)
Diffstat (limited to 'src/llex.c')
-rw-r--r-- | src/llex.c | 449 |
1 file changed, 220 insertions, 229 deletions
@@ -1,5 +1,5 @@ /* -** $Id: llex.c,v 1.119 2003/03/24 12:39:34 roberto Exp $ +** $Id: llex.c,v 2.2 2004/03/12 19:53:56 roberto Exp $ ** Lexical Analyzer ** See Copyright Notice in lua.h */ @@ -22,9 +22,19 @@ -#define next(LS) (LS->current = zgetc(LS->z)) +#define next(ls) (ls->current = zgetc(ls->z)) +#define save(ls,c) { \ + Mbuffer *b = ls->buff; \ + if (b->n + 1 > b->buffsize) \ + luaZ_resizebuffer(ls->L, b, ((b->buffsize*2) + LUA_MINBUFFER)); \ + b->buffer[b->n++] = cast(char, c); } + + + +#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') + /* ORDER RESERVED */ static const char *const token2string [] = { @@ -51,85 +61,75 @@ void luaX_init (lua_State *L) { #define MAXSRC 80 -void luaX_checklimit (LexState *ls, int val, int limit, const char *msg) { - if (val > limit) { - msg = luaO_pushfstring(ls->L, "too many %s (limit=%d)", msg, limit); - luaX_syntaxerror(ls, msg); +const char *luaX_token2str (LexState *ls, int token) { + if (token < FIRST_RESERVED) { + lua_assert(token == (unsigned char)token); + return (iscntrl(token)) ? 
luaO_pushfstring(ls->L, "char(%d)", token) : + luaO_pushfstring(ls->L, "%c", token); } + else + return token2string[token-FIRST_RESERVED]; } -void luaX_errorline (LexState *ls, const char *s, const char *token, int line) { - lua_State *L = ls->L; - char buff[MAXSRC]; - luaO_chunkid(buff, getstr(ls->source), MAXSRC); - luaO_pushfstring(L, "%s:%d: %s near `%s'", buff, line, s, token); - luaD_throw(L, LUA_ERRSYNTAX); -} - - -static void luaX_error (LexState *ls, const char *s, const char *token) { - luaX_errorline(ls, s, token, ls->linenumber); -} - - -void luaX_syntaxerror (LexState *ls, const char *msg) { - const char *lasttoken; - switch (ls->t.token) { +static const char *txtToken (LexState *ls, int token) { + switch (token) { case TK_NAME: - lasttoken = getstr(ls->t.seminfo.ts); - break; case TK_STRING: case TK_NUMBER: - lasttoken = luaZ_buffer(ls->buff); - break; + save(ls, '\0'); + return luaZ_buffer(ls->buff); default: - lasttoken = luaX_token2str(ls, ls->t.token); - break; + return luaX_token2str(ls, token); } - luaX_error(ls, msg, lasttoken); } -const char *luaX_token2str (LexState *ls, int token) { - if (token < FIRST_RESERVED) { - lua_assert(token == (unsigned char)token); - return luaO_pushfstring(ls->L, "%c", token); - } - else - return token2string[token-FIRST_RESERVED]; +void luaX_lexerror (LexState *ls, const char *msg, int token) { + char buff[MAXSRC]; + luaO_chunkid(buff, getstr(ls->source), MAXSRC); + msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); + if (token) + luaO_pushfstring(ls->L, "%s near `%s'", msg, txtToken(ls, token)); + luaD_throw(ls->L, LUA_ERRSYNTAX); } -static void luaX_lexerror (LexState *ls, const char *s, int token) { - if (token == TK_EOS) - luaX_error(ls, s, luaX_token2str(ls, token)); - else - luaX_error(ls, s, luaZ_buffer(ls->buff)); +void luaX_syntaxerror (LexState *ls, const char *msg) { + luaX_lexerror(ls, msg, ls->t.token); } -static void inclinenumber (LexState *LS) { - next(LS); /* skip `\n' */ - 
++LS->linenumber; - luaX_checklimit(LS, LS->linenumber, MAX_INT, "lines in a chunk"); +TString *luaX_newstring (LexState *ls, const char *str, size_t l) { + lua_State *L = ls->L; + TString *ts = luaS_newlstr(L, str, l); + TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */ + if (ttisnil(o)) + setbvalue(o, 1); /* make sure `str' will not be collected */ + return ts; } -void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) { - LS->L = L; - LS->lookahead.token = TK_EOS; /* no look-ahead token */ - LS->z = z; - LS->fs = NULL; - LS->linenumber = 1; - LS->lastline = 1; - LS->source = source; - next(LS); /* read first char */ - if (LS->current == '#') { - do { /* skip first line */ - next(LS); - } while (LS->current != '\n' && LS->current != EOZ); - } +static void inclinenumber (LexState *ls) { + int old = ls->current; + lua_assert(currIsNewline(ls)); + next(ls); /* skip `\n' or `\r' */ + if (currIsNewline(ls) && ls->current != old) + next(ls); /* skip `\n\r' or `\r\n' */ + if (++ls->linenumber >= MAX_INT) + luaX_syntaxerror(ls, "chunk has too many lines"); +} + + +void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { + ls->L = L; + ls->lookahead.token = TK_EOS; /* no look-ahead token */ + ls->z = z; + ls->fs = NULL; + ls->linenumber = 1; + ls->lastline = 1; + ls->source = source; + next(ls); /* read first char */ } @@ -141,242 +141,229 @@ void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) { */ -/* use buffer to store names, literal strings and numbers */ -/* extra space to allocate when growing buffer */ -#define EXTRABUFF 32 - -/* maximum number of chars that can be read without checking buffer size */ -#define MAXNOCHECK 5 - -#define checkbuffer(LS, len) \ - if (((len)+MAXNOCHECK)*sizeof(char) > luaZ_sizebuffer((LS)->buff)) \ - luaZ_openspace((LS)->L, (LS)->buff, (len)+EXTRABUFF) - -#define save(LS, c, l) \ - (luaZ_buffer((LS)->buff)[l++] = cast(char, c)) -#define save_and_next(LS, l) (save(LS, 
LS->current, l), next(LS)) - - -static size_t readname (LexState *LS) { - size_t l = 0; - checkbuffer(LS, l); - do { - checkbuffer(LS, l); - save_and_next(LS, l); - } while (isalnum(LS->current) || LS->current == '_'); - save(LS, '\0', l); - return l-1; +static void save_and_next (LexState *ls) { + save(ls, ls->current); + next(ls); } + /* LUA_NUMBER */ -static void read_numeral (LexState *LS, int comma, SemInfo *seminfo) { - size_t l = 0; - checkbuffer(LS, l); - if (comma) save(LS, '.', l); - while (isdigit(LS->current)) { - checkbuffer(LS, l); - save_and_next(LS, l); +static void read_numeral (LexState *ls, SemInfo *seminfo) { + while (isdigit(ls->current)) { + save_and_next(ls); } - if (LS->current == '.') { - save_and_next(LS, l); - if (LS->current == '.') { - save_and_next(LS, l); - save(LS, '\0', l); - luaX_lexerror(LS, + if (ls->current == '.') { + save_and_next(ls); + if (ls->current == '.') { + save_and_next(ls); + luaX_lexerror(ls, "ambiguous syntax (decimal point x string concatenation)", TK_NUMBER); } } - while (isdigit(LS->current)) { - checkbuffer(LS, l); - save_and_next(LS, l); + while (isdigit(ls->current)) { + save_and_next(ls); } - if (LS->current == 'e' || LS->current == 'E') { - save_and_next(LS, l); /* read `E' */ - if (LS->current == '+' || LS->current == '-') - save_and_next(LS, l); /* optional exponent sign */ - while (isdigit(LS->current)) { - checkbuffer(LS, l); - save_and_next(LS, l); + if (ls->current == 'e' || ls->current == 'E') { + save_and_next(ls); /* read `E' */ + if (ls->current == '+' || ls->current == '-') + save_and_next(ls); /* optional exponent sign */ + while (isdigit(ls->current)) { + save_and_next(ls); } } - save(LS, '\0', l); - if (!luaO_str2d(luaZ_buffer(LS->buff), &seminfo->r)) - luaX_lexerror(LS, "malformed number", TK_NUMBER); + save(ls, '\0'); + if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) + luaX_lexerror(ls, "malformed number", TK_NUMBER); } -static void read_long_string (LexState *LS, SemInfo *seminfo) { 
+static int skip_ast (LexState *ls) { + int count = 0; + int s = ls->current; + lua_assert(s == '[' || s == ']'); + save_and_next(ls); + while (ls->current == '*') { + save_and_next(ls); + count++; + } + return (ls->current == s) ? count : (-count) - 1; +} + + +static void read_long_string (LexState *ls, SemInfo *seminfo, int ast) { int cont = 0; - size_t l = 0; - checkbuffer(LS, l); - save(LS, '[', l); /* save first `[' */ - save_and_next(LS, l); /* pass the second `[' */ - if (LS->current == '\n') /* string starts with a newline? */ - inclinenumber(LS); /* skip it */ + save_and_next(ls); /* skip 2nd `[' */ + if (currIsNewline(ls)) /* string starts with a newline? */ + inclinenumber(ls); /* skip it */ for (;;) { - checkbuffer(LS, l); - switch (LS->current) { + switch (ls->current) { case EOZ: - save(LS, '\0', l); - luaX_lexerror(LS, (seminfo) ? "unfinished long string" : + luaX_lexerror(ls, (seminfo) ? "unfinished long string" : "unfinished long comment", TK_EOS); break; /* to avoid warnings */ case '[': - save_and_next(LS, l); - if (LS->current == '[') { + if (skip_ast(ls) == ast) { + save_and_next(ls); /* skip 2nd `[' */ cont++; - save_and_next(LS, l); } continue; case ']': - save_and_next(LS, l); - if (LS->current == ']') { - if (cont == 0) goto endloop; - cont--; - save_and_next(LS, l); + if (skip_ast(ls) == ast) { + save_and_next(ls); /* skip 2nd `]' */ + if (cont-- == 0) goto endloop; } continue; case '\n': - save(LS, '\n', l); - inclinenumber(LS); - if (!seminfo) l = 0; /* reset buffer to avoid wasting space */ + case '\r': + save(ls, '\n'); + inclinenumber(ls); + if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ continue; default: - save_and_next(LS, l); + if (seminfo) save_and_next(ls); + else next(ls); } } endloop: - save_and_next(LS, l); /* skip the second `]' */ - save(LS, '\0', l); if (seminfo) - seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 2, l - 5); + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + ast), 
+ luaZ_bufflen(ls->buff) - 2*(2 + ast)); } -static void read_string (LexState *LS, int del, SemInfo *seminfo) { - size_t l = 0; - checkbuffer(LS, l); - save_and_next(LS, l); - while (LS->current != del) { - checkbuffer(LS, l); - switch (LS->current) { +static void read_string (LexState *ls, int del, SemInfo *seminfo) { + save_and_next(ls); + while (ls->current != del) { + switch (ls->current) { case EOZ: - save(LS, '\0', l); - luaX_lexerror(LS, "unfinished string", TK_EOS); - break; /* to avoid warnings */ + luaX_lexerror(ls, "unfinished string", TK_EOS); + continue; /* to avoid warnings */ case '\n': - save(LS, '\0', l); - luaX_lexerror(LS, "unfinished string", TK_STRING); - break; /* to avoid warnings */ - case '\\': - next(LS); /* do not save the `\' */ - switch (LS->current) { - case 'a': save(LS, '\a', l); next(LS); break; - case 'b': save(LS, '\b', l); next(LS); break; - case 'f': save(LS, '\f', l); next(LS); break; - case 'n': save(LS, '\n', l); next(LS); break; - case 'r': save(LS, '\r', l); next(LS); break; - case 't': save(LS, '\t', l); next(LS); break; - case 'v': save(LS, '\v', l); next(LS); break; - case '\n': save(LS, '\n', l); inclinenumber(LS); break; - case EOZ: break; /* will raise an error next loop */ + case '\r': + luaX_lexerror(ls, "unfinished string", TK_STRING); + continue; /* to avoid warnings */ + case '\\': { + int c; + next(ls); /* do not save the `\' */ + switch (ls->current) { + case 'a': c = '\a'; break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + case '\n': /* go through */ + case '\r': save(ls, '\n'); inclinenumber(ls); continue; + case EOZ: continue; /* will raise an error next loop */ default: { - if (!isdigit(LS->current)) - save_and_next(LS, l); /* handles \\, \", \', and \? */ + if (!isdigit(ls->current)) + save_and_next(ls); /* handles \\, \", \', and \? 
*/ else { /* \xxx */ - int c = 0; int i = 0; + c = 0; do { - c = 10*c + (LS->current-'0'); - next(LS); - } while (++i<3 && isdigit(LS->current)); - if (c > UCHAR_MAX) { - save(LS, '\0', l); - luaX_lexerror(LS, "escape sequence too large", TK_STRING); - } - save(LS, c, l); + c = 10*c + (ls->current-'0'); + next(ls); + } while (++i<3 && isdigit(ls->current)); + if (c > UCHAR_MAX) + luaX_lexerror(ls, "escape sequence too large", TK_STRING); + save(ls, c); } + continue; } } - break; + save(ls, c); + next(ls); + continue; + } default: - save_and_next(LS, l); + save_and_next(ls); } } - save_and_next(LS, l); /* skip delimiter */ - save(LS, '\0', l); - seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 1, l - 3); + save_and_next(ls); /* skip delimiter */ + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, + luaZ_bufflen(ls->buff) - 2); } -int luaX_lex (LexState *LS, SemInfo *seminfo) { +int luaX_lex (LexState *ls, SemInfo *seminfo) { + luaZ_resetbuffer(ls->buff); for (;;) { - switch (LS->current) { - - case '\n': { - inclinenumber(LS); + switch (ls->current) { + case '\n': + case '\r': { + inclinenumber(ls); continue; } case '-': { - next(LS); - if (LS->current != '-') return '-'; + next(ls); + if (ls->current != '-') return '-'; /* else is a comment */ - next(LS); - if (LS->current == '[' && (next(LS), LS->current == '[')) - read_long_string(LS, NULL); /* long comment */ - else /* short comment */ - while (LS->current != '\n' && LS->current != EOZ) - next(LS); + next(ls); + if (ls->current == '[') { + int ast = skip_ast(ls); + luaZ_resetbuffer(ls->buff); /* `skip_ast' may dirty the buffer */ + if (ast >= 0) { + read_long_string(ls, NULL, ast); /* long comment */ + luaZ_resetbuffer(ls->buff); + continue; + } + } + /* else short comment */ + while (!currIsNewline(ls) && ls->current != EOZ) + next(ls); continue; } case '[': { - next(LS); - if (LS->current != '[') return '['; - else { - read_long_string(LS, seminfo); + int ast = skip_ast(ls); + if (ast >= 0) 
{ + read_long_string(ls, seminfo, ast); return TK_STRING; } + else if (ast == -1) return '['; + else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); } case '=': { - next(LS); - if (LS->current != '=') return '='; - else { next(LS); return TK_EQ; } + next(ls); + if (ls->current != '=') return '='; + else { next(ls); return TK_EQ; } } case '<': { - next(LS); - if (LS->current != '=') return '<'; - else { next(LS); return TK_LE; } + next(ls); + if (ls->current != '=') return '<'; + else { next(ls); return TK_LE; } } case '>': { - next(LS); - if (LS->current != '=') return '>'; - else { next(LS); return TK_GE; } + next(ls); + if (ls->current != '=') return '>'; + else { next(ls); return TK_GE; } } case '~': { - next(LS); - if (LS->current != '=') return '~'; - else { next(LS); return TK_NE; } + next(ls); + if (ls->current != '=') return '~'; + else { next(ls); return TK_NE; } } case '"': case '\'': { - read_string(LS, LS->current, seminfo); + read_string(ls, ls->current, seminfo); return TK_STRING; } case '.': { - next(LS); - if (LS->current == '.') { - next(LS); - if (LS->current == '.') { - next(LS); + save_and_next(ls); + if (ls->current == '.') { + next(ls); + if (ls->current == '.') { + next(ls); return TK_DOTS; /* ... */ } else return TK_CONCAT; /* .. 
*/ } - else if (!isdigit(LS->current)) return '.'; + else if (!isdigit(ls->current)) return '.'; else { - read_numeral(LS, 1, seminfo); + read_numeral(ls, seminfo); return TK_NUMBER; } } @@ -384,29 +371,33 @@ int luaX_lex (LexState *LS, SemInfo *seminfo) { return TK_EOS; } default: { - if (isspace(LS->current)) { - next(LS); + if (isspace(ls->current)) { + lua_assert(!currIsNewline(ls)); + next(ls); continue; } - else if (isdigit(LS->current)) { - read_numeral(LS, 0, seminfo); + else if (isdigit(ls->current)) { + read_numeral(ls, seminfo); return TK_NUMBER; } - else if (isalpha(LS->current) || LS->current == '_') { + else if (isalpha(ls->current) || ls->current == '_') { /* identifier or reserved word */ - size_t l = readname(LS); - TString *ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff), l); + TString *ts; + do { + save_and_next(ls); + } while (isalnum(ls->current) || ls->current == '_'); + ts = luaX_newstring(ls, luaZ_buffer(ls->buff), + luaZ_bufflen(ls->buff)); if (ts->tsv.reserved > 0) /* reserved word? */ return ts->tsv.reserved - 1 + FIRST_RESERVED; - seminfo->ts = ts; - return TK_NAME; + else { + seminfo->ts = ts; + return TK_NAME; + } } else { - int c = LS->current; - if (iscntrl(c)) - luaX_error(LS, "invalid control char", - luaO_pushfstring(LS->L, "char(%d)", c)); - next(LS); + int c = ls->current; + next(ls); return c; /* single-char tokens (+ - / ...) */ } } |