summaryrefslogtreecommitdiff
path: root/Parser
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2016-10-07 21:55:49 +0300
committer: Serhiy Storchaka <storchaka@gmail.com> 2016-10-07 21:55:49 +0300
commit: f6884ef5c8209462210b82c43d15a36b09e49cf1 (patch)
tree: e18dff898ac19dd995ca60da43b2484d09dbe2bf /Parser
parent: 6a2df86ef6566ecf275166827b4acf7f2c42d6c7 (diff)
parent: cbc6cfd31fc4e0c80fec50097f3136120cc6e2b5 (diff)
download: cpython-f6884ef5c8209462210b82c43d15a36b09e49cf1.tar.gz
Issue #24098: Fixed a possible crash when the AST is changed in the process
of compiling it.
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/Python.asdl 13
-rw-r--r-- Parser/asdl.py 3
-rw-r--r--[-rwxr-xr-x] Parser/asdl_c.py 16
-rw-r--r-- Parser/grammar.c 27
-rw-r--r-- Parser/myreadline.c 118
-rw-r--r-- Parser/node.c 2
-rw-r--r-- Parser/parser.c 9
-rw-r--r-- Parser/parsetok.c 6
-rw-r--r-- Parser/pgen.c 24
-rw-r--r-- Parser/pgenmain.c 14
-rw-r--r-- Parser/tokenizer.c 246
11 files changed, 375 insertions, 103 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index cd0832da8d..f470ad13b6 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -1,4 +1,8 @@
--- ASDL's six builtin types are identifier, int, string, bytes, object, singleton
+-- ASDL's 7 builtin types are:
+-- identifier, int, string, bytes, object, singleton, constant
+--
+-- singleton: None, True or False
+-- constant can be None, whereas None means "no value" for object.
module Python
{
@@ -24,6 +28,8 @@ module Python
| Delete(expr* targets)
| Assign(expr* targets, expr value)
| AugAssign(expr target, operator op, expr value)
+ -- 'simple' indicates that we annotate simple name without parens
+ | AnnAssign(expr target, expr annotation, expr? value, int simple)
-- use 'orelse' because else is a keyword in target languages
| For(expr target, expr iter, stmt* body, stmt* orelse)
@@ -71,9 +77,12 @@ module Python
| Call(expr func, expr* args, keyword* keywords)
| Num(object n) -- a number as a PyObject.
| Str(string s) -- need to specify raw, unicode, etc?
+ | FormattedValue(expr value, int? conversion, expr? format_spec)
+ | JoinedStr(expr* values)
| Bytes(bytes s)
| NameConstant(singleton value)
| Ellipsis
+ | Constant(constant value)
-- the following expression can appear in assignment context
| Attribute(expr value, identifier attr, expr_context ctx)
@@ -101,7 +110,7 @@ module Python
cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
- comprehension = (expr target, expr iter, expr* ifs)
+ comprehension = (expr target, expr iter, expr* ifs, int is_async)
excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body)
attributes (int lineno, int col_offset)
diff --git a/Parser/asdl.py b/Parser/asdl.py
index 121cdab952..62f5c19c99 100644
--- a/Parser/asdl.py
+++ b/Parser/asdl.py
@@ -33,7 +33,8 @@ __all__ = [
# See the EBNF at the top of the file to understand the logical connection
# between the various node types.
-builtin_types = {'identifier', 'string', 'bytes', 'int', 'object', 'singleton'}
+builtin_types = {'identifier', 'string', 'bytes', 'int', 'object', 'singleton',
+ 'constant'}
class AST:
def __repr__(self):
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index ac9d2b6346..725d73bad4 100755..100644
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -841,6 +841,7 @@ static PyObject* ast2obj_object(void *o)
return (PyObject*)o;
}
#define ast2obj_singleton ast2obj_object
+#define ast2obj_constant ast2obj_object
#define ast2obj_identifier ast2obj_object
#define ast2obj_string ast2obj_object
#define ast2obj_bytes ast2obj_object
@@ -878,6 +879,19 @@ static int obj2ast_object(PyObject* obj, PyObject** out, PyArena* arena)
return 0;
}
+/* Convert a Python object into an ASDL "constant" field.
+ *
+ * A NULL obj is stored in *out unchanged.  Any other object is first passed
+ * to PyArena_AddPyObject() and then INCREF'ed before being stored in *out.
+ *
+ * Returns 0 on success.  If PyArena_AddPyObject() fails, *out is set to
+ * NULL and -1 is returned.
+ */
+static int obj2ast_constant(PyObject* obj, PyObject** out, PyArena* arena)
+{
+    /* Nothing to register for a missing value. */
+    if (obj == NULL) {
+        *out = NULL;
+        return 0;
+    }
+
+    if (PyArena_AddPyObject(arena, obj) < 0) {
+        *out = NULL;
+        return -1;
+    }
+
+    Py_INCREF(obj);
+    *out = obj;
+    return 0;
+}
+
static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena)
{
if (!PyUnicode_CheckExact(obj) && obj != Py_None) {
@@ -913,7 +927,7 @@ static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
return 1;
}
- i = (int)PyLong_AsLong(obj);
+ i = _PyLong_AsInt(obj);
if (i == -1 && PyErr_Occurred())
return 1;
*out = i;
diff --git a/Parser/grammar.c b/Parser/grammar.c
index b598294a4a..75fd5b9cde 100644
--- a/Parser/grammar.c
+++ b/Parser/grammar.c
@@ -28,6 +28,23 @@ newgrammar(int start)
return g;
}
+/* Release a grammar object together with the storage it references.
+ *
+ * For every DFA: the name, each state's arc array, and the state array.
+ * Then the DFA array, every label string, the label array, and finally the
+ * grammar itself.  Names and label strings are released with free();
+ * everything else with PyObject_FREE().
+ */
+void
+freegrammar(grammar *g)
+{
+    for (int di = 0; di < g->g_ndfas; di++) {
+        dfa *cur = &g->g_dfa[di];
+
+        free(cur->d_name);
+        for (int si = 0; si < cur->d_nstates; si++) {
+            PyObject_FREE(cur->d_state[si].s_arc);
+        }
+        PyObject_FREE(cur->d_state);
+    }
+    PyObject_FREE(g->g_dfa);
+
+    for (int li = 0; li < g->g_ll.ll_nlabels; li++) {
+        free(g->g_ll.ll_label[li].lb_str);
+    }
+    PyObject_FREE(g->g_ll.ll_label);
+
+    PyObject_FREE(g);
+}
+
dfa *
adddfa(grammar *g, int type, const char *name)
{
@@ -63,7 +80,7 @@ addstate(dfa *d)
s->s_upper = 0;
s->s_accel = NULL;
s->s_accept = 0;
- return Py_SAFE_DOWNCAST(s - d->d_state, Py_intptr_t, int);
+ return Py_SAFE_DOWNCAST(s - d->d_state, intptr_t, int);
}
void
@@ -105,7 +122,7 @@ addlabel(labellist *ll, int type, const char *str)
if (Py_DebugFlag)
printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels,
PyGrammar_LabelRepr(lb));
- return Py_SAFE_DOWNCAST(lb - ll->ll_label, Py_intptr_t, int);
+ return Py_SAFE_DOWNCAST(lb - ll->ll_label, intptr_t, int);
}
/* Same, but rather dies than adds */
@@ -122,7 +139,13 @@ findlabel(labellist *ll, int type, const char *str)
}
fprintf(stderr, "Label %d/'%s' not found\n", type, str);
Py_FatalError("grammar.c:findlabel()");
+
+ /* Py_FatalError() is declared with __attribute__((__noreturn__)).
+ GCC emits a warning without "return 0;" (compiler bug!), but Clang is
+ smarter and emits a warning on the return... */
+#ifndef __clang__
return 0; /* Make gcc -Wall happy */
+#endif
}
/* Forward */
diff --git a/Parser/myreadline.c b/Parser/myreadline.c
index 28c7b6d7ff..a8f23b790a 100644
--- a/Parser/myreadline.c
+++ b/Parser/myreadline.c
@@ -41,10 +41,7 @@ my_fgets(char *buf, int len, FILE *fp)
(void)(PyOS_InputHook)();
errno = 0;
clearerr(fp);
- if (_PyVerify_fd(fileno(fp)))
- p = fgets(buf, len, fp);
- else
- p = NULL;
+ p = fgets(buf, len, fp);
if (p != NULL)
return 0; /* No error */
err = errno;
@@ -101,6 +98,100 @@ my_fgets(char *buf, int len, FILE *fp)
/* NOTREACHED */
}
+#ifdef MS_WINDOWS
+/* Readline implementation using ReadConsoleW */
+
+extern char _get_console_type(HANDLE handle);
+
+/* Read one line from the Windows console with ReadConsoleW() and return it
+ * as a NUL-terminated UTF-8 string allocated with PyMem_RawMalloc().
+ *
+ * hStdIn: console input handle to read from.
+ *
+ * Returns:
+ *   - the UTF-8 encoded text read so far (reading stops once the data ends
+ *     with L'\n');
+ *   - an empty string when nothing was read or when the input starts with
+ *     Ctrl-Z ('\x1a');
+ *   - NULL on failure.  A Windows error is reported through
+ *     PyErr_SetFromWindowsErr(), an allocation failure through
+ *     PyErr_NoMemory(); a pending signal handler exception (negative result
+ *     from PyErr_CheckSignals()) also yields NULL.
+ *
+ * Input is accumulated in a static 16K-wchar buffer first and moved to a
+ * heap buffer, grown in chunk_size steps, when a line does not fit.
+ * Exceptions are only set while the thread state saved in
+ * _PyOS_ReadlineTState is restored.
+ */
+char *
+_PyOS_WindowsConsoleReadline(HANDLE hStdIn)
+{
+    static wchar_t wbuf_local[1024 * 16];
+    const DWORD chunk_size = 1024;
+
+    DWORD n_read, total_read, wbuflen, u8len;
+    wchar_t *wbuf;
+    char *buf = NULL;
+    int err = 0;
+    int nomem = 0;      /* set when an allocation failed */
+
+    n_read = 0;
+    total_read = 0;
+    wbuf = wbuf_local;
+    /* Keep one slot free for the terminator written before migrating. */
+    wbuflen = sizeof(wbuf_local) / sizeof(wbuf_local[0]) - 1;
+    while (1) {
+        if (!ReadConsoleW(hStdIn, &wbuf[total_read], wbuflen - total_read, &n_read, NULL)) {
+            err = GetLastError();
+            goto exit;
+        }
+        if (n_read == 0) {
+            int s;
+            err = GetLastError();
+            if (err != ERROR_OPERATION_ABORTED)
+                goto exit;
+            err = 0;
+            HANDLE hInterruptEvent = _PyOS_SigintEvent();
+            if (WaitForSingleObjectEx(hInterruptEvent, 100, FALSE)
+                    == WAIT_OBJECT_0) {
+                ResetEvent(hInterruptEvent);
+#ifdef WITH_THREAD
+                PyEval_RestoreThread(_PyOS_ReadlineTState);
+#endif
+                s = PyErr_CheckSignals();
+#ifdef WITH_THREAD
+                PyEval_SaveThread();
+#endif
+                if (s < 0)
+                    goto exit;
+            }
+            break;
+        }
+
+        total_read += n_read;
+        if (total_read == 0 || wbuf[total_read - 1] == L'\n') {
+            break;
+        }
+
+        /* The line is not complete yet: make room for another chunk. */
+        wbuflen += chunk_size;
+        if (wbuf == wbuf_local) {
+            wbuf[total_read] = '\0';
+            wbuf = (wchar_t*)PyMem_RawMalloc(wbuflen * sizeof(wchar_t));
+            if (wbuf == NULL) {
+                /* wbuf is NULL here, so the free at exit is a no-op. */
+                nomem = 1;
+                goto exit;
+            }
+            wcscpy_s(wbuf, wbuflen, wbuf_local);
+        }
+        else {
+            /* Do not overwrite wbuf until the realloc is known to have
+               succeeded, so the old buffer can still be freed at exit. */
+            wchar_t *grown = (wchar_t*)PyMem_RawRealloc(wbuf, wbuflen * sizeof(wchar_t));
+            if (grown == NULL) {
+                nomem = 1;
+                goto exit;
+            }
+            wbuf = grown;
+        }
+    }
+
+    /* Nothing read, or Ctrl-Z at the start: return an empty string.  The
+       total_read check keeps us from inspecting stale data left in the
+       static buffer by an earlier call. */
+    if (total_read == 0 || wbuf[0] == '\x1a') {
+        buf = PyMem_RawMalloc(1);
+        if (buf)
+            buf[0] = '\0';
+        else
+            nomem = 1;
+        goto exit;
+    }
+
+    /* First call computes the required size, second call converts. */
+    u8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, total_read, NULL, 0, NULL, NULL);
+    buf = PyMem_RawMalloc(u8len + 1);
+    if (buf == NULL) {
+        nomem = 1;
+        goto exit;
+    }
+    u8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, total_read, buf, u8len, NULL, NULL);
+    buf[u8len] = '\0';
+
+exit:
+    if (wbuf != wbuf_local)
+        PyMem_RawFree(wbuf);
+
+    if (err || nomem) {
+#ifdef WITH_THREAD
+        PyEval_RestoreThread(_PyOS_ReadlineTState);
+#endif
+        if (nomem)
+            (void)PyErr_NoMemory();
+        else
+            PyErr_SetFromWindowsErr(err);
+#ifdef WITH_THREAD
+        PyEval_SaveThread();
+#endif
+    }
+
+    return buf;
+}
+
+#endif
+
/* Readline implementation using fgets() */
@@ -110,6 +201,25 @@ PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
size_t n;
char *p, *pr;
+#ifdef MS_WINDOWS
+ if (!Py_LegacyWindowsStdioFlag && sys_stdin == stdin) {
+ HANDLE hStdIn;
+
+ _Py_BEGIN_SUPPRESS_IPH
+ hStdIn = (HANDLE)_get_osfhandle(fileno(sys_stdin));
+ _Py_END_SUPPRESS_IPH
+
+ if (_get_console_type(hStdIn) == 'r') {
+ fflush(sys_stdout);
+ if (prompt)
+ fprintf(stderr, "%s", prompt);
+ fflush(stderr);
+ clearerr(sys_stdin);
+ return _PyOS_WindowsConsoleReadline(hStdIn);
+ }
+ }
+#endif
+
n = 100;
p = (char *)PyMem_RawMalloc(n);
if (p == NULL)
diff --git a/Parser/node.c b/Parser/node.c
index 00103240af..240d29057c 100644
--- a/Parser/node.c
+++ b/Parser/node.c
@@ -91,7 +91,7 @@ PyNode_AddChild(node *n1, int type, char *str, int lineno, int col_offset)
if (current_capacity < 0 || required_capacity < 0)
return E_OVERFLOW;
if (current_capacity < required_capacity) {
- if ((size_t)required_capacity > PY_SIZE_MAX / sizeof(node)) {
+ if ((size_t)required_capacity > SIZE_MAX / sizeof(node)) {
return E_NOMEM;
}
n = n1->n_child;
diff --git a/Parser/parser.c b/Parser/parser.c
index 56ec5148d3..41072c478c 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -140,21 +140,20 @@ classify(parser_state *ps, int type, const char *str)
int n = g->g_ll.ll_nlabels;
if (type == NAME) {
- const char *s = str;
label *l = g->g_ll.ll_label;
int i;
for (i = n; i > 0; i--, l++) {
if (l->lb_type != NAME || l->lb_str == NULL ||
- l->lb_str[0] != s[0] ||
- strcmp(l->lb_str, s) != 0)
+ l->lb_str[0] != str[0] ||
+ strcmp(l->lb_str, str) != 0)
continue;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
/* Leaving this in as an example */
if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
- if (s[0] == 'w' && strcmp(s, "with") == 0)
+ if (str[0] == 'w' && strcmp(str, "with") == 0)
break; /* not a keyword yet */
- else if (s[0] == 'a' && strcmp(s, "as") == 0)
+ else if (str[0] == 'a' && strcmp(str, "as") == 0)
break; /* not a keyword yet */
}
#endif
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 629dee565c..1f467d63c4 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -161,10 +161,10 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
-static char with_msg[] =
+static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";
-static char as_msg[] =
+static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";
static void
@@ -255,7 +255,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
#endif
if (a >= tok->line_start)
col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
- Py_intptr_t, int);
+ intptr_t, int);
else
col_offset = -1;
diff --git a/Parser/pgen.c b/Parser/pgen.c
index f3031aea0b..6451a1d998 100644
--- a/Parser/pgen.c
+++ b/Parser/pgen.c
@@ -117,6 +117,16 @@ newnfagrammar(void)
return gr;
}
+/* Release an NFA grammar: the state array of every NFA it lists, then the
+ * NFA pointer array, then the grammar object itself.
+ *
+ * NOTE(review): the objects that gr->gr_nfa[i] point to are not released
+ * here, only their nf_state arrays -- confirm who owns them.
+ */
+static void
+freenfagrammar(nfagrammar *gr)
+{
+    int idx = 0;
+
+    while (idx < gr->gr_nnfas) {
+        PyObject_FREE(gr->gr_nfa[idx]->nf_state);
+        idx++;
+    }
+    PyObject_FREE(gr->gr_nfa);
+    PyObject_FREE(gr);
+}
+
static nfa *
addnfa(nfagrammar *gr, char *name)
{
@@ -134,7 +144,7 @@ addnfa(nfagrammar *gr, char *name)
#ifdef Py_DEBUG
-static char REQNFMT[] = "metacompile: less than %d children\n";
+static const char REQNFMT[] = "metacompile: less than %d children\n";
#define REQN(i, count) do { \
if (i < count) { \
@@ -379,7 +389,7 @@ typedef struct _ss_dfa {
/* Forward */
static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
- labellist *ll, char *msg);
+ labellist *ll, const char *msg);
static void simplify(int xx_nstates, ss_state *xx_state);
static void convert(dfa *d, int xx_nstates, ss_state *xx_state);
@@ -488,13 +498,17 @@ makedfa(nfagrammar *gr, nfa *nf, dfa *d)
convert(d, xx_nstates, xx_state);
- /* XXX cleanup */
+ for (int i = 0; i < xx_nstates; i++) {
+ for (int j = 0; j < xx_state[i].ss_narcs; j++)
+ delbitset(xx_state[i].ss_arc[j].sa_bitset);
+ PyObject_FREE(xx_state[i].ss_arc);
+ }
PyObject_FREE(xx_state);
}
static void
printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
- labellist *ll, char *msg)
+ labellist *ll, const char *msg)
{
int i, ibit, iarc;
ss_state *yy;
@@ -669,7 +683,7 @@ pgen(node *n)
g = maketables(gr);
translatelabels(g);
addfirstsets(g);
- PyObject_FREE(gr);
+ freenfagrammar(gr);
return g;
}
diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c
index 0f055d6308..e386248c2f 100644
--- a/Parser/pgenmain.c
+++ b/Parser/pgenmain.c
@@ -27,7 +27,7 @@ int Py_VerboseFlag;
int Py_IgnoreEnvironmentFlag;
/* Forward */
-grammar *getgrammar(char *filename);
+grammar *getgrammar(const char *filename);
void Py_Exit(int) _Py_NO_RETURN;
@@ -37,6 +37,15 @@ Py_Exit(int sts)
exit(sts);
}
+#ifdef WITH_THREAD
+/* Stub needed by obmalloc.c: unconditionally returns 1. */
+int
+PyGILState_Check(void)
+{
+    return 1;
+}
+#endif
+
+/* Stub: does nothing; both arguments are ignored. */
+void
+_PyMem_DumpTraceback(int fd, const void *ptr)
+{
+    (void)fd;
+    (void)ptr;
+}
+
int
main(int argc, char **argv)
{
@@ -71,12 +80,13 @@ main(int argc, char **argv)
printf("Writing %s ...\n", graminit_h);
printnonterminals(g, fp);
fclose(fp);
+ freegrammar(g);
Py_Exit(0);
return 0; /* Make gcc -Wall happy */
}
grammar *
-getgrammar(char *filename)
+getgrammar(const char *filename)
{
FILE *fp;
node *n;
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 612cb23545..8317293796 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -202,8 +202,8 @@ error_ret(struct tok_state *tok) /* XXX */
}
-static char *
-get_normal_name(char *s) /* for utf-8 and latin-1 */
+static const char *
+get_normal_name(const char *s) /* for utf-8 and latin-1 */
{
char buf[13];
int i;
@@ -264,7 +264,7 @@ get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *t
if (begin < t) {
char* r = new_string(begin, t - begin, tok);
- char* q;
+ const char* q;
if (!r)
return 0;
q = get_normal_name(r);
@@ -1335,6 +1335,28 @@ verify_identifier(struct tok_state *tok)
}
#endif
+/* Consume the remainder of a decimal digit run in which single underscores
+ * may separate groups of digits.
+ *
+ * Characters are read with tok_nextc() until one is found that is neither a
+ * digit nor an underscore followed by a digit.
+ *
+ * Returns the first character after the run; it has been read and is NOT
+ * pushed back.  If an underscore is not followed by a digit, sets
+ * tok->done to E_TOKEN, pushes the offending character back with
+ * tok_backup() and returns 0.
+ */
+static int
+tok_decimal_tail(struct tok_state *tok)
+{
+    for (;;) {
+        /* Skip over one group of digits. */
+        int ch = tok_nextc(tok);
+        while (isdigit(ch)) {
+            ch = tok_nextc(tok);
+        }
+
+        /* Anything but a separator ends the run. */
+        if (ch != '_') {
+            return ch;
+        }
+
+        /* A separator must be followed by another digit. */
+        ch = tok_nextc(tok);
+        if (!isdigit(ch)) {
+            tok->done = E_TOKEN;
+            tok_backup(tok, ch);
+            return 0;
+        }
+    }
+}
+
/* Get next token, after space stripping etc. */
static int
@@ -1355,17 +1377,20 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
tok->atbol = 0;
for (;;) {
c = tok_nextc(tok);
- if (c == ' ')
+ if (c == ' ') {
col++, altcol++;
+ }
else if (c == '\t') {
col = (col/tok->tabsize + 1) * tok->tabsize;
altcol = (altcol/tok->alttabsize + 1)
* tok->alttabsize;
}
- else if (c == '\014') /* Control-L (formfeed) */
+ else if (c == '\014') {/* Control-L (formfeed) */
col = altcol = 0; /* For Emacs users */
- else
+ }
+ else {
break;
+ }
}
tok_backup(tok, c);
if (c == '#' || c == '\n') {
@@ -1374,10 +1399,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
not passed to the parser as NEWLINE tokens,
except *totally* empty lines in interactive
mode, which signal the end of a command group. */
- if (col == 0 && c == '\n' && tok->prompt != NULL)
+ if (col == 0 && c == '\n' && tok->prompt != NULL) {
blankline = 0; /* Let it through */
- else
+ }
+ else {
blankline = 1; /* Ignore completely */
+ }
/* We can't jump back right here since we still
may need to skip to the end of a comment */
}
@@ -1385,8 +1412,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
if (col == tok->indstack[tok->indent]) {
/* No change */
if (altcol != tok->altindstack[tok->indent]) {
- if (indenterror(tok))
+ if (indenterror(tok)) {
return ERRORTOKEN;
+ }
}
}
else if (col > tok->indstack[tok->indent]) {
@@ -1397,8 +1425,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
return ERRORTOKEN;
}
if (altcol <= tok->altindstack[tok->indent]) {
- if (indenterror(tok))
+ if (indenterror(tok)) {
return ERRORTOKEN;
+ }
}
tok->pendin++;
tok->indstack[++tok->indent] = col;
@@ -1417,8 +1446,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
return ERRORTOKEN;
}
if (altcol != tok->altindstack[tok->indent]) {
- if (indenterror(tok))
+ if (indenterror(tok)) {
return ERRORTOKEN;
+ }
}
}
}
@@ -1464,9 +1494,11 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
tok->start = tok->cur - 1;
/* Skip comment */
- if (c == '#')
- while (c != EOF && c != '\n')
+ if (c == '#') {
+ while (c != EOF && c != '\n') {
c = tok_nextc(tok);
+ }
+ }
/* Check for EOF and errors now */
if (c == EOF) {
@@ -1477,31 +1509,41 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
nonascii = 0;
if (is_potential_identifier_start(c)) {
/* Process b"", r"", u"", br"" and rb"" */
- int saw_b = 0, saw_r = 0, saw_u = 0;
+ int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0;
while (1) {
- if (!(saw_b || saw_u) && (c == 'b' || c == 'B'))
+ if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B'))
saw_b = 1;
/* Since this is a backwards compatibility support literal we don't
want to support it in arbitrary order like byte literals. */
- else if (!(saw_b || saw_u || saw_r) && (c == 'u' || c == 'U'))
+ else if (!(saw_b || saw_u || saw_r || saw_f)
+ && (c == 'u'|| c == 'U')) {
saw_u = 1;
+ }
/* ur"" and ru"" are not supported */
- else if (!(saw_r || saw_u) && (c == 'r' || c == 'R'))
+ else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) {
saw_r = 1;
- else
+ }
+ else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) {
+ saw_f = 1;
+ }
+ else {
break;
+ }
c = tok_nextc(tok);
- if (c == '"' || c == '\'')
+ if (c == '"' || c == '\'') {
goto letter_quote;
+ }
}
while (is_potential_identifier_char(c)) {
- if (c >= 128)
+ if (c >= 128) {
nonascii = 1;
+ }
c = tok_nextc(tok);
}
tok_backup(tok, c);
- if (nonascii && !verify_identifier(tok))
+ if (nonascii && !verify_identifier(tok)) {
return ERRORTOKEN;
+ }
*p_start = tok->start;
*p_end = tok->cur;
@@ -1510,10 +1552,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
/* Current token length is 5. */
if (tok->async_def) {
/* We're inside an 'async def' function. */
- if (memcmp(tok->start, "async", 5) == 0)
+ if (memcmp(tok->start, "async", 5) == 0) {
return ASYNC;
- if (memcmp(tok->start, "await", 5) == 0)
+ }
+ if (memcmp(tok->start, "await", 5) == 0) {
return AWAIT;
+ }
}
else if (memcmp(tok->start, "async", 5) == 0) {
/* The current token is 'async'.
@@ -1546,8 +1590,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
/* Newline */
if (c == '\n') {
tok->atbol = 1;
- if (blankline || tok->level > 0)
+ if (blankline || tok->level > 0) {
goto nextline;
+ }
*p_start = tok->start;
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
tok->cont_line = 0;
@@ -1570,11 +1615,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
*p_start = tok->start;
*p_end = tok->cur;
return ELLIPSIS;
- } else {
+ }
+ else {
tok_backup(tok, c);
}
tok_backup(tok, '.');
- } else {
+ }
+ else {
tok_backup(tok, c);
}
*p_start = tok->start;
@@ -1587,64 +1634,94 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
if (c == '0') {
/* Hex, octal or binary -- maybe. */
c = tok_nextc(tok);
- if (c == '.')
- goto fraction;
- if (c == 'j' || c == 'J')
- goto imaginary;
if (c == 'x' || c == 'X') {
-
/* Hex */
c = tok_nextc(tok);
- if (!isxdigit(c)) {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
do {
- c = tok_nextc(tok);
- } while (isxdigit(c));
+ if (c == '_') {
+ c = tok_nextc(tok);
+ }
+ if (!isxdigit(c)) {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ do {
+ c = tok_nextc(tok);
+ } while (isxdigit(c));
+ } while (c == '_');
}
else if (c == 'o' || c == 'O') {
/* Octal */
c = tok_nextc(tok);
- if (c < '0' || c >= '8') {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
do {
- c = tok_nextc(tok);
- } while ('0' <= c && c < '8');
+ if (c == '_') {
+ c = tok_nextc(tok);
+ }
+ if (c < '0' || c >= '8') {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ do {
+ c = tok_nextc(tok);
+ } while ('0' <= c && c < '8');
+ } while (c == '_');
}
else if (c == 'b' || c == 'B') {
/* Binary */
c = tok_nextc(tok);
- if (c != '0' && c != '1') {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
do {
- c = tok_nextc(tok);
- } while (c == '0' || c == '1');
+ if (c == '_') {
+ c = tok_nextc(tok);
+ }
+ if (c != '0' && c != '1') {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ do {
+ c = tok_nextc(tok);
+ } while (c == '0' || c == '1');
+ } while (c == '_');
}
else {
int nonzero = 0;
/* maybe old-style octal; c is first char of it */
/* in any case, allow '0' as a literal */
- while (c == '0')
+ while (1) {
+ if (c == '_') {
+ c = tok_nextc(tok);
+ if (!isdigit(c)) {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ }
+ if (c != '0') {
+ break;
+ }
c = tok_nextc(tok);
- while (isdigit(c)) {
+ }
+ if (isdigit(c)) {
nonzero = 1;
- c = tok_nextc(tok);
+ c = tok_decimal_tail(tok);
+ if (c == 0) {
+ return ERRORTOKEN;
+ }
}
- if (c == '.')
+ if (c == '.') {
+ c = tok_nextc(tok);
goto fraction;
- else if (c == 'e' || c == 'E')
+ }
+ else if (c == 'e' || c == 'E') {
goto exponent;
- else if (c == 'j' || c == 'J')
+ }
+ else if (c == 'j' || c == 'J') {
goto imaginary;
+ }
else if (nonzero) {
+ /* Old-style octal: now disallowed. */
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
@@ -1653,17 +1730,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
}
else {
/* Decimal */
- do {
- c = tok_nextc(tok);
- } while (isdigit(c));
+ c = tok_decimal_tail(tok);
+ if (c == 0) {
+ return ERRORTOKEN;
+ }
{
/* Accept floating point numbers. */
if (c == '.') {
+ c = tok_nextc(tok);
fraction:
/* Fraction */
- do {
- c = tok_nextc(tok);
- } while (isdigit(c));
+ if (isdigit(c)) {
+ c = tok_decimal_tail(tok);
+ if (c == 0) {
+ return ERRORTOKEN;
+ }
+ }
}
if (c == 'e' || c == 'E') {
int e;
@@ -1685,14 +1767,16 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
*p_end = tok->cur;
return NUMBER;
}
- do {
- c = tok_nextc(tok);
- } while (isdigit(c));
+ c = tok_decimal_tail(tok);
+ if (c == 0) {
+ return ERRORTOKEN;
+ }
}
- if (c == 'j' || c == 'J')
+ if (c == 'j' || c == 'J') {
/* Imaginary part */
imaginary:
c = tok_nextc(tok);
+ }
}
}
tok_backup(tok, c);
@@ -1712,22 +1796,27 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
c = tok_nextc(tok);
if (c == quote) {
c = tok_nextc(tok);
- if (c == quote)
+ if (c == quote) {
quote_size = 3;
- else
+ }
+ else {
end_quote_size = 1; /* empty string found */
+ }
}
- if (c != quote)
+ if (c != quote) {
tok_backup(tok, c);
+ }
/* Get rest of string */
while (end_quote_size != quote_size) {
c = tok_nextc(tok);
if (c == EOF) {
- if (quote_size == 3)
+ if (quote_size == 3) {
tok->done = E_EOFS;
- else
+ }
+ else {
tok->done = E_EOLS;
+ }
tok->cur = tok->inp;
return ERRORTOKEN;
}
@@ -1736,12 +1825,14 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
tok->cur = tok->inp;
return ERRORTOKEN;
}
- if (c == quote)
+ if (c == quote) {
end_quote_size += 1;
+ }
else {
end_quote_size = 0;
- if (c == '\\')
- c = tok_nextc(tok); /* skip escaped char */
+ if (c == '\\') {
+ tok_nextc(tok); /* skip escaped char */
+ }
}
}
@@ -1771,7 +1862,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
int token3 = PyToken_ThreeChars(c, c2, c3);
if (token3 != OP) {
token = token3;
- } else {
+ }
+ else {
tok_backup(tok, c3);
}
*p_start = tok->start;