Issue #24098: Fixed possible crash when AST is changed in process of compiling it.
author: Serhiy Storchaka <storchaka@gmail.com> 2016-10-07 21:55:49 +0300
committer: Serhiy Storchaka <storchaka@gmail.com> 2016-10-07 21:55:49 +0300
commit: f6884ef5c8209462210b82c43d15a36b09e49cf1 (patch)
tree: e18dff898ac19dd995ca60da43b2484d09dbe2bf /Parser
parent: 6a2df86ef6566ecf275166827b4acf7f2c42d6c7 (diff)
parent: cbc6cfd31fc4e0c80fec50097f3136120cc6e2b5 (diff)
download: cpython-f6884ef5c8209462210b82c43d15a36b09e49cf1.tar.gz
11 files changed, 375 insertions, 103 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index cd0832da8d..f470ad13b6 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -1,4 +1,8 @@
--- ASDL's six builtin types are identifier, int, string, bytes, object, singleton
+-- ASDL's 7 builtin types are:
+-- identifier, int, string, bytes, object, singleton, constant
+--
+-- singleton: None, True or False
+-- constant can be None, whereas None means "no value" for object.
 
 module Python
 {
@@ -24,6 +28,8 @@ module Python
           | Delete(expr* targets)
           | Assign(expr* targets, expr value)
           | AugAssign(expr target, operator op, expr value)
+          -- 'simple' indicates that we annotate simple name without parens
+          | AnnAssign(expr target, expr annotation, expr? value, int simple)
 
           -- use 'orelse' because else is a keyword in target languages
           | For(expr target, expr iter, stmt* body, stmt* orelse)
@@ -71,9 +77,12 @@ module Python
          | Call(expr func, expr* args, keyword* keywords)
          | Num(object n) -- a number as a PyObject.
          | Str(string s) -- need to specify raw, unicode, etc?
+         | FormattedValue(expr value, int? conversion, expr? format_spec)
+         | JoinedStr(expr* values)
          | Bytes(bytes s)
          | NameConstant(singleton value)
          | Ellipsis
+         | Constant(constant value)
 
          -- the following expression can appear in assignment context
          | Attribute(expr value, identifier attr, expr_context ctx)
@@ -101,7 +110,7 @@ module Python
 
     cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
 
-    comprehension = (expr target, expr iter, expr* ifs)
+    comprehension = (expr target, expr iter, expr* ifs, int is_async)
 
     excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body)
                     attributes (int lineno, int col_offset)
diff --git a/Parser/asdl.py b/Parser/asdl.py
index 121cdab952..62f5c19c99 100644
--- a/Parser/asdl.py
+++ b/Parser/asdl.py
@@ -33,7 +33,8 @@ __all__ = [
 # See the EBNF at the top of the file to understand the logical connection
 # between the various node types.
 
-builtin_types = {'identifier', 'string', 'bytes', 'int', 'object', 'singleton'}
+builtin_types = {'identifier', 'string', 'bytes', 'int', 'object', 'singleton',
+                 'constant'}
 
 class AST:
     def __repr__(self):
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index ac9d2b6346..725d73bad4 100755..100644
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -841,6 +841,7 @@ static PyObject* ast2obj_object(void *o)
     return (PyObject*)o;
 }
 #define ast2obj_singleton ast2obj_object
+#define ast2obj_constant ast2obj_object
 #define ast2obj_identifier ast2obj_object
 #define ast2obj_string ast2obj_object
 #define ast2obj_bytes ast2obj_object
@@ -878,6 +879,19 @@ static int obj2ast_object(PyObject* obj, PyObject** out, PyArena* arena)
     return 0;
 }
 
+static int obj2ast_constant(PyObject* obj, PyObject** out, PyArena* arena)
+{
+    if (obj) {
+        if (PyArena_AddPyObject(arena, obj) < 0) {
+            *out = NULL;
+            return -1;
+        }
+        Py_INCREF(obj);
+    }
+    *out = obj;
+    return 0;
+}
+
 static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena)
 {
     if (!PyUnicode_CheckExact(obj) && obj != Py_None) {
@@ -913,7 +927,7 @@ static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
         return 1;
     }
 
-    i = (int)PyLong_AsLong(obj);
+    i = _PyLong_AsInt(obj);
     if (i == -1 && PyErr_Occurred())
         return 1;
     *out = i;
diff --git a/Parser/grammar.c b/Parser/grammar.c
index b598294a4a..75fd5b9cde 100644
--- a/Parser/grammar.c
+++ b/Parser/grammar.c
@@ -28,6 +28,23 @@ newgrammar(int start)
     return g;
 }
 
+/* Release a grammar together with the pieces this function frees: each
+   DFA's name, per-state arc arrays and state array, the DFA array, each
+   label string, the label array, and finally g itself.
+   d_name and lb_str are released with free(); everything else goes
+   through PyObject_FREE(). */
+void
+freegrammar(grammar *g)
+{
+    for (int di = 0; di < g->g_ndfas; di++) {
+        dfa *d = &g->g_dfa[di];
+        int si = 0;
+
+        free(d->d_name);
+        while (si < d->d_nstates) {
+            PyObject_FREE(d->d_state[si].s_arc);
+            si++;
+        }
+        PyObject_FREE(d->d_state);
+    }
+    PyObject_FREE(g->g_dfa);
+
+    for (int li = 0; li < g->g_ll.ll_nlabels; li++) {
+        free(g->g_ll.ll_label[li].lb_str);
+    }
+    PyObject_FREE(g->g_ll.ll_label);
+    PyObject_FREE(g);
+}
+
 dfa *
 adddfa(grammar *g, int type, const char *name)
 {
@@ -63,7 +80,7 @@ addstate(dfa *d)
     s->s_upper = 0;
     s->s_accel = NULL;
     s->s_accept = 0;
-    return Py_SAFE_DOWNCAST(s - d->d_state, Py_intptr_t, int);
+    return Py_SAFE_DOWNCAST(s - d->d_state, intptr_t, int);
 }
 
 void
@@ -105,7 +122,7 @@ addlabel(labellist *ll, int type, const char *str)
     if (Py_DebugFlag)
         printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels,
                PyGrammar_LabelRepr(lb));
-    return Py_SAFE_DOWNCAST(lb - ll->ll_label, Py_intptr_t, int);
+    return Py_SAFE_DOWNCAST(lb - ll->ll_label, intptr_t, int);
 }
 
 /* Same, but rather dies than adds */
@@ -122,7 +139,13 @@ findlabel(labellist *ll, int type, const char *str)
     }
     fprintf(stderr, "Label %d/'%s' not found\n", type, str);
     Py_FatalError("grammar.c:findlabel()");
+
+    /* Py_FatalError() is declared with __attribute__((__noreturn__)).
+       GCC emits a warning without "return 0;" (compiler bug!), but Clang is
+       smarter and emits a warning on the return... */
+#ifndef __clang__
     return 0; /* Make gcc -Wall happy */
+#endif
 }
 
 /* Forward */
diff --git a/Parser/myreadline.c b/Parser/myreadline.c
index 28c7b6d7ff..a8f23b790a 100644
--- a/Parser/myreadline.c
+++ b/Parser/myreadline.c
@@ -41,10 +41,7 @@ my_fgets(char *buf, int len, FILE *fp)
             (void)(PyOS_InputHook)();
         errno = 0;
         clearerr(fp);
-        if (_PyVerify_fd(fileno(fp)))
-            p = fgets(buf, len, fp);
-        else
-            p = NULL;
+        p = fgets(buf, len, fp);
         if (p != NULL)
             return 0; /* No error */
         err = errno;
@@ -101,6 +98,100 @@ my_fgets(char *buf, int len, FILE *fp)
     /* NOTREACHED */
 }
 
+#ifdef MS_WINDOWS
+/* Readline implementation using ReadConsoleW */
+
+extern char _get_console_type(HANDLE handle);
+
+/* Read one line from the console handle hStdIn with ReadConsoleW().
+
+   Returns the text converted to UTF-8 in a NUL-terminated buffer obtained
+   from PyMem_RawMalloc() (an empty string when the input starts with
+   0x1A, i.e. Ctrl-Z), or NULL on failure.  When a Windows error code was
+   recorded -- including ERROR_NOT_ENOUGH_MEMORY for a failed buffer
+   allocation -- the matching Python exception is set via
+   PyErr_SetFromWindowsErr() before returning NULL. */
+char *
+_PyOS_WindowsConsoleReadline(HANDLE hStdIn)
+{
+    static wchar_t wbuf_local[1024 * 16];
+    const DWORD chunk_size = 1024;
+
+    DWORD n_read, total_read, wbuflen, u8len;
+    wchar_t *wbuf;
+    wchar_t *grown;
+    char *buf = NULL;
+    int err = 0;
+
+    n_read = 0;
+    total_read = 0;
+    wbuf = wbuf_local;
+    /* Keep one slot free so a terminator can be stored before copying. */
+    wbuflen = sizeof(wbuf_local) / sizeof(wbuf_local[0]) - 1;
+    while (1) {
+        if (!ReadConsoleW(hStdIn, &wbuf[total_read], wbuflen - total_read, &n_read, NULL)) {
+            err = GetLastError();
+            goto exit;
+        }
+        if (n_read == 0) {
+            int s;
+            err = GetLastError();
+            if (err != ERROR_OPERATION_ABORTED)
+                goto exit;
+            err = 0;
+            HANDLE hInterruptEvent = _PyOS_SigintEvent();
+            if (WaitForSingleObjectEx(hInterruptEvent, 100, FALSE)
+                    == WAIT_OBJECT_0) {
+                ResetEvent(hInterruptEvent);
+                /* PyErr_CheckSignals() is called with the thread state
+                   restored, then the thread state is released again. */
+#ifdef WITH_THREAD
+                PyEval_RestoreThread(_PyOS_ReadlineTState);
+#endif
+                s = PyErr_CheckSignals();
+#ifdef WITH_THREAD
+                PyEval_SaveThread();
+#endif
+                if (s < 0)
+                    goto exit;
+            }
+            break;
+        }
+
+        total_read += n_read;
+        if (total_read == 0 || wbuf[total_read - 1] == L'\n') {
+            break;
+        }
+
+        /* No newline yet: grow the buffer by one chunk and keep reading. */
+        wbuflen += chunk_size;
+        if (wbuf == wbuf_local) {
+            wbuf[total_read] = '\0';
+            grown = (wchar_t*)PyMem_RawMalloc(wbuflen * sizeof(wchar_t));
+            if (grown)
+                wcscpy_s(grown, wbuflen, wbuf_local);
+        }
+        else
+            grown = (wchar_t*)PyMem_RawRealloc(wbuf, wbuflen * sizeof(wchar_t));
+        if (grown == NULL) {
+            /* wbuf still points at the previous buffer, so it is neither
+               dereferenced as NULL nor leaked: "exit" frees it when it is
+               heap-allocated. */
+            err = ERROR_NOT_ENOUGH_MEMORY;
+            goto exit;
+        }
+        wbuf = grown;
+    }
+
+    if (wbuf[0] == '\x1a') {
+        buf = PyMem_RawMalloc(1);
+        if (buf)
+            buf[0] = '\0';
+        goto exit;
+    }
+
+    /* First call sizes the UTF-8 output, second call performs the
+       conversion into the freshly allocated buffer. */
+    u8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, total_read, NULL, 0, NULL, NULL);
+    buf = PyMem_RawMalloc(u8len + 1);
+    if (buf == NULL) {
+        err = ERROR_NOT_ENOUGH_MEMORY;
+        goto exit;
+    }
+    u8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, total_read, buf, u8len, NULL, NULL);
+    buf[u8len] = '\0';
+
+exit:
+    if (wbuf != wbuf_local)
+        PyMem_RawFree(wbuf);
+
+    if (err) {
+#ifdef WITH_THREAD
+        PyEval_RestoreThread(_PyOS_ReadlineTState);
+#endif
+        PyErr_SetFromWindowsErr(err);
+#ifdef WITH_THREAD
+        PyEval_SaveThread();
+#endif
+    }
+
+    return buf;
+}
+
+#endif
+
 
 /* Readline implementation using fgets() */
 
@@ -110,6 +201,25 @@ PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
     size_t n;
     char *p, *pr;
 
+#ifdef MS_WINDOWS
+    if (!Py_LegacyWindowsStdioFlag && sys_stdin == stdin) {
+        HANDLE hStdIn;
+
+        _Py_BEGIN_SUPPRESS_IPH
+        hStdIn = (HANDLE)_get_osfhandle(fileno(sys_stdin));
+        _Py_END_SUPPRESS_IPH
+
+        if (_get_console_type(hStdIn) == 'r') {
+            fflush(sys_stdout);
+            if (prompt)
+                fprintf(stderr, "%s", prompt);
+            fflush(stderr);
+            clearerr(sys_stdin);
+            return _PyOS_WindowsConsoleReadline(hStdIn);
+        }
+    }
+#endif
+
     n = 100;
     p = (char *)PyMem_RawMalloc(n);
     if (p == NULL)
diff --git a/Parser/node.c b/Parser/node.c
index 00103240af..240d29057c 100644
--- a/Parser/node.c
+++ b/Parser/node.c
@@ -91,7 +91,7 @@ PyNode_AddChild(node *n1, int type, char *str, int lineno, int col_offset)
     if (current_capacity < 0 || required_capacity < 0)
         return E_OVERFLOW;
     if (current_capacity < required_capacity) {
-        if ((size_t)required_capacity > PY_SIZE_MAX / sizeof(node)) {
+        if ((size_t)required_capacity > SIZE_MAX / sizeof(node)) {
             return E_NOMEM;
         }
         n = n1->n_child;
diff --git a/Parser/parser.c b/Parser/parser.c
index 56ec5148d3..41072c478c 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -140,21 +140,20 @@ classify(parser_state *ps, int type, const char *str)
     int n = g->g_ll.ll_nlabels;
 
     if (type == NAME) {
-        const char *s = str;
         label *l = g->g_ll.ll_label;
         int i;
         for (i = n; i > 0; i--, l++) {
             if (l->lb_type != NAME || l->lb_str == NULL ||
-                l->lb_str[0] != s[0] ||
-                strcmp(l->lb_str, s) != 0)
+                l->lb_str[0] != str[0] ||
+                strcmp(l->lb_str, str) != 0)
                 continue;
 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
 #if 0
             /* Leaving this in as an example */
             if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
-                if (s[0] == 'w' && strcmp(s, "with") == 0)
+                if (str[0] == 'w' && strcmp(str, "with") == 0)
                     break; /* not a keyword yet */
-                else if (s[0] == 'a' && strcmp(s, "as") == 0)
+                else if (str[0] == 'a' && strcmp(str, "as") == 0)
                     break; /* not a keyword yet */
             }
 #endif
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 629dee565c..1f467d63c4 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -161,10 +161,10 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
 
 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
 #if 0
-static char with_msg[] =
+static const char with_msg[] =
 "%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";
 
-static char as_msg[] =
+static const char as_msg[] =
 "%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";
 
 static void
@@ -255,7 +255,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
 #endif
         if (a >= tok->line_start)
             col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
-                                          Py_intptr_t, int);
+                                          intptr_t, int);
         else
             col_offset = -1;
 
diff --git a/Parser/pgen.c b/Parser/pgen.c
index f3031aea0b..6451a1d998 100644
--- a/Parser/pgen.c
+++ b/Parser/pgen.c
@@ -117,6 +117,16 @@ newnfagrammar(void)
     return gr;
 }
 
+/* Release an NFA grammar: the state array of every NFA, then the table of
+   NFA pointers, then gr itself.
+   NOTE(review): the nfa structs that gr_nfa[] points to are not freed
+   here -- confirm whether they are released elsewhere. */
+static void
+freenfagrammar(nfagrammar *gr)
+{
+    int idx = 0;
+
+    while (idx < gr->gr_nnfas) {
+        PyObject_FREE(gr->gr_nfa[idx]->nf_state);
+        idx++;
+    }
+    PyObject_FREE(gr->gr_nfa);
+    PyObject_FREE(gr);
+}
+
 static nfa *
 addnfa(nfagrammar *gr, char *name)
 {
@@ -134,7 +144,7 @@ addnfa(nfagrammar *gr, char *name)
 
 #ifdef Py_DEBUG
 
-static char REQNFMT[] = "metacompile: less than %d children\n";
+static const char REQNFMT[] = "metacompile: less than %d children\n";
 
 #define REQN(i, count) do { \
     if (i < count) { \
@@ -379,7 +389,7 @@ typedef struct _ss_dfa {
 
 /* Forward */
 static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
-                       labellist *ll, char *msg);
+                       labellist *ll, const char *msg);
 static void simplify(int xx_nstates, ss_state *xx_state);
 static void convert(dfa *d, int xx_nstates, ss_state *xx_state);
 
@@ -488,13 +498,17 @@ makedfa(nfagrammar *gr, nfa *nf, dfa *d)
 
     convert(d, xx_nstates, xx_state);
 
-    /* XXX cleanup */
+    for (int i = 0; i < xx_nstates; i++) {
+        for (int j = 0; j < xx_state[i].ss_narcs; j++)
+            delbitset(xx_state[i].ss_arc[j].sa_bitset);
+        PyObject_FREE(xx_state[i].ss_arc);
+    }
     PyObject_FREE(xx_state);
 }
 
 static void
 printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
-           labellist *ll, char *msg)
+           labellist *ll, const char *msg)
 {
     int i, ibit, iarc;
     ss_state *yy;
@@ -669,7 +683,7 @@ pgen(node *n)
     g = maketables(gr);
     translatelabels(g);
     addfirstsets(g);
-    PyObject_FREE(gr);
+    freenfagrammar(gr);
     return g;
 }
 
diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c
index 0f055d6308..e386248c2f 100644
--- a/Parser/pgenmain.c
+++ b/Parser/pgenmain.c
@@ -27,7 +27,7 @@ int Py_VerboseFlag;
 int Py_IgnoreEnvironmentFlag;
 
 /* Forward */
-grammar *getgrammar(char *filename);
+grammar *getgrammar(const char *filename);
 
 void Py_Exit(int) _Py_NO_RETURN;
 
@@ -37,6 +37,15 @@ Py_Exit(int sts)
     exit(sts);
 }
 
+#ifdef WITH_THREAD
+/* obmalloc.c needs PyGILState_Check(); this stand-in unconditionally
+   answers 1. */
+int
+PyGILState_Check(void)
+{
+    return 1;
+}
+#endif
+
+/* No-op stand-in for _PyMem_DumpTraceback(); both arguments are ignored. */
+void
+_PyMem_DumpTraceback(int fd, const void *ptr)
+{
+    (void)fd;
+    (void)ptr;
+}
+
 int
 main(int argc, char **argv)
 {
@@ -71,12 +80,13 @@ main(int argc, char **argv)
         printf("Writing %s ...\n", graminit_h);
     printnonterminals(g, fp);
     fclose(fp);
+    freegrammar(g);
     Py_Exit(0);
     return 0; /* Make gcc -Wall happy */
 }
 
 grammar *
-getgrammar(char *filename)
+getgrammar(const char *filename)
 {
     FILE *fp;
     node *n;
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 612cb23545..8317293796 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -202,8 +202,8 @@ error_ret(struct tok_state *tok) /* XXX */
 }
 
 
-static char *
-get_normal_name(char *s)        /* for utf-8 and latin-1 */
+static const char *
+get_normal_name(const char *s)  /* for utf-8 and latin-1 */
 {
     char buf[13];
     int i;
@@ -264,7 +264,7 @@ get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *t
 
             if (begin < t) {
                 char* r = new_string(begin, t - begin, tok);
-                char* q;
+                const char* q;
                 if (!r)
                     return 0;
                 q = get_normal_name(r);
@@ -1335,6 +1335,28 @@ verify_identifier(struct tok_state *tok)
 }
 #endif
 
+/* Consume the remainder of a decimal digit run in which single underscores
+   may separate groups of digits.  Reading starts with the next character
+   from tok_nextc(); the function does not examine any earlier character.
+
+   Returns the first character that is neither a digit nor an underscore
+   (it has been read and is NOT pushed back).  If an underscore is not
+   followed by a digit, sets tok->done to E_TOKEN, pushes the offending
+   character back and returns 0. */
+static int
+tok_decimal_tail(struct tok_state *tok)
+{
+    for (;;) {
+        int ch = tok_nextc(tok);
+
+        /* Skip over the current group of digits. */
+        while (isdigit(ch)) {
+            ch = tok_nextc(tok);
+        }
+        if (ch != '_') {
+            return ch;
+        }
+
+        /* An underscore must be followed by another digit. */
+        ch = tok_nextc(tok);
+        if (!isdigit(ch)) {
+            tok->done = E_TOKEN;
+            tok_backup(tok, ch);
+            return 0;
+        }
+    }
+}
+
 /* Get next token, after space stripping etc. */
 
 static int
@@ -1355,17 +1377,20 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
         tok->atbol = 0;
         for (;;) {
             c = tok_nextc(tok);
-            if (c == ' ')
+            if (c == ' ') {
                 col++, altcol++;
+            }
             else if (c == '\t') {
                 col = (col/tok->tabsize + 1) * tok->tabsize;
                 altcol = (altcol/tok->alttabsize + 1)
                     * tok->alttabsize;
             }
-            else if (c == '\014') /* Control-L (formfeed) */
+            else if (c == '\014')  {/* Control-L (formfeed) */
                 col = altcol = 0; /* For Emacs users */
-            else
+            }
+            else {
                 break;
+            }
         }
         tok_backup(tok, c);
         if (c == '#' || c == '\n') {
@@ -1374,10 +1399,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                not passed to the parser as NEWLINE tokens,
                except *totally* empty lines in interactive
                mode, which signal the end of a command group. */
-            if (col == 0 && c == '\n' && tok->prompt != NULL)
+            if (col == 0 && c == '\n' && tok->prompt != NULL) {
                 blankline = 0; /* Let it through */
-            else
+            }
+            else {
                 blankline = 1; /* Ignore completely */
+            }
             /* We can't jump back right here since we still
                may need to skip to the end of a comment */
         }
@@ -1385,8 +1412,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
             if (col == tok->indstack[tok->indent]) {
                 /* No change */
                 if (altcol != tok->altindstack[tok->indent]) {
-                    if (indenterror(tok))
+                    if (indenterror(tok)) {
                         return ERRORTOKEN;
+                    }
                 }
             }
             else if (col > tok->indstack[tok->indent]) {
@@ -1397,8 +1425,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                     return ERRORTOKEN;
                 }
                 if (altcol <= tok->altindstack[tok->indent]) {
-                    if (indenterror(tok))
+                    if (indenterror(tok)) {
                         return ERRORTOKEN;
+                    }
                 }
                 tok->pendin++;
                 tok->indstack[++tok->indent] = col;
@@ -1417,8 +1446,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                     return ERRORTOKEN;
                 }
                 if (altcol != tok->altindstack[tok->indent]) {
-                    if (indenterror(tok))
+                    if (indenterror(tok)) {
                         return ERRORTOKEN;
+                    }
                 }
             }
         }
@@ -1464,9 +1494,11 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
     tok->start = tok->cur - 1;
 
     /* Skip comment */
-    if (c == '#')
-        while (c != EOF && c != '\n')
+    if (c == '#') {
+        while (c != EOF && c != '\n') {
             c = tok_nextc(tok);
+        }
+    }
 
     /* Check for EOF and errors now */
     if (c == EOF) {
@@ -1477,31 +1509,41 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
     nonascii = 0;
     if (is_potential_identifier_start(c)) {
         /* Process b"", r"", u"", br"" and rb"" */
-        int saw_b = 0, saw_r = 0, saw_u = 0;
+        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0;
         while (1) {
-            if (!(saw_b || saw_u) && (c == 'b' || c == 'B'))
+            if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B'))
                 saw_b = 1;
             /* Since this is a backwards compatibility support literal we don't
                want to support it in arbitrary order like byte literals. */
-            else if (!(saw_b || saw_u || saw_r) && (c == 'u' || c == 'U'))
+            else if (!(saw_b || saw_u || saw_r || saw_f)
+                     && (c == 'u'|| c == 'U')) {
                 saw_u = 1;
+            }
             /* ur"" and ru"" are not supported */
-            else if (!(saw_r || saw_u) && (c == 'r' || c == 'R'))
+            else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) {
                 saw_r = 1;
-            else
+            }
+            else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) {
+                saw_f = 1;
+            }
+            else {
                 break;
+            }
             c = tok_nextc(tok);
-            if (c == '"' || c == '\'')
+            if (c == '"' || c == '\'') {
                 goto letter_quote;
+            }
         }
         while (is_potential_identifier_char(c)) {
-            if (c >= 128)
+            if (c >= 128) {
                 nonascii = 1;
+            }
             c = tok_nextc(tok);
         }
         tok_backup(tok, c);
-        if (nonascii && !verify_identifier(tok))
+        if (nonascii && !verify_identifier(tok)) {
             return ERRORTOKEN;
+        }
         *p_start = tok->start;
         *p_end = tok->cur;
 
@@ -1510,10 +1552,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
             /* Current token length is 5. */
             if (tok->async_def) {
                 /* We're inside an 'async def' function. */
-                if (memcmp(tok->start, "async", 5) == 0)
+                if (memcmp(tok->start, "async", 5) == 0) {
                     return ASYNC;
-                if (memcmp(tok->start, "await", 5) == 0)
+                }
+                if (memcmp(tok->start, "await", 5) == 0) {
                     return AWAIT;
+                }
             }
             else if (memcmp(tok->start, "async", 5) == 0) {
                 /* The current token is 'async'.
@@ -1546,8 +1590,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
     /* Newline */
     if (c == '\n') {
         tok->atbol = 1;
-        if (blankline || tok->level > 0)
+        if (blankline || tok->level > 0) {
             goto nextline;
+        }
         *p_start = tok->start;
         *p_end = tok->cur - 1; /* Leave '\n' out of the string */
         tok->cont_line = 0;
@@ -1570,11 +1615,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                 *p_start = tok->start;
                 *p_end = tok->cur;
                 return ELLIPSIS;
-            } else {
+            }
+            else {
                 tok_backup(tok, c);
             }
             tok_backup(tok, '.');
-        } else {
+        }
+        else {
             tok_backup(tok, c);
         }
         *p_start = tok->start;
@@ -1587,64 +1634,94 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
         if (c == '0') {
             /* Hex, octal or binary -- maybe. */
             c = tok_nextc(tok);
-            if (c == '.')
-                goto fraction;
-            if (c == 'j' || c == 'J')
-                goto imaginary;
             if (c == 'x' || c == 'X') {
-
                 /* Hex */
                 c = tok_nextc(tok);
-                if (!isxdigit(c)) {
-                    tok->done = E_TOKEN;
-                    tok_backup(tok, c);
-                    return ERRORTOKEN;
-                }
                 do {
-                    c = tok_nextc(tok);
-                } while (isxdigit(c));
+                    if (c == '_') {
+                        c = tok_nextc(tok);
+                    }
+                    if (!isxdigit(c)) {
+                        tok->done = E_TOKEN;
+                        tok_backup(tok, c);
+                        return ERRORTOKEN;
+                    }
+                    do {
+                        c = tok_nextc(tok);
+                    } while (isxdigit(c));
+                } while (c == '_');
             }
             else if (c == 'o' || c == 'O') {
                 /* Octal */
                 c = tok_nextc(tok);
-                if (c < '0' || c >= '8') {
-                    tok->done = E_TOKEN;
-                    tok_backup(tok, c);
-                    return ERRORTOKEN;
-                }
                 do {
-                    c = tok_nextc(tok);
-                } while ('0' <= c && c < '8');
+                    if (c == '_') {
+                        c = tok_nextc(tok);
+                    }
+                    if (c < '0' || c >= '8') {
+                        tok->done = E_TOKEN;
+                        tok_backup(tok, c);
+                        return ERRORTOKEN;
+                    }
+                    do {
+                        c = tok_nextc(tok);
+                    } while ('0' <= c && c < '8');
+                } while (c == '_');
             }
             else if (c == 'b' || c == 'B') {
                 /* Binary */
                 c = tok_nextc(tok);
-                if (c != '0' && c != '1') {
-                    tok->done = E_TOKEN;
-                    tok_backup(tok, c);
-                    return ERRORTOKEN;
-                }
                 do {
-                    c = tok_nextc(tok);
-                } while (c == '0' || c == '1');
+                    if (c == '_') {
+                        c = tok_nextc(tok);
+                    }
+                    if (c != '0' && c != '1') {
+                        tok->done = E_TOKEN;
+                        tok_backup(tok, c);
+                        return ERRORTOKEN;
+                    }
+                    do {
+                        c = tok_nextc(tok);
+                    } while (c == '0' || c == '1');
+                } while (c == '_');
             }
             else {
                 int nonzero = 0;
                 /* maybe old-style octal; c is first char of it */
                 /* in any case, allow '0' as a literal */
-                while (c == '0')
+                while (1) {
+                    if (c == '_') {
+                        c = tok_nextc(tok);
+                        if (!isdigit(c)) {
+                            tok->done = E_TOKEN;
+                            tok_backup(tok, c);
+                            return ERRORTOKEN;
+                        }
+                    }
+                    if (c != '0') {
+                        break;
+                    }
                     c = tok_nextc(tok);
-                while (isdigit(c)) {
+                }
+                if (isdigit(c)) {
                     nonzero = 1;
-                    c = tok_nextc(tok);
+                    c = tok_decimal_tail(tok);
+                    if (c == 0) {
+                        return ERRORTOKEN;
+                    }
                 }
-                if (c == '.')
+                if (c == '.') {
+                    c = tok_nextc(tok);
                     goto fraction;
-                else if (c == 'e' || c == 'E')
+                }
+                else if (c == 'e' || c == 'E') {
                     goto exponent;
-                else if (c == 'j' || c == 'J')
+                }
+                else if (c == 'j' || c == 'J') {
                     goto imaginary;
+                }
                 else if (nonzero) {
+                    /* Old-style octal: now disallowed. */
                     tok->done = E_TOKEN;
                     tok_backup(tok, c);
                     return ERRORTOKEN;
@@ -1653,17 +1730,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
         }
         else {
             /* Decimal */
-            do {
-                c = tok_nextc(tok);
-            } while (isdigit(c));
+            c = tok_decimal_tail(tok);
+            if (c == 0) {
+                return ERRORTOKEN;
+            }
             {
                 /* Accept floating point numbers. */
                 if (c == '.') {
+                    c = tok_nextc(tok);
         fraction:
                     /* Fraction */
-                    do {
-                        c = tok_nextc(tok);
-                    } while (isdigit(c));
+                    if (isdigit(c)) {
+                        c = tok_decimal_tail(tok);
+                        if (c == 0) {
+                            return ERRORTOKEN;
+                        }
+                    }
                 }
                 if (c == 'e' || c == 'E') {
                     int e;
@@ -1685,14 +1767,16 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                         *p_end = tok->cur;
                         return NUMBER;
                     }
-                    do {
-                        c = tok_nextc(tok);
-                    } while (isdigit(c));
+                    c = tok_decimal_tail(tok);
+                    if (c == 0) {
+                        return ERRORTOKEN;
+                    }
                 }
-                if (c == 'j' || c == 'J')
+                if (c == 'j' || c == 'J') {
                     /* Imaginary part */
         imaginary:
                     c = tok_nextc(tok);
+                }
             }
         }
         tok_backup(tok, c);
@@ -1712,22 +1796,27 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
         c = tok_nextc(tok);
         if (c == quote) {
             c = tok_nextc(tok);
-            if (c == quote)
+            if (c == quote) {
                 quote_size = 3;
-            else
+            }
+            else {
                 end_quote_size = 1;     /* empty string found */
+            }
         }
-        if (c != quote)
+        if (c != quote) {
             tok_backup(tok, c);
+        }
 
         /* Get rest of string */
         while (end_quote_size != quote_size) {
             c = tok_nextc(tok);
             if (c == EOF) {
-                if (quote_size == 3)
+                if (quote_size == 3) {
                     tok->done = E_EOFS;
-                else
+                }
+                else {
                     tok->done = E_EOLS;
+                }
                 tok->cur = tok->inp;
                 return ERRORTOKEN;
             }
@@ -1736,12 +1825,14 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                 tok->cur = tok->inp;
                 return ERRORTOKEN;
             }
-            if (c == quote)
+            if (c == quote) {
                 end_quote_size += 1;
+            }
             else {
                 end_quote_size = 0;
-                if (c == '\\')
-                c = tok_nextc(tok);  /* skip escaped char */
+                if (c == '\\') {
+                    tok_nextc(tok);  /* skip escaped char */
+                }
             }
         }
 
@@ -1771,7 +1862,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
             int token3 = PyToken_ThreeChars(c, c2, c3);
             if (token3 != OP) {
                 token = token3;
-            } else {
+            }
+            else {
                 tok_backup(tok, c3);
             }
             *p_start = tok->start;
author	Serhiy Storchaka <storchaka@gmail.com>	2016-10-07 21:55:49 +0300
committer	Serhiy Storchaka <storchaka@gmail.com>	2016-10-07 21:55:49 +0300
commit	f6884ef5c8209462210b82c43d15a36b09e49cf1 (patch)
tree	e18dff898ac19dd995ca60da43b2484d09dbe2bf /Parser
parent	6a2df86ef6566ecf275166827b4acf7f2c42d6c7 (diff)
parent	cbc6cfd31fc4e0c80fec50097f3136120cc6e2b5 (diff)
download	cpython-f6884ef5c8209462210b82c43d15a36b09e49cf1.tar.gz