summaryrefslogtreecommitdiff
path: root/Python/ast.c
diff options
context:
space:
mode:
Diffstat (limited to 'Python/ast.c')
-rw-r--r--Python/ast.c922
1 files changed, 686 insertions, 236 deletions
diff --git a/Python/ast.c b/Python/ast.c
index edcd18b9b8..9a0b0649db 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -1,24 +1,506 @@
/*
* This file includes functions to transform a concrete syntax tree (CST) to
- * an abstract syntax tree (AST). The main function is PyAST_FromNode().
+ * an abstract syntax tree (AST). The main function is PyAST_FromNode().
*
*/
#include "Python.h"
#include "Python-ast.h"
-#include "grammar.h"
#include "node.h"
#include "ast.h"
#include "token.h"
-#include "parsetok.h"
-#include "graminit.h"
#include <assert.h>
+static int validate_stmts(asdl_seq *);
+static int validate_exprs(asdl_seq *, expr_context_ty, int);
+static int validate_nonempty_seq(asdl_seq *, const char *, const char *);
+static int validate_stmt(stmt_ty);
+static int validate_expr(expr_ty, expr_context_ty);
+
+static int
+validate_comprehension(asdl_seq *gens)
+{
+ int i;
+ if (!asdl_seq_LEN(gens)) {
+ PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
+ return 0;
+ }
+ for (i = 0; i < asdl_seq_LEN(gens); i++) {
+ comprehension_ty comp = asdl_seq_GET(gens, i);
+ if (!validate_expr(comp->target, Store) ||
+ !validate_expr(comp->iter, Load) ||
+ !validate_exprs(comp->ifs, Load, 0))
+ return 0;
+ }
+ return 1;
+}
+
+static int
+validate_slice(slice_ty slice)
+{
+ switch (slice->kind) {
+ case Slice_kind:
+ return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) &&
+ (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) &&
+ (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load));
+ case ExtSlice_kind: {
+ int i;
+ if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice"))
+ return 0;
+ for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++)
+ if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i)))
+ return 0;
+ return 1;
+ }
+ case Index_kind:
+ return validate_expr(slice->v.Index.value, Load);
+ default:
+ PyErr_SetString(PyExc_SystemError, "unknown slice node");
+ return 0;
+ }
+}
+
+static int
+validate_keywords(asdl_seq *keywords)
+{
+ int i;
+ for (i = 0; i < asdl_seq_LEN(keywords); i++)
+ if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
+ return 0;
+ return 1;
+}
+
+static int
+validate_args(asdl_seq *args)
+{
+ int i;
+ for (i = 0; i < asdl_seq_LEN(args); i++) {
+ arg_ty arg = asdl_seq_GET(args, i);
+ if (arg->annotation && !validate_expr(arg->annotation, Load))
+ return 0;
+ }
+ return 1;
+}
+
+static const char *
+expr_context_name(expr_context_ty ctx)
+{
+ switch (ctx) {
+ case Load:
+ return "Load";
+ case Store:
+ return "Store";
+ case Del:
+ return "Del";
+ case AugLoad:
+ return "AugLoad";
+ case AugStore:
+ return "AugStore";
+ case Param:
+ return "Param";
+ default:
+ assert(0);
+ return "(unknown)";
+ }
+}
+
+static int
+validate_arguments(arguments_ty args)
+{
+ if (!validate_args(args->args))
+ return 0;
+ if (args->varargannotation) {
+ if (!args->vararg) {
+ PyErr_SetString(PyExc_ValueError, "varargannotation but no vararg on arguments");
+ return 0;
+ }
+ if (!validate_expr(args->varargannotation, Load))
+ return 0;
+ }
+ if (!validate_args(args->kwonlyargs))
+ return 0;
+ if (args->kwargannotation) {
+ if (!args->kwarg) {
+ PyErr_SetString(PyExc_ValueError, "kwargannotation but no kwarg on arguments");
+ return 0;
+ }
+ if (!validate_expr(args->kwargannotation, Load))
+ return 0;
+ }
+ if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->args)) {
+ PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
+ return 0;
+ }
+ if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
+ PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
+ "kw_defaults on arguments");
+ return 0;
+ }
+ return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
+}
+
+static int
+validate_expr(expr_ty exp, expr_context_ty ctx)
+{
+ int check_ctx = 1;
+ expr_context_ty actual_ctx;
+
+ /* First check expression context. */
+ switch (exp->kind) {
+ case Attribute_kind:
+ actual_ctx = exp->v.Attribute.ctx;
+ break;
+ case Subscript_kind:
+ actual_ctx = exp->v.Subscript.ctx;
+ break;
+ case Starred_kind:
+ actual_ctx = exp->v.Starred.ctx;
+ break;
+ case Name_kind:
+ actual_ctx = exp->v.Name.ctx;
+ break;
+ case List_kind:
+ actual_ctx = exp->v.List.ctx;
+ break;
+ case Tuple_kind:
+ actual_ctx = exp->v.Tuple.ctx;
+ break;
+ default:
+ if (ctx != Load) {
+ PyErr_Format(PyExc_ValueError, "expression which can't be "
+ "assigned to in %s context", expr_context_name(ctx));
+ return 0;
+ }
+ check_ctx = 0;
+ }
+ if (check_ctx && actual_ctx != ctx) {
+ PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
+ expr_context_name(ctx), expr_context_name(actual_ctx));
+ return 0;
+ }
+
+ /* Now validate expression. */
+ switch (exp->kind) {
+ case BoolOp_kind:
+ if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
+ PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
+ return 0;
+ }
+ return validate_exprs(exp->v.BoolOp.values, Load, 0);
+ case BinOp_kind:
+ return validate_expr(exp->v.BinOp.left, Load) &&
+ validate_expr(exp->v.BinOp.right, Load);
+ case UnaryOp_kind:
+ return validate_expr(exp->v.UnaryOp.operand, Load);
+ case Lambda_kind:
+ return validate_arguments(exp->v.Lambda.args) &&
+ validate_expr(exp->v.Lambda.body, Load);
+ case IfExp_kind:
+ return validate_expr(exp->v.IfExp.test, Load) &&
+ validate_expr(exp->v.IfExp.body, Load) &&
+ validate_expr(exp->v.IfExp.orelse, Load);
+ case Dict_kind:
+ if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Dict doesn't have the same number of keys as values");
+ return 0;
+ }
+ return validate_exprs(exp->v.Dict.keys, Load, 0) &&
+ validate_exprs(exp->v.Dict.values, Load, 0);
+ case Set_kind:
+ return validate_exprs(exp->v.Set.elts, Load, 0);
+#define COMP(NAME) \
+ case NAME ## _kind: \
+ return validate_comprehension(exp->v.NAME.generators) && \
+ validate_expr(exp->v.NAME.elt, Load);
+ COMP(ListComp)
+ COMP(SetComp)
+ COMP(GeneratorExp)
+#undef COMP
+ case DictComp_kind:
+ return validate_comprehension(exp->v.DictComp.generators) &&
+ validate_expr(exp->v.DictComp.key, Load) &&
+ validate_expr(exp->v.DictComp.value, Load);
+ case Yield_kind:
+ return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
+ case YieldFrom_kind:
+ return validate_expr(exp->v.YieldFrom.value, Load);
+ case Compare_kind:
+ if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
+ PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
+ return 0;
+ }
+ if (asdl_seq_LEN(exp->v.Compare.comparators) !=
+ asdl_seq_LEN(exp->v.Compare.ops)) {
+ PyErr_SetString(PyExc_ValueError, "Compare has a different number "
+ "of comparators and operands");
+ return 0;
+ }
+ return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
+ validate_expr(exp->v.Compare.left, Load);
+ case Call_kind:
+ return validate_expr(exp->v.Call.func, Load) &&
+ validate_exprs(exp->v.Call.args, Load, 0) &&
+ validate_keywords(exp->v.Call.keywords) &&
+ (!exp->v.Call.starargs || validate_expr(exp->v.Call.starargs, Load)) &&
+ (!exp->v.Call.kwargs || validate_expr(exp->v.Call.kwargs, Load));
+ case Num_kind: {
+ PyObject *n = exp->v.Num.n;
+ if (!PyLong_CheckExact(n) && !PyFloat_CheckExact(n) &&
+ !PyComplex_CheckExact(n)) {
+ PyErr_SetString(PyExc_TypeError, "non-numeric type in Num");
+ return 0;
+ }
+ return 1;
+ }
+ case Str_kind: {
+ PyObject *s = exp->v.Str.s;
+ if (!PyUnicode_CheckExact(s)) {
+ PyErr_SetString(PyExc_TypeError, "non-string type in Str");
+ return 0;
+ }
+ return 1;
+ }
+ case Bytes_kind: {
+ PyObject *b = exp->v.Bytes.s;
+ if (!PyBytes_CheckExact(b)) {
+ PyErr_SetString(PyExc_TypeError, "non-bytes type in Bytes");
+ return 0;
+ }
+ return 1;
+ }
+ case Attribute_kind:
+ return validate_expr(exp->v.Attribute.value, Load);
+ case Subscript_kind:
+ return validate_slice(exp->v.Subscript.slice) &&
+ validate_expr(exp->v.Subscript.value, Load);
+ case Starred_kind:
+ return validate_expr(exp->v.Starred.value, ctx);
+ case List_kind:
+ return validate_exprs(exp->v.List.elts, ctx, 0);
+ case Tuple_kind:
+ return validate_exprs(exp->v.Tuple.elts, ctx, 0);
+ /* These last cases don't have any checking. */
+ case Name_kind:
+ case Ellipsis_kind:
+ return 1;
+ default:
+ PyErr_SetString(PyExc_SystemError, "unexpected expression");
+ return 0;
+ }
+}
+
+static int
+validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
+{
+ if (asdl_seq_LEN(seq))
+ return 1;
+ PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
+ return 0;
+}
+
+static int
+validate_assignlist(asdl_seq *targets, expr_context_ty ctx)
+{
+ return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
+ validate_exprs(targets, ctx, 0);
+}
+
+static int
+validate_body(asdl_seq *body, const char *owner)
+{
+ return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
+}
+
+static int
+validate_stmt(stmt_ty stmt)
+{
+ int i;
+ switch (stmt->kind) {
+ case FunctionDef_kind:
+ return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
+ validate_arguments(stmt->v.FunctionDef.args) &&
+ validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
+ (!stmt->v.FunctionDef.returns ||
+ validate_expr(stmt->v.FunctionDef.returns, Load));
+ case ClassDef_kind:
+ return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
+ validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
+ validate_keywords(stmt->v.ClassDef.keywords) &&
+ validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0) &&
+ (!stmt->v.ClassDef.starargs || validate_expr(stmt->v.ClassDef.starargs, Load)) &&
+ (!stmt->v.ClassDef.kwargs || validate_expr(stmt->v.ClassDef.kwargs, Load));
+ case Return_kind:
+ return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
+ case Delete_kind:
+ return validate_assignlist(stmt->v.Delete.targets, Del);
+ case Assign_kind:
+ return validate_assignlist(stmt->v.Assign.targets, Store) &&
+ validate_expr(stmt->v.Assign.value, Load);
+ case AugAssign_kind:
+ return validate_expr(stmt->v.AugAssign.target, Store) &&
+ validate_expr(stmt->v.AugAssign.value, Load);
+ case For_kind:
+ return validate_expr(stmt->v.For.target, Store) &&
+ validate_expr(stmt->v.For.iter, Load) &&
+ validate_body(stmt->v.For.body, "For") &&
+ validate_stmts(stmt->v.For.orelse);
+ case While_kind:
+ return validate_expr(stmt->v.While.test, Load) &&
+ validate_body(stmt->v.While.body, "While") &&
+ validate_stmts(stmt->v.While.orelse);
+ case If_kind:
+ return validate_expr(stmt->v.If.test, Load) &&
+ validate_body(stmt->v.If.body, "If") &&
+ validate_stmts(stmt->v.If.orelse);
+ case With_kind:
+ if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
+ return 0;
+ for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
+ withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
+ if (!validate_expr(item->context_expr, Load) ||
+ (item->optional_vars && !validate_expr(item->optional_vars, Store)))
+ return 0;
+ }
+ return validate_body(stmt->v.With.body, "With");
+ case Raise_kind:
+ if (stmt->v.Raise.exc) {
+ return validate_expr(stmt->v.Raise.exc, Load) &&
+ (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
+ }
+ if (stmt->v.Raise.cause) {
+ PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
+ return 0;
+ }
+ return 1;
+ case Try_kind:
+ if (!validate_body(stmt->v.Try.body, "Try"))
+ return 0;
+ if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
+ !asdl_seq_LEN(stmt->v.Try.finalbody)) {
+ PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
+ return 0;
+ }
+ if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
+ asdl_seq_LEN(stmt->v.Try.orelse)) {
+ PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
+ return 0;
+ }
+ for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
+ excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
+ if ((handler->v.ExceptHandler.type &&
+ !validate_expr(handler->v.ExceptHandler.type, Load)) ||
+ !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
+ return 0;
+ }
+ return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
+ validate_stmts(stmt->v.Try.finalbody)) &&
+ (!asdl_seq_LEN(stmt->v.Try.orelse) ||
+ validate_stmts(stmt->v.Try.orelse));
+ case Assert_kind:
+ return validate_expr(stmt->v.Assert.test, Load) &&
+ (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
+ case Import_kind:
+ return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
+ case ImportFrom_kind:
+ if (stmt->v.ImportFrom.level < -1) {
+ PyErr_SetString(PyExc_ValueError, "ImportFrom level less than -1");
+ return 0;
+ }
+ return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
+ case Global_kind:
+ return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
+ case Nonlocal_kind:
+ return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
+ case Expr_kind:
+ return validate_expr(stmt->v.Expr.value, Load);
+ case Pass_kind:
+ case Break_kind:
+ case Continue_kind:
+ return 1;
+ default:
+ PyErr_SetString(PyExc_SystemError, "unexpected statement");
+ return 0;
+ }
+}
+
+static int
+validate_stmts(asdl_seq *seq)
+{
+ int i;
+ for (i = 0; i < asdl_seq_LEN(seq); i++) {
+ stmt_ty stmt = asdl_seq_GET(seq, i);
+ if (stmt) {
+ if (!validate_stmt(stmt))
+ return 0;
+ }
+ else {
+ PyErr_SetString(PyExc_ValueError,
+ "None disallowed in statement list");
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static int
+validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
+{
+ int i;
+ for (i = 0; i < asdl_seq_LEN(exprs); i++) {
+ expr_ty expr = asdl_seq_GET(exprs, i);
+ if (expr) {
+ if (!validate_expr(expr, ctx))
+ return 0;
+ }
+ else if (!null_ok) {
+ PyErr_SetString(PyExc_ValueError,
+ "None disallowed in expression list");
+ return 0;
+ }
+
+ }
+ return 1;
+}
+
+int
+PyAST_Validate(mod_ty mod)
+{
+ int res = 0;
+
+ switch (mod->kind) {
+ case Module_kind:
+ res = validate_stmts(mod->v.Module.body);
+ break;
+ case Interactive_kind:
+ res = validate_stmts(mod->v.Interactive.body);
+ break;
+ case Expression_kind:
+ res = validate_expr(mod->v.Expression.body, Load);
+ break;
+ case Suite_kind:
+ PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler");
+ break;
+ default:
+ PyErr_SetString(PyExc_SystemError, "impossible module node");
+ res = 0;
+ break;
+ }
+ return res;
+}
+
+/* This is done here, so defines like "test" don't interfere with AST use above. */
+#include "grammar.h"
+#include "parsetok.h"
+#include "graminit.h"
+
/* Data structure used internally */
struct compiling {
char *c_encoding; /* source encoding */
PyArena *c_arena; /* arena for allocating memeory */
const char *c_filename; /* filename */
+ PyObject *c_normalize; /* Normalization function from unicodedata. */
+ PyObject *c_normalize_args; /* Normalization argument tuple. */
};
static asdl_seq *seq_for_testlist(struct compiling *, const node *);
@@ -38,127 +520,96 @@ static PyObject *parsestr(struct compiling *, const node *n, int *bytesmode);
static PyObject *parsestrplus(struct compiling *, const node *n,
int *bytesmode);
-#ifndef LINENO
-#define LINENO(n) ((n)->n_lineno)
-#endif
-
#define COMP_GENEXP 0
#define COMP_LISTCOMP 1
#define COMP_SETCOMP 2
+static int
+init_normalization(struct compiling *c)
+{
+ PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
+ if (!m)
+ return 0;
+ c->c_normalize = PyObject_GetAttrString(m, "normalize");
+ Py_DECREF(m);
+ if (!c->c_normalize)
+ return 0;
+ c->c_normalize_args = Py_BuildValue("(sN)", "NFKC", Py_None);
+ if (!c->c_normalize_args) {
+ Py_CLEAR(c->c_normalize);
+ return 0;
+ }
+ PyTuple_SET_ITEM(c->c_normalize_args, 1, NULL);
+ return 1;
+}
+
static identifier
-new_identifier(const char *n, PyArena *arena)
+new_identifier(const char *n, struct compiling *c)
{
PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
- Py_UNICODE *u;
if (!id)
return NULL;
- u = PyUnicode_AS_UNICODE(id);
+ /* PyUnicode_DecodeUTF8 should always return a ready string. */
+ assert(PyUnicode_IS_READY(id));
/* Check whether there are non-ASCII characters in the
identifier; if so, normalize to NFKC. */
- for (; *u; u++) {
- if (*u >= 128) {
- PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
- PyObject *id2;
- if (!m)
- return NULL;
- id2 = PyObject_CallMethod(m, "normalize", "sO", "NFKC", id);
- Py_DECREF(m);
- if (!id2)
- return NULL;
+ if (!PyUnicode_IS_ASCII(id)) {
+ PyObject *id2;
+ if (!c->c_normalize && !init_normalization(c)) {
Py_DECREF(id);
- id = id2;
- break;
+ return NULL;
}
+ PyTuple_SET_ITEM(c->c_normalize_args, 1, id);
+ id2 = PyObject_Call(c->c_normalize, c->c_normalize_args, NULL);
+ Py_DECREF(id);
+ if (!id2)
+ return NULL;
+ id = id2;
}
PyUnicode_InternInPlace(&id);
- PyArena_AddPyObject(arena, id);
+ PyArena_AddPyObject(c->c_arena, id);
return id;
}
-#define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
-
-/* This routine provides an invalid object for the syntax error.
- The outermost routine must unpack this error and create the
- proper object. We do this so that we don't have to pass
- the filename to everything function.
-
- XXX Maybe we should just pass the filename...
-*/
+#define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
static int
-ast_error(const node *n, const char *errstr)
+ast_error(struct compiling *c, const node *n, const char *errmsg)
{
- PyObject *u = Py_BuildValue("zii", errstr, LINENO(n), n->n_col_offset), *save;
- if (!u)
- return 0;
- /*
- * Prevent the error from being chained. PyErr_SetObject will normalize the
- * exception in order to chain it. ast_error_finish, however, requires the
- * error not to be normalized.
- */
- save = PyThreadState_GET()->exc_value;
- PyThreadState_GET()->exc_value = NULL;
- PyErr_SetObject(PyExc_SyntaxError, u);
- PyThreadState_GET()->exc_value = save;
- Py_DECREF(u);
- return 0;
-}
-
-static void
-ast_error_finish(const char *filename)
-{
- PyObject *type, *value, *tback, *errstr, *offset, *loc, *tmp;
+ PyObject *value, *errstr, *loc, *tmp;
PyObject *filename_obj;
- long lineno;
-
- assert(PyErr_Occurred());
- if (!PyErr_ExceptionMatches(PyExc_SyntaxError))
- return;
-
- PyErr_Fetch(&type, &value, &tback);
- errstr = PyTuple_GetItem(value, 0);
- if (!errstr)
- return;
- Py_INCREF(errstr);
- lineno = PyLong_AsLong(PyTuple_GetItem(value, 1));
- if (lineno == -1) {
- Py_DECREF(errstr);
- return;
- }
- offset = PyTuple_GetItem(value, 2);
- if (!offset) {
- Py_DECREF(errstr);
- return;
- }
- Py_DECREF(value);
-
- loc = PyErr_ProgramText(filename, lineno);
+
+ loc = PyErr_ProgramText(c->c_filename, LINENO(n));
if (!loc) {
Py_INCREF(Py_None);
loc = Py_None;
}
- if (filename != NULL)
- filename_obj = PyUnicode_DecodeFSDefault(filename);
- else {
+ if (c->c_filename) {
+ filename_obj = PyUnicode_DecodeFSDefault(c->c_filename);
+ if (!filename_obj) {
+ Py_DECREF(loc);
+ return 0;
+ }
+ } else {
Py_INCREF(Py_None);
filename_obj = Py_None;
}
- if (filename_obj != NULL)
- tmp = Py_BuildValue("(NlOO)", filename_obj, lineno, offset, loc);
- else
- tmp = NULL;
- Py_DECREF(loc);
- if (!tmp) {
- Py_DECREF(errstr);
- return;
+ tmp = Py_BuildValue("(NiiN)", filename_obj, LINENO(n), n->n_col_offset, loc);
+ if (!tmp)
+ return 0;
+ errstr = PyUnicode_FromString(errmsg);
+ if (!errstr) {
+ Py_DECREF(tmp);
+ return 0;
}
value = PyTuple_Pack(2, errstr, tmp);
Py_DECREF(errstr);
Py_DECREF(tmp);
- if (!value)
- return;
- PyErr_Restore(type, value, tback);
+ if (value) {
+ PyErr_SetObject(PyExc_SyntaxError, value);
+ Py_DECREF(value);
+ }
+ return 0;
}
/* num_stmts() returns number of contained statements.
@@ -234,13 +685,17 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
stmt_ty s;
node *ch;
struct compiling c;
+ mod_ty res = NULL;
+ c.c_arena = arena;
+ c.c_filename = filename;
+ c.c_normalize = c.c_normalize_args = NULL;
if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
c.c_encoding = "utf-8";
if (TYPE(n) == encoding_decl) {
#if 0
- ast_error(n, "encoding declaration in Unicode string");
- goto error;
+ ast_error(c, n, "encoding declaration in Unicode string");
+ goto out;
#endif
n = CHILD(n, 0);
}
@@ -251,15 +706,13 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
/* PEP 3120 */
c.c_encoding = "utf-8";
}
- c.c_arena = arena;
- c.c_filename = filename;
k = 0;
switch (TYPE(n)) {
case file_input:
stmts = asdl_seq_new(num_stmts(n), arena);
if (!stmts)
- return NULL;
+ goto out;
for (i = 0; i < NCH(n) - 1; i++) {
ch = CHILD(n, i);
if (TYPE(ch) == NEWLINE)
@@ -269,7 +722,7 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
if (num == 1) {
s = ast_for_stmt(&c, ch);
if (!s)
- goto error;
+ goto out;
asdl_seq_SET(stmts, k++, s);
}
else {
@@ -278,42 +731,44 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
for (j = 0; j < num; j++) {
s = ast_for_stmt(&c, CHILD(ch, j * 2));
if (!s)
- goto error;
+ goto out;
asdl_seq_SET(stmts, k++, s);
}
}
}
- return Module(stmts, arena);
+ res = Module(stmts, arena);
+ break;
case eval_input: {
expr_ty testlist_ast;
/* XXX Why not comp_for here? */
testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
if (!testlist_ast)
- goto error;
- return Expression(testlist_ast, arena);
+ goto out;
+ res = Expression(testlist_ast, arena);
+ break;
}
case single_input:
if (TYPE(CHILD(n, 0)) == NEWLINE) {
stmts = asdl_seq_new(1, arena);
if (!stmts)
- goto error;
+ goto out;
asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
arena));
if (!asdl_seq_GET(stmts, 0))
- goto error;
- return Interactive(stmts, arena);
+ goto out;
+ res = Interactive(stmts, arena);
}
else {
n = CHILD(n, 0);
num = num_stmts(n);
stmts = asdl_seq_new(num, arena);
if (!stmts)
- goto error;
+ goto out;
if (num == 1) {
s = ast_for_stmt(&c, n);
if (!s)
- goto error;
+ goto out;
asdl_seq_SET(stmts, 0, s);
}
else {
@@ -324,21 +779,26 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
break;
s = ast_for_stmt(&c, CHILD(n, i));
if (!s)
- goto error;
+ goto out;
asdl_seq_SET(stmts, i / 2, s);
}
}
- return Interactive(stmts, arena);
+ res = Interactive(stmts, arena);
}
+ break;
default:
PyErr_Format(PyExc_SystemError,
"invalid node %d for PyAST_FromNode", TYPE(n));
- goto error;
+ goto out;
}
- error:
- ast_error_finish(filename);
- return NULL;
+ out:
+ if (c.c_normalize) {
+ Py_DECREF(c.c_normalize);
+ PyTuple_SET_ITEM(c.c_normalize_args, 1, NULL);
+ Py_DECREF(c.c_normalize_args);
+ }
+ return res;
}
/* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
@@ -383,18 +843,18 @@ static const char* FORBIDDEN[] = {
};
static int
-forbidden_name(identifier name, const node *n, int full_checks)
+forbidden_name(struct compiling *c, identifier name, const node *n, int full_checks)
{
assert(PyUnicode_Check(name));
if (PyUnicode_CompareWithASCIIString(name, "__debug__") == 0) {
- ast_error(n, "assignment to keyword");
+ ast_error(c, n, "assignment to keyword");
return 1;
}
if (full_checks) {
const char **p;
for (p = FORBIDDEN; *p; p++) {
if (PyUnicode_CompareWithASCIIString(name, *p) == 0) {
- ast_error(n, "assignment to keyword");
+ ast_error(c, n, "assignment to keyword");
return 1;
}
}
@@ -430,7 +890,7 @@ set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
switch (e->kind) {
case Attribute_kind:
e->v.Attribute.ctx = ctx;
- if (ctx == Store && forbidden_name(e->v.Attribute.attr, n, 1))
+ if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
return 0;
break;
case Subscript_kind:
@@ -443,7 +903,7 @@ set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
break;
case Name_kind:
if (ctx == Store) {
- if (forbidden_name(e->v.Name.id, n, 1))
+ if (forbidden_name(c, e->v.Name.id, n, 1))
return 0; /* forbidden_name() calls ast_error() */
}
e->v.Name.ctx = ctx;
@@ -476,6 +936,7 @@ set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
expr_name = "generator expression";
break;
case Yield_kind:
+ case YieldFrom_kind:
expr_name = "yield expression";
break;
case ListComp_kind:
@@ -516,7 +977,7 @@ set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
"can't %s %s",
ctx == Store ? "assign to" : "delete",
expr_name);
- return ast_error(n, buf);
+ return ast_error(c, n, buf);
}
/* If the LHS is a list or tuple, we need to set the assignment
@@ -664,7 +1125,7 @@ ast_for_arg(struct compiling *c, const node *n)
name = NEW_IDENTIFIER(ch);
if (!name)
return NULL;
- if (forbidden_name(name, ch, 0))
+ if (forbidden_name(c, name, ch, 0))
return NULL;
if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
@@ -694,7 +1155,7 @@ handle_keywordonly_args(struct compiling *c, const node *n, int start,
int j = 0; /* index for kwdefaults and kwonlyargs */
if (kwonlyargs == NULL) {
- ast_error(CHILD(n, start), "named arguments must follow bare *");
+ ast_error(c, CHILD(n, start), "named arguments must follow bare *");
return -1;
}
assert(kwdefaults != NULL);
@@ -726,7 +1187,7 @@ handle_keywordonly_args(struct compiling *c, const node *n, int start,
argname = NEW_IDENTIFIER(ch);
if (!argname)
goto error;
- if (forbidden_name(argname, ch, 0))
+ if (forbidden_name(c, argname, ch, 0))
goto error;
arg = arg(argname, annotation, c->c_arena);
if (!arg)
@@ -737,7 +1198,7 @@ handle_keywordonly_args(struct compiling *c, const node *n, int start,
case DOUBLESTAR:
return i;
default:
- ast_error(ch, "unexpected node");
+ ast_error(c, ch, "unexpected node");
goto error;
}
}
@@ -829,7 +1290,7 @@ ast_for_arguments(struct compiling *c, const node *n)
return NULL;
if (nposargs + nkwonlyargs > 255) {
- ast_error(n, "more than 255 arguments");
+ ast_error(c, n, "more than 255 arguments");
return NULL;
}
@@ -857,7 +1318,7 @@ ast_for_arguments(struct compiling *c, const node *n)
found_default = 1;
}
else if (found_default) {
- ast_error(n,
+ ast_error(c, n,
"non-default argument follows default argument");
return NULL;
}
@@ -869,7 +1330,7 @@ ast_for_arguments(struct compiling *c, const node *n)
break;
case STAR:
if (i+1 >= NCH(n)) {
- ast_error(CHILD(n, i),
+ ast_error(c, CHILD(n, i),
"named arguments must follow bare *");
return NULL;
}
@@ -886,7 +1347,7 @@ ast_for_arguments(struct compiling *c, const node *n)
vararg = NEW_IDENTIFIER(CHILD(ch, 0));
if (!vararg)
return NULL;
- if (forbidden_name(vararg, CHILD(ch, 0), 0))
+ if (forbidden_name(c, vararg, CHILD(ch, 0), 0))
return NULL;
if (NCH(ch) > 1) {
/* there is an annotation on the vararg */
@@ -917,7 +1378,7 @@ ast_for_arguments(struct compiling *c, const node *n)
if (!kwargannotation)
return NULL;
}
- if (forbidden_name(kwarg, CHILD(ch, 0), 0))
+ if (forbidden_name(c, kwarg, CHILD(ch, 0), 0))
return NULL;
i += 3;
break;
@@ -1036,7 +1497,7 @@ ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
name = NEW_IDENTIFIER(CHILD(n, name_i));
if (!name)
return NULL;
- if (forbidden_name(name, CHILD(n, name_i), 0))
+ if (forbidden_name(c, name, CHILD(n, name_i), 0))
return NULL;
args = ast_for_arguments(c, CHILD(n, name_i + 1));
if (!args)
@@ -1385,7 +1846,7 @@ ast_for_atom(struct compiling *c, const node *n)
} else {
PyOS_snprintf(buf, sizeof(buf), "(%s) unknown error", errtype);
}
- ast_error(n, buf);
+ ast_error(c, n, buf);
Py_DECREF(type);
Py_DECREF(value);
Py_XDECREF(tback);
@@ -1901,12 +2362,25 @@ ast_for_expr(struct compiling *c, const node *n)
}
return ast_for_binop(c, n);
case yield_expr: {
+ node *an = NULL;
+ node *en = NULL;
+ int is_from = 0;
expr_ty exp = NULL;
- if (NCH(n) == 2) {
- exp = ast_for_testlist(c, CHILD(n, 1));
+ if (NCH(n) > 1)
+ an = CHILD(n, 1); /* yield_arg */
+ if (an) {
+ en = CHILD(an, NCH(an) - 1);
+ if (NCH(an) == 2) {
+ is_from = 1;
+ exp = ast_for_expr(c, en);
+ }
+ else
+ exp = ast_for_testlist(c, en);
if (!exp)
return NULL;
}
+ if (is_from)
+ return YieldFrom(exp, LINENO(n), n->n_col_offset, c->c_arena);
return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
}
case factor:
@@ -1931,7 +2405,7 @@ ast_for_call(struct compiling *c, const node *n, expr_ty func)
/*
arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
| '**' test)
- argument: [test '='] test [comp_for] # Really [keyword '='] test
+ argument: [test '='] (test) [comp_for] # Really [keyword '='] test
*/
int i, nargs, nkeywords, ngens;
@@ -1956,14 +2430,14 @@ ast_for_call(struct compiling *c, const node *n, expr_ty func)
}
}
if (ngens > 1 || (ngens && (nargs || nkeywords))) {
- ast_error(n, "Generator expression must be parenthesized "
+ ast_error(c, n, "Generator expression must be parenthesized "
"if not sole argument");
return NULL;
}
if (nargs + nkeywords + ngens > 255) {
- ast_error(n, "more than 255 arguments");
- return NULL;
+ ast_error(c, n, "more than 255 arguments");
+ return NULL;
}
args = asdl_seq_new(nargs + ngens, c->c_arena);
@@ -1980,12 +2454,12 @@ ast_for_call(struct compiling *c, const node *n, expr_ty func)
expr_ty e;
if (NCH(ch) == 1) {
if (nkeywords) {
- ast_error(CHILD(ch, 0),
+ ast_error(c, CHILD(ch, 0),
"non-keyword arg after keyword arg");
return NULL;
}
if (vararg) {
- ast_error(CHILD(ch, 0),
+ ast_error(c, CHILD(ch, 0),
"only named arguments may follow *expression");
return NULL;
}
@@ -2015,19 +2489,19 @@ ast_for_call(struct compiling *c, const node *n, expr_ty func)
* then is very confusing.
*/
if (e->kind == Lambda_kind) {
- ast_error(CHILD(ch, 0), "lambda cannot contain assignment");
- return NULL;
+ ast_error(c, CHILD(ch, 0), "lambda cannot contain assignment");
+ return NULL;
} else if (e->kind != Name_kind) {
- ast_error(CHILD(ch, 0), "keyword can't be an expression");
- return NULL;
- } else if (forbidden_name(e->v.Name.id, ch, 1)) {
- return NULL;
+ ast_error(c, CHILD(ch, 0), "keyword can't be an expression");
+ return NULL;
+ } else if (forbidden_name(c, e->v.Name.id, ch, 1)) {
+ return NULL;
}
key = e->v.Name.id;
for (k = 0; k < nkeywords; k++) {
tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg;
if (!PyUnicode_Compare(tmp, key)) {
- ast_error(CHILD(ch, 0), "keyword argument repeated");
+ ast_error(c, CHILD(ch, 0), "keyword argument repeated");
return NULL;
}
}
@@ -2120,7 +2594,7 @@ ast_for_expr_stmt(struct compiling *c, const node *n)
case Subscript_kind:
break;
default:
- ast_error(ch, "illegal expression for augmented assignment");
+ ast_error(c, ch, "illegal expression for augmented assignment");
return NULL;
}
@@ -2153,7 +2627,7 @@ ast_for_expr_stmt(struct compiling *c, const node *n)
expr_ty e;
node *ch = CHILD(n, i);
if (TYPE(ch) == yield_expr) {
- ast_error(ch, "assignment to yield expression not possible");
+ ast_error(c, ch, "assignment to yield expression not possible");
return NULL;
}
e = ast_for_testlist(c, ch);
@@ -2225,7 +2699,7 @@ ast_for_flow_stmt(struct compiling *c, const node *n)
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
- yield_expr: 'yield' testlist
+ yield_expr: 'yield' testlist | 'yield' 'from' test
raise_stmt: 'raise' [test [',' test [',' test]]]
*/
node *ch;
@@ -2300,11 +2774,11 @@ alias_for_import_name(struct compiling *c, const node *n, int store)
str = NEW_IDENTIFIER(str_node);
if (!str)
return NULL;
- if (store && forbidden_name(str, str_node, 0))
+ if (store && forbidden_name(c, str, str_node, 0))
return NULL;
}
else {
- if (forbidden_name(name, name_node, 0))
+ if (forbidden_name(c, name, name_node, 0))
return NULL;
}
return alias(name, str, c->c_arena);
@@ -2323,7 +2797,7 @@ alias_for_import_name(struct compiling *c, const node *n, int store)
a->asname = NEW_IDENTIFIER(asname_node);
if (!a->asname)
return NULL;
- if (forbidden_name(a->asname, asname_node, 0))
+ if (forbidden_name(c, a->asname, asname_node, 0))
return NULL;
return a;
}
@@ -2334,7 +2808,7 @@ alias_for_import_name(struct compiling *c, const node *n, int store)
name = NEW_IDENTIFIER(name_node);
if (!name)
return NULL;
- if (store && forbidden_name(name, name_node, 0))
+ if (store && forbidden_name(c, name, name_node, 0))
return NULL;
return alias(name, NULL, c->c_arena);
}
@@ -2463,13 +2937,13 @@ ast_for_import_stmt(struct compiling *c, const node *n)
n = CHILD(n, idx);
n_children = NCH(n);
if (n_children % 2 == 0) {
- ast_error(n, "trailing comma not allowed without"
+ ast_error(c, n, "trailing comma not allowed without"
" surrounding parentheses");
return NULL;
}
break;
default:
- ast_error(n, "Unexpected node-type in from-import");
+ ast_error(c, n, "Unexpected node-type in from-import");
return NULL;
}
@@ -2875,7 +3349,7 @@ ast_for_except_clause(struct compiling *c, const node *exc, node *body)
identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
if (!e)
return NULL;
- if (forbidden_name(e, CHILD(exc, 3), 0))
+ if (forbidden_name(c, e, CHILD(exc, 3), 0))
return NULL;
expression = ast_for_expr(c, CHILD(exc, 1));
if (!expression)
@@ -2899,7 +3373,7 @@ ast_for_try_stmt(struct compiling *c, const node *n)
{
const int nch = NCH(n);
int n_except = (nch - 3)/3;
- asdl_seq *body, *orelse = NULL, *finally = NULL;
+ asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
REQ(n, try_stmt);
@@ -2934,15 +3408,14 @@ ast_for_try_stmt(struct compiling *c, const node *n)
}
}
else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
- ast_error(n, "malformed 'try' statement");
+ ast_error(c, n, "malformed 'try' statement");
return NULL;
}
if (n_except > 0) {
int i;
- stmt_ty except_st;
/* process except statements to create a try ... except */
- asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena);
+ handlers = asdl_seq_new(n_except, c->c_arena);
if (handlers == NULL)
return NULL;
@@ -2953,28 +3426,15 @@ ast_for_try_stmt(struct compiling *c, const node *n)
return NULL;
asdl_seq_SET(handlers, i, e);
}
-
- except_st = TryExcept(body, handlers, orelse, LINENO(n),
- n->n_col_offset, c->c_arena);
- if (!finally)
- return except_st;
-
- /* if a 'finally' is present too, we nest the TryExcept within a
- TryFinally to emulate try ... except ... finally */
- body = asdl_seq_new(1, c->c_arena);
- if (body == NULL)
- return NULL;
- asdl_seq_SET(body, 0, except_st);
}
- /* must be a try ... finally (except clauses are in body, if any exist) */
- assert(finally != NULL);
- return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena);
+ assert(finally != NULL || asdl_seq_LEN(handlers));
+ return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset, c->c_arena);
}
/* with_item: test ['as' expr] */
-static stmt_ty
-ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content)
+static withitem_ty
+ast_for_with_item(struct compiling *c, const node *n)
{
expr_ty context_expr, optional_vars = NULL;
@@ -2993,43 +3453,34 @@ ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content)
}
}
- return With(context_expr, optional_vars, content, LINENO(n),
- n->n_col_offset, c->c_arena);
+ return withitem(context_expr, optional_vars, c->c_arena);
}
/* with_stmt: 'with' with_item (',' with_item)* ':' suite */
static stmt_ty
ast_for_with_stmt(struct compiling *c, const node *n)
{
- int i;
- stmt_ty ret;
- asdl_seq *inner;
+ int i, n_items;
+ asdl_seq *items, *body;
REQ(n, with_stmt);
- /* process the with items inside-out */
- i = NCH(n) - 1;
- /* the suite of the innermost with item is the suite of the with stmt */
- inner = ast_for_suite(c, CHILD(n, i));
- if (!inner)
+ n_items = (NCH(n) - 2) / 2;
+ items = asdl_seq_new(n_items, c->c_arena);
+ if (!items)
return NULL;
-
- for (;;) {
- i -= 2;
- ret = ast_for_with_item(c, CHILD(n, i), inner);
- if (!ret)
- return NULL;
- /* was this the last item? */
- if (i == 1)
- break;
- /* if not, wrap the result so far in a new sequence */
- inner = asdl_seq_new(1, c->c_arena);
- if (!inner)
+ for (i = 1; i < NCH(n) - 2; i += 2) {
+ withitem_ty item = ast_for_with_item(c, CHILD(n, i));
+ if (!item)
return NULL;
- asdl_seq_SET(inner, 0, ret);
+ asdl_seq_SET(items, (i - 1) / 2, item);
}
- return ret;
+ body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
+ if (!body)
+ return NULL;
+
+ return With(items, body, LINENO(n), n->n_col_offset, c->c_arena);
}
static stmt_ty
@@ -3049,7 +3500,7 @@ ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
classname = NEW_IDENTIFIER(CHILD(n, 1));
if (!classname)
return NULL;
- if (forbidden_name(classname, CHILD(n, 3), 0))
+ if (forbidden_name(c, classname, CHILD(n, 3), 0))
return NULL;
return ClassDef(classname, NULL, NULL, NULL, NULL, s, decorator_seq,
LINENO(n), n->n_col_offset, c->c_arena);
@@ -3062,7 +3513,7 @@ ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
classname = NEW_IDENTIFIER(CHILD(n, 1));
if (!classname)
return NULL;
- if (forbidden_name(classname, CHILD(n, 3), 0))
+ if (forbidden_name(c, classname, CHILD(n, 3), 0))
return NULL;
return ClassDef(classname, NULL, NULL, NULL, NULL, s, decorator_seq,
LINENO(n), n->n_col_offset, c->c_arena);
@@ -3087,7 +3538,7 @@ ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
classname = NEW_IDENTIFIER(CHILD(n, 1));
if (!classname)
return NULL;
- if (forbidden_name(classname, CHILD(n, 1), 0))
+ if (forbidden_name(c, classname, CHILD(n, 1), 0))
return NULL;
return ClassDef(classname, call->v.Call.args, call->v.Call.keywords,
@@ -3213,20 +3664,14 @@ parsenumber(struct compiling *c, const char *s)
}
static PyObject *
-decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding)
+decode_utf8(struct compiling *c, const char **sPtr, const char *end)
{
- PyObject *u, *v;
char *s, *t;
t = s = (char *)*sPtr;
/* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
while (s < end && (*s & 0x80)) s++;
*sPtr = s;
- u = PyUnicode_DecodeUTF8(t, s - t, NULL);
- if (u == NULL)
- return NULL;
- v = PyUnicode_AsEncodedString(u, encoding, NULL);
- Py_DECREF(u);
- return v;
+ return PyUnicode_DecodeUTF8(t, s - t, NULL);
}
static PyObject *
@@ -3238,7 +3683,6 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
const char *end;
if (encoding == NULL) {
- buf = (char *)s;
u = NULL;
} else {
/* check for integer overflow */
@@ -3261,22 +3705,20 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
}
if (*s & 0x80) { /* XXX inefficient */
PyObject *w;
- char *r;
- Py_ssize_t rn, i;
- w = decode_utf8(c, &s, end, "utf-32-be");
+ int kind;
+ void *data;
+ Py_ssize_t len, i;
+ w = decode_utf8(c, &s, end);
if (w == NULL) {
Py_DECREF(u);
return NULL;
}
- r = PyBytes_AS_STRING(w);
- rn = Py_SIZE(w);
- assert(rn % 4 == 0);
- for (i = 0; i < rn; i += 4) {
- sprintf(p, "\\U%02x%02x%02x%02x",
- r[i + 0] & 0xFF,
- r[i + 1] & 0xFF,
- r[i + 2] & 0xFF,
- r[i + 3] & 0xFF);
+ kind = PyUnicode_KIND(w);
+ data = PyUnicode_DATA(w);
+ len = PyUnicode_GET_LENGTH(w);
+ for (i = 0; i < len; i++) {
+ Py_UCS4 chr = PyUnicode_READ(kind, data, i);
+ sprintf(p, "\\U%08x", chr);
p += 10;
}
/* Should be impossible to overflow */
@@ -3310,13 +3752,21 @@ parsestr(struct compiling *c, const node *n, int *bytesmode)
int rawmode = 0;
int need_encoding;
if (Py_ISALPHA(quote)) {
- if (quote == 'b' || quote == 'B') {
- quote = *++s;
- *bytesmode = 1;
- }
- if (quote == 'r' || quote == 'R') {
- quote = *++s;
- rawmode = 1;
+ while (!*bytesmode || !rawmode) {
+ if (quote == 'b' || quote == 'B') {
+ quote = *++s;
+ *bytesmode = 1;
+ }
+ else if (quote == 'u' || quote == 'U') {
+ quote = *++s;
+ }
+ else if (quote == 'r' || quote == 'R') {
+ quote = *++s;
+ rawmode = 1;
+ }
+ else {
+ break;
+ }
}
}
if (quote != '\'' && quote != '\"') {
@@ -3347,10 +3797,10 @@ parsestr(struct compiling *c, const node *n, int *bytesmode)
}
if (*bytesmode) {
/* Disallow non-ascii characters (but not escapes) */
- const char *c;
- for (c = s; *c; c++) {
- if (Py_CHARMASK(*c) >= 0x80) {
- ast_error(n, "bytes can only contain ASCII "
+ const char *ch;
+ for (ch = s; *ch; ch++) {
+ if (Py_CHARMASK(*ch) >= 0x80) {
+ ast_error(c, n, "bytes can only contain ASCII "
"literal characters.");
return NULL;
}
@@ -3398,7 +3848,7 @@ parsestrplus(struct compiling *c, const node *n, int *bytesmode)
if (s == NULL)
goto onError;
if (*bytesmode != subbm) {
- ast_error(n, "cannot mix bytes and nonbytes literals");
+ ast_error(c, n, "cannot mix bytes and nonbytes literals");
Py_DECREF(s);
goto onError;
}