From 84d910b372375ad37c9dcd6fb34f3102d4aad4bb Mon Sep 17 00:00:00 2001 From: Kristj?n Valur J?nsson Date: Tue, 19 Mar 2013 18:02:10 -0700 Subject: Issue #16475: Support object instancing, recursion and interned strings in marshal --- Python/marshal.c | 272 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 245 insertions(+), 27 deletions(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index 959f3f7f2c..c3e75b145b 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -1,8 +1,10 @@ /* Write Python objects to files and read them back. - This is intended for writing and reading compiled Python code only; - a true persistent storage facility would be much harder, since - it would have to take circular links and sharing into account. */ + This is primarily intended for writing and reading compiled Python code, + even though dicts, lists, sets and frozensets, not commonly seen in + code objects, are supported. + Version 3 of this protocol properly supports circular links + and sharing. */ #define PY_SSIZE_T_CLEAN @@ -41,6 +43,8 @@ #define TYPE_BINARY_COMPLEX 'y' #define TYPE_LONG 'l' #define TYPE_STRING 's' +#define TYPE_INTERNED 't' +#define TYPE_REF 'r' #define TYPE_TUPLE '(' #define TYPE_LIST '[' #define TYPE_DICT '{' @@ -49,6 +53,7 @@ #define TYPE_UNKNOWN '?' 
#define TYPE_SET '<' #define TYPE_FROZENSET '>' +#define FLAG_REF '\x80' /* with a type, add obj to index */ #define WFERR_OK 0 #define WFERR_UNMARSHALLABLE 1 @@ -65,6 +70,7 @@ typedef struct { PyObject *current_filename; char *ptr; char *end; + PyObject *refs; /* dict on marshal, list on unmarshal */ int version; } WFILE; @@ -151,13 +157,17 @@ w_long(long x, WFILE *p) #endif #define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT) +#define W_TYPE(t, p) do { \ + w_byte((t) | flag, (p)); \ +} while(0) + static void -w_PyLong(const PyLongObject *ob, WFILE *p) +w_PyLong(const PyLongObject *ob, char flag, WFILE *p) { Py_ssize_t i, j, n, l; digit d; - w_byte(TYPE_LONG, p); + W_TYPE(TYPE_LONG, p); if (Py_SIZE(ob) == 0) { w_long((long)0, p); return; @@ -194,10 +204,64 @@ w_PyLong(const PyLongObject *ob, WFILE *p) } while (d != 0); } +static int +w_ref(PyObject *v, char *flag, WFILE *p) +{ + PyObject *id; + PyObject *idx; + + if (p->version < 3 || p->refs == NULL) + return 0; /* not writing object references */ + + /* if it has only one reference, it definitely isn't shared */ + if (Py_REFCNT(v) == 1) + return 0; + + id = PyLong_FromVoidPtr((void*)v); + if (id == NULL) + goto err; + idx = PyDict_GetItem(p->refs, id); + if (idx != NULL) { + /* write the reference index to the stream */ + long w = PyLong_AsLong(idx); + Py_DECREF(id); + if (w == -1 && PyErr_Occurred()) { + goto err; + } + /* we don't store "long" indices in the dict */ + assert(0 <= w && w <= 0x7fffffff); + w_byte(TYPE_REF, p); + w_long(w, p); + return 1; + } else { + int ok; + Py_ssize_t s = PyDict_Size(p->refs); + /* we don't support long indices */ + if (s >= 0x7fffffff) { + PyErr_SetString(PyExc_ValueError, "too many objects"); + goto err; + } + idx = PyLong_FromSsize_t(s); + ok = idx && PyDict_SetItem(p->refs, id, idx) == 0; + Py_DECREF(id); + Py_XDECREF(idx); + if (!ok) + goto err; + *flag |= FLAG_REF; + return 0; + } +err: + p->error = WFERR_UNMARSHALLABLE; + return 1; +} + +static void 
+w_complex_object(PyObject *v, char flag, WFILE *p); + static void w_object(PyObject *v, WFILE *p) { - Py_ssize_t i, n; + char flag = '\0'; p->depth++; @@ -222,24 +286,35 @@ w_object(PyObject *v, WFILE *p) else if (v == Py_True) { w_byte(TYPE_TRUE, p); } - else if (PyLong_CheckExact(v)) { + else if (!w_ref(v, &flag, p)) + w_complex_object(v, flag, p); + + p->depth--; +} + +static void +w_complex_object(PyObject *v, char flag, WFILE *p) +{ + Py_ssize_t i, n; + + if (PyLong_CheckExact(v)) { long x = PyLong_AsLong(v); if ((x == -1) && PyErr_Occurred()) { PyLongObject *ob = (PyLongObject *)v; PyErr_Clear(); - w_PyLong(ob, p); + w_PyLong(ob, flag, p); } else { #if SIZEOF_LONG > 4 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31); if (y && y != -1) { /* Too large for TYPE_INT */ - w_PyLong((PyLongObject*)v, p); + w_PyLong((PyLongObject*)v, flag, p); } else #endif { - w_byte(TYPE_INT, p); + W_TYPE(TYPE_INT, p); w_long(x, p); } } @@ -252,7 +327,7 @@ w_object(PyObject *v, WFILE *p) p->error = WFERR_UNMARSHALLABLE; return; } - w_byte(TYPE_BINARY_FLOAT, p); + W_TYPE(TYPE_BINARY_FLOAT, p); w_string((char*)buf, 8, p); } else { @@ -263,7 +338,7 @@ w_object(PyObject *v, WFILE *p) return; } n = strlen(buf); - w_byte(TYPE_FLOAT, p); + W_TYPE(TYPE_FLOAT, p); w_byte((int)n, p); w_string(buf, n, p); PyMem_Free(buf); @@ -277,7 +352,7 @@ w_object(PyObject *v, WFILE *p) p->error = WFERR_UNMARSHALLABLE; return; } - w_byte(TYPE_BINARY_COMPLEX, p); + W_TYPE(TYPE_BINARY_COMPLEX, p); w_string((char*)buf, 8, p); if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v), buf, 1) < 0) { @@ -288,7 +363,7 @@ w_object(PyObject *v, WFILE *p) } else { char *buf; - w_byte(TYPE_COMPLEX, p); + W_TYPE(TYPE_COMPLEX, p); buf = PyOS_double_to_string(PyComplex_RealAsDouble(v), 'g', 17, 0, NULL); if (!buf) { @@ -312,7 +387,7 @@ w_object(PyObject *v, WFILE *p) } } else if (PyBytes_CheckExact(v)) { - w_byte(TYPE_STRING, p); + W_TYPE(TYPE_STRING, p); n = PyBytes_GET_SIZE(v); W_SIZE(n, p); w_string(PyBytes_AS_STRING(v), n, p); 
@@ -325,14 +400,17 @@ w_object(PyObject *v, WFILE *p) p->error = WFERR_UNMARSHALLABLE; return; } - w_byte(TYPE_UNICODE, p); + if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v)) + W_TYPE(TYPE_INTERNED, p); + else + W_TYPE(TYPE_UNICODE, p); n = PyBytes_GET_SIZE(utf8); W_SIZE(n, p); w_string(PyBytes_AS_STRING(utf8), n, p); Py_DECREF(utf8); } else if (PyTuple_CheckExact(v)) { - w_byte(TYPE_TUPLE, p); + W_TYPE(TYPE_TUPLE, p); n = PyTuple_Size(v); W_SIZE(n, p); for (i = 0; i < n; i++) { @@ -340,7 +418,7 @@ w_object(PyObject *v, WFILE *p) } } else if (PyList_CheckExact(v)) { - w_byte(TYPE_LIST, p); + W_TYPE(TYPE_LIST, p); n = PyList_GET_SIZE(v); W_SIZE(n, p); for (i = 0; i < n; i++) { @@ -350,7 +428,7 @@ w_object(PyObject *v, WFILE *p) else if (PyDict_CheckExact(v)) { Py_ssize_t pos; PyObject *key, *value; - w_byte(TYPE_DICT, p); + W_TYPE(TYPE_DICT, p); /* This one is NULL object terminated! */ pos = 0; while (PyDict_Next(v, &pos, &key, &value)) { @@ -363,9 +441,9 @@ w_object(PyObject *v, WFILE *p) PyObject *value, *it; if (PyObject_TypeCheck(v, &PySet_Type)) - w_byte(TYPE_SET, p); + W_TYPE(TYPE_SET, p); else - w_byte(TYPE_FROZENSET, p); + W_TYPE(TYPE_FROZENSET, p); n = PyObject_Size(v); if (n == -1) { p->depth--; @@ -392,7 +470,7 @@ w_object(PyObject *v, WFILE *p) } else if (PyCode_Check(v)) { PyCodeObject *co = (PyCodeObject *)v; - w_byte(TYPE_CODE, p); + W_TYPE(TYPE_CODE, p); w_long(co->co_argcount, p); w_long(co->co_kwonlyargcount, p); w_long(co->co_nlocals, p); @@ -419,7 +497,7 @@ w_object(PyObject *v, WFILE *p) p->error = WFERR_UNMARSHALLABLE; return; } - w_byte(TYPE_STRING, p); + W_TYPE(TYPE_STRING, p); n = view.len; s = view.buf; W_SIZE(n, p); @@ -427,10 +505,9 @@ w_object(PyObject *v, WFILE *p) PyBuffer_Release(&view); } else { - w_byte(TYPE_UNKNOWN, p); + W_TYPE(TYPE_UNKNOWN, p); p->error = WFERR_UNMARSHALLABLE; } - p->depth--; } /* version currently has no effect for writing longs. 
*/ @@ -441,6 +518,7 @@ PyMarshal_WriteLongToFile(long x, FILE *fp, int version) wf.fp = fp; wf.error = WFERR_OK; wf.depth = 0; + wf.refs = NULL; wf.version = version; w_long(x, &wf); } @@ -452,8 +530,14 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version) wf.fp = fp; wf.error = WFERR_OK; wf.depth = 0; + if (version >= 3) { + if ((wf.refs = PyDict_New()) == NULL) + return; /* caller mush check PyErr_Occurred() */ + } else + wf.refs = NULL; wf.version = version; w_object(x, &wf); + Py_XDECREF(wf.refs); } typedef WFILE RFILE; /* Same struct with different invariants */ @@ -489,7 +573,7 @@ r_string(char *s, Py_ssize_t n, RFILE *p) data->ob_type->tp_name); } else { - read = PyBytes_GET_SIZE(data); + read = (int)PyBytes_GET_SIZE(data); if (read > 0) { ptr = PyBytes_AS_STRING(data); memcpy(s, ptr, read); @@ -659,6 +743,59 @@ r_PyLong(RFILE *p) return NULL; } +/* allocate the reflist index */ +static PyObject * +r_ref_reserve(PyObject *o, Py_ssize_t *idx, int flag, RFILE *p) +{ + if (flag) { /* currently only FLAG_REF is defined */ + *idx = PyList_Size(p->refs); + if (*idx < 0) + goto err; + if (*idx >= 0x7ffffffe) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)"); + goto err; + } + if (PyList_Append(p->refs, Py_None) < 0) + goto err; + } else + *idx = 0; + return o; +err: + Py_XDECREF(o); /* release the new object */ + *idx = -1; + return NULL; +} + +/* insert actual object to the reflist */ +static PyObject * +r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p) +{ + if (o && (flag & FLAG_REF)) { + if (PyList_SetItem(p->refs, idx, o) < 0) { + Py_DECREF(o); /* release the new object */ + return NULL; + } else { + Py_INCREF(o); /* a reference for the list */ + } + } + return o; +} + +/* combination of both above, used when an object can be + * created whenever it is seen in the file, as opposed to + * after having loaded its sub-objects. 
+ */ +static PyObject * +r_ref(PyObject *o, int flag, RFILE *p) +{ + if (o && (flag & FLAG_REF)) { + if (PyList_Append(p->refs, o) < 0) { + Py_DECREF(o); /* release the new object */ + return NULL; + } + } + return o; +} static PyObject * r_object(RFILE *p) @@ -666,8 +803,10 @@ r_object(RFILE *p) /* NULL is a valid return value, it does not necessarily means that an exception is set. */ PyObject *v, *v2; + Py_ssize_t idx; long i, n; int type = r_byte(p); + int flag; PyObject *retval; p->depth++; @@ -678,6 +817,13 @@ r_object(RFILE *p) return NULL; } + flag = type & FLAG_REF; + type = type & ~FLAG_REF; + +#define R_REF(O) do{\ + O = r_ref(O, flag, p);\ +} while (0) + switch (type) { case EOF: @@ -718,14 +864,17 @@ r_object(RFILE *p) case TYPE_INT: n = r_long(p); retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n); + R_REF(retval); break; case TYPE_INT64: retval = r_long64(p); + R_REF(retval); break; case TYPE_LONG: retval = r_PyLong(p); + R_REF(retval); break; case TYPE_FLOAT: @@ -744,6 +893,7 @@ r_object(RFILE *p) if (dx == -1.0 && PyErr_Occurred()) break; retval = PyFloat_FromDouble(dx); + R_REF(retval); break; } @@ -763,6 +913,7 @@ r_object(RFILE *p) break; } retval = PyFloat_FromDouble(x); + R_REF(retval); break; } @@ -792,6 +943,7 @@ r_object(RFILE *p) if (c.imag == -1.0 && PyErr_Occurred()) break; retval = PyComplex_FromCComplex(c); + R_REF(retval); break; } @@ -822,6 +974,7 @@ r_object(RFILE *p) break; } retval = PyComplex_FromCComplex(c); + R_REF(retval); break; } @@ -849,9 +1002,11 @@ r_object(RFILE *p) break; } retval = v; + R_REF(retval); break; case TYPE_UNICODE: + case TYPE_INTERNED: { char *buffer; @@ -879,7 +1034,10 @@ r_object(RFILE *p) } v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass"); PyMem_DEL(buffer); + if (type == TYPE_INTERNED) + PyUnicode_InternInPlace(&v); retval = v; + R_REF(retval); break; } @@ -895,6 +1053,7 @@ r_object(RFILE *p) break; } v = PyTuple_New(n); + R_REF(v); if (v == NULL) { retval = NULL; break; @@ -926,6 +1085,7 @@ 
r_object(RFILE *p) break; } v = PyList_New(n); + R_REF(v); if (v == NULL) { retval = NULL; break; @@ -947,6 +1107,7 @@ r_object(RFILE *p) case TYPE_DICT: v = PyDict_New(); + R_REF(v); if (v == NULL) { retval = NULL; break; @@ -982,6 +1143,13 @@ r_object(RFILE *p) break; } v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL); + /* must use delayed registration of frozensets because they must + * be init with a refcount of 1 + */ + if (type == TYPE_SET) + R_REF(v); + else + v = r_ref_reserve(v, &idx, flag, p); if (v == NULL) { retval = NULL; break; @@ -1004,6 +1172,8 @@ r_object(RFILE *p) } Py_DECREF(v2); } + if (type != TYPE_SET) + v = r_ref_insert(v, idx, flag, p); retval = v; break; @@ -1024,6 +1194,12 @@ r_object(RFILE *p) PyObject *name = NULL; int firstlineno; PyObject *lnotab = NULL; + + r_ref_reserve(NULL, &idx, flag, p); + if (idx < 0) { + retval = NULL; + break; + } v = NULL; @@ -1090,6 +1266,7 @@ r_object(RFILE *p) code, consts, names, varnames, freevars, cellvars, filename, name, firstlineno, lnotab); + v = r_ref_insert(v, idx, flag, p); code_error: Py_XDECREF(code); @@ -1105,6 +1282,23 @@ r_object(RFILE *p) retval = v; break; + case TYPE_REF: + n = r_long(p); + if (n < 0 || n >= PyList_GET_SIZE(p->refs)) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); + retval = NULL; + break; + } + v = PyList_GET_ITEM(p->refs, n); + if (v == Py_None) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); + retval = NULL; + break; + } + Py_INCREF(v); + retval = v; + break; + default: /* Bogus data got written, which isn't ideal. This will let you keep working and recover. 
*/ @@ -1210,7 +1404,11 @@ PyMarshal_ReadObjectFromFile(FILE *fp) rf.current_filename = NULL; rf.depth = 0; rf.ptr = rf.end = NULL; + rf.refs = PyList_New(0); + if (rf.refs == NULL) + return NULL; result = r_object(&rf); + Py_DECREF(rf.refs); return result; } @@ -1225,7 +1423,11 @@ PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len) rf.ptr = str; rf.end = str + len; rf.depth = 0; + rf.refs = PyList_New(0); + if (rf.refs == NULL) + return NULL; result = r_object(&rf); + Py_DECREF(rf.refs); return result; } @@ -1244,7 +1446,13 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) wf.error = WFERR_OK; wf.depth = 0; wf.version = version; + if (version >= 3) { + if ((wf.refs = PyDict_New()) == NULL) + return NULL; + } else + wf.refs = NULL; w_object(x, &wf); + Py_XDECREF(wf.refs); if (wf.str != NULL) { char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str); if (wf.ptr - base > PY_SSIZE_T_MAX) { @@ -1316,6 +1524,8 @@ marshal_load(PyObject *self, PyObject *f) * Make a call to the read method, but read zero bytes. * This is to ensure that the object passed in at least * has a read method which returns bytes. + * This can be removed if we guarantee good error handling + * for r_string() */ data = _PyObject_CallMethodId(f, &PyId_read, "i", 0); if (data == NULL) @@ -1331,7 +1541,11 @@ marshal_load(PyObject *self, PyObject *f) rf.fp = NULL; rf.readable = f; rf.current_filename = NULL; - result = read_object(&rf); + if ((rf.refs = PyList_New(0)) != NULL) { + result = read_object(&rf); + Py_DECREF(rf.refs); + } else + result = NULL; } Py_DECREF(data); return result; @@ -1388,8 +1602,11 @@ marshal_loads(PyObject *self, PyObject *args) rf.ptr = s; rf.end = s + n; rf.depth = 0; + if ((rf.refs = PyList_New(0)) == NULL) + return NULL; result = read_object(&rf); PyBuffer_Release(&p); + Py_DECREF(rf.refs); return result; } @@ -1429,6 +1646,7 @@ Variables:\n\ version -- indicates the format that the module uses. 
Version 0 is the\n\ historical format, version 1 shares interned strings and version 2\n\ uses a binary format for floating point numbers.\n\ + Version 3 shares common object references (New in version 3.4).\n\ \n\ Functions:\n\ \n\ -- cgit v1.2.1 From 5fa12916b42529655234bba935e56d440d24ce8c Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Tue, 19 Mar 2013 23:20:59 -0500 Subject: fix compiler warning --- Python/marshal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index c3e75b145b..d237d17fad 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -803,7 +803,7 @@ r_object(RFILE *p) /* NULL is a valid return value, it does not necessarily means that an exception is set. */ PyObject *v, *v2; - Py_ssize_t idx; + Py_ssize_t idx = 0; long i, n; int type = r_byte(p); int flag; -- cgit v1.2.1 From ae5dc3cfdead5e4d54fe658e7034f2fcaa04361c Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Wed, 20 Mar 2013 00:39:41 -0500 Subject: Backed out changeset 521232b05b97 --- Python/marshal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index d237d17fad..c3e75b145b 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -803,7 +803,7 @@ r_object(RFILE *p) /* NULL is a valid return value, it does not necessarily means that an exception is set. 
*/ PyObject *v, *v2; - Py_ssize_t idx = 0; + Py_ssize_t idx; long i, n; int type = r_byte(p); int flag; -- cgit v1.2.1 From df7b59cced9f631c3b75374fabb24b9efd7ab808 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Wed, 20 Mar 2013 00:40:07 -0500 Subject: fix compiler warning --- Python/marshal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index c3e75b145b..d237d17fad 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -803,7 +803,7 @@ r_object(RFILE *p) /* NULL is a valid return value, it does not necessarily means that an exception is set. */ PyObject *v, *v2; - Py_ssize_t idx; + Py_ssize_t idx = 0; long i, n; int type = r_byte(p); int flag; -- cgit v1.2.1 From 7f89106a42723b0d13e8300e85b12f934c3af4f6 Mon Sep 17 00:00:00 2001 From: Kristján Valur Jónsson Date: Wed, 20 Mar 2013 11:43:57 -0700 Subject: Issue #16475: Simplify the interface to r_ref_allocate and improve comments. --- Python/marshal.c | 59 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 26 deletions(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index d237d17fad..4e552967f7 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -743,34 +743,37 @@ r_PyLong(RFILE *p) return NULL; } -/* allocate the reflist index */ -static PyObject * -r_ref_reserve(PyObject *o, Py_ssize_t *idx, int flag, RFILE *p) +/* allocate the reflist index for a new object. 
Return -1 on failure */ +static Py_ssize_t +r_ref_reserve(int flag, RFILE *p) { if (flag) { /* currently only FLAG_REF is defined */ - *idx = PyList_Size(p->refs); - if (*idx < 0) - goto err; - if (*idx >= 0x7ffffffe) { + Py_ssize_t idx = PyList_Size(p->refs); + if (idx < 0) + return -1; + if (idx >= 0x7ffffffe) { PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)"); - goto err; + return -1; } if (PyList_Append(p->refs, Py_None) < 0) - goto err; + return -1; + return idx; } else - *idx = 0; - return o; -err: - Py_XDECREF(o); /* release the new object */ - *idx = -1; - return NULL; + return 0; } -/* insert actual object to the reflist */ +/* insert the new object 'o' to the reflist at previously + * allocated index 'idx'. + * 'o' can be NULL, in which case nothing is done. + * if 'o' was non-NULL, and the function succeeds, 'o' is returned. + * if 'o' was non-NULL, and the function fails, 'o' is released and + * NULL returned. This simplifies error checking at the call site since + * a single test for NULL for the function result is enough. + */ static PyObject * r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p) { - if (o && (flag & FLAG_REF)) { + if (o != NULL && flag) { /* currently only FLAG_REF is defined */ if (PyList_SetItem(p->refs, idx, o) < 0) { Py_DECREF(o); /* release the new object */ return NULL; @@ -788,7 +791,7 @@ r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p) static PyObject * r_ref(PyObject *o, int flag, RFILE *p) { - if (o && (flag & FLAG_REF)) { + if (o != NULL && flag) { /* currently only FLAG_REF is defined */ if (PyList_Append(p->refs, o) < 0) { Py_DECREF(o); /* release the new object */ return NULL; @@ -821,7 +824,8 @@ r_object(RFILE *p) type = type & ~FLAG_REF; #define R_REF(O) do{\ - O = r_ref(O, flag, p);\ + if (flag) \ + O = r_ref(O, flag, p);\ } while (0) switch (type) { @@ -1143,13 +1147,16 @@ r_object(RFILE *p) break; } v = (type == TYPE_SET) ? 
PySet_New(NULL) : PyFrozenSet_New(NULL); - /* must use delayed registration of frozensets because they must - * be init with a refcount of 1 - */ - if (type == TYPE_SET) + if (type == TYPE_SET) { R_REF(v); - else - v = r_ref_reserve(v, &idx, flag, p); + } else { + /* must use delayed registration of frozensets because they must + * be init with a refcount of 1 + */ + idx = r_ref_reserve(flag, p); + if (idx < 0) + Py_CLEAR(v); /* signal error */ + } if (v == NULL) { retval = NULL; break; @@ -1195,7 +1202,7 @@ r_object(RFILE *p) int firstlineno; PyObject *lnotab = NULL; - r_ref_reserve(NULL, &idx, flag, p); + idx = r_ref_reserve(flag, p); if (idx < 0) { retval = NULL; break; -- cgit v1.2.1 From ec83e7f8ea12b012f3d594e6b1960ababa467bae Mon Sep 17 00:00:00 2001 From: Kristján Valur Jónsson Date: Wed, 20 Mar 2013 14:26:33 -0700 Subject: Issue #16475: Correctly handle the EOF when reading marshal streams. --- Python/marshal.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index 4e552967f7..bd889393d1 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -808,10 +808,16 @@ r_object(RFILE *p) PyObject *v, *v2; Py_ssize_t idx = 0; long i, n; - int type = r_byte(p); + int type, code = r_byte(p); int flag; PyObject *retval; + if (code == EOF) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + return NULL; + } + p->depth++; if (p->depth > MAX_MARSHAL_STACK_DEPTH) { @@ -820,8 +826,8 @@ r_object(RFILE *p) return NULL; } - flag = type & FLAG_REF; - type = type & ~FLAG_REF; + flag = code & FLAG_REF; + type = code & ~FLAG_REF; #define R_REF(O) do{\ if (flag) \ @@ -830,12 +836,6 @@ r_object(RFILE *p) switch (type) { - case EOF: - PyErr_SetString(PyExc_EOFError, - "EOF read where object expected"); - retval = NULL; - break; - case TYPE_NULL: retval = NULL; break; -- cgit v1.2.1 From e5879d2c7947222d4185fdbd495f1b7237aa6d95 Mon Sep 17 00:00:00 2001 From: 
Antoine Pitrou Date: Sat, 13 Apr 2013 22:41:09 +0200 Subject: Issue #15480: Remove the deprecated and unused TYPE_INT64 code from marshal. Initial patch by Daniel Riti. --- Python/marshal.c | 47 +---------------------------------------------- 1 file changed, 1 insertion(+), 46 deletions(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index bd889393d1..3e13851704 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -33,10 +33,6 @@ #define TYPE_STOPITER 'S' #define TYPE_ELLIPSIS '.' #define TYPE_INT 'i' -/* TYPE_INT64 is deprecated. It is not - generated anymore, and support for reading it - will be removed in Python 3.4. */ -#define TYPE_INT64 'I' #define TYPE_FLOAT 'f' #define TYPE_BINARY_FLOAT 'g' #define TYPE_COMPLEX 'x' @@ -638,42 +634,6 @@ r_long(RFILE *p) return x; } -/* r_long64 deals with the TYPE_INT64 code. On a machine with - sizeof(long) > 4, it returns a Python int object, else a Python long - object. Note that w_long64 writes out TYPE_INT if 32 bits is enough, - so there's no inefficiency here in returning a PyLong on 32-bit boxes - for everything written via TYPE_INT64 (i.e., if an int is written via - TYPE_INT64, it *needs* more than 32 bits). 
-*/ -static PyObject * -r_long64(RFILE *p) -{ - PyObject *result = NULL; - long lo4 = r_long(p); - long hi4 = r_long(p); - - if (!PyErr_Occurred()) { -#if SIZEOF_LONG > 4 - long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL); - result = PyLong_FromLong(x); -#else - unsigned char buf[8]; - int one = 1; - int is_little_endian = (int)*(char*)&one; - if (is_little_endian) { - memcpy(buf, &lo4, 4); - memcpy(buf+4, &hi4, 4); - } - else { - memcpy(buf, &hi4, 4); - memcpy(buf+4, &lo4, 4); - } - result = _PyLong_FromByteArray(buf, 8, is_little_endian, 1); -#endif - } - return result; -} - static PyObject * r_PyLong(RFILE *p) { @@ -871,11 +831,6 @@ r_object(RFILE *p) R_REF(retval); break; - case TYPE_INT64: - retval = r_long64(p); - R_REF(retval); - break; - case TYPE_LONG: retval = r_PyLong(p); R_REF(retval); @@ -1201,7 +1156,7 @@ r_object(RFILE *p) PyObject *name = NULL; int firstlineno; PyObject *lnotab = NULL; - + idx = r_ref_reserve(flag, p); if (idx < 0) { retval = NULL; -- cgit v1.2.1 From 697a47c050b4882e6d03961ef97717f54539a56f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 21 Jun 2013 19:08:06 +0200 Subject: marshal: optimize parsing of empty Unicode strings Don't create a temporary buffer of zero bytes nor call r_string() if the length is zero; create the empty string directly. 
--- Python/marshal.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index 3e13851704..e519fc9bfa 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -979,20 +979,25 @@ r_object(RFILE *p) retval = NULL; break; } - buffer = PyMem_NEW(char, n); - if (buffer == NULL) { - retval = PyErr_NoMemory(); - break; - } - if (r_string(buffer, n, p) != n) { + if (n != 0) { + buffer = PyMem_NEW(char, n); + if (buffer == NULL) { + retval = PyErr_NoMemory(); + break; + } + if (r_string(buffer, n, p) != n) { + PyMem_DEL(buffer); + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + retval = NULL; + break; + } + v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass"); PyMem_DEL(buffer); - PyErr_SetString(PyExc_EOFError, - "EOF read where object expected"); - retval = NULL; - break; } - v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass"); - PyMem_DEL(buffer); + else { + v = PyUnicode_New(0, 0); + } if (type == TYPE_INTERNED) PyUnicode_InternInPlace(&v); retval = v; -- cgit v1.2.1 From 7c9610ecfb17fd23e8f819866e978266fd7612ad Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 8 Jul 2013 22:23:32 +0200 Subject: Issue #18408: Fix marshal reader for Unicode strings: handle PyUnicode_DecodeUTF8() failure (ex: MemoryError). --- Python/marshal.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index e519fc9bfa..e97de59791 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -998,6 +998,10 @@ r_object(RFILE *p) else { v = PyUnicode_New(0, 0); } + if (v == NULL) { + retval = NULL; + break; + } if (type == TYPE_INTERNED) PyUnicode_InternInPlace(&v); retval = v; -- cgit v1.2.1 From a12be6ae60b6a559296d637e558408d4bc2092a5 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 13 Aug 2013 20:18:52 +0200 Subject: Issue #18722: Remove uses of the "register" keyword in C code. 
--- Python/marshal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index 1997e1964f..ec2d51f5f6 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -613,7 +613,7 @@ r_byte(RFILE *p) static int r_short(RFILE *p) { - register short x; + short x; unsigned char buffer[2]; r_string((char *) buffer, 2, p); @@ -627,7 +627,7 @@ r_short(RFILE *p) static long r_long(RFILE *p) { - register long x; + long x; unsigned char buffer[4]; r_string((char *) buffer, 4, p); -- cgit v1.2.1 From 455b2baf5e22aa32947e4e3096284046a6644f76 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sat, 12 Oct 2013 22:25:39 +0200 Subject: Issue #19219: Speed up marshal.loads(), and make pyc files slightly (5% to 10%) smaller. --- Python/marshal.c | 392 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 266 insertions(+), 126 deletions(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index 727605a4d1..12565f36ba 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -51,6 +51,12 @@ #define TYPE_FROZENSET '>' #define FLAG_REF '\x80' /* with a type, add obj to index */ +#define TYPE_ASCII 'a' +#define TYPE_ASCII_INTERNED 'A' +#define TYPE_SMALL_TUPLE ')' +#define TYPE_SHORT_ASCII 'z' +#define TYPE_SHORT_ASCII_INTERNED 'Z' + #define WFERR_OK 0 #define WFERR_UNMARSHALLABLE 1 #define WFERR_NESTEDTOODEEP 2 @@ -66,6 +72,8 @@ typedef struct { PyObject *current_filename; char *ptr; char *end; + char *buf; + Py_ssize_t buf_size; PyObject *refs; /* dict on marshal, list on unmarshal */ int version; } WFILE; @@ -148,6 +156,13 @@ w_pstring(const char *s, Py_ssize_t n, WFILE *p) w_string(s, n, p); } +static void +w_short_pstring(const char *s, Py_ssize_t n, WFILE *p) +{ + w_byte(n, p); + w_string(s, n, p); +} + /* We assume that Python ints are stored internally in base some power of 2**15; for the sake of portability we'll always read and write them in base 
exactly 2**15. */ @@ -394,24 +409,51 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p); } else if (PyUnicode_CheckExact(v)) { - PyObject *utf8; - utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass"); - if (utf8 == NULL) { - p->depth--; - p->error = WFERR_UNMARSHALLABLE; - return; + if (p->version >= 4 && PyUnicode_IS_ASCII(v)) { + int is_short = PyUnicode_GET_LENGTH(v) < 256; + if (is_short) { + if (PyUnicode_CHECK_INTERNED(v)) + W_TYPE(TYPE_SHORT_ASCII_INTERNED, p); + else + W_TYPE(TYPE_SHORT_ASCII, p); + w_short_pstring((char *) PyUnicode_1BYTE_DATA(v), + PyUnicode_GET_LENGTH(v), p); + } + else { + if (PyUnicode_CHECK_INTERNED(v)) + W_TYPE(TYPE_ASCII_INTERNED, p); + else + W_TYPE(TYPE_ASCII, p); + w_pstring((char *) PyUnicode_1BYTE_DATA(v), + PyUnicode_GET_LENGTH(v), p); + } + } + else { + PyObject *utf8; + utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass"); + if (utf8 == NULL) { + p->depth--; + p->error = WFERR_UNMARSHALLABLE; + return; + } + if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v)) + W_TYPE(TYPE_INTERNED, p); + else + W_TYPE(TYPE_UNICODE, p); + w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p); + Py_DECREF(utf8); } - if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v)) - W_TYPE(TYPE_INTERNED, p); - else - W_TYPE(TYPE_UNICODE, p); - w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p); - Py_DECREF(utf8); } else if (PyTuple_CheckExact(v)) { - W_TYPE(TYPE_TUPLE, p); n = PyTuple_Size(v); - W_SIZE(n, p); + if (p->version >= 4 && n < 256) { + W_TYPE(TYPE_SMALL_TUPLE, p); + w_byte(n, p); + } + else { + W_TYPE(TYPE_TUPLE, p); + W_SIZE(n, p); + } for (i = 0; i < n; i++) { w_object(PyTuple_GET_ITEM(v, i), p); } @@ -537,59 +579,75 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version) typedef WFILE RFILE; /* Same struct with different invariants */ -#define rs_byte(p) (((p)->ptr < (p)->end) ? 
(unsigned char)*(p)->ptr++ : EOF) - -static Py_ssize_t -r_string(char *s, Py_ssize_t n, RFILE *p) +static char * +r_string(Py_ssize_t n, RFILE *p) { - char *ptr; - Py_ssize_t read, left; - - if (!p->readable) { - if (p->fp != NULL) - /* The result fits into int because it must be <=n. */ - read = fread(s, 1, n, p->fp); - else { - left = p->end - p->ptr; - read = (left < n) ? left : n; - memcpy(s, p->ptr, read); - p->ptr += read; + Py_ssize_t read = -1; + + if (p->ptr != NULL) { + /* Fast path for loads() */ + char *res = p->ptr; + Py_ssize_t left = p->end - p->ptr; + if (left < n) { + PyErr_SetString(PyExc_EOFError, + "marshal data too short"); + return NULL; } + p->ptr += n; + return res; + } + if (p->buf == NULL) { + p->buf = PyMem_MALLOC(n); + if (p->buf == NULL) { + PyErr_NoMemory(); + return NULL; + } + p->buf_size = n; + } + else if (p->buf_size < n) { + p->buf = PyMem_REALLOC(p->buf, n); + if (p->buf == NULL) { + PyErr_NoMemory(); + return NULL; + } + p->buf_size = n; + } + if (!p->readable) { + assert(p->fp != NULL); + /* The result fits into int because it must be <=n. 
*/ + read = fread(p->buf, 1, n, p->fp); } else { - _Py_IDENTIFIER(read); - - PyObject *data = _PyObject_CallMethodId(p->readable, &PyId_read, "n", n); - read = 0; - if (data != NULL) { - if (!PyBytes_Check(data)) { - PyErr_Format(PyExc_TypeError, - "f.read() returned not bytes but %.100s", - data->ob_type->tp_name); - } - else { - read = (int)PyBytes_GET_SIZE(data); - if (read > 0) { - if (read > n) { - PyErr_Format(PyExc_ValueError, - "read() returned too much data: " - "%zd bytes requested, %zd returned", - n, read); - read = -1; - } - else { - ptr = PyBytes_AS_STRING(data); - memcpy(s, ptr, read); - } - } - } - Py_DECREF(data); + _Py_IDENTIFIER(readinto); + PyObject *res, *mview; + Py_buffer buf; + + if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1) + return NULL; + mview = PyMemoryView_FromBuffer(&buf); + if (mview == NULL) + return NULL; + + res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview); + if (res != NULL) { + read = PyNumber_AsSsize_t(res, PyExc_ValueError); + Py_DECREF(res); } } - if (!PyErr_Occurred() && (read < n)) { - PyErr_SetString(PyExc_EOFError, "EOF read where not expected"); + if (read != n) { + if (!PyErr_Occurred()) { + if (read > n) + PyErr_Format(PyExc_ValueError, + "read() returned too much data: " + "%zd bytes requested, %zd returned", + n, read); + else + PyErr_SetString(PyExc_EOFError, + "EOF read where not expected"); + } + return NULL; } - return read; + return p->buf; } @@ -597,15 +655,20 @@ static int r_byte(RFILE *p) { int c = EOF; - unsigned char ch; - Py_ssize_t n; - if (!p->readable) - c = p->fp ? 
getc(p->fp) : rs_byte(p); + if (p->ptr != NULL) { + if (p->ptr < p->end) + c = (unsigned char) *p->ptr++; + return c; + } + if (!p->readable) { + assert(p->fp); + c = getc(p->fp); + } else { - n = r_string((char *) &ch, 1, p); - if (n > 0) - c = ch; + char *ptr = r_string(1, p); + if (ptr != NULL) + c = *(unsigned char *) ptr; } return c; } @@ -613,32 +676,36 @@ r_byte(RFILE *p) static int r_short(RFILE *p) { - short x; - unsigned char buffer[2]; - - r_string((char *) buffer, 2, p); - x = buffer[0]; - x |= buffer[1] << 8; - /* Sign-extension, in case short greater than 16 bits */ - x |= -(x & 0x8000); + short x = -1; + unsigned char *buffer; + + buffer = (unsigned char *) r_string(2, p); + if (buffer != NULL) { + x = buffer[0]; + x |= buffer[1] << 8; + /* Sign-extension, in case short greater than 16 bits */ + x |= -(x & 0x8000); + } return x; } static long r_long(RFILE *p) { - long x; - unsigned char buffer[4]; - - r_string((char *) buffer, 4, p); - x = buffer[0]; - x |= (long)buffer[1] << 8; - x |= (long)buffer[2] << 16; - x |= (long)buffer[3] << 24; + long x = -1; + unsigned char *buffer; + + buffer = (unsigned char *) r_string(4, p); + if (buffer != NULL) { + x = buffer[0]; + x |= (long)buffer[1] << 8; + x |= (long)buffer[2] << 16; + x |= (long)buffer[3] << 24; #if SIZEOF_LONG > 4 - /* Sign extension for 64-bit machines */ - x |= -(x & 0x80000000L); + /* Sign extension for 64-bit machines */ + x |= -(x & 0x80000000L); #endif + } return x; } @@ -716,9 +783,7 @@ static Py_ssize_t r_ref_reserve(int flag, RFILE *p) { if (flag) { /* currently only FLAG_REF is defined */ - Py_ssize_t idx = PyList_Size(p->refs); - if (idx < 0) - return -1; + Py_ssize_t idx = PyList_GET_SIZE(p->refs); if (idx >= 0x7ffffffe) { PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)"); return -1; @@ -742,12 +807,10 @@ static PyObject * r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p) { if (o != NULL && flag) { /* currently only FLAG_REF is defined */ - if 
(PyList_SetItem(p->refs, idx, o) < 0) { - Py_DECREF(o); /* release the new object */ - return NULL; - } else { - Py_INCREF(o); /* a reference for the list */ - } + PyObject *tmp = PyList_GET_ITEM(p->refs, idx); + Py_INCREF(o); + PyList_SET_ITEM(p->refs, idx, o); + Py_DECREF(tmp); } return o; } @@ -777,7 +840,7 @@ r_object(RFILE *p) Py_ssize_t idx = 0; long i, n; int type, code = r_byte(p); - int flag; + int flag, is_interned = 0; PyObject *retval; if (code == EOF) { @@ -846,7 +909,7 @@ r_object(RFILE *p) case TYPE_FLOAT: { - char buf[256]; + char buf[256], *ptr; double dx; retval = NULL; n = r_byte(p); @@ -855,8 +918,10 @@ r_object(RFILE *p) "EOF read where object expected"); break; } - if (r_string(buf, n, p) != n) + ptr = r_string(n, p); + if (ptr == NULL) break; + memcpy(buf, ptr, n); buf[n] = '\0'; dx = PyOS_string_to_double(buf, NULL, NULL); if (dx == -1.0 && PyErr_Occurred()) @@ -868,9 +933,10 @@ r_object(RFILE *p) case TYPE_BINARY_FLOAT: { - unsigned char buf[8]; + unsigned char *buf; double x; - if (r_string((char*)buf, 8, p) != 8) { + buf = (unsigned char *) r_string(8, p); + if (buf == NULL) { retval = NULL; break; } @@ -886,7 +952,7 @@ r_object(RFILE *p) case TYPE_COMPLEX: { - char buf[256]; + char buf[256], *ptr; Py_complex c; retval = NULL; n = r_byte(p); @@ -895,8 +961,10 @@ r_object(RFILE *p) "EOF read where object expected"); break; } - if (r_string(buf, n, p) != n) + ptr = r_string(n, p); + if (ptr == NULL) break; + memcpy(buf, ptr, n); buf[n] = '\0'; c.real = PyOS_string_to_double(buf, NULL, NULL); if (c.real == -1.0 && PyErr_Occurred()) @@ -907,8 +975,10 @@ r_object(RFILE *p) "EOF read where object expected"); break; } - if (r_string(buf, n, p) != n) + ptr = r_string(n, p); + if (ptr == NULL) break; + memcpy(buf, ptr, n); buf[n] = '\0'; c.imag = PyOS_string_to_double(buf, NULL, NULL); if (c.imag == -1.0 && PyErr_Occurred()) @@ -920,9 +990,10 @@ r_object(RFILE *p) case TYPE_BINARY_COMPLEX: { - unsigned char buf[8]; + unsigned char *buf; Py_complex 
c; - if (r_string((char*)buf, 8, p) != 8) { + buf = (unsigned char *) r_string(8, p); + if (buf == NULL) { retval = NULL; break; } @@ -931,7 +1002,8 @@ r_object(RFILE *p) retval = NULL; break; } - if (r_string((char*)buf, 8, p) != 8) { + buf = (unsigned char *) r_string(8, p); + if (buf == NULL) { retval = NULL; break; } @@ -946,32 +1018,82 @@ r_object(RFILE *p) } case TYPE_STRING: + { + char *ptr; + n = r_long(p); + if (PyErr_Occurred()) { + retval = NULL; + break; + } + if (n < 0 || n > SIZE32_MAX) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); + retval = NULL; + break; + } + v = PyBytes_FromStringAndSize((char *)NULL, n); + if (v == NULL) { + retval = NULL; + break; + } + ptr = r_string(n, p); + if (ptr == NULL) { + Py_DECREF(v); + retval = NULL; + break; + } + memcpy(PyBytes_AS_STRING(v), ptr, n); + retval = v; + R_REF(retval); + break; + } + + case TYPE_ASCII_INTERNED: + is_interned = 1; + case TYPE_ASCII: n = r_long(p); if (PyErr_Occurred()) { retval = NULL; break; } if (n < 0 || n > SIZE32_MAX) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); + PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); retval = NULL; break; } - v = PyBytes_FromStringAndSize((char *)NULL, n); - if (v == NULL) { - retval = NULL; + goto _read_ascii; + + case TYPE_SHORT_ASCII_INTERNED: + is_interned = 1; + case TYPE_SHORT_ASCII: + n = r_byte(p); + if (n == EOF) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); break; } - if (r_string(PyBytes_AS_STRING(v), n, p) != n) { - Py_DECREF(v); - retval = NULL; + _read_ascii: + { + char *ptr; + ptr = r_string(n, p); + if (ptr == NULL) { + retval = NULL; + break; + } + v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n); + if (v == NULL) { + retval = NULL; + break; + } + if (is_interned) + PyUnicode_InternInPlace(&v); + retval = v; + R_REF(retval); break; } - retval = v; - R_REF(retval); - break; - case 
TYPE_UNICODE: case TYPE_INTERNED: + is_interned = 1; + case TYPE_UNICODE: { char *buffer; @@ -986,18 +1108,12 @@ r_object(RFILE *p) break; } if (n != 0) { - buffer = PyMem_NEW(char, n); + buffer = r_string(n, p); if (buffer == NULL) { - retval = PyErr_NoMemory(); - break; - } - if (r_string(buffer, n, p) != n) { - PyMem_DEL(buffer); retval = NULL; break; } v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass"); - PyMem_DEL(buffer); } else { v = PyUnicode_New(0, 0); @@ -1006,13 +1122,16 @@ r_object(RFILE *p) retval = NULL; break; } - if (type == TYPE_INTERNED) + if (is_interned) PyUnicode_InternInPlace(&v); retval = v; R_REF(retval); break; } + case TYPE_SMALL_TUPLE: + n = (unsigned char) r_byte(p); + goto _read_tuple; case TYPE_TUPLE: n = r_long(p); if (PyErr_Occurred()) { @@ -1024,6 +1143,7 @@ r_object(RFILE *p) retval = NULL; break; } + _read_tuple: v = PyTuple_New(n); R_REF(v); if (v == NULL) { @@ -1304,23 +1424,33 @@ int PyMarshal_ReadShortFromFile(FILE *fp) { RFILE rf; + int res; assert(fp); rf.readable = NULL; rf.fp = fp; rf.current_filename = NULL; rf.end = rf.ptr = NULL; - return r_short(&rf); + rf.buf = NULL; + res = r_short(&rf); + if (rf.buf != NULL) + PyMem_FREE(rf.buf); + return res; } long PyMarshal_ReadLongFromFile(FILE *fp) { RFILE rf; + long res; rf.fp = fp; rf.readable = NULL; rf.current_filename = NULL; rf.ptr = rf.end = NULL; - return r_long(&rf); + rf.buf = NULL; + res = r_long(&rf); + if (rf.buf != NULL) + PyMem_FREE(rf.buf); + return res; } #ifdef HAVE_FSTAT @@ -1379,11 +1509,14 @@ PyMarshal_ReadObjectFromFile(FILE *fp) rf.current_filename = NULL; rf.depth = 0; rf.ptr = rf.end = NULL; + rf.buf = NULL; rf.refs = PyList_New(0); if (rf.refs == NULL) return NULL; result = r_object(&rf); Py_DECREF(rf.refs); + if (rf.buf != NULL) + PyMem_FREE(rf.buf); return result; } @@ -1397,12 +1530,15 @@ PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len) rf.current_filename = NULL; rf.ptr = str; rf.end = str + len; + rf.buf = NULL; rf.depth = 0; rf.refs = 
PyList_New(0); if (rf.refs == NULL) return NULL; result = r_object(&rf); Py_DECREF(rf.refs); + if (rf.buf != NULL) + PyMem_FREE(rf.buf); return result; } @@ -1516,9 +1652,13 @@ marshal_load(PyObject *self, PyObject *f) rf.fp = NULL; rf.readable = f; rf.current_filename = NULL; + rf.ptr = rf.end = NULL; + rf.buf = NULL; if ((rf.refs = PyList_New(0)) != NULL) { result = read_object(&rf); Py_DECREF(rf.refs); + if (rf.buf != NULL) + PyMem_FREE(rf.buf); } else result = NULL; } -- cgit v1.2.1 From cd0b05b63f5e4b670a5329e9e6e8805c82868898 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sat, 12 Oct 2013 23:14:47 +0200 Subject: Try to fix weird assertion error on the Fedora buildbot. --- Python/marshal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index 12565f36ba..9ca23dbe15 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -83,7 +83,7 @@ typedef struct { else w_more(c, p) static void -w_more(int c, WFILE *p) +w_more(char c, WFILE *p) { Py_ssize_t size, newsize; if (p->str == NULL) @@ -100,7 +100,7 @@ w_more(int c, WFILE *p) p->ptr = PyBytes_AS_STRING((PyBytesObject *)p->str) + size; p->end = PyBytes_AS_STRING((PyBytesObject *)p->str) + newsize; - *p->ptr++ = Py_SAFE_DOWNCAST(c, int, char); + *p->ptr++ = c; } } @@ -159,7 +159,7 @@ w_pstring(const char *s, Py_ssize_t n, WFILE *p) static void w_short_pstring(const char *s, Py_ssize_t n, WFILE *p) { - w_byte(n, p); + w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p); w_string(s, n, p); } -- cgit v1.2.1 From 75db3e1fc8d59e8dcabdd6ba712c67bfc2cc0960 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sun, 13 Oct 2013 02:29:06 +0200 Subject: Issue #19219: retval may be used uninitialized value CID 486239: Uninitialized pointer read (UNINIT) --- Python/marshal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index 
9ca23dbe15..2addb2dbfe 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -841,7 +841,7 @@ r_object(RFILE *p) long i, n; int type, code = r_byte(p); int flag, is_interned = 0; - PyObject *retval; + PyObject *retval = NULL; if (code == EOF) { PyErr_SetString(PyExc_EOFError, -- cgit v1.2.1 From 12a32fe030c2cd32c2ed7b44484b8bbd6d9be29a Mon Sep 17 00:00:00 2001 From: Kristján Valur Jónsson Date: Sun, 13 Oct 2013 13:41:59 +0000 Subject: Issue #19219 Remove a lot of "retval = NULL" statements, now that retval is pre-initialized to that value. Test show a 5% speedup as a bonus. --- Python/marshal.c | 101 ++++++++++++------------------------------------------- 1 file changed, 22 insertions(+), 79 deletions(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index 2addb2dbfe..987d063f27 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -868,7 +868,6 @@ r_object(RFILE *p) switch (type) { case TYPE_NULL: - retval = NULL; break; case TYPE_NONE: @@ -911,7 +910,6 @@ r_object(RFILE *p) { char buf[256], *ptr; double dx; - retval = NULL; n = r_byte(p); if (n == EOF) { PyErr_SetString(PyExc_EOFError, @@ -936,15 +934,11 @@ r_object(RFILE *p) unsigned char *buf; double x; buf = (unsigned char *) r_string(8, p); - if (buf == NULL) { - retval = NULL; + if (buf == NULL) break; - } x = _PyFloat_Unpack8(buf, 1); - if (x == -1.0 && PyErr_Occurred()) { - retval = NULL; + if (x == -1.0 && PyErr_Occurred()) break; - } retval = PyFloat_FromDouble(x); R_REF(retval); break; @@ -954,7 +948,6 @@ r_object(RFILE *p) { char buf[256], *ptr; Py_complex c; - retval = NULL; n = r_byte(p); if (n == EOF) { PyErr_SetString(PyExc_EOFError, @@ -993,25 +986,17 @@ r_object(RFILE *p) unsigned char *buf; Py_complex c; buf = (unsigned char *) r_string(8, p); - if (buf == NULL) { - retval = NULL; + if (buf == NULL) break; - } c.real = _PyFloat_Unpack8(buf, 1); - if (c.real == -1.0 && PyErr_Occurred()) { - retval = NULL; + if (c.real == -1.0 && PyErr_Occurred()) break; - } 
buf = (unsigned char *) r_string(8, p); - if (buf == NULL) { - retval = NULL; + if (buf == NULL) break; - } c.imag = _PyFloat_Unpack8(buf, 1); - if (c.imag == -1.0 && PyErr_Occurred()) { - retval = NULL; + if (c.imag == -1.0 && PyErr_Occurred()) break; - } retval = PyComplex_FromCComplex(c); R_REF(retval); break; @@ -1021,24 +1006,18 @@ r_object(RFILE *p) { char *ptr; n = r_long(p); - if (PyErr_Occurred()) { - retval = NULL; + if (PyErr_Occurred()) break; - } if (n < 0 || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); - retval = NULL; break; } v = PyBytes_FromStringAndSize((char *)NULL, n); - if (v == NULL) { - retval = NULL; + if (v == NULL) break; - } ptr = r_string(n, p); if (ptr == NULL) { Py_DECREF(v); - retval = NULL; break; } memcpy(PyBytes_AS_STRING(v), ptr, n); @@ -1051,13 +1030,10 @@ r_object(RFILE *p) is_interned = 1; case TYPE_ASCII: n = r_long(p); - if (PyErr_Occurred()) { - retval = NULL; + if (PyErr_Occurred()) break; - } if (n < 0 || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); - retval = NULL; break; } goto _read_ascii; @@ -1075,15 +1051,11 @@ r_object(RFILE *p) { char *ptr; ptr = r_string(n, p); - if (ptr == NULL) { - retval = NULL; + if (ptr == NULL) break; - } v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n); - if (v == NULL) { - retval = NULL; + if (v == NULL) break; - } if (is_interned) PyUnicode_InternInPlace(&v); retval = v; @@ -1098,30 +1070,23 @@ r_object(RFILE *p) char *buffer; n = r_long(p); - if (PyErr_Occurred()) { - retval = NULL; + if (PyErr_Occurred()) break; - } if (n < 0 || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); - retval = NULL; break; } if (n != 0) { buffer = r_string(n, p); - if (buffer == NULL) { - retval = NULL; + if (buffer == NULL) break; - } v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass"); } else { v = PyUnicode_New(0, 0); } - if (v == 
NULL) { - retval = NULL; + if (v == NULL) break; - } if (is_interned) PyUnicode_InternInPlace(&v); retval = v; @@ -1134,22 +1099,17 @@ r_object(RFILE *p) goto _read_tuple; case TYPE_TUPLE: n = r_long(p); - if (PyErr_Occurred()) { - retval = NULL; + if (PyErr_Occurred()) break; - } if (n < 0 || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); - retval = NULL; break; } _read_tuple: v = PyTuple_New(n); R_REF(v); - if (v == NULL) { - retval = NULL; + if (v == NULL) break; - } for (i = 0; i < n; i++) { v2 = r_object(p); if ( v2 == NULL ) { @@ -1167,21 +1127,16 @@ r_object(RFILE *p) case TYPE_LIST: n = r_long(p); - if (PyErr_Occurred()) { - retval = NULL; + if (PyErr_Occurred()) break; - } if (n < 0 || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); - retval = NULL; break; } v = PyList_New(n); R_REF(v); - if (v == NULL) { - retval = NULL; + if (v == NULL) break; - } for (i = 0; i < n; i++) { v2 = r_object(p); if ( v2 == NULL ) { @@ -1200,10 +1155,8 @@ r_object(RFILE *p) case TYPE_DICT: v = PyDict_New(); R_REF(v); - if (v == NULL) { - retval = NULL; + if (v == NULL) break; - } for (;;) { PyObject *key, *val; key = r_object(p); @@ -1225,13 +1178,10 @@ r_object(RFILE *p) case TYPE_SET: case TYPE_FROZENSET: n = r_long(p); - if (PyErr_Occurred()) { - retval = NULL; + if (PyErr_Occurred()) break; - } if (n < 0 || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); - retval = NULL; break; } v = (type == TYPE_SET) ? 
PySet_New(NULL) : PyFrozenSet_New(NULL); @@ -1245,10 +1195,8 @@ r_object(RFILE *p) if (idx < 0) Py_CLEAR(v); /* signal error */ } - if (v == NULL) { - retval = NULL; + if (v == NULL) break; - } for (i = 0; i < n; i++) { v2 = r_object(p); if ( v2 == NULL ) { @@ -1291,10 +1239,8 @@ r_object(RFILE *p) PyObject *lnotab = NULL; idx = r_ref_reserve(flag, p); - if (idx < 0) { - retval = NULL; + if (idx < 0) break; - } v = NULL; @@ -1381,13 +1327,11 @@ r_object(RFILE *p) n = r_long(p); if (n < 0 || n >= PyList_GET_SIZE(p->refs)) { PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); - retval = NULL; break; } v = PyList_GET_ITEM(p->refs, n); if (v == Py_None) { PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); - retval = NULL; break; } Py_INCREF(v); @@ -1398,7 +1342,6 @@ r_object(RFILE *p) /* Bogus data got written, which isn't ideal. This will let you keep working and recover. */ PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)"); - retval = NULL; break; } -- cgit v1.2.1 From 17dbd870a0e4dfdb86de6ad928bbdf43ec4a9d57 Mon Sep 17 00:00:00 2001 From: Kristján Valur Jónsson Date: Sun, 13 Oct 2013 15:19:56 +0000 Subject: Catch a few extra error cases in marshal.c --- Python/marshal.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index 987d063f27..7273e7cd3a 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -1297,6 +1297,8 @@ r_object(RFILE *p) if (name == NULL) goto code_error; firstlineno = (int)r_long(p); + if (firstlineno == -1 && PyErr_Occurred()) + break; lnotab = r_object(p); if (lnotab == NULL) goto code_error; @@ -1326,6 +1328,8 @@ r_object(RFILE *p) case TYPE_REF: n = r_long(p); if (n < 0 || n >= PyList_GET_SIZE(p->refs)) { + if (n == -1 && PyErr_Occurred()) + break; PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); break; } -- cgit v1.2.1 From 798246eedea293b7a7b62af0497fa71f1a38d242 Mon Sep 17 
00:00:00 2001 From: Antoine Pitrou Date: Mon, 14 Oct 2013 20:50:32 +0200 Subject: Close #19260: remove outdated comment in marshal.c --- Python/marshal.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index 7273e7cd3a..f94276ad32 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -614,7 +614,6 @@ r_string(Py_ssize_t n, RFILE *p) } if (!p->readable) { assert(p->fp != NULL); - /* The result fits into int because it must be <=n. */ read = fread(p->buf, 1, n, p->fp); } else { @@ -650,7 +649,6 @@ r_string(Py_ssize_t n, RFILE *p) return p->buf; } - static int r_byte(RFILE *p) { -- cgit v1.2.1 From 6e75c19396402b107f1dfffa8746c6b8b679a9a4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 19 Oct 2013 21:03:34 +0300 Subject: Issue #1772673: The type of `char*` arguments now changed to `const char*`. --- Python/marshal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Python/marshal.c') diff --git a/Python/marshal.c b/Python/marshal.c index f94276ad32..4401afb211 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -1466,15 +1466,15 @@ PyMarshal_ReadObjectFromFile(FILE *fp) } PyObject * -PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len) +PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len) { RFILE rf; PyObject *result; rf.fp = NULL; rf.readable = NULL; rf.current_filename = NULL; - rf.ptr = str; - rf.end = str + len; + rf.ptr = (char *)str; + rf.end = (char *)str + len; rf.buf = NULL; rf.depth = 0; rf.refs = PyList_New(0); -- cgit v1.2.1