From d73c1dbe016563ce1020d2fc3d1074b17e6cc5f3 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Sat, 29 Sep 2012 13:47:39 -0400 Subject: upgrade to UCD 6.2 --- Objects/unicodetype_db.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodetype_db.h b/Objects/unicodetype_db.h index 46a92bbd44..1009bb3bc7 100644 --- a/Objects/unicodetype_db.h +++ b/Objects/unicodetype_db.h @@ -1919,7 +1919,7 @@ static unsigned short index2[] = { 246, 247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 95, 245, 26, 22, 23, 246, 247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 0, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 6, 6, 6, 6, 25, 6, 6, 6, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 113, 5, 5, @@ -2593,10 +2593,10 @@ static unsigned short index2[] = { 0, 0, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, - 141, 141, 141, 141, 141, 141, 141, 141, 141, 252, 252, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, - 141, 141, 141, 252, 252, 141, 141, 141, 141, 141, 141, 141, 141, 141, + 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, + 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
@@ -2925,6 +2925,9 @@ static unsigned short index2[] = { double _PyUnicode_ToNumeric(Py_UCS4 ch) { switch (ch) { + case 0x12456: + case 0x12457: + return (double) -1.0; case 0x0F33: return (double) -1.0/2.0; case 0x0030: @@ -3427,6 +3430,8 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch) return (double) 20000.0; case 0x3251: return (double) 21.0; + case 0x12432: + return (double) 216000.0; case 0x3252: return (double) 22.0; case 0x3253: @@ -3721,6 +3726,8 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch) return (double) 42.0; case 0x32B8: return (double) 43.0; + case 0x12433: + return (double) 432000.0; case 0x32B9: return (double) 44.0; case 0x32BA: -- cgit v1.2.1 From 9b7dd8a6114201295c5dce3a6cd3d48550a88056 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 2 Oct 2012 00:33:47 +0200 Subject: Issue #15609: Optimize str%args for integer argument - Use _PyLong_FormatWriter() instead of formatlong() when possible, to avoid a temporary buffer - Enable the fast path when width is smaller or equals to the length, and when the precision is bigger or equals to the length - Add unit tests! 
- formatlong() uses PyUnicode_Resize() instead of _PyUnicode_FromASCII() to resize the output string --- Objects/unicodeobject.c | 168 +++++++++++++++++++++++++++++------------------- 1 file changed, 101 insertions(+), 67 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0da565a612..606aa3327c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13288,7 +13288,10 @@ formatlong(PyObject *val, int flags, int prec, int type) assert(PyLong_Check(val)); switch (type) { + default: + assert(!"'type' not in [diuoxX]"); case 'd': + case 'i': case 'u': /* Special-case boolean: we want 0/1 */ if (PyBool_Check(val)) @@ -13305,8 +13308,6 @@ formatlong(PyObject *val, int flags, int prec, int type) numnondigits = 2; result = PyNumber_ToBase(val, 16); break; - default: - assert(!"'type' not in [duoxX]"); } if (!result) return NULL; @@ -13379,15 +13380,94 @@ formatlong(PyObject *val, int flags, int prec, int type) if (buf[i] >= 'a' && buf[i] <= 'x') buf[i] -= 'a'-'A'; } - if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) { + if (!PyUnicode_Check(result) + || buf != PyUnicode_DATA(result)) { PyObject *unicode; unicode = _PyUnicode_FromASCII(buf, len); Py_DECREF(result); result = unicode; } + else if (len != PyUnicode_GET_LENGTH(result)) { + if (PyUnicode_Resize(&result, len) < 0) + Py_CLEAR(result); + } return result; } +/* Format an integer. 
+ * Return 1 if the number has been formatted into the writer, + * 0 if the number has been formatted into *p_result + * -1 and raise an exception on error */ +static int +mainformatlong(_PyUnicodeWriter *writer, PyObject *v, + int c, Py_ssize_t width, int prec, int flags, + PyObject **p_result) +{ + PyObject *iobj, *res; + + if (!PyNumber_Check(v)) + goto wrongtype; + + if (!PyLong_Check(v)) { + iobj = PyNumber_Long(v); + if (iobj == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) + goto wrongtype; + return -1; + } + assert(PyLong_Check(iobj)); + } + else { + iobj = v; + Py_INCREF(iobj); + } + + if (PyLong_CheckExact(v) + && width == -1 && prec == -1 + && !(flags & (F_SIGN | F_BLANK)) + && c != 'X') + { + /* Fast path */ + int alternate = flags & F_ALT; + int base; + + switch(c) + { + default: + assert(0 && "'type' not in [diuoxX]"); + case 'd': + case 'i': + case 'u': + base = 10; + break; + case 'o': + base = 8; + break; + case 'x': + case 'X': + base = 16; + break; + } + + if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) + return -1; + return 1; + } + + res = formatlong(iobj, flags, prec, c); + Py_DECREF(iobj); + if (res == NULL) + return -1; + *p_result = res; + return 0; + +wrongtype: + PyErr_Format(PyExc_TypeError, + "%%%c format: a number is required, " + "not %.200s", (char)c, Py_TYPE(v)->tp_name); + return -1; +} + static Py_UCS4 formatchar(PyObject *v) { @@ -13493,7 +13573,6 @@ PyUnicode_Format(PyObject *format, PyObject *args) Py_UCS4 fill; int sign; Py_UCS4 signchar; - int isnumok; PyObject *v = NULL; void *pbuf = NULL; Py_ssize_t pindex, len; @@ -13692,64 +13771,18 @@ PyUnicode_Format(PyObject *format, PyObject *args) case 'o': case 'x': case 'X': - if (PyLong_CheckExact(v) - && width == -1 && prec == -1 - && !(flags & (F_SIGN | F_BLANK))) - { - /* Fast path */ - switch(c) - { - case 'd': - case 'i': - case 'u': - if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1) - goto onError; - goto nextarg; - case 'x': - if 
(_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1) - goto onError; - goto nextarg; - case 'o': - if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1) - goto onError; - goto nextarg; - default: - break; - } - } - - isnumok = 0; - if (PyNumber_Check(v)) { - PyObject *iobj=NULL; - - if (PyLong_Check(v)) { - iobj = v; - Py_INCREF(iobj); - } - else { - iobj = PyNumber_Long(v); - } - if (iobj!=NULL) { - if (PyLong_Check(iobj)) { - isnumok = 1; - sign = 1; - temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c)); - Py_DECREF(iobj); - } - else { - Py_DECREF(iobj); - } - } - } - if (!isnumok) { - PyErr_Format(PyExc_TypeError, - "%%%c format: a number is required, " - "not %.200s", (char)c, Py_TYPE(v)->tp_name); + { + int ret = mainformatlong(&writer, v, c, width, prec, + flags, &temp); + if (ret == 1) + goto nextarg; + if (ret == -1) goto onError; - } + sign = 1; if (flags & F_ZERO) fill = '0'; break; + } case 'e': case 'E': @@ -13803,7 +13836,14 @@ PyUnicode_Format(PyObject *format, PyObject *args) goto onError; assert (PyUnicode_Check(temp)); - if (width == -1 && prec == -1 + if (PyUnicode_READY(temp) == -1) { + Py_CLEAR(temp); + goto onError; + } + + len = PyUnicode_GET_LENGTH(temp); + if ((width == -1 || width <= len) + && (prec == -1 || prec >= len) && !(flags & (F_SIGN | F_BLANK))) { /* Fast path */ @@ -13812,20 +13852,14 @@ PyUnicode_Format(PyObject *format, PyObject *args) goto nextarg; } - if (PyUnicode_READY(temp) == -1) { - Py_CLEAR(temp); - goto onError; - } - kind = PyUnicode_KIND(temp); - pbuf = PyUnicode_DATA(temp); - len = PyUnicode_GET_LENGTH(temp); - if (c == 's' || c == 'r' || c == 'a') { if (prec >= 0 && len > prec) len = prec; } /* pbuf is initialized here. 
*/ + kind = PyUnicode_KIND(temp); + pbuf = PyUnicode_DATA(temp); pindex = 0; if (sign) { Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex); -- cgit v1.2.1 From 96433418062944146bfd48f02611735257d0106e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 2 Oct 2012 12:54:07 +0200 Subject: Issue #15609: Fix refleak introduced by my last optimization --- Objects/unicodeobject.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 606aa3327c..83f2a2a9ba 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13449,8 +13449,11 @@ mainformatlong(_PyUnicodeWriter *writer, PyObject *v, break; } - if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) + if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) { + Py_DECREF(iobj); return -1; + } + Py_DECREF(iobj); return 1; } -- cgit v1.2.1 From 3358e37a8f67949aacb0d329543a1da335257b1f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 3 Oct 2012 23:03:17 +0200 Subject: Unicode: resize_compact() and resize_inplace() fills also the Unicode strings with invalid bytes in debug mode, as done by PyUnicode_New() --- Objects/unicodeobject.c | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 83f2a2a9ba..09067e919c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -640,6 +640,25 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind, } } +#ifdef Py_DEBUG +/* Fill the data of an Unicode string with invalid characters to detect bugs + earlier. + + _PyUnicode_CheckConsistency(str, 1) detects invalid characters, at least for + ASCII and UCS-4 strings. U+00FF is invalid in ASCII and U+FFFFFFFF is an + invalid character in Unicode 6.0. 
*/ +static void +unicode_fill_invalid(PyObject *unicode, Py_ssize_t old_length) +{ + int kind = PyUnicode_KIND(unicode); + Py_UCS1 *data = PyUnicode_1BYTE_DATA(unicode); + Py_ssize_t length = _PyUnicode_LENGTH(unicode); + if (length <= old_length) + return; + memset(data + old_length * kind, 0xff, (length - old_length) * kind); +} +#endif + static PyObject* resize_compact(PyObject *unicode, Py_ssize_t length) { @@ -648,6 +667,10 @@ resize_compact(PyObject *unicode, Py_ssize_t length) Py_ssize_t new_size; int share_wstr; PyObject *new_unicode; +#ifdef Py_DEBUG + Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); +#endif + assert(unicode_modifiable(unicode)); assert(PyUnicode_IS_READY(unicode)); assert(PyUnicode_IS_COMPACT(unicode)); @@ -683,6 +706,9 @@ resize_compact(PyObject *unicode, Py_ssize_t length) if (!PyUnicode_IS_ASCII(unicode)) _PyUnicode_WSTR_LENGTH(unicode) = length; } +#ifdef Py_DEBUG + unicode_fill_invalid(unicode, old_length); +#endif PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode), length, 0); assert(_PyUnicode_CheckConsistency(unicode, 0)); @@ -701,6 +727,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) Py_ssize_t char_size; int share_wstr, share_utf8; void *data; +#ifdef Py_DEBUG + Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); +#endif data = _PyUnicode_DATA_ANY(unicode); char_size = PyUnicode_KIND(unicode); @@ -736,6 +765,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) } _PyUnicode_LENGTH(unicode) = length; PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0); +#ifdef Py_DEBUG + unicode_fill_invalid(unicode, old_length); +#endif if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) { assert(_PyUnicode_CheckConsistency(unicode, 0)); return 0; @@ -1060,11 +1092,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) } } #ifdef Py_DEBUG - /* Fill the data with invalid characters to detect bugs earlier. - _PyUnicode_CheckConsistency(str, 1) detects invalid characters, - at least for ASCII and UCS-4 strings. 
U+00FF is invalid in ASCII - and U+FFFFFFFF is an invalid character in Unicode 6.0. */ - memset(data, 0xff, size * kind); + unicode_fill_invalid((PyObject*)unicode, 0); #endif assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0)); return obj; -- cgit v1.2.1 From 00ff3488108497bee8189cd010ebaafe93e0e055 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 3 Oct 2012 23:03:46 +0200 Subject: PyUnicode_Format(): disable overallocation when we are writing the last part of the output string --- Objects/unicodeobject.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 09067e919c..53f0fb5a3f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13583,8 +13583,10 @@ PyUnicode_Format(PyObject *format, PyObject *args) fmtpos++; fmtcnt--; } - if (fmtcnt < 0) + if (fmtcnt < 0) { fmtpos--; + writer.overallocate = 0; + } sublen = fmtpos - nonfmtpos; maxchar = _PyUnicode_FindMaxChar(uformat, nonfmtpos, nonfmtpos + sublen); -- cgit v1.2.1 From 72c8948d0eb5cce77bf8c5465a1090cebf98c330 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 4 Oct 2012 02:19:54 +0200 Subject: Split the huge PyUnicode_Format() function (+540 lines) into subfunctions --- Objects/unicodeobject.c | 1077 ++++++++++++++++++++++++++--------------------- 1 file changed, 605 insertions(+), 472 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 53f0fb5a3f..2481b96dda 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13224,16 +13224,39 @@ static PyMappingMethods unicode_as_mapping = { /* Helpers for PyUnicode_Format() */ +struct unicode_formatter_t { + PyObject *args; + int args_owned; + Py_ssize_t arglen, argidx; + PyObject *dict; + + enum PyUnicode_Kind fmtkind; + Py_ssize_t fmtcnt, fmtpos; + void *fmtdata; + PyObject *fmtstr; + + _PyUnicodeWriter writer; +}; + +struct unicode_format_arg_t { + Py_UCS4 ch; + int 
flags; + Py_ssize_t width; + int prec; + int sign; +}; + static PyObject * -getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) +unicode_format_getnextarg(struct unicode_formatter_t *ctx) { - Py_ssize_t argidx = *p_argidx; - if (argidx < arglen) { - (*p_argidx)++; - if (arglen < 0) - return args; + Py_ssize_t argidx = ctx->argidx; + + if (argidx < ctx->arglen) { + ctx->argidx++; + if (ctx->arglen < 0) + return ctx->args; else - return PyTuple_GetItem(args, argidx); + return PyTuple_GetItem(ctx->args, argidx); } PyErr_SetString(PyExc_TypeError, "not enough arguments for format string"); @@ -13242,23 +13265,34 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) /* Returns a new reference to a PyUnicode object, or NULL on failure. */ +/* Format a float into the writer if the writer is not NULL, or into *p_output + otherwise. + + Return 0 on success, raise an exception and return -1 on error. */ static int -formatfloat(PyObject *v, int flags, int prec, int type, - PyObject **p_output, _PyUnicodeWriter *writer) +formatfloat(PyObject *v, struct unicode_format_arg_t *arg, + PyObject **p_output, + _PyUnicodeWriter *writer) { char *p; double x; Py_ssize_t len; + int prec; + int dtoa_flags; x = PyFloat_AsDouble(v); if (x == -1.0 && PyErr_Occurred()) return -1; + prec = arg->prec; if (prec < 0) prec = 6; - p = PyOS_double_to_string(x, type, prec, - (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); + if (arg->flags & F_ALT) + dtoa_flags = Py_DTSF_ALT; + else + dtoa_flags = 0; + p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL); if (p == NULL) return -1; len = strlen(p); @@ -13295,7 +13329,7 @@ formatfloat(PyObject *v, int flags, int prec, int type, * produce a '-' sign, but can for Python's unbounded ints. 
*/ static PyObject* -formatlong(PyObject *val, int flags, int prec, int type) +formatlong(PyObject *val, struct unicode_format_arg_t *arg) { PyObject *result = NULL; char *buf; @@ -13305,6 +13339,8 @@ formatlong(PyObject *val, int flags, int prec, int type) Py_ssize_t llen; int numdigits; /* len == numnondigits + numdigits */ int numnondigits = 0; + int prec = arg->prec; + int type = arg->ch; /* Avoid exceeding SSIZE_T_MAX */ if (prec > INT_MAX-3) { @@ -13363,7 +13399,7 @@ formatlong(PyObject *val, int flags, int prec, int type) assert(numdigits > 0); /* Get rid of base marker unless F_ALT */ - if (((flags & F_ALT) == 0 && + if (((arg->flags & F_ALT) == 0 && (type == 'o' || type == 'x' || type == 'X'))) { assert(buf[sign] == '0'); assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' || @@ -13424,14 +13460,16 @@ formatlong(PyObject *val, int flags, int prec, int type) /* Format an integer. * Return 1 if the number has been formatted into the writer, - * 0 if the number has been formatted into *p_result + * 0 if the number has been formatted into *p_output * -1 and raise an exception on error */ static int -mainformatlong(_PyUnicodeWriter *writer, PyObject *v, - int c, Py_ssize_t width, int prec, int flags, - PyObject **p_result) +mainformatlong(PyObject *v, + struct unicode_format_arg_t *arg, + PyObject **p_output, + _PyUnicodeWriter *writer) { PyObject *iobj, *res; + char type = (char)arg->ch; if (!PyNumber_Check(v)) goto wrongtype; @@ -13451,15 +13489,15 @@ mainformatlong(_PyUnicodeWriter *writer, PyObject *v, } if (PyLong_CheckExact(v) - && width == -1 && prec == -1 - && !(flags & (F_SIGN | F_BLANK)) - && c != 'X') + && arg->width == -1 && arg->prec == -1 + && !(arg->flags & (F_SIGN | F_BLANK)) + && type != 'X') { /* Fast path */ - int alternate = flags & F_ALT; + int alternate = arg->flags & F_ALT; int base; - switch(c) + switch(type) { default: assert(0 && "'type' not in [diuoxX]"); @@ -13485,17 +13523,18 @@ mainformatlong(_PyUnicodeWriter *writer, PyObject *v, 
return 1; } - res = formatlong(iobj, flags, prec, c); + res = formatlong(iobj, arg); Py_DECREF(iobj); if (res == NULL) return -1; - *p_result = res; + *p_output = res; return 0; wrongtype: PyErr_Format(PyExc_TypeError, "%%%c format: a number is required, " - "not %.200s", (char)c, Py_TYPE(v)->tp_name); + "not %.200s", + type, Py_TYPE(v)->tp_name); return -1; } @@ -13531,494 +13570,588 @@ formatchar(PyObject *v) return (Py_UCS4) -1; } -PyObject * -PyUnicode_Format(PyObject *format, PyObject *args) -{ - Py_ssize_t fmtcnt, fmtpos, arglen, argidx; - int args_owned = 0; - PyObject *dict = NULL; - PyObject *temp = NULL; - PyObject *second = NULL; - PyObject *uformat; - void *fmt; - enum PyUnicode_Kind kind, fmtkind; - _PyUnicodeWriter writer; - Py_ssize_t sublen; - Py_UCS4 maxchar; +/* Parse options of an argument: flags, width, precision. + Handle also "%(name)" syntax. - if (format == NULL || args == NULL) { - PyErr_BadInternalCall(); - return NULL; - } - uformat = PyUnicode_FromObject(format); - if (uformat == NULL) - return NULL; - if (PyUnicode_READY(uformat) == -1) - Py_DECREF(uformat); + Return 0 if the argument has been formatted into arg->str. + Return 1 if the argument has been written into ctx->writer, + Raise an exception and return -1 on error. */ +static int +unicode_format_arg_parse(struct unicode_formatter_t *ctx, + struct unicode_format_arg_t *arg) +{ +#define FORMAT_READ(ctx) \ + PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos) - fmt = PyUnicode_DATA(uformat); - fmtkind = PyUnicode_KIND(uformat); - fmtcnt = PyUnicode_GET_LENGTH(uformat); - fmtpos = 0; + PyObject *v; - _PyUnicodeWriter_Init(&writer, fmtcnt + 100); + arg->ch = FORMAT_READ(ctx); + if (arg->ch == '(') { + /* Get argument value from a dictionary. Example: "%(name)s". 
*/ + Py_ssize_t keystart; + Py_ssize_t keylen; + PyObject *key; + int pcount = 1; - if (PyTuple_Check(args)) { - arglen = PyTuple_Size(args); - argidx = 0; - } - else { - arglen = -1; - argidx = -2; + if (ctx->dict == NULL) { + PyErr_SetString(PyExc_TypeError, + "format requires a mapping"); + return -1; + } + ++ctx->fmtpos; + --ctx->fmtcnt; + keystart = ctx->fmtpos; + /* Skip over balanced parentheses */ + while (pcount > 0 && --ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + if (arg->ch == ')') + --pcount; + else if (arg->ch == '(') + ++pcount; + ctx->fmtpos++; + } + keylen = ctx->fmtpos - keystart - 1; + if (ctx->fmtcnt < 0 || pcount > 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format key"); + return -1; + } + key = PyUnicode_Substring(ctx->fmtstr, + keystart, keystart + keylen); + if (key == NULL) + return -1; + if (ctx->args_owned) { + Py_DECREF(ctx->args); + ctx->args_owned = 0; + } + ctx->args = PyObject_GetItem(ctx->dict, key); + Py_DECREF(key); + if (ctx->args == NULL) + return -1; + ctx->args_owned = 1; + ctx->arglen = -1; + ctx->argidx = -2; + } + + /* Parse flags. Example: "%+i" => flags=F_SIGN. 
*/ + arg->flags = 0; + while (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; + switch (arg->ch) { + case '-': arg->flags |= F_LJUST; continue; + case '+': arg->flags |= F_SIGN; continue; + case ' ': arg->flags |= F_BLANK; continue; + case '#': arg->flags |= F_ALT; continue; + case '0': arg->flags |= F_ZERO; continue; + } + break; } - if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args)) - dict = args; - - while (--fmtcnt >= 0) { - if (PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') { - Py_ssize_t nonfmtpos; - nonfmtpos = fmtpos++; - while (fmtcnt >= 0 && - PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') { - fmtpos++; - fmtcnt--; - } - if (fmtcnt < 0) { - fmtpos--; - writer.overallocate = 0; - } - sublen = fmtpos - nonfmtpos; - maxchar = _PyUnicode_FindMaxChar(uformat, - nonfmtpos, nonfmtpos + sublen); - if (_PyUnicodeWriter_Prepare(&writer, sublen, maxchar) == -1) - goto onError; - _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos, - uformat, nonfmtpos, sublen); - writer.pos += sublen; + /* Parse width. 
Example: "%10s" => width=10 */ + arg->width = -1; + if (arg->ch == '*') { + v = unicode_format_getnextarg(ctx); + if (v == NULL) + return -1; + if (!PyLong_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "* wants int"); + return -1; } - else { - /* Got a format specifier */ - int flags = 0; - Py_ssize_t width = -1; - int prec = -1; - Py_UCS4 c = '\0'; - Py_UCS4 fill; - int sign; - Py_UCS4 signchar; - PyObject *v = NULL; - void *pbuf = NULL; - Py_ssize_t pindex, len; - Py_UCS4 bufmaxchar; - Py_ssize_t buflen; - - fmtpos++; - c = PyUnicode_READ(fmtkind, fmt, fmtpos); - if (c == '(') { - Py_ssize_t keystart; - Py_ssize_t keylen; - PyObject *key; - int pcount = 1; - - if (dict == NULL) { - PyErr_SetString(PyExc_TypeError, - "format requires a mapping"); - goto onError; - } - ++fmtpos; - --fmtcnt; - keystart = fmtpos; - /* Skip over balanced parentheses */ - while (pcount > 0 && --fmtcnt >= 0) { - c = PyUnicode_READ(fmtkind, fmt, fmtpos); - if (c == ')') - --pcount; - else if (c == '(') - ++pcount; - fmtpos++; - } - keylen = fmtpos - keystart - 1; - if (fmtcnt < 0 || pcount > 0) { - PyErr_SetString(PyExc_ValueError, - "incomplete format key"); - goto onError; - } - key = PyUnicode_Substring(uformat, - keystart, keystart + keylen); - if (key == NULL) - goto onError; - if (args_owned) { - Py_DECREF(args); - args_owned = 0; - } - args = PyObject_GetItem(dict, key); - Py_DECREF(key); - if (args == NULL) { - goto onError; - } - args_owned = 1; - arglen = -1; - argidx = -2; - } - while (--fmtcnt >= 0) { - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - switch (c) { - case '-': flags |= F_LJUST; continue; - case '+': flags |= F_SIGN; continue; - case ' ': flags |= F_BLANK; continue; - case '#': flags |= F_ALT; continue; - case '0': flags |= F_ZERO; continue; - } + arg->width = PyLong_AsLong(v); + if (arg->width == -1 && PyErr_Occurred()) + return -1; + if (arg->width < 0) { + arg->flags |= F_LJUST; + arg->width = -arg->width; + } + if (--ctx->fmtcnt >= 0) { + arg->ch = 
FORMAT_READ(ctx); + ctx->fmtpos++; + } + } + else if (arg->ch >= '0' && arg->ch <= '9') { + arg->width = arg->ch - '0'; + while (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; + if (arg->ch < '0' || arg->ch > '9') break; + /* Since arg->ch is unsigned, the RHS would end up as unsigned, + mixing signed and unsigned comparison. Since arg->ch is between + '0' and '9', casting to int is safe. */ + if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) { + PyErr_SetString(PyExc_ValueError, + "width too big"); + return -1; } - if (c == '*') { - v = getnextarg(args, arglen, &argidx); - if (v == NULL) - goto onError; - if (!PyLong_Check(v)) { - PyErr_SetString(PyExc_TypeError, - "* wants int"); - goto onError; - } - width = PyLong_AsLong(v); - if (width == -1 && PyErr_Occurred()) - goto onError; - if (width < 0) { - flags |= F_LJUST; - width = -width; - } - if (--fmtcnt >= 0) - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - } - else if (c >= '0' && c <= '9') { - width = c - '0'; - while (--fmtcnt >= 0) { - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - if (c < '0' || c > '9') - break; - /* Since c is unsigned, the RHS would end up as unsigned, - mixing signed and unsigned comparison. Since c is between - '0' and '9', casting to int is safe. */ - if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) { - PyErr_SetString(PyExc_ValueError, - "width too big"); - goto onError; - } - width = width*10 + (c - '0'); - } + arg->width = arg->width*10 + (arg->ch - '0'); + } + } + + /* Parse precision. 
Example: "%.3f" => prec=3 */ + arg->prec = -1; + if (arg->ch == '.') { + arg->prec = 0; + if (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; + } + if (arg->ch == '*') { + v = unicode_format_getnextarg(ctx); + if (v == NULL) + return -1; + if (!PyLong_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "* wants int"); + return -1; } - if (c == '.') { - prec = 0; - if (--fmtcnt >= 0) - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - if (c == '*') { - v = getnextarg(args, arglen, &argidx); - if (v == NULL) - goto onError; - if (!PyLong_Check(v)) { - PyErr_SetString(PyExc_TypeError, - "* wants int"); - goto onError; - } - prec = PyLong_AsLong(v); - if (prec == -1 && PyErr_Occurred()) - goto onError; - if (prec < 0) - prec = 0; - if (--fmtcnt >= 0) - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - } - else if (c >= '0' && c <= '9') { - prec = c - '0'; - while (--fmtcnt >= 0) { - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - if (c < '0' || c > '9') - break; - if (prec > (INT_MAX - ((int)c - '0')) / 10) { - PyErr_SetString(PyExc_ValueError, - "prec too big"); - goto onError; - } - prec = prec*10 + (c - '0'); - } - } - } /* prec */ - if (fmtcnt >= 0) { - if (c == 'h' || c == 'l' || c == 'L') { - if (--fmtcnt >= 0) - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); + arg->prec = PyLong_AsLong(v); + if (arg->prec == -1 && PyErr_Occurred()) + return -1; + if (arg->prec < 0) + arg->prec = 0; + if (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; + } + } + else if (arg->ch >= '0' && arg->ch <= '9') { + arg->prec = arg->ch - '0'; + while (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; + if (arg->ch < '0' || arg->ch > '9') + break; + if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) { + PyErr_SetString(PyExc_ValueError, + "prec too big"); + return -1; } + arg->prec = arg->prec*10 + (arg->ch - '0'); } - if (fmtcnt < 0) { - PyErr_SetString(PyExc_ValueError, - "incomplete format"); - goto onError; - } - if (fmtcnt == 0) - 
writer.overallocate = 0; + } + } - if (c == '%') { - if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1) - goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '%'); - writer.pos += 1; - continue; + /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */ + if (ctx->fmtcnt >= 0) { + if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') { + if (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; } + } + } + if (ctx->fmtcnt < 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format"); + return -1; + } + return 0; - v = getnextarg(args, arglen, &argidx); - if (v == NULL) - goto onError; +#undef FORMAT_READ +} - sign = 0; - signchar = '\0'; - fill = ' '; - switch (c) { +/* Format one argument. Supported conversion specifiers: - case 's': - case 'r': - case 'a': - if (PyLong_CheckExact(v) && width == -1 && prec == -1) { - /* Fast path */ - if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1) - goto onError; - goto nextarg; - } + - "s", "r", "a": any type + - "i", "d", "u", "o", "x", "X": int + - "e", "E", "f", "F", "g", "G": float + - "c": int or str (1 character) - if (PyUnicode_CheckExact(v) && c == 's') { - temp = v; - Py_INCREF(temp); - } - else { - if (c == 's') - temp = PyObject_Str(v); - else if (c == 'r') - temp = PyObject_Repr(v); - else - temp = PyObject_ASCII(v); - } - break; + Return 0 if the argument has been formatted into *p_str, + 1 if the argument has been written into ctx->writer, + -1 on error. 
*/ +static int +unicode_format_arg_format(struct unicode_formatter_t *ctx, + struct unicode_format_arg_t *arg, + PyObject **p_str) +{ + PyObject *v; + _PyUnicodeWriter *writer = &ctx->writer; - case 'i': - case 'd': - case 'u': - case 'o': - case 'x': - case 'X': - { - int ret = mainformatlong(&writer, v, c, width, prec, - flags, &temp); - if (ret == 1) - goto nextarg; - if (ret == -1) - goto onError; - sign = 1; - if (flags & F_ZERO) - fill = '0'; - break; - } + if (ctx->fmtcnt == 0) + ctx->writer.overallocate = 0; - case 'e': - case 'E': - case 'f': - case 'F': - case 'g': - case 'G': - if (width == -1 && prec == -1 - && !(flags & (F_SIGN | F_BLANK))) - { - /* Fast path */ - if (formatfloat(v, flags, prec, c, NULL, &writer) == -1) - goto onError; - goto nextarg; - } + if (arg->ch == '%') { + if (_PyUnicodeWriter_Prepare(writer, 1, '%') == -1) + return -1; + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '%'); + writer->pos += 1; + return 1; + } - sign = 1; - if (flags & F_ZERO) - fill = '0'; - if (formatfloat(v, flags, prec, c, &temp, NULL) == -1) - temp = NULL; - break; + v = unicode_format_getnextarg(ctx); + if (v == NULL) + return -1; - case 'c': - { - Py_UCS4 ch = formatchar(v); - if (ch == (Py_UCS4) -1) - goto onError; - if (width == -1 && prec == -1) { - /* Fast path */ - if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) - goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); - writer.pos += 1; - goto nextarg; - } - temp = PyUnicode_FromOrdinal(ch); - break; - } + arg->sign = 0; - default: - PyErr_Format(PyExc_ValueError, - "unsupported format character '%c' (0x%x) " - "at index %zd", - (31<=c && c<=126) ? 
(char)c : '?', - (int)c, - fmtpos - 1); - goto onError; - } - if (temp == NULL) - goto onError; - assert (PyUnicode_Check(temp)); + switch (arg->ch) { - if (PyUnicode_READY(temp) == -1) { - Py_CLEAR(temp); - goto onError; - } + case 's': + case 'r': + case 'a': + if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) { + /* Fast path */ + if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1) + return -1; + return 1; + } - len = PyUnicode_GET_LENGTH(temp); - if ((width == -1 || width <= len) - && (prec == -1 || prec >= len) - && !(flags & (F_SIGN | F_BLANK))) - { - /* Fast path */ - if (_PyUnicodeWriter_WriteStr(&writer, temp) == -1) - goto onError; - goto nextarg; - } + if (PyUnicode_CheckExact(v) && arg->ch == 's') { + *p_str = v; + Py_INCREF(*p_str); + } + else { + if (arg->ch == 's') + *p_str = PyObject_Str(v); + else if (arg->ch == 'r') + *p_str = PyObject_Repr(v); + else + *p_str = PyObject_ASCII(v); + } + break; - if (c == 's' || c == 'r' || c == 'a') { - if (prec >= 0 && len > prec) - len = prec; - } + case 'i': + case 'd': + case 'u': + case 'o': + case 'x': + case 'X': + { + int ret = mainformatlong(v, arg, p_str, writer); + if (ret != 0) + return ret; + arg->sign = 1; + break; + } - /* pbuf is initialized here. 
*/ - kind = PyUnicode_KIND(temp); - pbuf = PyUnicode_DATA(temp); - pindex = 0; - if (sign) { - Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex); - if (ch == '-' || ch == '+') { - signchar = ch; - len--; - pindex++; - } - else if (flags & F_SIGN) - signchar = '+'; - else if (flags & F_BLANK) - signchar = ' '; - else - sign = 0; - } - if (width < len) - width = len; - - /* Compute the length and maximum character of the - written characters */ - bufmaxchar = 127; - if (!(flags & F_LJUST)) { - if (sign) { - if ((width-1) > len) - bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); - } - else { - if (width > len) - bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); - } - } - maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len); - bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar); + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + if (arg->width == -1 && arg->prec == -1 + && !(arg->flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + if (formatfloat(v, arg, NULL, writer) == -1) + return -1; + return 1; + } - buflen = width; - if (sign && len == width) - buflen++; + arg->sign = 1; + if (formatfloat(v, arg, p_str, NULL) == -1) + return -1; + break; - if (_PyUnicodeWriter_Prepare(&writer, buflen, bufmaxchar) == -1) - goto onError; + case 'c': + { + Py_UCS4 ch = formatchar(v); + if (ch == (Py_UCS4) -1) + return -1; + if (arg->width == -1 && arg->prec == -1) { + /* Fast path */ + if (_PyUnicodeWriter_Prepare(writer, 1, ch) == -1) + return -1; + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch); + writer->pos += 1; + return 1; + } + *p_str = PyUnicode_FromOrdinal(ch); + break; + } - /* Write characters */ - if (sign) { - if (fill != ' ') { - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar); - writer.pos += 1; - } - if (width > len) - width--; - } - if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) { - assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); - assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c); - if (fill != ' ') { - 
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0'); - PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c); - writer.pos += 2; - pindex += 2; - } - width -= 2; - if (width < 0) - width = 0; - len -= 2; - } - if (width > len && !(flags & F_LJUST)) { - sublen = width - len; - FILL(writer.kind, writer.data, fill, writer.pos, sublen); - writer.pos += sublen; - width = len; - } - if (fill == ' ') { - if (sign) { - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar); - writer.pos += 1; - } - if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) { - assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); - assert(PyUnicode_READ(kind, pbuf, pindex+1) == c); - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0'); - PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c); - writer.pos += 2; - pindex += 2; - } - } + default: + PyErr_Format(PyExc_ValueError, + "unsupported format character '%c' (0x%x) " + "at index %zd", + (31<=arg->ch && arg->ch<=126) ? (char)arg->ch : '?', + (int)arg->ch, + ctx->fmtpos - 1); + return -1; + } + if (*p_str == NULL) + return -1; + assert (PyUnicode_Check(*p_str)); + return 0; +} - if (len) { - _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos, - temp, pindex, len); - writer.pos += len; - } - if (width > len) { - sublen = width - len; - FILL(writer.kind, writer.data, ' ', writer.pos, sublen); - writer.pos += sublen; - } +static int +unicode_format_arg_output(struct unicode_formatter_t *ctx, + struct unicode_format_arg_t *arg, + PyObject *str) +{ + Py_ssize_t len; + enum PyUnicode_Kind kind; + void *pbuf; + Py_ssize_t pindex; + Py_UCS4 signchar; + Py_ssize_t buflen; + Py_UCS4 maxchar, bufmaxchar; + Py_ssize_t sublen; + _PyUnicodeWriter *writer = &ctx->writer; + Py_UCS4 fill; -nextarg: - if (dict && (argidx < arglen) && c != '%') { - PyErr_SetString(PyExc_TypeError, - "not all arguments converted during string formatting"); + fill = ' '; + if (arg->sign && arg->flags & F_ZERO) + fill = '0'; + + if 
(PyUnicode_READY(str) == -1) + return -1; + + len = PyUnicode_GET_LENGTH(str); + if ((arg->width == -1 || arg->width <= len) + && (arg->prec == -1 || arg->prec >= len) + && !(arg->flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + if (_PyUnicodeWriter_WriteStr(writer, str) == -1) + return -1; + return 0; + } + + /* Truncate the string for "s", "r" and "a" formats + if the precision is set */ + if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') { + if (arg->prec >= 0 && len > arg->prec) + len = arg->prec; + } + + /* Adjust sign and width */ + kind = PyUnicode_KIND(str); + pbuf = PyUnicode_DATA(str); + pindex = 0; + signchar = '\0'; + if (arg->sign) { + Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex); + if (ch == '-' || ch == '+') { + signchar = ch; + len--; + pindex++; + } + else if (arg->flags & F_SIGN) + signchar = '+'; + else if (arg->flags & F_BLANK) + signchar = ' '; + else + arg->sign = 0; + } + if (arg->width < len) + arg->width = len; + + /* Prepare the writer */ + bufmaxchar = 127; + if (!(arg->flags & F_LJUST)) { + if (arg->sign) { + if ((arg->width-1) > len) + bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); + } + else { + if (arg->width > len) + bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); + } + } + maxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len); + bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar); + buflen = arg->width; + if (arg->sign && len == arg->width) + buflen++; + if (_PyUnicodeWriter_Prepare(writer, buflen, bufmaxchar) == -1) + return -1; + + /* Write the sign if needed */ + if (arg->sign) { + if (fill != ' ') { + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar); + writer->pos += 1; + } + if (arg->width > len) + arg->width--; + } + + /* Write the numeric prefix for "x", "X" and "o" formats + if the alternate form is used. + For example, write "0x" for the "%#x" format. 
*/ + if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) { + assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); + assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch); + if (fill != ' ') { + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0'); + PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch); + writer->pos += 2; + pindex += 2; + } + arg->width -= 2; + if (arg->width < 0) + arg->width = 0; + len -= 2; + } + + /* Pad left with the fill character if needed */ + if (arg->width > len && !(arg->flags & F_LJUST)) { + sublen = arg->width - len; + FILL(writer->kind, writer->data, fill, writer->pos, sublen); + writer->pos += sublen; + arg->width = len; + } + + /* If padding with spaces: write sign if needed and/or numeric prefix if + the alternate form is used */ + if (fill == ' ') { + if (arg->sign) { + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar); + writer->pos += 1; + } + if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) { + assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); + assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch); + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0'); + PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch); + writer->pos += 2; + pindex += 2; + } + } + + /* Write characters */ + if (len) { + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + str, pindex, len); + writer->pos += len; + } + + /* Pad right with the fill character if needed */ + if (arg->width > len) { + sublen = arg->width - len; + FILL(writer->kind, writer->data, ' ', writer->pos, sublen); + writer->pos += sublen; + } + return 0; +} + +/* Helper of PyUnicode_Format(): format one arg. + Return 0 on success, raise an exception and return -1 on error. 
*/ +static int +unicode_format_arg(struct unicode_formatter_t *ctx) +{ + struct unicode_format_arg_t arg; + PyObject *str; + int ret; + + ret = unicode_format_arg_parse(ctx, &arg); + if (ret == -1) + return -1; + + ret = unicode_format_arg_format(ctx, &arg, &str); + if (ret == -1) + return -1; + + if (ret != 1) { + ret = unicode_format_arg_output(ctx, &arg, str); + Py_DECREF(str); + if (ret == -1) + return -1; + } + + if (ctx->dict && (ctx->argidx < ctx->arglen) && arg.ch != '%') { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during string formatting"); + return -1; + } + return 0; +} + +PyObject * +PyUnicode_Format(PyObject *format, PyObject *args) +{ + struct unicode_formatter_t ctx; + + if (format == NULL || args == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + + ctx.fmtstr = PyUnicode_FromObject(format); + if (ctx.fmtstr == NULL) + return NULL; + if (PyUnicode_READY(ctx.fmtstr) == -1) { + Py_DECREF(ctx.fmtstr); + return NULL; + } + ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr); + ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr); + ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr); + ctx.fmtpos = 0; + + _PyUnicodeWriter_Init(&ctx.writer, ctx.fmtcnt + 100); + + if (PyTuple_Check(args)) { + ctx.arglen = PyTuple_Size(args); + ctx.argidx = 0; + } + else { + ctx.arglen = -1; + ctx.argidx = -2; + } + ctx.args_owned = 0; + if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args)) + ctx.dict = args; + else + ctx.dict = NULL; + ctx.args = args; + + while (--ctx.fmtcnt >= 0) { + if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') { + Py_ssize_t nonfmtpos, sublen; + Py_UCS4 maxchar; + + nonfmtpos = ctx.fmtpos++; + while (ctx.fmtcnt >= 0 && + PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') { + ctx.fmtpos++; + ctx.fmtcnt--; + } + if (ctx.fmtcnt < 0) { + ctx.fmtpos--; + ctx.writer.overallocate = 0; + } + sublen = ctx.fmtpos - nonfmtpos; + maxchar = _PyUnicode_FindMaxChar(ctx.fmtstr, + nonfmtpos, nonfmtpos + sublen); + 
if (_PyUnicodeWriter_Prepare(&ctx.writer, sublen, maxchar) == -1) goto onError; - } - Py_CLEAR(temp); - } /* '%' */ - } /* until end */ - if (argidx < arglen && !dict) { + + _PyUnicode_FastCopyCharacters(ctx.writer.buffer, ctx.writer.pos, + ctx.fmtstr, nonfmtpos, sublen); + ctx.writer.pos += sublen; + } + else { + ctx.fmtpos++; + if (unicode_format_arg(&ctx) == -1) + goto onError; + } + } + + if (ctx.argidx < ctx.arglen && !ctx.dict) { PyErr_SetString(PyExc_TypeError, "not all arguments converted during string formatting"); goto onError; } - if (args_owned) { - Py_DECREF(args); + if (ctx.args_owned) { + Py_DECREF(ctx.args); } - Py_DECREF(uformat); - Py_XDECREF(temp); - Py_XDECREF(second); - return _PyUnicodeWriter_Finish(&writer); + Py_DECREF(ctx.fmtstr); + return _PyUnicodeWriter_Finish(&ctx.writer); onError: - Py_DECREF(uformat); - Py_XDECREF(temp); - Py_XDECREF(second); - _PyUnicodeWriter_Dealloc(&writer); - if (args_owned) { - Py_DECREF(args); + Py_DECREF(ctx.fmtstr); + _PyUnicodeWriter_Dealloc(&ctx.writer); + if (ctx.args_owned) { + Py_DECREF(ctx.args); } return NULL; } -- cgit v1.2.1 From afa9b00f1bdf3212988743e438f5a9c9022e7851 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 4 Oct 2012 02:32:58 +0200 Subject: unicode_result_wchar(): move the assert() to the "#ifdef Py_DEBUG" block --- Objects/unicodeobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2481b96dda..98b10e9584 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -412,8 +412,6 @@ unicode_result_wchar(PyObject *unicode) #ifndef Py_DEBUG Py_ssize_t len; - assert(Py_REFCNT(unicode) == 1); - len = _PyUnicode_WSTR_LENGTH(unicode); if (len == 0) { Py_INCREF(unicode_empty); @@ -431,10 +429,12 @@ unicode_result_wchar(PyObject *unicode) } if (_PyUnicode_Ready(unicode) < 0) { - Py_XDECREF(unicode); + Py_DECREF(unicode); return NULL; } #else + assert(Py_REFCNT(unicode) == 1); 
+ /* don't make the result ready in debug mode to ensure that the caller makes the string ready before using it */ assert(_PyUnicode_CheckConsistency(unicode, 1)); -- cgit v1.2.1 From 9856be5694ed916af798ef17f7c0520e35941b74 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 4 Oct 2012 02:43:02 +0200 Subject: long_to_decimal_string_internal() doesn't need to write the final NULL character --- Objects/longobject.c | 1 - 1 file changed, 1 deletion(-) (limited to 'Objects') diff --git a/Objects/longobject.c b/Objects/longobject.c index 73413dd5af..24cf1a5f8f 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1651,7 +1651,6 @@ long_to_decimal_string_internal(PyObject *aa, else \ p = (TYPE*)PyUnicode_DATA(str) + strlen; \ \ - *p = '\0'; \ /* pout[0] through pout[size-2] contribute exactly \ _PyLong_DECIMAL_SHIFT digits each */ \ for (i=0; i < size - 1; i++) { \ -- cgit v1.2.1 From d5a0fb431a6a6e9c109bf818ba761f2a30b956df Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 4 Oct 2012 21:53:50 +0200 Subject: Enable also ptr==ptr optimization in PyUnicode_Compare() It was already implemented in PyUnicode_RichCompare() --- Objects/unicodeobject.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 98b10e9584..b84d888fbc 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10445,6 +10445,10 @@ unicode_compare(PyObject *str1, PyObject *str2) void *data1, *data2; Py_ssize_t len1, len2, i; + /* a string is equal to itself */ + if (str1 == str2) + return 0; + kind1 = PyUnicode_KIND(str1); kind2 = PyUnicode_KIND(str2); data1 = PyUnicode_DATA(str1); @@ -10531,10 +10535,7 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) return Py_True; } } - if (left == right) - result = 0; - else - result = unicode_compare(left, right); + result = unicode_compare(left, right); /* Convert the return value to a Boolean */ switch (op) { -- 
cgit v1.2.1 From 65f9af1b8cadf83a3a627404b4ff5158d43d6994 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 4 Oct 2012 22:59:45 +0200 Subject: Optimize unicode_compare(): use memcmp() when comparing two UCS1 strings --- Objects/unicodeobject.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b84d888fbc..c28d1802fc 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10443,7 +10443,8 @@ unicode_compare(PyObject *str1, PyObject *str2) { int kind1, kind2; void *data1, *data2; - Py_ssize_t len1, len2, i; + Py_ssize_t len1, len2; + Py_ssize_t i, len; /* a string is equal to itself */ if (str1 == str2) @@ -10455,17 +10456,33 @@ unicode_compare(PyObject *str1, PyObject *str2) data2 = PyUnicode_DATA(str2); len1 = PyUnicode_GET_LENGTH(str1); len2 = PyUnicode_GET_LENGTH(str2); + len = Py_MIN(len1, len2); - for (i = 0; i < len1 && i < len2; ++i) { - Py_UCS4 c1, c2; - c1 = PyUnicode_READ(kind1, data1, i); - c2 = PyUnicode_READ(kind2, data2, i); + if (kind1 == 1 && kind2 == 1) { + int cmp = memcmp(data1, data2, len); + /* normalize result of memcmp() into the range [-1; 1] */ + if (cmp < 0) + return -1; + if (cmp > 0) + return 1; + } + else { + for (i = 0; i < len; ++i) { + Py_UCS4 c1, c2; + c1 = PyUnicode_READ(kind1, data1, i); + c2 = PyUnicode_READ(kind2, data2, i); - if (c1 != c2) - return (c1 < c2) ? -1 : 1; + if (c1 != c2) + return (c1 < c2) ? -1 : 1; + } } - return (len1 < len2) ? 
-1 : (len1 != len2); + if (len1 == len2) + return 0; + if (len1 < len2) + return -1; + else + return 1; } int -- cgit v1.2.1 From 711091f06dcedb7223b2e997261fc98c85422ea9 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 6 Oct 2012 00:40:45 +0200 Subject: In debug mode, unicode_write_cstr() now checks that non-ASCII characters are not written into an ASCII string --- Objects/unicodeobject.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index daeb4b4db5..84bbf9afff 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1700,6 +1700,14 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index, switch (kind) { case PyUnicode_1BYTE_KIND: { assert(index + len <= PyUnicode_GET_LENGTH(unicode)); +#ifdef Py_DEBUG + if (PyUnicode_IS_ASCII(unicode)) { + Py_UCS4 maxchar = ucs1lib_find_max_char( + (const Py_UCS1*)str, + (const Py_UCS1*)str + len); + assert(maxchar < 128); + } +#endif memcpy((char *) data + index, str, len); break; } -- cgit v1.2.1 From b88a8dfe8dec8b24ce278c645ceb2cc901462cf8 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sat, 6 Oct 2012 14:03:24 +0200 Subject: Issue #16148: implemented PEP 424 --- Objects/abstract.c | 76 +++++++++++++++++++++++++++++------------------ Objects/bytearrayobject.c | 2 +- Objects/bytesobject.c | 2 +- Objects/iterobject.c | 11 +++++-- Objects/listobject.c | 2 +- 5 files changed, 58 insertions(+), 35 deletions(-) (limited to 'Objects') diff --git a/Objects/abstract.c b/Objects/abstract.c index a2737dd5f4..b6fc478595 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -64,49 +64,67 @@ PyObject_Length(PyObject *o) } #define PyObject_Length PyObject_Size +int +_PyObject_HasLen(PyObject *o) { + return (Py_TYPE(o)->tp_as_sequence && Py_TYPE(o)->tp_as_sequence->sq_length) || + (Py_TYPE(o)->tp_as_mapping && Py_TYPE(o)->tp_as_mapping->mp_length); +} /* The length hint function returns a non-negative value from o.__len__() 
- or o.__length_hint__(). If those methods aren't found or return a negative - value, then the defaultvalue is returned. If one of the calls fails, - this function returns -1. + or o.__length_hint__(). If those methods aren't found. If one of the calls + fails this function returns -1. */ Py_ssize_t -_PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue) +PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue) { _Py_IDENTIFIER(__length_hint__); - PyObject *ro, *hintmeth; - Py_ssize_t rv; - - /* try o.__len__() */ - rv = PyObject_Size(o); - if (rv >= 0) - return rv; - if (PyErr_Occurred()) { - if (!PyErr_ExceptionMatches(PyExc_TypeError)) + Py_ssize_t res = PyObject_Length(o); + if (res < 0 && PyErr_Occurred()) { + if (!PyErr_ExceptionMatches(PyExc_TypeError)) { return -1; + } PyErr_Clear(); } - - /* try o.__length_hint__() */ - hintmeth = _PyObject_LookupSpecial(o, &PyId___length_hint__); - if (hintmeth == NULL) { - if (PyErr_Occurred()) + else { + return res; + } + PyObject *hint = _PyObject_LookupSpecial(o, &PyId___length_hint__); + if (hint == NULL) { + if (PyErr_Occurred()) { return -1; - else + } + return defaultvalue; + } + PyObject *result = PyObject_CallFunctionObjArgs(hint, NULL); + Py_DECREF(hint); + if (result == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); return defaultvalue; + } + return -1; } - ro = PyObject_CallFunctionObjArgs(hintmeth, NULL); - Py_DECREF(hintmeth); - if (ro == NULL) { - if (!PyErr_ExceptionMatches(PyExc_TypeError)) - return -1; - PyErr_Clear(); + else if (result == Py_NotImplemented) { + Py_DECREF(result); return defaultvalue; } - rv = PyLong_Check(ro) ? 
PyLong_AsSsize_t(ro) : defaultvalue; - Py_DECREF(ro); - return rv; + if (!PyLong_Check(result)) { + PyErr_Format(PyExc_TypeError, "Length hint must be an integer, not %s", + Py_TYPE(result)->tp_name); + Py_DECREF(result); + return -1; + } + defaultvalue = PyLong_AsSsize_t(result); + Py_DECREF(result); + if (defaultvalue < 0 && PyErr_Occurred()) { + return -1; + } + if (defaultvalue < 0) { + PyErr_Format(PyExc_ValueError, "__length_hint__() should return >= 0"); + return -1; + } + return defaultvalue; } PyObject * @@ -1687,7 +1705,7 @@ PySequence_Tuple(PyObject *v) return NULL; /* Guess result size and allocate space. */ - n = _PyObject_LengthHint(v, 10); + n = PyObject_LengthHint(v, 10); if (n == -1) goto Fail; result = PyTuple_New(n); diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 2bb3a29daa..26c76d2687 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2282,7 +2282,7 @@ bytearray_extend(PyByteArrayObject *self, PyObject *arg) return NULL; /* Try to determine the length of the argument. 32 is arbitrary. 
*/ - buf_size = _PyObject_LengthHint(arg, 32); + buf_size = PyObject_LengthHint(arg, 32); if (buf_size == -1) { Py_DECREF(it); return NULL; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index bf9259f187..25c2326b95 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2651,7 +2651,7 @@ PyBytes_FromObject(PyObject *x) } /* For iterator version, create a string object and resize as needed */ - size = _PyObject_LengthHint(x, 64); + size = PyObject_LengthHint(x, 64); if (size == -1 && PyErr_Occurred()) return NULL; /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from diff --git a/Objects/iterobject.c b/Objects/iterobject.c index 3cfbeaf2e3..cf4af5b81f 100644 --- a/Objects/iterobject.c +++ b/Objects/iterobject.c @@ -76,9 +76,14 @@ iter_len(seqiterobject *it) Py_ssize_t seqsize, len; if (it->it_seq) { - seqsize = PySequence_Size(it->it_seq); - if (seqsize == -1) - return NULL; + if (_PyObject_HasLen(it->it_seq)) { + seqsize = PySequence_Size(it->it_seq); + if (seqsize == -1) + return NULL; + } + else { + return Py_NotImplemented; + } len = seqsize - it->it_index; if (len >= 0) return PyLong_FromSsize_t(len); diff --git a/Objects/listobject.c b/Objects/listobject.c index 6e0d094154..4cc34b5a3c 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -826,7 +826,7 @@ listextend(PyListObject *self, PyObject *b) iternext = *it->ob_type->tp_iternext; /* Guess a result list size. 
*/ - n = _PyObject_LengthHint(b, 8); + n = PyObject_LengthHint(b, 8); if (n == -1) { Py_DECREF(it); return NULL; -- cgit v1.2.1 From 7ddc0bb7deecd49a6949a0ef56566ef1c6c39fdf Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sat, 6 Oct 2012 14:28:58 +0200 Subject: Added notimplemented_dealloc for better error reporting --- Objects/object.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index f41718424d..858eebedf0 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1524,12 +1524,21 @@ notimplemented_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) Py_RETURN_NOTIMPLEMENTED; } +static void +notimplemented_dealloc(PyObject* ignore) +{ + /* This should never get called, but we also don't want to SEGV if + * we accidentally decref NotImplemented out of existence. + */ + Py_FatalError("deallocating NotImplemented"); +} + static PyTypeObject PyNotImplemented_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "NotImplementedType", 0, 0, - none_dealloc, /*tp_dealloc*/ /*never called*/ + notimplemented_dealloc, /*tp_dealloc*/ /*never called*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ -- cgit v1.2.1 From c9ed5f16826d33f073f5408c025658ac793f0ecc Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sat, 6 Oct 2012 14:30:32 +0200 Subject: Fixed a missing incref introduced by a7ec0a1b0f7c --- Objects/iterobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/iterobject.c b/Objects/iterobject.c index cf4af5b81f..9acd1b79d7 100644 --- a/Objects/iterobject.c +++ b/Objects/iterobject.c @@ -82,7 +82,7 @@ iter_len(seqiterobject *it) return NULL; } else { - return Py_NotImplemented; + Py_RETURN_NOTIMPLEMENTED; } len = seqsize - it->it_index; if (len >= 0) -- cgit v1.2.1 From d9d2492a3f82c328e3fdc4e7555a54145ef18329 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sat, 6 Oct 2012 17:13:29 +0200 Subject: move var declaration 
to top of block to fix compilation on Windows, fixes a7ec0a1b0f7c --- Objects/abstract.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/abstract.c b/Objects/abstract.c index b6fc478595..eb3a766c19 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -78,8 +78,10 @@ _PyObject_HasLen(PyObject *o) { Py_ssize_t PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue) { + PyObject *hint; + Py_ssize_t res; _Py_IDENTIFIER(__length_hint__); - Py_ssize_t res = PyObject_Length(o); + res = PyObject_Length(o); if (res < 0 && PyErr_Occurred()) { if (!PyErr_ExceptionMatches(PyExc_TypeError)) { return -1; @@ -89,7 +91,7 @@ PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue) else { return res; } - PyObject *hint = _PyObject_LookupSpecial(o, &PyId___length_hint__); + hint = _PyObject_LookupSpecial(o, &PyId___length_hint__); if (hint == NULL) { if (PyErr_Occurred()) { return -1; -- cgit v1.2.1 From d7a8383ac45d610d747c40a94971de095bc36de0 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sat, 6 Oct 2012 17:16:39 +0200 Subject: and another one --- Objects/abstract.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/abstract.c b/Objects/abstract.c index eb3a766c19..84e827ad8c 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -78,7 +78,7 @@ _PyObject_HasLen(PyObject *o) { Py_ssize_t PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue) { - PyObject *hint; + PyObject *hint, *result; Py_ssize_t res; _Py_IDENTIFIER(__length_hint__); res = PyObject_Length(o); @@ -98,7 +98,7 @@ PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue) } return defaultvalue; } - PyObject *result = PyObject_CallFunctionObjArgs(hint, NULL); + result = PyObject_CallFunctionObjArgs(hint, NULL); Py_DECREF(hint); if (result == NULL) { if (PyErr_ExceptionMatches(PyExc_TypeError)) { -- cgit v1.2.1 From c70ad401ec06ef059baeb0e5d9555721905f4176 Mon Sep 17 00:00:00 2001 From: Victor 
Stinner Date: Sat, 6 Oct 2012 23:03:36 +0200 Subject: Issue #16147: Rewrite PyUnicode_FromFormatV() to use _PyUnicodeWriter API * Simplify the code: replace 4 steps with one unique step using the _PyUnicodeWriter API. PyUnicode_Format() has the same design. It avoids storing intermediate results, which requires allocating an array of pointers on the heap. * Use the _PyUnicodeWriter API for speed (and its convenient API): overallocate the buffer to reduce the number of "realloc()" calls * Implement "width" and "precision" in Python, don't rely on sprintf(). It avoids the need for a temporary buffer allocated on the heap: only use a small buffer allocated on the stack. * Add _PyUnicodeWriter_WriteCstr() function * Split PyUnicode_FromFormatV() into two functions: add unicode_fromformat_arg(). * Inline parse_format_flags(): the format of an argument is now only parsed once, so a subfunction is no longer needed. * Optimize PyUnicode_FromFormatV() for characters between two "%" arguments: search for the next "%" and copy the substring in one chunk, instead of copying character by character.
--- Objects/unicodeobject.c | 814 ++++++++++++++++++++---------------------------- 1 file changed, 331 insertions(+), 483 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c3bf392cbf..562efedfe2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2301,16 +2301,9 @@ PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size) static void makefmt(char *fmt, int longflag, int longlongflag, int size_tflag, - int zeropad, int width, int precision, char c) + char c) { *fmt++ = '%'; - if (width) { - if (zeropad) - *fmt++ = '0'; - fmt += sprintf(fmt, "%d", width); - } - if (precision) - fmt += sprintf(fmt, ".%d", precision); if (longflag) *fmt++ = 'l'; else if (longlongflag) { @@ -2335,44 +2328,59 @@ makefmt(char *fmt, int longflag, int longlongflag, int size_tflag, *fmt = '\0'; } -/* helper for PyUnicode_FromFormatV() */ +/* maximum number of characters required for output of %ld. 21 characters + allows for 64-bit integers (in decimal) and an optional sign. */ +#define MAX_LONG_CHARS 21 +/* maximum number of characters required for output of %lld. + We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits, + plus 1 for the sign. 53/22 is an upper bound for log10(256). */ +#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22) static const char* -parse_format_flags(const char *f, - int *p_width, int *p_precision, - int *p_longflag, int *p_longlongflag, int *p_size_tflag) +unicode_fromformat_arg(_PyUnicodeWriter *writer, + const char *f, va_list *vargs) { - int width, precision, longflag, longlongflag, size_tflag; + const char *p; + Py_ssize_t len; + int zeropad; + int width; + int precision; + int longflag; + int longlongflag; + int size_tflag; + int fill; + + p = f; + f++; + zeropad = (*f == '0'); /* parse the width.precision part, e.g. 
"%2.5s" => width=2, precision=5 */ - f++; width = 0; - while (Py_ISDIGIT((unsigned)*f)) - width = (width*10) + *f++ - '0'; + while (Py_ISDIGIT((unsigned)*f)) { + width = (width*10) + (*f - '0'); + f++; + } precision = 0; if (*f == '.') { f++; - while (Py_ISDIGIT((unsigned)*f)) - precision = (precision*10) + *f++ - '0'; + while (Py_ISDIGIT((unsigned)*f)) { + precision = (precision*10) + (*f - '0'); + f++; + } if (*f == '%') { /* "%.3%s" => f points to "3" */ f--; } } if (*f == '\0') { - /* bogus format "%.1" => go backward, f points to "1" */ + /* bogus format "%.123" => go backward, f points to "3" */ f--; } - if (p_width != NULL) - *p_width = width; - if (p_precision != NULL) - *p_precision = precision; /* Handle %ld, %lu, %lld and %llu. */ longflag = 0; longlongflag = 0; size_tflag = 0; - if (*f == 'l') { if (f[1] == 'd' || f[1] == 'u' || f[1] == 'i') { longflag = 1; @@ -2391,494 +2399,321 @@ parse_format_flags(const char *f, size_tflag = 1; ++f; } - if (p_longflag != NULL) - *p_longflag = longflag; - if (p_longlongflag != NULL) - *p_longlongflag = longlongflag; - if (p_size_tflag != NULL) - *p_size_tflag = size_tflag; - return f; -} -/* maximum number of characters required for output of %ld. 21 characters - allows for 64-bit integers (in decimal) and an optional sign. */ -#define MAX_LONG_CHARS 21 -/* maximum number of characters required for output of %lld. - We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits, - plus 1 for the sign. 53/22 is an upper bound for log10(256). 
*/ -#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22) + if (f[1] == '\0') + writer->overallocate = 0; -PyObject * -PyUnicode_FromFormatV(const char *format, va_list vargs) -{ - va_list count; - Py_ssize_t callcount = 0; - PyObject **callresults = NULL; - PyObject **callresult = NULL; - Py_ssize_t n = 0; - int width = 0; - int precision = 0; - int zeropad; - const char* f; - PyObject *string; - /* used by sprintf */ - char fmt[61]; /* should be enough for %0width.precisionlld */ - Py_UCS4 maxchar = 127; /* result is ASCII by default */ - Py_UCS4 argmaxchar; - Py_ssize_t numbersize = 0; - char *numberresults = NULL; - char *numberresult = NULL; - Py_ssize_t i; - int kind; - void *data; + switch (*f) { + case 'c': + { + int ordinal = va_arg(*vargs, int); + if (_PyUnicodeWriter_Prepare(writer, 1, ordinal) == -1) + return NULL; + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ordinal); + writer->pos++; + break; + } - Py_VA_COPY(count, vargs); - /* step 1: count the number of %S/%R/%A/%s format specifications - * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/ - * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the - * result in an array) - * also estimate a upper bound for all the number formats in the string, - * numbers will be formatted in step 3 and be kept in a '\0'-separated - * buffer before putting everything together. */ - for (f = format; *f; f++) { - if (*f == '%') { - int longlongflag; - /* skip width or width.precision (eg. 
"1.2" of "%1.2f") */ - f = parse_format_flags(f, &width, NULL, NULL, &longlongflag, NULL); - if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V') - ++callcount; + case 'i': + case 'd': + case 'u': + case 'x': + { + /* used by sprintf */ + char fmt[10]; /* should be enough for "%0lld\0" */ + char small_buffer[MAX_LONG_CHARS]; + char *buffer; + int err; + + if (sizeof(small_buffer) - 1 < precision) { + buffer = PyMem_Malloc(precision + 1); + if (buffer == NULL) { + PyErr_NoMemory(); + return NULL; + } + } + else + buffer = small_buffer; + + if (*f == 'u') { + makefmt(fmt, longflag, longlongflag, size_tflag, *f); - else if (*f == 'd' || *f=='u' || *f=='i' || *f=='x' || *f=='p') { + if (longflag) + len = sprintf(buffer, fmt, + va_arg(*vargs, unsigned long)); #ifdef HAVE_LONG_LONG - if (longlongflag) { - if (width < MAX_LONG_LONG_CHARS) - width = MAX_LONG_LONG_CHARS; - } - else + else if (longlongflag) + len = sprintf(buffer, fmt, + va_arg(*vargs, unsigned PY_LONG_LONG)); +#endif + else if (size_tflag) + len = sprintf(buffer, fmt, + va_arg(*vargs, size_t)); + else + len = sprintf(buffer, fmt, + va_arg(*vargs, unsigned int)); + } + else if (*f == 'x') { + makefmt(fmt, 0, 0, 0, 'x'); + len = sprintf(buffer, fmt, va_arg(*vargs, int)); + } + else { + makefmt(fmt, longflag, longlongflag, size_tflag, *f); + + if (longflag) + len = sprintf(buffer, fmt, + va_arg(*vargs, long)); +#ifdef HAVE_LONG_LONG + else if (longlongflag) + len = sprintf(buffer, fmt, + va_arg(*vargs, PY_LONG_LONG)); #endif - /* MAX_LONG_CHARS is enough to hold a 64-bit integer, - including sign. Decimal takes the most space. This - isn't enough for octal. If a width is specified we - need more (which we allocate later). 
*/ - if (width < MAX_LONG_CHARS) - width = MAX_LONG_CHARS; - - /* account for the size + '\0' to separate numbers - inside of the numberresults buffer */ - numbersize += (width + 1); + else if (size_tflag) + len = sprintf(buffer, fmt, + va_arg(*vargs, Py_ssize_t)); + else + len = sprintf(buffer, fmt, + va_arg(*vargs, int)); + } + assert(len >= 0); + + err = 0; + if (precision < len) + precision = len; + if (width > precision) { + Py_UCS4 fillchar; + fill = width - precision; + fillchar = zeropad?'0':' '; + if (_PyUnicodeWriter_Prepare(writer, fill, fillchar) != -1) { + if (PyUnicode_Fill(writer->buffer, writer->pos, fill, fillchar) == -1) + err = 1; } + else + err = 1; + if (!err) + writer->pos += fill; + } + if (!err && precision > len) { + fill = precision - len; + if (_PyUnicodeWriter_Prepare(writer, fill, '0') != -1) { + if (PyUnicode_Fill(writer->buffer, writer->pos, fill, '0') == -1) + err = 1; + } + else + err = 1; + if (!err) + writer->pos += fill; + } + if (!err) { + if (_PyUnicodeWriter_WriteCstr(writer, buffer, len) == -1) + err = 1; } - else if ((unsigned char)*f > 127) { - PyErr_Format(PyExc_ValueError, - "PyUnicode_FromFormatV() expects an ASCII-encoded format " - "string, got a non-ASCII byte: 0x%02x", - (unsigned char)*f); + + if (buffer != small_buffer) { + PyMem_Free(buffer); + buffer = small_buffer; + } + if (err) return NULL; + + break; + } + + case 'p': + { + char number[MAX_LONG_LONG_CHARS]; + + len = sprintf(number, "%p", va_arg(*vargs, void*)); + assert(len >= 0); + + /* %p is ill-defined: ensure leading 0x. 
*/ + if (number[1] == 'X') + number[1] = 'x'; + else if (number[1] != 'x') { + memmove(number + 2, number, + strlen(number) + 1); + number[0] = '0'; + number[1] = 'x'; + len += 2; } + + if (_PyUnicodeWriter_WriteCstr(writer, number, len) == -1) + return NULL; + break; } - /* step 2: allocate memory for the results of - * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */ - if (callcount) { - callresults = PyObject_Malloc(sizeof(PyObject *) * callcount); - if (!callresults) { - PyErr_NoMemory(); + + case 's': + { + /* UTF-8 */ + const char *s = va_arg(*vargs, const char*); + PyObject *str = PyUnicode_DecodeUTF8Stateful(s, strlen(s), "replace", NULL); + if (!str) + return NULL; + if (_PyUnicodeWriter_WriteStr(writer, str) == -1) { + Py_DECREF(str); return NULL; } - callresult = callresults; + Py_DECREF(str); + break; } - /* step 2.5: allocate memory for the results of formating numbers */ - if (numbersize) { - numberresults = PyObject_Malloc(numbersize); - if (!numberresults) { - PyErr_NoMemory(); - goto fail; + + case 'U': + { + PyObject *obj = va_arg(*vargs, PyObject *); + assert(obj && _PyUnicode_CHECK(obj)); + + if (_PyUnicodeWriter_WriteStr(writer, obj) == -1) + return NULL; + break; + } + + case 'V': + { + PyObject *obj = va_arg(*vargs, PyObject *); + const char *str = va_arg(*vargs, const char *); + PyObject *str_obj; + assert(obj || str); + if (obj) { + assert(_PyUnicode_CHECK(obj)); + if (_PyUnicodeWriter_WriteStr(writer, obj) == -1) + return NULL; } - numberresult = numberresults; + else { + str_obj = PyUnicode_DecodeUTF8Stateful(str, strlen(str), "replace", NULL); + if (!str_obj) + return NULL; + if (_PyUnicodeWriter_WriteStr(writer, str_obj) == -1) { + Py_DECREF(str_obj); + return NULL; + } + Py_DECREF(str_obj); + } + break; } - /* step 3: format numbers and figure out how large a buffer we need */ - for (f = format; *f; f++) { - if (*f == '%') { - const char* p; - int longflag; - int longlongflag; - int size_tflag; - int numprinted; + case 
'S': + { + PyObject *obj = va_arg(*vargs, PyObject *); + PyObject *str; + assert(obj); + str = PyObject_Str(obj); + if (!str) + return NULL; + if (_PyUnicodeWriter_WriteStr(writer, str) == -1) { + Py_DECREF(str); + return NULL; + } + Py_DECREF(str); + break; + } - p = f; - zeropad = (f[1] == '0'); - f = parse_format_flags(f, &width, &precision, - &longflag, &longlongflag, &size_tflag); - switch (*f) { - case 'c': - { - Py_UCS4 ordinal = va_arg(count, int); - maxchar = MAX_MAXCHAR(maxchar, ordinal); - n++; - break; - } - case '%': - n++; - break; - case 'i': - case 'd': - makefmt(fmt, longflag, longlongflag, size_tflag, zeropad, - width, precision, *f); - if (longflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, long)); -#ifdef HAVE_LONG_LONG - else if (longlongflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, PY_LONG_LONG)); -#endif - else if (size_tflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, Py_ssize_t)); - else - numprinted = sprintf(numberresult, fmt, - va_arg(count, int)); - n += numprinted; - /* advance by +1 to skip over the '\0' */ - numberresult += (numprinted + 1); - assert(*(numberresult - 1) == '\0'); - assert(*(numberresult - 2) != '\0'); - assert(numprinted >= 0); - assert(numberresult <= numberresults + numbersize); - break; - case 'u': - makefmt(fmt, longflag, longlongflag, size_tflag, zeropad, - width, precision, 'u'); - if (longflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, unsigned long)); -#ifdef HAVE_LONG_LONG - else if (longlongflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, unsigned PY_LONG_LONG)); -#endif - else if (size_tflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, size_t)); - else - numprinted = sprintf(numberresult, fmt, - va_arg(count, unsigned int)); - n += numprinted; - numberresult += (numprinted + 1); - assert(*(numberresult - 1) == '\0'); - assert(*(numberresult - 2) != '\0'); - assert(numprinted >= 0); - assert(numberresult <= 
numberresults + numbersize); - break; - case 'x': - makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x'); - numprinted = sprintf(numberresult, fmt, va_arg(count, int)); - n += numprinted; - numberresult += (numprinted + 1); - assert(*(numberresult - 1) == '\0'); - assert(*(numberresult - 2) != '\0'); - assert(numprinted >= 0); - assert(numberresult <= numberresults + numbersize); - break; - case 'p': - numprinted = sprintf(numberresult, "%p", va_arg(count, void*)); - /* %p is ill-defined: ensure leading 0x. */ - if (numberresult[1] == 'X') - numberresult[1] = 'x'; - else if (numberresult[1] != 'x') { - memmove(numberresult + 2, numberresult, - strlen(numberresult) + 1); - numberresult[0] = '0'; - numberresult[1] = 'x'; - numprinted += 2; - } - n += numprinted; - numberresult += (numprinted + 1); - assert(*(numberresult - 1) == '\0'); - assert(*(numberresult - 2) != '\0'); - assert(numprinted >= 0); - assert(numberresult <= numberresults + numbersize); - break; - case 's': - { - /* UTF-8 */ - const char *s = va_arg(count, const char*); - PyObject *str = PyUnicode_DecodeUTF8Stateful(s, strlen(s), "replace", NULL); - if (!str) - goto fail; - /* since PyUnicode_DecodeUTF8 returns already flexible - unicode objects, there is no need to call ready on them */ - argmaxchar = PyUnicode_MAX_CHAR_VALUE(str); - maxchar = MAX_MAXCHAR(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(str); - /* Remember the str and switch to the next slot */ - *callresult++ = str; - break; - } - case 'U': - { - PyObject *obj = va_arg(count, PyObject *); - assert(obj && _PyUnicode_CHECK(obj)); - if (PyUnicode_READY(obj) == -1) - goto fail; - argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj); - maxchar = MAX_MAXCHAR(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(obj); - break; - } - case 'V': - { - PyObject *obj = va_arg(count, PyObject *); - const char *str = va_arg(count, const char *); - PyObject *str_obj; - assert(obj || str); - assert(!obj || _PyUnicode_CHECK(obj)); - if (obj) { - if 
(PyUnicode_READY(obj) == -1) - goto fail; - argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj); - maxchar = MAX_MAXCHAR(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(obj); - *callresult++ = NULL; - } - else { - str_obj = PyUnicode_DecodeUTF8Stateful(str, strlen(str), "replace", NULL); - if (!str_obj) - goto fail; - if (PyUnicode_READY(str_obj) == -1) { - Py_DECREF(str_obj); - goto fail; - } - argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj); - maxchar = MAX_MAXCHAR(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(str_obj); - *callresult++ = str_obj; - } - break; - } - case 'S': - { - PyObject *obj = va_arg(count, PyObject *); - PyObject *str; - assert(obj); - str = PyObject_Str(obj); - if (!str) - goto fail; - if (PyUnicode_READY(str) == -1) { - Py_DECREF(str); - goto fail; - } - argmaxchar = PyUnicode_MAX_CHAR_VALUE(str); - maxchar = MAX_MAXCHAR(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(str); - /* Remember the str and switch to the next slot */ - *callresult++ = str; - break; - } - case 'R': - { - PyObject *obj = va_arg(count, PyObject *); - PyObject *repr; - assert(obj); - repr = PyObject_Repr(obj); - if (!repr) - goto fail; - if (PyUnicode_READY(repr) == -1) { - Py_DECREF(repr); - goto fail; - } - argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr); - maxchar = MAX_MAXCHAR(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(repr); - /* Remember the repr and switch to the next slot */ - *callresult++ = repr; - break; - } - case 'A': - { - PyObject *obj = va_arg(count, PyObject *); - PyObject *ascii; - assert(obj); - ascii = PyObject_ASCII(obj); - if (!ascii) - goto fail; - if (PyUnicode_READY(ascii) == -1) { - Py_DECREF(ascii); - goto fail; - } - argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii); - maxchar = MAX_MAXCHAR(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(ascii); - /* Remember the repr and switch to the next slot */ - *callresult++ = ascii; - break; - } - default: - /* if we stumble upon an unknown - formatting code, copy the rest of - the format string to the 
output - string. (we cannot just skip the - code, since there's no way to know - what's in the argument list) */ - n += strlen(p); - goto expand; - } - } else - n++; - } - expand: - /* step 4: fill the buffer */ - /* Since we've analyzed how much space we need, - we don't have to resize the string. - There can be no errors beyond this point. */ - string = PyUnicode_New(n, maxchar); - if (!string) - goto fail; - kind = PyUnicode_KIND(string); - data = PyUnicode_DATA(string); - callresult = callresults; - numberresult = numberresults; + case 'R': + { + PyObject *obj = va_arg(*vargs, PyObject *); + PyObject *repr; + assert(obj); + repr = PyObject_Repr(obj); + if (!repr) + return NULL; + if (_PyUnicodeWriter_WriteStr(writer, repr) == -1) { + Py_DECREF(repr); + return NULL; + } + Py_DECREF(repr); + break; + } + + case 'A': + { + PyObject *obj = va_arg(*vargs, PyObject *); + PyObject *ascii; + assert(obj); + ascii = PyObject_ASCII(obj); + if (!ascii) + return NULL; + if (_PyUnicodeWriter_WriteStr(writer, ascii) == -1) { + Py_DECREF(ascii); + return NULL; + } + Py_DECREF(ascii); + break; + } - for (i = 0, f = format; *f; f++) { + case '%': + if (_PyUnicodeWriter_Prepare(writer, 1, '%') == 1) + return NULL; + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '%'); + writer->pos++; + break; + + default: + /* if we stumble upon an unknown formatting code, copy the rest + of the format string to the output string. (we cannot just + skip the code, since there's no way to know what's in the + argument list) */ + len = strlen(p); + if (_PyUnicodeWriter_WriteCstr(writer, p, len) == -1) + return NULL; + f = p+len; + return f; + } + + f++; + return f; +} + +PyObject * +PyUnicode_FromFormatV(const char *format, va_list vargs) +{ + va_list vargs2; + const char *f; + _PyUnicodeWriter writer; + + _PyUnicodeWriter_Init(&writer, strlen(format) + 100); + + /* va_list may be an array (of 1 item) on some platforms (ex: AMD64). 
+ Copy it to be able to pass a reference to a subfunction. */ + Py_VA_COPY(vargs2, vargs); + + for (f = format; *f; ) { if (*f == '%') { - const char* p; + f = unicode_fromformat_arg(&writer, f, &vargs2); + if (f == NULL) + goto fail; + } + else { + const char *p; + Py_ssize_t len; p = f; - f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL); - /* checking for == because the last argument could be a empty - string, which causes i to point to end, the assert at the end of - the loop */ - assert(i <= PyUnicode_GET_LENGTH(string)); - - switch (*f) { - case 'c': - { - const int ordinal = va_arg(vargs, int); - PyUnicode_WRITE(kind, data, i++, ordinal); - break; - } - case 'i': - case 'd': - case 'u': - case 'x': - case 'p': - { - Py_ssize_t len; - /* unused, since we already have the result */ - if (*f == 'p') - (void) va_arg(vargs, void *); - else - (void) va_arg(vargs, int); - /* extract the result from numberresults and append. */ - len = strlen(numberresult); - unicode_write_cstr(string, i, numberresult, len); - /* skip over the separating '\0' */ - i += len; - numberresult += len; - assert(*numberresult == '\0'); - numberresult++; - assert(numberresult <= numberresults + numbersize); - break; - } - case 's': - { - /* unused, since we already have the result */ - Py_ssize_t size; - (void) va_arg(vargs, char *); - size = PyUnicode_GET_LENGTH(*callresult); - assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); - i += size; - /* We're done with the unicode()/repr() => forget it */ - Py_DECREF(*callresult); - /* switch to next unicode()/repr() result */ - ++callresult; - break; - } - case 'U': - { - PyObject *obj = va_arg(vargs, PyObject *); - Py_ssize_t size; - assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); - size = PyUnicode_GET_LENGTH(obj); - _PyUnicode_FastCopyCharacters(string, i, obj, 0, size); - i += size; - break; - } - case 'V': + do { - Py_ssize_t size; - PyObject *obj = 
va_arg(vargs, PyObject *); - va_arg(vargs, const char *); - if (obj) { - size = PyUnicode_GET_LENGTH(obj); - assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); - _PyUnicode_FastCopyCharacters(string, i, obj, 0, size); - i += size; - } else { - size = PyUnicode_GET_LENGTH(*callresult); - assert(PyUnicode_KIND(*callresult) <= - PyUnicode_KIND(string)); - _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); - i += size; - Py_DECREF(*callresult); + if ((unsigned char)*p > 127) { + PyErr_Format(PyExc_ValueError, + "PyUnicode_FromFormatV() expects an ASCII-encoded format " + "string, got a non-ASCII byte: 0x%02x", + (unsigned char)*p); + return NULL; } - ++callresult; - break; - } - case 'S': - case 'R': - case 'A': - { - Py_ssize_t size = PyUnicode_GET_LENGTH(*callresult); - /* unused, since we already have the result */ - (void) va_arg(vargs, PyObject *); - assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); - i += size; - /* We're done with the unicode()/repr() => forget it */ - Py_DECREF(*callresult); - /* switch to next unicode()/repr() result */ - ++callresult; - break; + p++; } - case '%': - PyUnicode_WRITE(kind, data, i++, '%'); - break; - default: - { - Py_ssize_t len = strlen(p); - unicode_write_cstr(string, i, p, len); - i += len; - assert(i == PyUnicode_GET_LENGTH(string)); - goto end; - } - } - } - else { - assert(i < PyUnicode_GET_LENGTH(string)); - PyUnicode_WRITE(kind, data, i++, *f); + while (*p != '\0' && *p != '%'); + len = p - f; + + if (*p == '\0') + writer.overallocate = 0; + if (_PyUnicodeWriter_Prepare(&writer, len, 127) == -1) + goto fail; + unicode_write_cstr(writer.buffer, writer.pos, f, len); + writer.pos += len; + + f = p; } } - assert(i == PyUnicode_GET_LENGTH(string)); + return _PyUnicodeWriter_Finish(&writer); - end: - if (callresults) - PyObject_Free(callresults); - if (numberresults) - PyObject_Free(numberresults); - return unicode_result(string); 
fail: - if (callresults) { - PyObject **callresult2 = callresults; - while (callresult2 < callresult) { - Py_XDECREF(*callresult2); - ++callresult2; - } - PyObject_Free(callresults); - } - if (numberresults) - PyObject_Free(numberresults); + _PyUnicodeWriter_Dealloc(&writer); return NULL; } @@ -12962,6 +12797,19 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) return 0; } +int +_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len) +{ + Py_UCS4 maxchar; + + maxchar = ucs1lib_find_max_char((Py_UCS1*)str, (Py_UCS1*)str + len); + if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1) + return -1; + unicode_write_cstr(writer->buffer, writer->pos, str, len); + writer->pos += len; + return 0; +} + PyObject * _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) { -- cgit v1.2.1 From 2c3ebacf38ab59aa07b9fe4381f183ecf6e7e343 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 6 Oct 2012 23:05:00 +0200 Subject: Issue #16147: PyUnicode_FromFormatV() now detects integer overflow when parsing width and precision --- Objects/unicodeobject.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 562efedfe2..40e56cdced 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2357,6 +2357,11 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, /* parse the width.precision part, e.g. 
"%2.5s" => width=2, precision=5 */ width = 0; while (Py_ISDIGIT((unsigned)*f)) { + if (width > (INT_MAX - ((int)*f - '0')) / 10) { + PyErr_SetString(PyExc_ValueError, + "width too big"); + return NULL; + } width = (width*10) + (*f - '0'); f++; } @@ -2364,6 +2369,11 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, if (*f == '.') { f++; while (Py_ISDIGIT((unsigned)*f)) { + if (precision > (INT_MAX - ((int)*f - '0')) / 10) { + PyErr_SetString(PyExc_ValueError, + "precision too big"); + return NULL; + } precision = (precision*10) + (*f - '0'); f++; } @@ -13589,7 +13599,7 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, break; if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) { PyErr_SetString(PyExc_ValueError, - "prec too big"); + "precision too big"); return -1; } arg->prec = arg->prec*10 + (arg->ch - '0'); -- cgit v1.2.1 From f8f343bdbd0b8fe19c92c0678c2fb50a39dd7248 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 6 Oct 2012 23:05:45 +0200 Subject: Issue #16147: PyUnicode_FromFormatV() now raises an error if the argument of '%c' is not in the range(0x110000). 
--- Objects/unicodeobject.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 40e56cdced..e6fe1fba4e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2417,6 +2417,11 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, case 'c': { int ordinal = va_arg(*vargs, int); + if (ordinal < 0 || ordinal > MAX_UNICODE) { + PyErr_SetString(PyExc_ValueError, + "character argument not in range(0x110000)"); + return NULL; + } if (_PyUnicodeWriter_Prepare(writer, 1, ordinal) == -1) return NULL; PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ordinal); -- cgit v1.2.1 From 497294b7e421de5e0133e26aaeae8af663235d0d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 6 Oct 2012 23:48:20 +0200 Subject: Issue #16147: PyUnicode_FromFormatV() doesn't need anymore to allocate a buffer on the heap to format numbers. --- Objects/unicodeobject.c | 60 ++++++++++++------------------------------------- 1 file changed, 14 insertions(+), 46 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e6fe1fba4e..0ed38fef8f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2328,10 +2328,7 @@ makefmt(char *fmt, int longflag, int longlongflag, int size_tflag, *fmt = '\0'; } -/* maximum number of characters required for output of %ld. 21 characters - allows for 64-bit integers (in decimal) and an optional sign. */ -#define MAX_LONG_CHARS 21 -/* maximum number of characters required for output of %lld. +/* maximum number of characters required for output of %lld or %p. We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits, plus 1 for the sign. 53/22 is an upper bound for log10(256). 
*/ #define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22) @@ -2436,19 +2433,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, { /* used by sprintf */ char fmt[10]; /* should be enough for "%0lld\0" */ - char small_buffer[MAX_LONG_CHARS]; - char *buffer; - int err; - - if (sizeof(small_buffer) - 1 < precision) { - buffer = PyMem_Malloc(precision + 1); - if (buffer == NULL) { - PyErr_NoMemory(); - return NULL; - } - } - else - buffer = small_buffer; + char buffer[MAX_LONG_LONG_CHARS]; if (*f == 'u') { makefmt(fmt, longflag, longlongflag, size_tflag, *f); @@ -2492,45 +2477,28 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, } assert(len >= 0); - err = 0; if (precision < len) precision = len; if (width > precision) { Py_UCS4 fillchar; fill = width - precision; fillchar = zeropad?'0':' '; - if (_PyUnicodeWriter_Prepare(writer, fill, fillchar) != -1) { - if (PyUnicode_Fill(writer->buffer, writer->pos, fill, fillchar) == -1) - err = 1; - } - else - err = 1; - if (!err) - writer->pos += fill; + if (_PyUnicodeWriter_Prepare(writer, fill, fillchar) == -1) + return NULL; + if (PyUnicode_Fill(writer->buffer, writer->pos, fill, fillchar) == -1) + return NULL; + writer->pos += fill; } - if (!err && precision > len) { + if (precision > len) { fill = precision - len; - if (_PyUnicodeWriter_Prepare(writer, fill, '0') != -1) { - if (PyUnicode_Fill(writer->buffer, writer->pos, fill, '0') == -1) - err = 1; - } - else - err = 1; - if (!err) - writer->pos += fill; - } - if (!err) { - if (_PyUnicodeWriter_WriteCstr(writer, buffer, len) == -1) - err = 1; - } - - if (buffer != small_buffer) { - PyMem_Free(buffer); - buffer = small_buffer; + if (_PyUnicodeWriter_Prepare(writer, fill, '0') == -1) + return NULL; + if (PyUnicode_Fill(writer->buffer, writer->pos, fill, '0') == -1) + return NULL; + writer->pos += fill; } - if (err) + if (_PyUnicodeWriter_WriteCstr(writer, buffer, len) == -1) return NULL; - break; } -- cgit v1.2.1 From b8984cbf7938ce7bf3fa14c54d85611868f274e0 Mon 
Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 6 Oct 2012 23:55:33 +0200 Subject: Cleanup PyUnicode_FromFormatV() for zero padding Skip the "0" instead of parsing it twice: detect zero padding and then parsed as a digit of the width. --- Objects/unicodeobject.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0ed38fef8f..73a3dc48d2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2349,7 +2349,11 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, p = f; f++; - zeropad = (*f == '0'); + zeropad = 0; + if (*f == '0') { + zeropad = 1; + f++; + } /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */ width = 0; -- cgit v1.2.1 From ba20482f46270502fa3a373b6b994fe2015d940f Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 7 Oct 2012 10:29:32 +0200 Subject: Issue #16148: Small improvements and cleanup. Added version information to docs. --- Objects/abstract.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'Objects') diff --git a/Objects/abstract.c b/Objects/abstract.c index 84e827ad8c..4326cfabd7 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -71,8 +71,9 @@ _PyObject_HasLen(PyObject *o) { } /* The length hint function returns a non-negative value from o.__len__() - or o.__length_hint__(). If those methods aren't found. If one of the calls - fails this function returns -1. + or o.__length_hint__(). If those methods aren't found the defaultvalue is + returned. If one of the calls fails with an exception other than TypeError + this function returns -1. 
*/ Py_ssize_t @@ -112,21 +113,21 @@ PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue) return defaultvalue; } if (!PyLong_Check(result)) { - PyErr_Format(PyExc_TypeError, "Length hint must be an integer, not %s", + PyErr_Format(PyExc_TypeError, "__length_hint__ must be an integer, not %.100s", Py_TYPE(result)->tp_name); Py_DECREF(result); return -1; } - defaultvalue = PyLong_AsSsize_t(result); + res = PyLong_AsSsize_t(result); Py_DECREF(result); - if (defaultvalue < 0 && PyErr_Occurred()) { + if (res < 0 && PyErr_Occurred()) { return -1; } - if (defaultvalue < 0) { + if (res < 0) { PyErr_Format(PyExc_ValueError, "__length_hint__() should return >= 0"); return -1; } - return defaultvalue; + return res; } PyObject * -- cgit v1.2.1 From a04bb217c99bb1d334b2142d4a583faa14767317 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 16 Oct 2012 21:07:23 +0200 Subject: Issue #15958: bytes.join and bytearray.join now accept arbitrary buffer objects. --- Objects/bytearrayobject.c | 72 ++------------------------- Objects/bytesobject.c | 107 ++++++---------------------------------- Objects/stringlib/join.h | 122 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 158 deletions(-) create mode 100644 Objects/stringlib/join.h (limited to 'Objects') diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 26c76d2687..a98ecdde5c 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1032,6 +1032,7 @@ bytearray_dealloc(PyByteArrayObject *self) #define FASTSEARCH fastsearch #define STRINGLIB(F) stringlib_##F #define STRINGLIB_CHAR char +#define STRINGLIB_SIZEOF_CHAR 1 #define STRINGLIB_LEN PyByteArray_GET_SIZE #define STRINGLIB_STR PyByteArray_AS_STRING #define STRINGLIB_NEW PyByteArray_FromStringAndSize @@ -1043,6 +1044,7 @@ bytearray_dealloc(PyByteArrayObject *self) #include "stringlib/fastsearch.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/join.h" #include 
"stringlib/partition.h" #include "stringlib/split.h" #include "stringlib/ctype.h" @@ -2569,73 +2571,9 @@ Concatenate any number of bytes/bytearray objects, with B\n\ in between each pair, and return the result as a new bytearray."); static PyObject * -bytearray_join(PyByteArrayObject *self, PyObject *it) -{ - PyObject *seq; - Py_ssize_t mysize = Py_SIZE(self); - Py_ssize_t i; - Py_ssize_t n; - PyObject **items; - Py_ssize_t totalsize = 0; - PyObject *result; - char *dest; - - seq = PySequence_Fast(it, "can only join an iterable"); - if (seq == NULL) - return NULL; - n = PySequence_Fast_GET_SIZE(seq); - items = PySequence_Fast_ITEMS(seq); - - /* Compute the total size, and check that they are all bytes */ - /* XXX Shouldn't we use _getbuffer() on these items instead? */ - for (i = 0; i < n; i++) { - PyObject *obj = items[i]; - if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) { - PyErr_Format(PyExc_TypeError, - "can only join an iterable of bytes " - "(item %ld has type '%.100s')", - /* XXX %ld isn't right on Win64 */ - (long)i, Py_TYPE(obj)->tp_name); - goto error; - } - if (i > 0) - totalsize += mysize; - totalsize += Py_SIZE(obj); - if (totalsize < 0) { - PyErr_NoMemory(); - goto error; - } - } - - /* Allocate the result, and copy the bytes */ - result = PyByteArray_FromStringAndSize(NULL, totalsize); - if (result == NULL) - goto error; - dest = PyByteArray_AS_STRING(result); - for (i = 0; i < n; i++) { - PyObject *obj = items[i]; - Py_ssize_t size = Py_SIZE(obj); - char *buf; - if (PyByteArray_Check(obj)) - buf = PyByteArray_AS_STRING(obj); - else - buf = PyBytes_AS_STRING(obj); - if (i) { - memcpy(dest, self->ob_bytes, mysize); - dest += mysize; - } - memcpy(dest, buf, size); - dest += size; - } - - /* Done */ - Py_DECREF(seq); - return result; - - /* Error handling */ - error: - Py_DECREF(seq); - return NULL; +bytearray_join(PyObject *self, PyObject *iterable) +{ + return stringlib_bytes_join(self, iterable); } PyDoc_STRVAR(splitlines__doc__, diff --git 
a/Objects/bytesobject.c b/Objects/bytesobject.c index 25c2326b95..a1db7789f1 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -10,9 +10,18 @@ static Py_ssize_t _getbuffer(PyObject *obj, Py_buffer *view) { - PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer; - - if (buffer == NULL || buffer->bf_getbuffer == NULL) + PyBufferProcs *bufferprocs; + if (PyBytes_CheckExact(obj)) { + /* Fast path, e.g. for .join() of many bytes objects */ + Py_INCREF(obj); + view->obj = obj; + view->buf = PyBytes_AS_STRING(obj); + view->len = PyBytes_GET_SIZE(obj); + return view->len; + } + + bufferprocs = Py_TYPE(obj)->tp_as_buffer; + if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL) { PyErr_Format(PyExc_TypeError, "Type %.100s doesn't support the buffer API", @@ -20,7 +29,7 @@ _getbuffer(PyObject *obj, Py_buffer *view) return -1; } - if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0) + if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0) return -1; return view->len; } @@ -555,6 +564,7 @@ PyBytes_AsStringAndSize(register PyObject *obj, #include "stringlib/fastsearch.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/join.h" #include "stringlib/partition.h" #include "stringlib/split.h" #include "stringlib/ctype.h" @@ -1107,94 +1117,9 @@ Concatenate any number of bytes objects, with B in between each pair.\n\ Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'."); static PyObject * -bytes_join(PyObject *self, PyObject *orig) +bytes_join(PyObject *self, PyObject *iterable) { - char *sep = PyBytes_AS_STRING(self); - const Py_ssize_t seplen = PyBytes_GET_SIZE(self); - PyObject *res = NULL; - char *p; - Py_ssize_t seqlen = 0; - size_t sz = 0; - Py_ssize_t i; - PyObject *seq, *item; - - seq = PySequence_Fast(orig, ""); - if (seq == NULL) { - return NULL; - } - - seqlen = PySequence_Size(seq); - if (seqlen == 0) { - Py_DECREF(seq); - return PyBytes_FromString(""); - } - if (seqlen == 1) { - item = 
PySequence_Fast_GET_ITEM(seq, 0); - if (PyBytes_CheckExact(item)) { - Py_INCREF(item); - Py_DECREF(seq); - return item; - } - } - - /* There are at least two things to join, or else we have a subclass - * of the builtin types in the sequence. - * Do a pre-pass to figure out the total amount of space we'll - * need (sz), and see whether all argument are bytes. - */ - /* XXX Shouldn't we use _getbuffer() on these items instead? */ - for (i = 0; i < seqlen; i++) { - const size_t old_sz = sz; - item = PySequence_Fast_GET_ITEM(seq, i); - if (!PyBytes_Check(item) && !PyByteArray_Check(item)) { - PyErr_Format(PyExc_TypeError, - "sequence item %zd: expected bytes," - " %.80s found", - i, Py_TYPE(item)->tp_name); - Py_DECREF(seq); - return NULL; - } - sz += Py_SIZE(item); - if (i != 0) - sz += seplen; - if (sz < old_sz || sz > PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "join() result is too long for bytes"); - Py_DECREF(seq); - return NULL; - } - } - - /* Allocate result space. */ - res = PyBytes_FromStringAndSize((char*)NULL, sz); - if (res == NULL) { - Py_DECREF(seq); - return NULL; - } - - /* Catenate everything. */ - /* I'm not worried about a PyByteArray item growing because there's - nowhere in this function where we release the GIL. 
*/ - p = PyBytes_AS_STRING(res); - for (i = 0; i < seqlen; ++i) { - size_t n; - char *q; - if (i) { - Py_MEMCPY(p, sep, seplen); - p += seplen; - } - item = PySequence_Fast_GET_ITEM(seq, i); - n = Py_SIZE(item); - if (PyBytes_Check(item)) - q = PyBytes_AS_STRING(item); - else - q = PyByteArray_AS_STRING(item); - Py_MEMCPY(p, q, n); - p += n; - } - - Py_DECREF(seq); - return res; + return stringlib_bytes_join(self, iterable); } PyObject * diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h new file mode 100644 index 0000000000..21753cb504 --- /dev/null +++ b/Objects/stringlib/join.h @@ -0,0 +1,122 @@ +/* stringlib: bytes joining implementation */ + +#if STRINGLIB_SIZEOF_CHAR != 1 +#error join.h only compatible with byte-wise strings +#endif + +Py_LOCAL_INLINE(PyObject *) +STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) +{ + char *sepstr = STRINGLIB_STR(sep); + const Py_ssize_t seplen = STRINGLIB_LEN(sep); + PyObject *res = NULL; + char *p; + Py_ssize_t seqlen = 0; + Py_ssize_t sz = 0; + Py_ssize_t i, nbufs; + PyObject *seq, *item; + Py_buffer *buffers = NULL; +#define NB_STATIC_BUFFERS 10 + Py_buffer static_buffers[NB_STATIC_BUFFERS]; + + seq = PySequence_Fast(iterable, "can only join an iterable"); + if (seq == NULL) { + return NULL; + } + + seqlen = PySequence_Fast_GET_SIZE(seq); + if (seqlen == 0) { + Py_DECREF(seq); + return STRINGLIB_NEW(NULL, 0); + } +#ifndef STRINGLIB_MUTABLE + if (seqlen == 1) { + item = PySequence_Fast_GET_ITEM(seq, 0); + if (STRINGLIB_CHECK_EXACT(item)) { + Py_INCREF(item); + Py_DECREF(seq); + return item; + } + } +#endif + if (seqlen > NB_STATIC_BUFFERS) { + buffers = PyMem_NEW(Py_buffer, seqlen); + if (buffers == NULL) { + Py_DECREF(seq); + return NULL; + } + } + else { + buffers = static_buffers; + } + + /* Here is the general case. Do a pre-pass to figure out the total + * amount of space we'll need (sz), and see whether all arguments are + * buffer-compatible. 
+ */ + for (i = 0, nbufs = 0; i < seqlen; i++) { + Py_ssize_t itemlen; + item = PySequence_Fast_GET_ITEM(seq, i); + if (_getbuffer(item, &buffers[i]) < 0) { + PyErr_Format(PyExc_TypeError, + "sequence item %zd: expected bytes, bytearray, " + "or an object with the buffer interface, %.80s found", + i, Py_TYPE(item)->tp_name); + goto error; + } + nbufs = i + 1; /* for error cleanup */ + itemlen = buffers[i].len; + if (itemlen > PY_SSIZE_T_MAX - sz) { + PyErr_SetString(PyExc_OverflowError, + "join() result is too long"); + goto error; + } + sz += itemlen; + if (i != 0) { + if (seplen > PY_SSIZE_T_MAX - sz) { + PyErr_SetString(PyExc_OverflowError, + "join() result is too long"); + goto error; + } + sz += seplen; + } + if (seqlen != PySequence_Fast_GET_SIZE(seq)) { + PyErr_SetString(PyExc_RuntimeError, + "sequence changed size during iteration"); + goto error; + } + } + + /* Allocate result space. */ + res = STRINGLIB_NEW(NULL, sz); + if (res == NULL) + goto error; + + /* Catenate everything. */ + p = STRINGLIB_STR(res); + for (i = 0; i < nbufs; i++) { + Py_ssize_t n; + char *q; + if (i) { + Py_MEMCPY(p, sepstr, seplen); + p += seplen; + } + n = buffers[i].len; + q = buffers[i].buf; + Py_MEMCPY(p, q, n); + p += n; + } + goto done; + +error: + res = NULL; +done: + Py_DECREF(seq); + for (i = 0; i < nbufs; i++) + PyBuffer_Release(&buffers[i]); + if (buffers != static_buffers) + PyMem_FREE(buffers); + return res; +} + +#undef NB_STATIC_BUFFERS -- cgit v1.2.1 From 114c42641335bdc26cbf61c18c82b0f5c8a6d4f8 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 17 Oct 2012 23:52:17 +0200 Subject: Issue #16166: Add PY_LITTLE_ENDIAN and PY_BIG_ENDIAN macros and unified endianess detection and handling. 
--- Objects/longobject.c | 8 ++------ Objects/stringlib/codecs.h | 6 +++--- Objects/unicodeobject.c | 18 +++++------------- 3 files changed, 10 insertions(+), 22 deletions(-) (limited to 'Objects') diff --git a/Objects/longobject.c b/Objects/longobject.c index 2aac8e4051..ead468025b 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -988,7 +988,6 @@ PyLong_AsVoidPtr(PyObject *vv) * rewritten to use the newer PyLong_{As,From}ByteArray API. */ -#define IS_LITTLE_ENDIAN (int)*(unsigned char*)&one #define PY_ABS_LLONG_MIN (0-(unsigned PY_LONG_LONG)PY_LLONG_MIN) /* Create a new long int object from a C PY_LONG_LONG int. */ @@ -1141,7 +1140,6 @@ PyLong_AsLongLong(PyObject *vv) { PyLongObject *v; PY_LONG_LONG bytes; - int one = 1; int res; if (vv == NULL) { @@ -1176,7 +1174,7 @@ PyLong_AsLongLong(PyObject *vv) case 1: return v->ob_digit[0]; } res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes, - SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 1); + SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 1); /* Plan 9 can't handle PY_LONG_LONG in ? : expressions */ if (res < 0) @@ -1193,7 +1191,6 @@ PyLong_AsUnsignedLongLong(PyObject *vv) { PyLongObject *v; unsigned PY_LONG_LONG bytes; - int one = 1; int res; if (vv == NULL) { @@ -1212,7 +1209,7 @@ PyLong_AsUnsignedLongLong(PyObject *vv) } res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes, - SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 0); + SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 0); /* Plan 9 can't handle PY_LONG_LONG in ? : expressions */ if (res < 0) @@ -1288,7 +1285,6 @@ PyLong_AsUnsignedLongLongMask(register PyObject *op) return (unsigned PY_LONG_LONG)-1; } } -#undef IS_LITTLE_ENDIAN /* Get a C long long int from a long int object or any object that has an __int__ method. 
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index 2a01089c0f..cbf35084cd 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -47,7 +47,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end, unsigned long value = *(unsigned long *) _s; if (value & ASCII_CHAR_MASK) break; -#ifdef BYTEORDER_IS_LITTLE_ENDIAN +#if PY_LITTLE_ENDIAN _p[0] = (STRINGLIB_CHAR)(value & 0xFFu); _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu); _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu); @@ -454,7 +454,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e, const unsigned char *q = *inptr; STRINGLIB_CHAR *p = dest + *outpos; /* Offsets from q for retrieving byte pairs in the right order. */ -#ifdef BYTEORDER_IS_LITTLE_ENDIAN +#if PY_LITTLE_ENDIAN int ihi = !!native_ordering, ilo = !native_ordering; #else int ihi = !native_ordering, ilo = !!native_ordering; @@ -485,7 +485,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e, block = SWAB(block); #endif } -#ifdef BYTEORDER_IS_LITTLE_ENDIAN +#if PY_LITTLE_ENDIAN # if SIZEOF_LONG == 4 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu); p[1] = (STRINGLIB_CHAR)(block >> 16); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b4c7ecf900..9461563f99 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -47,14 +47,6 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include #endif -/* Endianness switches; defaults to little endian */ - -#ifdef WORDS_BIGENDIAN -# define BYTEORDER_IS_BIG_ENDIAN -#else -# define BYTEORDER_IS_LITTLE_ENDIAN -#endif - /* --- Globals ------------------------------------------------------------ The globals are initialized by the _PyUnicode_Init() API and should @@ -4813,7 +4805,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, int bo = 0; /* assume native ordering by default */ const char *errmsg = ""; /* Offsets from q for retrieving bytes in the right order. 
*/ -#ifdef BYTEORDER_IS_LITTLE_ENDIAN +#if PY_LITTLE_ENDIAN int iorder[] = {0, 1, 2, 3}; #else int iorder[] = {3, 2, 1, 0}; @@ -4835,7 +4827,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, if (size >= 4) { const Py_UCS4 bom = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) | (q[iorder[1]] << 8) | q[iorder[0]]; -#ifdef BYTEORDER_IS_LITTLE_ENDIAN +#if PY_LITTLE_ENDIAN if (bom == 0x0000FEFF) { q += 4; bo = -1; @@ -4949,7 +4941,7 @@ _PyUnicode_EncodeUTF32(PyObject *str, unsigned char *p; Py_ssize_t nsize, i; /* Offsets from p for storing byte pairs in the right order. */ -#ifdef BYTEORDER_IS_LITTLE_ENDIAN +#if PY_LITTLE_ENDIAN int iorder[] = {0, 1, 2, 3}; #else int iorder[] = {3, 2, 1, 0}; @@ -5092,7 +5084,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s, return unicode_empty; } -#ifdef BYTEORDER_IS_LITTLE_ENDIAN +#if PY_LITTLE_ENDIAN native_ordering = bo <= 0; #else native_ordering = bo >= 0; @@ -5209,7 +5201,7 @@ _PyUnicode_EncodeUTF16(PyObject *str, unsigned short *out; Py_ssize_t bytesize; Py_ssize_t pairs; -#ifdef WORDS_BIGENDIAN +#if PY_BIG_ENDIAN int native_ordering = byteorder >= 0; #else int native_ordering = byteorder <= 0; -- cgit v1.2.1 From b6fc89dbb4046eb2e0096419fe0b18797a26f337 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Thu, 18 Oct 2012 22:18:42 +0100 Subject: Issue 16280: Drop questionable special-casing of null pointer in PyLong_FromVoidPtr. 
--- Objects/longobject.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'Objects') diff --git a/Objects/longobject.c b/Objects/longobject.c index 95bd83625c..4024491f13 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -936,9 +936,6 @@ PyObject * PyLong_FromVoidPtr(void *p) { #if SIZEOF_VOID_P <= SIZEOF_LONG - /* special-case null pointer */ - if (!p) - return PyLong_FromLong(0); return PyLong_FromUnsignedLong((unsigned long)(Py_uintptr_t)p); #else @@ -948,9 +945,6 @@ PyLong_FromVoidPtr(void *p) #if SIZEOF_LONG_LONG < SIZEOF_VOID_P # error "PyLong_FromVoidPtr: sizeof(PY_LONG_LONG) < sizeof(void*)" #endif - /* special-case null pointer */ - if (!p) - return PyLong_FromLong(0); return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)(Py_uintptr_t)p); #endif /* SIZEOF_VOID_P <= SIZEOF_LONG */ -- cgit v1.2.1 From 29f5c13873218430f1a24f4405040038895bf2d4 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sat, 20 Oct 2012 23:08:34 +0200 Subject: Issue #12805: Make bytes.join and bytearray.join faster when the separator is empty. Patch by Serhiy Storchaka. --- Objects/stringlib/join.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'Objects') diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h index 21753cb504..d1d6e532c5 100644 --- a/Objects/stringlib/join.h +++ b/Objects/stringlib/join.h @@ -94,6 +94,16 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) /* Catenate everything. 
*/ p = STRINGLIB_STR(res); + if (!seplen) { + /* fast path */ + for (i = 0; i < nbufs; i++) { + Py_ssize_t n = buffers[i].len; + char *q = buffers[i].buf; + Py_MEMCPY(p, q, n); + p += n; + } + goto done; + } for (i = 0; i < nbufs; i++) { Py_ssize_t n; char *q; -- cgit v1.2.1 From 346d8350eda74f01c7a3901d1e7c019d2f4f5a17 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 23 Oct 2012 02:48:49 +0200 Subject: Optimize PyUnicode_RichCompare() for Py_EQ and Py_NE: always use memcmp() --- Objects/unicodeobject.c | 70 +++++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 26 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9461563f99..f569a83d15 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10296,6 +10296,32 @@ unicode_compare(PyObject *str1, PyObject *str2) return 1; } +static int +unicode_compare_eq(PyObject *str1, PyObject *str2) +{ + int kind; + void *data1, *data2; + Py_ssize_t len; + int cmp; + + /* a string is equal to itself */ + if (str1 == str2) + return 1; + + len = PyUnicode_GET_LENGTH(str1); + if (PyUnicode_GET_LENGTH(str2) != len) + return 0; + kind = PyUnicode_KIND(str1); + if (PyUnicode_KIND(str2) != kind) + return 0; + data1 = PyUnicode_DATA(str1); + data2 = PyUnicode_DATA(str2); + + cmp = memcmp(data1, data2, len * kind); + return (cmp == 0); +} + + int PyUnicode_Compare(PyObject *left, PyObject *right) { @@ -10346,33 +10372,27 @@ PyObject * PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) { int result; + PyObject *v; - if (PyUnicode_Check(left) && PyUnicode_Check(right)) { - PyObject *v; - if (PyUnicode_READY(left) == -1 || - PyUnicode_READY(right) == -1) - return NULL; - if (PyUnicode_GET_LENGTH(left) != PyUnicode_GET_LENGTH(right) || - PyUnicode_KIND(left) != PyUnicode_KIND(right)) { - if (op == Py_EQ) { - Py_INCREF(Py_False); - return Py_False; - } - if (op == Py_NE) { - Py_INCREF(Py_True); - return Py_True; - } - } + if 
(!PyUnicode_Check(left) || !PyUnicode_Check(right)) + Py_RETURN_NOTIMPLEMENTED; + + if (PyUnicode_READY(left) == -1 || + PyUnicode_READY(right) == -1) + return NULL; + + if (op == Py_EQ || op == Py_NE) { + result = unicode_compare_eq(left, right); + if (op == Py_EQ) + v = TEST_COND(result); + else + v = TEST_COND(!result); + } + else { result = unicode_compare(left, right); /* Convert the return value to a Boolean */ switch (op) { - case Py_EQ: - v = TEST_COND(result == 0); - break; - case Py_NE: - v = TEST_COND(result != 0); - break; case Py_LE: v = TEST_COND(result <= 0); break; @@ -10389,11 +10409,9 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) PyErr_BadArgument(); return NULL; } - Py_INCREF(v); - return v; } - - Py_RETURN_NOTIMPLEMENTED; + Py_INCREF(v); + return v; } int -- cgit v1.2.1 From 8df489f6ea43093ddf972dc5c67bae30af0590e4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 23 Oct 2012 02:51:50 +0200 Subject: Inline raise_translate_exception(): it is only used once --- Objects/unicodeobject.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f569a83d15..362f2cffcf 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8064,19 +8064,6 @@ make_translate_exception(PyObject **exceptionObject, } } -/* raises a UnicodeTranslateError */ -static void -raise_translate_exception(PyObject **exceptionObject, - PyObject *unicode, - Py_ssize_t startpos, Py_ssize_t endpos, - const char *reason) -{ - make_translate_exception(exceptionObject, - unicode, startpos, endpos, reason); - if (*exceptionObject != NULL) - PyCodec_StrictErrors(*exceptionObject); -} - /* error handling callback helper: build arguments, call the callback and check the arguments, put the result into newpos and return the replacement string, which @@ -8352,8 +8339,10 @@ _PyUnicode_TranslateCharmap(PyObject *input, } switch (known_errorHandler) { 
case 1: /* strict */ - raise_translate_exception(&exc, input, collstart, - collend, reason); + make_translate_exception(&exc, + input, collstart, collend, reason); + if (exc != NULL) + PyCodec_StrictErrors(exc); goto onError; case 2: /* replace */ /* No need to check for space, this is a 1:1 replacement */ -- cgit v1.2.1 From c2571a28fabfafc0e82df987708ebbd3925b2bdd Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 23 Oct 2012 02:52:18 +0200 Subject: Optimize _PyUnicode_HasNULChars(): use findchar() instead of PyUnicode_Contains() --- Objects/unicodeobject.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 362f2cffcf..a7efb01a04 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3573,18 +3573,20 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) int -_PyUnicode_HasNULChars(PyObject* s) +_PyUnicode_HasNULChars(PyObject* str) { - static PyObject *nul = NULL; + Py_ssize_t pos; - if (nul == NULL) - nul = PyUnicode_FromStringAndSize("\0", 1); - if (nul == NULL) + if (PyUnicode_READY(str) == -1) return -1; - return PyUnicode_Contains(s, nul); + pos = findchar(PyUnicode_DATA(str), PyUnicode_KIND(str), + PyUnicode_GET_LENGTH(str), '\0', 1); + if (pos == -1) + return 0; + else + return 1; } - int PyUnicode_FSConverter(PyObject* arg, void* addr) { -- cgit v1.2.1 From ebd36e8c987880114ad1f18eca8c16cb3fcf9195 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 23 Oct 2012 02:54:47 +0200 Subject: Replace usage of the deprecated Py_UNICODE_COPY() with Py_MEMCPY() in resize_copy() --- Objects/unicodeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a7efb01a04..0a3712e2f3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -812,8 +812,8 @@ resize_copy(PyObject *unicode, Py_ssize_t length) return 
NULL; copy_length = _PyUnicode_WSTR_LENGTH(unicode); copy_length = Py_MIN(copy_length, length); - Py_UNICODE_COPY(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode), - copy_length); + Py_MEMCPY(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode), + copy_length * sizeof(wchar_t)); return w; } } -- cgit v1.2.1 From 4d0ebf454346e70e5b7c7803ab7939f7c12a2b39 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 30 Oct 2012 01:42:39 +0100 Subject: Issue #16330: Use surrogate-related macros Patch written by Serhiy Storchaka. --- Objects/unicodeobject.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0a3712e2f3..3e2e8e3f26 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4412,7 +4412,7 @@ encode_char: /* code first surrogate */ base64bits += 16; - base64buffer = (base64buffer << 16) | 0xd800 | ((ch-0x10000) >> 10); + base64buffer = (base64buffer << 16) | Py_UNICODE_HIGH_SURROGATE(ch); while (base64bits >= 6) { *out++ = TO_BASE64(base64buffer >> (base64bits-6)); base64bits -= 6; @@ -7052,9 +7052,8 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes, charsize = 1; } else { - ch -= 0x10000; - chars[0] = 0xd800 + (ch >> 10); - chars[1] = 0xdc00 + (ch & 0x3ff); + chars[0] = Py_UNICODE_HIGH_SURROGATE(ch); + chars[1] = Py_UNICODE_LOW_SURROGATE(ch); charsize = 2; } -- cgit v1.2.1 From d4cf72a2401b4efd1c172d9dc23b434d7e32f8bb Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 30 Oct 2012 23:12:47 +0100 Subject: Close #14625: Rewrite the UTF-32 decoder. It is now 3x to 4x faster Patch written by Serhiy Storchaka. 
--- Objects/unicodeobject.c | 142 +++++++++++++++++++++++------------------------- 1 file changed, 69 insertions(+), 73 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3e2e8e3f26..0f5bdfcb36 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4804,14 +4804,8 @@ PyUnicode_DecodeUTF32Stateful(const char *s, Py_ssize_t outpos; PyObject *unicode; const unsigned char *q, *e; - int bo = 0; /* assume native ordering by default */ + int le, bo = 0; /* assume native ordering by default */ const char *errmsg = ""; - /* Offsets from q for retrieving bytes in the right order. */ -#if PY_LITTLE_ENDIAN - int iorder[] = {0, 1, 2, 3}; -#else - int iorder[] = {3, 2, 1, 0}; -#endif PyObject *errorHandler = NULL; PyObject *exc = NULL; @@ -4825,83 +4819,88 @@ PyUnicode_DecodeUTF32Stateful(const char *s, byte order setting accordingly. In native mode, the leading BOM mark is skipped, in all other modes, it is copied to the output stream as-is (giving a ZWNBSP character). 
*/ - if (bo == 0) { - if (size >= 4) { - const Py_UCS4 bom = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) | - (q[iorder[1]] << 8) | q[iorder[0]]; -#if PY_LITTLE_ENDIAN - if (bom == 0x0000FEFF) { - q += 4; - bo = -1; - } - else if (bom == 0xFFFE0000) { - q += 4; - bo = 1; - } -#else - if (bom == 0x0000FEFF) { - q += 4; - bo = 1; - } - else if (bom == 0xFFFE0000) { - q += 4; - bo = -1; - } -#endif + if (bo == 0 && size >= 4) { + Py_UCS4 bom = (q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0]; + if (bom == 0x0000FEFF) { + bo = -1; + q += 4; } + else if (bom == 0xFFFE0000) { + bo = 1; + q += 4; + } + if (byteorder) + *byteorder = bo; } - if (bo == -1) { - /* force LE */ - iorder[0] = 0; - iorder[1] = 1; - iorder[2] = 2; - iorder[3] = 3; - } - else if (bo == 1) { - /* force BE */ - iorder[0] = 3; - iorder[1] = 2; - iorder[2] = 1; - iorder[3] = 0; + if (q == e) { + if (consumed) + *consumed = size; + Py_INCREF(unicode_empty); + return unicode_empty; } - /* This might be one to much, because of a BOM */ - unicode = PyUnicode_New((size+3)/4, 127); +#ifdef WORDS_BIGENDIAN + le = bo < 0; +#else + le = bo <= 0; +#endif + + unicode = PyUnicode_New((e - q + 3) / 4, 127); if (!unicode) return NULL; - if (size == 0) - return unicode; + outpos = 0; + while (1) { + Py_UCS4 ch = 0; + Py_UCS4 maxch = PyUnicode_MAX_CHAR_VALUE(unicode); + + if (e - q >= 4) { + enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); + void *data = PyUnicode_DATA(unicode); + const unsigned char *last = e - 4; + if (le) { + do { + ch = (q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0]; + if (ch > maxch) + break; + PyUnicode_WRITE(kind, data, outpos++, ch); + q += 4; + } while (q <= last); + } + else { + do { + ch = (q[0] << 24) | (q[1] << 16) | (q[2] << 8) | q[3]; + if (ch > maxch) + break; + PyUnicode_WRITE(kind, data, outpos++, ch); + q += 4; + } while (q <= last); + } + } - while (q < e) { - Py_UCS4 ch; - /* remaining bytes at the end? 
(size should be divisible by 4) */ - if (e-q<4) { - if (consumed) + if (ch <= maxch) { + if (q == e || consumed) break; + /* remaining bytes at the end? (size should be divisible by 4) */ errmsg = "truncated data"; - startinpos = ((const char *)q)-starts; - endinpos = ((const char *)e)-starts; - goto utf32Error; - /* The remaining input chars are ignored if the callback - chooses to skip the input */ + startinpos = ((const char *)q) - starts; + endinpos = ((const char *)e) - starts; } - ch = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) | - (q[iorder[1]] << 8) | q[iorder[0]]; - - if (ch >= 0x110000) - { + else { + if (ch < 0x110000) { + if (unicode_putchar(&unicode, &outpos, ch) < 0) + goto onError; + q += 4; + continue; + } errmsg = "codepoint not in range(0x110000)"; - startinpos = ((const char *)q)-starts; - endinpos = startinpos+4; - goto utf32Error; + startinpos = ((const char *)q) - starts; + endinpos = startinpos + 4; } - if (unicode_putchar(&unicode, &outpos, ch) < 0) - goto onError; - q += 4; - continue; - utf32Error: + + /* The remaining input chars are ignored if the callback + chooses to skip the input */ if (unicode_decode_call_errorhandler( errors, &errorHandler, "utf32", errmsg, @@ -4910,9 +4909,6 @@ PyUnicode_DecodeUTF32Stateful(const char *s, goto onError; } - if (byteorder) - *byteorder = bo; - if (consumed) *consumed = (const char *)q-starts; -- cgit v1.2.1 From a452a3cd5532f2caa9b8b9a1e08c8fe3d87c33ba Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 30 Oct 2012 23:40:45 +0100 Subject: Issue #16086: PyTypeObject.tp_flags and PyType_Spec.flags are now unsigned ... (unsigned long and unsigned int) to avoid an undefined behaviour with Py_TPFLAGS_TYPE_SUBCLASS ((1 << 31). PyType_GetFlags() result type is now unsigned too (unsigned long, instead of long). 
--- Objects/typeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5d625a21ea..a790add61c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1931,7 +1931,7 @@ type_init(PyObject *cls, PyObject *args, PyObject *kwds) return res; } -long +unsigned long PyType_GetFlags(PyTypeObject *type) { return type->tp_flags; -- cgit v1.2.1 From 5e2e341fc47df02410ecbd72a449e6be16a7e0bd Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 31 Oct 2012 00:37:41 +0100 Subject: Issue #9566: Use the right type to fix a compiler warnings on Win64 --- Objects/frameobject.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 808e595157..6fff370bba 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -466,7 +466,7 @@ static int frame_traverse(PyFrameObject *f, visitproc visit, void *arg) { PyObject **fastlocals, **p; - int i, slots; + Py_ssize_t i, slots; Py_VISIT(f->f_back); Py_VISIT(f->f_code); @@ -496,7 +496,7 @@ static void frame_clear(PyFrameObject *f) { PyObject **fastlocals, **p, **oldtop; - int i, slots; + Py_ssize_t i, slots; /* Before anything else, make sure that this frame is clearly marked * as being defunct! 
Else, e.g., a generator reachable from this @@ -848,7 +848,7 @@ PyFrame_FastToLocals(PyFrameObject *f) PyObject *error_type, *error_value, *error_traceback; PyCodeObject *co; Py_ssize_t j; - int ncells, nfreevars; + Py_ssize_t ncells, nfreevars; if (f == NULL) return; locals = f->f_locals; @@ -900,7 +900,7 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear) PyObject *error_type, *error_value, *error_traceback; PyCodeObject *co; Py_ssize_t j; - int ncells, nfreevars; + Py_ssize_t ncells, nfreevars; if (f == NULL) return; locals = f->f_locals; -- cgit v1.2.1 From 3e2d4b8800fb0cd0afe4da7ca6537f8f23847879 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Tue, 30 Oct 2012 23:41:54 -0400 Subject: initialize map/filter/zip in _PyBuiltin_Init rather than the catch-all function --- Objects/object.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index b4bc96dc15..fd1fd256ba 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1708,15 +1708,6 @@ _Py_ReadyTypes(void) if (PyType_Ready(&PyMemberDescr_Type) < 0) Py_FatalError("Can't initialize member descriptor type"); - if (PyType_Ready(&PyFilter_Type) < 0) - Py_FatalError("Can't initialize filter type"); - - if (PyType_Ready(&PyMap_Type) < 0) - Py_FatalError("Can't initialize map type"); - - if (PyType_Ready(&PyZip_Type) < 0) - Py_FatalError("Can't initialize zip type"); - if (PyType_Ready(&_PyNamespace_Type) < 0) Py_FatalError("Can't initialize namespace type"); -- cgit v1.2.1 From 70dc0c824c5f127870b8b00df1616536283d0120 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 7 Nov 2012 00:36:38 +0100 Subject: Close #16311: Use the _PyUnicodeWriter API in text decoders * Remove unicode_widen(): replaced with _PyUnicodeWriter_Prepare() * Remove unicode_putchar(): replaced with PyUnicodeWriter_Prepare() + PyUnicode_WRITER() * When handling an decoding error, only overallocate the buffer by +25% instead of +100% --- Objects/unicodeobject.c | 652 
++++++++++++++++++++++++------------------------ 1 file changed, 328 insertions(+), 324 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3b4bee26f8..6a30e8df60 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1644,38 +1644,6 @@ PyUnicode_Resize(PyObject **p_unicode, Py_ssize_t length) return unicode_resize(p_unicode, length); } -static int -unicode_widen(PyObject **p_unicode, Py_ssize_t length, - unsigned int maxchar) -{ - PyObject *result; - assert(PyUnicode_IS_READY(*p_unicode)); - assert(length <= PyUnicode_GET_LENGTH(*p_unicode)); - if (maxchar <= PyUnicode_MAX_CHAR_VALUE(*p_unicode)) - return 0; - result = PyUnicode_New(PyUnicode_GET_LENGTH(*p_unicode), - maxchar); - if (result == NULL) - return -1; - _PyUnicode_FastCopyCharacters(result, 0, *p_unicode, 0, length); - Py_DECREF(*p_unicode); - *p_unicode = result; - return 0; -} - -static int -unicode_putchar(PyObject **p_unicode, Py_ssize_t *pos, - Py_UCS4 ch) -{ - assert(ch <= MAX_UNICODE); - if (unicode_widen(p_unicode, *pos, ch) < 0) - return -1; - PyUnicode_WRITE(PyUnicode_KIND(*p_unicode), - PyUnicode_DATA(*p_unicode), - (*pos)++, ch); - return 0; -} - /* Copy a ASCII or latin1 char* string into a Python Unicode string. 
WARNING: The function doesn't copy the terminating null character and @@ -3925,6 +3893,7 @@ onError: *exceptionObject = NULL; } +#ifdef HAVE_MBCS /* error handling callback helper: build arguments, call the callback and check the arguments, if no exception occurred, copy the replacement to the output @@ -3933,11 +3902,12 @@ onError: */ static int -unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, - const char *encoding, const char *reason, - const char **input, const char **inend, Py_ssize_t *startinpos, - Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, - PyObject **output, Py_ssize_t *outpos) +unicode_decode_call_errorhandler_wchar( + const char *errors, PyObject **errorHandler, + const char *encoding, const char *reason, + const char **input, const char **inend, Py_ssize_t *startinpos, + Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, + PyObject **output, Py_ssize_t *outpos) { static char *argparse = "O!n;decoding error handler must return (str, int) tuple"; @@ -3948,12 +3918,11 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, Py_ssize_t requiredsize; Py_ssize_t newpos; PyObject *inputobj = NULL; - int res = -1; + wchar_t *repwstr; + Py_ssize_t repwlen; - if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) - outsize = PyUnicode_GET_LENGTH(*output); - else - outsize = _PyUnicode_WSTR_LENGTH(*output); + assert (_PyUnicode_KIND(*output) == PyUnicode_WCHAR_KIND); + outsize = _PyUnicode_WSTR_LENGTH(*output); if (*errorHandler == NULL) { *errorHandler = PyCodec_LookupError(errors); @@ -3978,8 +3947,6 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, } if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos)) goto onError; - if (PyUnicode_READY(repunicode) == -1) - goto onError; /* Copy back the bytes variables, which might have been modified by the callback */ @@ -4003,54 +3970,112 @@ 
unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, goto onError; } - if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) { - /* need more space? (at least enough for what we - have+the replacement+the rest of the string (starting - at the new input position), so we won't have to check space - when there are no errors in the rest of the string) */ - Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode); - requiredsize = *outpos + replen + insize-newpos; - if (requiredsize > outsize) { - if (requiredsize<2*outsize) - requiredsize = 2*outsize; - if (unicode_resize(output, requiredsize) < 0) - goto onError; - } - if (unicode_widen(output, *outpos, - PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0) + repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen); + if (repwstr == NULL) + goto onError; + /* need more space? (at least enough for what we + have+the replacement+the rest of the string (starting + at the new input position), so we won't have to check space + when there are no errors in the rest of the string) */ + requiredsize = *outpos + repwlen + insize-newpos; + if (requiredsize > outsize) { + if (requiredsize < 2*outsize) + requiredsize = 2*outsize; + if (unicode_resize(output, requiredsize) < 0) goto onError; - _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen); - *outpos += replen; } - else { - wchar_t *repwstr; - Py_ssize_t repwlen; - repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen); - if (repwstr == NULL) + wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen); + *outpos += repwlen; + + *endinpos = newpos; + *inptr = *input + newpos; + + /* we made it! 
*/ + Py_XDECREF(restuple); + return 0; + + onError: + Py_XDECREF(restuple); + return -1; +} +#endif /* HAVE_MBCS */ + +static int +unicode_decode_call_errorhandler_writer( + const char *errors, PyObject **errorHandler, + const char *encoding, const char *reason, + const char **input, const char **inend, Py_ssize_t *startinpos, + Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, + _PyUnicodeWriter *writer /* PyObject **output, Py_ssize_t *outpos */) +{ + static char *argparse = "O!n;decoding error handler must return (str, int) tuple"; + + PyObject *restuple = NULL; + PyObject *repunicode = NULL; + Py_ssize_t insize; + Py_ssize_t newpos; + PyObject *inputobj = NULL; + + if (*errorHandler == NULL) { + *errorHandler = PyCodec_LookupError(errors); + if (*errorHandler == NULL) goto onError; - /* need more space? (at least enough for what we - have+the replacement+the rest of the string (starting - at the new input position), so we won't have to check space - when there are no errors in the rest of the string) */ - requiredsize = *outpos + repwlen + insize-newpos; - if (requiredsize > outsize) { - if (requiredsize < 2*outsize) - requiredsize = 2*outsize; - if (unicode_resize(output, requiredsize) < 0) - goto onError; - } - wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen); - *outpos += repwlen; } + + make_decode_exception(exceptionObject, + encoding, + *input, *inend - *input, + *startinpos, *endinpos, + reason); + if (*exceptionObject == NULL) + goto onError; + + restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL); + if (restuple == NULL) + goto onError; + if (!PyTuple_Check(restuple)) { + PyErr_SetString(PyExc_TypeError, &argparse[4]); + goto onError; + } + if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos)) + goto onError; + + /* Copy back the bytes variables, which might have been modified by the + callback */ + inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject); + if 
(!inputobj) + goto onError; + if (!PyBytes_Check(inputobj)) { + PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes"); + } + *input = PyBytes_AS_STRING(inputobj); + insize = PyBytes_GET_SIZE(inputobj); + *inend = *input + insize; + /* we can DECREF safely, as the exception has another reference, + so the object won't go away. */ + Py_DECREF(inputobj); + + if (newpos<0) + newpos = insize+newpos; + if (newpos<0 || newpos>insize) { + PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos); + goto onError; + } + + writer->overallocate = 1; + if (_PyUnicodeWriter_WriteStr(writer, repunicode) == -1) + return + *endinpos = newpos; *inptr = *input + newpos; /* we made it! */ - res = 0; + Py_XDECREF(restuple); + return 0; onError: Py_XDECREF(restuple); - return res; + return -1; } /* --- UTF-7 Codec -------------------------------------------------------- */ @@ -4158,9 +4183,8 @@ PyUnicode_DecodeUTF7Stateful(const char *s, const char *starts = s; Py_ssize_t startinpos; Py_ssize_t endinpos; - Py_ssize_t outpos; const char *e; - PyObject *unicode; + _PyUnicodeWriter writer; const char *errmsg = ""; int inShift = 0; Py_ssize_t shiftOutStart; @@ -4170,17 +4194,19 @@ PyUnicode_DecodeUTF7Stateful(const char *s, PyObject *errorHandler = NULL; PyObject *exc = NULL; - /* Start off assuming it's all ASCII. Widen later as necessary. */ - unicode = PyUnicode_New(size, 127); - if (!unicode) - return NULL; if (size == 0) { if (consumed) *consumed = 0; - return unicode; + Py_INCREF(unicode_empty); + return unicode_empty; } - shiftOutStart = outpos = 0; + /* Start off assuming it's all ASCII. Widen later as necessary. 
*/ + _PyUnicodeWriter_Init(&writer, 0); + if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) + goto onError; + + shiftOutStart = 0; e = s + size; while (s < e) { @@ -4202,14 +4228,18 @@ PyUnicode_DecodeUTF7Stateful(const char *s, /* expecting a second surrogate */ if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) { Py_UCS4 ch2 = Py_UNICODE_JOIN_SURROGATES(surrogate, outCh); - if (unicode_putchar(&unicode, &outpos, ch2) < 0) + if (_PyUnicodeWriter_Prepare(&writer, 1, ch2) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch2); + writer.pos++; surrogate = 0; continue; } else { - if (unicode_putchar(&unicode, &outpos, surrogate) < 0) + if (_PyUnicodeWriter_Prepare(&writer, 1, surrogate) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, surrogate); + writer.pos++; surrogate = 0; } } @@ -4218,8 +4248,10 @@ PyUnicode_DecodeUTF7Stateful(const char *s, surrogate = outCh; } else { - if (unicode_putchar(&unicode, &outpos, outCh) < 0) + if (_PyUnicodeWriter_Prepare(&writer, 1, outCh) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, outCh); + writer.pos++; } } } @@ -4227,8 +4259,10 @@ PyUnicode_DecodeUTF7Stateful(const char *s, inShift = 0; s++; if (surrogate) { - if (unicode_putchar(&unicode, &outpos, surrogate) < 0) + if (_PyUnicodeWriter_Prepare(&writer, 1, surrogate) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, surrogate); + writer.pos++; surrogate = 0; } if (base64bits > 0) { /* left-over bits */ @@ -4248,8 +4282,10 @@ PyUnicode_DecodeUTF7Stateful(const char *s, if (ch != '-') { /* '-' is absorbed; other terminating characters are preserved */ - if (unicode_putchar(&unicode, &outpos, ch) < 0) + if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); + writer.pos++; } } } @@ -4258,19 +4294,23 @@ PyUnicode_DecodeUTF7Stateful(const char *s, s++; /* consume '+' */ if (s < e && *s == '-') { /* '+-' 
encodes '+' */ s++; - if (unicode_putchar(&unicode, &outpos, '+') < 0) + if (_PyUnicodeWriter_Prepare(&writer, 1, '+') == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '+'); + writer.pos++; } else { /* begin base64-encoded section */ inShift = 1; - shiftOutStart = outpos; + shiftOutStart = writer.pos; base64bits = 0; } } else if (DECODE_DIRECT(ch)) { /* character decodes as itself */ - if (unicode_putchar(&unicode, &outpos, ch) < 0) - goto onError; s++; + if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) + goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); + writer.pos++; } else { startinpos = s-starts; @@ -4281,11 +4321,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s, continue; utf7Error: endinpos = s-starts; - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "utf7", errmsg, &starts, &e, &startinpos, &endinpos, &exc, &s, - &unicode, &outpos)) + &writer)) goto onError; } @@ -4297,11 +4337,11 @@ utf7Error: (base64bits >= 6) || (base64bits > 0 && base64buffer != 0)) { endinpos = size; - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "utf7", "unterminated shift sequence", &starts, &e, &startinpos, &endinpos, &exc, &s, - &unicode, &outpos)) + &writer)) goto onError; if (s < e) goto restart; @@ -4311,7 +4351,7 @@ utf7Error: /* return state */ if (consumed) { if (inShift) { - outpos = shiftOutStart; /* back off output */ + writer.pos = shiftOutStart; /* back off output */ *consumed = startinpos; } else { @@ -4319,17 +4359,14 @@ utf7Error: } } - if (unicode_resize(&unicode, outpos) < 0) - goto onError; - Py_XDECREF(errorHandler); Py_XDECREF(exc); - return unicode_result(unicode); + return _PyUnicodeWriter_Finish(&writer); onError: Py_XDECREF(errorHandler); Py_XDECREF(exc); - Py_DECREF(unicode); + _PyUnicodeWriter_Dealloc(&writer); return NULL; } @@ -4555,10 +4592,9 @@ 
PyUnicode_DecodeUTF8Stateful(const char *s, const char *errors, Py_ssize_t *consumed) { - PyObject *unicode; + _PyUnicodeWriter writer; const char *starts = s; const char *end = s + size; - Py_ssize_t outpos; Py_ssize_t startinpos; Py_ssize_t endinpos; @@ -4580,29 +4616,25 @@ PyUnicode_DecodeUTF8Stateful(const char *s, return get_latin1_char((unsigned char)s[0]); } - unicode = PyUnicode_New(size, 127); - if (!unicode) - return NULL; + _PyUnicodeWriter_Init(&writer, 0); + if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) + goto onError; - outpos = ascii_decode(s, end, PyUnicode_1BYTE_DATA(unicode)); - s += outpos; + writer.pos = ascii_decode(s, end, writer.data); + s += writer.pos; while (s < end) { Py_UCS4 ch; - int kind = PyUnicode_KIND(unicode); + int kind = writer.kind; if (kind == PyUnicode_1BYTE_KIND) { - if (PyUnicode_IS_ASCII(unicode)) - ch = asciilib_utf8_decode(&s, end, - PyUnicode_1BYTE_DATA(unicode), &outpos); + if (PyUnicode_IS_ASCII(writer.buffer)) + ch = asciilib_utf8_decode(&s, end, writer.data, &writer.pos); else - ch = ucs1lib_utf8_decode(&s, end, - PyUnicode_1BYTE_DATA(unicode), &outpos); + ch = ucs1lib_utf8_decode(&s, end, writer.data, &writer.pos); } else if (kind == PyUnicode_2BYTE_KIND) { - ch = ucs2lib_utf8_decode(&s, end, - PyUnicode_2BYTE_DATA(unicode), &outpos); + ch = ucs2lib_utf8_decode(&s, end, writer.data, &writer.pos); } else { assert(kind == PyUnicode_4BYTE_KIND); - ch = ucs4lib_utf8_decode(&s, end, - PyUnicode_4BYTE_DATA(unicode), &outpos); + ch = ucs4lib_utf8_decode(&s, end, writer.data, &writer.pos); } switch (ch) { @@ -4626,35 +4658,33 @@ PyUnicode_DecodeUTF8Stateful(const char *s, endinpos = startinpos + ch - 1; break; default: - if (unicode_putchar(&unicode, &outpos, ch) < 0) + if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); + writer.pos++; continue; } - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( 
errors, &errorHandler, "utf-8", errmsg, &starts, &end, &startinpos, &endinpos, &exc, &s, - &unicode, &outpos)) + &writer)) goto onError; } End: - if (unicode_resize(&unicode, outpos) < 0) - goto onError; - if (consumed) *consumed = s - starts; Py_XDECREF(errorHandler); Py_XDECREF(exc); - assert(_PyUnicode_CheckConsistency(unicode, 1)); - return unicode; + return _PyUnicodeWriter_Finish(&writer); onError: Py_XDECREF(errorHandler); Py_XDECREF(exc); - Py_XDECREF(unicode); + _PyUnicodeWriter_Dealloc(&writer); return NULL; } @@ -4799,8 +4829,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, const char *starts = s; Py_ssize_t startinpos; Py_ssize_t endinpos; - Py_ssize_t outpos; - PyObject *unicode; + _PyUnicodeWriter writer; const unsigned char *q, *e; int le, bo = 0; /* assume native ordering by default */ const char *errmsg = ""; @@ -4844,25 +4873,25 @@ PyUnicode_DecodeUTF32Stateful(const char *s, le = bo <= 0; #endif - unicode = PyUnicode_New((e - q + 3) / 4, 127); - if (!unicode) - return NULL; + _PyUnicodeWriter_Init(&writer, 0); + if (_PyUnicodeWriter_Prepare(&writer, (e - q + 3) / 4, 127) == -1) + goto onError; - outpos = 0; while (1) { Py_UCS4 ch = 0; - Py_UCS4 maxch = PyUnicode_MAX_CHAR_VALUE(unicode); + Py_UCS4 maxch = PyUnicode_MAX_CHAR_VALUE(writer.buffer); if (e - q >= 4) { - enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); - void *data = PyUnicode_DATA(unicode); + enum PyUnicode_Kind kind = writer.kind; + void *data = writer.data; const unsigned char *last = e - 4; + Py_ssize_t pos = writer.pos; if (le) { do { ch = (q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0]; if (ch > maxch) break; - PyUnicode_WRITE(kind, data, outpos++, ch); + PyUnicode_WRITE(kind, data, pos++, ch); q += 4; } while (q <= last); } @@ -4871,10 +4900,11 @@ PyUnicode_DecodeUTF32Stateful(const char *s, ch = (q[0] << 24) | (q[1] << 16) | (q[2] << 8) | q[3]; if (ch > maxch) break; - PyUnicode_WRITE(kind, data, outpos++, ch); + PyUnicode_WRITE(kind, data, pos++, ch); q += 4; } while (q <= 
last); } + writer.pos = pos; } if (ch <= maxch) { @@ -4887,8 +4917,10 @@ PyUnicode_DecodeUTF32Stateful(const char *s, } else { if (ch < 0x110000) { - if (unicode_putchar(&unicode, &outpos, ch) < 0) + if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); + writer.pos++; q += 4; continue; } @@ -4899,27 +4931,23 @@ PyUnicode_DecodeUTF32Stateful(const char *s, /* The remaining input chars are ignored if the callback chooses to skip the input */ - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "utf32", errmsg, &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q, - &unicode, &outpos)) + &writer)) goto onError; } if (consumed) *consumed = (const char *)q-starts; - /* Adjust length */ - if (unicode_resize(&unicode, outpos) < 0) - goto onError; - Py_XDECREF(errorHandler); Py_XDECREF(exc); - return unicode_result(unicode); + return _PyUnicodeWriter_Finish(&writer); onError: - Py_DECREF(unicode); + _PyUnicodeWriter_Dealloc(&writer); Py_XDECREF(errorHandler); Py_XDECREF(exc); return NULL; @@ -5040,8 +5068,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s, const char *starts = s; Py_ssize_t startinpos; Py_ssize_t endinpos; - Py_ssize_t outpos; - PyObject *unicode; + _PyUnicodeWriter writer; const unsigned char *q, *e; int bo = 0; /* assume native ordering by default */ int native_ordering; @@ -5088,32 +5115,31 @@ PyUnicode_DecodeUTF16Stateful(const char *s, /* Note: size will always be longer than the resulting Unicode character count */ - unicode = PyUnicode_New((e - q + 1) / 2, 127); - if (!unicode) - return NULL; + _PyUnicodeWriter_Init(&writer, 0); + if (_PyUnicodeWriter_Prepare(&writer, (e - q + 1) / 2, 127) == -1) + goto onError; - outpos = 0; while (1) { Py_UCS4 ch = 0; if (e - q >= 2) { - int kind = PyUnicode_KIND(unicode); + int kind = writer.kind; if (kind == PyUnicode_1BYTE_KIND) { - if 
(PyUnicode_IS_ASCII(unicode)) + if (PyUnicode_IS_ASCII(writer.buffer)) ch = asciilib_utf16_decode(&q, e, - PyUnicode_1BYTE_DATA(unicode), &outpos, + (Py_UCS1*)writer.data, &writer.pos, native_ordering); else ch = ucs1lib_utf16_decode(&q, e, - PyUnicode_1BYTE_DATA(unicode), &outpos, + (Py_UCS1*)writer.data, &writer.pos, native_ordering); } else if (kind == PyUnicode_2BYTE_KIND) { ch = ucs2lib_utf16_decode(&q, e, - PyUnicode_2BYTE_DATA(unicode), &outpos, + (Py_UCS2*)writer.data, &writer.pos, native_ordering); } else { assert(kind == PyUnicode_4BYTE_KIND); ch = ucs4lib_utf16_decode(&q, e, - PyUnicode_4BYTE_DATA(unicode), &outpos, + (Py_UCS4*)writer.data, &writer.pos, native_ordering); } } @@ -5146,12 +5172,14 @@ PyUnicode_DecodeUTF16Stateful(const char *s, endinpos = startinpos + 2; break; default: - if (unicode_putchar(&unicode, &outpos, ch) < 0) + if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); + writer.pos++; continue; } - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "utf16", errmsg, @@ -5161,8 +5189,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s, &endinpos, &exc, (const char **)&q, - &unicode, - &outpos)) + &writer)) goto onError; } @@ -5170,16 +5197,12 @@ End: if (consumed) *consumed = (const char *)q-starts; - /* Adjust length */ - if (unicode_resize(&unicode, outpos) < 0) - goto onError; - Py_XDECREF(errorHandler); Py_XDECREF(exc); - return unicode_result(unicode); + return _PyUnicodeWriter_Finish(&writer); onError: - Py_DECREF(unicode); + _PyUnicodeWriter_Dealloc(&writer); Py_XDECREF(errorHandler); Py_XDECREF(exc); return NULL; @@ -5346,27 +5369,30 @@ PyUnicode_DecodeUnicodeEscape(const char *s, Py_ssize_t startinpos; Py_ssize_t endinpos; int j; - PyObject *v; + _PyUnicodeWriter writer; const char *end; char* message; Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */ PyObject *errorHandler = NULL; PyObject 
*exc = NULL; Py_ssize_t len; - Py_ssize_t i; len = length_of_escaped_ascii_string(s, size); + if (len == 0) { + Py_INCREF(unicode_empty); + return unicode_empty; + } /* After length_of_escaped_ascii_string() there are two alternatives, either the string is pure ASCII with named escapes like \n, etc. and we determined it's exact size (common case) or it contains \x, \u, ... escape sequences. then we create a legacy wchar string and resize it at the end of this function. */ - if (len >= 0) { - v = PyUnicode_New(len, 127); - if (!v) + _PyUnicodeWriter_Init(&writer, 0); + if (len > 0) { + if (_PyUnicodeWriter_Prepare(&writer, len, 127) == -1) goto onError; - assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); + assert(writer.kind == PyUnicode_1BYTE_KIND); } else { /* Escaped strings will always be longer than the resulting @@ -5374,15 +5400,12 @@ PyUnicode_DecodeUnicodeEscape(const char *s, length after conversion to the true value. (but if the error callback returns a long replacement string we'll have to allocate more space) */ - v = PyUnicode_New(size, 127); - if (!v) + if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) goto onError; - len = size; } if (size == 0) - return v; - i = 0; + return _PyUnicodeWriter_Finish(&writer); end = s + size; while (s < end) { @@ -5390,14 +5413,14 @@ PyUnicode_DecodeUnicodeEscape(const char *s, Py_UCS4 x; int digits; - /* The only case in which i == ascii_length is a backslash - followed by a newline. */ - assert(i <= len); - /* Non-escape characters are interpreted as Unicode ordinals */ if (*s != '\\') { - if (unicode_putchar(&v, &i, (unsigned char) *s++) < 0) + x = (unsigned char)*s; + s++; + if (_PyUnicodeWriter_Prepare(&writer, 1, x) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, x); + writer.pos++; continue; } @@ -5410,16 +5433,18 @@ PyUnicode_DecodeUnicodeEscape(const char *s, /* The only case in which i == ascii_length is a backslash followed by a newline. 
*/ - assert(i < len || (i == len && c == '\n')); + assert(writer.pos < writer.size || (writer.pos == writer.size && c == '\n')); switch (c) { /* \x escapes */ -#define WRITECHAR(ch) \ - do { \ - if (unicode_putchar(&v, &i, ch) < 0) \ - goto onError; \ - }while(0) +#define WRITECHAR(ch) \ + do { \ + if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) \ + goto onError; \ + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); \ + writer.pos++; \ + } while(0) case '\n': break; case '\\': WRITECHAR('\\'); break; @@ -5469,11 +5494,11 @@ PyUnicode_DecodeUnicodeEscape(const char *s, chr = 0; if (s+digits>end) { endinpos = size; - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "unicodeescape", "end of string in escape sequence", &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) + &writer)) goto onError; goto nextByte; } @@ -5481,13 +5506,12 @@ PyUnicode_DecodeUnicodeEscape(const char *s, c = (unsigned char) s[j]; if (!Py_ISXDIGIT(c)) { endinpos = (s+j+1)-starts; - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "unicodeescape", message, &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) + &writer)) goto onError; - len = PyUnicode_GET_LENGTH(v); goto nextByte; } chr = (chr<<4) & ~0xF; @@ -5509,11 +5533,11 @@ PyUnicode_DecodeUnicodeEscape(const char *s, WRITECHAR(chr); } else { endinpos = s-starts; - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "unicodeescape", "illegal Unicode character", &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) + &writer)) goto onError; } break; @@ -5543,11 +5567,11 @@ PyUnicode_DecodeUnicodeEscape(const char *s, } } endinpos = s-starts; - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "unicodeescape", message, &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) + 
&writer)) goto onError; break; @@ -5556,11 +5580,11 @@ PyUnicode_DecodeUnicodeEscape(const char *s, message = "\\ at end of string"; s--; endinpos = s-starts; - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "unicodeescape", message, &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) + &writer)) goto onError; } else { @@ -5574,24 +5598,22 @@ PyUnicode_DecodeUnicodeEscape(const char *s, } #undef WRITECHAR - if (unicode_resize(&v, i) < 0) - goto onError; Py_XDECREF(errorHandler); Py_XDECREF(exc); - return unicode_result(v); + return _PyUnicodeWriter_Finish(&writer); ucnhashError: PyErr_SetString( PyExc_UnicodeError, "\\N escapes not supported (can't load unicodedata module)" ); - Py_XDECREF(v); + _PyUnicodeWriter_Dealloc(&writer); Py_XDECREF(errorHandler); Py_XDECREF(exc); return NULL; onError: - Py_XDECREF(v); + _PyUnicodeWriter_Dealloc(&writer); Py_XDECREF(errorHandler); Py_XDECREF(exc); return NULL; @@ -5744,23 +5766,25 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s, const char *starts = s; Py_ssize_t startinpos; Py_ssize_t endinpos; - Py_ssize_t outpos; - PyObject *v; + _PyUnicodeWriter writer; const char *end; const char *bs; PyObject *errorHandler = NULL; PyObject *exc = NULL; + if (size == 0) { + Py_INCREF(unicode_empty); + return unicode_empty; + } + /* Escaped strings will always be longer than the resulting Unicode string, so we start with size here and then reduce the length after conversion to the true value. 
(But decoding error handler might have to resize the string) */ - v = PyUnicode_New(size, 127); - if (v == NULL) + _PyUnicodeWriter_Init(&writer, 1); + if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) goto onError; - if (size == 0) - return v; - outpos = 0; + end = s + size; while (s < end) { unsigned char c; @@ -5770,8 +5794,11 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s, /* Non-escape characters are interpreted as Unicode ordinals */ if (*s != '\\') { - if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0) + x = (unsigned char)*s++; + if (_PyUnicodeWriter_Prepare(&writer, 1, x) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, x); + writer.pos++; continue; } startinpos = s-starts; @@ -5782,15 +5809,18 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s, for (;s < end;) { if (*s != '\\') break; - if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0) + x = (unsigned char)*s++; + if (_PyUnicodeWriter_Prepare(&writer, 1, x) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, x); + writer.pos++; } if (((s - bs) & 1) == 0 || s >= end || (*s != 'u' && *s != 'U')) { continue; } - outpos--; + writer.pos--; count = *s=='u' ? 
4 : 8; s++; @@ -5799,11 +5829,11 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s, c = (unsigned char)*s; if (!Py_ISXDIGIT(c)) { endinpos = s-starts; - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "rawunicodeescape", "truncated \\uXXXX", &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) + &writer)) goto onError; goto nextByte; } @@ -5816,28 +5846,29 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s, x += 10 + c - 'A'; } if (x <= MAX_UNICODE) { - if (unicode_putchar(&v, &outpos, x) < 0) + if (_PyUnicodeWriter_Prepare(&writer, 1, x) == -1) goto onError; - } else { + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, x); + writer.pos++; + } + else { endinpos = s-starts; - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "rawunicodeescape", "\\Uxxxxxxxx out of range", &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) + &writer)) goto onError; } nextByte: ; } - if (unicode_resize(&v, outpos) < 0) - goto onError; Py_XDECREF(errorHandler); Py_XDECREF(exc); - return unicode_result(v); + return _PyUnicodeWriter_Finish(&writer); onError: - Py_XDECREF(v); + _PyUnicodeWriter_Dealloc(&writer); Py_XDECREF(errorHandler); Py_XDECREF(exc); return NULL; @@ -5937,8 +5968,7 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, const char *starts = s; Py_ssize_t startinpos; Py_ssize_t endinpos; - Py_ssize_t outpos; - PyObject *v; + _PyUnicodeWriter writer; const char *end; const char *reason; PyObject *errorHandler = NULL; @@ -5949,13 +5979,15 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, 1)) return NULL; + if (size == 0) { + Py_INCREF(unicode_empty); + return unicode_empty; + } + /* XXX overflow detection missing */ - v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127); - if (v == NULL) + _PyUnicodeWriter_Init(&writer, 0); + if (_PyUnicodeWriter_Prepare(&writer, (size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127) == -1) 
goto onError; - if (PyUnicode_GET_LENGTH(v) == 0) - return v; - outpos = 0; end = s + size; while (s < end) { @@ -5989,11 +6021,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, endinpos = s - starts + Py_UNICODE_SIZE; reason = "illegal code point (> 0x10FFFF)"; } - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "unicode_internal", reason, &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) + &writer)) goto onError; continue; } @@ -6013,18 +6045,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, } #endif - if (unicode_putchar(&v, &outpos, ch) < 0) + if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); + writer.pos++; } - if (unicode_resize(&v, outpos) < 0) - goto onError; Py_XDECREF(errorHandler); Py_XDECREF(exc); - return unicode_result(v); + return _PyUnicodeWriter_Finish(&writer); onError: - Py_XDECREF(v); + _PyUnicodeWriter_Dealloc(&writer); Py_XDECREF(errorHandler); Py_XDECREF(exc); return NULL; @@ -6388,7 +6420,7 @@ PyUnicode_DecodeASCII(const char *s, const char *errors) { const char *starts = s; - PyObject *unicode; + _PyUnicodeWriter writer; int kind; void *data; Py_ssize_t startinpos; @@ -6407,46 +6439,45 @@ PyUnicode_DecodeASCII(const char *s, if (size == 1 && (unsigned char)s[0] < 128) return get_latin1_char((unsigned char)s[0]); - unicode = PyUnicode_New(size, 127); - if (unicode == NULL) + _PyUnicodeWriter_Init(&writer, 0); + if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) goto onError; e = s + size; - data = PyUnicode_1BYTE_DATA(unicode); + data = writer.data; outpos = ascii_decode(s, e, (Py_UCS1 *)data); - if (outpos == size) - return unicode; + writer.pos = outpos; + if (writer.pos == size) + return _PyUnicodeWriter_Finish(&writer); - s += outpos; - kind = PyUnicode_1BYTE_KIND; + s += writer.pos; + kind = writer.kind; while (s < e) { register unsigned char c = (unsigned char)*s; if (c < 
128) { - PyUnicode_WRITE(kind, data, outpos++, c); + PyUnicode_WRITE(kind, data, writer.pos, c); + writer.pos++; ++s; } else { startinpos = s-starts; endinpos = startinpos + 1; - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "ascii", "ordinal not in range(128)", &starts, &e, &startinpos, &endinpos, &exc, &s, - &unicode, &outpos)) + &writer)) goto onError; - kind = PyUnicode_KIND(unicode); - data = PyUnicode_DATA(unicode); + kind = writer.kind; + data = writer.data; } } - if (unicode_resize(&unicode, outpos) < 0) - goto onError; Py_XDECREF(errorHandler); Py_XDECREF(exc); - assert(_PyUnicode_CheckConsistency(unicode, 1)); - return unicode; + return _PyUnicodeWriter_Finish(&writer); onError: - Py_XDECREF(unicode); + _PyUnicodeWriter_Dealloc(&writer); Py_XDECREF(errorHandler); Py_XDECREF(exc); return NULL; @@ -6707,7 +6738,7 @@ decode_code_page_errors(UINT code_page, startinpos = in - startin; endinpos = startinpos + 1; outpos = out - PyUnicode_AS_UNICODE(*v); - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_wchar( errors, &errorHandler, encoding, reason, &startin, &endin, &startinpos, &endinpos, &exc, &in, @@ -7249,10 +7280,8 @@ PyUnicode_DecodeCharmap(const char *s, const char *starts = s; Py_ssize_t startinpos; Py_ssize_t endinpos; - Py_ssize_t outpos; const char *e; - PyObject *v; - Py_ssize_t extrachars = 0; + _PyUnicodeWriter writer; PyObject *errorHandler = NULL; PyObject *exc = NULL; @@ -7260,12 +7289,14 @@ PyUnicode_DecodeCharmap(const char *s, if (mapping == NULL) return PyUnicode_DecodeLatin1(s, size, errors); - v = PyUnicode_New(size, 127); - if (v == NULL) + if (size == 0) { + Py_INCREF(unicode_empty); + return unicode_empty; + } + _PyUnicodeWriter_Init(&writer, 0); + if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) goto onError; - if (size == 0) - return v; - outpos = 0; + e = s + size; if (PyUnicode_CheckExact(mapping)) { Py_ssize_t maplen; @@ -7282,28 
+7313,29 @@ PyUnicode_DecodeCharmap(const char *s, while (s < e) { unsigned char ch; if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) { - enum PyUnicode_Kind outkind = PyUnicode_KIND(v); + enum PyUnicode_Kind outkind = writer.kind; + void *outdata = writer.data; if (outkind == PyUnicode_1BYTE_KIND) { - void *outdata = PyUnicode_DATA(v); - Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(v); + Py_UCS4 maxchar = writer.maxchar; while (s < e) { unsigned char ch = *s; x = PyUnicode_READ(PyUnicode_2BYTE_KIND, mapdata, ch); if (x > maxchar) goto Error; - PyUnicode_WRITE(PyUnicode_1BYTE_KIND, outdata, outpos++, x); + PyUnicode_WRITE(PyUnicode_1BYTE_KIND, outdata, writer.pos, x); + writer.pos++; ++s; } break; } else if (outkind == PyUnicode_2BYTE_KIND) { - void *outdata = PyUnicode_DATA(v); while (s < e) { unsigned char ch = *s; x = PyUnicode_READ(PyUnicode_2BYTE_KIND, mapdata, ch); if (x == 0xFFFE) goto Error; - PyUnicode_WRITE(PyUnicode_2BYTE_KIND, outdata, outpos++, x); + PyUnicode_WRITE(PyUnicode_2BYTE_KIND, outdata, writer.pos, x); + writer.pos++; ++s; } break; @@ -7321,18 +7353,20 @@ Error: /* undefined mapping */ startinpos = s-starts; endinpos = startinpos+1; - if (unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "charmap", "character maps to ", &starts, &e, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) { + &writer)) { goto onError; } continue; } - if (unicode_putchar(&v, &outpos, x) < 0) + if (_PyUnicodeWriter_Prepare(&writer, 1, x) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, x); + writer.pos++; ++s; } } @@ -7367,18 +7401,21 @@ Error: Py_DECREF(x); goto onError; } - if (unicode_putchar(&v, &outpos, value) < 0) + + if (_PyUnicodeWriter_Prepare(&writer, 1, value) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, value); + writer.pos++; } else if (x == Py_None) { /* undefined mapping */ startinpos = s-starts; endinpos = startinpos+1; - if 
(unicode_decode_call_errorhandler( + if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, "charmap", "character maps to ", &starts, &e, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) { + &writer)) { Py_DECREF(x); goto onError; } @@ -7386,40 +7423,9 @@ Error: continue; } else if (PyUnicode_Check(x)) { - Py_ssize_t targetsize; - - if (PyUnicode_READY(x) == -1) + writer.overallocate = 1; + if (_PyUnicodeWriter_WriteStr(&writer, x) == -1) goto onError; - targetsize = PyUnicode_GET_LENGTH(x); - - if (targetsize == 1) { - /* 1-1 mapping */ - if (unicode_putchar(&v, &outpos, - PyUnicode_READ_CHAR(x, 0)) < 0) - goto onError; - } - else if (targetsize > 1) { - /* 1-n mapping */ - if (targetsize > extrachars) { - /* resize first */ - Py_ssize_t needed = (targetsize - extrachars) + \ - (targetsize << 2); - extrachars += needed; - /* XXX overflow detection missing */ - if (unicode_resize(&v, - PyUnicode_GET_LENGTH(v) + needed) < 0) - { - Py_DECREF(x); - goto onError; - } - } - if (unicode_widen(&v, outpos, PyUnicode_MAX_CHAR_VALUE(x)) < 0) - goto onError; - PyUnicode_CopyCharacters(v, outpos, x, 0, targetsize); - outpos += targetsize; - extrachars -= targetsize; - } - /* 1-0 mapping: skip the character */ } else { /* wrong return value */ @@ -7432,16 +7438,14 @@ Error: ++s; } } - if (unicode_resize(&v, outpos) < 0) - goto onError; Py_XDECREF(errorHandler); Py_XDECREF(exc); - return unicode_result(v); + return _PyUnicodeWriter_Finish(&writer); onError: Py_XDECREF(errorHandler); Py_XDECREF(exc); - Py_XDECREF(v); + _PyUnicodeWriter_Dealloc(&writer); return NULL; } -- cgit v1.2.1 From c84fda52af51779e1ecc1fad4ec60af824bd9546 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sat, 10 Nov 2012 14:52:10 +0000 Subject: Issue #14794: slice.indices no longer returns OverflowError for out-of-range start, stop, step or length. 
--- Objects/sliceobject.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 175 insertions(+), 8 deletions(-) (limited to 'Objects') diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 1593335263..4b31f2306e 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -299,25 +299,192 @@ static PyMemberDef slice_members[] = { {0} }; +/* Helper function to convert a slice argument to a PyLong, and raise TypeError + with a suitable message on failure. */ + static PyObject* -slice_indices(PySliceObject* self, PyObject* len) +evaluate_slice_index(PyObject *v) { - Py_ssize_t ilen, start, stop, step, slicelength; + if (PyIndex_Check(v)) { + return PyNumber_Index(v); + } + else { + PyErr_SetString(PyExc_TypeError, + "slice indices must be integers or " + "None or have an __index__ method"); + return NULL; + } +} - ilen = PyNumber_AsSsize_t(len, PyExc_OverflowError); +/* Implementation of slice.indices. */ + +static PyObject* +slice_indices(PySliceObject* self, PyObject* len) +{ + PyObject *start=NULL, *stop=NULL, *step=NULL; + PyObject *length=NULL, *upper=NULL, *lower=NULL, *zero=NULL; + int step_is_negative, cmp; - if (ilen == -1 && PyErr_Occurred()) { + zero = PyLong_FromLong(0L); + if (zero == NULL) return NULL; + + /* Compute step and length as integers. */ + length = PyNumber_Index(len); + if (length == NULL) + goto error; + + if (self->step == Py_None) + step = PyLong_FromLong(1L); + else + step = evaluate_slice_index(self->step); + if (step == NULL) + goto error; + + /* Raise ValueError for negative length or zero step. 
*/ + cmp = PyObject_RichCompareBool(length, zero, Py_LT); + if (cmp < 0) { + goto error; + } + if (cmp) { + PyErr_SetString(PyExc_ValueError, + "length should not be negative"); + goto error; } - if (PySlice_GetIndicesEx((PyObject*)self, ilen, &start, &stop, - &step, &slicelength) < 0) { - return NULL; + cmp = PyObject_RichCompareBool(step, zero, Py_EQ); + if (cmp < 0) { + goto error; + } + if (cmp) { + PyErr_SetString(PyExc_ValueError, + "slice step cannot be zero"); + goto error; } - return Py_BuildValue("(nnn)", start, stop, step); + /* Find lower and upper bounds for start and stop. */ + step_is_negative = PyObject_RichCompareBool(step, zero, Py_LT); + if (step_is_negative < 0) { + goto error; + } + if (step_is_negative) { + lower = PyLong_FromLong(-1L); + if (lower == NULL) + goto error; + + upper = PyNumber_Add(length, lower); + if (upper == NULL) + goto error; + } + else { + lower = zero; + Py_INCREF(lower); + upper = length; + Py_INCREF(upper); + } + + /* Compute start. */ + if (self->start == Py_None) { + start = step_is_negative ? upper : lower; + Py_INCREF(start); + } + else { + start = evaluate_slice_index(self->start); + if (start == NULL) + goto error; + + cmp = PyObject_RichCompareBool(start, zero, Py_LT); + if (cmp < 0) + goto error; + if (cmp) { + /* start += length */ + PyObject *tmp = PyNumber_Add(start, length); + Py_DECREF(start); + start = tmp; + if (start == NULL) + goto error; + + cmp = PyObject_RichCompareBool(start, lower, Py_LT); + if (cmp < 0) + goto error; + if (cmp) { + Py_INCREF(lower); + Py_DECREF(start); + start = lower; + } + } + else { + cmp = PyObject_RichCompareBool(start, upper, Py_GT); + if (cmp < 0) + goto error; + if (cmp) { + Py_INCREF(upper); + Py_DECREF(start); + start = upper; + } + } + } + + /* Compute stop. */ + if (self->stop == Py_None) { + stop = step_is_negative ? 
lower : upper; + Py_INCREF(stop); + } + else { + stop = evaluate_slice_index(self->stop); + if (stop == NULL) + goto error; + + cmp = PyObject_RichCompareBool(stop, zero, Py_LT); + if (cmp < 0) + goto error; + if (cmp) { + /* stop += length */ + PyObject *tmp = PyNumber_Add(stop, length); + Py_DECREF(stop); + stop = tmp; + if (stop == NULL) + goto error; + + cmp = PyObject_RichCompareBool(stop, lower, Py_LT); + if (cmp < 0) + goto error; + if (cmp) { + Py_INCREF(lower); + Py_DECREF(stop); + stop = lower; + } + } + else { + cmp = PyObject_RichCompareBool(stop, upper, Py_GT); + if (cmp < 0) + goto error; + if (cmp) { + Py_INCREF(upper); + Py_DECREF(stop); + stop = upper; + } + } + } + + Py_DECREF(upper); + Py_DECREF(lower); + Py_DECREF(length); + Py_DECREF(zero); + return Py_BuildValue("(NNN)", start, stop, step); + + error: + Py_XDECREF(start); + Py_XDECREF(stop); + Py_XDECREF(step); + Py_XDECREF(upper); + Py_XDECREF(lower); + Py_XDECREF(length); + Py_XDECREF(zero); + return NULL; } + PyDoc_STRVAR(slice_indices_doc, "S.indices(len) -> (start, stop, stride)\n\ \n\ -- cgit v1.2.1 From e037652273f080b41545e058575097645959b804 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 12 Nov 2012 23:32:21 +0100 Subject: Issue #16416: Fix error handling in _Py_wchar2char() _Py_char2wchar() functions --- Objects/unicodeobject.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 6a30e8df60..7856e773ce 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4691,7 +4691,10 @@ onError: #ifdef __APPLE__ /* Simplified UTF-8 decoder using surrogateescape error handler, - used to decode the command line arguments on Mac OS X. */ + used to decode the command line arguments on Mac OS X. + + Return a pointer to a newly allocated wide character string (use + PyMem_Free() to free the memory), or NULL on memory allocation error. 
*/ wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size) @@ -4702,10 +4705,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size) /* Note: size will always be longer than the resulting Unicode character count */ - if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) { - PyErr_NoMemory(); + if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) return NULL; - } unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t)); if (!unicode) return NULL; -- cgit v1.2.1 From 4f0b0b53d7357c12bf3b5f84d2af60af0230d959 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Wed, 14 Nov 2012 17:08:31 +0000 Subject: Issue #16290: __complex__ must now always return an instance of complex. --- Objects/complexobject.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'Objects') diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 403c60c917..355b063f28 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -271,6 +271,12 @@ try_complex_special_method(PyObject *op) { if (f) { PyObject *res = PyObject_CallFunctionObjArgs(f, NULL); Py_DECREF(f); + if (res != NULL && !PyComplex_Check(res)) { + PyErr_SetString(PyExc_TypeError, + "__complex__ should return a complex object"); + Py_DECREF(res); + return NULL; + } return res; } return NULL; @@ -296,12 +302,6 @@ PyComplex_AsCComplex(PyObject *op) newop = try_complex_special_method(op); if (newop) { - if (!PyComplex_Check(newop)) { - PyErr_SetString(PyExc_TypeError, - "__complex__ should return a complex object"); - Py_DECREF(newop); - return cv; - } cv = ((PyComplexObject *)newop)->cval; Py_DECREF(newop); return cv; -- cgit v1.2.1 From 64b904ca555bd6fe7ce0eae5b24d5d525b82d040 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sat, 17 Nov 2012 19:18:10 +0000 Subject: Issue #16451: Refactor to remove duplication between range and slice in slice index computations. 
--- Objects/rangeobject.c | 205 ++------------------------------------------------ Objects/sliceobject.c | 148 +++++++++++++++++++----------------- 2 files changed, 84 insertions(+), 269 deletions(-) (limited to 'Objects') diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index 214b4556b5..ba51fec9e4 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -318,195 +318,6 @@ range_item(rangeobject *r, Py_ssize_t i) return res; } -/* Additional helpers, since the standard slice helpers - * all clip to PY_SSIZE_T_MAX - */ - -/* Replace _PyEval_SliceIndex */ -static PyObject * -compute_slice_element(PyObject *obj) -{ - PyObject *result = NULL; - if (obj != NULL) { - if (PyIndex_Check(obj)) { - result = PyNumber_Index(obj); - } - else { - PyErr_SetString(PyExc_TypeError, - "slice indices must be integers or " - "None or have an __index__ method"); - } - } - return result; -} - -/* Replace PySlice_GetIndicesEx - * Result indicates whether or not the slice is empty - * (-1 = error, 0 = empty slice, 1 = slice contains elements) - */ -static int -compute_slice_indices(rangeobject *r, PySliceObject *slice, - PyObject **start, PyObject **stop, PyObject **step) -{ - int cmp_result, has_elements; - Py_ssize_t clamped_step = 0; - PyObject *zero = NULL, *one = NULL, *neg_one = NULL, *candidate = NULL; - PyObject *tmp_start = NULL, *tmp_stop = NULL, *tmp_step = NULL; - zero = PyLong_FromLong(0); - if (zero == NULL) goto Fail; - one = PyLong_FromLong(1); - if (one == NULL) goto Fail; - neg_one = PyLong_FromLong(-1); - if (neg_one == NULL) goto Fail; - - /* Calculate step value */ - if (slice->step == Py_None) { - clamped_step = 1; - tmp_step = one; - Py_INCREF(tmp_step); - } else { - if (!_PyEval_SliceIndex(slice->step, &clamped_step)) goto Fail; - if (clamped_step == 0) { - PyErr_SetString(PyExc_ValueError, - "slice step cannot be zero"); - goto Fail; - } - tmp_step = compute_slice_element(slice->step); - if (tmp_step == NULL) goto Fail; - } - - /* Calculate 
start value */ - if (slice->start == Py_None) { - if (clamped_step < 0) { - tmp_start = PyNumber_Subtract(r->length, one); - if (tmp_start == NULL) goto Fail; - } else { - tmp_start = zero; - Py_INCREF(tmp_start); - } - } else { - candidate = compute_slice_element(slice->start); - if (candidate == NULL) goto Fail; - cmp_result = PyObject_RichCompareBool(candidate, zero, Py_LT); - if (cmp_result == -1) goto Fail; - if (cmp_result) { - /* candidate < 0 */ - tmp_start = PyNumber_Add(r->length, candidate); - if (tmp_start == NULL) goto Fail; - Py_CLEAR(candidate); - } else { - /* candidate >= 0 */ - tmp_start = candidate; - candidate = NULL; - } - cmp_result = PyObject_RichCompareBool(tmp_start, zero, Py_LT); - if (cmp_result == -1) goto Fail; - if (cmp_result) { - /* tmp_start < 0 */ - Py_CLEAR(tmp_start); - if (clamped_step < 0) { - tmp_start = neg_one; - } else { - tmp_start = zero; - } - Py_INCREF(tmp_start); - } else { - /* tmp_start >= 0 */ - cmp_result = PyObject_RichCompareBool(tmp_start, r->length, Py_GE); - if (cmp_result == -1) goto Fail; - if (cmp_result) { - /* tmp_start >= r->length */ - Py_CLEAR(tmp_start); - if (clamped_step < 0) { - tmp_start = PyNumber_Subtract(r->length, one); - if (tmp_start == NULL) goto Fail; - } else { - tmp_start = r->length; - Py_INCREF(tmp_start); - } - } - } - } - - /* Calculate stop value */ - if (slice->stop == Py_None) { - if (clamped_step < 0) { - tmp_stop = neg_one; - } else { - tmp_stop = r->length; - } - Py_INCREF(tmp_stop); - } else { - candidate = compute_slice_element(slice->stop); - if (candidate == NULL) goto Fail; - cmp_result = PyObject_RichCompareBool(candidate, zero, Py_LT); - if (cmp_result == -1) goto Fail; - if (cmp_result) { - /* candidate < 0 */ - tmp_stop = PyNumber_Add(r->length, candidate); - if (tmp_stop == NULL) goto Fail; - Py_CLEAR(candidate); - } else { - /* candidate >= 0 */ - tmp_stop = candidate; - candidate = NULL; - } - cmp_result = PyObject_RichCompareBool(tmp_stop, zero, Py_LT); - if 
(cmp_result == -1) goto Fail; - if (cmp_result) { - /* tmp_stop < 0 */ - Py_CLEAR(tmp_stop); - if (clamped_step < 0) { - tmp_stop = neg_one; - } else { - tmp_stop = zero; - } - Py_INCREF(tmp_stop); - } else { - /* tmp_stop >= 0 */ - cmp_result = PyObject_RichCompareBool(tmp_stop, r->length, Py_GE); - if (cmp_result == -1) goto Fail; - if (cmp_result) { - /* tmp_stop >= r->length */ - Py_CLEAR(tmp_stop); - if (clamped_step < 0) { - tmp_stop = PyNumber_Subtract(r->length, one); - if (tmp_stop == NULL) goto Fail; - } else { - tmp_stop = r->length; - Py_INCREF(tmp_stop); - } - } - } - } - - /* Check if the slice is empty or not */ - if (clamped_step < 0) { - has_elements = PyObject_RichCompareBool(tmp_start, tmp_stop, Py_GT); - } else { - has_elements = PyObject_RichCompareBool(tmp_start, tmp_stop, Py_LT); - } - if (has_elements == -1) goto Fail; - - *start = tmp_start; - *stop = tmp_stop; - *step = tmp_step; - Py_DECREF(neg_one); - Py_DECREF(one); - Py_DECREF(zero); - return has_elements; - - Fail: - Py_XDECREF(tmp_start); - Py_XDECREF(tmp_stop); - Py_XDECREF(tmp_step); - Py_XDECREF(candidate); - Py_XDECREF(neg_one); - Py_XDECREF(one); - Py_XDECREF(zero); - return -1; -} - static PyObject * compute_slice(rangeobject *r, PyObject *_slice) { @@ -514,10 +325,11 @@ compute_slice(rangeobject *r, PyObject *_slice) rangeobject *result; PyObject *start = NULL, *stop = NULL, *step = NULL; PyObject *substart = NULL, *substop = NULL, *substep = NULL; - int has_elements; + int error; - has_elements = compute_slice_indices(r, slice, &start, &stop, &step); - if (has_elements == -1) return NULL; + error = _PySlice_GetLongIndices(slice, r->length, &start, &stop, &step); + if (error == -1) + return NULL; substep = PyNumber_Multiply(r->step, step); if (substep == NULL) goto fail; @@ -527,13 +339,8 @@ compute_slice(rangeobject *r, PyObject *_slice) if (substart == NULL) goto fail; Py_CLEAR(start); - if (has_elements) { - substop = compute_item(r, stop); - if (substop == NULL) goto fail; 
- } else { - substop = substart; - Py_INCREF(substop); - } + substop = compute_item(r, stop); + if (substop == NULL) goto fail; Py_CLEAR(stop); result = make_range_object(Py_TYPE(r), substart, substop, substep); diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 4b31f2306e..52f1c89ded 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -316,57 +316,41 @@ evaluate_slice_index(PyObject *v) } } -/* Implementation of slice.indices. */ +/* Compute slice indices given a slice and length. Return -1 on failure. Used + by slice.indices and rangeobject slicing. Assumes that `len` is a + nonnegative instance of PyLong. */ -static PyObject* -slice_indices(PySliceObject* self, PyObject* len) +int +_PySlice_GetLongIndices(PySliceObject *self, PyObject *length, + PyObject **start_ptr, PyObject **stop_ptr, + PyObject **step_ptr) { PyObject *start=NULL, *stop=NULL, *step=NULL; - PyObject *length=NULL, *upper=NULL, *lower=NULL, *zero=NULL; - int step_is_negative, cmp; - - zero = PyLong_FromLong(0L); - if (zero == NULL) - return NULL; - - /* Compute step and length as integers. */ - length = PyNumber_Index(len); - if (length == NULL) - goto error; + PyObject *upper=NULL, *lower=NULL; + int step_is_negative, cmp_result; - if (self->step == Py_None) + /* Convert step to an integer; raise for zero step. */ + if (self->step == Py_None) { step = PyLong_FromLong(1L); - else - step = evaluate_slice_index(self->step); - if (step == NULL) - goto error; - - /* Raise ValueError for negative length or zero step. 
*/ - cmp = PyObject_RichCompareBool(length, zero, Py_LT); - if (cmp < 0) { - goto error; - } - if (cmp) { - PyErr_SetString(PyExc_ValueError, - "length should not be negative"); - goto error; - } - - cmp = PyObject_RichCompareBool(step, zero, Py_EQ); - if (cmp < 0) { - goto error; + if (step == NULL) + goto error; + step_is_negative = 0; } - if (cmp) { - PyErr_SetString(PyExc_ValueError, - "slice step cannot be zero"); - goto error; + else { + int step_sign; + step = evaluate_slice_index(self->step); + if (step == NULL) + goto error; + step_sign = _PyLong_Sign(step); + if (step_sign == 0) { + PyErr_SetString(PyExc_ValueError, + "slice step cannot be zero"); + goto error; + } + step_is_negative = step_sign < 0; } /* Find lower and upper bounds for start and stop. */ - step_is_negative = PyObject_RichCompareBool(step, zero, Py_LT); - if (step_is_negative < 0) { - goto error; - } if (step_is_negative) { lower = PyLong_FromLong(-1L); if (lower == NULL) @@ -377,8 +361,10 @@ slice_indices(PySliceObject* self, PyObject* len) goto error; } else { - lower = zero; - Py_INCREF(lower); + lower = PyLong_FromLong(0L); + if (lower == NULL) + goto error; + upper = length; Py_INCREF(upper); } @@ -393,10 +379,7 @@ slice_indices(PySliceObject* self, PyObject* len) if (start == NULL) goto error; - cmp = PyObject_RichCompareBool(start, zero, Py_LT); - if (cmp < 0) - goto error; - if (cmp) { + if (_PyLong_Sign(start) < 0) { /* start += length */ PyObject *tmp = PyNumber_Add(start, length); Py_DECREF(start); @@ -404,20 +387,20 @@ slice_indices(PySliceObject* self, PyObject* len) if (start == NULL) goto error; - cmp = PyObject_RichCompareBool(start, lower, Py_LT); - if (cmp < 0) + cmp_result = PyObject_RichCompareBool(start, lower, Py_LT); + if (cmp_result < 0) goto error; - if (cmp) { + if (cmp_result) { Py_INCREF(lower); Py_DECREF(start); start = lower; } } else { - cmp = PyObject_RichCompareBool(start, upper, Py_GT); - if (cmp < 0) + cmp_result = PyObject_RichCompareBool(start, upper, 
Py_GT); + if (cmp_result < 0) goto error; - if (cmp) { + if (cmp_result) { Py_INCREF(upper); Py_DECREF(start); start = upper; @@ -435,10 +418,7 @@ slice_indices(PySliceObject* self, PyObject* len) if (stop == NULL) goto error; - cmp = PyObject_RichCompareBool(stop, zero, Py_LT); - if (cmp < 0) - goto error; - if (cmp) { + if (_PyLong_Sign(stop) < 0) { /* stop += length */ PyObject *tmp = PyNumber_Add(stop, length); Py_DECREF(stop); @@ -446,20 +426,20 @@ slice_indices(PySliceObject* self, PyObject* len) if (stop == NULL) goto error; - cmp = PyObject_RichCompareBool(stop, lower, Py_LT); - if (cmp < 0) + cmp_result = PyObject_RichCompareBool(stop, lower, Py_LT); + if (cmp_result < 0) goto error; - if (cmp) { + if (cmp_result) { Py_INCREF(lower); Py_DECREF(stop); stop = lower; } } else { - cmp = PyObject_RichCompareBool(stop, upper, Py_GT); - if (cmp < 0) + cmp_result = PyObject_RichCompareBool(stop, upper, Py_GT); + if (cmp_result < 0) goto error; - if (cmp) { + if (cmp_result) { Py_INCREF(upper); Py_DECREF(stop); stop = upper; @@ -467,23 +447,51 @@ slice_indices(PySliceObject* self, PyObject* len) } } + *start_ptr = start; + *stop_ptr = stop; + *step_ptr = step; Py_DECREF(upper); Py_DECREF(lower); - Py_DECREF(length); - Py_DECREF(zero); - return Py_BuildValue("(NNN)", start, stop, step); + return 0; error: + *start_ptr = *stop_ptr = *step_ptr = NULL; Py_XDECREF(start); Py_XDECREF(stop); Py_XDECREF(step); Py_XDECREF(upper); Py_XDECREF(lower); - Py_XDECREF(length); - Py_XDECREF(zero); - return NULL; + return -1; } +/* Implementation of slice.indices. */ + +static PyObject* +slice_indices(PySliceObject* self, PyObject* len) +{ + PyObject *start, *stop, *step; + PyObject *length; + int error; + + /* Convert length to an integer if necessary; raise for negative length. 
*/ + length = PyNumber_Index(len); + if (length == NULL) + return NULL; + + if (_PyLong_Sign(length) < 0) { + PyErr_SetString(PyExc_ValueError, + "length should not be negative"); + Py_DECREF(length); + return NULL; + } + + error = _PySlice_GetLongIndices(self, length, &start, &stop, &step); + Py_DECREF(length); + if (error == -1) + return NULL; + else + return Py_BuildValue("(NNN)", start, stop, step); +} PyDoc_STRVAR(slice_indices_doc, "S.indices(len) -> (start, stop, stride)\n\ -- cgit v1.2.1 From 57dbd7ed634cd1e6e8c6aed21a3057bed379a1fd Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sun, 2 Dec 2012 07:56:42 +0100 Subject: Issue #16592: stringlib_bytes_join doesn't raise MemoryError on allocation failure --- Objects/stringlib/join.h | 1 + 1 file changed, 1 insertion(+) (limited to 'Objects') diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h index d1d6e532c5..5568b31dab 100644 --- a/Objects/stringlib/join.h +++ b/Objects/stringlib/join.h @@ -43,6 +43,7 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) buffers = PyMem_NEW(Py_buffer, seqlen); if (buffers == NULL) { Py_DECREF(seq); + PyErr_NoMemory(); return NULL; } } -- cgit v1.2.1 From 9503c54245c09027092e5539f7b28046aec9a2cc Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sun, 2 Dec 2012 19:10:07 +0100 Subject: Issue #16562: Optimize dict equality testing. Patch by Serhiy Storchaka (reviewed by Martin and Raymond). 
--- Objects/dictobject.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index f4ad3dccd4..a3c640939d 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2114,13 +2114,18 @@ dict_equal(PyDictObject *a, PyDictObject *b) if (aval != NULL) { int cmp; PyObject *bval; + PyObject **vaddr; PyObject *key = ep->me_key; /* temporarily bump aval's refcount to ensure it stays alive until we're done with it */ Py_INCREF(aval); /* ditto for key */ Py_INCREF(key); - bval = PyDict_GetItemWithError((PyObject *)b, key); + /* reuse the known hash value */ + if ((b->ma_keys->dk_lookup)(b, key, ep->me_hash, &vaddr) == NULL) + bval = NULL; + else + bval = *vaddr; Py_DECREF(key); if (bval == NULL) { Py_DECREF(aval); -- cgit v1.2.1 From 2e540f952688de0d7b1a6f5926fb0ef695518eb6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Dec 2012 01:34:47 +0100 Subject: Issue #16455: On FreeBSD and Solaris, if the locale is C, the ASCII/surrogateescape codec is now used, instead of the locale encoding, to decode the command line arguments. This change fixes inconsistencies with os.fsencode() and os.fsdecode() because these operating systems announces an ASCII locale encoding, whereas the ISO-8859-1 encoding is used in practice. --- Objects/unicodeobject.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index dd8d7b2232..6491fdc316 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2863,10 +2863,10 @@ PyUnicode_FromEncodedObject(register PyObject *obj, /* Convert encoding to lower case and replace '_' with '-' in order to catch e.g. UTF_8. Return 0 on error (encoding is longer than lower_len-1), 1 on success. 
*/ -static int -normalize_encoding(const char *encoding, - char *lower, - size_t lower_len) +int +_Py_normalize_encoding(const char *encoding, + char *lower, + size_t lower_len) { const char *e; char *l; @@ -2908,7 +2908,7 @@ PyUnicode_Decode(const char *s, char lower[11]; /* Enough for any encoding shortcut */ /* Shortcuts for common default encodings */ - if (normalize_encoding(encoding, lower, sizeof(lower))) { + if (_Py_normalize_encoding(encoding, lower, sizeof(lower))) { if ((strcmp(lower, "utf-8") == 0) || (strcmp(lower, "utf8") == 0)) return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL); @@ -3110,7 +3110,8 @@ locale_error_handler(const char *errors, int *surrogateescape) *surrogateescape = 0; return 0; } - if (strcmp(errors, "surrogateescape") == 0) { + if (errors == "surrogateescape" + || strcmp(errors, "surrogateescape") == 0) { *surrogateescape = 1; return 0; } @@ -3148,7 +3149,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) } if (surrogateescape) { - /* locale encoding with surrogateescape */ + /* "surrogateescape" error handler */ char *str; str = _Py_wchar2char(wstr, &error_pos); @@ -3168,6 +3169,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) PyMem_Free(str); } else { + /* strict mode */ size_t len, len2; len = wcstombs(NULL, wstr, 0); @@ -3273,7 +3275,7 @@ PyUnicode_AsEncodedString(PyObject *unicode, } /* Shortcuts for common default encodings */ - if (normalize_encoding(encoding, lower, sizeof(lower))) { + if (_Py_normalize_encoding(encoding, lower, sizeof(lower))) { if ((strcmp(lower, "utf-8") == 0) || (strcmp(lower, "utf8") == 0)) { @@ -3413,8 +3415,8 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, return NULL; } - if (surrogateescape) - { + if (surrogateescape) { + /* "surrogateescape" error handler */ wstr = _Py_char2wchar(str, &wlen); if (wstr == NULL) { if (wlen == (size_t)-1) @@ -3428,6 +3430,7 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, PyMem_Free(wstr); } 
else { + /* strict mode */ #ifndef HAVE_BROKEN_MBSTOWCS wlen = mbstowcs(NULL, str, 0); #else @@ -3447,7 +3450,6 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, return PyErr_NoMemory(); } - /* This shouldn't fail now */ wlen2 = mbstowcs(wstr, str, wlen+1); if (wlen2 == (size_t)-1) { if (wstr != smallbuf) -- cgit v1.2.1 From 2eb4e79d6cccfbbcb4b5416b73ea816c31d014fd Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Dec 2012 09:30:24 +0100 Subject: Cleanup unicodeobject.c * Remove micro-optization: (errors == "surrogateescape" || strcmp(errors, "surrogateescape") == 0). Only use strcmp() * Initialize 'arg' members in unicode_format_arg() to help the compiler to diagnose real bugs and also make the code simpler to read --- Objects/unicodeobject.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 6491fdc316..0451f0d789 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3110,8 +3110,7 @@ locale_error_handler(const char *errors, int *surrogateescape) *surrogateescape = 0; return 0; } - if (errors == "surrogateescape" - || strcmp(errors, "surrogateescape") == 0) { + if (strcmp(errors, "surrogateescape") == 0) { *surrogateescape = 1; return 0; } @@ -13438,7 +13437,6 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, PyObject *v; - arg->ch = FORMAT_READ(ctx); if (arg->ch == '(') { /* Get argument value from a dictionary. Example: "%(name)s". */ Py_ssize_t keystart; @@ -13487,7 +13485,6 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, } /* Parse flags. Example: "%+i" => flags=F_SIGN. */ - arg->flags = 0; while (--ctx->fmtcnt >= 0) { arg->ch = FORMAT_READ(ctx); ctx->fmtpos++; @@ -13502,7 +13499,6 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, } /* Parse width. 
Example: "%10s" => width=10 */ - arg->width = -1; if (arg->ch == '*') { v = unicode_format_getnextarg(ctx); if (v == NULL) @@ -13544,7 +13540,6 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, } /* Parse precision. Example: "%.3f" => prec=3 */ - arg->prec = -1; if (arg->ch == '.') { arg->prec = 0; if (--ctx->fmtcnt >= 0) { @@ -13613,9 +13608,12 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, - "e", "E", "f", "F", "g", "G": float - "c": int or str (1 character) + When possible, the output is written directly into the Unicode writer + (ctx->writer). A string is created when padding is required. + Return 0 if the argument has been formatted into *p_str, 1 if the argument has been written into ctx->writer, - -1 on error. */ + -1 on error. */ static int unicode_format_arg_format(struct unicode_formatter_t *ctx, struct unicode_format_arg_t *arg, @@ -13639,10 +13637,8 @@ unicode_format_arg_format(struct unicode_formatter_t *ctx, if (v == NULL) return -1; - arg->sign = 0; switch (arg->ch) { - case 's': case 'r': case 'a': @@ -13894,6 +13890,13 @@ unicode_format_arg(struct unicode_formatter_t *ctx) PyObject *str; int ret; + arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos); + arg.flags = 0; + arg.width = -1; + arg.prec = -1; + arg.sign = 0; + str = NULL; + ret = unicode_format_arg_parse(ctx, &arg); if (ret == -1) return -1; -- cgit v1.2.1 From 40a0a62bc7caf337b2a1a2d9013308f85ee247ed Mon Sep 17 00:00:00 2001 From: Chris Jerdonek Date: Fri, 7 Dec 2012 15:51:53 -0800 Subject: Issue #16495: remove extraneous NULL encoding check from bytes_decode(). The NULL encoding check in bytes_decode() was unnecessary because this case is already taken care of by the call to _Py_normalize_encoding() inside PyUnicode_Decode(). 
--- Objects/bytesobject.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'Objects') diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index a1db7789f1..8d8cb05b11 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2236,8 +2236,6 @@ bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs) if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors)) return NULL; - if (encoding == NULL) - encoding = PyUnicode_GetDefaultEncoding(); return PyUnicode_FromEncodedObject(self, encoding, errors); } -- cgit v1.2.1 From 350e0ff264038e3790d0182134de7959965dea7d Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sun, 9 Dec 2012 14:28:26 +0100 Subject: Issue #13390: New function :func:`sys.getallocatedblocks()` returns the number of memory blocks currently allocated. Also, the ``-R`` option to regrtest uses this function to guard against memory allocation leaks. --- Objects/obmalloc.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 6225ebbbf1..c82c978e4d 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -525,6 +525,15 @@ static size_t ntimes_arena_allocated = 0; /* High water mark (max value ever seen) for narenas_currently_allocated. */ static size_t narenas_highwater = 0; +static Py_ssize_t _Py_AllocatedBlocks = 0; + +Py_ssize_t +_Py_GetAllocatedBlocks(void) +{ + return _Py_AllocatedBlocks; +} + + /* Allocate a new arena. If we run out of memory, return NULL. Else * allocate a new arena, and return the address of an arena_object * describing the new arena. It's expected that the caller will set @@ -785,6 +794,8 @@ PyObject_Malloc(size_t nbytes) if (nbytes > PY_SSIZE_T_MAX) return NULL; + _Py_AllocatedBlocks++; + /* * This implicitly redirects malloc(0). */ @@ -901,6 +912,7 @@ PyObject_Malloc(size_t nbytes) * and free list are already initialized. 
*/ bp = pool->freeblock; + assert(bp != NULL); pool->freeblock = *(block **)bp; UNLOCK(); return (void *)bp; @@ -958,7 +970,12 @@ redirect: */ if (nbytes == 0) nbytes = 1; - return (void *)malloc(nbytes); + { + void *result = malloc(nbytes); + if (!result) + _Py_AllocatedBlocks--; + return result; + } } /* free */ @@ -978,6 +995,8 @@ PyObject_Free(void *p) if (p == NULL) /* free(NULL) has no effect */ return; + _Py_AllocatedBlocks--; + #ifdef WITH_VALGRIND if (UNLIKELY(running_on_valgrind > 0)) goto redirect; -- cgit v1.2.1 From a9a3cd7e2f043f7bd35d25dc301c8b74a8b6e89d Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 17 Dec 2012 23:05:59 +0100 Subject: Following issue #13390, fix compilation --without-pymalloc, and make sys.getallocatedblocks() return 0 in that situation. --- Objects/obmalloc.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Objects') diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index c82c978e4d..bbe2805bd1 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1316,6 +1316,13 @@ PyObject_Free(void *p) { PyMem_FREE(p); } + +Py_ssize_t +_Py_GetAllocatedBlocks(void) +{ + return 0; +} + #endif /* WITH_PYMALLOC */ #ifdef PYMALLOC_DEBUG -- cgit v1.2.1 From 4c3208fadf4fea67e916d8d8c7a04a28c4ad92f9 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Wed, 19 Dec 2012 14:33:35 +0200 Subject: Issue #16719: Get rid of WindowsError. Use OSError instead Patch by Serhiy Storchaka. --- Objects/unicodeobject.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4efc93df06..1f602b7ea6 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6589,8 +6589,8 @@ decode_code_page_flags(UINT code_page) * Decode a byte string from a Windows code page into unicode object in strict * mode. * - * Returns consumed size if succeed, returns -2 on decode error, or raise a - * WindowsError and returns -1 on other error. 
+ * Returns consumed size if succeed, returns -2 on decode error, or raise an + * OSError and returns -1 on other error. */ static int decode_code_page_strict(UINT code_page, @@ -6641,7 +6641,7 @@ error: * Decode a byte string from a code page into unicode object with an error * handler. * - * Returns consumed size if succeed, or raise a WindowsError or + * Returns consumed size if succeed, or raise an OSError or * UnicodeDecodeError exception and returns -1 on error. */ static int @@ -6897,7 +6897,7 @@ encode_code_page_flags(UINT code_page, const char *errors) * mode. * * Returns consumed characters if succeed, returns -2 on encode error, or raise - * a WindowsError and returns -1 on other error. + * an OSError and returns -1 on other error. */ static int encode_code_page_strict(UINT code_page, PyObject **outbytes, @@ -6993,7 +6993,7 @@ error: * Encode a Unicode string to a Windows code page into a byte string using a * error handler. * - * Returns consumed characters if succeed, or raise a WindowsError and returns + * Returns consumed characters if succeed, or raise an OSError and returns * -1 on other error. */ static int -- cgit v1.2.1 From b855f96413ac5651e06b80f23dc762fe026f10c5 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sun, 23 Dec 2012 14:27:17 +0200 Subject: Issue #9856: Replace deprecation warinigs to raising TypeError in object.__format__ Patch by Florent Xicluna. 
--- Objects/typeobject.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index fec907e862..d0a8246cf8 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3654,16 +3654,9 @@ object_format(PyObject *self, PyObject *args) /* Issue 7994: If we're converting to a string, we should reject format specifications */ if (PyUnicode_GET_LENGTH(format_spec) > 0) { - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "object.__format__ with a non-empty format " - "string is deprecated", 1) < 0) { - goto done; - } - /* Eventually this will become an error: - PyErr_Format(PyExc_TypeError, + PyErr_SetString(PyExc_TypeError, "non-empty format string passed to object.__format__"); - goto done; - */ + goto done; } result = PyObject_Format(self_as_str, format_spec); @@ -4288,13 +4281,11 @@ PyType_Ready(PyTypeObject *type) /* Warn for a type that implements tp_compare (now known as tp_reserved) but not tp_richcompare. */ if (type->tp_reserved && !type->tp_richcompare) { - int error; - error = PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + PyErr_Format(PyExc_TypeError, "Type %.100s defines tp_reserved (formerly tp_compare) " "but not tp_richcompare. 
Comparisons may not behave as intended.", type->tp_name); - if (error == -1) - goto error; + goto error; } /* All done -- set the ready flag */ -- cgit v1.2.1 From a40d6e3ce9422ce472ac31cc4b8eabf66e5b0bac Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Tue, 25 Dec 2012 13:32:35 +0200 Subject: Issue #15422: get rid of PyCFunction_New macro --- Objects/descrobject.c | 8 ++++---- Objects/methodobject.c | 20 ++++++-------------- Objects/typeobject.c | 4 ++-- 3 files changed, 12 insertions(+), 20 deletions(-) (limited to 'Objects') diff --git a/Objects/descrobject.c b/Objects/descrobject.c index abcc0020ff..3cf00d5293 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -115,7 +115,7 @@ classmethod_get(PyMethodDescrObject *descr, PyObject *obj, PyObject *type) ((PyTypeObject *)type)->tp_name); return NULL; } - return PyCFunction_New(descr->d_method, type); + return PyCFunction_NewEx(descr->d_method, type, NULL); } static PyObject * @@ -125,7 +125,7 @@ method_get(PyMethodDescrObject *descr, PyObject *obj, PyObject *type) if (descr_check((PyDescrObject *)descr, obj, &res)) return res; - return PyCFunction_New(descr->d_method, obj); + return PyCFunction_NewEx(descr->d_method, obj, NULL); } static PyObject * @@ -239,7 +239,7 @@ methoddescr_call(PyMethodDescrObject *descr, PyObject *args, PyObject *kwds) return NULL; } - func = PyCFunction_New(descr->d_method, self); + func = PyCFunction_NewEx(descr->d_method, self, NULL); if (func == NULL) return NULL; args = PyTuple_GetSlice(args, 1, argc); @@ -292,7 +292,7 @@ classmethoddescr_call(PyMethodDescrObject *descr, PyObject *args, return NULL; } - func = PyCFunction_New(descr->d_method, self); + func = PyCFunction_NewEx(descr->d_method, self, NULL); if (func == NULL) return NULL; args = PyTuple_GetSlice(args, 1, argc); diff --git a/Objects/methodobject.c b/Objects/methodobject.c index 1d143f9131..5d9f364034 100644 --- a/Objects/methodobject.c +++ b/Objects/methodobject.c @@ -13,6 +13,12 @@ static int numfree = 
0; #define PyCFunction_MAXFREELIST 256 #endif +PyObject * +PyCFunction_New(PyMethodDef *ml, PyObject *self) +{ + return PyCFunction_NewEx(ml, self, NULL); +} + PyObject * PyCFunction_NewEx(PyMethodDef *ml, PyObject *self, PyObject *module) { @@ -346,17 +352,3 @@ _PyCFunction_DebugMallocStats(FILE *out) "free PyCFunction", numfree, sizeof(PyCFunction)); } - -/* PyCFunction_New() is now just a macro that calls PyCFunction_NewEx(), - but it's part of the API so we need to keep a function around that - existing C extensions can call. -*/ - -#undef PyCFunction_New -PyAPI_FUNC(PyObject *) PyCFunction_New(PyMethodDef *, PyObject *); - -PyObject * -PyCFunction_New(PyMethodDef *ml, PyObject *self) -{ - return PyCFunction_NewEx(ml, self, NULL); -} diff --git a/Objects/typeobject.c b/Objects/typeobject.c index d0a8246cf8..f71cad3f6d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3811,7 +3811,7 @@ add_methods(PyTypeObject *type, PyMethodDef *meth) descr = PyDescr_NewClassMethod(type, meth); } else if (meth->ml_flags & METH_STATIC) { - PyObject *cfunc = PyCFunction_New(meth, (PyObject*)type); + PyObject *cfunc = PyCFunction_NewEx(meth, (PyObject*)type, NULL); if (cfunc == NULL) return -1; descr = PyStaticMethod_New(cfunc); @@ -4879,7 +4879,7 @@ add_tp_new_wrapper(PyTypeObject *type) if (_PyDict_GetItemId(type->tp_dict, &PyId___new__) != NULL) return 0; - func = PyCFunction_New(tp_new_methoddef, (PyObject *)type); + func = PyCFunction_NewEx(tp_new_methoddef, (PyObject *)type, NULL); if (func == NULL) return -1; if (_PyDict_SetItemId(type->tp_dict, &PyId___new__, func)) { -- cgit v1.2.1 From a40b2d6ba8c868484333803b02ac7208f7ecbe97 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Tue, 25 Dec 2012 13:05:31 -0800 Subject: Fixes issue #16772: int() constructor second argument (base) must be an int. Consistent with the behavior in Python 2. 
--- Objects/longobject.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Objects') diff --git a/Objects/longobject.c b/Objects/longobject.c index 4024491f13..e4d4df46f3 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -4260,6 +4260,11 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return PyLong_FromLong(0L); if (obase == NULL) return PyNumber_Long(x); + if (!PyLong_Check(obase)) { + PyErr_SetString(PyExc_TypeError, + "int() arg 2 must be an integer."); + return NULL; + } base = PyLong_AsLongAndOverflow(obase, &overflow); if (base == -1 && PyErr_Occurred()) -- cgit v1.2.1 From 2a2d2eecec89e3198e80c64af0df72a6603495e1 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Tue, 25 Dec 2012 22:38:32 -0800 Subject: Test for issue16772 and redoes the previous fix to accept __index__-aware objects as the base by using PyNumber_AsSsize_t similar to round(). --- Objects/longobject.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/longobject.c b/Objects/longobject.c index e4d4df46f3..cea2f730a4 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -4247,8 +4247,7 @@ static PyObject * long_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *obase = NULL, *x = NULL; - long base; - int overflow; + Py_ssize_t base; static char *kwlist[] = {"x", "base", 0}; if (type != &PyLong_Type) @@ -4266,10 +4265,10 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } - base = PyLong_AsLongAndOverflow(obase, &overflow); + base = PyNumber_AsSsize_t(obase, NULL); if (base == -1 && PyErr_Occurred()) return NULL; - if (overflow || (base != 0 && base < 2) || base > 36) { + if ((base != 0 && base < 2) || base > 36) { PyErr_SetString(PyExc_ValueError, "int() arg 2 must be >= 2 and <= 36"); return NULL; -- cgit v1.2.1 From 05f001328f0648616ea88819a5e887a69ba8bb4a Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Wed, 26 Dec 2012 23:08:54 +0200 Subject: 
Fix compilation error for #15422 --- Objects/methodobject.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Objects') diff --git a/Objects/methodobject.c b/Objects/methodobject.c index 5d9f364034..5b799c9672 100644 --- a/Objects/methodobject.c +++ b/Objects/methodobject.c @@ -13,6 +13,9 @@ static int numfree = 0; #define PyCFunction_MAXFREELIST 256 #endif +/* undefine macro trampoline to PyCFunction_NewEx */ +#undef PyCFunction_New + PyObject * PyCFunction_New(PyMethodDef *ml, PyObject *self) { -- cgit v1.2.1 From 38a16f841555aa87e538c67e2683afc6a69a8e2a Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Wed, 26 Dec 2012 16:43:33 -0600 Subject: use more specific type --- Objects/longobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/longobject.c b/Objects/longobject.c index cea2f730a4..49e9d5d915 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -4072,7 +4072,7 @@ v_complement(digit *z, digit *a, Py_ssize_t m) static PyObject * long_bitwise(PyLongObject *a, - int op, /* '&', '|', '^' */ + char op, /* '&', '|', '^' */ PyLongObject *b) { int nega, negb, negz; -- cgit v1.2.1 From ddb0902d81cd45f66e8e8976cb426d33b3650f8d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 3 Jan 2013 03:18:09 +0100 Subject: Close #16281: handle tailmatch() failure and remove useless comment "honor direction and do a forward or backwards search": the runtime speed may be different, but I consider that it doesn't really matter in practice. The direction was never honored before: Python 2.7 uses memcmp() for the str type for example. 
--- Objects/unicodeobject.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1f602b7ea6..88010e674c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8935,7 +8935,7 @@ tailmatch(PyObject *self, if (PyUnicode_READY(self) == -1 || PyUnicode_READY(substring) == -1) - return 0; + return -1; if (PyUnicode_GET_LENGTH(substring) == 0) return 1; @@ -8973,7 +8973,6 @@ tailmatch(PyObject *self, /* We do not need to compare 0 and len(substring)-1 because the if statement above ensured already that they are equal when we end up here. */ - /* TODO: honor direction and do a forward or backwards search */ for (i = 1; i < end_sub; ++i) { if (PyUnicode_READ(kind_self, data_self, offset + i) != PyUnicode_READ(kind_sub, data_sub, i)) @@ -12597,6 +12596,8 @@ unicode_startswith(PyObject *self, return NULL; result = tailmatch(self, substring, start, end, -1); Py_DECREF(substring); + if (result == -1) + return NULL; if (result) { Py_RETURN_TRUE; } @@ -12613,6 +12614,8 @@ unicode_startswith(PyObject *self, } result = tailmatch(self, substring, start, end, -1); Py_DECREF(substring); + if (result == -1) + return NULL; return PyBool_FromLong(result); } @@ -12646,6 +12649,8 @@ unicode_endswith(PyObject *self, return NULL; result = tailmatch(self, substring, start, end, +1); Py_DECREF(substring); + if (result == -1) + return NULL; if (result) { Py_RETURN_TRUE; } @@ -12660,6 +12665,8 @@ unicode_endswith(PyObject *self, return NULL; } result = tailmatch(self, substring, start, end, +1); + if (result == -1) + return NULL; Py_DECREF(substring); return PyBool_FromLong(result); } -- cgit v1.2.1 From 2a673ba111763894b66df3feb811031c5ee215ce Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Mon, 7 Jan 2013 21:24:18 +0100 Subject: Add a comment about *not* caching the hash value. 
Issue #9685 suggested to memorize the hash value, but the feature request was rejected because no speed ups were found. --- Objects/tupleobject.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Objects') diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index ec3f91b2c6..0a95909275 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -322,6 +322,9 @@ error: 1082527, 1165049, 1082531, 1165057, 1247581, 1330103, 1082533, 1330111, 1412633, 1165069, 1247599, 1495177, 1577699 + + Tests have shown that it's not worth to cache the hash value, see + issue #9685. */ static Py_hash_t -- cgit v1.2.1 From e22777ea0898484ffb7c3fb87eedaaeef981206a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 15 Jan 2013 01:12:17 +0200 Subject: Issue #15989: Fix several occurrences of integer overflow when result of PyLong_AsLong() narrowed to int without checks. --- Objects/fileobject.c | 4 ++-- Objects/longobject.c | 18 ++++++++++++++++++ Objects/unicodeobject.c | 4 ++-- 3 files changed, 22 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/fileobject.c b/Objects/fileobject.c index e1c47ce372..3a31314086 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -200,7 +200,7 @@ PyObject_AsFileDescriptor(PyObject *o) _Py_IDENTIFIER(fileno); if (PyLong_Check(o)) { - fd = PyLong_AsLong(o); + fd = _PyLong_AsInt(o); } else if ((meth = _PyObject_GetAttrId(o, &PyId_fileno)) != NULL) { @@ -210,7 +210,7 @@ PyObject_AsFileDescriptor(PyObject *o) return -1; if (PyLong_Check(fno)) { - fd = PyLong_AsLong(fno); + fd = _PyLong_AsInt(fno); Py_DECREF(fno); } else { diff --git a/Objects/longobject.c b/Objects/longobject.c index 5a50f24330..1a82b1c67a 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -434,6 +434,24 @@ PyLong_AsLong(PyObject *obj) return result; } +/* Get a C int from a long int object or any object that has an __int__ + method. Return -1 and set an error if overflow occurs. 
*/ + +int +_PyLong_AsInt(PyObject *obj) +{ + int overflow; + long result = PyLong_AsLongAndOverflow(obj, &overflow); + if (overflow || result > INT_MAX || result < INT_MIN) { + /* XXX: could be cute and give a different + message for overflow == -1 */ + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C int"); + return -1; + } + return (int)result; +} + /* Get a Py_ssize_t from a long int object. Returns -1 and sets an error condition if overflow occurs. */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 6a12d71f87..65393d2efa 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13521,7 +13521,7 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, "* wants int"); return -1; } - arg->width = PyLong_AsLong(v); + arg->width = PyLong_AsSsize_t(v); if (arg->width == -1 && PyErr_Occurred()) return -1; if (arg->width < 0) { @@ -13568,7 +13568,7 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, "* wants int"); return -1; } - arg->prec = PyLong_AsLong(v); + arg->prec = _PyLong_AsInt(v); if (arg->prec == -1 && PyErr_Occurred()) return -1; if (arg->prec < 0) -- cgit v1.2.1 From 22808191cb429b98a9271ac433693a2ff7c35461 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sun, 27 Jan 2013 10:17:52 +0000 Subject: Issue #16772: in int(x, base), non-integer bases must have an __index__ method. 
--- Objects/longobject.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'Objects') diff --git a/Objects/longobject.c b/Objects/longobject.c index 1a82b1c67a..bec0a78008 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -4283,11 +4283,6 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } if (obase == NULL) return PyNumber_Long(x); - if (!PyLong_Check(obase)) { - PyErr_SetString(PyExc_TypeError, - "int() base must be an integer."); - return NULL; - } base = PyNumber_AsSsize_t(obase, NULL); if (base == -1 && PyErr_Occurred()) -- cgit v1.2.1 From 86ade39ddcd76d90461a241fe0a07e81fa2bd8c9 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Sat, 16 Feb 2013 16:32:39 -0700 Subject: Issue #15022: Add pickle and comparison support to types.SimpleNamespace. --- Objects/namespaceobject.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'Objects') diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c index ff278d3347..f9a6f6549d 100644 --- a/Objects/namespaceobject.c +++ b/Objects/namespaceobject.c @@ -66,16 +66,20 @@ namespace_dealloc(_PyNamespaceObject *ns) static PyObject * -namespace_repr(_PyNamespaceObject *ns) +namespace_repr(PyObject *ns) { int i, loop_error = 0; PyObject *pairs = NULL, *d = NULL, *keys = NULL, *keys_iter = NULL; PyObject *key; PyObject *separator, *pairsrepr, *repr = NULL; + const char * name; - i = Py_ReprEnter((PyObject *)ns); + name = (Py_TYPE(ns) == &_PyNamespace_Type) ? "namespace" + : ns->ob_type->tp_name; + + i = Py_ReprEnter(ns); if (i != 0) { - return i > 0 ? PyUnicode_FromString("namespace(...)") : NULL; + return i > 0 ? 
PyUnicode_FromFormat("%s(...)", name) : NULL; } pairs = PyList_New(0); @@ -127,8 +131,7 @@ namespace_repr(_PyNamespaceObject *ns) if (pairsrepr == NULL) goto error; - repr = PyUnicode_FromFormat("%s(%S)", - ((PyObject *)ns)->ob_type->tp_name, pairsrepr); + repr = PyUnicode_FromFormat("%s(%S)", name, pairsrepr); Py_DECREF(pairsrepr); error: @@ -136,7 +139,7 @@ error: Py_XDECREF(d); Py_XDECREF(keys); Py_XDECREF(keys_iter); - Py_ReprLeave((PyObject *)ns); + Py_ReprLeave(ns); return repr; } @@ -158,14 +161,26 @@ namespace_clear(_PyNamespaceObject *ns) } +static PyObject * +namespace_richcompare(PyObject *self, PyObject *other, int op) +{ + if (PyObject_IsInstance(self, (PyObject *)&_PyNamespace_Type) && + PyObject_IsInstance(other, (PyObject *)&_PyNamespace_Type)) + return PyObject_RichCompare(((_PyNamespaceObject *)self)->ns_dict, + ((_PyNamespaceObject *)other)->ns_dict, op); + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; +} + + PyDoc_STRVAR(namespace_doc, "A simple attribute-based namespace.\n\ \n\ -namespace(**kwargs)"); +SimpleNamespace(**kwargs)"); PyTypeObject _PyNamespace_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - "namespace", /* tp_name */ + "types.SimpleNamespace", /* tp_name */ sizeof(_PyNamespaceObject), /* tp_size */ 0, /* tp_itemsize */ (destructor)namespace_dealloc, /* tp_dealloc */ @@ -188,7 +203,7 @@ PyTypeObject _PyNamespace_Type = { namespace_doc, /* tp_doc */ (traverseproc)namespace_traverse, /* tp_traverse */ (inquiry)namespace_clear, /* tp_clear */ - 0, /* tp_richcompare */ + namespace_richcompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ -- cgit v1.2.1 From 856df14257060474788c2a54c130e4f1107e1990 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Sat, 16 Feb 2013 18:20:32 -0700 Subject: Issue #15022: Ensure all pickle protocols are supported. 
--- Objects/namespaceobject.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c index f9a6f6549d..7e9107a744 100644 --- a/Objects/namespaceobject.c +++ b/Objects/namespaceobject.c @@ -173,6 +173,29 @@ namespace_richcompare(PyObject *self, PyObject *other, int op) } +PyDoc_STRVAR(namespace_reduce__doc__, "Return state information for pickling"); + +static PyObject * +namespace_reduce(register _PyNamespaceObject *ns) +{ + PyObject *result, *args = PyTuple_New(0); + + if (!args) + return NULL; + + result = PyTuple_Pack(3, (PyObject *)Py_TYPE(ns), args, ns->ns_dict); + Py_DECREF(args); + return result; +} + + +static PyMethodDef namespace_methods[] = { + {"__reduce__", (PyCFunction)namespace_reduce, METH_NOARGS, + namespace_reduce__doc__}, + {NULL, NULL} /* sentinel */ +}; + + PyDoc_STRVAR(namespace_doc, "A simple attribute-based namespace.\n\ \n\ @@ -207,7 +230,7 @@ PyTypeObject _PyNamespace_Type = { 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + namespace_methods, /* tp_methods */ namespace_members, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ -- cgit v1.2.1 From 113b2013a20fe73b57a735068e72bfcbc08051e3 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 6 Mar 2013 00:41:50 +0100 Subject: Issue #17223: Fix PyUnicode_FromUnicode() on Windows (16-bit wchar_t type) to reject invalid UTF-16 surrogate. 
--- Objects/unicodeobject.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2175655039..00a6a36fcd 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1384,13 +1384,18 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, for (iter = begin; iter < end; ) { #if SIZEOF_WCHAR_T == 2 - if (Py_UNICODE_IS_HIGH_SURROGATE(iter[0]) - && (iter+1) < end - && Py_UNICODE_IS_LOW_SURROGATE(iter[1])) - { - ch = Py_UNICODE_JOIN_SURROGATES(iter[0], iter[1]); - ++(*num_surrogates); - iter += 2; + if (Py_UNICODE_IS_HIGH_SURROGATE(iter[0])) { + if ((iter+1) < end + && Py_UNICODE_IS_LOW_SURROGATE(iter[1])) + { + ch = Py_UNICODE_JOIN_SURROGATES(iter[0], iter[1]); + ++(*num_surrogates); + iter += 2; + } + else { + PyErr_SetString(PyExc_ValueError, "illegal UTF-16 surrogate"); + return -1; + } } else #endif -- cgit v1.2.1 From 01f0e6977a4b72ad5836b937740d50828498cd3e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 6 Mar 2013 01:09:24 +0100 Subject: Backed out changeset b9f7b1bf36aa --- Objects/unicodeobject.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 00a6a36fcd..2175655039 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1384,18 +1384,13 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, for (iter = begin; iter < end; ) { #if SIZEOF_WCHAR_T == 2 - if (Py_UNICODE_IS_HIGH_SURROGATE(iter[0])) { - if ((iter+1) < end - && Py_UNICODE_IS_LOW_SURROGATE(iter[1])) - { - ch = Py_UNICODE_JOIN_SURROGATES(iter[0], iter[1]); - ++(*num_surrogates); - iter += 2; - } - else { - PyErr_SetString(PyExc_ValueError, "illegal UTF-16 surrogate"); - return -1; - } + if (Py_UNICODE_IS_HIGH_SURROGATE(iter[0]) + && (iter+1) < end + && Py_UNICODE_IS_LOW_SURROGATE(iter[1])) + { + ch = 
Py_UNICODE_JOIN_SURROGATES(iter[0], iter[1]); + ++(*num_surrogates); + iter += 2; } else #endif -- cgit v1.2.1 From 9f8d50845e19d1cc735e305179f86494751f694f Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Thu, 7 Mar 2013 22:16:29 -0500 Subject: Add PyDict_SetDefault. (closes #17327) Patch by Stefan Behnel and I. --- Objects/dictobject.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 9080ddfd28..2e8679fba4 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2211,19 +2211,19 @@ dict_get(register PyDictObject *mp, PyObject *args) return val; } -static PyObject * -dict_setdefault(register PyDictObject *mp, PyObject *args) +PyObject * +PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) { - PyObject *key; - PyObject *failobj = Py_None; + PyDictObject *mp = (PyDictObject *)d; PyObject *val = NULL; Py_hash_t hash; PyDictKeyEntry *ep; PyObject **value_addr; - if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj)) + if (!PyDict_Check(d)) { + PyErr_BadInternalCall(); return NULL; - + } if (!PyUnicode_CheckExact(key) || (hash = ((PyASCIIObject *) key)->hash) == -1) { hash = PyObject_Hash(key); @@ -2241,20 +2241,32 @@ dict_setdefault(register PyDictObject *mp, PyObject *args) return NULL; ep = find_empty_slot(mp, key, hash, &value_addr); } - Py_INCREF(failobj); + Py_INCREF(defaultobj); Py_INCREF(key); - MAINTAIN_TRACKING(mp, key, failobj); + MAINTAIN_TRACKING(mp, key, defaultobj); ep->me_key = key; ep->me_hash = hash; - *value_addr = failobj; - val = failobj; + *value_addr = defaultobj; + val = defaultobj; mp->ma_keys->dk_usable--; mp->ma_used++; } - Py_INCREF(val); return val; } +static PyObject * +dict_setdefault(PyDictObject *mp, PyObject *args) +{ + PyObject *key, *val; + PyObject *defaultobj = Py_None; + + if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &defaultobj)) + return NULL; + + val = 
PyDict_SetDefault(mp, key, defaultobj); + Py_XINCREF(val); + return val; +} static PyObject * dict_clear(register PyDictObject *mp) -- cgit v1.2.1 From 5ee523d92580624f7cdffb4c00ae44273448ce85 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Fri, 8 Mar 2013 08:36:49 -0500 Subject: fix warning (closes #17327) --- Objects/dictobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 2e8679fba4..208888db0e 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2263,7 +2263,7 @@ dict_setdefault(PyDictObject *mp, PyObject *args) if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &defaultobj)) return NULL; - val = PyDict_SetDefault(mp, key, defaultobj); + val = PyDict_SetDefault((PyObject *)mp, key, defaultobj); Py_XINCREF(val); return val; } -- cgit v1.2.1 From 04cf5cdefa8ef24285755af659c58bfd24025555 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sun, 31 Mar 2013 22:48:04 +0200 Subject: Issue #17591: Use lowercase filenames when including Windows header files. Patch by Roumen Petrov. --- Objects/exceptions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 6b04700621..79bbb8f2ff 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2327,7 +2327,7 @@ PyObject *PyExc_RecursionErrorInst = NULL; } #ifdef MS_WINDOWS -#include <Winsock2.h> +#include <winsock2.h> /* The following constants were added to errno.h in VS2010 but have preferred WSA equivalents. 
*/ #undef EADDRINUSE -- cgit v1.2.1 From 90adc0096826499bdf6cfbc339603feee8e65239 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 3 Apr 2013 01:48:39 +0200 Subject: Add _PyUnicodeWriter_WriteSubstring() function Write a function to enable more optimizations: * If the substring is the whole string and overallocation is disabled, just keep a reference to the string, don't copy characters * Avoid a call to the expensive _PyUnicode_FindMaxChar() function when possible --- Objects/stringlib/unicode_format.h | 18 +++++--------- Objects/unicodeobject.c | 48 +++++++++++++++++++++++++++++++------- 2 files changed, 45 insertions(+), 21 deletions(-) (limited to 'Objects') diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index e9be516318..2f58946ec3 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -869,25 +869,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs, SubString literal; SubString field_name; SubString format_spec; - Py_UCS4 conversion, maxchar; - Py_ssize_t sublen; - int err; + Py_UCS4 conversion; MarkupIterator_init(&iter, input->str, input->start, input->end); while ((result = MarkupIterator_next(&iter, &literal, &field_present, &field_name, &format_spec, &conversion, &format_spec_needs_expanding)) == 2) { - sublen = literal.end - literal.start; - if (sublen) { - maxchar = _PyUnicode_FindMaxChar(literal.str, - literal.start, literal.end); - err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar); - if (err == -1) + if (literal.end != literal.start) { + if (!field_present && iter.str.start == iter.str.end) + writer->overallocate = 0; + if (_PyUnicodeWriter_WriteSubstring(writer, literal.str, + literal.start, literal.end) < 0) return 0; - _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, - literal.str, literal.start, sublen); - writer->pos += sublen; } if (field_present) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 
23e57f03fb..a926e371b1 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12790,6 +12790,41 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) return 0; } +int +_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str, + Py_ssize_t start, Py_ssize_t end) +{ + Py_UCS4 maxchar; + Py_ssize_t len; + + if (PyUnicode_READY(str) == -1) + return -1; + + assert(0 <= start); + assert(end <= PyUnicode_GET_LENGTH(str)); + assert(start <= end); + + if (end == 0) + return 0; + + if (start == 0 && end == PyUnicode_GET_LENGTH(str)) + return _PyUnicodeWriter_WriteStr(writer, str); + + if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar) + maxchar = _PyUnicode_FindMaxChar(str, start, end); + else + maxchar = writer->maxchar; + len = end - start; + + if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0) + return -1; + + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + str, start, len); + writer->pos += len; + return 0; +} + int _PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len) { @@ -13963,7 +13998,7 @@ PyUnicode_Format(PyObject *format, PyObject *args) while (--ctx.fmtcnt >= 0) { if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') { - Py_ssize_t nonfmtpos, sublen; + Py_ssize_t nonfmtpos; Py_UCS4 maxchar; nonfmtpos = ctx.fmtpos++; @@ -13976,15 +14011,10 @@ PyUnicode_Format(PyObject *format, PyObject *args) ctx.fmtpos--; ctx.writer.overallocate = 0; } - sublen = ctx.fmtpos - nonfmtpos; - maxchar = _PyUnicode_FindMaxChar(ctx.fmtstr, - nonfmtpos, nonfmtpos + sublen); - if (_PyUnicodeWriter_Prepare(&ctx.writer, sublen, maxchar) == -1) - goto onError; - _PyUnicode_FastCopyCharacters(ctx.writer.buffer, ctx.writer.pos, - ctx.fmtstr, nonfmtpos, sublen); - ctx.writer.pos += sublen; + if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr, + nonfmtpos, ctx.fmtpos) < 0) + goto onError; } else { ctx.fmtpos++; -- cgit v1.2.1 From 4a7596083685d526076d006536188229dd1545d1 Mon Sep 
17 00:00:00 2001 From: Victor Stinner Date: Wed, 3 Apr 2013 02:02:33 +0200 Subject: Close #16757: Avoid calling the expensive _PyUnicode_FindMaxChar() function when possible --- Objects/unicodeobject.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a926e371b1..dee2953017 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13777,7 +13777,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx, Py_ssize_t pindex; Py_UCS4 signchar; Py_ssize_t buflen; - Py_UCS4 maxchar, bufmaxchar; + Py_UCS4 maxchar; Py_ssize_t sublen; _PyUnicodeWriter *writer = &ctx->writer; Py_UCS4 fill; @@ -13830,23 +13830,26 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx, arg->width = len; /* Prepare the writer */ - bufmaxchar = 127; + maxchar = writer->maxchar; if (!(arg->flags & F_LJUST)) { if (arg->sign) { if ((arg->width-1) > len) - bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); + maxchar = MAX_MAXCHAR(maxchar, fill); } else { if (arg->width > len) - bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); + maxchar = MAX_MAXCHAR(maxchar, fill); } } - maxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len); - bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar); + if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) { + Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len); + maxchar = MAX_MAXCHAR(maxchar, strmaxchar); + } + buflen = arg->width; if (arg->sign && len == arg->width) buflen++; - if (_PyUnicodeWriter_Prepare(writer, buflen, bufmaxchar) == -1) + if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1) return -1; /* Write the sign if needed */ -- cgit v1.2.1 From e6117be6b95f84f02e5871499552ddbd3a4f0cd1 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 3 Apr 2013 03:14:58 +0200 Subject: fix unused variable --- Objects/unicodeobject.c | 1 - 1 file changed, 1 deletion(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c 
b/Objects/unicodeobject.c index dee2953017..eb12655111 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14002,7 +14002,6 @@ PyUnicode_Format(PyObject *format, PyObject *args) while (--ctx.fmtcnt >= 0) { if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') { Py_ssize_t nonfmtpos; - Py_UCS4 maxchar; nonfmtpos = ctx.fmtpos++; while (ctx.fmtcnt >= 0 && -- cgit v1.2.1 From 6cbac942ddb9e78572c96b41ddbeb3a5733fe633 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sat, 6 Apr 2013 01:15:30 +0200 Subject: Issue #17469: Fix _Py_GetAllocatedBlocks() and sys.getallocatedblocks() when running on valgrind. --- Objects/obmalloc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 5943f5a6c3..3028f225ae 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -778,6 +778,8 @@ PyObject_Malloc(size_t nbytes) poolp next; uint size; + _Py_AllocatedBlocks++; + #ifdef WITH_VALGRIND if (UNLIKELY(running_on_valgrind == -1)) running_on_valgrind = RUNNING_ON_VALGRIND; @@ -791,10 +793,10 @@ PyObject_Malloc(size_t nbytes) * things without checking for overflows or negatives. * As size_t is unsigned, checking for nbytes < 0 is not required. */ - if (nbytes > PY_SSIZE_T_MAX) + if (nbytes > PY_SSIZE_T_MAX) { + _Py_AllocatedBlocks--; return NULL; - - _Py_AllocatedBlocks++; + } /* * This implicitly redirects malloc(0). -- cgit v1.2.1 From b08e1a1323f65098098cf5d2b527328274b66e58 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 8 Apr 2013 00:26:43 +0200 Subject: Close #13126: "Simplify" FASTSEARCH() code to help the compiler to emit more efficient machine code. Patch written by Antoine Pitrou. Without this change, str.find() was 10% slower than str.rfind() in the worst case. 
--- Objects/stringlib/fastsearch.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index 55ac77dd70..cd7cac40fa 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -142,6 +142,8 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, mask = 0; if (mode != FAST_RSEARCH) { + const STRINGLIB_CHAR *ss = s + m - 1; + const STRINGLIB_CHAR *pp = p + m - 1; /* create compressed boyer-moore delta 1 table */ @@ -156,7 +158,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, for (i = 0; i <= w; i++) { /* note: using mlast in the skip path slows things down on x86 */ - if (s[i+m-1] == p[m-1]) { + if (ss[i] == pp[0]) { /* candidate match */ for (j = 0; j < mlast; j++) if (s[i+j] != p[j]) @@ -172,13 +174,13 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, continue; } /* miss: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, s[i+m])) + if (!STRINGLIB_BLOOM(mask, ss[i+1])) i = i + m; else i = i + skip; } else { /* skip: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, s[i+m])) + if (!STRINGLIB_BLOOM(mask, ss[i+1])) i = i + m; } } -- cgit v1.2.1 From 4c56cc2d3334c701ad3f7ee4526f19372704901f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 8 Apr 2013 21:50:54 +0200 Subject: Issue #17615: Expand expensive PyUnicode_READ() macro in unicode_compare(): write specialized functions for each combination of Unicode kinds. 
--- Objects/unicodeobject.c | 94 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 77 insertions(+), 17 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index eb12655111..d450b4df50 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10241,10 +10241,24 @@ unicode_center(PyObject *self, PyObject *args) static int unicode_compare(PyObject *str1, PyObject *str2) { +#define COMPARE(TYPE1, TYPE2) \ + do { \ + TYPE1* p1 = (TYPE1 *)data1; \ + TYPE2* p2 = (TYPE2 *)data2; \ + TYPE1* end = p1 + len; \ + Py_UCS4 c1, c2; \ + for (; p1 != end; p1++, p2++) { \ + c1 = *p1; \ + c2 = *p2; \ + if (c1 != c2) \ + return (c1 < c2) ? -1 : 1; \ + } \ + } \ + while (0) + int kind1, kind2; void *data1, *data2; - Py_ssize_t len1, len2; - Py_ssize_t i, len; + Py_ssize_t len1, len2, len; /* a string is equal to itself */ if (str1 == str2) @@ -10258,23 +10272,67 @@ unicode_compare(PyObject *str1, PyObject *str2) len2 = PyUnicode_GET_LENGTH(str2); len = Py_MIN(len1, len2); - if (kind1 == 1 && kind2 == 1) { - int cmp = memcmp(data1, data2, len); - /* normalize result of memcmp() into the range [-1; 1] */ - if (cmp < 0) - return -1; - if (cmp > 0) - return 1; + switch(kind1) { + case PyUnicode_1BYTE_KIND: + { + switch(kind2) { + case PyUnicode_1BYTE_KIND: + { + int cmp = memcmp(data1, data2, len); + /* normalize result of memcmp() into the range [-1; 1] */ + if (cmp < 0) + return -1; + if (cmp > 0) + return 1; + break; + } + case PyUnicode_2BYTE_KIND: + COMPARE(Py_UCS1, Py_UCS2); + break; + case PyUnicode_4BYTE_KIND: + COMPARE(Py_UCS1, Py_UCS4); + break; + default: + assert(0); + } + break; } - else { - for (i = 0; i < len; ++i) { - Py_UCS4 c1, c2; - c1 = PyUnicode_READ(kind1, data1, i); - c2 = PyUnicode_READ(kind2, data2, i); - - if (c1 != c2) - return (c1 < c2) ? 
-1 : 1; + case PyUnicode_2BYTE_KIND: + { + switch(kind2) { + case PyUnicode_1BYTE_KIND: + COMPARE(Py_UCS2, Py_UCS1); + break; + case PyUnicode_2BYTE_KIND: + COMPARE(Py_UCS2, Py_UCS2); + break; + case PyUnicode_4BYTE_KIND: + COMPARE(Py_UCS2, Py_UCS4); + break; + default: + assert(0); + } + break; + } + case PyUnicode_4BYTE_KIND: + { + switch(kind2) { + case PyUnicode_1BYTE_KIND: + COMPARE(Py_UCS4, Py_UCS1); + break; + case PyUnicode_2BYTE_KIND: + COMPARE(Py_UCS4, Py_UCS2); + break; + case PyUnicode_4BYTE_KIND: + COMPARE(Py_UCS4, Py_UCS4); + break; + default: + assert(0); } + break; + } + default: + assert(0); } if (len1 == len2) @@ -10283,6 +10341,8 @@ unicode_compare(PyObject *str1, PyObject *str2) return -1; else return 1; + +#undef COMPARE } static int -- cgit v1.2.1 From 53f1420ba040714b0fe870d7040990a2179c7ef3 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 8 Apr 2013 22:43:44 +0200 Subject: Issue #17615: Comparing two Unicode strings now uses wmemcmp() when possible wmemcmp() is twice faster than a dummy loop (342 usec vs 744 usec) on Fedora 18/x86_64, GCC 4.7.2. 
--- Objects/unicodeobject.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d450b4df50..e9153c0de8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10304,8 +10304,19 @@ unicode_compare(PyObject *str1, PyObject *str2) COMPARE(Py_UCS2, Py_UCS1); break; case PyUnicode_2BYTE_KIND: + { +#if defined(HAVE_WMEMCMP) && SIZEOF_WCHAR_T == 2 + int cmp = wmemcmp((wchar_t *)data1, (wchar_t *)data2, len); + /* normalize result of wmemcmp() into the range [-1; 1] */ + if (cmp < 0) + return -1; + if (cmp > 0) + return 1; +#else COMPARE(Py_UCS2, Py_UCS2); +#endif break; + } case PyUnicode_4BYTE_KIND: COMPARE(Py_UCS2, Py_UCS4); break; @@ -10324,8 +10335,19 @@ unicode_compare(PyObject *str1, PyObject *str2) COMPARE(Py_UCS4, Py_UCS2); break; case PyUnicode_4BYTE_KIND: + { +#if defined(HAVE_WMEMCMP) && SIZEOF_WCHAR_T == 4 + int cmp = wmemcmp((wchar_t *)data1, (wchar_t *)data2, len); + /* normalize result of wmemcmp() into the range [-1; 1] */ + if (cmp < 0) + return -1; + if (cmp > 0) + return 1; +#else COMPARE(Py_UCS4, Py_UCS4); +#endif break; + } default: assert(0); } -- cgit v1.2.1 From 3348eb75c8b8ff2f361ef42a743abb0365b7b2cb Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 9 Apr 2013 21:53:09 +0200 Subject: Add fast-path in PyUnicode_DecodeCharmap() for pure 8 bit encodings: cp037, cp500 and iso8859_1 codecs --- Objects/unicodeobject.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e9153c0de8..88729c8bc0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7281,6 +7281,7 @@ PyUnicode_DecodeCharmap(const char *s, enum PyUnicode_Kind mapkind; void *mapdata; Py_UCS4 x; + unsigned char ch; if (PyUnicode_READY(mapping) == -1) return NULL; @@ -7288,8 +7289,32 @@ PyUnicode_DecodeCharmap(const char *s, maplen = 
PyUnicode_GET_LENGTH(mapping); mapdata = PyUnicode_DATA(mapping); mapkind = PyUnicode_KIND(mapping); + + if (mapkind == PyUnicode_1BYTE_KIND && maplen >= 256) { + /* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1 + * is disabled in encoding aliases, latin1 is preferred because + * its implementation is faster. */ + Py_UCS1 *mapdata_ucs1 = (Py_UCS1 *)mapdata; + Py_UCS1 *outdata = (Py_UCS1 *)writer.data; + Py_UCS4 maxchar = writer.maxchar; + + assert (writer.kind == PyUnicode_1BYTE_KIND); + while (s < e) { + ch = *s; + x = mapdata_ucs1[ch]; + if (x > maxchar) { + if (_PyUnicodeWriter_PrepareInternal(&writer, 1, 0xff) == -1) + goto onError; + maxchar = writer.maxchar; + outdata = (Py_UCS1 *)writer.data; + } + outdata[writer.pos] = x; + writer.pos++; + ++s; + } + } + while (s < e) { - unsigned char ch; if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) { enum PyUnicode_Kind outkind = writer.kind; void *outdata = writer.data; -- cgit v1.2.1 From cc710374afbaa3a0ca9aa79bd285bdc8dd9238e3 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 9 Apr 2013 21:48:24 +0200 Subject: Use PyUnicode_READ() instead of PyUnicode_READ_CHAR() "PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it calls PyUnicode_KIND() and might call it twice." according to its documentation. 
--- Objects/unicodeobject.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 88729c8bc0..fde153e2de 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -468,7 +468,9 @@ unicode_result_ready(PyObject *unicode) } if (length == 1) { - Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0); + void *data = PyUnicode_DATA(unicode); + int kind = PyUnicode_KIND(unicode); + Py_UCS4 ch = PyUnicode_READ(kind, data, 0); if (ch < 256) { PyObject *latin1_char = unicode_latin1[ch]; if (latin1_char != NULL) { @@ -2786,6 +2788,9 @@ PyObject * PyUnicode_FromOrdinal(int ordinal) { PyObject *v; + void *data; + int kind; + if (ordinal < 0 || ordinal > MAX_UNICODE) { PyErr_SetString(PyExc_ValueError, "chr() arg not in range(0x110000)"); @@ -2798,7 +2803,9 @@ PyUnicode_FromOrdinal(int ordinal) v = PyUnicode_New(1, ordinal); if (v == NULL) return NULL; - PyUnicode_WRITE(PyUnicode_KIND(v), PyUnicode_DATA(v), 0, ordinal); + kind = PyUnicode_KIND(v); + data = PyUnicode_DATA(v); + PyUnicode_WRITE(kind, data, 0, ordinal); assert(_PyUnicode_CheckConsistency(v, 1)); return v; } @@ -3840,6 +3847,9 @@ PyUnicode_GetLength(PyObject *unicode) Py_UCS4 PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index) { + void *data; + int kind; + if (!PyUnicode_Check(unicode) || PyUnicode_READY(unicode) == -1) { PyErr_BadArgument(); return (Py_UCS4)-1; @@ -3848,7 +3858,9 @@ PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index) PyErr_SetString(PyExc_IndexError, "string index out of range"); return (Py_UCS4)-1; } - return PyUnicode_READ_CHAR(unicode, index); + data = PyUnicode_DATA(unicode); + kind = PyUnicode_KIND(unicode); + return PyUnicode_READ(kind, data, index); } int @@ -7984,10 +7996,14 @@ _PyUnicode_EncodeCharmap(PyObject *unicode, * -1=not initialized, 0=unknown, 1=strict, 2=replace, * 3=ignore, 4=xmlcharrefreplace */ int known_errorHandler = -1; + void *data; + int 
kind; if (PyUnicode_READY(unicode) == -1) return NULL; size = PyUnicode_GET_LENGTH(unicode); + data = PyUnicode_DATA(unicode); + kind = PyUnicode_KIND(unicode); /* Default to Latin-1 */ if (mapping == NULL) @@ -8002,7 +8018,7 @@ _PyUnicode_EncodeCharmap(PyObject *unicode, return res; while (inpos Date: Tue, 9 Apr 2013 21:53:54 +0200 Subject: Optimize make_bloom_mask(), used by str.strip(), str.lstrip() and str.rstrip() Write specialized functions per Unicode kind to avoid the expensive PyUnicode_READ() macro. --- Objects/unicodeobject.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index fde153e2de..e0b507f3f0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -543,7 +543,6 @@ static OSVERSIONINFOEX winver; static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0; -#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1))))) #define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1))))) #define BLOOM_LINEBREAK(ch) \ @@ -553,16 +552,39 @@ static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0; Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(int kind, void* ptr, Py_ssize_t len) { +#define BLOOM_UPDATE(TYPE, MASK, PTR, LEN) \ + do { \ + TYPE *data = (TYPE *)PTR; \ + TYPE *end = data + LEN; \ + Py_UCS4 ch; \ + for (; data != end; data++) { \ + ch = *data; \ + MASK |= (1UL << (ch & (BLOOM_WIDTH - 1))); \ + } \ + break; \ + } while (0) + /* calculate simple bloom-style bitmask for a given unicode string */ BLOOM_MASK mask; - Py_ssize_t i; mask = 0; - for (i = 0; i < len; i++) - BLOOM_ADD(mask, PyUnicode_READ(kind, ptr, i)); - + switch (kind) { + case PyUnicode_1BYTE_KIND: + BLOOM_UPDATE(Py_UCS1, mask, ptr, len); + break; + case PyUnicode_2BYTE_KIND: + BLOOM_UPDATE(Py_UCS2, mask, ptr, len); + break; + case PyUnicode_4BYTE_KIND: + BLOOM_UPDATE(Py_UCS4, mask, ptr, len); + break; + default: + assert(0); + } return mask; + 
+#undef BLOOM_UPDATE } #define BLOOM_MEMBER(mask, chr, str) \ -- cgit v1.2.1 From bd0538157430822f42c308beb54550fba2d15d3b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 9 Apr 2013 22:13:33 +0200 Subject: Optimize PyUnicode_DecodeCharmap() Avoid expensive PyUnicode_READ() and PyUnicode_WRITE(), manipulate pointers instead. --- Objects/unicodeobject.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e0b507f3f0..bf49ce5962 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7351,27 +7351,29 @@ PyUnicode_DecodeCharmap(const char *s, while (s < e) { if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) { enum PyUnicode_Kind outkind = writer.kind; - void *outdata = writer.data; + Py_UCS2 *mapdata_ucs2 = (Py_UCS2 *)mapdata; if (outkind == PyUnicode_1BYTE_KIND) { + Py_UCS1 *outdata = (Py_UCS1 *)writer.data; Py_UCS4 maxchar = writer.maxchar; while (s < e) { - unsigned char ch = *s; - x = PyUnicode_READ(PyUnicode_2BYTE_KIND, mapdata, ch); + ch = *s; + x = mapdata_ucs2[ch]; if (x > maxchar) goto Error; - PyUnicode_WRITE(PyUnicode_1BYTE_KIND, outdata, writer.pos, x); + outdata[writer.pos] = x; writer.pos++; ++s; } break; } else if (outkind == PyUnicode_2BYTE_KIND) { + Py_UCS2 *outdata = (Py_UCS2 *)writer.data; while (s < e) { - unsigned char ch = *s; - x = PyUnicode_READ(PyUnicode_2BYTE_KIND, mapdata, ch); + ch = *s; + x = mapdata_ucs2[ch]; if (x == 0xFFFE) goto Error; - PyUnicode_WRITE(PyUnicode_2BYTE_KIND, outdata, writer.pos, x); + outdata[writer.pos] = x; writer.pos++; ++s; } -- cgit v1.2.1 From 606ecb154b5a8ba6ce346d80cc1259f9ee22ee68 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 9 Apr 2013 22:19:21 +0200 Subject: Fix _PyUnicode_XStrip() Inline the BLOOM_MEMBER() to only call PyUnicode_READ() only once (per loop iteration). Store also the length of the seperator in a variable to avoid calls to PyUnicode_GET_LENGTH(). 
--- Objects/unicodeobject.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index bf49ce5962..ba72dba3be 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -587,10 +587,6 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len) #undef BLOOM_UPDATE } -#define BLOOM_MEMBER(mask, chr, str) \ - (BLOOM(mask, chr) \ - && (PyUnicode_FindChar(str, chr, 0, PyUnicode_GET_LENGTH(str), 1) >= 0)) - /* Compilation of templated routines */ #include "stringlib/asciilib.h" @@ -11635,6 +11631,7 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj) int kind; Py_ssize_t i, j, len; BLOOM_MASK sepmask; + Py_ssize_t seplen; if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1) return NULL; @@ -11642,24 +11639,35 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj) kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); len = PyUnicode_GET_LENGTH(self); + seplen = PyUnicode_GET_LENGTH(sepobj); sepmask = make_bloom_mask(PyUnicode_KIND(sepobj), PyUnicode_DATA(sepobj), - PyUnicode_GET_LENGTH(sepobj)); + seplen); i = 0; if (striptype != RIGHTSTRIP) { - while (i < len && - BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, i), sepobj)) { + while (i < len) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (!BLOOM(sepmask, ch)) + break; + if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0) + break; i++; } } j = len; if (striptype != LEFTSTRIP) { - do { + j--; + while (j >= i) { + Py_UCS4 ch = PyUnicode_READ(kind, data, j); + if (!BLOOM(sepmask, ch)) + break; + if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0) + break; j--; - } while (j >= i && - BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, j), sepobj)); + } + j++; } -- cgit v1.2.1 From e896ac823f25edb4bd01c0baa66d53ce795300d8 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 9 Apr 2013 22:21:08 +0200 Subject: Fix do_strip(): don't call 
PyUnicode_READ() in Py_UNICODE_ISSPACE() to not call it twice --- Objects/unicodeobject.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ba72dba3be..52fe3bc55c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11727,16 +11727,23 @@ do_strip(PyObject *self, int striptype) i = 0; if (striptype != RIGHTSTRIP) { - while (i < len && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) { + while (i < len) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (!Py_UNICODE_ISSPACE(ch)) + break; i++; } } j = len; if (striptype != LEFTSTRIP) { - do { + j--; + while (j >= i) { + Py_UCS4 ch = PyUnicode_READ(kind, data, j); + if (!Py_UNICODE_ISSPACE(ch)) + break; j--; - } while (j >= i && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j))); + } j++; } -- cgit v1.2.1 From d22d53d9f23093c8fb71ae495db991d4d6c68562 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 9 Apr 2013 22:38:52 +0200 Subject: Don't calls macros in PyUnicode_WRITE() parameters PyUnicode_WRITE() expands some parameters twice or more. 
--- Objects/unicodeobject.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 52fe3bc55c..838d9de9fe 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1958,13 +1958,17 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size) assert(size > 0); if (size == 1) { Py_UCS4 ch = u[0]; + int kind; + void *data; if (ch < 256) return get_latin1_char((unsigned char)ch); res = PyUnicode_New(1, ch); if (res == NULL) return NULL; - PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch); + kind = PyUnicode_KIND(res); + data = PyUnicode_DATA(res); + PyUnicode_WRITE(kind, data, 0, ch); assert(_PyUnicode_CheckConsistency(res, 1)); return res; } @@ -1994,13 +1998,17 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size) assert(size > 0); if (size == 1) { Py_UCS4 ch = u[0]; + int kind; + void *data; if (ch < 256) return get_latin1_char((unsigned char)ch); res = PyUnicode_New(1, ch); if (res == NULL) return NULL; - PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch); + kind = PyUnicode_KIND(res); + data = PyUnicode_DATA(res); + PyUnicode_WRITE(kind, data, 0, ch); assert(_PyUnicode_CheckConsistency(res, 1)); return res; } -- cgit v1.2.1 From afd5696d9d1448216ec7a69efebd4c0f1f7cc240 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 9 Apr 2013 22:39:24 +0200 Subject: Write super-fast version of str.strip(), str.lstrip() and str.rstrip() for pure ASCII --- Objects/unicodeobject.c | 64 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 19 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 838d9de9fe..e348a46585 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11722,37 +11722,63 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) static PyObject * do_strip(PyObject *self, int striptype) { - int kind; - 
void *data; Py_ssize_t len, i, j; if (PyUnicode_READY(self) == -1) return NULL; - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); len = PyUnicode_GET_LENGTH(self); - i = 0; - if (striptype != RIGHTSTRIP) { - while (i < len) { - Py_UCS4 ch = PyUnicode_READ(kind, data, i); - if (!Py_UNICODE_ISSPACE(ch)) - break; - i++; + if (PyUnicode_IS_ASCII(self)) { + Py_UCS1 *data = PyUnicode_1BYTE_DATA(self); + + i = 0; + if (striptype != RIGHTSTRIP) { + while (i < len) { + Py_UCS4 ch = data[i]; + if (!_Py_ascii_whitespace[ch]) + break; + i++; + } + } + + j = len; + if (striptype != LEFTSTRIP) { + j--; + while (j >= i) { + Py_UCS4 ch = data[j]; + if (!_Py_ascii_whitespace[ch]) + break; + j--; + } + j++; } } + else { + int kind = PyUnicode_KIND(self); + void *data = PyUnicode_DATA(self); - j = len; - if (striptype != LEFTSTRIP) { - j--; - while (j >= i) { - Py_UCS4 ch = PyUnicode_READ(kind, data, j); - if (!Py_UNICODE_ISSPACE(ch)) - break; + i = 0; + if (striptype != RIGHTSTRIP) { + while (i < len) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (!Py_UNICODE_ISSPACE(ch)) + break; + i++; + } + } + + j = len; + if (striptype != LEFTSTRIP) { j--; + while (j >= i) { + Py_UCS4 ch = PyUnicode_READ(kind, data, j); + if (!Py_UNICODE_ISSPACE(ch)) + break; + j--; + } + j++; } - j++; } return PyUnicode_Substring(self, i, j); -- cgit v1.2.1 From e972506229fd5249e632d4b8dedc3b79cfbe7df8 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 9 Apr 2013 22:52:48 +0200 Subject: replace(): only call PyUnicode_DATA(u) once --- Objects/unicodeobject.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e348a46585..6b63157266 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9972,7 +9972,7 @@ replace(PyObject *self, PyObject *str1, Py_UCS4 u1, u2; int rkind; Py_ssize_t index, pos; - char *src; + char *src, *rbuf; u1 = PyUnicode_READ(kind1, buf1, 0); pos = 
findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1); @@ -9984,8 +9984,9 @@ replace(PyObject *self, PyObject *str1, goto error; _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen); rkind = PyUnicode_KIND(u); + rbuf = PyUnicode_DATA(u); - PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2); + PyUnicode_WRITE(rkind, rbuf, pos, u2); index = 0; src = sbuf; while (--maxcount) @@ -9997,7 +9998,7 @@ replace(PyObject *self, PyObject *str1, pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1); if (pos < 0) break; - PyUnicode_WRITE(rkind, PyUnicode_DATA(u), index + pos, u2); + PyUnicode_WRITE(rkind, rbuf, index + pos, u2); } } else { -- cgit v1.2.1 From 921589b82d08e66fe5909efb94468eed470719fd Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 9 Apr 2013 23:53:26 +0200 Subject: Issue #17615: On Windows (VS2010), Performances of wmemcmp() to compare Unicode strings are not convincing. For UCS2 (16-bit wchar_t type), use a dummy loop instead of wmemcmp(). The dummy loop is as fast, or a little bit faster. wchar_t is only 16-bit long on Windows. wmemcmp() is still used for 32-bit wchar_t. 
--- Objects/unicodeobject.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 6b63157266..162221ceb2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10375,16 +10375,7 @@ unicode_compare(PyObject *str1, PyObject *str2) break; case PyUnicode_2BYTE_KIND: { -#if defined(HAVE_WMEMCMP) && SIZEOF_WCHAR_T == 2 - int cmp = wmemcmp((wchar_t *)data1, (wchar_t *)data2, len); - /* normalize result of wmemcmp() into the range [-1; 1] */ - if (cmp < 0) - return -1; - if (cmp > 0) - return 1; -#else COMPARE(Py_UCS2, Py_UCS2); -#endif break; } case PyUnicode_4BYTE_KIND: -- cgit v1.2.1 From b09c0a9a4560a436818314c964993ec4c75fb355 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 11 Apr 2013 22:09:04 +0200 Subject: Close #17693: Rewrite CJK decoders to use the _PyUnicodeWriter API instead of the legacy Py_UNICODE API. Add also a new _PyUnicodeWriter_WriteChar() function. --- Objects/unicodeobject.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 162221ceb2..e52571db2e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12947,6 +12947,16 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, return 0; } +int +_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch) +{ + if (_PyUnicodeWriter_Prepare(writer, 1, ch) < 0) + return -1; + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch); + writer->pos++; + return 0; +} + int _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) { -- cgit v1.2.1 From 97feca95c34e9bb5137b977f84615e363eac4ed9 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sat, 13 Apr 2013 15:45:44 +0100 Subject: Issue #17643: Add __callback__ attribute to weakref.ref. 
--- Objects/weakrefobject.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index b49dcee1ad..c083f8fce5 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -338,6 +338,11 @@ weakref___init__(PyObject *self, PyObject *args, PyObject *kwargs) } +static PyMemberDef weakref_members[] = { + {"__callback__", T_OBJECT, offsetof(PyWeakReference, wr_callback), READONLY}, + {NULL} /* Sentinel */ +}; + PyTypeObject _PyWeakref_RefType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) @@ -369,7 +374,7 @@ _PyWeakref_RefType = { 0, /*tp_iter*/ 0, /*tp_iternext*/ 0, /*tp_methods*/ - 0, /*tp_members*/ + weakref_members, /*tp_members*/ 0, /*tp_getset*/ 0, /*tp_base*/ 0, /*tp_dict*/ -- cgit v1.2.1 From 950b78664e99a035ef90211ecaf712dc04b174f7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 13 Apr 2013 22:45:04 +0300 Subject: Issue #16061: Speed up str.replace() for replacing 1-character strings. --- Objects/stringlib/replace.h | 53 +++++++++++++++++++++++++++++++++++++ Objects/unicodeobject.c | 64 +++++++++++++++++++++++++++------------------ 2 files changed, 91 insertions(+), 26 deletions(-) create mode 100644 Objects/stringlib/replace.h (limited to 'Objects') diff --git a/Objects/stringlib/replace.h b/Objects/stringlib/replace.h new file mode 100644 index 0000000000..ef318ed6dd --- /dev/null +++ b/Objects/stringlib/replace.h @@ -0,0 +1,53 @@ +/* stringlib: replace implementation */ + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +Py_LOCAL_INLINE(void) +STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end, + Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount) +{ + *s = u2; + while (--maxcount && ++s != end) { + /* Find the next character to be replaced. + + If it occurs often, it is faster to scan for it using an inline + loop. 
If it occurs seldom, it is faster to scan for it using a + function call; the overhead of the function call is amortized + across the many characters that call covers. We start with an + inline loop and use a heuristic to determine whether to fall back + to a function call. */ + if (*s != u1) { + int attempts = 10; + /* search u1 in a dummy loop */ + while (1) { + if (++s == end) + return; + if (*s == u1) + break; + if (!--attempts) { + /* if u1 was not found for attempts iterations, + use FASTSEARCH() or memchr() */ +#if STRINGLIB_SIZEOF_CHAR == 1 + s++; + s = memchr(s, u1, end - s); + if (s == NULL) + return; +#else + Py_ssize_t i; + STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1; + s++; + i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH); + if (i < 0) + return; + s += i; +#endif + /* restart the dummy loop */ + break; + } + } + } + *s = u2; + } +} diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e52571db2e..3688f4a789 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -605,6 +605,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len) #include "stringlib/split.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/replace.h" #include "stringlib/find_max_char.h" #include "stringlib/localeutil.h" #include "stringlib/undef.h" @@ -615,6 +616,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len) #include "stringlib/split.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/replace.h" #include "stringlib/find_max_char.h" #include "stringlib/localeutil.h" #include "stringlib/undef.h" @@ -625,6 +627,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len) #include "stringlib/split.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/replace.h" #include "stringlib/find_max_char.h" #include "stringlib/localeutil.h" #include "stringlib/undef.h" @@ -9927,6 +9930,31 @@ anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen, return 0; } 
+static void +replace_1char_inplace(PyObject *u, Py_ssize_t pos, + Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount) +{ + int kind = PyUnicode_KIND(u); + void *data = PyUnicode_DATA(u); + Py_ssize_t len = PyUnicode_GET_LENGTH(u); + if (kind == PyUnicode_1BYTE_KIND) { + ucs1lib_replace_1char_inplace((Py_UCS1 *)data + pos, + (Py_UCS1 *)data + len, + u1, u2, maxcount); + } + else if (kind == PyUnicode_2BYTE_KIND) { + ucs2lib_replace_1char_inplace((Py_UCS2 *)data + pos, + (Py_UCS2 *)data + len, + u1, u2, maxcount); + } + else { + assert(kind == PyUnicode_4BYTE_KIND); + ucs4lib_replace_1char_inplace((Py_UCS4 *)data + pos, + (Py_UCS4 *)data + len, + u1, u2, maxcount); + } +} + static PyObject * replace(PyObject *self, PyObject *str1, PyObject *str2, Py_ssize_t maxcount) @@ -9943,7 +9971,7 @@ replace(PyObject *self, PyObject *str1, Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1); Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2); int mayshrink; - Py_UCS4 maxchar, maxchar_str2; + Py_UCS4 maxchar, maxchar_str1, maxchar_str2; if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; @@ -9952,15 +9980,16 @@ replace(PyObject *self, PyObject *str1, if (str1 == str2) goto nothing; - if (skind < kind1) - /* substring too wide to be present */ - goto nothing; maxchar = PyUnicode_MAX_CHAR_VALUE(self); + maxchar_str1 = PyUnicode_MAX_CHAR_VALUE(str1); + if (maxchar < maxchar_str1) + /* substring too wide to be present */ + goto nothing; maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2); /* Replacing str1 with str2 may cause a maxchar reduction in the result string. 
*/ - mayshrink = (maxchar_str2 < maxchar); + mayshrink = (maxchar_str2 < maxchar_str1) && (maxchar == maxchar_str1); maxchar = MAX_MAXCHAR(maxchar, maxchar_str2); if (len1 == len2) { @@ -9970,36 +9999,19 @@ replace(PyObject *self, PyObject *str1, if (len1 == 1) { /* replace characters */ Py_UCS4 u1, u2; - int rkind; - Py_ssize_t index, pos; - char *src, *rbuf; + Py_ssize_t pos; u1 = PyUnicode_READ(kind1, buf1, 0); - pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1); + pos = findchar(sbuf, skind, slen, u1, 1); if (pos < 0) goto nothing; u2 = PyUnicode_READ(kind2, buf2, 0); u = PyUnicode_New(slen, maxchar); if (!u) goto error; - _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen); - rkind = PyUnicode_KIND(u); - rbuf = PyUnicode_DATA(u); - PyUnicode_WRITE(rkind, rbuf, pos, u2); - index = 0; - src = sbuf; - while (--maxcount) - { - pos++; - src += pos * PyUnicode_KIND(self); - slen -= pos; - index += pos; - pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1); - if (pos < 0) - break; - PyUnicode_WRITE(rkind, rbuf, index + pos, u2); - } + _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen); + replace_1char_inplace(u, pos, u1, u2, maxcount); } else { int rkind = skind; -- cgit v1.2.1 From 568effd671d2ed20e4d5e5f5aceac80b3f0a8f21 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 14 Apr 2013 02:35:33 +0200 Subject: Add _PyUnicodeWriter_WriteCharInline() --- Objects/unicodeobject.c | 106 ++++++++++++++++-------------------------------- 1 file changed, 35 insertions(+), 71 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3688f4a789..99628cad72 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -196,6 +196,10 @@ static PyObject *unicode_empty = NULL; return unicode_empty; \ } while (0) +/* Forward declaration */ +Py_LOCAL_INLINE(int) +_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch); + /* List of static strings. 
*/ static _Py_Identifier *static_strings = NULL; @@ -2432,10 +2436,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, "character argument not in range(0x110000)"); return NULL; } - if (_PyUnicodeWriter_Prepare(writer, 1, ordinal) == -1) + if (_PyUnicodeWriter_WriteCharInline(writer, ordinal) < 0) return NULL; - PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ordinal); - writer->pos++; break; } @@ -2636,10 +2638,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, } case '%': - if (_PyUnicodeWriter_Prepare(writer, 1, '%') == 1) + if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0) return NULL; - PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '%'); - writer->pos++; break; default: @@ -4282,18 +4282,14 @@ PyUnicode_DecodeUTF7Stateful(const char *s, /* expecting a second surrogate */ if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) { Py_UCS4 ch2 = Py_UNICODE_JOIN_SURROGATES(surrogate, outCh); - if (_PyUnicodeWriter_Prepare(&writer, 1, ch2) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, ch2) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch2); - writer.pos++; surrogate = 0; continue; } else { - if (_PyUnicodeWriter_Prepare(&writer, 1, surrogate) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, surrogate) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, surrogate); - writer.pos++; surrogate = 0; } } @@ -4302,10 +4298,8 @@ PyUnicode_DecodeUTF7Stateful(const char *s, surrogate = outCh; } else { - if (_PyUnicodeWriter_Prepare(&writer, 1, outCh) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, outCh) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, outCh); - writer.pos++; } } } @@ -4313,10 +4307,8 @@ PyUnicode_DecodeUTF7Stateful(const char *s, inShift = 0; s++; if (surrogate) { - if (_PyUnicodeWriter_Prepare(&writer, 1, surrogate) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, surrogate) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, 
writer.pos, surrogate); - writer.pos++; surrogate = 0; } if (base64bits > 0) { /* left-over bits */ @@ -4336,10 +4328,8 @@ PyUnicode_DecodeUTF7Stateful(const char *s, if (ch != '-') { /* '-' is absorbed; other terminating characters are preserved */ - if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); - writer.pos++; } } } @@ -4348,10 +4338,8 @@ PyUnicode_DecodeUTF7Stateful(const char *s, s++; /* consume '+' */ if (s < e && *s == '-') { /* '+-' encodes '+' */ s++; - if (_PyUnicodeWriter_Prepare(&writer, 1, '+') == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '+'); - writer.pos++; } else { /* begin base64-encoded section */ inShift = 1; @@ -4361,10 +4349,8 @@ PyUnicode_DecodeUTF7Stateful(const char *s, } else if (DECODE_DIRECT(ch)) { /* character decodes as itself */ s++; - if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); - writer.pos++; } else { startinpos = s-starts; @@ -4711,10 +4697,8 @@ PyUnicode_DecodeUTF8Stateful(const char *s, endinpos = startinpos + ch - 1; break; default: - if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); - writer.pos++; continue; } @@ -4970,10 +4954,8 @@ PyUnicode_DecodeUTF32Stateful(const char *s, } else { if (ch < 0x110000) { - if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); - writer.pos++; q += 4; continue; } @@ -5227,10 +5209,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s, endinpos = startinpos + 2; break; default: - 
if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); - writer.pos++; continue; } @@ -5469,10 +5449,8 @@ PyUnicode_DecodeUnicodeEscape(const char *s, if (*s != '\\') { x = (unsigned char)*s; s++; - if (_PyUnicodeWriter_Prepare(&writer, 1, x) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, x); - writer.pos++; continue; } @@ -5492,10 +5470,8 @@ PyUnicode_DecodeUnicodeEscape(const char *s, /* \x escapes */ #define WRITECHAR(ch) \ do { \ - if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) \ + if (_PyUnicodeWriter_WriteCharInline(&writer, (ch)) < 0) \ goto onError; \ - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); \ - writer.pos++; \ } while(0) case '\n': break; @@ -5825,10 +5801,8 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s, /* Non-escape characters are interpreted as Unicode ordinals */ if (*s != '\\') { x = (unsigned char)*s++; - if (_PyUnicodeWriter_Prepare(&writer, 1, x) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, x); - writer.pos++; continue; } startinpos = s-starts; @@ -5840,10 +5814,8 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s, if (*s != '\\') break; x = (unsigned char)*s++; - if (_PyUnicodeWriter_Prepare(&writer, 1, x) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, x); - writer.pos++; } if (((s - bs) & 1) == 0 || s >= end || @@ -5876,10 +5848,8 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s, x += 10 + c - 'A'; } if (x <= MAX_UNICODE) { - if (_PyUnicodeWriter_Prepare(&writer, 1, x) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, x); - writer.pos++; } else { endinpos 
= s-starts; @@ -6059,10 +6029,8 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, } #endif - if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); - writer.pos++; continue; error: @@ -7409,10 +7377,8 @@ Error: continue; } - if (_PyUnicodeWriter_Prepare(&writer, 1, x) == -1) + if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0) goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, x); - writer.pos++; ++s; } } @@ -7451,12 +7417,10 @@ Error: goto onError; } - if (_PyUnicodeWriter_Prepare(&writer, 1, value) == -1) { + if (_PyUnicodeWriter_WriteCharInline(&writer, value) < 0) { Py_DECREF(x); goto onError; } - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, value); - writer.pos++; } else if (PyUnicode_Check(x)) { if (PyUnicode_READY(x) == -1) { @@ -7467,12 +7431,10 @@ Error: Py_UCS4 value = PyUnicode_READ_CHAR(x, 0); if (value == 0xFFFE) goto Undefined; - if (_PyUnicodeWriter_Prepare(&writer, 1, value) == -1) { + if (_PyUnicodeWriter_WriteCharInline(&writer, value) < 0) { Py_DECREF(x); goto onError; } - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, value); - writer.pos++; } else { writer.overallocate = 1; @@ -12959,8 +12921,8 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, return 0; } -int -_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch) +Py_LOCAL_INLINE(int) +_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch) { if (_PyUnicodeWriter_Prepare(writer, 1, ch) < 0) return -1; @@ -12969,6 +12931,12 @@ _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch) return 0; } +int +_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch) +{ + return _PyUnicodeWriter_WriteCharInline(writer, ch); +} + int _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) { @@ -13873,10 +13841,8 @@ unicode_format_arg_format(struct unicode_formatter_t *ctx, 
ctx->writer.overallocate = 0; if (arg->ch == '%') { - if (_PyUnicodeWriter_Prepare(writer, 1, '%') == -1) + if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0) return -1; - PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '%'); - writer->pos += 1; return 1; } @@ -13951,10 +13917,8 @@ unicode_format_arg_format(struct unicode_formatter_t *ctx, return -1; if (arg->width == -1 && arg->prec == -1) { /* Fast path */ - if (_PyUnicodeWriter_Prepare(writer, 1, ch) == -1) + if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) return -1; - PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch); - writer->pos += 1; return 1; } *p_str = PyUnicode_FromOrdinal(ch); -- cgit v1.2.1 From 56d291705741ab97b77c7d804da162d964de69a6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 14 Apr 2013 16:29:09 +0200 Subject: stringlib: remove unused STRINGLIB_RESIZE macro --- Objects/stringlib/asciilib.h | 1 - Objects/stringlib/stringdefs.h | 1 - Objects/stringlib/ucs1lib.h | 1 - Objects/stringlib/ucs2lib.h | 1 - Objects/stringlib/ucs4lib.h | 1 - Objects/stringlib/undef.h | 1 - Objects/stringlib/unicodedefs.h | 1 - 7 files changed, 7 deletions(-) (limited to 'Objects') diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h index f62813d2fd..d0fc18d22f 100644 --- a/Objects/stringlib/asciilib.h +++ b/Objects/stringlib/asciilib.h @@ -19,7 +19,6 @@ #define STRINGLIB_STR PyUnicode_1BYTE_DATA #define STRINGLIB_LEN PyUnicode_GET_LENGTH #define STRINGLIB_NEW(STR,LEN) _PyUnicode_FromASCII((char*)(STR),(LEN)) -#define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h index 7bb91a7a5b..ce27f3e408 100644 --- a/Objects/stringlib/stringdefs.h +++ b/Objects/stringlib/stringdefs.h @@ -21,7 +21,6 @@ #define STRINGLIB_STR PyBytes_AS_STRING #define STRINGLIB_LEN PyBytes_GET_SIZE #define STRINGLIB_NEW PyBytes_FromStringAndSize 
-#define STRINGLIB_RESIZE _PyBytes_Resize #define STRINGLIB_CHECK PyBytes_Check #define STRINGLIB_CHECK_EXACT PyBytes_CheckExact #define STRINGLIB_TOSTR PyObject_Str diff --git a/Objects/stringlib/ucs1lib.h b/Objects/stringlib/ucs1lib.h index e8c6fcb85f..ce1eb57f0d 100644 --- a/Objects/stringlib/ucs1lib.h +++ b/Objects/stringlib/ucs1lib.h @@ -19,7 +19,6 @@ #define STRINGLIB_STR PyUnicode_1BYTE_DATA #define STRINGLIB_LEN PyUnicode_GET_LENGTH #define STRINGLIB_NEW _PyUnicode_FromUCS1 -#define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact diff --git a/Objects/stringlib/ucs2lib.h b/Objects/stringlib/ucs2lib.h index 45e572963d..f900cb65f8 100644 --- a/Objects/stringlib/ucs2lib.h +++ b/Objects/stringlib/ucs2lib.h @@ -19,7 +19,6 @@ #define STRINGLIB_STR PyUnicode_2BYTE_DATA #define STRINGLIB_LEN PyUnicode_GET_LENGTH #define STRINGLIB_NEW _PyUnicode_FromUCS2 -#define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact diff --git a/Objects/stringlib/ucs4lib.h b/Objects/stringlib/ucs4lib.h index 647a27e233..86a480f1e3 100644 --- a/Objects/stringlib/ucs4lib.h +++ b/Objects/stringlib/ucs4lib.h @@ -19,7 +19,6 @@ #define STRINGLIB_STR PyUnicode_4BYTE_DATA #define STRINGLIB_LEN PyUnicode_GET_LENGTH #define STRINGLIB_NEW _PyUnicode_FromUCS4 -#define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact diff --git a/Objects/stringlib/undef.h b/Objects/stringlib/undef.h index 03117ec443..f9d3f1d332 100644 --- a/Objects/stringlib/undef.h +++ b/Objects/stringlib/undef.h @@ -6,7 +6,6 @@ #undef STRINGLIB_STR #undef STRINGLIB_LEN #undef STRINGLIB_NEW -#undef STRINGLIB_RESIZE #undef _Py_InsertThousandsGrouping #undef STRINGLIB_IS_UNICODE diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h index f16f21e60c..48d00eccd0 100644 --- 
a/Objects/stringlib/unicodedefs.h +++ b/Objects/stringlib/unicodedefs.h @@ -21,7 +21,6 @@ #define STRINGLIB_STR PyUnicode_AS_UNICODE #define STRINGLIB_LEN PyUnicode_GET_SIZE #define STRINGLIB_NEW PyUnicode_FromUnicode -#define STRINGLIB_RESIZE PyUnicode_Resize #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact -- cgit v1.2.1 From b1c33691fe13c8a05e2483cc21209ff320f0ce72 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 14 Apr 2013 18:44:10 +0200 Subject: Optimize ascii(str): don't encode/decode repr if repr is already ASCII --- Objects/object.c | 3 +++ Objects/unicodeobject.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index fd1fd256ba..79f1c8a835 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -451,6 +451,9 @@ PyObject_ASCII(PyObject *v) if (repr == NULL) return NULL; + if (PyUnicode_IS_ASCII(repr)) + return repr; + /* repr is guaranteed to be a PyUnicode object by PyObject_Repr */ ascii = _PyUnicode_AsASCIIString(repr, "backslashreplace"); Py_DECREF(repr); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 99628cad72..0996afbbb8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6499,7 +6499,7 @@ _PyUnicode_AsASCIIString(PyObject *unicode, const char *errors) return NULL; /* Fast path: if it is an ASCII-only string, construct bytes object directly. Else defer to above function to raise the exception. 
*/ - if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128) + if (PyUnicode_IS_ASCII(unicode)) return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode), PyUnicode_GET_LENGTH(unicode)); return unicode_encode_ucs1(unicode, errors, 128); -- cgit v1.2.1 From a2d3a609bfdb05fff788c836775bfe7de557e081 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 14 Apr 2013 18:45:39 +0200 Subject: Optimize repr(str): use _PyUnicode_FastCopyCharacters() when no character is escaped --- Objects/unicodeobject.c | 147 +++++++++++++++++++++++++----------------------- 1 file changed, 78 insertions(+), 69 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0996afbbb8..907fa5ad6a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11968,7 +11968,7 @@ unicode_repr(PyObject *unicode) Py_ssize_t isize; Py_ssize_t osize, squote, dquote, i, o; Py_UCS4 max, quote; - int ikind, okind; + int ikind, okind, unchanged; void *idata, *odata; if (PyUnicode_READY(unicode) == -1) @@ -11979,7 +11979,7 @@ unicode_repr(PyObject *unicode) /* Compute length of output, quote characters, and maximum character */ - osize = 2; /* quotes */ + osize = 0; max = 127; squote = dquote = 0; ikind = PyUnicode_KIND(unicode); @@ -12010,7 +12010,9 @@ unicode_repr(PyObject *unicode) } quote = '\''; + unchanged = (osize == isize); if (squote) { + unchanged = 0; if (dquote) /* Both squote and dquote present. 
Use squote, and escape them */ @@ -12018,6 +12020,7 @@ unicode_repr(PyObject *unicode) else quote = '"'; } + osize += 2; /* quotes */ repr = PyUnicode_New(osize, max); if (repr == NULL) @@ -12027,82 +12030,88 @@ unicode_repr(PyObject *unicode) PyUnicode_WRITE(okind, odata, 0, quote); PyUnicode_WRITE(okind, odata, osize-1, quote); + if (unchanged) { + _PyUnicode_FastCopyCharacters(repr, 1, + unicode, 0, + isize); + } + else { + for (i = 0, o = 1; i < isize; i++) { + Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); - for (i = 0, o = 1; i < isize; i++) { - Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); - - /* Escape quotes and backslashes */ - if ((ch == quote) || (ch == '\\')) { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, ch); - continue; - } + /* Escape quotes and backslashes */ + if ((ch == quote) || (ch == '\\')) { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, ch); + continue; + } - /* Map special whitespace to '\t', \n', '\r' */ - if (ch == '\t') { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 't'); - } - else if (ch == '\n') { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 'n'); - } - else if (ch == '\r') { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 'r'); - } + /* Map special whitespace to '\t', \n', '\r' */ + if (ch == '\t') { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 't'); + } + else if (ch == '\n') { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 'n'); + } + else if (ch == '\r') { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 'r'); + } - /* Map non-printable US ASCII to '\xhh' */ - else if (ch < ' ' || ch == 0x7F) { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 'x'); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); - 
PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); - } + /* Map non-printable US ASCII to '\xhh' */ + else if (ch < ' ' || ch == 0x7F) { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 'x'); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); + } - /* Copy ASCII characters as-is */ - else if (ch < 0x7F) { - PyUnicode_WRITE(okind, odata, o++, ch); - } + /* Copy ASCII characters as-is */ + else if (ch < 0x7F) { + PyUnicode_WRITE(okind, odata, o++, ch); + } - /* Non-ASCII characters */ - else { - /* Map Unicode whitespace and control characters - (categories Z* and C* except ASCII space) - */ - if (!Py_UNICODE_ISPRINTABLE(ch)) { - PyUnicode_WRITE(okind, odata, o++, '\\'); - /* Map 8-bit characters to '\xhh' */ - if (ch <= 0xff) { - PyUnicode_WRITE(okind, odata, o++, 'x'); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); - } - /* Map 16-bit characters to '\uxxxx' */ - else if (ch <= 0xffff) { - PyUnicode_WRITE(okind, odata, o++, 'u'); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); + /* Non-ASCII characters */ + else { + /* Map Unicode whitespace and control characters + (categories Z* and C* except ASCII space) + */ + if (!Py_UNICODE_ISPRINTABLE(ch)) { + PyUnicode_WRITE(okind, odata, o++, '\\'); + /* Map 8-bit characters to '\xhh' */ + if (ch <= 0xff) { + PyUnicode_WRITE(okind, odata, o++, 'x'); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); + } + /* Map 16-bit characters to '\uxxxx' */ + else if (ch <= 0xffff) { + PyUnicode_WRITE(okind, odata, 
o++, 'u'); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); + } + /* Map 21-bit characters to '\U00xxxxxx' */ + else { + PyUnicode_WRITE(okind, odata, o++, 'U'); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); + } } - /* Map 21-bit characters to '\U00xxxxxx' */ + /* Copy characters as-is */ else { - PyUnicode_WRITE(okind, odata, o++, 'U'); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); + PyUnicode_WRITE(okind, odata, o++, ch); } } - /* Copy characters as-is */ - else { - PyUnicode_WRITE(okind, odata, o++, ch); - } } } /* Closing quote already added at the beginning */ -- cgit v1.2.1 From 4b33c7b0403a1af343b0745df552264c91333284 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 14 Apr 2013 18:56:46 +0200 Subject: PyUnicode_Join(): move use_memcpy test out 
of the loop to cleanup and optimize the code --- Objects/unicodeobject.c | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 907fa5ad6a..cd2acc0f83 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9466,41 +9466,49 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) sep_data = PyUnicode_1BYTE_DATA(sep); } #endif - for (i = 0, res_offset = 0; i < seqlen; ++i) { - Py_ssize_t itemlen; - item = items[i]; - /* Copy item, and maybe the separator. */ - if (i && seplen != 0) { - if (use_memcpy) { + if (use_memcpy) { + for (i = 0; i < seqlen; ++i) { + Py_ssize_t itemlen; + item = items[i]; + + /* Copy item, and maybe the separator. */ + if (i && seplen != 0) { Py_MEMCPY(res_data, sep_data, kind * seplen); res_data += kind * seplen; } - else { - _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen); - res_offset += seplen; - } - } - itemlen = PyUnicode_GET_LENGTH(item); - if (itemlen != 0) { - if (use_memcpy) { + + itemlen = PyUnicode_GET_LENGTH(item); + if (itemlen != 0) { Py_MEMCPY(res_data, PyUnicode_DATA(item), kind * itemlen); res_data += kind * itemlen; } - else { + } + assert(res_data == PyUnicode_1BYTE_DATA(res) + + kind * PyUnicode_GET_LENGTH(res)); + } + else { + for (i = 0, res_offset = 0; i < seqlen; ++i) { + Py_ssize_t itemlen; + item = items[i]; + + /* Copy item, and maybe the separator. 
*/ + if (i && seplen != 0) { + _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen); + res_offset += seplen; + } + + itemlen = PyUnicode_GET_LENGTH(item); + if (itemlen != 0) { _PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen); res_offset += itemlen; } } - } - if (use_memcpy) - assert(res_data == PyUnicode_1BYTE_DATA(res) - + kind * PyUnicode_GET_LENGTH(res)); - else assert(res_offset == PyUnicode_GET_LENGTH(res)); + } Py_DECREF(fseq); Py_XDECREF(sep); -- cgit v1.2.1 From 085d74ff5490d8ce5f1be59d62c63a78adbf3c2d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 14 Apr 2013 19:13:03 +0200 Subject: Cleanup PyUnicode_Append() * Check also that right is a Unicode object * call directly resize_compact() instead of unicode_resize() for a more explicit error handling, and to avoid testing some properties twice (ex: unicode_modifiable()) --- Objects/unicodeobject.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index cd2acc0f83..dfc3cf21c9 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10671,7 +10671,8 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) return; } left = *p_left; - if (right == NULL || left == NULL || !PyUnicode_Check(left)) { + if (right == NULL || left == NULL + || !PyUnicode_Check(left) || !PyUnicode_Check(right)) { if (!PyErr_Occurred()) PyErr_BadInternalCall(); goto error; @@ -10711,17 +10712,12 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) && !(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right))) { /* append inplace */ - if (unicode_resize(p_left, new_len) != 0) { - /* XXX if _PyUnicode_Resize() fails, 'left' has been - * deallocated so it cannot be put back into - * 'variable'. The MemoryError is raised when there - * is no value in 'variable', which might (very - * remotely) be a cause of incompatibilities. 
- */ + res = resize_compact(left, new_len); + if (res == NULL) goto error; - } - /* copy 'right' into the newly allocated area of 'left' */ - _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len); + + /* copy 'right' into the newly allocated area of 'res' (left) */ + _PyUnicode_FastCopyCharacters(res, left_len, right, 0, right_len); } else { maxchar = PyUnicode_MAX_CHAR_VALUE(left); @@ -10735,8 +10731,8 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) _PyUnicode_FastCopyCharacters(res, 0, left, 0, left_len); _PyUnicode_FastCopyCharacters(res, left_len, right, 0, right_len); Py_DECREF(left); - *p_left = res; } + *p_left = res; assert(_PyUnicode_CheckConsistency(*p_left, 1)); return; @@ -14520,12 +14516,12 @@ PyUnicode_InternInPlace(PyObject **p) t = PyDict_GetItem(interned, s); Py_END_ALLOW_RECURSION - if (t) { - Py_INCREF(t); - Py_DECREF(*p); - *p = t; - return; - } + if (t) { + Py_INCREF(t); + Py_DECREF(*p); + *p = t; + return; + } PyThreadState_GET()->recursion_critical = 1; if (PyDict_SetItem(interned, s, s) < 0) { -- cgit v1.2.1 From 70c975fa38d7f7bd414111be17f064c530094fde Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 14 Apr 2013 19:17:42 +0200 Subject: Minor change: fix character in do_strip() for the ASCII case --- Objects/unicodeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index dfc3cf21c9..17a19db34b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11705,7 +11705,7 @@ do_strip(PyObject *self, int striptype) i = 0; if (striptype != RIGHTSTRIP) { while (i < len) { - Py_UCS4 ch = data[i]; + Py_UCS1 ch = data[i]; if (!_Py_ascii_whitespace[ch]) break; i++; @@ -11716,7 +11716,7 @@ do_strip(PyObject *self, int striptype) if (striptype != LEFTSTRIP) { j--; while (j >= i) { - Py_UCS4 ch = data[j]; + Py_UCS1 ch = data[j]; if (!_Py_ascii_whitespace[ch]) break; j--; -- cgit v1.2.1 From 
5ddd0d2268a3ffbf4812e1894c0f7ab295b90369 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 14 Apr 2013 19:22:47 +0200 Subject: Cleanup PyUnicode_Contains() * No need to double-check that strings are ready: test already done by PyUnicode_FromObject() * Remove useless kind variable (use kind1 instead) --- Objects/unicodeobject.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 17a19db34b..748fcc7665 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10532,7 +10532,7 @@ int PyUnicode_Contains(PyObject *container, PyObject *element) { PyObject *str, *sub; - int kind1, kind2, kind; + int kind1, kind2; void *buf1, *buf2; Py_ssize_t len1, len2; int result; @@ -10551,23 +10551,18 @@ PyUnicode_Contains(PyObject *container, PyObject *element) Py_DECREF(sub); return -1; } - if (PyUnicode_READY(sub) == -1 || PyUnicode_READY(str) == -1) { - Py_DECREF(sub); - Py_DECREF(str); - } kind1 = PyUnicode_KIND(str); kind2 = PyUnicode_KIND(sub); - kind = kind1; buf1 = PyUnicode_DATA(str); buf2 = PyUnicode_DATA(sub); - if (kind2 != kind) { - if (kind2 > kind) { + if (kind2 != kind1) { + if (kind2 > kind1) { Py_DECREF(sub); Py_DECREF(str); return 0; } - buf2 = _PyUnicode_AsKind(sub, kind); + buf2 = _PyUnicode_AsKind(sub, kind1); } if (!buf2) { Py_DECREF(sub); @@ -10577,7 +10572,7 @@ PyUnicode_Contains(PyObject *container, PyObject *element) len1 = PyUnicode_GET_LENGTH(str); len2 = PyUnicode_GET_LENGTH(sub); - switch (kind) { + switch (kind1) { case PyUnicode_1BYTE_KIND: result = ucs1lib_find(buf1, len1, buf2, len2, 0) != -1; break; @@ -10595,7 +10590,7 @@ PyUnicode_Contains(PyObject *container, PyObject *element) Py_DECREF(str); Py_DECREF(sub); - if (kind2 != kind) + if (kind2 != kind1) PyMem_Free(buf2); return result; -- cgit v1.2.1 From 8b66a0c1cb6cf41b7903a69a150c2c657af5826c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 17 Apr 2013 
23:02:17 +0200 Subject: Close #17694: Add minimum length to _PyUnicodeWriter * Add also min_char attribute to _PyUnicodeWriter structure (currently unused) * _PyUnicodeWriter_Init() has no more argument (except the writer itself): min_length and overallocate must be set explicitly * In error handlers, only enable overallocation if the replacement string is longer than 1 character * CJK decoders don't use overallocation anymore * Set min_length, instead of preallocating memory using _PyUnicodeWriter_Prepare(), in many decoders * _PyUnicode_DecodeUnicodeInternal() checks for integer overflow --- Objects/complexobject.c | 2 +- Objects/floatobject.c | 2 +- Objects/longobject.c | 2 +- Objects/stringlib/unicode_format.h | 6 +- Objects/unicodeobject.c | 111 +++++++++++++++++++------------------ 5 files changed, 63 insertions(+), 60 deletions(-) (limited to 'Objects') diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 355b063f28..54838ccdbd 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -705,7 +705,7 @@ complex__format__(PyObject* self, PyObject* args) if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - _PyUnicodeWriter_Init(&writer, 0); + _PyUnicodeWriter_Init(&writer); ret = _PyComplex_FormatAdvancedWriter( &writer, self, diff --git a/Objects/floatobject.c b/Objects/floatobject.c index b571ca8c70..c54c8e1a1d 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -1711,7 +1711,7 @@ float__format__(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - _PyUnicodeWriter_Init(&writer, 0); + _PyUnicodeWriter_Init(&writer); ret = _PyFloat_FormatAdvancedWriter( &writer, self, diff --git a/Objects/longobject.c b/Objects/longobject.c index cdaea02775..2b04804216 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -4379,7 +4379,7 @@ long__format__(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return 
NULL; - _PyUnicodeWriter_Init(&writer, 0); + _PyUnicodeWriter_Init(&writer); ret = _PyLong_FormatAdvancedWriter( &writer, self, diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index 2f58946ec3..942916938c 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -906,7 +906,6 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs, int recursion_depth, AutoNumber *auto_number) { _PyUnicodeWriter writer; - Py_ssize_t minlen; /* check the recursion level */ if (recursion_depth <= 0) { @@ -915,8 +914,9 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs, return NULL; } - minlen = PyUnicode_GET_LENGTH(input->str) + 100; - _PyUnicodeWriter_Init(&writer, minlen); + _PyUnicodeWriter_Init(&writer); + writer.overallocate = 1; + writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100; if (!do_markup(input, args, kwargs, &writer, recursion_depth, auto_number)) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 748fcc7665..c4157d8270 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2665,7 +2665,9 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) const char *f; _PyUnicodeWriter writer; - _PyUnicodeWriter_Init(&writer, strlen(format) + 100); + _PyUnicodeWriter_Init(&writer); + writer.min_length = strlen(format) + 100; + writer.overallocate = 1; /* va_list may be an array (of 1 item) on some platforms (ex: AMD64). Copy it to be able to pass a reference to a subfunction. */ @@ -4117,7 +4119,10 @@ unicode_decode_call_errorhandler_writer( goto onError; } - writer->overallocate = 1; + if (PyUnicode_READY(repunicode) < 0) + goto onError; + if (PyUnicode_GET_LENGTH(repunicode) > 1) + writer->overallocate = 1; if (_PyUnicodeWriter_WriteStr(writer, repunicode) == -1) return @@ -4256,9 +4261,8 @@ PyUnicode_DecodeUTF7Stateful(const char *s, } /* Start off assuming it's all ASCII. Widen later as necessary. 
*/ - _PyUnicodeWriter_Init(&writer, 0); - if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) - goto onError; + _PyUnicodeWriter_Init(&writer); + writer.min_length = size; shiftOutStart = 0; e = s + size; @@ -4655,7 +4659,7 @@ PyUnicode_DecodeUTF8Stateful(const char *s, return get_latin1_char((unsigned char)s[0]); } - _PyUnicodeWriter_Init(&writer, 0); + _PyUnicodeWriter_Init(&writer); if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) goto onError; @@ -4910,7 +4914,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, le = bo <= 0; #endif - _PyUnicodeWriter_Init(&writer, 0); + _PyUnicodeWriter_Init(&writer); if (_PyUnicodeWriter_Prepare(&writer, (e - q + 3) / 4, 127) == -1) goto onError; @@ -5149,7 +5153,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s, /* Note: size will always be longer than the resulting Unicode character count */ - _PyUnicodeWriter_Init(&writer, 0); + _PyUnicodeWriter_Init(&writer); if (_PyUnicodeWriter_Prepare(&writer, (e - q + 1) / 2, 127) == -1) goto onError; @@ -5420,11 +5424,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s, and we determined it's exact size (common case) or it contains \x, \u, ... escape sequences. then we create a legacy wchar string and resize it at the end of this function. */ - _PyUnicodeWriter_Init(&writer, 0); + _PyUnicodeWriter_Init(&writer); if (len > 0) { - if (_PyUnicodeWriter_Prepare(&writer, len, 127) == -1) - goto onError; - assert(writer.kind == PyUnicode_1BYTE_KIND); + writer.min_length = len; } else { /* Escaped strings will always be longer than the resulting @@ -5432,8 +5434,7 @@ PyUnicode_DecodeUnicodeEscape(const char *s, length after conversion to the true value. 
(but if the error callback returns a long replacement string we'll have to allocate more space) */ - if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) - goto onError; + writer.min_length = size; } if (size == 0) @@ -5461,10 +5462,6 @@ PyUnicode_DecodeUnicodeEscape(const char *s, if (s > end) c = '\0'; /* Invalid after \ */ - /* The only case in which i == ascii_length is a backslash - followed by a newline. */ - assert(writer.pos < writer.size || (writer.pos == writer.size && c == '\n')); - switch (c) { /* \x escapes */ @@ -5787,9 +5784,8 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s, Unicode string, so we start with size here and then reduce the length after conversion to the true value. (But decoding error handler might have to resize the string) */ - _PyUnicodeWriter_Init(&writer, 1); - if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) - goto onError; + _PyUnicodeWriter_Init(&writer); + writer.min_length = size; end = s + size; while (s < end) { @@ -5982,12 +5978,14 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, if (size == 0) _Py_RETURN_UNICODE_EMPTY(); - /* XXX overflow detection missing */ - _PyUnicodeWriter_Init(&writer, 0); - if (_PyUnicodeWriter_Prepare(&writer, (size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127) == -1) + _PyUnicodeWriter_Init(&writer); + if (size / Py_UNICODE_SIZE > PY_SSIZE_T_MAX - 1) { + PyErr_NoMemory(); goto onError; - end = s + size; + } + writer.min_length = (size + (Py_UNICODE_SIZE - 1)) / Py_UNICODE_SIZE; + end = s + size; while (s < end) { Py_UNICODE uch; Py_UCS4 ch; @@ -6429,9 +6427,9 @@ PyUnicode_DecodeASCII(const char *s, if (size == 1 && (unsigned char)s[0] < 128) return get_latin1_char((unsigned char)s[0]); - _PyUnicodeWriter_Init(&writer, 0); - if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) - goto onError; + _PyUnicodeWriter_Init(&writer); + if (_PyUnicodeWriter_Prepare(&writer, size, 127) < 0) + return NULL; e = s + size; data = writer.data; @@ -7280,7 +7278,7 @@ PyUnicode_DecodeCharmap(const char 
*s, if (size == 0) _Py_RETURN_UNICODE_EMPTY(); - _PyUnicodeWriter_Init(&writer, 0); + _PyUnicodeWriter_Init(&writer); if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) goto onError; @@ -7312,7 +7310,7 @@ PyUnicode_DecodeCharmap(const char *s, ch = *s; x = mapdata_ucs1[ch]; if (x > maxchar) { - if (_PyUnicodeWriter_PrepareInternal(&writer, 1, 0xff) == -1) + if (_PyUnicodeWriter_Prepare(&writer, 1, 0xff) == -1) goto onError; maxchar = writer.maxchar; outdata = (Py_UCS1 *)writer.data; @@ -12841,21 +12839,27 @@ unicode_endswith(PyObject *self, Py_LOCAL_INLINE(void) _PyUnicodeWriter_Update(_PyUnicodeWriter *writer) { - writer->size = PyUnicode_GET_LENGTH(writer->buffer); + if (!writer->readonly) + writer->size = PyUnicode_GET_LENGTH(writer->buffer); + else { + /* Copy-on-write mode: set buffer size to 0 so + * _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on + * next write. */ + writer->size = 0; + } writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer); writer->data = PyUnicode_DATA(writer->buffer); writer->kind = PyUnicode_KIND(writer->buffer); } void -_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length) +_PyUnicodeWriter_Init(_PyUnicodeWriter *writer) { memset(writer, 0, sizeof(*writer)); #ifdef Py_DEBUG writer->kind = 5; /* invalid kind */ #endif - writer->min_length = Py_MAX(min_length, 100); - writer->overallocate = (min_length > 0); + writer->min_char = 127; } int @@ -12873,29 +12877,28 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, } newlen = writer->pos + length; + maxchar = MAX_MAXCHAR(maxchar, writer->min_char); + if (writer->buffer == NULL) { - if (writer->overallocate) { + assert(!writer->readonly); + if (writer->overallocate && newlen <= (PY_SSIZE_T_MAX - newlen / 4)) { /* overallocate 25% to limit the number of resize */ - if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) - newlen += newlen / 4; - if (newlen < writer->min_length) - newlen = writer->min_length; + newlen += newlen / 4; } + if (newlen < 
writer->min_length) + newlen = writer->min_length; + writer->buffer = PyUnicode_New(newlen, maxchar); if (writer->buffer == NULL) return -1; - _PyUnicodeWriter_Update(writer); - return 0; } - - if (newlen > writer->size) { - if (writer->overallocate) { + else if (newlen > writer->size) { + if (writer->overallocate && newlen <= (PY_SSIZE_T_MAX - newlen / 4)) { /* overallocate 25% to limit the number of resize */ - if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) - newlen += newlen / 4; - if (newlen < writer->min_length) - newlen = writer->min_length; + newlen += newlen / 4; } + if (newlen < writer->min_length) + newlen = writer->min_length; if (maxchar > writer->maxchar || writer->readonly) { /* resize + widen */ @@ -12913,7 +12916,6 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, return -1; } writer->buffer = newbuffer; - _PyUnicodeWriter_Update(writer); } else if (maxchar > writer->maxchar) { assert(!writer->readonly); @@ -12924,8 +12926,8 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, writer->buffer, 0, writer->pos); Py_DECREF(writer->buffer); writer->buffer = newbuffer; - _PyUnicodeWriter_Update(writer); } + _PyUnicodeWriter_Update(writer); return 0; } @@ -12959,11 +12961,10 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) maxchar = PyUnicode_MAX_CHAR_VALUE(str); if (maxchar > writer->maxchar || len > writer->size - writer->pos) { if (writer->buffer == NULL && !writer->overallocate) { + writer->readonly = 1; Py_INCREF(str); writer->buffer = str; _PyUnicodeWriter_Update(writer); - writer->readonly = 1; - writer->size = 0; writer->pos += len; return 0; } @@ -13080,7 +13081,7 @@ unicode__format__(PyObject* self, PyObject* args) if (PyUnicode_READY(self) == -1) return NULL; - _PyUnicodeWriter_Init(&writer, 0); + _PyUnicodeWriter_Init(&writer); ret = _PyUnicode_FormatAdvancedWriter(&writer, self, format_spec, 0, PyUnicode_GET_LENGTH(format_spec)); @@ -14164,7 +14165,9 @@ PyUnicode_Format(PyObject *format, PyObject *args) 
ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr); ctx.fmtpos = 0; - _PyUnicodeWriter_Init(&ctx.writer, ctx.fmtcnt + 100); + _PyUnicodeWriter_Init(&ctx.writer); + ctx.writer.min_length = ctx.fmtcnt + 100; + ctx.writer.overallocate = 1; if (PyTuple_Check(args)) { ctx.arglen = PyTuple_Size(args); -- cgit v1.2.1 From b1ba991e7fd2295126d6cd80cf6d05d18d509210 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 17 Apr 2013 23:58:16 +0200 Subject: Fix typo in unicode_decode_call_errorhandler_writer() Bug introduced by changeset 7ed9993d53b4. --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c4157d8270..d4cb9c985d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4124,7 +4124,7 @@ unicode_decode_call_errorhandler_writer( if (PyUnicode_GET_LENGTH(repunicode) > 1) writer->overallocate = 1; if (_PyUnicodeWriter_WriteStr(writer, repunicode) == -1) - return + goto onError; *endinpos = newpos; *inptr = *input + newpos; -- cgit v1.2.1 From f1f1a1a7f419fc8c1155d96f1ec8c03e97fefe8e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 18 Apr 2013 00:25:28 +0200 Subject: Fix bug in Unicode decoders related to _PyUnicodeWriter Bug introduced by changesets 7ed9993d53b4 and edf029fc9591. 
--- Objects/unicodeobject.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d4cb9c985d..9a59f38211 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4071,6 +4071,7 @@ unicode_decode_call_errorhandler_writer( PyObject *repunicode = NULL; Py_ssize_t insize; Py_ssize_t newpos; + Py_ssize_t replen; PyObject *inputobj = NULL; if (*errorHandler == NULL) { @@ -4121,7 +4122,9 @@ unicode_decode_call_errorhandler_writer( if (PyUnicode_READY(repunicode) < 0) goto onError; - if (PyUnicode_GET_LENGTH(repunicode) > 1) + replen = PyUnicode_GET_LENGTH(repunicode); + writer->min_length += replen; + if (replen > 1) writer->overallocate = 1; if (_PyUnicodeWriter_WriteStr(writer, repunicode) == -1) goto onError; @@ -4660,7 +4663,8 @@ PyUnicode_DecodeUTF8Stateful(const char *s, } _PyUnicodeWriter_Init(&writer); - if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) + writer.min_length = size; + if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1) goto onError; writer.pos = ascii_decode(s, end, writer.data); @@ -4915,7 +4919,8 @@ PyUnicode_DecodeUTF32Stateful(const char *s, #endif _PyUnicodeWriter_Init(&writer); - if (_PyUnicodeWriter_Prepare(&writer, (e - q + 3) / 4, 127) == -1) + writer.min_length = (e - q + 3) / 4; + if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1) goto onError; while (1) { @@ -5154,7 +5159,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s, /* Note: size will always be longer than the resulting Unicode character count */ _PyUnicodeWriter_Init(&writer); - if (_PyUnicodeWriter_Prepare(&writer, (e - q + 1) / 2, 127) == -1) + writer.min_length = (e - q + 1) / 2; + if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1) goto onError; while (1) { @@ -6428,7 +6434,8 @@ PyUnicode_DecodeASCII(const char *s, return get_latin1_char((unsigned char)s[0]); _PyUnicodeWriter_Init(&writer); - if 
(_PyUnicodeWriter_Prepare(&writer, size, 127) < 0) + writer.min_length = size; + if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) < 0) return NULL; e = s + size; @@ -7279,7 +7286,8 @@ PyUnicode_DecodeCharmap(const char *s, if (size == 0) _Py_RETURN_UNICODE_EMPTY(); _PyUnicodeWriter_Init(&writer); - if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) + writer.min_length = size; + if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1) goto onError; e = s + size; -- cgit v1.2.1 From acf2368f248b0b391f056088e6ef728a8aacd466 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 18 Apr 2013 01:44:27 +0200 Subject: Split PyUnicode_DecodeCharmap() into subfunction for readability --- Objects/unicodeobject.c | 391 ++++++++++++++++++++++++++---------------------- 1 file changed, 213 insertions(+), 178 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9a59f38211..076674c68b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7265,222 +7265,257 @@ PyUnicode_AsMBCSString(PyObject *unicode) /* --- Character Mapping Codec -------------------------------------------- */ -PyObject * -PyUnicode_DecodeCharmap(const char *s, - Py_ssize_t size, - PyObject *mapping, - const char *errors) +static int +charmap_decode_string(const char *s, + Py_ssize_t size, + PyObject *mapping, + const char *errors, + _PyUnicodeWriter *writer) { const char *starts = s; - Py_ssize_t startinpos; - Py_ssize_t endinpos; const char *e; - _PyUnicodeWriter writer; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - - /* Default to Latin-1 */ - if (mapping == NULL) - return PyUnicode_DecodeLatin1(s, size, errors); + Py_ssize_t startinpos, endinpos; + PyObject *errorHandler = NULL, *exc = NULL; + Py_ssize_t maplen; + enum PyUnicode_Kind mapkind; + void *mapdata; + Py_UCS4 x; + unsigned char ch; + + if (PyUnicode_READY(mapping) == -1) + return -1; - if (size == 0) - _Py_RETURN_UNICODE_EMPTY(); - 
_PyUnicodeWriter_Init(&writer); - writer.min_length = size; - if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1) - goto onError; + maplen = PyUnicode_GET_LENGTH(mapping); + mapdata = PyUnicode_DATA(mapping); + mapkind = PyUnicode_KIND(mapping); e = s + size; - if (PyUnicode_CheckExact(mapping)) { - Py_ssize_t maplen; - enum PyUnicode_Kind mapkind; - void *mapdata; - Py_UCS4 x; - unsigned char ch; - if (PyUnicode_READY(mapping) == -1) - return NULL; + if (mapkind == PyUnicode_1BYTE_KIND && maplen >= 256) { + /* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1 + * is disabled in encoding aliases, latin1 is preferred because + * its implementation is faster. */ + Py_UCS1 *mapdata_ucs1 = (Py_UCS1 *)mapdata; + Py_UCS1 *outdata = (Py_UCS1 *)writer->data; + Py_UCS4 maxchar = writer->maxchar; - maplen = PyUnicode_GET_LENGTH(mapping); - mapdata = PyUnicode_DATA(mapping); - mapkind = PyUnicode_KIND(mapping); - - if (mapkind == PyUnicode_1BYTE_KIND && maplen >= 256) { - /* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1 - * is disabled in encoding aliases, latin1 is preferred because - * its implementation is faster. 
*/ - Py_UCS1 *mapdata_ucs1 = (Py_UCS1 *)mapdata; - Py_UCS1 *outdata = (Py_UCS1 *)writer.data; - Py_UCS4 maxchar = writer.maxchar; - - assert (writer.kind == PyUnicode_1BYTE_KIND); - while (s < e) { - ch = *s; - x = mapdata_ucs1[ch]; - if (x > maxchar) { - if (_PyUnicodeWriter_Prepare(&writer, 1, 0xff) == -1) - goto onError; - maxchar = writer.maxchar; - outdata = (Py_UCS1 *)writer.data; - } - outdata[writer.pos] = x; - writer.pos++; - ++s; + assert (writer->kind == PyUnicode_1BYTE_KIND); + while (s < e) { + ch = *s; + x = mapdata_ucs1[ch]; + if (x > maxchar) { + if (_PyUnicodeWriter_Prepare(writer, 1, 0xff) == -1) + goto onError; + maxchar = writer->maxchar; + outdata = (Py_UCS1 *)writer->data; } + outdata[writer->pos] = x; + writer->pos++; + ++s; } + return 0; + } - while (s < e) { - if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) { - enum PyUnicode_Kind outkind = writer.kind; - Py_UCS2 *mapdata_ucs2 = (Py_UCS2 *)mapdata; - if (outkind == PyUnicode_1BYTE_KIND) { - Py_UCS1 *outdata = (Py_UCS1 *)writer.data; - Py_UCS4 maxchar = writer.maxchar; - while (s < e) { - ch = *s; - x = mapdata_ucs2[ch]; - if (x > maxchar) - goto Error; - outdata[writer.pos] = x; - writer.pos++; - ++s; - } - break; + while (s < e) { + if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) { + enum PyUnicode_Kind outkind = writer->kind; + Py_UCS2 *mapdata_ucs2 = (Py_UCS2 *)mapdata; + if (outkind == PyUnicode_1BYTE_KIND) { + Py_UCS1 *outdata = (Py_UCS1 *)writer->data; + Py_UCS4 maxchar = writer->maxchar; + while (s < e) { + ch = *s; + x = mapdata_ucs2[ch]; + if (x > maxchar) + goto Error; + outdata[writer->pos] = x; + writer->pos++; + ++s; } - else if (outkind == PyUnicode_2BYTE_KIND) { - Py_UCS2 *outdata = (Py_UCS2 *)writer.data; - while (s < e) { - ch = *s; - x = mapdata_ucs2[ch]; - if (x == 0xFFFE) - goto Error; - outdata[writer.pos] = x; - writer.pos++; - ++s; - } - break; + break; + } + else if (outkind == PyUnicode_2BYTE_KIND) { + Py_UCS2 *outdata = (Py_UCS2 *)writer->data; + while 
(s < e) { + ch = *s; + x = mapdata_ucs2[ch]; + if (x == 0xFFFE) + goto Error; + outdata[writer->pos] = x; + writer->pos++; + ++s; } + break; } - ch = *s; + } + ch = *s; - if (ch < maplen) - x = PyUnicode_READ(mapkind, mapdata, ch); - else - x = 0xfffe; /* invalid value */ + if (ch < maplen) + x = PyUnicode_READ(mapkind, mapdata, ch); + else + x = 0xfffe; /* invalid value */ Error: - if (x == 0xfffe) - { - /* undefined mapping */ - startinpos = s-starts; - endinpos = startinpos+1; - if (unicode_decode_call_errorhandler_writer( - errors, &errorHandler, - "charmap", "character maps to <undefined>", - &starts, &e, &startinpos, &endinpos, &exc, &s, - &writer)) { - goto onError; - } - continue; + if (x == 0xfffe) + { + /* undefined mapping */ + startinpos = s-starts; + endinpos = startinpos+1; + if (unicode_decode_call_errorhandler_writer( + errors, &errorHandler, + "charmap", "character maps to <undefined>", + &starts, &e, &startinpos, &endinpos, &exc, &s, + writer)) { + goto onError; } + continue; + } - if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0) + if (_PyUnicodeWriter_WriteCharInline(writer, x) < 0) + goto onError; + ++s; + } + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return 0; + +onError: + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return -1; +} + +static int +charmap_decode_mapping(const char *s, + Py_ssize_t size, + PyObject *mapping, + const char *errors, + _PyUnicodeWriter *writer) +{ + const char *starts = s; + const char *e; + Py_ssize_t startinpos, endinpos; + PyObject *errorHandler = NULL, *exc = NULL; + unsigned char ch; + PyObject *key, *item; + + e = s + size; + + while (s < e) { + ch = *s; + + /* Get mapping (char ordinal -> integer, Unicode char or None) */ + key = PyLong_FromLong((long)ch); + if (key == NULL) + goto onError; + + item = PyObject_GetItem(mapping, key); + Py_DECREF(key); + if (item == NULL) { + if (PyErr_ExceptionMatches(PyExc_LookupError)) { + /* No mapping found means: mapping is undefined.
*/ + PyErr_Clear(); + goto Undefined; + } else goto onError; - ++s; } - } - else { - while (s < e) { - unsigned char ch = *s; - PyObject *w, *x; - /* Get mapping (char ordinal -> integer, Unicode char or None) */ - w = PyLong_FromLong((long)ch); - if (w == NULL) + /* Apply mapping */ + if (item == Py_None) + goto Undefined; + if (PyLong_Check(item)) { + long value = PyLong_AS_LONG(item); + if (value == 0xFFFE) + goto Undefined; + if (value < 0 || value > MAX_UNICODE) { + PyErr_Format(PyExc_TypeError, + "character mapping must be in range(0x%lx)", + (unsigned long)MAX_UNICODE + 1); goto onError; - x = PyObject_GetItem(mapping, w); - Py_DECREF(w); - if (x == NULL) { - if (PyErr_ExceptionMatches(PyExc_LookupError)) { - /* No mapping found means: mapping is undefined. */ - PyErr_Clear(); - goto Undefined; - } else - goto onError; } - /* Apply mapping */ - if (x == Py_None) - goto Undefined; - if (PyLong_Check(x)) { - long value = PyLong_AS_LONG(x); + if (_PyUnicodeWriter_WriteCharInline(writer, value) < 0) + goto onError; + } + else if (PyUnicode_Check(item)) { + if (PyUnicode_READY(item) == -1) + goto onError; + if (PyUnicode_GET_LENGTH(item) == 1) { + Py_UCS4 value = PyUnicode_READ_CHAR(item, 0); if (value == 0xFFFE) goto Undefined; - if (value < 0 || value > MAX_UNICODE) { - PyErr_Format(PyExc_TypeError, - "character mapping must be in range(0x%lx)", - (unsigned long)MAX_UNICODE + 1); - Py_DECREF(x); - goto onError; - } - - if (_PyUnicodeWriter_WriteCharInline(&writer, value) < 0) { - Py_DECREF(x); + if (_PyUnicodeWriter_WriteCharInline(writer, value) < 0) goto onError; - } - } - else if (PyUnicode_Check(x)) { - if (PyUnicode_READY(x) == -1) { - Py_DECREF(x); - goto onError; - } - if (PyUnicode_GET_LENGTH(x) == 1) { - Py_UCS4 value = PyUnicode_READ_CHAR(x, 0); - if (value == 0xFFFE) - goto Undefined; - if (_PyUnicodeWriter_WriteCharInline(&writer, value) < 0) { - Py_DECREF(x); - goto onError; - } - } - else { - writer.overallocate = 1; - if 
(_PyUnicodeWriter_WriteStr(&writer, x) == -1) { - Py_DECREF(x); - goto onError; - } - } } else { - /* wrong return value */ - PyErr_SetString(PyExc_TypeError, - "character mapping must return integer, None or str"); - Py_DECREF(x); - goto onError; + writer->overallocate = 1; + if (_PyUnicodeWriter_WriteStr(writer, item) == -1) + goto onError; } - Py_DECREF(x); - ++s; - continue; + } + else { + /* wrong return value */ + PyErr_SetString(PyExc_TypeError, + "character mapping must return integer, None or str"); + goto onError; + } + Py_CLEAR(item); + ++s; + continue; + Undefined: - /* undefined mapping */ - Py_XDECREF(x); - startinpos = s-starts; - endinpos = startinpos+1; - if (unicode_decode_call_errorhandler_writer( - errors, &errorHandler, - "charmap", "character maps to <undefined>", - &starts, &e, &startinpos, &endinpos, &exc, &s, - &writer)) { - goto onError; - } + /* undefined mapping */ + Py_CLEAR(item); + startinpos = s-starts; + endinpos = startinpos+1; + if (unicode_decode_call_errorhandler_writer( + errors, &errorHandler, + "charmap", "character maps to <undefined>", + &starts, &e, &startinpos, &endinpos, &exc, &s, + writer)) { + goto onError; } } Py_XDECREF(errorHandler); Py_XDECREF(exc); - return _PyUnicodeWriter_Finish(&writer); + return 0; - onError: +onError: + Py_XDECREF(item); Py_XDECREF(errorHandler); Py_XDECREF(exc); + return -1; +} + +PyObject * +PyUnicode_DecodeCharmap(const char *s, + Py_ssize_t size, + PyObject *mapping, + const char *errors) +{ + _PyUnicodeWriter writer; + + /* Default to Latin-1 */ + if (mapping == NULL) + return PyUnicode_DecodeLatin1(s, size, errors); + + if (size == 0) + _Py_RETURN_UNICODE_EMPTY(); + _PyUnicodeWriter_Init(&writer); + writer.min_length = size; + if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1) + goto onError; + + if (PyUnicode_CheckExact(mapping)) { + if (charmap_decode_string(s, size, mapping, errors, &writer) < 0) + goto onError; + } + else { + if (charmap_decode_mapping(s, size, mapping, errors, &writer)
< 0) + goto onError; + } + return _PyUnicodeWriter_Finish(&writer); + + onError: _PyUnicodeWriter_Dealloc(&writer); return NULL; } -- cgit v1.2.1 From 8908dc6ed2473a2c838bb0832b49dc242dc3a90b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 18 Apr 2013 09:41:34 +0200 Subject: Partial revert of changeset 9744b2df134c PyUnicode_Append() cannot call directly resize_compact(): I forgot that a string can be ready *and* not compact (a legacy string can also be ready). --- Objects/unicodeobject.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 076674c68b..4c532afc3e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10748,12 +10748,11 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) && !(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right))) { /* append inplace */ - res = resize_compact(left, new_len); - if (res == NULL) + if (unicode_resize(p_left, new_len) != 0) goto error; - /* copy 'right' into the newly allocated area of 'res' (left) */ - _PyUnicode_FastCopyCharacters(res, left_len, right, 0, right_len); + /* copy 'right' into the newly allocated area of 'left' */ + _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len); } else { maxchar = PyUnicode_MAX_CHAR_VALUE(left); @@ -10767,8 +10766,8 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) _PyUnicode_FastCopyCharacters(res, 0, left, 0, left_len); _PyUnicode_FastCopyCharacters(res, left_len, right, 0, right_len); Py_DECREF(left); + *p_left = res; } - *p_left = res; assert(_PyUnicode_CheckConsistency(*p_left, 1)); return; -- cgit v1.2.1 From 05af1e7fca1463b6a0fc7a790313d93a3e68c222 Mon Sep 17 00:00:00 2001 From: Alexandre Vassalotti Date: Thu, 2 May 2013 10:44:04 -0700 Subject: Closes #17892: Fix the name of _PyObject_CallMethodObjIdArgs --- Objects/abstract.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git 
a/Objects/abstract.c b/Objects/abstract.c index bfd53c9bfb..0934b950f0 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -2343,7 +2343,7 @@ PyObject_CallMethodObjArgs(PyObject *callable, PyObject *name, ...) } PyObject * -_PyObject_CallMethodObjIdArgs(PyObject *callable, +_PyObject_CallMethodIdObjArgs(PyObject *callable, struct _Py_Identifier *name, ...) { PyObject *args, *tmp; -- cgit v1.2.1 From 0010471202cc1a3eff955f556a247c78e7e2c34b Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Sat, 4 May 2013 13:56:58 -0400 Subject: #17115,17116: Have modules initialize the __package__ and __loader__ attributes to None. The long-term goal is for people to be able to rely on these attributes existing and checking for None to see if they have been set. Since import itself sets these attributes when a loader does not the only instances when the attributes are None are from someone overloading __import__() and not using a loader or someone creating a module from scratch. This patch also unifies module initialization. Before you could have different attributes with default values depending on how the module object was created. Now the only way to not get the same default set of attributes is to circumvent initialization by calling ModuleType.__new__() directly. 
--- Objects/moduleobject.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) (limited to 'Objects') diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index 2f2bd36b55..5970901558 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -26,6 +26,27 @@ static PyTypeObject moduledef_type = { }; +static int +module_init_dict(PyObject *md_dict, PyObject *name, PyObject *doc) +{ + if (md_dict == NULL) + return -1; + if (doc == NULL) + doc = Py_None; + + if (PyDict_SetItemString(md_dict, "__name__", name) != 0) + return -1; + if (PyDict_SetItemString(md_dict, "__doc__", doc) != 0) + return -1; + if (PyDict_SetItemString(md_dict, "__package__", Py_None) != 0) + return -1; + if (PyDict_SetItemString(md_dict, "__loader__", Py_None) != 0) + return -1; + + return 0; +} + + PyObject * PyModule_NewObject(PyObject *name) { @@ -36,13 +57,7 @@ PyModule_NewObject(PyObject *name) m->md_def = NULL; m->md_state = NULL; m->md_dict = PyDict_New(); - if (m->md_dict == NULL) - goto fail; - if (PyDict_SetItemString(m->md_dict, "__name__", name) != 0) - goto fail; - if (PyDict_SetItemString(m->md_dict, "__doc__", Py_None) != 0) - goto fail; - if (PyDict_SetItemString(m->md_dict, "__package__", Py_None) != 0) + if (module_init_dict(m->md_dict, name, NULL) != 0) goto fail; PyObject_GC_Track(m); return (PyObject *)m; @@ -347,9 +362,7 @@ module_init(PyModuleObject *m, PyObject *args, PyObject *kwds) return -1; m->md_dict = dict; } - if (PyDict_SetItemString(dict, "__name__", name) < 0) - return -1; - if (PyDict_SetItemString(dict, "__doc__", doc) < 0) + if (module_init_dict(dict, name, doc) < 0) return -1; return 0; } @@ -380,7 +393,7 @@ module_repr(PyModuleObject *m) if (m->md_dict != NULL) { loader = PyDict_GetItemString(m->md_dict, "__loader__"); } - if (loader != NULL) { + if (loader != NULL && loader != Py_None) { repr = PyObject_CallMethod(loader, "module_repr", "(O)", (PyObject *)m, NULL); if (repr == NULL) { @@ -404,10 
+417,10 @@ module_repr(PyModuleObject *m) filename = PyModule_GetFilenameObject((PyObject *)m); if (filename == NULL) { PyErr_Clear(); - /* There's no m.__file__, so if there was an __loader__, use that in + /* There's no m.__file__, so if there was a __loader__, use that in * the repr, otherwise, the only thing you can use is m.__name__ */ - if (loader == NULL) { + if (loader == NULL || loader == Py_None) { repr = PyUnicode_FromFormat("<module %R>", name); } else { -- cgit v1.2.1 From 10527d88576456c9f91d17043979ed231664a774 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 6 May 2013 23:11:54 +0200 Subject: Issue #7330: Implement width and precision (ex: "%5.3s") for the format string of PyUnicode_FromFormat() function, original patch written by Ysj Ray. --- Objects/unicodeobject.c | 155 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 109 insertions(+), 46 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4c532afc3e..795c18f25b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2346,6 +2346,67 @@ makefmt(char *fmt, int longflag, int longlongflag, int size_tflag, plus 1 for the sign. 53/22 is an upper bound for log10(256).
*/ #define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22) +static int +unicode_fromformat_write_str(_PyUnicodeWriter *writer, PyObject *str, + Py_ssize_t width, Py_ssize_t precision) +{ + Py_ssize_t length, fill, arglen; + Py_UCS4 maxchar; + + if (PyUnicode_READY(str) == -1) + return -1; + + length = PyUnicode_GET_LENGTH(str); + if ((precision == -1 || precision >= length) + && width <= length) + return _PyUnicodeWriter_WriteStr(writer, str); + + if (precision != -1) + length = Py_MIN(precision, length); + + arglen = Py_MAX(length, width); + if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar) + maxchar = _PyUnicode_FindMaxChar(str, 0, length); + else + maxchar = writer->maxchar; + + if (_PyUnicodeWriter_Prepare(writer, arglen, maxchar) == -1) + return -1; + + if (width > length) { + fill = width - length; + if (PyUnicode_Fill(writer->buffer, writer->pos, fill, ' ') == -1) + return -1; + writer->pos += fill; + } + + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + str, 0, length); + writer->pos += length; + return 0; +} + +static int +unicode_fromformat_write_cstr(_PyUnicodeWriter *writer, const char *str, + Py_ssize_t width, Py_ssize_t precision) +{ + /* UTF-8 */ + Py_ssize_t length; + PyObject *unicode; + int res; + + length = strlen(str); + if (precision != -1) + length = Py_MIN(length, precision); + unicode = PyUnicode_DecodeUTF8Stateful(str, length, "replace", NULL); + if (unicode == NULL) + return -1; + + res = unicode_fromformat_write_str(writer, unicode, width, -1); + Py_DECREF(unicode); + return res; +} + static const char* unicode_fromformat_arg(_PyUnicodeWriter *writer, const char *f, va_list *vargs) @@ -2353,12 +2414,12 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, const char *p; Py_ssize_t len; int zeropad; - int width; - int precision; + Py_ssize_t width; + Py_ssize_t precision; int longflag; int longlongflag; int size_tflag; - int fill; + Py_ssize_t fill; p = f; f++; @@ -2369,28 +2430,36 @@ 
unicode_fromformat_arg(_PyUnicodeWriter *writer, } /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */ - width = 0; - while (Py_ISDIGIT((unsigned)*f)) { - if (width > (INT_MAX - ((int)*f - '0')) / 10) { - PyErr_SetString(PyExc_ValueError, - "width too big"); - return NULL; - } - width = (width*10) + (*f - '0'); - f++; - } - precision = 0; - if (*f == '.') { + width = -1; + if (Py_ISDIGIT((unsigned)*f)) { + width = *f - '0'; f++; while (Py_ISDIGIT((unsigned)*f)) { - if (precision > (INT_MAX - ((int)*f - '0')) / 10) { + if (width > (PY_SSIZE_T_MAX - ((int)*f - '0')) / 10) { PyErr_SetString(PyExc_ValueError, - "precision too big"); + "width too big"); return NULL; } - precision = (precision*10) + (*f - '0'); + width = (width * 10) + (*f - '0'); f++; } + } + precision = -1; + if (*f == '.') { + f++; + if (Py_ISDIGIT((unsigned)*f)) { + precision = (*f - '0'); + f++; + while (Py_ISDIGIT((unsigned)*f)) { + if (precision > (PY_SSIZE_T_MAX - ((int)*f - '0')) / 10) { + PyErr_SetString(PyExc_ValueError, + "precision too big"); + return NULL; + } + precision = (precision * 10) + (*f - '0'); + f++; + } + } if (*f == '%') { /* "%.3%s" => f points to "3" */ f--; @@ -2449,6 +2518,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, /* used by sprintf */ char fmt[10]; /* should be enough for "%0lld\0" */ char buffer[MAX_LONG_LONG_CHARS]; + Py_ssize_t arglen; if (*f == 'u') { makefmt(fmt, longflag, longlongflag, size_tflag, *f); @@ -2494,26 +2564,29 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, if (precision < len) precision = len; + + arglen = Py_MAX(precision, width); + assert(ucs1lib_find_max_char((Py_UCS1*)buffer, (Py_UCS1*)buffer + len) <= 127); + if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1) + return NULL; + if (width > precision) { Py_UCS4 fillchar; fill = width - precision; fillchar = zeropad?'0':' '; - if (_PyUnicodeWriter_Prepare(writer, fill, fillchar) == -1) - return NULL; if (PyUnicode_Fill(writer->buffer, writer->pos, fill, 
fillchar) == -1) return NULL; writer->pos += fill; } if (precision > len) { fill = precision - len; - if (_PyUnicodeWriter_Prepare(writer, fill, '0') == -1) - return NULL; if (PyUnicode_Fill(writer->buffer, writer->pos, fill, '0') == -1) return NULL; writer->pos += fill; } - if (_PyUnicodeWriter_WriteCstr(writer, buffer, len) == -1) - return NULL; + + unicode_write_cstr(writer->buffer, writer->pos, buffer, len); + writer->pos += len; break; } @@ -2535,8 +2608,11 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, len += 2; } - if (_PyUnicodeWriter_WriteCstr(writer, number, len) == -1) + assert(ucs1lib_find_max_char((Py_UCS1*)number, (Py_UCS1*)number + len) <= 127); + if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) return NULL; + unicode_write_cstr(writer->buffer, writer->pos, number, len); + writer->pos += len; break; } @@ -2544,14 +2620,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, { /* UTF-8 */ const char *s = va_arg(*vargs, const char*); - PyObject *str = PyUnicode_DecodeUTF8Stateful(s, strlen(s), "replace", NULL); - if (!str) + if (unicode_fromformat_write_cstr(writer, s, width, precision) < 0) return NULL; - if (_PyUnicodeWriter_WriteStr(writer, str) == -1) { - Py_DECREF(str); - return NULL; - } - Py_DECREF(str); break; } @@ -2560,7 +2630,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, PyObject *obj = va_arg(*vargs, PyObject *); assert(obj && _PyUnicode_CHECK(obj)); - if (_PyUnicodeWriter_WriteStr(writer, obj) == -1) + if (unicode_fromformat_write_str(writer, obj, width, precision) == -1) return NULL; break; } @@ -2569,22 +2639,15 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, { PyObject *obj = va_arg(*vargs, PyObject *); const char *str = va_arg(*vargs, const char *); - PyObject *str_obj; - assert(obj || str); if (obj) { assert(_PyUnicode_CHECK(obj)); - if (_PyUnicodeWriter_WriteStr(writer, obj) == -1) + if (unicode_fromformat_write_str(writer, obj, width, precision) == -1) return NULL; } else { - str_obj = 
PyUnicode_DecodeUTF8Stateful(str, strlen(str), "replace", NULL); - if (!str_obj) - return NULL; - if (_PyUnicodeWriter_WriteStr(writer, str_obj) == -1) { - Py_DECREF(str_obj); + assert(str != NULL); + if (unicode_fromformat_write_cstr(writer, str, width, precision) < 0) return NULL; - } - Py_DECREF(str_obj); } break; } @@ -2597,7 +2660,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, str = PyObject_Str(obj); if (!str) return NULL; - if (_PyUnicodeWriter_WriteStr(writer, str) == -1) { + if (unicode_fromformat_write_str(writer, str, width, precision) == -1) { Py_DECREF(str); return NULL; } @@ -2613,7 +2676,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, repr = PyObject_Repr(obj); if (!repr) return NULL; - if (_PyUnicodeWriter_WriteStr(writer, repr) == -1) { + if (unicode_fromformat_write_str(writer, repr, width, precision) == -1) { Py_DECREF(repr); return NULL; } @@ -2629,7 +2692,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, ascii = PyObject_ASCII(obj); if (!ascii) return NULL; - if (_PyUnicodeWriter_WriteStr(writer, ascii) == -1) { + if (unicode_fromformat_write_str(writer, ascii, width, precision) == -1) { Py_DECREF(ascii); return NULL; } -- cgit v1.2.1 From d6215d906be8c506f2ef3163bfe9e3a05adc482d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 7 May 2013 01:01:31 +0200 Subject: Fix uninitialized value in charmap_decode_mapping() --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 795c18f25b..c5e50eb68b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7459,7 +7459,7 @@ charmap_decode_mapping(const char *s, Py_ssize_t startinpos, endinpos; PyObject *errorHandler = NULL, *exc = NULL; unsigned char ch; - PyObject *key, *item; + PyObject *key, *item = NULL; e = s + size; -- cgit v1.2.1 From 299aa6988c7a1c896d250366e03da0dd4b8ae7aa Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 8 May 2013 
00:44:15 +0200 Subject: Use Py_intptr_t to store the difference between two pointers, instead of int Fix a compiler warning on Windows 64-bit --- Objects/descrobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/descrobject.c b/Objects/descrobject.c index 3cf00d5293..d4f8048fa6 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -1009,7 +1009,7 @@ wrapper_dealloc(wrapperobject *wp) static PyObject * wrapper_richcompare(PyObject *a, PyObject *b, int op) { - int result; + Py_intptr_t result; PyObject *v; PyWrapperDescrObject *a_descr, *b_descr; -- cgit v1.2.1 From 29478d3f28b052eb4346d6a8cbc6a2cb5fd936ef Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 8 May 2013 18:12:35 +0200 Subject: Issue #17807: Generators can now be finalized even when they are part of a reference cycle. --- Objects/frameobject.c | 254 ++++++++++++++++++++++++++++++++++++++++++++++---- Objects/genobject.c | 251 +++++++------------------------------------------ 2 files changed, 266 insertions(+), 239 deletions(-) (limited to 'Objects') diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 6fff370bba..df7a1de03e 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -31,6 +31,195 @@ frame_getlocals(PyFrameObject *f, void *closure) return f->f_locals; } +/* + * Generator support. 
+ */ + +PyObject * +_PyFrame_YieldingFrom(PyFrameObject *f) +{ + PyObject *yf = NULL; + + if (f && f->f_stacktop) { + PyObject *bytecode = f->f_code->co_code; + unsigned char *code = (unsigned char *)PyBytes_AS_STRING(bytecode); + + if (code[f->f_lasti + 1] != YIELD_FROM) + return NULL; + yf = f->f_stacktop[-1]; + Py_INCREF(yf); + } + return yf; +} + +PyObject * +_PyFrame_GeneratorSend(PyFrameObject *f, PyObject *arg, int exc) +{ + PyThreadState *tstate = PyThreadState_GET(); + PyObject *result; + PyGenObject *gen = (PyGenObject *) f->f_gen; + + assert(gen == NULL || PyGen_CheckExact(gen)); + if (gen && gen->gi_running) { + PyErr_SetString(PyExc_ValueError, + "generator already executing"); + return NULL; + } + if (f->f_stacktop == NULL) { + /* Only set exception if send() called, not throw() or next() */ + if (arg && !exc) + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + if (f->f_lasti == -1) { + if (arg && arg != Py_None) { + PyErr_SetString(PyExc_TypeError, + "can't send non-None value to a " + "just-started generator"); + return NULL; + } + } else { + /* Push arg onto the frame's value stack */ + result = arg ? arg : Py_None; + Py_INCREF(result); + *(f->f_stacktop++) = result; + } + + /* Generators always return to their most recent caller, not + * necessarily their creator. */ + Py_XINCREF(tstate->frame); + assert(f->f_back == NULL); + f->f_back = tstate->frame; + + if (gen) { + Py_INCREF(gen); + gen->gi_running = 1; + } + result = PyEval_EvalFrameEx(f, exc); + if (gen) { + gen->gi_running = 0; + /* In case running the frame has lost all external references + * to gen, we must be careful not to hold on an invalid object. */ + if (Py_REFCNT(gen) == 1) + Py_CLEAR(gen); + else + Py_DECREF(gen); + } + + /* Don't keep the reference to f_back any longer than necessary. It + * may keep a chain of frames alive or it could create a reference + * cycle. 
*/ + assert(f->f_back == tstate->frame); + Py_CLEAR(f->f_back); + + /* If the generator just returned (as opposed to yielding), signal + * that the generator is exhausted. */ + if (result && f->f_stacktop == NULL) { + if (result == Py_None) { + /* Delay exception instantiation if we can */ + PyErr_SetNone(PyExc_StopIteration); + } else { + PyObject *e = PyObject_CallFunctionObjArgs( + PyExc_StopIteration, result, NULL); + if (e != NULL) { + PyErr_SetObject(PyExc_StopIteration, e); + Py_DECREF(e); + } + } + Py_CLEAR(result); + } + + if (f->f_stacktop == NULL) { + /* generator can't be rerun, so release the frame */ + /* first clean reference cycle through stored exception traceback */ + PyObject *t, *v, *tb; + t = f->f_exc_type; + v = f->f_exc_value; + tb = f->f_exc_traceback; + f->f_exc_type = NULL; + f->f_exc_value = NULL; + f->f_exc_traceback = NULL; + Py_XDECREF(t); + Py_XDECREF(v); + Py_XDECREF(tb); + if (gen) { + f->f_gen = NULL; + Py_CLEAR(gen->gi_frame); + } + } + + return result; +} + +int +_PyFrame_CloseIterator(PyObject *yf) +{ + PyObject *retval = NULL; + _Py_IDENTIFIER(close); + + if (PyGen_CheckExact(yf)) { + PyFrameObject *f = ((PyGenObject *) yf)->gi_frame; + assert(f != NULL); + retval = _PyFrame_Finalize(f); + if (retval == NULL) + return -1; + } else { + PyObject *meth = _PyObject_GetAttrId(yf, &PyId_close); + if (meth == NULL) { + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_WriteUnraisable(yf); + PyErr_Clear(); + } else { + retval = PyObject_CallFunction(meth, ""); + Py_DECREF(meth); + if (retval == NULL) + return -1; + } + } + Py_XDECREF(retval); + return 0; +} + +PyObject * +_PyFrame_Finalize(PyFrameObject *f) +{ + int err = 0; + PyObject *retval; + PyGenObject *gen = (PyGenObject *) f->f_gen; + PyObject *yf = _PyFrame_YieldingFrom(f); + + assert(gen == NULL || PyGen_CheckExact(gen)); + if (yf) { + if (gen) + gen->gi_running = 1; + err = _PyFrame_CloseIterator(yf); + if (gen) + gen->gi_running = 0; + Py_DECREF(yf); + } + if (err 
== 0) + PyErr_SetNone(PyExc_GeneratorExit); + retval = _PyFrame_GeneratorSend(f, Py_None, 1); + if (retval) { + Py_DECREF(retval); + PyErr_SetString(PyExc_RuntimeError, + "generator ignored GeneratorExit"); + return NULL; + } + if (PyErr_ExceptionMatches(PyExc_StopIteration) + || PyErr_ExceptionMatches(PyExc_GeneratorExit)) { + PyErr_Clear(); /* ignore these errors */ + Py_INCREF(Py_None); + return Py_None; + } + return NULL; +} + +/* + * Line number support. + */ + int PyFrame_GetLineNumber(PyFrameObject *f) { @@ -419,33 +608,44 @@ static int numfree = 0; /* number of frames currently in free_list */ /* max value for numfree */ #define PyFrame_MAXFREELIST 200 +static void +frame_clear(PyFrameObject *f); + static void frame_dealloc(PyFrameObject *f) { - PyObject **p, **valuestack; PyCodeObject *co; + Py_REFCNT(f)++; + frame_clear(f); + Py_REFCNT(f)--; + if (Py_REFCNT(f) > 0) { + /* Frame resurrected! */ + Py_ssize_t refcnt = Py_REFCNT(f); + _Py_NewReference((PyObject *) f); + Py_REFCNT(f) = refcnt; + /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so + * we need to undo that. */ + _Py_DEC_REFTOTAL; + /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object + * chain, so no more to do there. + * If COUNT_ALLOCS, the original decref bumped tp_frees, and + * _Py_NewReference bumped tp_allocs: both of those need to be + * undone. 
+ */ +#ifdef COUNT_ALLOCS + --(Py_TYPE(self)->tp_frees); + --(Py_TYPE(self)->tp_allocs); +#endif + } + PyObject_GC_UnTrack(f); Py_TRASHCAN_SAFE_BEGIN(f) - /* Kill all local variables */ - valuestack = f->f_valuestack; - for (p = f->f_localsplus; p < valuestack; p++) - Py_CLEAR(*p); - - /* Free stack */ - if (f->f_stacktop != NULL) { - for (p = valuestack; p < f->f_stacktop; p++) - Py_XDECREF(*p); - } Py_XDECREF(f->f_back); Py_DECREF(f->f_builtins); Py_DECREF(f->f_globals); Py_CLEAR(f->f_locals); - Py_CLEAR(f->f_trace); - Py_CLEAR(f->f_exc_type); - Py_CLEAR(f->f_exc_value); - Py_CLEAR(f->f_exc_traceback); co = f->f_code; if (co->co_zombieframe == NULL) @@ -497,12 +697,25 @@ frame_clear(PyFrameObject *f) { PyObject **fastlocals, **p, **oldtop; Py_ssize_t i, slots; + PyObject *retval; + + if (f->f_back == NULL) { + PyObject *t, *v, *tb; + PyErr_Fetch(&t, &v, &tb); + /* Note that this can finalize a suspended generator frame even + * if the generator object was disposed of (i.e. if f_gen is NULL). + */ + retval = _PyFrame_Finalize(f); + if (retval == NULL) { + if (PyErr_Occurred()) + PyErr_WriteUnraisable((PyObject *) f); + } + else + Py_DECREF(retval); + PyErr_Restore(t, v, tb); + } - /* Before anything else, make sure that this frame is clearly marked - * as being defunct! Else, e.g., a generator reachable from this - * frame may also point to this frame, believe itself to still be - * active, and try cleaning up this frame again. 
- */ + /* Make sure the frame is now clearly marked as being defunct */ oldtop = f->f_stacktop; f->f_stacktop = NULL; @@ -713,6 +926,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, f->f_lasti = -1; f->f_lineno = code->co_firstlineno; f->f_iblock = 0; + f->f_gen = NULL; _PyObject_GC_TRACK(f); return f; diff --git a/Objects/genobject.c b/Objects/genobject.c index 016bfa2975..34ecf2cb5f 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -19,112 +19,50 @@ static void gen_dealloc(PyGenObject *gen) { PyObject *self = (PyObject *) gen; + PyFrameObject *f = gen->gi_frame; _PyObject_GC_UNTRACK(gen); if (gen->gi_weakreflist != NULL) PyObject_ClearWeakRefs(self); - _PyObject_GC_TRACK(self); - - if (gen->gi_frame != NULL && gen->gi_frame->f_stacktop != NULL) { - /* Generator is paused, so we need to close */ - Py_TYPE(gen)->tp_del(self); - if (self->ob_refcnt > 0) - return; /* resurrected. :( */ + gen->gi_frame = NULL; + if (f) { + /* Close the generator by finalizing the frame */ + PyObject *retval, *t, *v, *tb; + PyErr_Fetch(&t, &v, &tb); + f->f_gen = NULL; + retval = _PyFrame_Finalize(f); + if (retval) + Py_DECREF(retval); + else if (PyErr_Occurred()) + PyErr_WriteUnraisable((PyObject *) gen); + Py_DECREF(f); + PyErr_Restore(t, v, tb); } - - _PyObject_GC_UNTRACK(self); - Py_CLEAR(gen->gi_frame); Py_CLEAR(gen->gi_code); PyObject_GC_Del(gen); } - static PyObject * gen_send_ex(PyGenObject *gen, PyObject *arg, int exc) { - PyThreadState *tstate = PyThreadState_GET(); PyFrameObject *f = gen->gi_frame; - PyObject *result; + /* For compatibility, we check gi_running before f == NULL */ if (gen->gi_running) { PyErr_SetString(PyExc_ValueError, "generator already executing"); return NULL; } - if (f == NULL || f->f_stacktop == NULL) { - /* Only set exception if called from send() */ + if (f == NULL) { + /* Only set exception if send() called, not throw() or next() */ if (arg && !exc) PyErr_SetNone(PyExc_StopIteration); return NULL; } - if 
(f->f_lasti == -1) { - if (arg && arg != Py_None) { - PyErr_SetString(PyExc_TypeError, - "can't send non-None value to a " - "just-started generator"); - return NULL; - } - } else { - /* Push arg onto the frame's value stack */ - result = arg ? arg : Py_None; - Py_INCREF(result); - *(f->f_stacktop++) = result; - } - - /* Generators always return to their most recent caller, not - * necessarily their creator. */ - Py_XINCREF(tstate->frame); - assert(f->f_back == NULL); - f->f_back = tstate->frame; - - gen->gi_running = 1; - result = PyEval_EvalFrameEx(f, exc); - gen->gi_running = 0; - - /* Don't keep the reference to f_back any longer than necessary. It - * may keep a chain of frames alive or it could create a reference - * cycle. */ - assert(f->f_back == tstate->frame); - Py_CLEAR(f->f_back); - - /* If the generator just returned (as opposed to yielding), signal - * that the generator is exhausted. */ - if (result && f->f_stacktop == NULL) { - if (result == Py_None) { - /* Delay exception instantiation if we can */ - PyErr_SetNone(PyExc_StopIteration); - } else { - PyObject *e = PyObject_CallFunctionObjArgs( - PyExc_StopIteration, result, NULL); - if (e != NULL) { - PyErr_SetObject(PyExc_StopIteration, e); - Py_DECREF(e); - } - } - Py_CLEAR(result); - } - - if (!result || f->f_stacktop == NULL) { - /* generator can't be rerun, so release the frame */ - /* first clean reference cycle through stored exception traceback */ - PyObject *t, *v, *tb; - t = f->f_exc_type; - v = f->f_exc_value; - tb = f->f_exc_traceback; - f->f_exc_type = NULL; - f->f_exc_value = NULL; - f->f_exc_traceback = NULL; - Py_XDECREF(t); - Py_XDECREF(v); - Py_XDECREF(tb); - gen->gi_frame = NULL; - Py_DECREF(f); - } - - return result; + return _PyFrame_GeneratorSend(f, arg, exc); } PyDoc_STRVAR(send_doc, @@ -145,146 +83,33 @@ PyDoc_STRVAR(close_doc, * close a subiterator being delegated to by yield-from. 
*/ -static int -gen_close_iter(PyObject *yf) -{ - PyObject *retval = NULL; - _Py_IDENTIFIER(close); - - if (PyGen_CheckExact(yf)) { - retval = gen_close((PyGenObject *)yf, NULL); - if (retval == NULL) - return -1; - } else { - PyObject *meth = _PyObject_GetAttrId(yf, &PyId_close); - if (meth == NULL) { - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) - PyErr_WriteUnraisable(yf); - PyErr_Clear(); - } else { - retval = PyObject_CallFunction(meth, ""); - Py_DECREF(meth); - if (retval == NULL) - return -1; - } - } - Py_XDECREF(retval); - return 0; -} - static PyObject * gen_yf(PyGenObject *gen) { - PyObject *yf = NULL; PyFrameObject *f = gen->gi_frame; - - if (f && f->f_stacktop) { - PyObject *bytecode = f->f_code->co_code; - unsigned char *code = (unsigned char *)PyBytes_AS_STRING(bytecode); - - if (code[f->f_lasti + 1] != YIELD_FROM) - return NULL; - yf = f->f_stacktop[-1]; - Py_INCREF(yf); - } - - return yf; + if (f) + return _PyFrame_YieldingFrom(f); + else + return NULL; } static PyObject * gen_close(PyGenObject *gen, PyObject *args) { - PyObject *retval; - PyObject *yf = gen_yf(gen); - int err = 0; + PyFrameObject *f = gen->gi_frame; - if (yf) { - gen->gi_running = 1; - err = gen_close_iter(yf); - gen->gi_running = 0; - Py_DECREF(yf); - } - if (err == 0) - PyErr_SetNone(PyExc_GeneratorExit); - retval = gen_send_ex(gen, Py_None, 1); - if (retval) { - Py_DECREF(retval); - PyErr_SetString(PyExc_RuntimeError, - "generator ignored GeneratorExit"); + /* For compatibility, we check gi_running before f == NULL */ + if (gen->gi_running) { + PyErr_SetString(PyExc_ValueError, + "generator already executing"); return NULL; } - if (PyErr_ExceptionMatches(PyExc_StopIteration) - || PyErr_ExceptionMatches(PyExc_GeneratorExit)) { - PyErr_Clear(); /* ignore these errors */ - Py_INCREF(Py_None); - return Py_None; - } - return NULL; -} + if (f == NULL) + Py_RETURN_NONE; -static void -gen_del(PyObject *self) -{ - PyObject *res; - PyObject *error_type, *error_value, 
*error_traceback; - PyGenObject *gen = (PyGenObject *)self; - - if (gen->gi_frame == NULL || gen->gi_frame->f_stacktop == NULL) - /* Generator isn't paused, so no need to close */ - return; - - /* Temporarily resurrect the object. */ - assert(self->ob_refcnt == 0); - self->ob_refcnt = 1; - - /* Save the current exception, if any. */ - PyErr_Fetch(&error_type, &error_value, &error_traceback); - - res = gen_close(gen, NULL); - - if (res == NULL) - PyErr_WriteUnraisable(self); - else - Py_DECREF(res); - - /* Restore the saved exception. */ - PyErr_Restore(error_type, error_value, error_traceback); - - /* Undo the temporary resurrection; can't use DECREF here, it would - * cause a recursive call. - */ - assert(self->ob_refcnt > 0); - if (--self->ob_refcnt == 0) - return; /* this is the normal path out */ - - /* close() resurrected it! Make it look like the original Py_DECREF - * never happened. - */ - { - Py_ssize_t refcnt = self->ob_refcnt; - _Py_NewReference(self); - self->ob_refcnt = refcnt; - } - assert(PyType_IS_GC(Py_TYPE(self)) && - _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED); - - /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so - * we need to undo that. */ - _Py_DEC_REFTOTAL; - /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object - * chain, so no more to do there. - * If COUNT_ALLOCS, the original decref bumped tp_frees, and - * _Py_NewReference bumped tp_allocs: both of those need to be - * undone. 
- */ -#ifdef COUNT_ALLOCS - --(Py_TYPE(self)->tp_frees); - --(Py_TYPE(self)->tp_allocs); -#endif + return _PyFrame_Finalize(f); } - - PyDoc_STRVAR(throw_doc, "throw(typ[,val[,tb]]) -> raise exception in generator,\n\ return next yielded value or raise StopIteration."); @@ -306,7 +131,7 @@ gen_throw(PyGenObject *gen, PyObject *args) int err; if (PyErr_GivenExceptionMatches(typ, PyExc_GeneratorExit)) { gen->gi_running = 1; - err = gen_close_iter(yf); + err = _PyFrame_CloseIterator(yf); gen->gi_running = 0; Py_DECREF(yf); if (err < 0) @@ -544,7 +369,6 @@ PyTypeObject PyGen_Type = { 0, /* tp_cache */ 0, /* tp_subclasses */ 0, /* tp_weaklist */ - gen_del, /* tp_del */ }; PyObject * @@ -556,6 +380,7 @@ PyGen_New(PyFrameObject *f) return NULL; } gen->gi_frame = f; + f->f_gen = (PyObject *) gen; Py_INCREF(f->f_code); gen->gi_code = (PyObject *)(f->f_code); gen->gi_running = 0; @@ -567,17 +392,5 @@ PyGen_New(PyFrameObject *f) int PyGen_NeedsFinalizing(PyGenObject *gen) { - int i; - PyFrameObject *f = gen->gi_frame; - - if (f == NULL || f->f_stacktop == NULL) - return 0; /* no frame or empty blockstack == no finalization */ - - /* Any block type besides a loop requires cleanup. */ - for (i = 0; i < f->f_iblock; i++) - if (f->f_blockstack[i].b_type != SETUP_LOOP) - return 1; - - /* No blocks except loops, it's safe to skip finalization. */ return 0; } -- cgit v1.2.1 From abd4b98103be57a960f50b24c134fe2d17d2ef4e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 10 May 2013 08:47:42 -0700 Subject: #17927: Keep frame from referencing cell-ified arguments. 
--- Objects/typeobject.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index aa67af81b0..e418a3a957 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6510,6 +6510,10 @@ super_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } obj = f->f_localsplus[0]; + if (obj != NULL && PyCell_Check(obj)) { + /* It might be a cell. See cell var initialization in ceval.c. */ + obj = PyCell_GET(obj); + } if (obj == NULL) { PyErr_SetString(PyExc_RuntimeError, "super(): arg[0] deleted"); -- cgit v1.2.1 From bf1ecdb264e1c5aaa25957c3f07291d886fad386 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Sat, 11 May 2013 14:46:48 -0400 Subject: Touch up grammar for dict.update() docstring. --- Objects/dictobject.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 208888db0e..bf9ec550b3 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2473,10 +2473,10 @@ PyDoc_STRVAR(popitem__doc__, 2-tuple; but raise KeyError if D is empty."); PyDoc_STRVAR(update__doc__, -"D.update([E, ]**F) -> None. Update D from dict/iterable E and F.\n" -"If E present and has a .keys() method, does: for k in E: D[k] = E[k]\n\ -If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v\n\ -In either case, this is followed by: for k in F: D[k] = F[k]"); +"D.update([E, ]**F) -> None. 
Update D from dict/iterable E and F.\n\ +If E is present and has a .keys() method, then does: for k in E: D[k] = E[k]\n\ +If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v\n\ +In either case, this is followed by: for k in F: D[k] = F[k]"); PyDoc_STRVAR(fromkeys__doc__, "dict.fromkeys(S[,v]) -> New dict with keys from S and values equal to v.\n\ -- cgit v1.2.1 From 70930d4ba3c03001518c8b690fcbe54a41232db0 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Sun, 12 May 2013 18:16:06 -0500 Subject: when an argument is a cell, set the local copy to NULL (see #17927) --- Objects/typeobject.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index e418a3a957..a3516671a7 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6510,9 +6510,17 @@ super_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } obj = f->f_localsplus[0]; - if (obj != NULL && PyCell_Check(obj)) { - /* It might be a cell. See cell var initialization in ceval.c. */ - obj = PyCell_GET(obj); + if (obj == NULL && co->co_cell2arg) { + /* The first argument might be a cell. */ + n = PyTuple_GET_SIZE(co->co_cellvars); + for (i = 0; i < n; i++) { + if (co->co_cell2arg[i] == 0) { + PyObject *cell = f->f_localsplus[co->co_nlocals + i]; + assert(PyCell_Check(cell)); + obj = PyCell_GET(cell); + break; + } + } } if (obj == NULL) { PyErr_SetString(PyExc_RuntimeError, -- cgit v1.2.1 From 2469b6b9b897bcccf6a9195427469c816f05e374 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 14 May 2013 20:37:52 +0200 Subject: Backout c89febab4648 following private feedback by Guido. 
(Issue #17807: Generators can now be finalized even when they are part of a reference cycle) --- Objects/frameobject.c | 254 ++++---------------------------------------------- Objects/genobject.c | 251 ++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 239 insertions(+), 266 deletions(-) (limited to 'Objects') diff --git a/Objects/frameobject.c b/Objects/frameobject.c index df7a1de03e..6fff370bba 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -31,195 +31,6 @@ frame_getlocals(PyFrameObject *f, void *closure) return f->f_locals; } -/* - * Generator support. - */ - -PyObject * -_PyFrame_YieldingFrom(PyFrameObject *f) -{ - PyObject *yf = NULL; - - if (f && f->f_stacktop) { - PyObject *bytecode = f->f_code->co_code; - unsigned char *code = (unsigned char *)PyBytes_AS_STRING(bytecode); - - if (code[f->f_lasti + 1] != YIELD_FROM) - return NULL; - yf = f->f_stacktop[-1]; - Py_INCREF(yf); - } - return yf; -} - -PyObject * -_PyFrame_GeneratorSend(PyFrameObject *f, PyObject *arg, int exc) -{ - PyThreadState *tstate = PyThreadState_GET(); - PyObject *result; - PyGenObject *gen = (PyGenObject *) f->f_gen; - - assert(gen == NULL || PyGen_CheckExact(gen)); - if (gen && gen->gi_running) { - PyErr_SetString(PyExc_ValueError, - "generator already executing"); - return NULL; - } - if (f->f_stacktop == NULL) { - /* Only set exception if send() called, not throw() or next() */ - if (arg && !exc) - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - if (f->f_lasti == -1) { - if (arg && arg != Py_None) { - PyErr_SetString(PyExc_TypeError, - "can't send non-None value to a " - "just-started generator"); - return NULL; - } - } else { - /* Push arg onto the frame's value stack */ - result = arg ? arg : Py_None; - Py_INCREF(result); - *(f->f_stacktop++) = result; - } - - /* Generators always return to their most recent caller, not - * necessarily their creator. 
*/ - Py_XINCREF(tstate->frame); - assert(f->f_back == NULL); - f->f_back = tstate->frame; - - if (gen) { - Py_INCREF(gen); - gen->gi_running = 1; - } - result = PyEval_EvalFrameEx(f, exc); - if (gen) { - gen->gi_running = 0; - /* In case running the frame has lost all external references - * to gen, we must be careful not to hold on an invalid object. */ - if (Py_REFCNT(gen) == 1) - Py_CLEAR(gen); - else - Py_DECREF(gen); - } - - /* Don't keep the reference to f_back any longer than necessary. It - * may keep a chain of frames alive or it could create a reference - * cycle. */ - assert(f->f_back == tstate->frame); - Py_CLEAR(f->f_back); - - /* If the generator just returned (as opposed to yielding), signal - * that the generator is exhausted. */ - if (result && f->f_stacktop == NULL) { - if (result == Py_None) { - /* Delay exception instantiation if we can */ - PyErr_SetNone(PyExc_StopIteration); - } else { - PyObject *e = PyObject_CallFunctionObjArgs( - PyExc_StopIteration, result, NULL); - if (e != NULL) { - PyErr_SetObject(PyExc_StopIteration, e); - Py_DECREF(e); - } - } - Py_CLEAR(result); - } - - if (f->f_stacktop == NULL) { - /* generator can't be rerun, so release the frame */ - /* first clean reference cycle through stored exception traceback */ - PyObject *t, *v, *tb; - t = f->f_exc_type; - v = f->f_exc_value; - tb = f->f_exc_traceback; - f->f_exc_type = NULL; - f->f_exc_value = NULL; - f->f_exc_traceback = NULL; - Py_XDECREF(t); - Py_XDECREF(v); - Py_XDECREF(tb); - if (gen) { - f->f_gen = NULL; - Py_CLEAR(gen->gi_frame); - } - } - - return result; -} - -int -_PyFrame_CloseIterator(PyObject *yf) -{ - PyObject *retval = NULL; - _Py_IDENTIFIER(close); - - if (PyGen_CheckExact(yf)) { - PyFrameObject *f = ((PyGenObject *) yf)->gi_frame; - assert(f != NULL); - retval = _PyFrame_Finalize(f); - if (retval == NULL) - return -1; - } else { - PyObject *meth = _PyObject_GetAttrId(yf, &PyId_close); - if (meth == NULL) { - if 
(!PyErr_ExceptionMatches(PyExc_AttributeError)) - PyErr_WriteUnraisable(yf); - PyErr_Clear(); - } else { - retval = PyObject_CallFunction(meth, ""); - Py_DECREF(meth); - if (retval == NULL) - return -1; - } - } - Py_XDECREF(retval); - return 0; -} - -PyObject * -_PyFrame_Finalize(PyFrameObject *f) -{ - int err = 0; - PyObject *retval; - PyGenObject *gen = (PyGenObject *) f->f_gen; - PyObject *yf = _PyFrame_YieldingFrom(f); - - assert(gen == NULL || PyGen_CheckExact(gen)); - if (yf) { - if (gen) - gen->gi_running = 1; - err = _PyFrame_CloseIterator(yf); - if (gen) - gen->gi_running = 0; - Py_DECREF(yf); - } - if (err == 0) - PyErr_SetNone(PyExc_GeneratorExit); - retval = _PyFrame_GeneratorSend(f, Py_None, 1); - if (retval) { - Py_DECREF(retval); - PyErr_SetString(PyExc_RuntimeError, - "generator ignored GeneratorExit"); - return NULL; - } - if (PyErr_ExceptionMatches(PyExc_StopIteration) - || PyErr_ExceptionMatches(PyExc_GeneratorExit)) { - PyErr_Clear(); /* ignore these errors */ - Py_INCREF(Py_None); - return Py_None; - } - return NULL; -} - -/* - * Line number support. - */ - int PyFrame_GetLineNumber(PyFrameObject *f) { @@ -608,44 +419,33 @@ static int numfree = 0; /* number of frames currently in free_list */ /* max value for numfree */ #define PyFrame_MAXFREELIST 200 -static void -frame_clear(PyFrameObject *f); - static void frame_dealloc(PyFrameObject *f) { + PyObject **p, **valuestack; PyCodeObject *co; - Py_REFCNT(f)++; - frame_clear(f); - Py_REFCNT(f)--; - if (Py_REFCNT(f) > 0) { - /* Frame resurrected! */ - Py_ssize_t refcnt = Py_REFCNT(f); - _Py_NewReference((PyObject *) f); - Py_REFCNT(f) = refcnt; - /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so - * we need to undo that. */ - _Py_DEC_REFTOTAL; - /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object - * chain, so no more to do there. - * If COUNT_ALLOCS, the original decref bumped tp_frees, and - * _Py_NewReference bumped tp_allocs: both of those need to be - * undone. 
- */ -#ifdef COUNT_ALLOCS - --(Py_TYPE(self)->tp_frees); - --(Py_TYPE(self)->tp_allocs); -#endif - } - PyObject_GC_UnTrack(f); Py_TRASHCAN_SAFE_BEGIN(f) + /* Kill all local variables */ + valuestack = f->f_valuestack; + for (p = f->f_localsplus; p < valuestack; p++) + Py_CLEAR(*p); + + /* Free stack */ + if (f->f_stacktop != NULL) { + for (p = valuestack; p < f->f_stacktop; p++) + Py_XDECREF(*p); + } Py_XDECREF(f->f_back); Py_DECREF(f->f_builtins); Py_DECREF(f->f_globals); Py_CLEAR(f->f_locals); + Py_CLEAR(f->f_trace); + Py_CLEAR(f->f_exc_type); + Py_CLEAR(f->f_exc_value); + Py_CLEAR(f->f_exc_traceback); co = f->f_code; if (co->co_zombieframe == NULL) @@ -697,25 +497,12 @@ frame_clear(PyFrameObject *f) { PyObject **fastlocals, **p, **oldtop; Py_ssize_t i, slots; - PyObject *retval; - - if (f->f_back == NULL) { - PyObject *t, *v, *tb; - PyErr_Fetch(&t, &v, &tb); - /* Note that this can finalize a suspended generator frame even - * if the generator object was disposed of (i.e. if f_gen is NULL). - */ - retval = _PyFrame_Finalize(f); - if (retval == NULL) { - if (PyErr_Occurred()) - PyErr_WriteUnraisable((PyObject *) f); - } - else - Py_DECREF(retval); - PyErr_Restore(t, v, tb); - } - /* Make sure the frame is now clearly marked as being defunct */ + /* Before anything else, make sure that this frame is clearly marked + * as being defunct! Else, e.g., a generator reachable from this + * frame may also point to this frame, believe itself to still be + * active, and try cleaning up this frame again. 
+ */ oldtop = f->f_stacktop; f->f_stacktop = NULL; @@ -926,7 +713,6 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, f->f_lasti = -1; f->f_lineno = code->co_firstlineno; f->f_iblock = 0; - f->f_gen = NULL; _PyObject_GC_TRACK(f); return f; diff --git a/Objects/genobject.c b/Objects/genobject.c index 34ecf2cb5f..016bfa2975 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -19,50 +19,112 @@ static void gen_dealloc(PyGenObject *gen) { PyObject *self = (PyObject *) gen; - PyFrameObject *f = gen->gi_frame; _PyObject_GC_UNTRACK(gen); if (gen->gi_weakreflist != NULL) PyObject_ClearWeakRefs(self); - gen->gi_frame = NULL; - if (f) { - /* Close the generator by finalizing the frame */ - PyObject *retval, *t, *v, *tb; - PyErr_Fetch(&t, &v, &tb); - f->f_gen = NULL; - retval = _PyFrame_Finalize(f); - if (retval) - Py_DECREF(retval); - else if (PyErr_Occurred()) - PyErr_WriteUnraisable((PyObject *) gen); - Py_DECREF(f); - PyErr_Restore(t, v, tb); + _PyObject_GC_TRACK(self); + + if (gen->gi_frame != NULL && gen->gi_frame->f_stacktop != NULL) { + /* Generator is paused, so we need to close */ + Py_TYPE(gen)->tp_del(self); + if (self->ob_refcnt > 0) + return; /* resurrected. 
:( */ } + + _PyObject_GC_UNTRACK(self); + Py_CLEAR(gen->gi_frame); Py_CLEAR(gen->gi_code); PyObject_GC_Del(gen); } + static PyObject * gen_send_ex(PyGenObject *gen, PyObject *arg, int exc) { + PyThreadState *tstate = PyThreadState_GET(); PyFrameObject *f = gen->gi_frame; + PyObject *result; - /* For compatibility, we check gi_running before f == NULL */ if (gen->gi_running) { PyErr_SetString(PyExc_ValueError, "generator already executing"); return NULL; } - if (f == NULL) { - /* Only set exception if send() called, not throw() or next() */ + if (f == NULL || f->f_stacktop == NULL) { + /* Only set exception if called from send() */ if (arg && !exc) PyErr_SetNone(PyExc_StopIteration); return NULL; } - return _PyFrame_GeneratorSend(f, arg, exc); + if (f->f_lasti == -1) { + if (arg && arg != Py_None) { + PyErr_SetString(PyExc_TypeError, + "can't send non-None value to a " + "just-started generator"); + return NULL; + } + } else { + /* Push arg onto the frame's value stack */ + result = arg ? arg : Py_None; + Py_INCREF(result); + *(f->f_stacktop++) = result; + } + + /* Generators always return to their most recent caller, not + * necessarily their creator. */ + Py_XINCREF(tstate->frame); + assert(f->f_back == NULL); + f->f_back = tstate->frame; + + gen->gi_running = 1; + result = PyEval_EvalFrameEx(f, exc); + gen->gi_running = 0; + + /* Don't keep the reference to f_back any longer than necessary. It + * may keep a chain of frames alive or it could create a reference + * cycle. */ + assert(f->f_back == tstate->frame); + Py_CLEAR(f->f_back); + + /* If the generator just returned (as opposed to yielding), signal + * that the generator is exhausted. 
*/ + if (result && f->f_stacktop == NULL) { + if (result == Py_None) { + /* Delay exception instantiation if we can */ + PyErr_SetNone(PyExc_StopIteration); + } else { + PyObject *e = PyObject_CallFunctionObjArgs( + PyExc_StopIteration, result, NULL); + if (e != NULL) { + PyErr_SetObject(PyExc_StopIteration, e); + Py_DECREF(e); + } + } + Py_CLEAR(result); + } + + if (!result || f->f_stacktop == NULL) { + /* generator can't be rerun, so release the frame */ + /* first clean reference cycle through stored exception traceback */ + PyObject *t, *v, *tb; + t = f->f_exc_type; + v = f->f_exc_value; + tb = f->f_exc_traceback; + f->f_exc_type = NULL; + f->f_exc_value = NULL; + f->f_exc_traceback = NULL; + Py_XDECREF(t); + Py_XDECREF(v); + Py_XDECREF(tb); + gen->gi_frame = NULL; + Py_DECREF(f); + } + + return result; } PyDoc_STRVAR(send_doc, @@ -83,33 +145,146 @@ PyDoc_STRVAR(close_doc, * close a subiterator being delegated to by yield-from. */ +static int +gen_close_iter(PyObject *yf) +{ + PyObject *retval = NULL; + _Py_IDENTIFIER(close); + + if (PyGen_CheckExact(yf)) { + retval = gen_close((PyGenObject *)yf, NULL); + if (retval == NULL) + return -1; + } else { + PyObject *meth = _PyObject_GetAttrId(yf, &PyId_close); + if (meth == NULL) { + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_WriteUnraisable(yf); + PyErr_Clear(); + } else { + retval = PyObject_CallFunction(meth, ""); + Py_DECREF(meth); + if (retval == NULL) + return -1; + } + } + Py_XDECREF(retval); + return 0; +} + static PyObject * gen_yf(PyGenObject *gen) { + PyObject *yf = NULL; PyFrameObject *f = gen->gi_frame; - if (f) - return _PyFrame_YieldingFrom(f); - else - return NULL; + + if (f && f->f_stacktop) { + PyObject *bytecode = f->f_code->co_code; + unsigned char *code = (unsigned char *)PyBytes_AS_STRING(bytecode); + + if (code[f->f_lasti + 1] != YIELD_FROM) + return NULL; + yf = f->f_stacktop[-1]; + Py_INCREF(yf); + } + + return yf; } static PyObject * gen_close(PyGenObject *gen, PyObject 
*args) { - PyFrameObject *f = gen->gi_frame; + PyObject *retval; + PyObject *yf = gen_yf(gen); + int err = 0; - /* For compatibility, we check gi_running before f == NULL */ - if (gen->gi_running) { - PyErr_SetString(PyExc_ValueError, - "generator already executing"); + if (yf) { + gen->gi_running = 1; + err = gen_close_iter(yf); + gen->gi_running = 0; + Py_DECREF(yf); + } + if (err == 0) + PyErr_SetNone(PyExc_GeneratorExit); + retval = gen_send_ex(gen, Py_None, 1); + if (retval) { + Py_DECREF(retval); + PyErr_SetString(PyExc_RuntimeError, + "generator ignored GeneratorExit"); return NULL; } - if (f == NULL) - Py_RETURN_NONE; + if (PyErr_ExceptionMatches(PyExc_StopIteration) + || PyErr_ExceptionMatches(PyExc_GeneratorExit)) { + PyErr_Clear(); /* ignore these errors */ + Py_INCREF(Py_None); + return Py_None; + } + return NULL; +} - return _PyFrame_Finalize(f); +static void +gen_del(PyObject *self) +{ + PyObject *res; + PyObject *error_type, *error_value, *error_traceback; + PyGenObject *gen = (PyGenObject *)self; + + if (gen->gi_frame == NULL || gen->gi_frame->f_stacktop == NULL) + /* Generator isn't paused, so no need to close */ + return; + + /* Temporarily resurrect the object. */ + assert(self->ob_refcnt == 0); + self->ob_refcnt = 1; + + /* Save the current exception, if any. */ + PyErr_Fetch(&error_type, &error_value, &error_traceback); + + res = gen_close(gen, NULL); + + if (res == NULL) + PyErr_WriteUnraisable(self); + else + Py_DECREF(res); + + /* Restore the saved exception. */ + PyErr_Restore(error_type, error_value, error_traceback); + + /* Undo the temporary resurrection; can't use DECREF here, it would + * cause a recursive call. + */ + assert(self->ob_refcnt > 0); + if (--self->ob_refcnt == 0) + return; /* this is the normal path out */ + + /* close() resurrected it! Make it look like the original Py_DECREF + * never happened. 
+ */ + { + Py_ssize_t refcnt = self->ob_refcnt; + _Py_NewReference(self); + self->ob_refcnt = refcnt; + } + assert(PyType_IS_GC(Py_TYPE(self)) && + _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED); + + /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so + * we need to undo that. */ + _Py_DEC_REFTOTAL; + /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object + * chain, so no more to do there. + * If COUNT_ALLOCS, the original decref bumped tp_frees, and + * _Py_NewReference bumped tp_allocs: both of those need to be + * undone. + */ +#ifdef COUNT_ALLOCS + --(Py_TYPE(self)->tp_frees); + --(Py_TYPE(self)->tp_allocs); +#endif } + + PyDoc_STRVAR(throw_doc, "throw(typ[,val[,tb]]) -> raise exception in generator,\n\ return next yielded value or raise StopIteration."); @@ -131,7 +306,7 @@ gen_throw(PyGenObject *gen, PyObject *args) int err; if (PyErr_GivenExceptionMatches(typ, PyExc_GeneratorExit)) { gen->gi_running = 1; - err = _PyFrame_CloseIterator(yf); + err = gen_close_iter(yf); gen->gi_running = 0; Py_DECREF(yf); if (err < 0) @@ -369,6 +544,7 @@ PyTypeObject PyGen_Type = { 0, /* tp_cache */ 0, /* tp_subclasses */ 0, /* tp_weaklist */ + gen_del, /* tp_del */ }; PyObject * @@ -380,7 +556,6 @@ PyGen_New(PyFrameObject *f) return NULL; } gen->gi_frame = f; - f->f_gen = (PyObject *) gen; Py_INCREF(f->f_code); gen->gi_code = (PyObject *)(f->f_code); gen->gi_running = 0; @@ -392,5 +567,17 @@ PyGen_New(PyFrameObject *f) int PyGen_NeedsFinalizing(PyGenObject *gen) { + int i; + PyFrameObject *f = gen->gi_frame; + + if (f == NULL || f->f_stacktop == NULL) + return 0; /* no frame or empty blockstack == no finalization */ + + /* Any block type besides a loop requires cleanup. */ + for (i = 0; i < f->f_iblock; i++) + if (f->f_blockstack[i].b_type != SETUP_LOOP) + return 1; + + /* No blocks except loops, it's safe to skip finalization. 
*/ return 0; } -- cgit v1.2.1 From 8fcfc4fd7108cb9408fa81fdd8e69cd0ddaa465b Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Fri, 17 May 2013 18:22:31 -0500 Subject: rewrite the parsing of field names to be more consistent wrt recursive expansion --- Objects/stringlib/unicode_format.h | 115 +++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 62 deletions(-) (limited to 'Objects') diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index b01d756e71..aec221acff 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -543,7 +543,7 @@ done: static int parse_field(SubString *str, SubString *field_name, SubString *format_spec, - Py_UCS4 *conversion) + int *format_spec_needs_expanding, Py_UCS4 *conversion) { /* Note this function works if the field name is zero length, which is good. Zero length field names are handled later, in @@ -561,6 +561,15 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec, field_name->start = str->start; while (str->start < str->end) { switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { + case '{': + PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name"); + return 0; + case '[': + for (; str->start < str->end; str->start++) + if (PyUnicode_READ_CHAR(str->str, str->start) == ']') + break; + continue; + case '}': case ':': case '!': break; @@ -570,41 +579,62 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec, break; } + field_name->end = str->start - 1; if (c == '!' 
|| c == ':') { + Py_ssize_t count; /* we have a format specifier and/or a conversion */ /* don't include the last character */ - field_name->end = str->start-1; - - /* the format specifier is the rest of the string */ - format_spec->str = str->str; - format_spec->start = str->start; - format_spec->end = str->end; /* see if there's a conversion specifier */ if (c == '!') { /* there must be another character present */ - if (format_spec->start >= format_spec->end) { + if (str->start >= str->end) { PyErr_SetString(PyExc_ValueError, - "end of format while looking for conversion " + "end of string while looking for conversion " "specifier"); return 0; } - *conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++); + *conversion = PyUnicode_READ_CHAR(str->str, str->start++); - /* if there is another character, it must be a colon */ - if (format_spec->start < format_spec->end) { - c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++); + if (str->start < str->end) { + c = PyUnicode_READ_CHAR(str->str, str->start++); + if (c == '}') + return 1; if (c != ':') { PyErr_SetString(PyExc_ValueError, - "expected ':' after format specifier"); + "expected ':' after conversion specifier"); return 0; } } } + format_spec->str = str->str; + format_spec->start = str->start; + count = 1; + while (str->start < str->end) { + switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { + case '{': + *format_spec_needs_expanding = 1; + count++; + break; + case '}': + count--; + if (count == 0) { + format_spec->end = str->start - 1; + return 1; + } + break; + default: + break; + } + } + + PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec"); + return 0; + } + else if (c != '}') { + PyErr_SetString(PyExc_ValueError, "expected '}' before end of string"); + return 0; } - else - /* end of string, there's no format_spec or conversion */ - field_name->end = str->start; return 1; } @@ -638,10 +668,9 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal, 
SubString *format_spec, Py_UCS4 *conversion, int *format_spec_needs_expanding) { - int at_end, hit_format_spec; + int at_end; Py_UCS4 c = 0; Py_ssize_t start; - int count; Py_ssize_t len; int markup_follows = 0; @@ -713,50 +742,12 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal, if (!markup_follows) return 2; - /* this is markup, find the end of the string by counting nested - braces. note that this prohibits escaped braces, so that - format_specs cannot have braces in them. */ + /* this is markup; parse the field */ *field_present = 1; - count = 1; - - start = self->str.start; - - /* we know we can't have a zero length string, so don't worry - about that case */ - hit_format_spec = 0; - while (self->str.start < self->str.end) { - switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) { - case ':': - hit_format_spec = 1; - count = 1; - break; - case '{': - /* the format spec needs to be recursively expanded. - this is an optimization, and not strictly needed */ - if (hit_format_spec) - *format_spec_needs_expanding = 1; - count++; - break; - case '}': - count--; - if (count <= 0) { - /* we're done. parse and get out */ - SubString s; - - SubString_init(&s, self->str.str, start, self->str.start - 1); - if (parse_field(&s, field_name, format_spec, conversion) == 0) - return 0; - - /* success */ - return 2; - } - break; - } - } - - /* end of string while searching for matching '}' */ - PyErr_SetString(PyExc_ValueError, "unmatched '{' in format"); - return 0; + if (!parse_field(&self->str, field_name, format_spec, + format_spec_needs_expanding, conversion)) + return 0; + return 2; } -- cgit v1.2.1 From 311eb0400f5718391b1b6ab7b54b2f155730c433 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 29 May 2013 18:50:54 +0300 Subject: Issue #9369: The types of `char*` arguments of PyObject_CallFunction() and PyObject_CallMethod() now changed to `const char*`. Based on patches by Jörg Müller and Lars Buitinck. 
--- Objects/abstract.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'Objects') diff --git a/Objects/abstract.c b/Objects/abstract.c index 0934b950f0..40ea43bc8d 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -2142,7 +2142,7 @@ call_function_tail(PyObject *callable, PyObject *args) } PyObject * -PyObject_CallFunction(PyObject *callable, char *format, ...) +PyObject_CallFunction(PyObject *callable, const char *format, ...) { va_list va; PyObject *args; @@ -2162,7 +2162,7 @@ PyObject_CallFunction(PyObject *callable, char *format, ...) } PyObject * -_PyObject_CallFunction_SizeT(PyObject *callable, char *format, ...) +_PyObject_CallFunction_SizeT(PyObject *callable, const char *format, ...) { va_list va; PyObject *args; @@ -2182,7 +2182,7 @@ _PyObject_CallFunction_SizeT(PyObject *callable, char *format, ...) } static PyObject* -callmethod(PyObject* func, char *format, va_list va, int is_size_t) +callmethod(PyObject* func, const char *format, va_list va, int is_size_t) { PyObject *retval = NULL; PyObject *args; @@ -2211,7 +2211,7 @@ callmethod(PyObject* func, char *format, va_list va, int is_size_t) } PyObject * -PyObject_CallMethod(PyObject *o, char *name, char *format, ...) +PyObject_CallMethod(PyObject *o, const char *name, const char *format, ...) { va_list va; PyObject *func = NULL; @@ -2232,7 +2232,8 @@ PyObject_CallMethod(PyObject *o, char *name, char *format, ...) } PyObject * -_PyObject_CallMethodId(PyObject *o, _Py_Identifier *name, char *format, ...) +_PyObject_CallMethodId(PyObject *o, _Py_Identifier *name, + const char *format, ...) { va_list va; PyObject *func = NULL; @@ -2253,7 +2254,8 @@ _PyObject_CallMethodId(PyObject *o, _Py_Identifier *name, char *format, ...) } PyObject * -_PyObject_CallMethod_SizeT(PyObject *o, char *name, char *format, ...) +_PyObject_CallMethod_SizeT(PyObject *o, const char *name, + const char *format, ...) 
{ va_list va; PyObject *func = NULL; @@ -2273,7 +2275,8 @@ _PyObject_CallMethod_SizeT(PyObject *o, char *name, char *format, ...) } PyObject * -_PyObject_CallMethodId_SizeT(PyObject *o, _Py_Identifier *name, char *format, ...) +_PyObject_CallMethodId_SizeT(PyObject *o, _Py_Identifier *name, + const char *format, ...) { va_list va; PyObject *func = NULL; -- cgit v1.2.1 From 87d6c6f82c851f48e4ddfb84dd5379bb8066e307 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Jun 2013 23:02:46 +0200 Subject: Close #17932: Fix an integer overflow issue on Windows 64-bit in iterators: change the C type of seqiterobject.it_index from long to Py_ssize_t. --- Objects/iterobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/iterobject.c b/Objects/iterobject.c index 9acd1b79d7..77ff8106fd 100644 --- a/Objects/iterobject.c +++ b/Objects/iterobject.c @@ -4,7 +4,7 @@ typedef struct { PyObject_HEAD - long it_index; + Py_ssize_t it_index; PyObject *it_seq; /* Set to NULL when iterator is exhausted */ } seqiterobject; -- cgit v1.2.1 From 8362e4b8afd435ff62d89e6b69171cc382fadddc Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Jun 2013 23:14:37 +0200 Subject: Reuse Py_MIN and Py_MAX macros: remove duplicate MIN/MAX macros multiprocessing.h: remove unused MIN and MAX macros --- Objects/floatobject.c | 13 ++++--------- Objects/frameobject.c | 11 +++-------- Objects/longobject.c | 13 ++++--------- 3 files changed, 11 insertions(+), 26 deletions(-) (limited to 'Objects') diff --git a/Objects/floatobject.c b/Objects/floatobject.c index c54c8e1a1d..1398fa5981 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -9,11 +9,6 @@ #include #include -#undef MAX -#undef MIN -#define MAX(x, y) ((x) < (y) ? (y) : (x)) -#define MIN(x, y) ((x) < (y) ? 
(x) : (y)) - /* Special free list free_list is a singly-linked list of available PyFloatObjects, linked @@ -1131,7 +1126,7 @@ float_hex(PyObject *v) } m = frexp(fabs(x), &e); - shift = 1 - MAX(DBL_MIN_EXP - e, 0); + shift = 1 - Py_MAX(DBL_MIN_EXP - e, 0); m = ldexp(m, shift); e -= shift; @@ -1285,8 +1280,8 @@ float_fromhex(PyObject *cls, PyObject *arg) fdigits = coeff_end - s_store; if (ndigits == 0) goto parse_error; - if (ndigits > MIN(DBL_MIN_EXP - DBL_MANT_DIG - LONG_MIN/2, - LONG_MAX/2 + 1 - DBL_MAX_EXP)/4) + if (ndigits > Py_MIN(DBL_MIN_EXP - DBL_MANT_DIG - LONG_MIN/2, + LONG_MAX/2 + 1 - DBL_MAX_EXP)/4) goto insane_length_error; /* [p ] */ @@ -1342,7 +1337,7 @@ float_fromhex(PyObject *cls, PyObject *arg) /* lsb = exponent of least significant bit of the *rounded* value. This is top_exp - DBL_MANT_DIG unless result is subnormal. */ - lsb = MAX(top_exp, (long)DBL_MIN_EXP) - DBL_MANT_DIG; + lsb = Py_MAX(top_exp, (long)DBL_MIN_EXP) - DBL_MANT_DIG; x = 0.0; if (exp >= lsb) { diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 6fff370bba..d3b59f1ea6 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -7,11 +7,6 @@ #include "opcode.h" #include "structmember.h" -#undef MIN -#undef MAX -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - #define OFF(x) offsetof(PyFrameObject, x) static PyMemberDef frame_memberlist[] = { @@ -160,8 +155,8 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno) /* We're now ready to look at the bytecode. 
*/ PyBytes_AsStringAndSize(f->f_code->co_code, (char **)&code, &code_len); - min_addr = MIN(new_lasti, f->f_lasti); - max_addr = MAX(new_lasti, f->f_lasti); + min_addr = Py_MIN(new_lasti, f->f_lasti); + max_addr = Py_MAX(new_lasti, f->f_lasti); /* You can't jump onto a line with an 'except' statement on it - * they expect to have an exception on the top of the stack, which @@ -293,7 +288,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno) break; } - min_delta_iblock = MIN(min_delta_iblock, delta_iblock); + min_delta_iblock = Py_MIN(min_delta_iblock, delta_iblock); if (op >= HAVE_ARGUMENT) { addr += 2; diff --git a/Objects/longobject.c b/Objects/longobject.c index 2b04804216..dbedadb5ba 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -89,11 +89,6 @@ maybe_small_long(PyLongObject *v) */ #define FIVEARY_CUTOFF 8 -#undef MIN -#undef MAX -#define MAX(x, y) ((x) < (y) ? (y) : (x)) -#define MIN(x, y) ((x) > (y) ? (y) : (x)) - #define SIGCHECK(PyTryBlock) \ do { \ if (PyErr_CheckSignals()) PyTryBlock \ @@ -3029,7 +3024,7 @@ kmul_split(PyLongObject *n, Py_ssize_t size_lo, size_hi; const Py_ssize_t size_n = ABS(Py_SIZE(n)); - size_lo = MIN(size_n, size); + size_lo = Py_MIN(size_n, size); size_hi = size_n - size_lo; if ((hi = _PyLong_New(size_hi)) == NULL) @@ -3300,7 +3295,7 @@ k_lopsided_mul(PyLongObject *a, PyLongObject *b) nbdone = 0; while (bsize > 0) { PyLongObject *product; - const Py_ssize_t nbtouse = MIN(bsize, asize); + const Py_ssize_t nbtouse = Py_MIN(bsize, asize); /* Multiply the next slice of b by a. */ memcpy(bslice->ob_digit, b->ob_digit + nbdone, @@ -3591,7 +3586,7 @@ long_true_divide(PyObject *v, PyObject *w) goto underflow_or_zero; /* Choose value for shift; see comments for step 1 above. 
*/ - shift = MAX(diff, DBL_MIN_EXP) - DBL_MANT_DIG - 2; + shift = Py_MAX(diff, DBL_MIN_EXP) - DBL_MANT_DIG - 2; inexact = 0; @@ -3662,7 +3657,7 @@ long_true_divide(PyObject *v, PyObject *w) x_bits = (x_size-1)*PyLong_SHIFT+bits_in_digit(x->ob_digit[x_size-1]); /* The number of extra bits that have to be rounded away. */ - extra_bits = MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG; + extra_bits = Py_MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG; assert(extra_bits == 2 || extra_bits == 3); /* Round by directly modifying the low digit of x. */ -- cgit v1.2.1 From cd91bb340afd5069022b9072077710878f940943 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 5 Jun 2013 00:11:34 +0200 Subject: Issue #17932: Fix an integer overflow issue on Windows 64-bit in tuple iterators: change the C type of tupleiterobject.it_index from long to Py_ssize_t. --- Objects/tupleobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 0a95909275..d808e08aa1 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -930,7 +930,7 @@ PyTuple_Fini(void) typedef struct { PyObject_HEAD - long it_index; + Py_ssize_t it_index; PyTupleObject *it_seq; /* Set to NULL when iterator is exhausted */ } tupleiterobject; -- cgit v1.2.1 From 0b7627f231720776d04a9da095917cac6428e783 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 5 Jun 2013 00:13:51 +0200 Subject: Issue #9566: Fix a compiler warning on Windows 64-bit in namespace_init() The result type is int, return -1 to avoid a compiler warning (cast Py_ssize_t to int). PyObject_Size() can only fail with -1, and anyway a constructor should return -1 on error, not an arbitrary negative number. 
--- Objects/namespaceobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c index 7e9107a744..8c51b07fc1 100644 --- a/Objects/namespaceobject.c +++ b/Objects/namespaceobject.c @@ -44,7 +44,7 @@ namespace_init(_PyNamespaceObject *ns, PyObject *args, PyObject *kwds) if (args != NULL) { Py_ssize_t argcount = PyObject_Size(args); if (argcount < 0) - return argcount; + return -1; else if (argcount > 0) { PyErr_Format(PyExc_TypeError, "no positional arguments expected"); return -1; -- cgit v1.2.1 From 946cd11c3f84ad95c343a77b415e256baaa5e1bd Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 5 Jun 2013 00:21:31 +0200 Subject: Issue #9566: Fix compiler warning on Windows 64-bit --- Objects/unicodeobject.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9f57cdb50c..4c3ecd6f76 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6725,7 +6725,8 @@ decode_code_page_errors(UINT code_page, /* each step cannot decode more than 1 character, but a character can be represented as a surrogate pair */ wchar_t buffer[2], *startout, *out; - int insize, outsize; + int insize; + Py_ssize_t outsize; PyObject *errorHandler = NULL; PyObject *exc = NULL; PyObject *encoding_obj = NULL; @@ -6995,10 +6996,11 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, Py_DECREF(substring); return -1; } + assert(size <= INT_MAX); /* First get the size of the result */ outsize = WideCharToMultiByte(code_page, flags, - p, size, + p, (int)size, NULL, 0, NULL, pusedDefaultChar); if (outsize <= 0) @@ -7035,7 +7037,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, /* Do the conversion */ outsize = WideCharToMultiByte(code_page, flags, - p, size, + p, (int)size, out, outsize, NULL, pusedDefaultChar); Py_CLEAR(substring); -- cgit v1.2.1 From 
3ed18c3ae6d63a37d827c99f8cf44ce7b5325632 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Wed, 12 Jun 2013 16:59:46 -0400 Subject: Issue #15767: Introduce ModuleNotFoundError, a subclass of ImportError. The exception is raised by import when a module could not be found. Technically this is defined as no viable loader could be found for the specified module. This includes ``from ... import`` statements so that the module usage is consistent for all situations where import couldn't find what was requested. This should allow for the common idiom of:: try: import something except ImportError: pass to be updated to using ModuleNotFoundError and not accidentally mask ImportError messages that should propagate (e.g. issues with a loader). This work was driven by the fact that the ``from ... import`` statement needed to be able to tell the difference between an ImportError that simply couldn't find a module (and thus silence the exception so that ceval can raise it) and an ImportError that represented an actual problem. 
--- Objects/exceptions.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'Objects') diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 79bbb8f2ff..23f6605e3f 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -709,6 +709,13 @@ ComplexExtendsException(PyExc_Exception, ImportError, "Import can't find module, or can't find name in " "module."); +/* + * ModuleNotFoundError extends ImportError + */ + +MiddlingExtendsException(PyExc_ImportError, ModuleNotFoundError, ImportError, + "Module not found."); + /* * OSError extends Exception */ @@ -2395,6 +2402,7 @@ _PyExc_Init(PyObject *bltinmod) PRE_INIT(SystemExit) PRE_INIT(KeyboardInterrupt) PRE_INIT(ImportError) + PRE_INIT(ModuleNotFoundError) PRE_INIT(OSError) PRE_INIT(EOFError) PRE_INIT(RuntimeError) @@ -2465,6 +2473,7 @@ _PyExc_Init(PyObject *bltinmod) POST_INIT(SystemExit) POST_INIT(KeyboardInterrupt) POST_INIT(ImportError) + POST_INIT(ModuleNotFoundError) POST_INIT(OSError) INIT_ALIAS(EnvironmentError, OSError) INIT_ALIAS(IOError, OSError) -- cgit v1.2.1 From 047ccd59bd528edcb8d24ebb8c6d5284155bf003 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 15 Jun 2013 00:37:46 +0200 Subject: Issue #3329: Add new APIs to customize memory allocators * Add a new PyMemAllocators structure * New functions: - PyMem_RawMalloc(), PyMem_RawRealloc(), PyMem_RawFree(): GIL-free memory allocator functions - PyMem_GetRawAllocators(), PyMem_SetRawAllocators() - PyMem_GetAllocators(), PyMem_SetAllocators() - PyMem_SetupDebugHooks() - _PyObject_GetArenaAllocators(), _PyObject_SetArenaAllocators() * Add unit test for PyMem_Malloc(0) and PyObject_Malloc(0) * Add unit test for new get/set allocators functions * PyObject_Malloc() now falls back on PyMem_Malloc() instead of malloc() if size is bigger than SMALL_REQUEST_THRESHOLD, and PyObject_Realloc() falls back on PyMem_Realloc() instead of realloc() * PyMem_Malloc() and PyMem_Realloc() now always call malloc() and realloc(), instead of calling 
PyObject_Malloc() and PyObject_Realloc() in debug mode --- Objects/object.c | 20 --- Objects/obmalloc.c | 501 ++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 364 insertions(+), 157 deletions(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index 79f1c8a835..d382a3c157 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1859,26 +1859,6 @@ PyTypeObject *_PyCapsule_hack = &PyCapsule_Type; Py_ssize_t (*_Py_abstract_hack)(PyObject *) = PyObject_Size; -/* Python's malloc wrappers (see pymem.h) */ - -void * -PyMem_Malloc(size_t nbytes) -{ - return PyMem_MALLOC(nbytes); -} - -void * -PyMem_Realloc(void *p, size_t nbytes) -{ - return PyMem_REALLOC(p, nbytes); -} - -void -PyMem_Free(void *p) -{ - PyMem_FREE(p); -} - void _PyObject_DebugTypeStats(FILE *out) { diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 3028f225ae..6dceb38eb6 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1,5 +1,327 @@ #include "Python.h" +/* Python's malloc wrappers (see pymem.h) */ + +/* Forward declaration */ + +#ifdef PYMALLOC_DEBUG /* WITH_PYMALLOC && PYMALLOC_DEBUG */ +static void* _PyMem_DebugMalloc(void *ctx, size_t size); +static void _PyMem_DebugFree(void *ctx, void *p); +static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size); + +static void _PyObject_DebugDumpAddress(const void *p); +static void _PyMem_DebugCheckAddress(char api_id, const void *p); +#endif + +#ifdef WITH_PYMALLOC +static void* _PyObject_Malloc(void *ctx, size_t size); +static void _PyObject_Free(void *ctx, void *p); +static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size); +#endif + + +static void * +_PyMem_RawMalloc(void *ctx, size_t size) +{ + return malloc(size); +} + +static void * +_PyMem_RawRealloc(void *ctx, void *ptr, size_t size) +{ + return realloc(ptr, size); +} + +static void +_PyMem_RawFree(void *ctx, void *ptr) +{ + return free(ptr); +} + +static void * +_PyMem_Malloc(void *ctx, size_t size) +{ + /* PyMem_Malloc(0) 
means malloc(1). Some systems would return NULL + for malloc(0), which would be treated as an error. Some platforms would + return a pointer with no memory behind it, which would break pymalloc. + To solve these problems, allocate an extra byte. */ + if (size == 0) + size = 1; + return malloc(size); +} + +static void * +_PyMem_Realloc(void *ctx, void *ptr, size_t size) +{ + if (size == 0) + size = 1; + return realloc(ptr, size); +} + +#ifdef ARENAS_USE_MMAP +static void * +_PyObject_ArenaMmap(void *ctx, size_t size) +{ + void *ptr; + ptr = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) + return NULL; + assert(ptr != NULL); + return ptr; +} + +static void +_PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size) +{ + return munmap(ptr, size); +} +#else +static void * +_PyObject_ArenaMalloc(void *ctx, size_t size) +{ + return malloc(size); +} + +static void +_PyObject_ArenaFree(void *ctx, void *ptr, size_t size) +{ + free(ptr); +} +#endif + +#define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawRealloc, _PyMem_RawFree +#define PYMEM_FUNCS _PyMem_Malloc, _PyMem_Realloc, _PyMem_RawFree +#ifdef WITH_PYMALLOC +#define PYOBJECT_FUNCS _PyObject_Malloc, _PyObject_Realloc, _PyObject_Free +#else +#define PYOBJECT_FUNCS PYMEM_FUNCS +#endif + +#ifdef PYMALLOC_DEBUG +typedef struct { + /* We tag each block with an API ID in order to tag API violations */ + char api_id; + PyMemAllocators alloc; +} debug_alloc_api_t; +static struct { + debug_alloc_api_t raw; + debug_alloc_api_t mem; + debug_alloc_api_t obj; +} _PyMem_Debug = { + {'r', {NULL, PYRAW_FUNCS}}, + {'m', {NULL, PYMEM_FUNCS}}, + {'o', {NULL, PYOBJECT_FUNCS}} + }; + +#define PYDEBUG_FUNCS _PyMem_DebugMalloc, _PyMem_DebugRealloc, _PyMem_DebugFree +#endif + +static PyMemAllocators _PyMem_Raw = { +#ifdef PYMALLOC_DEBUG + &_PyMem_Debug.raw, PYDEBUG_FUNCS +#else + NULL, PYMEM_FUNCS +#endif + }; + +static PyMemAllocators _PyMem = { +#ifdef PYMALLOC_DEBUG + &_PyMem_Debug.mem, 
PYDEBUG_FUNCS +#else + NULL, PYMEM_FUNCS +#endif + }; + +static PyMemAllocators _PyObject = { +#ifdef PYMALLOC_DEBUG + &_PyMem_Debug.obj, PYDEBUG_FUNCS +#else + NULL, PYOBJECT_FUNCS +#endif + }; + +#undef PYRAW_FUNCS +#undef PYMEM_FUNCS +#undef PYOBJECT_FUNCS +#undef PYDEBUG_FUNCS + +static struct { + void *ctx; + void* (*malloc) (void*, size_t); + void (*free) (void*, void*, size_t); +} _PyObject_Arena = {NULL, +#ifdef ARENAS_USE_MMAP + _PyObject_ArenaMmap, _PyObject_ArenaMunmap +#else + _PyObject_ArenaMalloc, _PyObject_ArenaFree +#endif + }; + +void +PyMem_SetupDebugHooks(void) +{ +#ifdef PYMALLOC_DEBUG + PyMemAllocators alloc; + + alloc.malloc = _PyMem_DebugMalloc; + alloc.realloc = _PyMem_DebugRealloc; + alloc.free = _PyMem_DebugFree; + + if (_PyMem_Raw.malloc != _PyMem_DebugMalloc) { + alloc.ctx = &_PyMem_Debug.raw; + PyMem_GetAllocators(&_PyMem_Debug.raw.alloc); + PyMem_SetAllocators(&alloc); + } + + if (_PyMem.malloc != _PyMem_DebugMalloc) { + alloc.ctx = &_PyMem_Debug.mem; + PyMem_GetAllocators(&_PyMem_Debug.mem.alloc); + PyMem_SetAllocators(&alloc); + } + + if (_PyObject.malloc != _PyMem_DebugMalloc) { + alloc.ctx = &_PyMem_Debug.obj; + PyObject_GetAllocators(&_PyMem_Debug.obj.alloc); + PyObject_SetAllocators(&alloc); + } +#endif +} + +void +PyMem_GetRawAllocators(PyMemAllocators *allocators) +{ + *allocators = _PyMem_Raw; +} + +void +PyMem_SetRawAllocators(PyMemAllocators *allocators) +{ + _PyMem_Raw = *allocators; +} + +void +PyMem_GetAllocators(PyMemAllocators *allocators) +{ + *allocators = _PyMem; +} + +void +PyMem_SetAllocators(PyMemAllocators *allocators) +{ + _PyMem = *allocators; +} + +void +PyObject_GetAllocators(PyMemAllocators *allocators) +{ + *allocators = _PyObject; +} + +void +PyObject_SetAllocators(PyMemAllocators *allocators) +{ + _PyObject = *allocators; +} + +void +_PyObject_GetArenaAllocators(void **ctx_p, + void* (**malloc_p) (void *ctx, size_t size), + void (**free_p) (void *ctx, void *ptr, size_t size)) +{ + *malloc_p = 
_PyObject_Arena.malloc; + *free_p = _PyObject_Arena.free; + *ctx_p = _PyObject_Arena.ctx; +} + +void +_PyObject_SetArenaAllocators(void *ctx, + void* (*malloc) (void *ctx, size_t size), + void (*free) (void *ctx, void *ptr, size_t size)) +{ + _PyObject_Arena.malloc = malloc; + _PyObject_Arena.free = free; + _PyObject_Arena.ctx = ctx; +} + +void * +PyMem_RawMalloc(size_t size) +{ + return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size); +} + +void* +PyMem_RawRealloc(void *ptr, size_t new_size) +{ + return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); +} + +void PyMem_RawFree(void *ptr) +{ + _PyMem_Raw.free(_PyMem_Raw.ctx, ptr); +} + +void * +PyMem_Malloc(size_t size) +{ + /* + * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. + * Most python internals blindly use a signed Py_ssize_t to track + * things without checking for overflows or negatives. + * As size_t is unsigned, checking for size < 0 is not required. + */ + if (size > (size_t)PY_SSIZE_T_MAX) + return NULL; + + return _PyMem.malloc(_PyMem.ctx, size); +} + +void * +PyMem_Realloc(void *ptr, size_t new_size) +{ + if (new_size > (size_t)PY_SSIZE_T_MAX) + return NULL; + + return _PyMem.realloc(_PyMem.ctx, ptr, new_size); +} + +void +PyMem_Free(void *ptr) +{ + _PyMem.free(_PyMem.ctx, ptr); +} + +void * +PyObject_Malloc(size_t size) +{ + /* + * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. + * Most python internals blindly use a signed Py_ssize_t to track + * things without checking for overflows or negatives. + * As size_t is unsigned, checking for size < 0 is not required. 
+ */ + if (size > (size_t)PY_SSIZE_T_MAX) + return NULL; + + return _PyObject.malloc(_PyObject.ctx, size); +} + +void * +PyObject_Realloc(void *ptr, size_t new_size) +{ + if (new_size > (size_t)PY_SSIZE_T_MAX) + return NULL; + + return _PyObject.realloc(_PyObject.ctx, ptr, new_size); +} + +void +PyObject_Free(void *ptr) +{ + _PyObject.free(_PyObject.ctx, ptr); +} + + #ifdef WITH_PYMALLOC #ifdef HAVE_MMAP @@ -545,7 +867,6 @@ new_arena(void) struct arena_object* arenaobj; uint excess; /* number of bytes above pool alignment */ void *address; - int err; #ifdef PYMALLOC_DEBUG if (Py_GETENV("PYTHONMALLOCSTATS")) @@ -567,11 +888,12 @@ new_arena(void) return NULL; /* overflow */ #endif nbytes = numarenas * sizeof(*arenas); - arenaobj = (struct arena_object *)realloc(arenas, nbytes); + arenaobj = (struct arena_object *)PyMem_Realloc(arenas, nbytes); if (arenaobj == NULL) return NULL; arenas = arenaobj; + /* We might need to fix pointers that were copied. However, * new_arena only gets called when all the pages in the * previous arenas are full. Thus, there are *no* pointers @@ -598,15 +920,8 @@ new_arena(void) arenaobj = unused_arena_objects; unused_arena_objects = arenaobj->nextarena; assert(arenaobj->address == 0); -#ifdef ARENAS_USE_MMAP - address = mmap(NULL, ARENA_SIZE, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - err = (address == MAP_FAILED); -#else - address = malloc(ARENA_SIZE); - err = (address == 0); -#endif - if (err) { + address = _PyObject_Arena.malloc(_PyObject_Arena.ctx, ARENA_SIZE); + if (address == NULL) { /* The allocation failed: return NULL after putting the * arenaobj back. */ @@ -769,9 +1084,8 @@ int Py_ADDRESS_IN_RANGE(void *P, poolp pool) Py_NO_INLINE; * Unless the optimizer reorders everything, being too smart... 
*/ -#undef PyObject_Malloc -void * -PyObject_Malloc(size_t nbytes) +static void * +_PyObject_Malloc(void *ctx, size_t nbytes) { block *bp; poolp pool; @@ -787,17 +1101,6 @@ PyObject_Malloc(size_t nbytes) goto redirect; #endif - /* - * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. - * Most python internals blindly use a signed Py_ssize_t to track - * things without checking for overflows or negatives. - * As size_t is unsigned, checking for nbytes < 0 is not required. - */ - if (nbytes > PY_SSIZE_T_MAX) { - _Py_AllocatedBlocks--; - return NULL; - } - /* * This implicitly redirects malloc(0). */ @@ -970,10 +1273,8 @@ redirect: * last chance to serve the request) or when the max memory limit * has been reached. */ - if (nbytes == 0) - nbytes = 1; { - void *result = malloc(nbytes); + void *result = PyMem_Malloc(nbytes); if (!result) _Py_AllocatedBlocks--; return result; @@ -982,9 +1283,8 @@ redirect: /* free */ -#undef PyObject_Free -void -PyObject_Free(void *p) +static void +_PyObject_Free(void *ctx, void *p) { poolp pool; block *lastfree; @@ -1093,11 +1393,8 @@ PyObject_Free(void *p) unused_arena_objects = ao; /* Free the entire arena. */ -#ifdef ARENAS_USE_MMAP - munmap((void *)ao->address, ARENA_SIZE); -#else - free((void *)ao->address); -#endif + _PyObject_Arena.free(_PyObject_Arena.ctx, + (void *)ao->address, ARENA_SIZE); ao->address = 0; /* mark unassociated */ --narenas_currently_allocated; @@ -1206,7 +1503,7 @@ PyObject_Free(void *p) redirect: #endif /* We didn't allocate this address. */ - free(p); + PyMem_Free(p); } /* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0, @@ -1214,9 +1511,8 @@ redirect: * return a non-NULL result. 
*/ -#undef PyObject_Realloc -void * -PyObject_Realloc(void *p, size_t nbytes) +static void * +_PyObject_Realloc(void *ctx, void *p, size_t nbytes) { void *bp; poolp pool; @@ -1226,16 +1522,7 @@ PyObject_Realloc(void *p, size_t nbytes) #endif if (p == NULL) - return PyObject_Malloc(nbytes); - - /* - * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. - * Most python internals blindly use a signed Py_ssize_t to track - * things without checking for overflows or negatives. - * As size_t is unsigned, checking for nbytes < 0 is not required. - */ - if (nbytes > PY_SSIZE_T_MAX) - return NULL; + return _PyObject_Malloc(ctx, nbytes); #ifdef WITH_VALGRIND /* Treat running_on_valgrind == -1 the same as 0 */ @@ -1263,10 +1550,10 @@ PyObject_Realloc(void *p, size_t nbytes) } size = nbytes; } - bp = PyObject_Malloc(nbytes); + bp = _PyObject_Malloc(ctx, nbytes); if (bp != NULL) { memcpy(bp, p, size); - PyObject_Free(p); + _PyObject_Free(ctx, p); } return bp; } @@ -1284,14 +1571,14 @@ PyObject_Realloc(void *p, size_t nbytes) * at p. Instead we punt: let C continue to manage this block. */ if (nbytes) - return realloc(p, nbytes); + return PyMem_Realloc(p, nbytes); /* C doesn't define the result of realloc(p, 0) (it may or may not * return NULL then), but Python's docs promise that nbytes==0 never * returns NULL. We don't pass 0 to realloc(), to avoid that endcase * to begin with. Even then, we can't be sure that realloc() won't * return NULL. */ - bp = realloc(p, 1); + bp = PyMem_Realloc(p, 1); return bp ? bp : p; } @@ -1301,24 +1588,6 @@ PyObject_Realloc(void *p, size_t nbytes) /* pymalloc not enabled: Redirect the entry points to malloc. These will * only be used by extensions that are compiled with pymalloc enabled. 
*/ -void * -PyObject_Malloc(size_t n) -{ - return PyMem_MALLOC(n); -} - -void * -PyObject_Realloc(void *p, size_t n) -{ - return PyMem_REALLOC(p, n); -} - -void -PyObject_Free(void *p) -{ - PyMem_FREE(p); -} - Py_ssize_t _Py_GetAllocatedBlocks(void) { @@ -1344,10 +1613,6 @@ _Py_GetAllocatedBlocks(void) #define DEADBYTE 0xDB /* dead (newly freed) memory */ #define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */ -/* We tag each block with an API ID in order to tag API violations */ -#define _PYMALLOC_MEM_ID 'm' /* the PyMem_Malloc() API */ -#define _PYMALLOC_OBJ_ID 'o' /* The PyObject_Malloc() API */ - static size_t serialno = 0; /* incremented on each debug {m,re}alloc */ /* serialno is always incremented via calling this routine. The point is @@ -1430,58 +1695,18 @@ p[2*S: 2*S+n] p[2*S+n: 2*S+n+S] Copies of FORBIDDENBYTE. Used to catch over- writes and reads. p[2*S+n+S: 2*S+n+2*S] - A serial number, incremented by 1 on each call to _PyObject_DebugMalloc - and _PyObject_DebugRealloc. + A serial number, incremented by 1 on each call to _PyMem_DebugMalloc + and _PyMem_DebugRealloc. This is a big-endian size_t. If "bad memory" is detected later, the serial number gives an excellent way to set a breakpoint on the next run, to capture the instant at which this block was passed out. 
*/ -/* debug replacements for the PyMem_* memory API */ -void * -_PyMem_DebugMalloc(size_t nbytes) -{ - return _PyObject_DebugMallocApi(_PYMALLOC_MEM_ID, nbytes); -} -void * -_PyMem_DebugRealloc(void *p, size_t nbytes) -{ - return _PyObject_DebugReallocApi(_PYMALLOC_MEM_ID, p, nbytes); -} -void -_PyMem_DebugFree(void *p) -{ - _PyObject_DebugFreeApi(_PYMALLOC_MEM_ID, p); -} - -/* debug replacements for the PyObject_* memory API */ -void * -_PyObject_DebugMalloc(size_t nbytes) -{ - return _PyObject_DebugMallocApi(_PYMALLOC_OBJ_ID, nbytes); -} -void * -_PyObject_DebugRealloc(void *p, size_t nbytes) -{ - return _PyObject_DebugReallocApi(_PYMALLOC_OBJ_ID, p, nbytes); -} -void -_PyObject_DebugFree(void *p) -{ - _PyObject_DebugFreeApi(_PYMALLOC_OBJ_ID, p); -} -void -_PyObject_DebugCheckAddress(const void *p) -{ - _PyObject_DebugCheckAddressApi(_PYMALLOC_OBJ_ID, p); -} - - -/* generic debug memory api, with an "id" to identify the API in use */ -void * -_PyObject_DebugMallocApi(char id, size_t nbytes) +static void * +_PyMem_DebugMalloc(void *ctx, size_t nbytes) { + debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; uchar *p; /* base address of malloc'ed block */ uchar *tail; /* p + 2*SST + nbytes == pointer to tail pad bytes */ size_t total; /* nbytes + 4*SST */ @@ -1492,14 +1717,14 @@ _PyObject_DebugMallocApi(char id, size_t nbytes) /* overflow: can't represent total as a size_t */ return NULL; - p = (uchar *)PyObject_Malloc(total); + p = (uchar *)api->alloc.malloc(api->alloc.ctx, total); if (p == NULL) return NULL; /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */ write_size_t(p, nbytes); - p[SST] = (uchar)id; - memset(p + SST + 1 , FORBIDDENBYTE, SST-1); + p[SST] = (uchar)api->api_id; + memset(p + SST + 1, FORBIDDENBYTE, SST-1); if (nbytes > 0) memset(p + 2*SST, CLEANBYTE, nbytes); @@ -1517,25 +1742,27 @@ _PyObject_DebugMallocApi(char id, size_t nbytes) Then fills the original bytes with DEADBYTE. Then calls the underlying free. 
*/ -void -_PyObject_DebugFreeApi(char api, void *p) +static void +_PyMem_DebugFree(void *ctx, void *p) { + debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; uchar *q = (uchar *)p - 2*SST; /* address returned from malloc */ size_t nbytes; if (p == NULL) return; - _PyObject_DebugCheckAddressApi(api, p); + _PyMem_DebugCheckAddress(api->api_id, p); nbytes = read_size_t(q); nbytes += 4*SST; if (nbytes > 0) memset(q, DEADBYTE, nbytes); - PyObject_Free(q); + api->alloc.free(api->alloc.ctx, q); } -void * -_PyObject_DebugReallocApi(char api, void *p, size_t nbytes) +static void * +_PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes) { + debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; uchar *q = (uchar *)p; uchar *tail; size_t total; /* nbytes + 4*SST */ @@ -1543,9 +1770,9 @@ _PyObject_DebugReallocApi(char api, void *p, size_t nbytes) int i; if (p == NULL) - return _PyObject_DebugMallocApi(api, nbytes); + return _PyMem_DebugMalloc(ctx, nbytes); - _PyObject_DebugCheckAddressApi(api, p); + _PyMem_DebugCheckAddress(api->api_id, p); bumpserialno(); original_nbytes = read_size_t(q - 2*SST); total = nbytes + 4*SST; @@ -1562,12 +1789,12 @@ _PyObject_DebugReallocApi(char api, void *p, size_t nbytes) * case we didn't get the chance to mark the old memory with DEADBYTE, * but we live with that. */ - q = (uchar *)PyObject_Realloc(q - 2*SST, total); + q = (uchar *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total); if (q == NULL) return NULL; write_size_t(q, nbytes); - assert(q[SST] == (uchar)api); + assert(q[SST] == (uchar)api->api_id); for (i = 1; i < SST; ++i) assert(q[SST + i] == FORBIDDENBYTE); q += 2*SST; @@ -1589,8 +1816,8 @@ _PyObject_DebugReallocApi(char api, void *p, size_t nbytes) * and call Py_FatalError to kill the program. * The API id, is also checked. 
*/ - void -_PyObject_DebugCheckAddressApi(char api, const void *p) +static void +_PyMem_DebugCheckAddress(char api, const void *p) { const uchar *q = (const uchar *)p; char msgbuf[64]; @@ -1642,7 +1869,7 @@ error: } /* Display info to stderr about the memory block at p. */ -void +static void _PyObject_DebugDumpAddress(const void *p) { const uchar *q = (const uchar *)p; -- cgit v1.2.1 From 5daa7c9b3301bead661f12d4608859f2f19e0521 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 15 Jun 2013 03:37:01 +0200 Subject: Revert changeset 6661a8154eb3: Issue #3329: Add new APIs to customize memory allocators The new API require more discussion. --- Objects/object.c | 20 +++ Objects/obmalloc.c | 501 +++++++++++++++-------------------------------------- 2 files changed, 157 insertions(+), 364 deletions(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index d382a3c157..79f1c8a835 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1859,6 +1859,26 @@ PyTypeObject *_PyCapsule_hack = &PyCapsule_Type; Py_ssize_t (*_Py_abstract_hack)(PyObject *) = PyObject_Size; +/* Python's malloc wrappers (see pymem.h) */ + +void * +PyMem_Malloc(size_t nbytes) +{ + return PyMem_MALLOC(nbytes); +} + +void * +PyMem_Realloc(void *p, size_t nbytes) +{ + return PyMem_REALLOC(p, nbytes); +} + +void +PyMem_Free(void *p) +{ + PyMem_FREE(p); +} + void _PyObject_DebugTypeStats(FILE *out) { diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 6dceb38eb6..3028f225ae 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1,327 +1,5 @@ #include "Python.h" -/* Python's malloc wrappers (see pymem.h) */ - -/* Forward declaration */ - -#ifdef PYMALLOC_DEBUG /* WITH_PYMALLOC && PYMALLOC_DEBUG */ -static void* _PyMem_DebugMalloc(void *ctx, size_t size); -static void _PyMem_DebugFree(void *ctx, void *p); -static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size); - -static void _PyObject_DebugDumpAddress(const void *p); -static void 
_PyMem_DebugCheckAddress(char api_id, const void *p); -#endif - -#ifdef WITH_PYMALLOC -static void* _PyObject_Malloc(void *ctx, size_t size); -static void _PyObject_Free(void *ctx, void *p); -static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size); -#endif - - -static void * -_PyMem_RawMalloc(void *ctx, size_t size) -{ - return malloc(size); -} - -static void * -_PyMem_RawRealloc(void *ctx, void *ptr, size_t size) -{ - return realloc(ptr, size); -} - -static void -_PyMem_RawFree(void *ctx, void *ptr) -{ - return free(ptr); -} - -static void * -_PyMem_Malloc(void *ctx, size_t size) -{ - /* PyMem_Malloc(0) means malloc(1). Some systems would return NULL - for malloc(0), which would be treated as an error. Some platforms would - return a pointer with no memory behind it, which would break pymalloc. - To solve these problems, allocate an extra byte. */ - if (size == 0) - size = 1; - return malloc(size); -} - -static void * -_PyMem_Realloc(void *ctx, void *ptr, size_t size) -{ - if (size == 0) - size = 1; - return realloc(ptr, size); -} - -#ifdef ARENAS_USE_MMAP -static void * -_PyObject_ArenaMmap(void *ctx, size_t size) -{ - void *ptr; - ptr = mmap(NULL, size, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (ptr == MAP_FAILED) - return NULL; - assert(ptr != NULL); - return ptr; -} - -static void -_PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size) -{ - return munmap(ptr, size); -} -#else -static void * -_PyObject_ArenaMalloc(void *ctx, size_t size) -{ - return malloc(size); -} - -static void -_PyObject_ArenaFree(void *ctx, void *ptr, size_t size) -{ - free(ptr); -} -#endif - -#define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawRealloc, _PyMem_RawFree -#define PYMEM_FUNCS _PyMem_Malloc, _PyMem_Realloc, _PyMem_RawFree -#ifdef WITH_PYMALLOC -#define PYOBJECT_FUNCS _PyObject_Malloc, _PyObject_Realloc, _PyObject_Free -#else -#define PYOBJECT_FUNCS PYMEM_FUNCS -#endif - -#ifdef PYMALLOC_DEBUG -typedef struct { - /* We tag each block with an API ID 
in order to tag API violations */ - char api_id; - PyMemAllocators alloc; -} debug_alloc_api_t; -static struct { - debug_alloc_api_t raw; - debug_alloc_api_t mem; - debug_alloc_api_t obj; -} _PyMem_Debug = { - {'r', {NULL, PYRAW_FUNCS}}, - {'m', {NULL, PYMEM_FUNCS}}, - {'o', {NULL, PYOBJECT_FUNCS}} - }; - -#define PYDEBUG_FUNCS _PyMem_DebugMalloc, _PyMem_DebugRealloc, _PyMem_DebugFree -#endif - -static PyMemAllocators _PyMem_Raw = { -#ifdef PYMALLOC_DEBUG - &_PyMem_Debug.raw, PYDEBUG_FUNCS -#else - NULL, PYMEM_FUNCS -#endif - }; - -static PyMemAllocators _PyMem = { -#ifdef PYMALLOC_DEBUG - &_PyMem_Debug.mem, PYDEBUG_FUNCS -#else - NULL, PYMEM_FUNCS -#endif - }; - -static PyMemAllocators _PyObject = { -#ifdef PYMALLOC_DEBUG - &_PyMem_Debug.obj, PYDEBUG_FUNCS -#else - NULL, PYOBJECT_FUNCS -#endif - }; - -#undef PYRAW_FUNCS -#undef PYMEM_FUNCS -#undef PYOBJECT_FUNCS -#undef PYDEBUG_FUNCS - -static struct { - void *ctx; - void* (*malloc) (void*, size_t); - void (*free) (void*, void*, size_t); -} _PyObject_Arena = {NULL, -#ifdef ARENAS_USE_MMAP - _PyObject_ArenaMmap, _PyObject_ArenaMunmap -#else - _PyObject_ArenaMalloc, _PyObject_ArenaFree -#endif - }; - -void -PyMem_SetupDebugHooks(void) -{ -#ifdef PYMALLOC_DEBUG - PyMemAllocators alloc; - - alloc.malloc = _PyMem_DebugMalloc; - alloc.realloc = _PyMem_DebugRealloc; - alloc.free = _PyMem_DebugFree; - - if (_PyMem_Raw.malloc != _PyMem_DebugMalloc) { - alloc.ctx = &_PyMem_Debug.raw; - PyMem_GetAllocators(&_PyMem_Debug.raw.alloc); - PyMem_SetAllocators(&alloc); - } - - if (_PyMem.malloc != _PyMem_DebugMalloc) { - alloc.ctx = &_PyMem_Debug.mem; - PyMem_GetAllocators(&_PyMem_Debug.mem.alloc); - PyMem_SetAllocators(&alloc); - } - - if (_PyObject.malloc != _PyMem_DebugMalloc) { - alloc.ctx = &_PyMem_Debug.obj; - PyObject_GetAllocators(&_PyMem_Debug.obj.alloc); - PyObject_SetAllocators(&alloc); - } -#endif -} - -void -PyMem_GetRawAllocators(PyMemAllocators *allocators) -{ - *allocators = _PyMem_Raw; -} - -void 
-PyMem_SetRawAllocators(PyMemAllocators *allocators) -{ - _PyMem_Raw = *allocators; -} - -void -PyMem_GetAllocators(PyMemAllocators *allocators) -{ - *allocators = _PyMem; -} - -void -PyMem_SetAllocators(PyMemAllocators *allocators) -{ - _PyMem = *allocators; -} - -void -PyObject_GetAllocators(PyMemAllocators *allocators) -{ - *allocators = _PyObject; -} - -void -PyObject_SetAllocators(PyMemAllocators *allocators) -{ - _PyObject = *allocators; -} - -void -_PyObject_GetArenaAllocators(void **ctx_p, - void* (**malloc_p) (void *ctx, size_t size), - void (**free_p) (void *ctx, void *ptr, size_t size)) -{ - *malloc_p = _PyObject_Arena.malloc; - *free_p = _PyObject_Arena.free; - *ctx_p = _PyObject_Arena.ctx; -} - -void -_PyObject_SetArenaAllocators(void *ctx, - void* (*malloc) (void *ctx, size_t size), - void (*free) (void *ctx, void *ptr, size_t size)) -{ - _PyObject_Arena.malloc = malloc; - _PyObject_Arena.free = free; - _PyObject_Arena.ctx = ctx; -} - -void * -PyMem_RawMalloc(size_t size) -{ - return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size); -} - -void* -PyMem_RawRealloc(void *ptr, size_t new_size) -{ - return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); -} - -void PyMem_RawFree(void *ptr) -{ - _PyMem_Raw.free(_PyMem_Raw.ctx, ptr); -} - -void * -PyMem_Malloc(size_t size) -{ - /* - * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. - * Most python internals blindly use a signed Py_ssize_t to track - * things without checking for overflows or negatives. - * As size_t is unsigned, checking for size < 0 is not required. 
- */ - if (size > (size_t)PY_SSIZE_T_MAX) - return NULL; - - return _PyMem.malloc(_PyMem.ctx, size); -} - -void * -PyMem_Realloc(void *ptr, size_t new_size) -{ - if (new_size > (size_t)PY_SSIZE_T_MAX) - return NULL; - - return _PyMem.realloc(_PyMem.ctx, ptr, new_size); -} - -void -PyMem_Free(void *ptr) -{ - _PyMem.free(_PyMem.ctx, ptr); -} - -void * -PyObject_Malloc(size_t size) -{ - /* - * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. - * Most python internals blindly use a signed Py_ssize_t to track - * things without checking for overflows or negatives. - * As size_t is unsigned, checking for size < 0 is not required. - */ - if (size > (size_t)PY_SSIZE_T_MAX) - return NULL; - - return _PyObject.malloc(_PyObject.ctx, size); -} - -void * -PyObject_Realloc(void *ptr, size_t new_size) -{ - if (new_size > (size_t)PY_SSIZE_T_MAX) - return NULL; - - return _PyObject.realloc(_PyObject.ctx, ptr, new_size); -} - -void -PyObject_Free(void *ptr) -{ - _PyObject.free(_PyObject.ctx, ptr); -} - - #ifdef WITH_PYMALLOC #ifdef HAVE_MMAP @@ -867,6 +545,7 @@ new_arena(void) struct arena_object* arenaobj; uint excess; /* number of bytes above pool alignment */ void *address; + int err; #ifdef PYMALLOC_DEBUG if (Py_GETENV("PYTHONMALLOCSTATS")) @@ -888,12 +567,11 @@ new_arena(void) return NULL; /* overflow */ #endif nbytes = numarenas * sizeof(*arenas); - arenaobj = (struct arena_object *)PyMem_Realloc(arenas, nbytes); + arenaobj = (struct arena_object *)realloc(arenas, nbytes); if (arenaobj == NULL) return NULL; arenas = arenaobj; - /* We might need to fix pointers that were copied. However, * new_arena only gets called when all the pages in the * previous arenas are full. 
Thus, there are *no* pointers @@ -920,8 +598,15 @@ new_arena(void) arenaobj = unused_arena_objects; unused_arena_objects = arenaobj->nextarena; assert(arenaobj->address == 0); - address = _PyObject_Arena.malloc(_PyObject_Arena.ctx, ARENA_SIZE); - if (address == NULL) { +#ifdef ARENAS_USE_MMAP + address = mmap(NULL, ARENA_SIZE, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + err = (address == MAP_FAILED); +#else + address = malloc(ARENA_SIZE); + err = (address == 0); +#endif + if (err) { /* The allocation failed: return NULL after putting the * arenaobj back. */ @@ -1084,8 +769,9 @@ int Py_ADDRESS_IN_RANGE(void *P, poolp pool) Py_NO_INLINE; * Unless the optimizer reorders everything, being too smart... */ -static void * -_PyObject_Malloc(void *ctx, size_t nbytes) +#undef PyObject_Malloc +void * +PyObject_Malloc(size_t nbytes) { block *bp; poolp pool; @@ -1101,6 +787,17 @@ _PyObject_Malloc(void *ctx, size_t nbytes) goto redirect; #endif + /* + * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. + * Most python internals blindly use a signed Py_ssize_t to track + * things without checking for overflows or negatives. + * As size_t is unsigned, checking for nbytes < 0 is not required. + */ + if (nbytes > PY_SSIZE_T_MAX) { + _Py_AllocatedBlocks--; + return NULL; + } + /* * This implicitly redirects malloc(0). */ @@ -1273,8 +970,10 @@ redirect: * last chance to serve the request) or when the max memory limit * has been reached. */ + if (nbytes == 0) + nbytes = 1; { - void *result = PyMem_Malloc(nbytes); + void *result = malloc(nbytes); if (!result) _Py_AllocatedBlocks--; return result; @@ -1283,8 +982,9 @@ redirect: /* free */ -static void -_PyObject_Free(void *ctx, void *p) +#undef PyObject_Free +void +PyObject_Free(void *p) { poolp pool; block *lastfree; @@ -1393,8 +1093,11 @@ _PyObject_Free(void *ctx, void *p) unused_arena_objects = ao; /* Free the entire arena. 
*/ - _PyObject_Arena.free(_PyObject_Arena.ctx, - (void *)ao->address, ARENA_SIZE); +#ifdef ARENAS_USE_MMAP + munmap((void *)ao->address, ARENA_SIZE); +#else + free((void *)ao->address); +#endif ao->address = 0; /* mark unassociated */ --narenas_currently_allocated; @@ -1503,7 +1206,7 @@ _PyObject_Free(void *ctx, void *p) redirect: #endif /* We didn't allocate this address. */ - PyMem_Free(p); + free(p); } /* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0, @@ -1511,8 +1214,9 @@ redirect: * return a non-NULL result. */ -static void * -_PyObject_Realloc(void *ctx, void *p, size_t nbytes) +#undef PyObject_Realloc +void * +PyObject_Realloc(void *p, size_t nbytes) { void *bp; poolp pool; @@ -1522,7 +1226,16 @@ _PyObject_Realloc(void *ctx, void *p, size_t nbytes) #endif if (p == NULL) - return _PyObject_Malloc(ctx, nbytes); + return PyObject_Malloc(nbytes); + + /* + * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. + * Most python internals blindly use a signed Py_ssize_t to track + * things without checking for overflows or negatives. + * As size_t is unsigned, checking for nbytes < 0 is not required. + */ + if (nbytes > PY_SSIZE_T_MAX) + return NULL; #ifdef WITH_VALGRIND /* Treat running_on_valgrind == -1 the same as 0 */ @@ -1550,10 +1263,10 @@ _PyObject_Realloc(void *ctx, void *p, size_t nbytes) } size = nbytes; } - bp = _PyObject_Malloc(ctx, nbytes); + bp = PyObject_Malloc(nbytes); if (bp != NULL) { memcpy(bp, p, size); - _PyObject_Free(ctx, p); + PyObject_Free(p); } return bp; } @@ -1571,14 +1284,14 @@ _PyObject_Realloc(void *ctx, void *p, size_t nbytes) * at p. Instead we punt: let C continue to manage this block. */ if (nbytes) - return PyMem_Realloc(p, nbytes); + return realloc(p, nbytes); /* C doesn't define the result of realloc(p, 0) (it may or may not * return NULL then), but Python's docs promise that nbytes==0 never * returns NULL. We don't pass 0 to realloc(), to avoid that endcase * to begin with. 
Even then, we can't be sure that realloc() won't * return NULL. */ - bp = PyMem_Realloc(p, 1); + bp = realloc(p, 1); return bp ? bp : p; } @@ -1588,6 +1301,24 @@ _PyObject_Realloc(void *ctx, void *p, size_t nbytes) /* pymalloc not enabled: Redirect the entry points to malloc. These will * only be used by extensions that are compiled with pymalloc enabled. */ +void * +PyObject_Malloc(size_t n) +{ + return PyMem_MALLOC(n); +} + +void * +PyObject_Realloc(void *p, size_t n) +{ + return PyMem_REALLOC(p, n); +} + +void +PyObject_Free(void *p) +{ + PyMem_FREE(p); +} + Py_ssize_t _Py_GetAllocatedBlocks(void) { @@ -1613,6 +1344,10 @@ _Py_GetAllocatedBlocks(void) #define DEADBYTE 0xDB /* dead (newly freed) memory */ #define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */ +/* We tag each block with an API ID in order to tag API violations */ +#define _PYMALLOC_MEM_ID 'm' /* the PyMem_Malloc() API */ +#define _PYMALLOC_OBJ_ID 'o' /* The PyObject_Malloc() API */ + static size_t serialno = 0; /* incremented on each debug {m,re}alloc */ /* serialno is always incremented via calling this routine. The point is @@ -1695,18 +1430,58 @@ p[2*S: 2*S+n] p[2*S+n: 2*S+n+S] Copies of FORBIDDENBYTE. Used to catch over- writes and reads. p[2*S+n+S: 2*S+n+2*S] - A serial number, incremented by 1 on each call to _PyMem_DebugMalloc - and _PyMem_DebugRealloc. + A serial number, incremented by 1 on each call to _PyObject_DebugMalloc + and _PyObject_DebugRealloc. This is a big-endian size_t. If "bad memory" is detected later, the serial number gives an excellent way to set a breakpoint on the next run, to capture the instant at which this block was passed out. 
*/ -static void * -_PyMem_DebugMalloc(void *ctx, size_t nbytes) +/* debug replacements for the PyMem_* memory API */ +void * +_PyMem_DebugMalloc(size_t nbytes) +{ + return _PyObject_DebugMallocApi(_PYMALLOC_MEM_ID, nbytes); +} +void * +_PyMem_DebugRealloc(void *p, size_t nbytes) +{ + return _PyObject_DebugReallocApi(_PYMALLOC_MEM_ID, p, nbytes); +} +void +_PyMem_DebugFree(void *p) +{ + _PyObject_DebugFreeApi(_PYMALLOC_MEM_ID, p); +} + +/* debug replacements for the PyObject_* memory API */ +void * +_PyObject_DebugMalloc(size_t nbytes) +{ + return _PyObject_DebugMallocApi(_PYMALLOC_OBJ_ID, nbytes); +} +void * +_PyObject_DebugRealloc(void *p, size_t nbytes) +{ + return _PyObject_DebugReallocApi(_PYMALLOC_OBJ_ID, p, nbytes); +} +void +_PyObject_DebugFree(void *p) +{ + _PyObject_DebugFreeApi(_PYMALLOC_OBJ_ID, p); +} +void +_PyObject_DebugCheckAddress(const void *p) +{ + _PyObject_DebugCheckAddressApi(_PYMALLOC_OBJ_ID, p); +} + + +/* generic debug memory api, with an "id" to identify the API in use */ +void * +_PyObject_DebugMallocApi(char id, size_t nbytes) { - debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; uchar *p; /* base address of malloc'ed block */ uchar *tail; /* p + 2*SST + nbytes == pointer to tail pad bytes */ size_t total; /* nbytes + 4*SST */ @@ -1717,14 +1492,14 @@ _PyMem_DebugMalloc(void *ctx, size_t nbytes) /* overflow: can't represent total as a size_t */ return NULL; - p = (uchar *)api->alloc.malloc(api->alloc.ctx, total); + p = (uchar *)PyObject_Malloc(total); if (p == NULL) return NULL; /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */ write_size_t(p, nbytes); - p[SST] = (uchar)api->api_id; - memset(p + SST + 1, FORBIDDENBYTE, SST-1); + p[SST] = (uchar)id; + memset(p + SST + 1 , FORBIDDENBYTE, SST-1); if (nbytes > 0) memset(p + 2*SST, CLEANBYTE, nbytes); @@ -1742,27 +1517,25 @@ _PyMem_DebugMalloc(void *ctx, size_t nbytes) Then fills the original bytes with DEADBYTE. Then calls the underlying free. 
*/ -static void -_PyMem_DebugFree(void *ctx, void *p) +void +_PyObject_DebugFreeApi(char api, void *p) { - debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; uchar *q = (uchar *)p - 2*SST; /* address returned from malloc */ size_t nbytes; if (p == NULL) return; - _PyMem_DebugCheckAddress(api->api_id, p); + _PyObject_DebugCheckAddressApi(api, p); nbytes = read_size_t(q); nbytes += 4*SST; if (nbytes > 0) memset(q, DEADBYTE, nbytes); - api->alloc.free(api->alloc.ctx, q); + PyObject_Free(q); } -static void * -_PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes) +void * +_PyObject_DebugReallocApi(char api, void *p, size_t nbytes) { - debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; uchar *q = (uchar *)p; uchar *tail; size_t total; /* nbytes + 4*SST */ @@ -1770,9 +1543,9 @@ _PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes) int i; if (p == NULL) - return _PyMem_DebugMalloc(ctx, nbytes); + return _PyObject_DebugMallocApi(api, nbytes); - _PyMem_DebugCheckAddress(api->api_id, p); + _PyObject_DebugCheckAddressApi(api, p); bumpserialno(); original_nbytes = read_size_t(q - 2*SST); total = nbytes + 4*SST; @@ -1789,12 +1562,12 @@ _PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes) * case we didn't get the chance to mark the old memory with DEADBYTE, * but we live with that. */ - q = (uchar *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total); + q = (uchar *)PyObject_Realloc(q - 2*SST, total); if (q == NULL) return NULL; write_size_t(q, nbytes); - assert(q[SST] == (uchar)api->api_id); + assert(q[SST] == (uchar)api); for (i = 1; i < SST; ++i) assert(q[SST + i] == FORBIDDENBYTE); q += 2*SST; @@ -1816,8 +1589,8 @@ _PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes) * and call Py_FatalError to kill the program. * The API id, is also checked. 
*/ -static void -_PyMem_DebugCheckAddress(char api, const void *p) + void +_PyObject_DebugCheckAddressApi(char api, const void *p) { const uchar *q = (const uchar *)p; char msgbuf[64]; @@ -1869,7 +1642,7 @@ error: } /* Display info to stderr about the memory block at p. */ -static void +void _PyObject_DebugDumpAddress(const void *p) { const uchar *q = (const uchar *)p; -- cgit v1.2.1 From 8c81a0a7808d5aed206dabec4633d721e0e3d392 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 18 Jun 2013 23:28:18 +0200 Subject: Fix compilation warning with gcc 4.8 (unused typedef) --- Objects/setobject.c | 1 - 1 file changed, 1 deletion(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index c484dce413..ea5a24c516 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -214,7 +214,6 @@ static int set_insert_key(register PySetObject *so, PyObject *key, Py_hash_t hash) { register setentry *entry; - typedef setentry *(*lookupfunc)(PySetObject *, PyObject *, Py_hash_t); assert(so->lookup != NULL); entry = so->lookup(so, key, hash); -- cgit v1.2.1 From 699c61801285baa270d0bae0b6fd81b83700b62a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 24 Jun 2013 23:31:48 +0200 Subject: Issue #9566: Fix a compiler warning in tupleiter_setstate() on Windows x64 --- Objects/tupleobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index d808e08aa1..de43ee4bdf 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -997,7 +997,7 @@ tupleiter_reduce(tupleiterobject *it) static PyObject * tupleiter_setstate(tupleiterobject *it, PyObject *state) { - long index = PyLong_AsLong(state); + Py_ssize_t index = PyLong_AsLong(state); if (index == -1 && PyErr_Occurred()) return NULL; if (it->it_seq != NULL) { -- cgit v1.2.1 From 3ab1e8a7d7a2b8e99d4623261b0c378f65dfffc3 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 24 Jun 2013 23:59:24 +0200 Subject: 
Issue #9566: More long/Py_ssize_t fixes in tuple and list iterators (it_index) --- Objects/listobject.c | 6 +++--- Objects/tupleobject.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'Objects') diff --git a/Objects/listobject.c b/Objects/listobject.c index 4cc34b5a3c..0c82cc40ec 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2660,7 +2660,7 @@ PyTypeObject PyList_Type = { typedef struct { PyObject_HEAD - long it_index; + Py_ssize_t it_index; PyListObject *it_seq; /* Set to NULL when iterator is exhausted */ } listiterobject; @@ -2797,7 +2797,7 @@ listiter_reduce(listiterobject *it) static PyObject * listiter_setstate(listiterobject *it, PyObject *state) { - long index = PyLong_AsLong(state); + Py_ssize_t index = PyLong_AsSsize_t(state); if (index == -1 && PyErr_Occurred()) return NULL; if (it->it_seq != NULL) { @@ -2958,7 +2958,7 @@ listiter_reduce_general(void *_it, int forward) if (forward) { listiterobject *it = (listiterobject *)_it; if (it->it_seq) - return Py_BuildValue("N(O)l", _PyObject_GetBuiltin("iter"), + return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"), it->it_seq, it->it_index); } else { listreviterobject *it = (listreviterobject *)_it; diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index de43ee4bdf..52896b7919 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -988,7 +988,7 @@ static PyObject * tupleiter_reduce(tupleiterobject *it) { if (it->it_seq) - return Py_BuildValue("N(O)l", _PyObject_GetBuiltin("iter"), + return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"), it->it_seq, it->it_index); else return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter")); @@ -997,7 +997,7 @@ tupleiter_reduce(tupleiterobject *it) static PyObject * tupleiter_setstate(tupleiterobject *it, PyObject *state) { - Py_ssize_t index = PyLong_AsLong(state); + Py_ssize_t index = PyLong_AsSsize_t(state); if (index == -1 && PyErr_Occurred()) return NULL; if (it->it_seq != NULL) { -- cgit v1.2.1 
From b963769c480e670e792ee3da641ded5452d83194 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 25 Jun 2013 00:37:25 +0200 Subject: If MS_WIN64 is defined, MS_WINDOWS is also defined: #ifdef can be simplified. --- Objects/fileobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 3a31314086..e0c5bfef55 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -390,7 +390,7 @@ stdprinter_write(PyStdPrinter_Object *self, PyObject *args) Py_BEGIN_ALLOW_THREADS errno = 0; -#if defined(MS_WIN64) || defined(MS_WINDOWS) +#ifdef MS_WINDOWS if (n > INT_MAX) n = INT_MAX; n = write(self->fd, c, (int)n); -- cgit v1.2.1 From e3f53947f72c81ffc563aef97678c912272c2ed1 Mon Sep 17 00:00:00 2001 From: "Martin v. L?wis" Date: Thu, 27 Jun 2013 12:23:29 +0200 Subject: Issue #13483: Use VirtualAlloc in obmalloc on Windows. --- Objects/obmalloc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 3028f225ae..3fac6d4bce 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -9,6 +9,10 @@ #endif #endif +#ifdef MS_WINDOWS +#include +#endif + #ifdef WITH_VALGRIND #include @@ -598,7 +602,11 @@ new_arena(void) arenaobj = unused_arena_objects; unused_arena_objects = arenaobj->nextarena; assert(arenaobj->address == 0); -#ifdef ARENAS_USE_MMAP +#ifdef MS_WINDOWS + address = (void*)VirtualAlloc(NULL, ARENA_SIZE, + MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + err = (address == NULL); +#elif defined(ARENAS_USE_MMAP) address = mmap(NULL, ARENA_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); err = (address == MAP_FAILED); @@ -1093,7 +1101,9 @@ PyObject_Free(void *p) unused_arena_objects = ao; /* Free the entire arena. 
*/ -#ifdef ARENAS_USE_MMAP +#ifdef MS_WINDOWS + VirtualFree((void *)ao->address, 0, MEM_RELEASE); +#elif defined(ARENAS_USE_MMAP) munmap((void *)ao->address, ARENA_SIZE); #else free((void *)ao->address); -- cgit v1.2.1 From 1a11ff53d2a4570215be7e79666fceecf80470a0 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sat, 29 Jun 2013 20:41:06 +0200 Subject: Fix memory leak in endswith CID 1040368 (#1 of 1): Resource leak (RESOURCE_LEAK) leaked_storage: Variable substring going out of scope leaks the storage it points to. --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5659c71ce8..30a925c341 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12941,9 +12941,9 @@ unicode_endswith(PyObject *self, return NULL; } result = tailmatch(self, substring, start, end, +1); + Py_DECREF(substring); if (result == -1) return NULL; - Py_DECREF(substring); return PyBool_FromLong(result); } -- cgit v1.2.1 From a71d1d264054600c5ac4c035401002c22ffb0b9a Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sat, 29 Jun 2013 21:17:34 +0200 Subject: Fix ref leak in error case of unicode rindex and rfind CID 983320: Resource leak (RESOURCE_LEAK) CID 983321: Resource leak (RESOURCE_LEAK) leaked_storage: Variable substring going out of scope leaks the storage it points to. 
--- Objects/unicodeobject.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 30a925c341..fe0337fc46 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12248,10 +12248,14 @@ unicode_rfind(PyObject *self, PyObject *args) &start, &end)) return NULL; - if (PyUnicode_READY(self) == -1) + if (PyUnicode_READY(self) == -1) { + Py_DECREF(substring); return NULL; - if (PyUnicode_READY(substring) == -1) + } + if (PyUnicode_READY(substring) == -1) { + Py_DECREF(substring); return NULL; + } result = any_find_slice(-1, self, substring, start, end); @@ -12280,10 +12284,14 @@ unicode_rindex(PyObject *self, PyObject *args) &start, &end)) return NULL; - if (PyUnicode_READY(self) == -1) + if (PyUnicode_READY(self) == -1) { + Py_DECREF(substring); return NULL; - if (PyUnicode_READY(substring) == -1) + } + if (PyUnicode_READY(substring) == -1) { + Py_DECREF(substring); return NULL; + } result = any_find_slice(-1, self, substring, start, end); -- cgit v1.2.1 From 47eba54cec78afc2dd4907d05515c140e929b417 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sat, 29 Jun 2013 21:21:37 +0200 Subject: Fix ref leak in error case of unicode index CID 983319 (#1 of 2): Resource leak (RESOURCE_LEAK) leaked_storage: Variable substring going out of scope leaks the storage it points to. 
--- Objects/unicodeobject.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index fe0337fc46..501921df8e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11180,10 +11180,14 @@ unicode_index(PyObject *self, PyObject *args) &start, &end)) return NULL; - if (PyUnicode_READY(self) == -1) + if (PyUnicode_READY(self) == -1) { + Py_DECREF(substring); return NULL; - if (PyUnicode_READY(substring) == -1) + } + if (PyUnicode_READY(substring) == -1) { + Py_DECREF(substring); return NULL; + } result = any_find_slice(1, self, substring, start, end); -- cgit v1.2.1 From 02ebf23e6996b9803d42236a1db43f0c3def4c02 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sat, 29 Jun 2013 21:33:36 +0200 Subject: Fix ref leak in error case of unicode find, count, formatlong CID 983315: Resource leak (RESOURCE_LEAK) CID 983316: Resource leak (RESOURCE_LEAK) CID 983317: Resource leak (RESOURCE_LEAK) --- Objects/unicodeobject.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 501921df8e..ab1dbb9625 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10875,8 +10875,10 @@ unicode_count(PyObject *self, PyObject *args) kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); - if (kind2 > kind1) + if (kind2 > kind1) { + Py_DECREF(substring); return PyLong_FromLong(0); + } kind = kind1; buf1 = PyUnicode_DATA(self); buf2 = PyUnicode_DATA(substring); @@ -11054,10 +11056,14 @@ unicode_find(PyObject *self, PyObject *args) &start, &end)) return NULL; - if (PyUnicode_READY(self) == -1) + if (PyUnicode_READY(self) == -1) { + Py_DECREF(substring); return NULL; - if (PyUnicode_READY(substring) == -1) + } + if (PyUnicode_READY(substring) == -1) { + Py_DECREF(substring); return NULL; + } result = any_find_slice(1, self, substring, start, end); @@ 
-13581,12 +13587,14 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg) /* To modify the string in-place, there can only be one reference. */ if (Py_REFCNT(result) != 1) { + Py_DECREF(result); PyErr_BadInternalCall(); return NULL; } buf = PyUnicode_DATA(result); llen = PyUnicode_GET_LENGTH(result); if (llen > INT_MAX) { + Py_DECREF(result); PyErr_SetString(PyExc_ValueError, "string too large in _PyBytes_FormatLong"); return NULL; -- cgit v1.2.1 From d5e4e1699da17182503b77eb27bf6cee3c46bb7f Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Thu, 4 Jul 2013 17:51:50 -0400 Subject: Issue #15767: back out 8a0ed9f63c6e, finishing the removal of ModuleNotFoundError. --- Objects/exceptions.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'Objects') diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 23f6605e3f..79bbb8f2ff 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -709,13 +709,6 @@ ComplexExtendsException(PyExc_Exception, ImportError, "Import can't find module, or can't find name in " "module."); -/* - * ModuleNotFoundError extends ImportError - */ - -MiddlingExtendsException(PyExc_ImportError, ModuleNotFoundError, ImportError, - "Module not found."); - /* * OSError extends Exception */ @@ -2402,7 +2395,6 @@ _PyExc_Init(PyObject *bltinmod) PRE_INIT(SystemExit) PRE_INIT(KeyboardInterrupt) PRE_INIT(ImportError) - PRE_INIT(ModuleNotFoundError) PRE_INIT(OSError) PRE_INIT(EOFError) PRE_INIT(RuntimeError) @@ -2473,7 +2465,6 @@ _PyExc_Init(PyObject *bltinmod) POST_INIT(SystemExit) POST_INIT(KeyboardInterrupt) POST_INIT(ImportError) - POST_INIT(ModuleNotFoundError) POST_INIT(OSError) INIT_ALIAS(EnvironmentError, OSError) INIT_ALIAS(IOError, OSError) -- cgit v1.2.1 From bf95048c04e98cbbec367c6112d057f19aa441ae Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 7 Jul 2013 02:05:46 +0200 Subject: Issue #3329: Implement the PEP 445 Add new enum: * PyMemAllocatorDomain Add new structures: * PyMemAllocator * 
PyObjectArenaAllocator Add new functions: * PyMem_RawMalloc(), PyMem_RawRealloc(), PyMem_RawFree() * PyMem_GetAllocator(), PyMem_SetAllocator() * PyObject_GetArenaAllocator(), PyObject_SetArenaAllocator() * PyMem_SetupDebugHooks() Changes: * PyMem_Malloc()/PyObject_Realloc() now always call malloc()/realloc(), instead of calling PyObject_Malloc()/PyObject_Realloc() in debug mode. * PyObject_Malloc()/PyObject_Realloc() now falls back to PyMem_Malloc()/PyMem_Realloc() for allocations larger than 512 bytes. * Redesign debug checks on memory block allocators as hooks, instead of using C macros --- Objects/object.c | 20 --- Objects/obmalloc.c | 504 +++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 355 insertions(+), 169 deletions(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index 79f1c8a835..d382a3c157 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1859,26 +1859,6 @@ PyTypeObject *_PyCapsule_hack = &PyCapsule_Type; Py_ssize_t (*_Py_abstract_hack)(PyObject *) = PyObject_Size; -/* Python's malloc wrappers (see pymem.h) */ - -void * -PyMem_Malloc(size_t nbytes) -{ - return PyMem_MALLOC(nbytes); -} - -void * -PyMem_Realloc(void *p, size_t nbytes) -{ - return PyMem_REALLOC(p, nbytes); -} - -void -PyMem_Free(void *p) -{ - PyMem_FREE(p); -} - void _PyObject_DebugTypeStats(FILE *out) { diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 3fac6d4bce..d08515d776 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1,18 +1,326 @@ #include "Python.h" +/* Python's malloc wrappers (see pymem.h) */ + +#ifdef PYMALLOC_DEBUG /* WITH_PYMALLOC && PYMALLOC_DEBUG */ +/* Forward declaration */ +static void* _PyMem_DebugMalloc(void *ctx, size_t size); +static void _PyMem_DebugFree(void *ctx, void *p); +static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size); + +static void _PyObject_DebugDumpAddress(const void *p); +static void _PyMem_DebugCheckAddress(char api_id, const void *p); +#endif + #ifdef 
WITH_PYMALLOC -#ifdef HAVE_MMAP - #include - #ifdef MAP_ANONYMOUS - #define ARENAS_USE_MMAP - #endif +#ifdef MS_WINDOWS +# include +#elif defined(HAVE_MMAP) +# include +# ifdef MAP_ANONYMOUS +# define ARENAS_USE_MMAP +# endif +#endif + +/* Forward declaration */ +static void* _PyObject_Malloc(void *ctx, size_t size); +static void _PyObject_Free(void *ctx, void *p); +static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size); #endif + +static void * +_PyMem_RawMalloc(void *ctx, size_t size) +{ + /* PyMem_Malloc(0) means malloc(1). Some systems would return NULL + for malloc(0), which would be treated as an error. Some platforms would + return a pointer with no memory behind it, which would break pymalloc. + To solve these problems, allocate an extra byte. */ + if (size == 0) + size = 1; + return malloc(size); +} + +static void * +_PyMem_RawRealloc(void *ctx, void *ptr, size_t size) +{ + if (size == 0) + size = 1; + return realloc(ptr, size); +} + +static void +_PyMem_RawFree(void *ctx, void *ptr) +{ + free(ptr); +} + + #ifdef MS_WINDOWS -#include +static void * +_PyObject_ArenaVirtualAlloc(void *ctx, size_t size) +{ + return VirtualAlloc(NULL, size, + MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); +} + +static void +_PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size) +{ + VirtualFree(ptr, size, MEM_RELEASE); +} + +#elif defined(ARENAS_USE_MMAP) +static void * +_PyObject_ArenaMmap(void *ctx, size_t size) +{ + void *ptr; + ptr = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) + return NULL; + assert(ptr != NULL); + return ptr; +} + +static void +_PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size) +{ + munmap(ptr, size); +} + +#else +static void * +_PyObject_ArenaMalloc(void *ctx, size_t size) +{ + return malloc(size); +} + +static void +_PyObject_ArenaFree(void *ctx, void *ptr, size_t size) +{ + free(ptr); +} #endif + +#define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawRealloc, _PyMem_RawFree +#ifdef 
WITH_PYMALLOC +#define PYOBJECT_FUNCS _PyObject_Malloc, _PyObject_Realloc, _PyObject_Free +#else +#define PYOBJECT_FUNCS PYRAW_FUNCS +#endif + +#ifdef PYMALLOC_DEBUG +typedef struct { + /* We tag each block with an API ID in order to tag API violations */ + char api_id; + PyMemAllocator alloc; +} debug_alloc_api_t; +static struct { + debug_alloc_api_t raw; + debug_alloc_api_t mem; + debug_alloc_api_t obj; +} _PyMem_Debug = { + {'r', {NULL, PYRAW_FUNCS}}, + {'m', {NULL, PYRAW_FUNCS}}, + {'o', {NULL, PYOBJECT_FUNCS}} + }; + +#define PYDEBUG_FUNCS _PyMem_DebugMalloc, _PyMem_DebugRealloc, _PyMem_DebugFree +#endif + +static PyMemAllocator _PyMem_Raw = { +#ifdef PYMALLOC_DEBUG + &_PyMem_Debug.raw, PYDEBUG_FUNCS +#else + NULL, PYRAW_FUNCS +#endif + }; + +static PyMemAllocator _PyMem = { +#ifdef PYMALLOC_DEBUG + &_PyMem_Debug.mem, PYDEBUG_FUNCS +#else + NULL, PYRAW_FUNCS +#endif + }; + +static PyMemAllocator _PyObject = { +#ifdef PYMALLOC_DEBUG + &_PyMem_Debug.obj, PYDEBUG_FUNCS +#else + NULL, PYOBJECT_FUNCS +#endif + }; + +#undef PYRAW_FUNCS +#undef PYOBJECT_FUNCS +#undef PYDEBUG_FUNCS + +static PyObjectArenaAllocator _PyObject_Arena = {NULL, +#ifdef MS_WINDOWS + _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree +#elif defined(ARENAS_USE_MMAP) + _PyObject_ArenaMmap, _PyObject_ArenaMunmap +#else + _PyObject_ArenaMalloc, _PyObject_ArenaFree +#endif + }; + +void +PyMem_SetupDebugHooks(void) +{ +#ifdef PYMALLOC_DEBUG + PyMemAllocator alloc; + + alloc.malloc = _PyMem_DebugMalloc; + alloc.realloc = _PyMem_DebugRealloc; + alloc.free = _PyMem_DebugFree; + + if (_PyMem_Raw.malloc != _PyMem_DebugMalloc) { + alloc.ctx = &_PyMem_Debug.raw; + PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc); + PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc); + } + + if (_PyMem.malloc != _PyMem_DebugMalloc) { + alloc.ctx = &_PyMem_Debug.mem; + PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc); + PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc); + } + + if (_PyObject.malloc != 
_PyMem_DebugMalloc) { + alloc.ctx = &_PyMem_Debug.obj; + PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc); + PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc); + } +#endif +} + +void +PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocator *allocator) +{ + switch(domain) + { + case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break; + case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break; + case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break; + default: + /* unknown domain */ + allocator->ctx = NULL; + allocator->malloc = NULL; + allocator->realloc = NULL; + allocator->free = NULL; + } +} + +void +PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocator *allocator) +{ + switch(domain) + { + case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break; + case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break; + case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break; + /* ignore unknown domain */ + } + +} + +void +PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator) +{ + *allocator = _PyObject_Arena; +} + +void +PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator) +{ + _PyObject_Arena = *allocator; +} + +void * +PyMem_RawMalloc(size_t size) +{ + /* + * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. + * Most python internals blindly use a signed Py_ssize_t to track + * things without checking for overflows or negatives. + * As size_t is unsigned, checking for size < 0 is not required. 
+ */ + if (size > (size_t)PY_SSIZE_T_MAX) + return NULL; + + return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size); +} + +void* +PyMem_RawRealloc(void *ptr, size_t new_size) +{ + /* see PyMem_RawMalloc() */ + if (new_size > (size_t)PY_SSIZE_T_MAX) + return NULL; + return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); +} + +void PyMem_RawFree(void *ptr) +{ + _PyMem_Raw.free(_PyMem_Raw.ctx, ptr); +} + +void * +PyMem_Malloc(size_t size) +{ + /* see PyMem_RawMalloc() */ + if (size > (size_t)PY_SSIZE_T_MAX) + return NULL; + return _PyMem.malloc(_PyMem.ctx, size); +} + +void * +PyMem_Realloc(void *ptr, size_t new_size) +{ + /* see PyMem_RawMalloc() */ + if (new_size > (size_t)PY_SSIZE_T_MAX) + return NULL; + return _PyMem.realloc(_PyMem.ctx, ptr, new_size); +} + +void +PyMem_Free(void *ptr) +{ + _PyMem.free(_PyMem.ctx, ptr); +} + +void * +PyObject_Malloc(size_t size) +{ + /* see PyMem_RawMalloc() */ + if (size > (size_t)PY_SSIZE_T_MAX) + return NULL; + return _PyObject.malloc(_PyObject.ctx, size); +} + +void * +PyObject_Realloc(void *ptr, size_t new_size) +{ + /* see PyMem_RawMalloc() */ + if (new_size > (size_t)PY_SSIZE_T_MAX) + return NULL; + return _PyObject.realloc(_PyObject.ctx, ptr, new_size); +} + +void +PyObject_Free(void *ptr) +{ + _PyObject.free(_PyObject.ctx, ptr); +} + + +#ifdef WITH_PYMALLOC + #ifdef WITH_VALGRIND #include @@ -549,7 +857,6 @@ new_arena(void) struct arena_object* arenaobj; uint excess; /* number of bytes above pool alignment */ void *address; - int err; #ifdef PYMALLOC_DEBUG if (Py_GETENV("PYTHONMALLOCSTATS")) @@ -571,7 +878,7 @@ new_arena(void) return NULL; /* overflow */ #endif nbytes = numarenas * sizeof(*arenas); - arenaobj = (struct arena_object *)realloc(arenas, nbytes); + arenaobj = (struct arena_object *)PyMem_Realloc(arenas, nbytes); if (arenaobj == NULL) return NULL; arenas = arenaobj; @@ -602,19 +909,8 @@ new_arena(void) arenaobj = unused_arena_objects; unused_arena_objects = arenaobj->nextarena; assert(arenaobj->address == 0); 
-#ifdef MS_WINDOWS - address = (void*)VirtualAlloc(NULL, ARENA_SIZE, - MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); - err = (address == NULL); -#elif defined(ARENAS_USE_MMAP) - address = mmap(NULL, ARENA_SIZE, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - err = (address == MAP_FAILED); -#else - address = malloc(ARENA_SIZE); - err = (address == 0); -#endif - if (err) { + address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE); + if (address == NULL) { /* The allocation failed: return NULL after putting the * arenaobj back. */ @@ -777,9 +1073,8 @@ int Py_ADDRESS_IN_RANGE(void *P, poolp pool) Py_NO_INLINE; * Unless the optimizer reorders everything, being too smart... */ -#undef PyObject_Malloc -void * -PyObject_Malloc(size_t nbytes) +static void * +_PyObject_Malloc(void *ctx, size_t nbytes) { block *bp; poolp pool; @@ -795,17 +1090,6 @@ PyObject_Malloc(size_t nbytes) goto redirect; #endif - /* - * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. - * Most python internals blindly use a signed Py_ssize_t to track - * things without checking for overflows or negatives. - * As size_t is unsigned, checking for nbytes < 0 is not required. - */ - if (nbytes > PY_SSIZE_T_MAX) { - _Py_AllocatedBlocks--; - return NULL; - } - /* * This implicitly redirects malloc(0). */ @@ -978,10 +1262,8 @@ redirect: * last chance to serve the request) or when the max memory limit * has been reached. */ - if (nbytes == 0) - nbytes = 1; { - void *result = malloc(nbytes); + void *result = PyMem_Malloc(nbytes); if (!result) _Py_AllocatedBlocks--; return result; @@ -990,9 +1272,8 @@ redirect: /* free */ -#undef PyObject_Free -void -PyObject_Free(void *p) +static void +_PyObject_Free(void *ctx, void *p) { poolp pool; block *lastfree; @@ -1101,13 +1382,8 @@ PyObject_Free(void *p) unused_arena_objects = ao; /* Free the entire arena. 
*/ -#ifdef MS_WINDOWS - VirtualFree((void *)ao->address, 0, MEM_RELEASE); -#elif defined(ARENAS_USE_MMAP) - munmap((void *)ao->address, ARENA_SIZE); -#else - free((void *)ao->address); -#endif + _PyObject_Arena.free(_PyObject_Arena.ctx, + (void *)ao->address, ARENA_SIZE); ao->address = 0; /* mark unassociated */ --narenas_currently_allocated; @@ -1216,7 +1492,7 @@ PyObject_Free(void *p) redirect: #endif /* We didn't allocate this address. */ - free(p); + PyMem_Free(p); } /* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0, @@ -1224,9 +1500,8 @@ redirect: * return a non-NULL result. */ -#undef PyObject_Realloc -void * -PyObject_Realloc(void *p, size_t nbytes) +static void * +_PyObject_Realloc(void *ctx, void *p, size_t nbytes) { void *bp; poolp pool; @@ -1236,16 +1511,7 @@ PyObject_Realloc(void *p, size_t nbytes) #endif if (p == NULL) - return PyObject_Malloc(nbytes); - - /* - * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes. - * Most python internals blindly use a signed Py_ssize_t to track - * things without checking for overflows or negatives. - * As size_t is unsigned, checking for nbytes < 0 is not required. - */ - if (nbytes > PY_SSIZE_T_MAX) - return NULL; + return _PyObject_Malloc(ctx, nbytes); #ifdef WITH_VALGRIND /* Treat running_on_valgrind == -1 the same as 0 */ @@ -1273,10 +1539,10 @@ PyObject_Realloc(void *p, size_t nbytes) } size = nbytes; } - bp = PyObject_Malloc(nbytes); + bp = _PyObject_Malloc(ctx, nbytes); if (bp != NULL) { memcpy(bp, p, size); - PyObject_Free(p); + _PyObject_Free(ctx, p); } return bp; } @@ -1294,14 +1560,14 @@ PyObject_Realloc(void *p, size_t nbytes) * at p. Instead we punt: let C continue to manage this block. */ if (nbytes) - return realloc(p, nbytes); + return PyMem_Realloc(p, nbytes); /* C doesn't define the result of realloc(p, 0) (it may or may not * return NULL then), but Python's docs promise that nbytes==0 never * returns NULL. 
We don't pass 0 to realloc(), to avoid that endcase * to begin with. Even then, we can't be sure that realloc() won't * return NULL. */ - bp = realloc(p, 1); + bp = PyMem_Realloc(p, 1); return bp ? bp : p; } @@ -1311,24 +1577,6 @@ PyObject_Realloc(void *p, size_t nbytes) /* pymalloc not enabled: Redirect the entry points to malloc. These will * only be used by extensions that are compiled with pymalloc enabled. */ -void * -PyObject_Malloc(size_t n) -{ - return PyMem_MALLOC(n); -} - -void * -PyObject_Realloc(void *p, size_t n) -{ - return PyMem_REALLOC(p, n); -} - -void -PyObject_Free(void *p) -{ - PyMem_FREE(p); -} - Py_ssize_t _Py_GetAllocatedBlocks(void) { @@ -1354,10 +1602,6 @@ _Py_GetAllocatedBlocks(void) #define DEADBYTE 0xDB /* dead (newly freed) memory */ #define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */ -/* We tag each block with an API ID in order to tag API violations */ -#define _PYMALLOC_MEM_ID 'm' /* the PyMem_Malloc() API */ -#define _PYMALLOC_OBJ_ID 'o' /* The PyObject_Malloc() API */ - static size_t serialno = 0; /* incremented on each debug {m,re}alloc */ /* serialno is always incremented via calling this routine. The point is @@ -1440,58 +1684,18 @@ p[2*S: 2*S+n] p[2*S+n: 2*S+n+S] Copies of FORBIDDENBYTE. Used to catch over- writes and reads. p[2*S+n+S: 2*S+n+2*S] - A serial number, incremented by 1 on each call to _PyObject_DebugMalloc - and _PyObject_DebugRealloc. + A serial number, incremented by 1 on each call to _PyMem_DebugMalloc + and _PyMem_DebugRealloc. This is a big-endian size_t. If "bad memory" is detected later, the serial number gives an excellent way to set a breakpoint on the next run, to capture the instant at which this block was passed out. 
*/ -/* debug replacements for the PyMem_* memory API */ -void * -_PyMem_DebugMalloc(size_t nbytes) -{ - return _PyObject_DebugMallocApi(_PYMALLOC_MEM_ID, nbytes); -} -void * -_PyMem_DebugRealloc(void *p, size_t nbytes) -{ - return _PyObject_DebugReallocApi(_PYMALLOC_MEM_ID, p, nbytes); -} -void -_PyMem_DebugFree(void *p) -{ - _PyObject_DebugFreeApi(_PYMALLOC_MEM_ID, p); -} - -/* debug replacements for the PyObject_* memory API */ -void * -_PyObject_DebugMalloc(size_t nbytes) -{ - return _PyObject_DebugMallocApi(_PYMALLOC_OBJ_ID, nbytes); -} -void * -_PyObject_DebugRealloc(void *p, size_t nbytes) -{ - return _PyObject_DebugReallocApi(_PYMALLOC_OBJ_ID, p, nbytes); -} -void -_PyObject_DebugFree(void *p) -{ - _PyObject_DebugFreeApi(_PYMALLOC_OBJ_ID, p); -} -void -_PyObject_DebugCheckAddress(const void *p) -{ - _PyObject_DebugCheckAddressApi(_PYMALLOC_OBJ_ID, p); -} - - -/* generic debug memory api, with an "id" to identify the API in use */ -void * -_PyObject_DebugMallocApi(char id, size_t nbytes) +static void * +_PyMem_DebugMalloc(void *ctx, size_t nbytes) { + debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; uchar *p; /* base address of malloc'ed block */ uchar *tail; /* p + 2*SST + nbytes == pointer to tail pad bytes */ size_t total; /* nbytes + 4*SST */ @@ -1502,14 +1706,14 @@ _PyObject_DebugMallocApi(char id, size_t nbytes) /* overflow: can't represent total as a size_t */ return NULL; - p = (uchar *)PyObject_Malloc(total); + p = (uchar *)api->alloc.malloc(api->alloc.ctx, total); if (p == NULL) return NULL; /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */ write_size_t(p, nbytes); - p[SST] = (uchar)id; - memset(p + SST + 1 , FORBIDDENBYTE, SST-1); + p[SST] = (uchar)api->api_id; + memset(p + SST + 1, FORBIDDENBYTE, SST-1); if (nbytes > 0) memset(p + 2*SST, CLEANBYTE, nbytes); @@ -1527,25 +1731,27 @@ _PyObject_DebugMallocApi(char id, size_t nbytes) Then fills the original bytes with DEADBYTE. Then calls the underlying free. 
*/ -void -_PyObject_DebugFreeApi(char api, void *p) +static void +_PyMem_DebugFree(void *ctx, void *p) { + debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; uchar *q = (uchar *)p - 2*SST; /* address returned from malloc */ size_t nbytes; if (p == NULL) return; - _PyObject_DebugCheckAddressApi(api, p); + _PyMem_DebugCheckAddress(api->api_id, p); nbytes = read_size_t(q); nbytes += 4*SST; if (nbytes > 0) memset(q, DEADBYTE, nbytes); - PyObject_Free(q); + api->alloc.free(api->alloc.ctx, q); } -void * -_PyObject_DebugReallocApi(char api, void *p, size_t nbytes) +static void * +_PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes) { + debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; uchar *q = (uchar *)p; uchar *tail; size_t total; /* nbytes + 4*SST */ @@ -1553,9 +1759,9 @@ _PyObject_DebugReallocApi(char api, void *p, size_t nbytes) int i; if (p == NULL) - return _PyObject_DebugMallocApi(api, nbytes); + return _PyMem_DebugMalloc(ctx, nbytes); - _PyObject_DebugCheckAddressApi(api, p); + _PyMem_DebugCheckAddress(api->api_id, p); bumpserialno(); original_nbytes = read_size_t(q - 2*SST); total = nbytes + 4*SST; @@ -1572,12 +1778,12 @@ _PyObject_DebugReallocApi(char api, void *p, size_t nbytes) * case we didn't get the chance to mark the old memory with DEADBYTE, * but we live with that. */ - q = (uchar *)PyObject_Realloc(q - 2*SST, total); + q = (uchar *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total); if (q == NULL) return NULL; write_size_t(q, nbytes); - assert(q[SST] == (uchar)api); + assert(q[SST] == (uchar)api->api_id); for (i = 1; i < SST; ++i) assert(q[SST + i] == FORBIDDENBYTE); q += 2*SST; @@ -1599,8 +1805,8 @@ _PyObject_DebugReallocApi(char api, void *p, size_t nbytes) * and call Py_FatalError to kill the program. * The API id, is also checked. 
*/ - void -_PyObject_DebugCheckAddressApi(char api, const void *p) +static void +_PyMem_DebugCheckAddress(char api, const void *p) { const uchar *q = (const uchar *)p; char msgbuf[64]; @@ -1652,7 +1858,7 @@ error: } /* Display info to stderr about the memory block at p. */ -void +static void _PyObject_DebugDumpAddress(const void *p) { const uchar *q = (const uchar *)p; -- cgit v1.2.1 From a8f35d2e376b9e6dfd44b93ff2dfdda8fc135b0f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 7 Jul 2013 03:06:16 +0200 Subject: Issue #3329: Fix _PyObject_ArenaVirtualFree() According to VirtualFree() documentation, the size must be zero if the "free type" is MEM_RELEASE. --- Objects/obmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index d08515d776..97a137db37 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -68,7 +68,7 @@ _PyObject_ArenaVirtualAlloc(void *ctx, size_t size) static void _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size) { - VirtualFree(ptr, size, MEM_RELEASE); + VirtualFree(ptr, 0, MEM_RELEASE); } #elif defined(ARENAS_USE_MMAP) -- cgit v1.2.1 From 0e6215070ac12c5aecdce573f75624eaa7ed056e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 7 Jul 2013 16:25:15 +0200 Subject: Issue #18203: Replace malloc() with PyMem_RawMalloc() at Python initialization * Replace malloc() with PyMem_RawMalloc() * Replace PyMem_Malloc() with PyMem_RawMalloc() where the GIL is not held. 
* _Py_char2wchar() now returns a buffer allocated by PyMem_RawMalloc(), instead of PyMem_Malloc() --- Objects/unicodeobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ab1dbb9625..3a4cc20718 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3316,7 +3316,7 @@ encode_error: wstr = _Py_char2wchar(errmsg, &errlen); if (wstr != NULL) { reason = PyUnicode_FromWideChar(wstr, errlen); - PyMem_Free(wstr); + PyMem_RawFree(wstr); } else errmsg = NULL; } @@ -3535,7 +3535,7 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, } unicode = PyUnicode_FromWideChar(wstr, wlen); - PyMem_Free(wstr); + PyMem_RawFree(wstr); } else { /* strict mode */ @@ -3583,7 +3583,7 @@ decode_error: wstr = _Py_char2wchar(errmsg, &errlen); if (wstr != NULL) { reason = PyUnicode_FromWideChar(wstr, errlen); - PyMem_Free(wstr); + PyMem_RawFree(wstr); } else errmsg = NULL; } -- cgit v1.2.1 From 878680d5e354d2bf6d4635b6bacd7809750c7637 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 7 Jul 2013 17:22:41 +0200 Subject: Issue #18203: Replace malloc() with PyMem_Malloc() in _PySequence_BytesToCharpArray() --- Objects/abstract.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'Objects') diff --git a/Objects/abstract.c b/Objects/abstract.c index 40ea43bc8d..244dcafc4b 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1238,7 +1238,7 @@ PyNumber_AsSsize_t(PyObject *item, PyObject *err) to be an int or have an __int__ method. Steals integral's reference. error_format will be used to create the TypeError if integral isn't actually an Integral instance. error_format should be a format string - that can accept a char* naming integral's type. + that can accept a char* naming integral's type. 
*/ static PyObject * convert_integral_to_int(PyObject *integral, const char *error_format) @@ -1257,7 +1257,7 @@ convert_integral_to_int(PyObject *integral, const char *error_format) } PyErr_Format(PyExc_TypeError, error_format, Py_TYPE(integral)->tp_name); Py_DECREF(integral); - return NULL; + return NULL; } @@ -2721,8 +2721,8 @@ PyIter_Next(PyObject *iter) * NULL terminated string pointers with a NULL char* terminating the array. * (ie: an argv or env list) * - * Memory allocated for the returned list is allocated using malloc() and MUST - * be freed by the caller using a free() loop or _Py_FreeCharPArray(). + * Memory allocated for the returned list is allocated using PyMem_Malloc() + * and MUST be freed by _Py_FreeCharPArray(). */ char *const * _PySequence_BytesToCharpArray(PyObject* self) @@ -2730,6 +2730,7 @@ _PySequence_BytesToCharpArray(PyObject* self) char **array; Py_ssize_t i, argc; PyObject *item = NULL; + Py_ssize_t size; argc = PySequence_Size(self); if (argc == -1) @@ -2742,7 +2743,7 @@ _PySequence_BytesToCharpArray(PyObject* self) return NULL; } - array = malloc((argc + 1) * sizeof(char *)); + array = PyMem_Malloc((argc + 1) * sizeof(char *)); if (array == NULL) { PyErr_NoMemory(); return NULL; @@ -2761,11 +2762,13 @@ _PySequence_BytesToCharpArray(PyObject* self) array[i] = NULL; goto fail; } - array[i] = strdup(data); + size = PyBytes_GET_SIZE(item) + 1; + array[i] = PyMem_Malloc(size); if (!array[i]) { PyErr_NoMemory(); goto fail; } + memcpy(array[i], data, size); Py_DECREF(item); } array[argc] = NULL; @@ -2785,7 +2788,7 @@ _Py_FreeCharPArray(char *const array[]) { Py_ssize_t i; for (i = 0; array[i] != NULL; ++i) { - free(array[i]); + PyMem_Free(array[i]); } - free((void*)array); + PyMem_Free((void*)array); } -- cgit v1.2.1 From ddc20be3da791483925ab07fab83592b6f9de748 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 7 Jul 2013 22:57:45 +0200 Subject: Issue #18203: Fix _Py_DecodeUTF8_surrogateescape(), use PyMem_RawMalloc() as 
_Py_char2wchar() --- Objects/unicodeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3a4cc20718..ac5f66aeaa 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4806,7 +4806,7 @@ onError: used to decode the command line arguments on Mac OS X. Return a pointer to a newly allocated wide character string (use - PyMem_Free() to free the memory), or NULL on memory allocation error. */ + PyMem_RawFree() to free the memory), or NULL on memory allocation error. */ wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size) @@ -4819,7 +4819,7 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size) character count */ if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) return NULL; - unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t)); + unicode = PyMem_RawMalloc((size + 1) * sizeof(wchar_t)); if (!unicode) return NULL; -- cgit v1.2.1 From 28ee3a69061fefad791404c84825222db9c7d0f2 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 7 Jul 2013 23:30:24 +0200 Subject: Issue #18203: Add _PyMem_RawStrdup() and _PyMem_Strdup() Replace strdup() with _PyMem_RawStrdup() or _PyMem_Strdup(), depending if the GIL is held or not. 
--- Objects/obmalloc.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'Objects') diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 97a137db37..8e25229e09 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -294,6 +294,34 @@ PyMem_Free(void *ptr) _PyMem.free(_PyMem.ctx, ptr); } +char * +_PyMem_RawStrdup(const char *str) +{ + size_t size; + char *copy; + + size = strlen(str) + 1; + copy = PyMem_RawMalloc(size); + if (copy == NULL) + return NULL; + memcpy(copy, str, size); + return copy; +} + +char * +_PyMem_Strdup(const char *str) +{ + size_t size; + char *copy; + + size = strlen(str) + 1; + copy = PyMem_Malloc(size); + if (copy == NULL) + return NULL; + memcpy(copy, str, size); + return copy; +} + void * PyObject_Malloc(size_t size) { -- cgit v1.2.1 From f91e7aa7b53d8e7b68fad6036bc1d8f7fc8e5357 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 8 Jul 2013 22:19:20 +0200 Subject: Issue #18408: Fix PyDict_New() to handle correctly new_keys_object() failure (MemoryError). 
--- Objects/dictobject.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 250c890cd7..9d8696a89e 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -389,6 +389,7 @@ static PyObject * new_dict(PyDictKeysObject *keys, PyObject **values) { PyDictObject *mp; + assert(keys != NULL); if (numfree) { mp = free_list[--numfree]; assert (mp != NULL); @@ -431,7 +432,10 @@ new_dict_with_shared_keys(PyDictKeysObject *keys) PyObject * PyDict_New(void) { - return new_dict(new_keys_object(PyDict_MINSIZE_COMBINED), NULL); + PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE_COMBINED); + if (keys == NULL) + return NULL; + return new_dict(keys, NULL); } /* -- cgit v1.2.1 From b41ab1de3f633673be55acf787d8a0f29fffab78 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 8 Jul 2013 22:20:44 +0200 Subject: Issue #18408: Fix list.pop() to handle list_resize() failure (MemoryError). --- Objects/listobject.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/listobject.c b/Objects/listobject.c index 0c82cc40ec..b18ef5763a 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -925,8 +925,10 @@ listpop(PyListObject *self, PyObject *args) v = self->ob_item[i]; if (i == Py_SIZE(self) - 1) { status = list_resize(self, Py_SIZE(self) - 1); - assert(status >= 0); - return v; /* and v now owns the reference the list had */ + if (status >= 0) + return v; /* and v now owns the reference the list had */ + else + return NULL; } Py_INCREF(v); status = list_ass_slice(self, i, i+1, (PyObject *)NULL); -- cgit v1.2.1 From dc4bb975e498e57c04ff680280f5ebcf5825de47 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 8 Jul 2013 22:25:48 +0200 Subject: Issue #18408: Fix PyType_Ready() and type.__bases__ setter to handle PyWeakref_NewRef() failure (ex: MemoryError). 
--- Objects/typeobject.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index a3516671a7..5565b20662 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4329,6 +4329,8 @@ add_subclass(PyTypeObject *base, PyTypeObject *type) } assert(PyList_Check(list)); newobj = PyWeakref_NewRef((PyObject *)type, NULL); + if (newobj == NULL) + return -1; i = PyList_GET_SIZE(list); while (--i >= 0) { ref = PyList_GET_ITEM(list, i); -- cgit v1.2.1 From 5cd8da58ba772f2bc261729e56b7f3e7571b50a6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 8 Jul 2013 22:29:55 +0200 Subject: Issue #18408: Fix _PyUnicodeWriter_Finish(): clear writer->buffer, so _PyUnicodeWriter_Dealloc() can be called on the writer after finish. --- Objects/unicodeobject.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ac5f66aeaa..96c7c5825e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13157,6 +13157,7 @@ _PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t PyObject * _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) { + PyObject *str; if (writer->pos == 0) { Py_XDECREF(writer->buffer); _Py_RETURN_UNICODE_EMPTY(); @@ -13174,8 +13175,10 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) } writer->buffer = newbuffer; } - assert(_PyUnicode_CheckConsistency(writer->buffer, 1)); - return unicode_result_ready(writer->buffer); + str = writer->buffer; + writer->buffer = NULL; + assert(_PyUnicode_CheckConsistency(str, 1)); + return unicode_result_ready(str); } void -- cgit v1.2.1 From 68bdfd00ec42630c74720ee83ffa41fe2750156d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 9 Jul 2013 00:37:24 +0200 Subject: Issue #18408: _PyUnicodeWriter_Finish() now clears its buffer attribute in all cases, so _PyUnicodeWriter_Dealloc() can be called after finish. 
--- Objects/unicodeobject.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 96c7c5825e..1a448824f7 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13159,18 +13159,21 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) { PyObject *str; if (writer->pos == 0) { - Py_XDECREF(writer->buffer); + Py_CLEAR(writer->buffer); _Py_RETURN_UNICODE_EMPTY(); } if (writer->readonly) { - assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos); - return writer->buffer; + str = writer->buffer; + writer->buffer = NULL; + assert(PyUnicode_GET_LENGTH(str) == writer->pos); + return str; } if (PyUnicode_GET_LENGTH(writer->buffer) != writer->pos) { PyObject *newbuffer; newbuffer = resize_compact(writer->buffer, writer->pos); if (newbuffer == NULL) { Py_DECREF(writer->buffer); + writer->buffer = NULL; return NULL; } writer->buffer = newbuffer; -- cgit v1.2.1 From 33e0699a7f12c4cade813adc5d42b761c99bc7f9 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 9 Jul 2013 00:44:43 +0200 Subject: Issue #18408: Fix _PyMem_DebugRealloc() Don't mark old extra memory dead before calling realloc(). realloc() can fail and realloc() must not touch the original buffer on failure. So mark old extra memory dead only on success if the new buffer did not move (has the same address). 
--- Objects/obmalloc.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'Objects') diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 8e25229e09..f1462225a0 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1780,7 +1780,7 @@ static void * _PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes) { debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; - uchar *q = (uchar *)p; + uchar *q = (uchar *)p, *oldq; uchar *tail; size_t total; /* nbytes + 4*SST */ size_t original_nbytes; @@ -1797,24 +1797,26 @@ _PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes) /* overflow: can't represent total as a size_t */ return NULL; - if (nbytes < original_nbytes) { - /* shrinking: mark old extra memory dead */ - memset(q + nbytes, DEADBYTE, original_nbytes - nbytes + 2*SST); - } - /* Resize and add decorations. We may get a new pointer here, in which * case we didn't get the chance to mark the old memory with DEADBYTE, * but we live with that. */ + oldq = q; q = (uchar *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total); if (q == NULL) return NULL; + if (q == oldq && nbytes < original_nbytes) { + /* shrinking: mark old extra memory dead */ + memset(q + nbytes, DEADBYTE, original_nbytes - nbytes); + } + write_size_t(q, nbytes); assert(q[SST] == (uchar)api->api_id); for (i = 1; i < SST; ++i) assert(q[SST + i] == FORBIDDENBYTE); q += 2*SST; + tail = q + nbytes; memset(tail, FORBIDDENBYTE, SST); write_size_t(tail + SST, serialno); -- cgit v1.2.1 From 20b92d4419e0c965a60a9c1279a40916f5f2129f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 11 Jul 2013 22:42:25 +0200 Subject: typeobject.c: remove trailing spaces --- Objects/typeobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5565b20662..13ea1caead 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2411,7 +2411,7 @@ PyType_FromSpecWithBases(PyType_Spec *spec, 
PyObject *bases) char *s; char *res_start = (char*)res; PyType_Slot *slot; - + /* Set the type name and qualname */ s = strrchr(spec->name, '.'); if (s == NULL) @@ -2432,7 +2432,7 @@ PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases) type->tp_name = spec->name; if (!type->tp_name) goto fail; - + /* Adjust for empty tuple bases */ if (!bases) { base = &PyBaseObject_Type; @@ -2516,7 +2516,7 @@ PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases) /* Set type.__module__ */ s = strrchr(spec->name, '.'); if (s != NULL) - _PyDict_SetItemId(type->tp_dict, &PyId___module__, + _PyDict_SetItemId(type->tp_dict, &PyId___module__, PyUnicode_FromStringAndSize( spec->name, (Py_ssize_t)(s - spec->name))); -- cgit v1.2.1 From 101a3f7be109597969a32c5202b2381d1e8d9923 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 11 Jul 2013 22:46:11 +0200 Subject: Issue #18408: slot_tp_str() must not fallback on slot_tp_repr() on error type->tp_str must not point to slot_tp_str() if type has no __str__ attribute, so there is no reason for slot_tp_str() to fall back on slot_tp_repr() on lookup error. Moreover, calling PyErr_Clear() may hide a real bug like MemoryError. If __str__ attribute is removed, slots must be updated (which is done by type_setattro()). --- Objects/typeobject.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 13ea1caead..6eb2cf1103 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5274,29 +5274,12 @@ slot_tp_str(PyObject *self) _Py_IDENTIFIER(__str__); func = lookup_method(self, &PyId___str__); - if (func != NULL) { + if (func == NULL) + return NULL; res = PyEval_CallObject(func, NULL); Py_DECREF(func); return res; } - else { - /* PyObject *ress; */ - PyErr_Clear(); - res = slot_tp_repr(self); - if (!res) - return NULL; - /* XXX this is non-sensical. Why should we return - a bytes object from __str__. Is this code even - used?
- mvl */ - assert(0); - return res; - /* - ress = _PyUnicode_AsDefaultEncodedString(res); - Py_DECREF(res); - return ress; - */ - } -} static Py_hash_t slot_tp_hash(PyObject *self) -- cgit v1.2.1 From e822e9e9c1a27846eb90b5357d2692b01abe1599 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 11 Jul 2013 23:44:46 +0200 Subject: Issue #18408: In debug mode, PyCFunction_Call() now checks if an exception was raised if the result is NULL to help to find bugs in C mode (get the error earlier than the SystemError in ceval.c). --- Objects/methodobject.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'Objects') diff --git a/Objects/methodobject.c b/Objects/methodobject.c index 9944fade10..11c8b6e9c1 100644 --- a/Objects/methodobject.c +++ b/Objects/methodobject.c @@ -79,23 +79,34 @@ PyCFunction_GetFlags(PyObject *op) PyObject * PyCFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) { +#define CHECK_RESULT(res) assert(res != NULL || PyErr_Occurred()) + PyCFunctionObject* f = (PyCFunctionObject*)func; PyCFunction meth = PyCFunction_GET_FUNCTION(func); PyObject *self = PyCFunction_GET_SELF(func); + PyObject *res; Py_ssize_t size; switch (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST)) { case METH_VARARGS: - if (kw == NULL || PyDict_Size(kw) == 0) - return (*meth)(self, arg); + if (kw == NULL || PyDict_Size(kw) == 0) { + res = (*meth)(self, arg); + CHECK_RESULT(res); + return res; + } break; case METH_VARARGS | METH_KEYWORDS: - return (*(PyCFunctionWithKeywords)meth)(self, arg, kw); + res = (*(PyCFunctionWithKeywords)meth)(self, arg, kw); + CHECK_RESULT(res); + return res; case METH_NOARGS: if (kw == NULL || PyDict_Size(kw) == 0) { size = PyTuple_GET_SIZE(arg); - if (size == 0) - return (*meth)(self, NULL); + if (size == 0) { + res = (*meth)(self, NULL); + CHECK_RESULT(res); + return res; + } PyErr_Format(PyExc_TypeError, "%.200s() takes no arguments (%zd given)", f->m_ml->ml_name, size); 
@@ -105,8 +116,11 @@ PyCFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) case METH_O: if (kw == NULL || PyDict_Size(kw) == 0) { size = PyTuple_GET_SIZE(arg); - if (size == 1) - return (*meth)(self, PyTuple_GET_ITEM(arg, 0)); + if (size == 1) { + res = (*meth)(self, PyTuple_GET_ITEM(arg, 0)); + CHECK_RESULT(res); + return res; + } PyErr_Format(PyExc_TypeError, "%.200s() takes exactly one argument (%zd given)", f->m_ml->ml_name, size); @@ -123,6 +137,8 @@ PyCFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments", f->m_ml->ml_name); return NULL; + +#undef CHECK_RESULT } /* Methods (the standard built-in methods, that is) */ -- cgit v1.2.1 From d80e57c04c75ef851f23adef7413551fa3f7cc33 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 12 Jul 2013 00:42:14 +0200 Subject: Issue #18408: pmerge() help of mro_implementation() now raises MemoryError on memory allocation failure Replace also PyMem_Free() with PyMem_FREE() to be consistent with the rest of the function. --- Objects/typeobject.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 6eb2cf1103..560c929b73 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1456,8 +1456,10 @@ pmerge(PyObject *acc, PyObject* to_merge) { that is not included in acc. 
*/ remain = (int *)PyMem_MALLOC(SIZEOF_INT*to_merge_size); - if (remain == NULL) + if (remain == NULL) { + PyErr_NoMemory(); return -1; + } for (i = 0; i < to_merge_size; i++) remain[i] = 0; @@ -1489,7 +1491,7 @@ pmerge(PyObject *acc, PyObject* to_merge) { } ok = PyList_Append(acc, candidate); if (ok < 0) { - PyMem_Free(remain); + PyMem_FREE(remain); return -1; } for (j = 0; j < to_merge_size; j++) { -- cgit v1.2.1 From b5a558b8646d958e496f0b0195b84a9ac618f31e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 15 Jul 2013 17:50:07 +0200 Subject: Issue #18408: PyObject_Call() now fails with an assertion error in debug mode if the function called failed whereas no exception was raised, to detect bugs earlier. --- Objects/abstract.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/abstract.c b/Objects/abstract.c index 244dcafc4b..6896600eba 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -2104,10 +2104,16 @@ PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) return NULL; result = (*call)(func, arg, kw); Py_LeaveRecursiveCall(); - if (result == NULL && !PyErr_Occurred()) +#ifdef NDEBUG + if (result == NULL && !PyErr_Occurred()) { PyErr_SetString( PyExc_SystemError, "NULL result without error in PyObject_Call"); + } +#else + if (result == NULL) + assert(PyErr_Occurred()); +#endif return result; } PyErr_Format(PyExc_TypeError, "'%.200s' object is not callable", -- cgit v1.2.1 From c4cabf2e84d9a57e01af78a7def66ac5af9fad0d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 15 Jul 2013 18:22:47 +0200 Subject: Issue #18408: Don't check unicode consistency in _PyUnicode_HAS_UTF8_MEMORY() and _PyUnicode_HAS_WSTR_MEMORY() macros These macros are called in unicode_dealloc(), whereas the unicode object can be "inconsistent" if the creation of the object failed. 
For example, when unicode_subtype_new() fails on a memory allocation, _PyUnicode_CheckConsistency() fails with an assertion error because data is NULL. --- Objects/unicodeobject.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1a448824f7..8ce3f96933 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -122,16 +122,14 @@ extern "C" { /* true if the Unicode object has an allocated UTF-8 memory block (not shared with other data) */ #define _PyUnicode_HAS_UTF8_MEMORY(op) \ - (assert(_PyUnicode_CHECK(op)), \ - (!PyUnicode_IS_COMPACT_ASCII(op) \ + ((!PyUnicode_IS_COMPACT_ASCII(op) \ && _PyUnicode_UTF8(op) \ && _PyUnicode_UTF8(op) != PyUnicode_DATA(op))) /* true if the Unicode object has an allocated wstr memory block (not shared with other data) */ #define _PyUnicode_HAS_WSTR_MEMORY(op) \ - (assert(_PyUnicode_CHECK(op)), \ - (_PyUnicode_WSTR(op) && \ + ((_PyUnicode_WSTR(op) && \ (!PyUnicode_IS_READY(op) || \ _PyUnicode_WSTR(op) != PyUnicode_DATA(op)))) -- cgit v1.2.1 From 0669c107380af3048a3f670ee742788152d6c066 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 15 Jul 2013 19:34:20 +0200 Subject: Issue #18408: type_new() and PyType_FromSpecWithBases() now raise MemoryError on memory allocation failure --- Objects/typeobject.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 560c929b73..9b69021299 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2292,8 +2292,10 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) /* Silently truncate the docstring if it contains null bytes. 
*/ len = strlen(doc_str); tp_doc = (char *)PyObject_MALLOC(len + 1); - if (tp_doc == NULL) + if (tp_doc == NULL) { + PyErr_NoMemory(); goto error; + } memcpy(tp_doc, doc_str, len + 1); type->tp_doc = tp_doc; } @@ -2496,8 +2498,10 @@ PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases) if (slot->slot == Py_tp_doc) { size_t len = strlen(slot->pfunc)+1; char *tp_doc = PyObject_MALLOC(len); - if (tp_doc == NULL) + if (tp_doc == NULL) { + PyErr_NoMemory(); goto fail; + } memcpy(tp_doc, slot->pfunc, len); type->tp_doc = tp_doc; } -- cgit v1.2.1 From f973f6bbe55ca322f5f32954cdb47fc9a418eb78 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 16 Jul 2013 01:02:12 +0200 Subject: Issue #18408: add more assertions on PyErr_Occurred() in ceval.c to detect bugs earlier --- Objects/abstract.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/abstract.c b/Objects/abstract.c index 6896600eba..6c8c561eac 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -2111,8 +2111,7 @@ PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) "NULL result without error in PyObject_Call"); } #else - if (result == NULL) - assert(PyErr_Occurred()); + assert(result != NULL || PyErr_Occurred()); #endif return result; } -- cgit v1.2.1 From 5e4e7f7c07ba735beedf75350ba7b1ba048d2443 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 16 Jul 2013 21:45:58 +0200 Subject: Issue #18408: Fix list.extend(), handle list_resize() failure --- Objects/listobject.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/listobject.c b/Objects/listobject.c index b18ef5763a..0ec70e587a 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -871,8 +871,10 @@ listextend(PyListObject *self, PyObject *b) } /* Cut back result list if initial guess was too large. 
*/ - if (Py_SIZE(self) < self->allocated) - list_resize(self, Py_SIZE(self)); /* shrinking can't fail */ + if (Py_SIZE(self) < self->allocated) { + if (list_resize(self, Py_SIZE(self)) < 0) + goto error; + } Py_DECREF(it); Py_RETURN_NONE; -- cgit v1.2.1 From f60e8336fb0ee1a26b9cdc598da6cd8a953fb212 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 16 Jul 2013 22:16:05 +0200 Subject: Issue #18408: Fix PyDict_GetItemString(), suppress PyUnicode_FromString() error As PyDict_GetItem(), PyDict_GetItemString() suppresses all errors that may occur for historical reasons. --- Objects/dictobject.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 9d8696a89e..99247d39a3 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2692,8 +2692,10 @@ PyDict_GetItemString(PyObject *v, const char *key) { PyObject *kv, *rv; kv = PyUnicode_FromString(key); - if (kv == NULL) + if (kv == NULL) { + PyErr_Clear(); return NULL; + } rv = PyDict_GetItem(v, kv); Py_DECREF(kv); return rv; -- cgit v1.2.1 From 0253ae02445bec97496776427c84f64a47d7bc67 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 16 Jul 2013 22:17:26 +0200 Subject: Cleanup dictobject.c --- Objects/dictobject.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 99247d39a3..d02ef02b68 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -305,9 +305,9 @@ PyDict_Fini(void) * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3)) */ -/* GROWTH_RATE. Growth rate upon hitting maximum load. - * Currently set to used*2 + capacity/2. - * This means that dicts double in size when growing without deletions, +/* GROWTH_RATE. Growth rate upon hitting maximum load. + * Currently set to used*2 + capacity/2. 
+ * This means that dicts double in size when growing without deletions, * but have more head room when the number of deletions is on a par with the * number of insertions. * Raising this to used*4 doubles memory consumption depending on the size of @@ -2589,23 +2589,25 @@ static PyObject * dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *self; + PyDictObject *d; assert(type != NULL && type->tp_alloc != NULL); self = type->tp_alloc(type, 0); - if (self != NULL) { - PyDictObject *d = (PyDictObject *)self; - d->ma_keys = new_keys_object(PyDict_MINSIZE_COMBINED); - /* XXX - Should we raise a no-memory error? */ - if (d->ma_keys == NULL) { - DK_INCREF(Py_EMPTY_KEYS); - d->ma_keys = Py_EMPTY_KEYS; - d->ma_values = empty_values; - } - d->ma_used = 0; - /* The object has been implicitly tracked by tp_alloc */ - if (type == &PyDict_Type) - _PyObject_GC_UNTRACK(d); - } + if (self == NULL) + return NULL; + + d = (PyDictObject *)self; + d->ma_keys = new_keys_object(PyDict_MINSIZE_COMBINED); + /* XXX - Should we raise a no-memory error? */ + if (d->ma_keys == NULL) { + DK_INCREF(Py_EMPTY_KEYS); + d->ma_keys = Py_EMPTY_KEYS; + d->ma_values = empty_values; + } + d->ma_used = 0; + /* The object has been implicitly tracked by tp_alloc */ + if (type == &PyDict_Type) + _PyObject_GC_UNTRACK(d); return self; } -- cgit v1.2.1 From 3bdf98f4efae10f7bb4968a8685bbbd95bae97a5 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 16 Jul 2013 22:19:00 +0200 Subject: Issue #18408: dict_new() now fails on new_keys_object() error Pass the MemoryError exception to the caller, instead of using empty keys. 
--- Objects/dictobject.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index d02ef02b68..3243061b68 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1395,7 +1395,7 @@ dict_dealloc(PyDictObject *mp) } DK_DECREF(keys); } - else { + else if (keys != NULL) { assert(keys->dk_refcnt == 1); DK_DECREF(keys); } @@ -2595,19 +2595,18 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds) self = type->tp_alloc(type, 0); if (self == NULL) return NULL; - d = (PyDictObject *)self; - d->ma_keys = new_keys_object(PyDict_MINSIZE_COMBINED); - /* XXX - Should we raise a no-memory error? */ - if (d->ma_keys == NULL) { - DK_INCREF(Py_EMPTY_KEYS); - d->ma_keys = Py_EMPTY_KEYS; - d->ma_values = empty_values; - } - d->ma_used = 0; + /* The object has been implicitly tracked by tp_alloc */ if (type == &PyDict_Type) _PyObject_GC_UNTRACK(d); + + d->ma_used = 0; + d->ma_keys = new_keys_object(PyDict_MINSIZE_COMBINED); + if (d->ma_keys == NULL) { + Py_DECREF(self); + return NULL; + } return self; } -- cgit v1.2.1 From 1953c33e239c62bb65e35860be24f05112beabbc Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 16 Jul 2013 22:24:44 +0200 Subject: Issue #18408: Py_ReprLeave() now saves/restores the current exception, and ignores exceptions raised during the call --- Objects/object.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index d382a3c157..700e8bef56 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1920,13 +1920,18 @@ Py_ReprLeave(PyObject *obj) PyObject *dict; PyObject *list; Py_ssize_t i; + PyObject *error_type, *error_value, *error_traceback; + + PyErr_Fetch(&error_type, &error_value, &error_traceback); dict = PyThreadState_GetDict(); if (dict == NULL) - return; + goto finally; + list = PyDict_GetItemString(dict, KEY); if (list == NULL || 
!PyList_Check(list)) - return; + goto finally; + i = PyList_GET_SIZE(list); /* Count backwards because we always expect obj to be list[-1] */ while (--i >= 0) { @@ -1935,6 +1940,10 @@ Py_ReprLeave(PyObject *obj) break; } } + +finally: + /* ignore exceptions because there is no way to report them. */ + PyErr_Restore(error_type, error_value, error_traceback); } /* Trashcan support. */ -- cgit v1.2.1 From c05e944f691f9a3edb9173b44425f7575db7098a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 16 Jul 2013 22:51:21 +0200 Subject: Cleanup type_call() to ease debug It was easy to miss the call to type->tp_init because it was done in a long conditional expression. Split the long expression in multiple lines to make the debug step by step easier. --- Objects/typeobject.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 9b69021299..f311af8f25 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -750,10 +750,12 @@ type_call(PyTypeObject *type, PyObject *args, PyObject *kwds) if (!PyType_IsSubtype(Py_TYPE(obj), type)) return obj; type = Py_TYPE(obj); - if (type->tp_init != NULL && - type->tp_init(obj, args, kwds) < 0) { - Py_DECREF(obj); - obj = NULL; + if (type->tp_init != NULL) { + int res = type->tp_init(obj, args, kwds); + if (res < 0) { + Py_DECREF(obj); + obj = NULL; + } } } return obj; -- cgit v1.2.1 From 485316200dd2089a50ed5b41f2041bd1a8603622 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 17 Jul 2013 01:22:45 +0200 Subject: Close #18469: Replace PyDict_GetItemString() with _PyDict_GetItemId() in structseq.c _PyDict_GetItemId() is more efficient: it only builds the Unicode string once. Identifiers (dictionary keys) are now created at Python initialization, and if the creation failed, Python does exit with a fatal error. 
Before, PyDict_GetItemString() failure was not handled: structseq_new() could call PyObject_GC_NewVar() with a negative size, and structseq_dealloc() could also crash. --- Objects/structseq.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'Objects') diff --git a/Objects/structseq.c b/Objects/structseq.c index c3b9a72989..212ab52d42 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -11,17 +11,20 @@ static char unnamed_fields_key[] = "n_unnamed_fields"; /* Fields with this name have only a field index, not a field name. They are only allowed for indices < n_visible_fields. */ char *PyStructSequence_UnnamedField = "unnamed field"; +_Py_IDENTIFIER(n_sequence_fields); +_Py_IDENTIFIER(n_fields); +_Py_IDENTIFIER(n_unnamed_fields); #define VISIBLE_SIZE(op) Py_SIZE(op) #define VISIBLE_SIZE_TP(tp) PyLong_AsLong( \ - PyDict_GetItemString((tp)->tp_dict, visible_length_key)) + _PyDict_GetItemId((tp)->tp_dict, &PyId_n_sequence_fields)) #define REAL_SIZE_TP(tp) PyLong_AsLong( \ - PyDict_GetItemString((tp)->tp_dict, real_length_key)) + _PyDict_GetItemId((tp)->tp_dict, &PyId_n_fields)) #define REAL_SIZE(op) REAL_SIZE_TP(Py_TYPE(op)) #define UNNAMED_FIELDS_TP(tp) PyLong_AsLong( \ - PyDict_GetItemString((tp)->tp_dict, unnamed_fields_key)) + _PyDict_GetItemId((tp)->tp_dict, &PyId_n_unnamed_fields)) #define UNNAMED_FIELDS(op) UNNAMED_FIELDS_TP(Py_TYPE(op)) @@ -59,7 +62,7 @@ static void structseq_dealloc(PyStructSequence *obj) { Py_ssize_t i, size; - + size = REAL_SIZE(obj); for (i = 0; i < size; ++i) { Py_XDECREF(obj->ob_item[i]); @@ -382,9 +385,21 @@ PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc) PyTypeObject* PyStructSequence_NewType(PyStructSequence_Desc *desc) { - PyTypeObject *result = (PyTypeObject*)PyType_GenericAlloc(&PyType_Type, 0); + PyTypeObject *result; + + result = (PyTypeObject*)PyType_GenericAlloc(&PyType_Type, 0); if (result != NULL) { PyStructSequence_InitType(result, desc); } return 
result; } + +int _PyStructSequence_Init(void) +{ + if (_PyUnicode_FromId(&PyId_n_sequence_fields) == NULL + || _PyUnicode_FromId(&PyId_n_fields) == NULL + || _PyUnicode_FromId(&PyId_n_unnamed_fields) == NULL) + return -1; + + return 0; +} -- cgit v1.2.1 From e7fc7cb4bf8c0736c7d61c497be8681c7b19a441 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 17 Jul 2013 13:41:39 +0200 Subject: Issue #18408: Fix structseq_reduce(), handle PyDict_SetItemString() failure --- Objects/structseq.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'Objects') diff --git a/Objects/structseq.c b/Objects/structseq.c index 212ab52d42..5553267d2e 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -233,8 +233,8 @@ structseq_repr(PyStructSequence *obj) static PyObject * structseq_reduce(PyStructSequence* self) { - PyObject* tup; - PyObject* dict; + PyObject* tup = NULL; + PyObject* dict = NULL; PyObject* result; Py_ssize_t n_fields, n_visible_fields, n_unnamed_fields; int i; @@ -243,15 +243,12 @@ structseq_reduce(PyStructSequence* self) n_visible_fields = VISIBLE_SIZE(self); n_unnamed_fields = UNNAMED_FIELDS(self); tup = PyTuple_New(n_visible_fields); - if (!tup) { - return NULL; - } + if (!tup) + goto error; dict = PyDict_New(); - if (!dict) { - Py_DECREF(tup); - return NULL; - } + if (!dict) + goto error; for (i = 0; i < n_visible_fields; i++) { Py_INCREF(self->ob_item[i]); @@ -260,8 +257,8 @@ structseq_reduce(PyStructSequence* self) for (; i < n_fields; i++) { char *n = Py_TYPE(self)->tp_members[i-n_unnamed_fields].name; - PyDict_SetItemString(dict, n, - self->ob_item[i]); + if (PyDict_SetItemString(dict, n, self->ob_item[i]) < 0) + goto error; } result = Py_BuildValue("(O(OO))", Py_TYPE(self), tup, dict); @@ -270,6 +267,11 @@ structseq_reduce(PyStructSequence* self) Py_DECREF(dict); return result; + +error: + Py_XDECREF(tup); + Py_XDECREF(dict); + return NULL; } static PyMethodDef structseq_methods[] = { -- cgit v1.2.1 From 
c50cbdbec824a1fc21945f79917e9cfb7cd7dfc0 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 17 Jul 2013 21:58:01 +0200 Subject: Issue #18408: Fix listpop(), handle list_ass_slice() failure --- Objects/listobject.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'Objects') diff --git a/Objects/listobject.c b/Objects/listobject.c index 0ec70e587a..ce6b70889e 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -934,12 +934,10 @@ listpop(PyListObject *self, PyObject *args) } Py_INCREF(v); status = list_ass_slice(self, i, i+1, (PyObject *)NULL); - assert(status >= 0); - /* Use status, so that in a release build compilers don't - * complain about the unused name. - */ - (void) status; - + if (status < 0) { + Py_DECREF(v); + return NULL; + } return v; } -- cgit v1.2.1 From be2675d1c62cdb776f3f001377e0a75f7ef8f1fc Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 17 Jul 2013 21:58:41 +0200 Subject: Issue #18408: Fix Py_ReprEnter(), handle PyList_Append() failure --- Objects/object.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index 700e8bef56..e957d9d7b0 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1910,7 +1910,8 @@ Py_ReprEnter(PyObject *obj) if (PyList_GET_ITEM(list, i) == obj) return 1; } - PyList_Append(list, obj); + if (PyList_Append(list, obj) < 0) + return -1; return 0; } -- cgit v1.2.1 From 47eb963a14c669acc539a4d6c452e3743352cae2 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 17 Jul 2013 22:01:37 +0200 Subject: Issue #18408: Fix PyType_Ready(), handle _PyDict_SetItemId() failure --- Objects/typeobject.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index f311af8f25..c970ada1df 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4256,11 +4256,15 @@ PyType_Ready(PyTypeObject *type) PyObject *doc = 
PyUnicode_FromString(type->tp_doc); if (doc == NULL) goto error; - _PyDict_SetItemId(type->tp_dict, &PyId___doc__, doc); + if (_PyDict_SetItemId(type->tp_dict, &PyId___doc__, doc) < 0) { + Py_DECREF(doc); + goto error; + } Py_DECREF(doc); } else { - _PyDict_SetItemId(type->tp_dict, - &PyId___doc__, Py_None); + if (_PyDict_SetItemId(type->tp_dict, + &PyId___doc__, Py_None) < 0) + goto error; } } -- cgit v1.2.1 From 9b623aea7a1dec4d69c6c1b35b83e4769706213b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 17 Jul 2013 22:31:17 +0200 Subject: Issue #18408: Rewrite NEGATE() macro in longobject.c to handle PyLong_FromLong() failure --- Objects/longobject.c | 75 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 24 deletions(-) (limited to 'Objects') diff --git a/Objects/longobject.c b/Objects/longobject.c index dbedadb5ba..a894ec5a64 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -70,11 +70,21 @@ maybe_small_long(PyLongObject *v) /* If a freshly-allocated long is already shared, it must be a small integer, so negating it must go to PyLong_FromLong */ -#define NEGATE(x) \ - do if (Py_REFCNT(x) == 1) Py_SIZE(x) = -Py_SIZE(x); \ - else { PyObject* tmp=PyLong_FromLong(-MEDIUM_VALUE(x)); \ - Py_DECREF(x); (x) = (PyLongObject*)tmp; } \ - while(0) +Py_LOCAL_INLINE(void) +_PyLong_Negate(PyLongObject **x_p) +{ + PyLongObject *x; + + x = (PyLongObject *)*x_p; + if (Py_REFCNT(x) == 1) { + Py_SIZE(x) = -Py_SIZE(x); + return; + } + + *x_p = (PyLongObject *)PyLong_FromLong(-MEDIUM_VALUE(x)); + Py_DECREF(x); +} + /* For long multiplication, use the O(N**2) school algorithm unless * both operands contain more than KARATSUBA_CUTOFF digits (this * being an internal Python long digit, in base BASE). @@ -2357,10 +2367,21 @@ long_divrem(PyLongObject *a, PyLongObject *b, The quotient z has the sign of a*b; the remainder r has the sign of a, so a = b*z + r. 
*/ - if ((Py_SIZE(a) < 0) != (Py_SIZE(b) < 0)) - NEGATE(z); - if (Py_SIZE(a) < 0 && Py_SIZE(*prem) != 0) - NEGATE(*prem); + if ((Py_SIZE(a) < 0) != (Py_SIZE(b) < 0)) { + _PyLong_Negate(&z); + if (z == NULL) { + Py_CLEAR(*prem); + return -1; + } + } + if (Py_SIZE(a) < 0 && Py_SIZE(*prem) != 0) { + _PyLong_Negate(prem); + if (*prem == NULL) { + Py_DECREF(z); + Py_CLEAR(*prem); + return -1; + } + } *pdiv = maybe_small_long(z); return 0; } @@ -2856,8 +2877,11 @@ x_sub(PyLongObject *a, PyLongObject *b) borrow &= 1; /* Keep only one sign bit */ } assert(borrow == 0); - if (sign < 0) - NEGATE(z); + if (sign < 0) { + _PyLong_Negate(&z); + if (z == NULL) + return NULL; + } return long_normalize(z); } @@ -3348,8 +3372,11 @@ long_mul(PyLongObject *a, PyLongObject *b) z = k_mul(a, b); /* Negate if exactly one of the inputs is negative. */ - if (((Py_SIZE(a) ^ Py_SIZE(b)) < 0) && z) - NEGATE(z); + if (((Py_SIZE(a) ^ Py_SIZE(b)) < 0) && z) { + _PyLong_Negate(&z); + if (z == NULL) + return NULL; + } return (PyObject *)z; } @@ -3796,7 +3823,9 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) Py_DECREF(c); c = temp; temp = NULL; - NEGATE(c); + _PyLong_Negate(&c); + if (c == NULL) + goto Error; } /* if modulus == 1: @@ -3896,10 +3925,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) goto Done; Error: - if (z != NULL) { - Py_DECREF(z); - z = NULL; - } + Py_CLEAR(z); /* fall through */ Done: if (Py_SIZE(b) > FIVEARY_CUTOFF) { @@ -4029,10 +4055,10 @@ long_lshift(PyObject *v, PyObject *w) shiftby = PyLong_AsSsize_t((PyObject *)b); if (shiftby == -1L && PyErr_Occurred()) - goto lshift_error; + return NULL; if (shiftby < 0) { PyErr_SetString(PyExc_ValueError, "negative shift count"); - goto lshift_error; + return NULL; } /* wordshift, remshift = divmod(shiftby, PyLong_SHIFT) */ wordshift = shiftby / PyLong_SHIFT; @@ -4044,9 +4070,11 @@ long_lshift(PyObject *v, PyObject *w) ++newsize; z = _PyLong_New(newsize); if (z == NULL) - goto lshift_error; - if (Py_SIZE(a) < 0) - NEGATE(z); + 
return NULL; + if (Py_SIZE(a) < 0) { + assert(Py_REFCNT(z) == 1); + Py_SIZE(z) = -Py_SIZE(z); + } for (i = 0; i < wordshift; i++) z->ob_digit[i] = 0; accum = 0; @@ -4060,7 +4088,6 @@ long_lshift(PyObject *v, PyObject *w) else assert(!accum); z = long_normalize(z); - lshift_error: return (PyObject *) maybe_small_long(z); } -- cgit v1.2.1 From b31933d93f562c5cff975c99332a2bd7a6a1265d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 17 Jul 2013 22:33:42 +0200 Subject: longobject.c: add an assertion to ensure that MEDIUM_VALUE() is only called on small integers (0 or 1 digit) --- Objects/longobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/longobject.c b/Objects/longobject.c index a894ec5a64..925e55a138 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -17,7 +17,8 @@ #endif /* convert a PyLong of size 1, 0 or -1 to an sdigit */ -#define MEDIUM_VALUE(x) (Py_SIZE(x) < 0 ? -(sdigit)(x)->ob_digit[0] : \ +#define MEDIUM_VALUE(x) (assert(-1 <= Py_SIZE(x) && Py_SIZE(x) <= 1), \ + Py_SIZE(x) < 0 ? -(sdigit)(x)->ob_digit[0] : \ (Py_SIZE(x) == 0 ? (sdigit)0 : \ (sdigit)(x)->ob_digit[0])) #define ABS(x) ((x) < 0 ? -(x) : (x)) -- cgit v1.2.1 From d2f8f1bf739a09dce0d2e88da25a86f4b75eae66 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 18 Jul 2013 01:00:45 +0200 Subject: Issue #18408: Fix dict_repr(), don't call PyObject_Repr() with an exception set PyObject_Repr() can remove the current exception. For example, module_repr() calls PyErr_Clear() if calling loader.module_repr(mod) failed. 
--- Objects/dictobject.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 3243061b68..36c710ed5b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1443,6 +1443,9 @@ dict_repr(PyDictObject *mp) Py_INCREF(value); s = PyObject_Repr(key); PyUnicode_Append(&s, colon); + if (s == NULL) + goto Done; + PyUnicode_AppendAndDel(&s, PyObject_Repr(value)); Py_DECREF(key); Py_DECREF(value); -- cgit v1.2.1 From e27f769457f0a511e5ea3beae33be854ad566902 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 18 Jul 2013 01:49:30 +0200 Subject: Issue #18408: PyObject_Str(), PyObject_Repr() and type_call() now fail with an assertion error if they are called with an exception set (PyErr_Occurred()). As PyEval_EvalFrameEx(), they may clear the current exception and so the caller looses its exception. --- Objects/object.c | 16 ++++++++++++++++ Objects/typeobject.c | 7 +++++++ 2 files changed, 23 insertions(+) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index e957d9d7b0..47d3ebd2ca 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -377,6 +377,14 @@ PyObject_Repr(PyObject *v) if (Py_TYPE(v)->tp_repr == NULL) return PyUnicode_FromFormat("<%s object at %p>", v->ob_type->tp_name, v); + +#ifdef Py_DEBUG + /* PyObject_Repr() must not be called with an exception set, + because it may clear it (directly or indirectly) and so the + caller looses its exception */ + assert(!PyErr_Occurred()); +#endif + res = (*v->ob_type->tp_repr)(v); if (res == NULL) return NULL; @@ -408,6 +416,7 @@ PyObject_Str(PyObject *v) #endif if (v == NULL) return PyUnicode_FromString(""); + if (PyUnicode_CheckExact(v)) { #ifndef Py_DEBUG if (PyUnicode_READY(v) < 0) @@ -419,6 +428,13 @@ PyObject_Str(PyObject *v) if (Py_TYPE(v)->tp_str == NULL) return PyObject_Repr(v); +#ifdef Py_DEBUG + /* PyObject_Str() must not be called with an exception set, + because it may clear it (directly or indirectly) and 
so the + caller looses its exception */ + assert(!PyErr_Occurred()); +#endif + /* It is possible for a type to have a tp_str representation that loops infinitely. */ if (Py_EnterRecursiveCall(" while getting the str of an object")) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index c970ada1df..0eea38443f 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -736,6 +736,13 @@ type_call(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } +#ifdef Py_DEBUG + /* type_call() must not be called with an exception set, + because it may clear it (directly or indirectly) and so the + caller looses its exception */ + assert(!PyErr_Occurred()); +#endif + obj = type->tp_new(type, args, kwds); if (obj != NULL) { /* Ugly exception: when the call was type(something), -- cgit v1.2.1 From 2c0f400d362494e894d4574c86fb11848da28793 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 19 Jul 2013 23:06:21 +0200 Subject: Issue #18408: Fix list_ass_slice(), handle list_resize() failure I tested the patch manually by injecting a fault using gdb: list items are correctly restored on failure. 
--- Objects/listobject.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/listobject.c b/Objects/listobject.c index ce6b70889e..2f203b343f 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -644,9 +644,14 @@ list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v) memcpy(recycle, &item[ilow], s); if (d < 0) { /* Delete -d items */ - memmove(&item[ihigh+d], &item[ihigh], - (Py_SIZE(a) - ihigh)*sizeof(PyObject *)); - list_resize(a, Py_SIZE(a) + d); + Py_ssize_t tail; + tail = (Py_SIZE(a) - ihigh) * sizeof(PyObject *); + memmove(&item[ihigh+d], &item[ihigh], tail); + if (list_resize(a, Py_SIZE(a) + d) < 0) { + memmove(&item[ihigh], &item[ihigh+d], tail); + memcpy(&item[ilow], recycle, s); + goto Error; + } item = a->ob_item; } else if (d > 0) { /* Insert d items */ -- cgit v1.2.1 From f1d5dffe1339dbef733a98cc5ae3775c73202f5f Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sat, 20 Jul 2013 14:19:46 +0200 Subject: Issue #18327: Fix argument order in call to compatible_for_assignment(oldto, newto, attr). The fix only affects the error message of __class__ assignment. 
CID 983564 --- Objects/typeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 0eea38443f..b8b5076c1d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3338,7 +3338,7 @@ object_set_class(PyObject *self, PyObject *value, void *closure) "__class__ assignment: only for heap types"); return -1; } - if (compatible_for_assignment(newto, oldto, "__class__")) { + if (compatible_for_assignment(oldto, newto, "__class__")) { Py_INCREF(newto); Py_TYPE(self) = newto; Py_DECREF(oldto); -- cgit v1.2.1 From 92d3e3cf6bfe7320aea27780a6525e6075624935 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 22 Jul 2013 22:24:54 +0200 Subject: Issue #18520: Add a new PyStructSequence_InitType2() function, the same as PyStructSequence_InitType() except that it has a return value (0 on success, -1 on error). * PyStructSequence_InitType2() now raises MemoryError on memory allocation failure * Also fix some calls to PyDict_SetItemString(): handle error --- Objects/floatobject.c | 9 ++++++--- Objects/longobject.c | 6 ++++-- Objects/structseq.c | 37 +++++++++++++++++++++++++++---------- 3 files changed, 37 insertions(+), 15 deletions(-) (limited to 'Objects') diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 1398fa5981..7ee2034f89 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -1853,7 +1853,7 @@ PyTypeObject PyFloat_Type = { float_new, /* tp_new */ }; -void +int _PyFloat_Init(void) { /* We attempt to determine if this machine is using IEEE @@ -1903,8 +1903,11 @@ _PyFloat_Init(void) float_format = detected_float_format; /* Init float info */ - if (FloatInfoType.tp_name == 0) - PyStructSequence_InitType(&FloatInfoType, &floatinfo_desc); + if (FloatInfoType.tp_name == NULL) { + if (PyStructSequence_InitType2(&FloatInfoType, &floatinfo_desc) < 0) + return 0; + } + return 1; } int diff --git a/Objects/longobject.c b/Objects/longobject.c index 
925e55a138..ce75888f7e 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -5059,8 +5059,10 @@ _PyLong_Init(void) } #endif /* initialize int_info */ - if (Int_InfoType.tp_name == 0) - PyStructSequence_InitType(&Int_InfoType, &int_info_desc); + if (Int_InfoType.tp_name == NULL) { + if (PyStructSequence_InitType2(&Int_InfoType, &int_info_desc) < 0) + return 0; + } return 1; } diff --git a/Objects/structseq.c b/Objects/structseq.c index 5553267d2e..664344be6c 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -320,12 +320,13 @@ static PyTypeObject _struct_sequence_template = { structseq_new, /* tp_new */ }; -void -PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc) +int +PyStructSequence_InitType2(PyTypeObject *type, PyStructSequence_Desc *desc) { PyObject *dict; PyMemberDef* members; int n_members, n_unnamed_members, i, k; + PyObject *v; #ifdef Py_TRACE_REFS /* if the type object was chained, unchain it first @@ -347,8 +348,10 @@ PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc) type->tp_doc = desc->doc; members = PyMem_NEW(PyMemberDef, n_members-n_unnamed_members+1); - if (members == NULL) - return; + if (members == NULL) { + PyErr_NoMemory(); + return -1; + } for (i = k = 0; i < n_members; ++i) { if (desc->fields[i].name == PyStructSequence_UnnamedField) @@ -366,22 +369,33 @@ PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc) type->tp_members = members; if (PyType_Ready(type) < 0) - return; + return -1; Py_INCREF(type); dict = type->tp_dict; #define SET_DICT_FROM_INT(key, value) \ do { \ - PyObject *v = PyLong_FromLong((long) value); \ - if (v != NULL) { \ - PyDict_SetItemString(dict, key, v); \ + v = PyLong_FromLong((long) value); \ + if (v == NULL) \ + return -1; \ + if (PyDict_SetItemString(dict, key, v) < 0) { \ Py_DECREF(v); \ + return -1; \ } \ + Py_DECREF(v); \ } while (0) SET_DICT_FROM_INT(visible_length_key, desc->n_in_sequence); 
SET_DICT_FROM_INT(real_length_key, n_members); SET_DICT_FROM_INT(unnamed_fields_key, n_unnamed_members); + + return 0; +} + +void +PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc) +{ + (void)PyStructSequence_InitType2(type, desc); } PyTypeObject* @@ -390,8 +404,11 @@ PyStructSequence_NewType(PyStructSequence_Desc *desc) PyTypeObject *result; result = (PyTypeObject*)PyType_GenericAlloc(&PyType_Type, 0); - if (result != NULL) { - PyStructSequence_InitType(result, desc); + if (result == NULL) + return NULL; + if (PyStructSequence_InitType2(result, desc) < 0) { + Py_DECREF(result); + return NULL; } return result; } -- cgit v1.2.1 From b564936ca45abbd86976a16069155982dfd3e66d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 22 Jul 2013 23:02:05 +0200 Subject: Reindent PyFunction_NewWithQualName() --- Objects/funcobject.c | 98 ++++++++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 49 deletions(-) (limited to 'Objects') diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 49415b95e1..b5525e3d44 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -8,60 +8,60 @@ PyObject * PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname) { - PyFunctionObject *op = PyObject_GC_New(PyFunctionObject, - &PyFunction_Type); - static PyObject *__name__ = 0; - if (op != NULL) { - PyObject *doc; - PyObject *consts; - PyObject *module; - op->func_weakreflist = NULL; - Py_INCREF(code); - op->func_code = code; - Py_INCREF(globals); - op->func_globals = globals; - op->func_name = ((PyCodeObject *)code)->co_name; - Py_INCREF(op->func_name); - op->func_defaults = NULL; /* No default arguments */ - op->func_kwdefaults = NULL; /* No keyword only defaults */ - op->func_closure = NULL; - consts = ((PyCodeObject *)code)->co_consts; - if (PyTuple_Size(consts) >= 1) { - doc = PyTuple_GetItem(consts, 0); - if (!PyUnicode_Check(doc)) - doc = Py_None; - } - else + PyFunctionObject *op; + PyObject 
*doc, *consts, *module; + static PyObject *__name__ = NULL; + + op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type); + if (op == NULL) + return NULL; + + op->func_weakreflist = NULL; + Py_INCREF(code); + op->func_code = code; + Py_INCREF(globals); + op->func_globals = globals; + op->func_name = ((PyCodeObject *)code)->co_name; + Py_INCREF(op->func_name); + op->func_defaults = NULL; /* No default arguments */ + op->func_kwdefaults = NULL; /* No keyword only defaults */ + op->func_closure = NULL; + consts = ((PyCodeObject *)code)->co_consts; + if (PyTuple_Size(consts) >= 1) { + doc = PyTuple_GetItem(consts, 0); + if (!PyUnicode_Check(doc)) doc = Py_None; - Py_INCREF(doc); - op->func_doc = doc; - op->func_dict = NULL; - op->func_module = NULL; - op->func_annotations = NULL; - - /* __module__: If module name is in globals, use it. - Otherwise, use None. - */ + } + else + doc = Py_None; + Py_INCREF(doc); + op->func_doc = doc; + op->func_dict = NULL; + op->func_module = NULL; + op->func_annotations = NULL; + + /* __module__: If module name is in globals, use it. + Otherwise, use None. 
+ */ + if (!__name__) { + __name__ = PyUnicode_InternFromString("__name__"); if (!__name__) { - __name__ = PyUnicode_InternFromString("__name__"); - if (!__name__) { - Py_DECREF(op); - return NULL; - } - } - module = PyDict_GetItem(globals, __name__); - if (module) { - Py_INCREF(module); - op->func_module = module; + Py_DECREF(op); + return NULL; } - if (qualname) - op->func_qualname = qualname; - else - op->func_qualname = op->func_name; - Py_INCREF(op->func_qualname); } + + module = PyDict_GetItem(globals, __name__); + if (module) { + Py_INCREF(module); + op->func_module = module; + } + if (qualname) + op->func_qualname = qualname; else - return NULL; + op->func_qualname = op->func_name; + Py_INCREF(op->func_qualname); + _PyObject_GC_TRACK(op); return (PyObject *)op; } -- cgit v1.2.1 From 165cd2bc6d4c8b366abdb7f32a7eb004fd3850b9 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 22 Jul 2013 23:04:55 +0200 Subject: Issue #18520: Fix PyFunction_NewWithQualName() error handling --- Objects/funcobject.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'Objects') diff --git a/Objects/funcobject.c b/Objects/funcobject.c index b5525e3d44..b04393415a 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -12,6 +12,12 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname PyObject *doc, *consts, *module; static PyObject *__name__ = NULL; + if (__name__ == NULL) { + __name__ = PyUnicode_InternFromString("__name__"); + if (__name__ == NULL) + return NULL; + } + op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type); if (op == NULL) return NULL; @@ -26,6 +32,7 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname op->func_defaults = NULL; /* No default arguments */ op->func_kwdefaults = NULL; /* No keyword only defaults */ op->func_closure = NULL; + consts = ((PyCodeObject *)code)->co_consts; if (PyTuple_Size(consts) >= 1) { doc = PyTuple_GetItem(consts, 0); @@ 
-36,21 +43,13 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname doc = Py_None; Py_INCREF(doc); op->func_doc = doc; + op->func_dict = NULL; op->func_module = NULL; op->func_annotations = NULL; /* __module__: If module name is in globals, use it. - Otherwise, use None. - */ - if (!__name__) { - __name__ = PyUnicode_InternFromString("__name__"); - if (!__name__) { - Py_DECREF(op); - return NULL; - } - } - + Otherwise, use None. */ module = PyDict_GetItem(globals, __name__); if (module) { Py_INCREF(module); -- cgit v1.2.1 From 015fad13eabce6c057cbdb62ae93ea80b4803718 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 22 Jul 2013 23:50:57 +0200 Subject: Issue #18520: Fix _PyDict_GetItemId(), suppress _PyUnicode_FromId() error Like PyDict_GetItem(), _PyDict_GetItemId() suppresses all errors that may occur, for historical reasons. --- Objects/dictobject.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 36c710ed5b..e0cb8ac753 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2684,8 +2684,10 @@ _PyDict_GetItemId(PyObject *dp, struct _Py_Identifier *key) { PyObject *kv; kv = _PyUnicode_FromId(key); /* borrowed */ - if (kv == NULL) + if (kv == NULL) { + PyErr_Clear(); return NULL; + } return PyDict_GetItem(dp, kv); } -- cgit v1.2.1 From 3e0aaf3e7d782ca7e08de2ba352be9c80d634dbe Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 30 Jul 2013 19:59:21 +0200 Subject: Issue #18112: PEP 442 implementation (safe object finalization). 
--- Objects/genobject.c | 103 ++++++++++++++++----------------------------------- Objects/object.c | 70 +++++++++++++++++++++++++++++++++- Objects/typeobject.c | 97 +++++++++++++++++++++++------------------------- 3 files changed, 147 insertions(+), 123 deletions(-) (limited to 'Objects') diff --git a/Objects/genobject.c b/Objects/genobject.c index 016bfa2975..dfd90aa4b1 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -15,6 +15,31 @@ gen_traverse(PyGenObject *gen, visitproc visit, void *arg) return 0; } +static void +gen_finalize(PyObject *self) +{ + PyGenObject *gen = (PyGenObject *)self; + PyObject *res; + PyObject *error_type, *error_value, *error_traceback; + + if (gen->gi_frame == NULL || gen->gi_frame->f_stacktop == NULL) + /* Generator isn't paused, so no need to close */ + return; + + /* Save the current exception, if any. */ + PyErr_Fetch(&error_type, &error_value, &error_traceback); + + res = gen_close(gen, NULL); + + if (res == NULL) + PyErr_WriteUnraisable(self); + else + Py_DECREF(res); + + /* Restore the saved exception. */ + PyErr_Restore(error_type, error_value, error_traceback); +} + static void gen_dealloc(PyGenObject *gen) { @@ -27,12 +52,8 @@ gen_dealloc(PyGenObject *gen) _PyObject_GC_TRACK(self); - if (gen->gi_frame != NULL && gen->gi_frame->f_stacktop != NULL) { - /* Generator is paused, so we need to close */ - Py_TYPE(gen)->tp_del(self); - if (self->ob_refcnt > 0) - return; /* resurrected. :( */ - } + if (PyObject_CallFinalizerFromDealloc(self)) + return; /* resurrected. 
:( */ _PyObject_GC_UNTRACK(self); Py_CLEAR(gen->gi_frame); @@ -40,7 +61,6 @@ gen_dealloc(PyGenObject *gen) PyObject_GC_Del(gen); } - static PyObject * gen_send_ex(PyGenObject *gen, PyObject *arg, int exc) { @@ -222,68 +242,6 @@ gen_close(PyGenObject *gen, PyObject *args) return NULL; } -static void -gen_del(PyObject *self) -{ - PyObject *res; - PyObject *error_type, *error_value, *error_traceback; - PyGenObject *gen = (PyGenObject *)self; - - if (gen->gi_frame == NULL || gen->gi_frame->f_stacktop == NULL) - /* Generator isn't paused, so no need to close */ - return; - - /* Temporarily resurrect the object. */ - assert(self->ob_refcnt == 0); - self->ob_refcnt = 1; - - /* Save the current exception, if any. */ - PyErr_Fetch(&error_type, &error_value, &error_traceback); - - res = gen_close(gen, NULL); - - if (res == NULL) - PyErr_WriteUnraisable(self); - else - Py_DECREF(res); - - /* Restore the saved exception. */ - PyErr_Restore(error_type, error_value, error_traceback); - - /* Undo the temporary resurrection; can't use DECREF here, it would - * cause a recursive call. - */ - assert(self->ob_refcnt > 0); - if (--self->ob_refcnt == 0) - return; /* this is the normal path out */ - - /* close() resurrected it! Make it look like the original Py_DECREF - * never happened. - */ - { - Py_ssize_t refcnt = self->ob_refcnt; - _Py_NewReference(self); - self->ob_refcnt = refcnt; - } - assert(PyType_IS_GC(Py_TYPE(self)) && - _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED); - - /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so - * we need to undo that. */ - _Py_DEC_REFTOTAL; - /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object - * chain, so no more to do there. - * If COUNT_ALLOCS, the original decref bumped tp_frees, and - * _Py_NewReference bumped tp_allocs: both of those need to be - * undone. 
- */ -#ifdef COUNT_ALLOCS - --(Py_TYPE(self)->tp_frees); - --(Py_TYPE(self)->tp_allocs); -#endif -} - - PyDoc_STRVAR(throw_doc, "throw(typ[,val[,tb]]) -> raise exception in generator,\n\ @@ -517,7 +475,8 @@ PyTypeObject PyGen_Type = { PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_HAVE_FINALIZE, /* tp_flags */ 0, /* tp_doc */ (traverseproc)gen_traverse, /* tp_traverse */ 0, /* tp_clear */ @@ -544,7 +503,9 @@ PyTypeObject PyGen_Type = { 0, /* tp_cache */ 0, /* tp_subclasses */ 0, /* tp_weaklist */ - gen_del, /* tp_del */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + gen_finalize, /* tp_finalize */ }; PyObject * diff --git a/Objects/object.c b/Objects/object.c index 47d3ebd2ca..c83109d39b 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -255,6 +255,72 @@ _PyObject_NewVar(PyTypeObject *tp, Py_ssize_t nitems) return PyObject_INIT_VAR(op, tp, nitems); } +void +PyObject_CallFinalizer(PyObject *self) +{ + PyTypeObject *tp = Py_TYPE(self); + + /* The former could happen on heaptypes created from the C API, e.g. + PyType_FromSpec(). */ + if (!PyType_HasFeature(tp, Py_TPFLAGS_HAVE_FINALIZE) || + tp->tp_finalize == NULL) + return; + /* tp_finalize should only be called once. */ + if (PyType_IS_GC(tp) && _PyGC_FINALIZED(self)) + return; + + tp->tp_finalize(self); + if (PyType_IS_GC(tp)) + _PyGC_SET_FINALIZED(self, 1); +} + +int +PyObject_CallFinalizerFromDealloc(PyObject *self) +{ + Py_ssize_t refcnt; + + /* Temporarily resurrect the object. */ + if (self->ob_refcnt != 0) { + Py_FatalError("PyObject_CallFinalizerFromDealloc called on " + "object with a non-zero refcount"); + } + self->ob_refcnt = 1; + + PyObject_CallFinalizer(self); + + /* Undo the temporary resurrection; can't use DECREF here, it would + * cause a recursive call. 
+ */ + assert(self->ob_refcnt > 0); + if (--self->ob_refcnt == 0) + return 0; /* this is the normal path out */ + + /* tp_finalize resurrected it! Make it look like the original Py_DECREF + * never happened. + */ + refcnt = self->ob_refcnt; + _Py_NewReference(self); + self->ob_refcnt = refcnt; + + if (PyType_IS_GC(Py_TYPE(self))) { + assert(_PyGC_REFS(self) != _PyGC_REFS_UNTRACKED); + } + /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so + * we need to undo that. */ + _Py_DEC_REFTOTAL; + /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object + * chain, so no more to do there. + * If COUNT_ALLOCS, the original decref bumped tp_frees, and + * _Py_NewReference bumped tp_allocs: both of those need to be + * undone. + */ +#ifdef COUNT_ALLOCS + --Py_TYPE(self)->tp_frees; + --Py_TYPE(self)->tp_allocs; +#endif + return -1; +} + int PyObject_Print(PyObject *op, FILE *fp, int flags) { @@ -1981,7 +2047,7 @@ void _PyTrash_deposit_object(PyObject *op) { assert(PyObject_IS_GC(op)); - assert(_Py_AS_GC(op)->gc.gc_refs == _PyGC_REFS_UNTRACKED); + assert(_PyGC_REFS(op) == _PyGC_REFS_UNTRACKED); assert(op->ob_refcnt == 0); _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyTrash_delete_later; _PyTrash_delete_later = op; @@ -1993,7 +2059,7 @@ _PyTrash_thread_deposit_object(PyObject *op) { PyThreadState *tstate = PyThreadState_GET(); assert(PyObject_IS_GC(op)); - assert(_Py_AS_GC(op)->gc.gc_refs == _PyGC_REFS_UNTRACKED); + assert(_PyGC_REFS(op) == _PyGC_REFS_UNTRACKED); assert(op->ob_refcnt == 0); _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *) tstate->trash_delete_later; tstate->trash_delete_later = op; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b8b5076c1d..3ff42da1dd 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -921,6 +921,7 @@ subtype_dealloc(PyObject *self) PyTypeObject *type, *base; destructor basedealloc; PyThreadState *tstate = PyThreadState_GET(); + int has_finalizer; /* Extract the type; we expect it to be a heap type */ type = 
Py_TYPE(self); @@ -936,6 +937,10 @@ subtype_dealloc(PyObject *self) clear_slots(), or DECREF the dict, or clear weakrefs. */ /* Maybe call finalizer; exit early if resurrected */ + if (type->tp_finalize) { + if (PyObject_CallFinalizerFromDealloc(self) < 0) + return; + } if (type->tp_del) { type->tp_del(self); if (self->ob_refcnt > 0) @@ -987,25 +992,36 @@ subtype_dealloc(PyObject *self) assert(base); } - /* If we added a weaklist, we clear it. Do this *before* calling - the finalizer (__del__), clearing slots, or clearing the instance - dict. */ + has_finalizer = type->tp_finalize || type->tp_del; + + /* Maybe call finalizer; exit early if resurrected */ + if (has_finalizer) + _PyObject_GC_TRACK(self); + if (type->tp_finalize) { + if (PyObject_CallFinalizerFromDealloc(self) < 0) { + /* Resurrected */ + goto endlabel; + } + } + /* If we added a weaklist, we clear it. Do this *before* calling + tp_del, clearing slots, or clearing the instance dict. */ if (type->tp_weaklistoffset && !base->tp_weaklistoffset) PyObject_ClearWeakRefs(self); - /* Maybe call finalizer; exit early if resurrected */ if (type->tp_del) { - _PyObject_GC_TRACK(self); type->tp_del(self); - if (self->ob_refcnt > 0) - goto endlabel; /* resurrected */ - else - _PyObject_GC_UNTRACK(self); + if (self->ob_refcnt > 0) { + /* Resurrected */ + goto endlabel; + } + } + if (has_finalizer) { + _PyObject_GC_UNTRACK(self); /* New weakrefs could be created during the finalizer call. - If this occurs, clear them out without calling their - finalizers since they might rely on part of the object - being finalized that has already been destroyed. */ + If this occurs, clear them out without calling their + finalizers since they might rely on part of the object + being finalized that has already been destroyed. 
*/ if (type->tp_weaklistoffset && !base->tp_weaklistoffset) { /* Modeled after GET_WEAKREFS_LISTPTR() */ PyWeakReference **list = (PyWeakReference **) \ @@ -2231,7 +2247,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) /* Initialize tp_flags */ type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE | - Py_TPFLAGS_BASETYPE; + Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_FINALIZE; if (base->tp_flags & Py_TPFLAGS_HAVE_GC) type->tp_flags |= Py_TPFLAGS_HAVE_GC; @@ -4111,6 +4127,10 @@ inherit_slots(PyTypeObject *type, PyTypeObject *base) COPYSLOT(tp_init); COPYSLOT(tp_alloc); COPYSLOT(tp_is_gc); + if ((type->tp_flags & Py_TPFLAGS_HAVE_FINALIZE) && + (base->tp_flags & Py_TPFLAGS_HAVE_FINALIZE)) { + COPYSLOT(tp_finalize); + } if ((type->tp_flags & Py_TPFLAGS_HAVE_GC) == (base->tp_flags & Py_TPFLAGS_HAVE_GC)) { /* They agree about gc. */ @@ -4736,6 +4756,18 @@ wrap_call(PyObject *self, PyObject *args, void *wrapped, PyObject *kwds) return (*func)(self, args, kwds); } +static PyObject * +wrap_del(PyObject *self, PyObject *args, void *wrapped) +{ + destructor func = (destructor)wrapped; + + if (!check_num_args(args, 0)) + return NULL; + + (*func)(self); + Py_RETURN_NONE; +} + static PyObject * wrap_richcmpfunc(PyObject *self, PyObject *args, void *wrapped, int op) { @@ -5617,16 +5649,12 @@ slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } static void -slot_tp_del(PyObject *self) +slot_tp_finalize(PyObject *self) { _Py_IDENTIFIER(__del__); PyObject *del, *res; PyObject *error_type, *error_value, *error_traceback; - /* Temporarily resurrect the object. */ - assert(self->ob_refcnt == 0); - self->ob_refcnt = 1; - /* Save the current exception, if any. */ PyErr_Fetch(&error_type, &error_value, &error_traceback); @@ -5643,37 +5671,6 @@ slot_tp_del(PyObject *self) /* Restore the saved exception. 
*/ PyErr_Restore(error_type, error_value, error_traceback); - - /* Undo the temporary resurrection; can't use DECREF here, it would - * cause a recursive call. - */ - assert(self->ob_refcnt > 0); - if (--self->ob_refcnt == 0) - return; /* this is the normal path out */ - - /* __del__ resurrected it! Make it look like the original Py_DECREF - * never happened. - */ - { - Py_ssize_t refcnt = self->ob_refcnt; - _Py_NewReference(self); - self->ob_refcnt = refcnt; - } - assert(!PyType_IS_GC(Py_TYPE(self)) || - _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED); - /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so - * we need to undo that. */ - _Py_DEC_REFTOTAL; - /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object - * chain, so no more to do there. - * If COUNT_ALLOCS, the original decref bumped tp_frees, and - * _Py_NewReference bumped tp_allocs: both of those need to be - * undone. - */ -#ifdef COUNT_ALLOCS - --Py_TYPE(self)->tp_frees; - --Py_TYPE(self)->tp_allocs; -#endif } @@ -5782,7 +5779,7 @@ static slotdef slotdefs[] = { "see help(type(x)) for signature", PyWrapperFlag_KEYWORDS), TPSLOT("__new__", tp_new, slot_tp_new, NULL, ""), - TPSLOT("__del__", tp_del, slot_tp_del, NULL, ""), + TPSLOT("__del__", tp_finalize, slot_tp_finalize, (wrapperfunc)wrap_del, ""), BINSLOT("__add__", nb_add, slot_nb_add, "+"), -- cgit v1.2.1 From c506ce6f93bff13d93144e71284cf0e5acf34814 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 31 Jul 2013 23:14:08 +0200 Subject: Issue #18214: Improve finalization of Python modules to avoid setting their globals to None, in most cases. 
--- Objects/moduleobject.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'Objects') diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index 5970901558..3ea3be8b82 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -11,6 +11,8 @@ typedef struct { PyObject *md_dict; struct PyModuleDef *md_def; void *md_state; + PyObject *md_weaklist; + PyObject *md_name; /* for logging purposes after md_dict is cleared */ } PyModuleObject; static PyMemberDef module_members[] = { @@ -27,7 +29,8 @@ static PyTypeObject moduledef_type = { static int -module_init_dict(PyObject *md_dict, PyObject *name, PyObject *doc) +module_init_dict(PyModuleObject *mod, PyObject *md_dict, + PyObject *name, PyObject *doc) { if (md_dict == NULL) return -1; @@ -42,6 +45,11 @@ module_init_dict(PyObject *md_dict, PyObject *name, PyObject *doc) return -1; if (PyDict_SetItemString(md_dict, "__loader__", Py_None) != 0) return -1; + if (PyUnicode_CheckExact(name)) { + Py_INCREF(name); + Py_XDECREF(mod->md_name); + mod->md_name = name; + } return 0; } @@ -56,8 +64,10 @@ PyModule_NewObject(PyObject *name) return NULL; m->md_def = NULL; m->md_state = NULL; + m->md_weaklist = NULL; + m->md_name = NULL; m->md_dict = PyDict_New(); - if (module_init_dict(m->md_dict, name, NULL) != 0) + if (module_init_dict(m, m->md_dict, name, NULL) != 0) goto fail; PyObject_GC_Track(m); return (PyObject *)m; @@ -362,7 +372,7 @@ module_init(PyModuleObject *m, PyObject *args, PyObject *kwds) return -1; m->md_dict = dict; } - if (module_init_dict(dict, name, doc) < 0) + if (module_init_dict(m, dict, name, doc) < 0) return -1; return 0; } @@ -371,12 +381,15 @@ static void module_dealloc(PyModuleObject *m) { PyObject_GC_UnTrack(m); + if (Py_VerboseFlag && m->md_name) { + PySys_FormatStderr("# destroy %S\n", m->md_name); + } + if (m->md_weaklist != NULL) + PyObject_ClearWeakRefs((PyObject *) m); if (m->md_def && m->md_def->m_free) m->md_def->m_free(m); - if 
(m->md_dict != NULL) { - _PyModule_Clear((PyObject *)m); - Py_DECREF(m->md_dict); - } + Py_XDECREF(m->md_dict); + Py_XDECREF(m->md_name); if (m->md_state != NULL) PyMem_FREE(m->md_state); Py_TYPE(m)->tp_free((PyObject *)m); @@ -522,7 +535,7 @@ PyTypeObject PyModule_Type = { (traverseproc)module_traverse, /* tp_traverse */ (inquiry)module_clear, /* tp_clear */ 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ + offsetof(PyModuleObject, md_weaklist), /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ module_methods, /* tp_methods */ -- cgit v1.2.1 From 9f4dab6e85196495c6e8d2100bcfa61112f3aec5 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sat, 3 Aug 2013 12:08:22 +0100 Subject: Minor consistency fixes for some longobject.c exception messages: - replace 'long int' / 'long' by 'int' - fix capitalization of "Python" in PyLong_AsUnsignedLong - "is too large" -> "too large", for consistency with other messages. --- Objects/longobject.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/longobject.c b/Objects/longobject.c index ce75888f7e..b47be40627 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -549,7 +549,7 @@ PyLong_AsUnsignedLong(PyObject *vv) x = (x << PyLong_SHIFT) | v->ob_digit[i]; if ((x >> PyLong_SHIFT) != prev) { PyErr_SetString(PyExc_OverflowError, - "python int too large to convert " + "Python int too large to convert " "to C unsigned long"); return (unsigned long) -1; } @@ -1602,7 +1602,7 @@ long_to_decimal_string_internal(PyObject *aa, */ if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) { PyErr_SetString(PyExc_OverflowError, - "long is too large to format"); + "int too large to format"); return -1; } /* the expression size_a * PyLong_SHIFT is now safe from overflow */ @@ -1785,7 +1785,7 @@ long_format_binary(PyObject *aa, int base, int alternate, /* Ensure overflow doesn't occur during computation of sz. 
*/ if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT) { PyErr_SetString(PyExc_OverflowError, - "int is too large to format"); + "int too large to format"); return -1; } size_a_in_bits = (size_a - 1) * PyLong_SHIFT + @@ -2658,7 +2658,7 @@ PyLong_AsDouble(PyObject *v) x = _PyLong_Frexp((PyLongObject *)v, &exponent); if ((x == -1.0 && PyErr_Occurred()) || exponent > DBL_MAX_EXP) { PyErr_SetString(PyExc_OverflowError, - "long int too large to convert to float"); + "int too large to convert to float"); return -1.0; } return ldexp(x, (int)exponent); -- cgit v1.2.1 From 216d79ead3d1811c9b33c13ddeb3e41963753676 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 5 Aug 2013 23:26:40 +0200 Subject: Issue #17934: Add a clear() method to frame objects, to help clean up expensive details (local variables) and break reference cycles. --- Objects/frameobject.c | 28 ++++++++++++++++++++++++++-- Objects/genobject.c | 8 +++++--- 2 files changed, 31 insertions(+), 5 deletions(-) (limited to 'Objects') diff --git a/Objects/frameobject.c b/Objects/frameobject.c index d3b59f1ea6..a62a45e1f6 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -488,7 +488,7 @@ frame_traverse(PyFrameObject *f, visitproc visit, void *arg) } static void -frame_clear(PyFrameObject *f) +frame_tp_clear(PyFrameObject *f) { PyObject **fastlocals, **p, **oldtop; Py_ssize_t i, slots; @@ -500,6 +500,7 @@ frame_clear(PyFrameObject *f) */ oldtop = f->f_stacktop; f->f_stacktop = NULL; + f->f_executing = 0; Py_CLEAR(f->f_exc_type); Py_CLEAR(f->f_exc_value); @@ -519,6 +520,25 @@ frame_clear(PyFrameObject *f) } } +static PyObject * +frame_clear(PyFrameObject *f) +{ + if (f->f_executing) { + PyErr_SetString(PyExc_RuntimeError, + "cannot clear an executing frame"); + return NULL; + } + if (f->f_gen) { + _PyGen_Finalize(f->f_gen); + assert(f->f_gen == NULL); + } + frame_tp_clear(f); + Py_RETURN_NONE; +} + +PyDoc_STRVAR(clear__doc__, +"F.clear(): clear most references held by the frame"); + static PyObject * 
frame_sizeof(PyFrameObject *f) { @@ -538,6 +558,8 @@ PyDoc_STRVAR(sizeof__doc__, "F.__sizeof__() -> size of F in memory, in bytes"); static PyMethodDef frame_methods[] = { + {"clear", (PyCFunction)frame_clear, METH_NOARGS, + clear__doc__}, {"__sizeof__", (PyCFunction)frame_sizeof, METH_NOARGS, sizeof__doc__}, {NULL, NULL} /* sentinel */ @@ -566,7 +588,7 @@ PyTypeObject PyFrame_Type = { Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ 0, /* tp_doc */ (traverseproc)frame_traverse, /* tp_traverse */ - (inquiry)frame_clear, /* tp_clear */ + (inquiry)frame_tp_clear, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ @@ -708,6 +730,8 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, f->f_lasti = -1; f->f_lineno = code->co_firstlineno; f->f_iblock = 0; + f->f_executing = 0; + f->f_gen = NULL; _PyObject_GC_TRACK(f); return f; diff --git a/Objects/genobject.c b/Objects/genobject.c index dfd90aa4b1..08d30bf4b7 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -15,8 +15,8 @@ gen_traverse(PyGenObject *gen, visitproc visit, void *arg) return 0; } -static void -gen_finalize(PyObject *self) +void +_PyGen_Finalize(PyObject *self) { PyGenObject *gen = (PyGenObject *)self; PyObject *res; @@ -140,6 +140,7 @@ gen_send_ex(PyGenObject *gen, PyObject *arg, int exc) Py_XDECREF(t); Py_XDECREF(v); Py_XDECREF(tb); + gen->gi_frame->f_gen = NULL; gen->gi_frame = NULL; Py_DECREF(f); } @@ -505,7 +506,7 @@ PyTypeObject PyGen_Type = { 0, /* tp_weaklist */ 0, /* tp_del */ 0, /* tp_version_tag */ - gen_finalize, /* tp_finalize */ + _PyGen_Finalize, /* tp_finalize */ }; PyObject * @@ -517,6 +518,7 @@ PyGen_New(PyFrameObject *f) return NULL; } gen->gi_frame = f; + f->f_gen = (PyObject *) gen; Py_INCREF(f->f_code); gen->gi_code = (PyObject *)(f->f_code); gen->gi_running = 0; -- cgit v1.2.1 From 3108fc5302e47237d4ccf18e307a4f88e5d0436c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 5 Aug 2013 22:24:50 -0700 
Subject: Replace outdated optimization with clearer code that compiles better. Letting the compiler decide how to optimize the multiply by five gives it the freedom to make better choices for the best technique for a given target machine. For example, GCC on x86_64 produces a little bit better code: Old-way (3 steps with a data dependency between each step): shrq $5, %r13 leaq 1(%rbx,%r13), %rax leaq (%rax,%rbx,4), %rbx New-way (3 steps with no dependency between the first two steps which can be run in parallel): leaq (%rbx,%rbx,4), %rax # i*5 shrq $5, %r13 # perturb >>= PERTURB_SHIFT leaq 1(%r13,%rax), %rbx # 1 + perturb + i*5 --- Objects/setobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index ea5a24c516..0cea2a81c8 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -118,7 +118,7 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash) /* In the loop, key == dummy is by far (factor of 100s) the least likely outcome, so test for that last. */ for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { - i = (i << 2) + i + perturb + 1; + i = i * 5 + perturb + 1; entry = &table[i & mask]; if (entry->key == NULL) { if (freeslot != NULL) @@ -189,7 +189,7 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash) /* In the loop, key == dummy is by far (factor of 100s) the least likely outcome, so test for that last. */ for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { - i = (i << 2) + i + perturb + 1; + i = i * 5 + perturb + 1; entry = &table[i & mask]; if (entry->key == NULL) return freeslot == NULL ? 
entry : freeslot; @@ -258,7 +258,7 @@ set_insert_clean(register PySetObject *so, PyObject *key, Py_hash_t hash) i = (size_t)hash & mask; entry = &table[i]; for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) { - i = (i << 2) + i + perturb + 1; + i = i * 5 + perturb + 1; entry = &table[i & mask]; } so->fill++; -- cgit v1.2.1 From a12be6ae60b6a559296d637e558408d4bc2092a5 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 13 Aug 2013 20:18:52 +0200 Subject: Issue #18722: Remove uses of the "register" keyword in C code. --- Objects/bytearrayobject.c | 12 +++--- Objects/bytes_methods.c | 30 +++++++-------- Objects/bytesobject.c | 44 +++++++++++----------- Objects/classobject.c | 6 +-- Objects/complexobject.c | 2 +- Objects/dictobject.c | 78 +++++++++++++++++++-------------------- Objects/floatobject.c | 4 +- Objects/listobject.c | 14 +++---- Objects/longobject.c | 24 ++++++------ Objects/namespaceobject.c | 2 +- Objects/object.c | 4 +- Objects/setobject.c | 74 ++++++++++++++++++------------------- Objects/stringlib/codecs.h | 6 +-- Objects/stringlib/eq.h | 4 +- Objects/stringlib/find_max_char.h | 4 +- Objects/stringlib/split.h | 4 +- Objects/tupleobject.c | 48 ++++++++++++------------ Objects/unicodectype.c | 2 +- Objects/unicodeobject.c | 26 ++++++------- Objects/unicodetype_db.h | 4 +- 20 files changed, 196 insertions(+), 196 deletions(-) (limited to 'Objects') diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index aa3892417a..6ea654e4b6 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -862,9 +862,9 @@ bytearray_repr(PyByteArrayObject *self) /* 15 == strlen(quote_prefix) + 2 + strlen(quote_postfix) + 1 */ size_t newsize; PyObject *v; - register Py_ssize_t i; - register char c; - register char *p; + Py_ssize_t i; + char c; + char *p; int quote; char *test, *start; char *buffer; @@ -1431,9 +1431,9 @@ table, which must be a bytes object of length 256."); static PyObject * 
bytearray_translate(PyByteArrayObject *self, PyObject *args) { - register char *input, *output; - register const char *table; - register Py_ssize_t i, c; + char *input, *output; + const char *table; + Py_ssize_t i, c; PyObject *input_obj = (PyObject*)self; const char *output_start; Py_ssize_t inlen; diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c index ef3c2f729d..4e8107b491 100644 --- a/Objects/bytes_methods.c +++ b/Objects/bytes_methods.c @@ -10,9 +10,9 @@ and there is at least one character in B, False otherwise."); PyObject* _Py_bytes_isspace(const char *cptr, Py_ssize_t len) { - register const unsigned char *p + const unsigned char *p = (unsigned char *) cptr; - register const unsigned char *e; + const unsigned char *e; /* Shortcut for single character strings */ if (len == 1 && Py_ISSPACE(*p)) @@ -40,9 +40,9 @@ and there is at least one character in B, False otherwise."); PyObject* _Py_bytes_isalpha(const char *cptr, Py_ssize_t len) { - register const unsigned char *p + const unsigned char *p = (unsigned char *) cptr; - register const unsigned char *e; + const unsigned char *e; /* Shortcut for single character strings */ if (len == 1 && Py_ISALPHA(*p)) @@ -70,9 +70,9 @@ and there is at least one character in B, False otherwise."); PyObject* _Py_bytes_isalnum(const char *cptr, Py_ssize_t len) { - register const unsigned char *p + const unsigned char *p = (unsigned char *) cptr; - register const unsigned char *e; + const unsigned char *e; /* Shortcut for single character strings */ if (len == 1 && Py_ISALNUM(*p)) @@ -100,9 +100,9 @@ and there is at least one character in B, False otherwise."); PyObject* _Py_bytes_isdigit(const char *cptr, Py_ssize_t len) { - register const unsigned char *p + const unsigned char *p = (unsigned char *) cptr; - register const unsigned char *e; + const unsigned char *e; /* Shortcut for single character strings */ if (len == 1 && Py_ISDIGIT(*p)) @@ -130,9 +130,9 @@ at least one cased character in B, False 
otherwise."); PyObject* _Py_bytes_islower(const char *cptr, Py_ssize_t len) { - register const unsigned char *p + const unsigned char *p = (unsigned char *) cptr; - register const unsigned char *e; + const unsigned char *e; int cased; /* Shortcut for single character strings */ @@ -164,9 +164,9 @@ at least one cased character in B, False otherwise."); PyObject* _Py_bytes_isupper(const char *cptr, Py_ssize_t len) { - register const unsigned char *p + const unsigned char *p = (unsigned char *) cptr; - register const unsigned char *e; + const unsigned char *e; int cased; /* Shortcut for single character strings */ @@ -200,9 +200,9 @@ otherwise."); PyObject* _Py_bytes_istitle(const char *cptr, Py_ssize_t len) { - register const unsigned char *p + const unsigned char *p = (unsigned char *) cptr; - register const unsigned char *e; + const unsigned char *e; int cased, previous_is_cased; /* Shortcut for single character strings */ @@ -217,7 +217,7 @@ _Py_bytes_istitle(const char *cptr, Py_ssize_t len) cased = 0; previous_is_cased = 0; for (; p < e; p++) { - register const unsigned char ch = *p; + const unsigned char ch = *p; if (Py_ISUPPER(ch)) { if (previous_is_cased) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 056ac3689f..3a2906cf3c 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -74,7 +74,7 @@ static PyBytesObject *nullstring; PyObject * PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) { - register PyBytesObject *op; + PyBytesObject *op; if (size < 0) { PyErr_SetString(PyExc_SystemError, "Negative size passed to PyBytes_FromStringAndSize"); @@ -126,8 +126,8 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) PyObject * PyBytes_FromString(const char *str) { - register size_t size; - register PyBytesObject *op; + size_t size; + PyBytesObject *op; assert(str != NULL); size = strlen(str); @@ -513,7 +513,7 @@ PyObject *PyBytes_DecodeEscape(const char *s, /* object api */ Py_ssize_t -PyBytes_Size(register PyObject 
*op) +PyBytes_Size(PyObject *op) { if (!PyBytes_Check(op)) { PyErr_Format(PyExc_TypeError, @@ -524,7 +524,7 @@ PyBytes_Size(register PyObject *op) } char * -PyBytes_AsString(register PyObject *op) +PyBytes_AsString(PyObject *op) { if (!PyBytes_Check(op)) { PyErr_Format(PyExc_TypeError, @@ -535,9 +535,9 @@ PyBytes_AsString(register PyObject *op) } int -PyBytes_AsStringAndSize(register PyObject *obj, - register char **s, - register Py_ssize_t *len) +PyBytes_AsStringAndSize(PyObject *obj, + char **s, + Py_ssize_t *len) { if (s == NULL) { PyErr_BadInternalCall(); @@ -579,7 +579,7 @@ PyBytes_AsStringAndSize(register PyObject *obj, PyObject * PyBytes_Repr(PyObject *obj, int smartquotes) { - register PyBytesObject* op = (PyBytesObject*) obj; + PyBytesObject* op = (PyBytesObject*) obj; Py_ssize_t i, length = Py_SIZE(op); size_t newsize, squotes, dquotes; PyObject *v; @@ -718,12 +718,12 @@ bytes_concat(PyObject *a, PyObject *b) } static PyObject * -bytes_repeat(register PyBytesObject *a, register Py_ssize_t n) +bytes_repeat(PyBytesObject *a, Py_ssize_t n) { - register Py_ssize_t i; - register Py_ssize_t j; - register Py_ssize_t size; - register PyBytesObject *op; + Py_ssize_t i; + Py_ssize_t j; + Py_ssize_t size; + PyBytesObject *op; size_t nbytes; if (n < 0) n = 0; @@ -793,7 +793,7 @@ bytes_contains(PyObject *self, PyObject *arg) } static PyObject * -bytes_item(PyBytesObject *a, register Py_ssize_t i) +bytes_item(PyBytesObject *a, Py_ssize_t i) { if (i < 0 || i >= Py_SIZE(a)) { PyErr_SetString(PyExc_IndexError, "index out of range"); @@ -1461,9 +1461,9 @@ table, which must be a bytes object of length 256."); static PyObject * bytes_translate(PyBytesObject *self, PyObject *args) { - register char *input, *output; + char *input, *output; const char *table; - register Py_ssize_t i, c, changed = 0; + Py_ssize_t i, c, changed = 0; PyObject *input_obj = (PyObject*)self; const char *output_start, *del_table=NULL; Py_ssize_t inlen, tablen, dellen = 0; @@ -2748,9 +2748,9 @@ 
PyTypeObject PyBytes_Type = { }; void -PyBytes_Concat(register PyObject **pv, register PyObject *w) +PyBytes_Concat(PyObject **pv, PyObject *w) { - register PyObject *v; + PyObject *v; assert(pv != NULL); if (*pv == NULL) return; @@ -2764,7 +2764,7 @@ PyBytes_Concat(register PyObject **pv, register PyObject *w) } void -PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w) +PyBytes_ConcatAndDel(PyObject **pv, PyObject *w) { PyBytes_Concat(pv, w); Py_XDECREF(w); @@ -2788,8 +2788,8 @@ PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w) int _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) { - register PyObject *v; - register PyBytesObject *sv; + PyObject *v; + PyBytesObject *sv; v = *pv; if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) { *pv = 0; diff --git a/Objects/classobject.c b/Objects/classobject.c index cdc9b1c647..27f7ef4522 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -44,7 +44,7 @@ PyMethod_Self(PyObject *im) PyObject * PyMethod_New(PyObject *func, PyObject *self) { - register PyMethodObject *im; + PyMethodObject *im; if (self == NULL) { PyErr_BadInternalCall(); return NULL; @@ -164,7 +164,7 @@ method_new(PyTypeObject* type, PyObject* args, PyObject *kw) } static void -method_dealloc(register PyMethodObject *im) +method_dealloc(PyMethodObject *im) { _PyObject_GC_UNTRACK(im); if (im->im_weakreflist != NULL) @@ -509,7 +509,7 @@ instancemethod_call(PyObject *self, PyObject *arg, PyObject *kw) static PyObject * instancemethod_descr_get(PyObject *descr, PyObject *obj, PyObject *type) { - register PyObject *func = PyInstanceMethod_GET_FUNCTION(descr); + PyObject *func = PyInstanceMethod_GET_FUNCTION(descr); if (obj == NULL) { Py_INCREF(func); return func; diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 54838ccdbd..5747450adb 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -211,7 +211,7 @@ complex_subtype_from_c_complex(PyTypeObject *type, Py_complex cval) 
PyObject * PyComplex_FromCComplex(Py_complex cval) { - register PyComplexObject *op; + PyComplexObject *op; /* Inline PyObject_New */ op = (PyComplexObject *) PyObject_MALLOC(sizeof(PyComplexObject)); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index e0cb8ac753..b5cbfb1f25 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -467,13 +467,13 @@ static PyDictKeyEntry * lookdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr) { - register size_t i; - register size_t perturb; - register PyDictKeyEntry *freeslot; - register size_t mask; + size_t i; + size_t perturb; + PyDictKeyEntry *freeslot; + size_t mask; PyDictKeyEntry *ep0; - register PyDictKeyEntry *ep; - register int cmp; + PyDictKeyEntry *ep; + int cmp; PyObject *startkey; top: @@ -559,12 +559,12 @@ static PyDictKeyEntry * lookdict_unicode(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr) { - register size_t i; - register size_t perturb; - register PyDictKeyEntry *freeslot; - register size_t mask = DK_MASK(mp->ma_keys); + size_t i; + size_t perturb; + PyDictKeyEntry *freeslot; + size_t mask = DK_MASK(mp->ma_keys); PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0]; - register PyDictKeyEntry *ep; + PyDictKeyEntry *ep; /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass @@ -624,11 +624,11 @@ static PyDictKeyEntry * lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr) { - register size_t i; - register size_t perturb; - register size_t mask = DK_MASK(mp->ma_keys); + size_t i; + size_t perturb; + size_t mask = DK_MASK(mp->ma_keys); PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0]; - register PyDictKeyEntry *ep; + PyDictKeyEntry *ep; /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass @@ -669,11 +669,11 @@ static PyDictKeyEntry * lookdict_split(PyDictObject 
*mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr) { - register size_t i; - register size_t perturb; - register size_t mask = DK_MASK(mp->ma_keys); + size_t i; + size_t perturb; + size_t mask = DK_MASK(mp->ma_keys); PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0]; - register PyDictKeyEntry *ep; + PyDictKeyEntry *ep; if (!PyUnicode_CheckExact(key)) { ep = lookdict(mp, key, hash, value_addr); @@ -1498,7 +1498,7 @@ dict_length(PyDictObject *mp) } static PyObject * -dict_subscript(PyDictObject *mp, register PyObject *key) +dict_subscript(PyDictObject *mp, PyObject *key) { PyObject *v; Py_hash_t hash; @@ -1554,10 +1554,10 @@ static PyMappingMethods dict_as_mapping = { }; static PyObject * -dict_keys(register PyDictObject *mp) +dict_keys(PyDictObject *mp) { - register PyObject *v; - register Py_ssize_t i, j; + PyObject *v; + Py_ssize_t i, j; PyDictKeyEntry *ep; Py_ssize_t size, n, offset; PyObject **value_ptr; @@ -1598,10 +1598,10 @@ dict_keys(register PyDictObject *mp) } static PyObject * -dict_values(register PyDictObject *mp) +dict_values(PyDictObject *mp) { - register PyObject *v; - register Py_ssize_t i, j; + PyObject *v; + Py_ssize_t i, j; Py_ssize_t size, n, offset; PyObject **value_ptr; @@ -1640,10 +1640,10 @@ dict_values(register PyDictObject *mp) } static PyObject * -dict_items(register PyDictObject *mp) +dict_items(PyDictObject *mp) { - register PyObject *v; - register Py_ssize_t i, j, n; + PyObject *v; + Py_ssize_t i, j, n; Py_ssize_t size, offset; PyObject *item, *key; PyDictKeyEntry *ep; @@ -1915,8 +1915,8 @@ PyDict_Update(PyObject *a, PyObject *b) int PyDict_Merge(PyObject *a, PyObject *b, int override) { - register PyDictObject *mp, *other; - register Py_ssize_t i, n; + PyDictObject *mp, *other; + Py_ssize_t i, n; PyDictKeyEntry *entry; /* We accept for the argument either a concrete dictionary object, @@ -2013,7 +2013,7 @@ PyDict_Merge(PyObject *a, PyObject *b, int override) } static PyObject * -dict_copy(register PyDictObject *mp) 
+dict_copy(PyDictObject *mp) { return PyDict_Copy((PyObject*)mp); } @@ -2175,7 +2175,7 @@ dict_richcompare(PyObject *v, PyObject *w, int op) } static PyObject * -dict_contains(register PyDictObject *mp, PyObject *key) +dict_contains(PyDictObject *mp, PyObject *key) { Py_hash_t hash; PyDictKeyEntry *ep; @@ -2194,7 +2194,7 @@ dict_contains(register PyDictObject *mp, PyObject *key) } static PyObject * -dict_get(register PyDictObject *mp, PyObject *args) +dict_get(PyDictObject *mp, PyObject *args) { PyObject *key; PyObject *failobj = Py_None; @@ -2280,7 +2280,7 @@ dict_setdefault(PyDictObject *mp, PyObject *args) } static PyObject * -dict_clear(register PyDictObject *mp) +dict_clear(PyDictObject *mp) { PyDict_Clear((PyObject *)mp); Py_RETURN_NONE; @@ -2824,8 +2824,8 @@ static PyMethodDef dictiter_methods[] = { static PyObject *dictiter_iternextkey(dictiterobject *di) { PyObject *key; - register Py_ssize_t i, mask, offset; - register PyDictKeysObject *k; + Py_ssize_t i, mask, offset; + PyDictKeysObject *k; PyDictObject *d = di->di_dict; PyObject **value_ptr; @@ -2907,7 +2907,7 @@ PyTypeObject PyDictIterKey_Type = { static PyObject *dictiter_iternextvalue(dictiterobject *di) { PyObject *value; - register Py_ssize_t i, mask, offset; + Py_ssize_t i, mask, offset; PyDictObject *d = di->di_dict; PyObject **value_ptr; @@ -2988,7 +2988,7 @@ PyTypeObject PyDictIterValue_Type = { static PyObject *dictiter_iternextitem(dictiterobject *di) { PyObject *key, *value, *result = di->di_result; - register Py_ssize_t i, mask, offset; + Py_ssize_t i, mask, offset; PyDictObject *d = di->di_dict; PyObject **value_ptr; diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 7ee2034f89..fea83fa864 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -109,7 +109,7 @@ PyFloat_GetInfo(void) PyObject * PyFloat_FromDouble(double fval) { - register PyFloatObject *op = free_list; + PyFloatObject *op = free_list; if (op != NULL) { free_list = (PyFloatObject *) Py_TYPE(op); 
numfree--; @@ -241,7 +241,7 @@ PyFloat_AsDouble(PyObject *op) static int convert_to_double(PyObject **v, double *dbl) { - register PyObject *obj = *v; + PyObject *obj = *v; if (PyLong_Check(obj)) { *dbl = PyLong_AsDouble(obj); diff --git a/Objects/listobject.c b/Objects/listobject.c index 2f203b343f..c08c1f66ec 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -213,11 +213,11 @@ PyList_GetItem(PyObject *op, Py_ssize_t i) } int -PyList_SetItem(register PyObject *op, register Py_ssize_t i, - register PyObject *newitem) +PyList_SetItem(PyObject *op, Py_ssize_t i, + PyObject *newitem) { - register PyObject *olditem; - register PyObject **p; + PyObject *olditem; + PyObject **p; if (!PyList_Check(op)) { Py_XDECREF(newitem); PyErr_BadInternalCall(); @@ -1058,9 +1058,9 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) static int binarysort(sortslice lo, PyObject **hi, PyObject **start) { - register Py_ssize_t k; - register PyObject **l, **p, **r; - register PyObject *pivot; + Py_ssize_t k; + PyObject **l, **p, **r; + PyObject *pivot; assert(lo.keys <= start && start <= hi); /* assert [lo, start) is sorted */ diff --git a/Objects/longobject.c b/Objects/longobject.c index e0d641a016..978473410f 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -110,7 +110,7 @@ _PyLong_Negate(PyLongObject **x_p) of the algorithms used, this could save at most be one word anyway. 
*/ static PyLongObject * -long_normalize(register PyLongObject *v) +long_normalize(PyLongObject *v) { Py_ssize_t j = ABS(Py_SIZE(v)); Py_ssize_t i = j; @@ -340,7 +340,7 @@ long PyLong_AsLongAndOverflow(PyObject *vv, int *overflow) { /* This version by Tim Peters */ - register PyLongObject *v; + PyLongObject *v; unsigned long x, prev; long res; Py_ssize_t i; @@ -463,7 +463,7 @@ _PyLong_AsInt(PyObject *obj) Py_ssize_t PyLong_AsSsize_t(PyObject *vv) { - register PyLongObject *v; + PyLongObject *v; size_t x, prev; Py_ssize_t i; int sign; @@ -519,7 +519,7 @@ PyLong_AsSsize_t(PyObject *vv) { unsigned long PyLong_AsUnsignedLong(PyObject *vv) { - register PyLongObject *v; + PyLongObject *v; unsigned long x, prev; Py_ssize_t i; @@ -563,7 +563,7 @@ PyLong_AsUnsignedLong(PyObject *vv) size_t PyLong_AsSize_t(PyObject *vv) { - register PyLongObject *v; + PyLongObject *v; size_t x, prev; Py_ssize_t i; @@ -606,7 +606,7 @@ PyLong_AsSize_t(PyObject *vv) static unsigned long _PyLong_AsUnsignedLongMask(PyObject *vv) { - register PyLongObject *v; + PyLongObject *v; unsigned long x; Py_ssize_t i; int sign; @@ -634,7 +634,7 @@ _PyLong_AsUnsignedLongMask(PyObject *vv) } unsigned long -PyLong_AsUnsignedLongMask(register PyObject *op) +PyLong_AsUnsignedLongMask(PyObject *op) { PyNumberMethods *nb; PyLongObject *lo; @@ -1250,7 +1250,7 @@ PyLong_AsUnsignedLongLong(PyObject *vv) static unsigned PY_LONG_LONG _PyLong_AsUnsignedLongLongMask(PyObject *vv) { - register PyLongObject *v; + PyLongObject *v; unsigned PY_LONG_LONG x; Py_ssize_t i; int sign; @@ -1278,7 +1278,7 @@ _PyLong_AsUnsignedLongLongMask(PyObject *vv) } unsigned PY_LONG_LONG -PyLong_AsUnsignedLongLongMask(register PyObject *op) +PyLong_AsUnsignedLongLongMask(PyObject *op) { PyNumberMethods *nb; PyLongObject *lo; @@ -1326,7 +1326,7 @@ PY_LONG_LONG PyLong_AsLongLongAndOverflow(PyObject *vv, int *overflow) { /* This version by Tim Peters */ - register PyLongObject *v; + PyLongObject *v; unsigned PY_LONG_LONG x, prev; PY_LONG_LONG 
res; Py_ssize_t i; @@ -1744,7 +1744,7 @@ static int long_format_binary(PyObject *aa, int base, int alternate, PyObject **p_output, _PyUnicodeWriter *writer) { - register PyLongObject *a = (PyLongObject *)aa; + PyLongObject *a = (PyLongObject *)aa; PyObject *v; Py_ssize_t sz; Py_ssize_t size_a; @@ -2141,7 +2141,7 @@ that triggers it(!). Instead the code was tested by artificially allocating just 1 digit at the start, so that the copying code was exercised for every digit beyond the first. ***/ - register twodigits c; /* current input character */ + twodigits c; /* current input character */ Py_ssize_t size_z; int i; int convwidth; diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c index 8c51b07fc1..9e950946aa 100644 --- a/Objects/namespaceobject.c +++ b/Objects/namespaceobject.c @@ -176,7 +176,7 @@ namespace_richcompare(PyObject *self, PyObject *other, int op) PyDoc_STRVAR(namespace_reduce__doc__, "Return state information for pickling"); static PyObject * -namespace_reduce(register _PyNamespaceObject *ns) +namespace_reduce(_PyNamespaceObject *ns) { PyObject *result, *args = PyTuple_New(0); diff --git a/Objects/object.c b/Objects/object.c index c83109d39b..8d4fe4248e 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1834,10 +1834,10 @@ _Py_NewReference(PyObject *op) } void -_Py_ForgetReference(register PyObject *op) +_Py_ForgetReference(PyObject *op) { #ifdef SLOW_UNREF_CHECK - register PyObject *p; + PyObject *p; #endif if (op->ob_refcnt < 0) Py_FatalError("UNREF negative refcnt"); diff --git a/Objects/setobject.c b/Objects/setobject.c index 0cea2a81c8..eff91c566d 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -75,15 +75,15 @@ NULL if the rich comparison returns an error. */ static setentry * -set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash) +set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) { - register size_t i; /* Unsigned for defined overflow behavior. 
*/ - register size_t perturb; - register setentry *freeslot; - register size_t mask = so->mask; + size_t i; /* Unsigned for defined overflow behavior. */ + size_t perturb; + setentry *freeslot; + size_t mask = so->mask; setentry *table = so->table; - register setentry *entry; - register int cmp; + setentry *entry; + int cmp; PyObject *startkey; i = (size_t)hash & mask; @@ -157,14 +157,14 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash) * see if the comparison altered the table. */ static setentry * -set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash) +set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) { - register size_t i; /* Unsigned for defined overflow behavior. */ - register size_t perturb; - register setentry *freeslot; - register size_t mask = so->mask; + size_t i; /* Unsigned for defined overflow behavior. */ + size_t perturb; + setentry *freeslot; + size_t mask = so->mask; setentry *table = so->table; - register setentry *entry; + setentry *entry; /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass @@ -211,9 +211,9 @@ Used by the public insert routine. Eats a reference to key. */ static int -set_insert_key(register PySetObject *so, PyObject *key, Py_hash_t hash) +set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) { - register setentry *entry; + setentry *entry; assert(so->lookup != NULL); entry = so->lookup(so, key, hash); @@ -247,13 +247,13 @@ Note that no refcounts are changed by this routine; if needed, the caller is responsible for incref'ing `key`. 
*/ static void -set_insert_clean(register PySetObject *so, PyObject *key, Py_hash_t hash) +set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) { - register size_t i; - register size_t perturb; - register size_t mask = (size_t)so->mask; + size_t i; + size_t perturb; + size_t mask = (size_t)so->mask; setentry *table = so->table; - register setentry *entry; + setentry *entry; i = (size_t)hash & mask; entry = &table[i]; @@ -360,9 +360,9 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) /* CAUTION: set_add_key/entry() must guarantee it won't resize the table */ static int -set_add_entry(register PySetObject *so, setentry *entry) +set_add_entry(PySetObject *so, setentry *entry) { - register Py_ssize_t n_used; + Py_ssize_t n_used; PyObject *key = entry->key; Py_hash_t hash = entry->hash; @@ -379,10 +379,10 @@ set_add_entry(register PySetObject *so, setentry *entry) } static int -set_add_key(register PySetObject *so, PyObject *key) +set_add_key(PySetObject *so, PyObject *key) { - register Py_hash_t hash; - register Py_ssize_t n_used; + Py_hash_t hash; + Py_ssize_t n_used; if (!PyUnicode_CheckExact(key) || (hash = ((PyASCIIObject *) key)->hash) == -1) { @@ -407,7 +407,7 @@ set_add_key(register PySetObject *so, PyObject *key) static int set_discard_entry(PySetObject *so, setentry *oldentry) -{ register setentry *entry; +{ setentry *entry; PyObject *old_key; entry = (so->lookup)(so, oldentry->key, oldentry->hash); @@ -426,8 +426,8 @@ set_discard_entry(PySetObject *so, setentry *oldentry) static int set_discard_key(PySetObject *so, PyObject *key) { - register Py_hash_t hash; - register setentry *entry; + Py_hash_t hash; + setentry *entry; PyObject *old_key; assert (PyAnySet_Check(so)); @@ -533,7 +533,7 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) { Py_ssize_t i; Py_ssize_t mask; - register setentry *table; + setentry *table; assert (PyAnySet_Check(so)); i = *pos_ptr; @@ -553,7 +553,7 @@ set_next(PySetObject *so, Py_ssize_t 
*pos_ptr, setentry **entry_ptr) static void set_dealloc(PySetObject *so) { - register setentry *entry; + setentry *entry; Py_ssize_t fill = so->fill; PyObject_GC_UnTrack(so); Py_TRASHCAN_SAFE_BEGIN(so) @@ -632,8 +632,8 @@ set_merge(PySetObject *so, PyObject *otherset) PySetObject *other; PyObject *key; Py_hash_t hash; - register Py_ssize_t i; - register setentry *entry; + Py_ssize_t i; + setentry *entry; assert (PyAnySet_Check(so)); assert (PyAnySet_Check(otherset)); @@ -701,8 +701,8 @@ set_contains_entry(PySetObject *so, setentry *entry) static PyObject * set_pop(PySetObject *so) { - register Py_ssize_t i = 0; - register setentry *entry; + Py_ssize_t i = 0; + setentry *entry; PyObject *key; assert (PyAnySet_Check(so)); @@ -869,8 +869,8 @@ static PyMethodDef setiter_methods[] = { static PyObject *setiter_iternext(setiterobject *si) { PyObject *key; - register Py_ssize_t i, mask; - register setentry *entry; + Py_ssize_t i, mask; + setentry *entry; PySetObject *so = si->si_set; if (so == NULL) @@ -1024,7 +1024,7 @@ PyDoc_STRVAR(update_doc, static PyObject * make_new_set(PyTypeObject *type, PyObject *iterable) { - register PySetObject *so = NULL; + PySetObject *so = NULL; if (dummy == NULL) { /* Auto-initialize dummy */ dummy = PyUnicode_FromString(""); diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index f855003308..57319c6572 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -38,8 +38,8 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end, */ if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) { /* Help register allocation */ - register const char *_s = s; - register STRINGLIB_CHAR *_p = p; + const char *_s = s; + STRINGLIB_CHAR *_p = p; while (_s < aligned_end) { /* Read a whole long at a time (either 4 or 8 bytes), and do a fast unrolled copy if it only contains ASCII @@ -499,7 +499,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e, reads are more expensive, better to defer to another 
iteration. */ if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) { /* Fast path for runs of in-range non-surrogate chars. */ - register const unsigned char *_q = q; + const unsigned char *_q = q; while (_q < aligned_end) { unsigned long block = * (unsigned long *) _q; if (native_ordering) { diff --git a/Objects/stringlib/eq.h b/Objects/stringlib/eq.h index 8e79a43f72..4ad6dc0ed9 100644 --- a/Objects/stringlib/eq.h +++ b/Objects/stringlib/eq.h @@ -6,8 +6,8 @@ Py_LOCAL_INLINE(int) unicode_eq(PyObject *aa, PyObject *bb) { - register PyUnicodeObject *a = (PyUnicodeObject *)aa; - register PyUnicodeObject *b = (PyUnicodeObject *)bb; + PyUnicodeObject *a = (PyUnicodeObject *)aa; + PyUnicodeObject *b = (PyUnicodeObject *)bb; if (PyUnicode_READY(a) == -1 || PyUnicode_READY(b) == -1) { assert(0 && "unicode_eq ready fail"); diff --git a/Objects/stringlib/find_max_char.h b/Objects/stringlib/find_max_char.h index 06559c8a9f..eb3fe886e2 100644 --- a/Objects/stringlib/find_max_char.h +++ b/Objects/stringlib/find_max_char.h @@ -24,7 +24,7 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) while (p < end) { if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { /* Help register allocation */ - register const unsigned char *_p = p; + const unsigned char *_p = p; while (_p < aligned_end) { unsigned long value = *(unsigned long *) _p; if (value & UCS1_ASCII_CHAR_MASK) @@ -66,7 +66,7 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) #else #error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4) #endif - register Py_UCS4 mask; + Py_UCS4 mask; Py_ssize_t n = end - begin; const STRINGLIB_CHAR *p = begin; const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4); diff --git a/Objects/stringlib/split.h b/Objects/stringlib/split.h index 947dd28e6c..31f77a7724 100644 --- a/Objects/stringlib/split.h +++ b/Objects/stringlib/split.h @@ -345,8 +345,8 @@ STRINGLIB(splitlines)(PyObject* str_obj, and the appends only done when the prealloc buffer is 
full. That's too much work for little gain.*/ - register Py_ssize_t i; - register Py_ssize_t j; + Py_ssize_t i; + Py_ssize_t j; PyObject *list = PyList_New(0); PyObject *sub; diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 52896b7919..a33d8c06ee 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -63,9 +63,9 @@ _PyTuple_DebugMallocStats(FILE *out) } PyObject * -PyTuple_New(register Py_ssize_t size) +PyTuple_New(Py_ssize_t size) { - register PyTupleObject *op; + PyTupleObject *op; Py_ssize_t i; if (size < 0) { PyErr_BadInternalCall(); @@ -122,7 +122,7 @@ PyTuple_New(register Py_ssize_t size) } Py_ssize_t -PyTuple_Size(register PyObject *op) +PyTuple_Size(PyObject *op) { if (!PyTuple_Check(op)) { PyErr_BadInternalCall(); @@ -133,7 +133,7 @@ PyTuple_Size(register PyObject *op) } PyObject * -PyTuple_GetItem(register PyObject *op, register Py_ssize_t i) +PyTuple_GetItem(PyObject *op, Py_ssize_t i) { if (!PyTuple_Check(op)) { PyErr_BadInternalCall(); @@ -147,10 +147,10 @@ PyTuple_GetItem(register PyObject *op, register Py_ssize_t i) } int -PyTuple_SetItem(register PyObject *op, register Py_ssize_t i, PyObject *newitem) +PyTuple_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem) { - register PyObject *olditem; - register PyObject **p; + PyObject *olditem; + PyObject **p; if (!PyTuple_Check(op) || op->ob_refcnt != 1) { Py_XDECREF(newitem); PyErr_BadInternalCall(); @@ -224,10 +224,10 @@ PyTuple_Pack(Py_ssize_t n, ...) /* Methods */ static void -tupledealloc(register PyTupleObject *op) +tupledealloc(PyTupleObject *op) { - register Py_ssize_t i; - register Py_ssize_t len = Py_SIZE(op); + Py_ssize_t i; + Py_ssize_t len = Py_SIZE(op); PyObject_GC_UnTrack(op); Py_TRASHCAN_SAFE_BEGIN(op) if (len > 0) { @@ -330,10 +330,10 @@ error: static Py_hash_t tuplehash(PyTupleObject *v) { - register Py_uhash_t x; /* Unsigned for defined overflow behavior. 
*/ - register Py_hash_t y; - register Py_ssize_t len = Py_SIZE(v); - register PyObject **p; + Py_uhash_t x; /* Unsigned for defined overflow behavior. */ + Py_hash_t y; + Py_ssize_t len = Py_SIZE(v); + PyObject **p; Py_uhash_t mult = _PyHASH_MULTIPLIER; x = 0x345678UL; p = v->ob_item; @@ -370,7 +370,7 @@ tuplecontains(PyTupleObject *a, PyObject *el) } static PyObject * -tupleitem(register PyTupleObject *a, register Py_ssize_t i) +tupleitem(PyTupleObject *a, Py_ssize_t i) { if (i < 0 || i >= Py_SIZE(a)) { PyErr_SetString(PyExc_IndexError, "tuple index out of range"); @@ -381,12 +381,12 @@ tupleitem(register PyTupleObject *a, register Py_ssize_t i) } static PyObject * -tupleslice(register PyTupleObject *a, register Py_ssize_t ilow, - register Py_ssize_t ihigh) +tupleslice(PyTupleObject *a, Py_ssize_t ilow, + Py_ssize_t ihigh) { - register PyTupleObject *np; + PyTupleObject *np; PyObject **src, **dest; - register Py_ssize_t i; + Py_ssize_t i; Py_ssize_t len; if (ilow < 0) ilow = 0; @@ -423,10 +423,10 @@ PyTuple_GetSlice(PyObject *op, Py_ssize_t i, Py_ssize_t j) } static PyObject * -tupleconcat(register PyTupleObject *a, register PyObject *bb) +tupleconcat(PyTupleObject *a, PyObject *bb) { - register Py_ssize_t size; - register Py_ssize_t i; + Py_ssize_t size; + Py_ssize_t i; PyObject **src, **dest; PyTupleObject *np; if (!PyTuple_Check(bb)) { @@ -836,8 +836,8 @@ PyTypeObject PyTuple_Type = { int _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) { - register PyTupleObject *v; - register PyTupleObject *sv; + PyTupleObject *v; + PyTupleObject *sv; Py_ssize_t i; Py_ssize_t oldsize; diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c index a572c12bcc..ea540d605d 100644 --- a/Objects/unicodectype.c +++ b/Objects/unicodectype.c @@ -61,7 +61,7 @@ gettyperecord(Py_UCS4 code) /* Returns the titlecase Unicode characters corresponding to ch or just ch if no titlecase mapping is known. 
*/ -Py_UCS4 _PyUnicode_ToTitlecase(register Py_UCS4 ch) +Py_UCS4 _PyUnicode_ToTitlecase(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2f437f6ad7..f1d687ae9d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -873,7 +873,7 @@ resize_copy(PyObject *unicode, Py_ssize_t length) static PyUnicodeObject * _PyUnicode_New(Py_ssize_t length) { - register PyUnicodeObject *unicode; + PyUnicodeObject *unicode; size_t new_size; /* Optimization for empty strings */ @@ -1557,7 +1557,7 @@ _PyUnicode_Ready(PyObject *unicode) } static void -unicode_dealloc(register PyObject *unicode) +unicode_dealloc(PyObject *unicode) { switch (PyUnicode_CHECK_INTERNED(unicode)) { case SSTATE_NOT_INTERNED: @@ -2287,7 +2287,7 @@ PyUnicode_AsUCS4Copy(PyObject *string) #ifdef HAVE_WCHAR_H PyObject * -PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size) +PyUnicode_FromWideChar(const wchar_t *w, Py_ssize_t size) { if (w == NULL) { if (size == 0) @@ -2898,7 +2898,7 @@ PyUnicode_FromOrdinal(int ordinal) } PyObject * -PyUnicode_FromObject(register PyObject *obj) +PyUnicode_FromObject(PyObject *obj) { /* XXX Perhaps we should make this API an alias of PyObject_Str() instead ?! */ @@ -2920,7 +2920,7 @@ PyUnicode_FromObject(register PyObject *obj) } PyObject * -PyUnicode_FromEncodedObject(register PyObject *obj, +PyUnicode_FromEncodedObject(PyObject *obj, const char *encoding, const char *errors) { @@ -4653,9 +4653,9 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest) if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { /* Fast path, see in STRINGLIB(utf8_decode) for an explanation. 
*/ - /* Help register allocation */ - register const char *_p = p; - register Py_UCS1 * q = dest; + /* Help allocation */ + const char *_p = p; + Py_UCS1 * q = dest; while (_p < aligned_end) { unsigned long value = *(const unsigned long *) _p; if (value & ASCII_CHAR_MASK) @@ -4678,8 +4678,8 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest) /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h for an explanation. */ if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { - /* Help register allocation */ - register const char *_p = p; + /* Help allocation */ + const char *_p = p; while (_p < aligned_end) { unsigned long value = *(unsigned long *) _p; if (value & ASCII_CHAR_MASK) @@ -6513,7 +6513,7 @@ PyUnicode_DecodeASCII(const char *s, s += writer.pos; kind = writer.kind; while (s < e) { - register unsigned char c = (unsigned char)*s; + unsigned char c = (unsigned char)*s; if (c < 128) { PyUnicode_WRITE(kind, data, writer.pos, c); writer.pos++; @@ -14621,7 +14621,7 @@ _PyUnicode_Fini(void) void PyUnicode_InternInPlace(PyObject **p) { - register PyObject *s = *p; + PyObject *s = *p; PyObject *t; #ifdef Py_DEBUG assert(s != NULL); @@ -14954,7 +14954,7 @@ Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2) int Py_UNICODE_strncmp(const Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) { - register Py_UNICODE u1, u2; + Py_UNICODE u1, u2; for (; n != 0; n--) { u1 = *s1; u2 = *s2; diff --git a/Objects/unicodetype_db.h b/Objects/unicodetype_db.h index 1009bb3bc7..57add8fac6 100644 --- a/Objects/unicodetype_db.h +++ b/Objects/unicodetype_db.h @@ -4278,7 +4278,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch) /* Returns 1 for Unicode characters having the bidirectional * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. 
*/ -int _PyUnicode_IsWhitespace(register const Py_UCS4 ch) +int _PyUnicode_IsWhitespace(const Py_UCS4 ch) { switch (ch) { case 0x0009: @@ -4320,7 +4320,7 @@ int _PyUnicode_IsWhitespace(register const Py_UCS4 ch) * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional * type 'B', 0 otherwise. */ -int _PyUnicode_IsLinebreak(register const Py_UCS4 ch) +int _PyUnicode_IsLinebreak(const Py_UCS4 ch) { switch (ch) { case 0x000A: -- cgit v1.2.1 From 5eed76e4f3d4c9b73132afb1a52748fb627da4d7 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 15 Aug 2013 02:18:55 -0700 Subject: Hoist the global "dummy" lookup outside of the reinsertion loop. --- Objects/setobject.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index eff91c566d..ac501b6000 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -280,6 +280,7 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) Py_ssize_t i; int is_oldtable_malloced; setentry small_copy[PySet_MINSIZE]; + PyObject *dummy_entry; assert(minused >= 0); @@ -336,11 +337,12 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) /* Copy the data over; this is refcount-neutral for active entries; dummy entries aren't copied over, of course */ + dummy_entry = dummy; for (entry = oldtable; i > 0; entry++) { if (entry->key == NULL) { /* UNUSED */ ; - } else if (entry->key == dummy) { + } else if (entry->key == dummy_entry) { /* DUMMY */ --i; assert(entry->key == dummy); -- cgit v1.2.1 From a66f221e549be146e0a08fc6c87e6800fa6dec2a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 17 Aug 2013 00:48:02 +0300 Subject: Issue #18701: Remove support of old CPython versions (<3.0) from C code. 
--- Objects/stringlib/unicodedefs.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'Objects') diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h index 48d00eccd0..3db5629e11 100644 --- a/Objects/stringlib/unicodedefs.h +++ b/Objects/stringlib/unicodedefs.h @@ -24,13 +24,8 @@ #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact -#if PY_VERSION_HEX < 0x03000000 -#define STRINGLIB_TOSTR PyObject_Unicode -#define STRINGLIB_TOASCII PyObject_Repr -#else #define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOASCII PyObject_ASCII -#endif #define STRINGLIB_WANT_CONTAINS_OBJ 1 -- cgit v1.2.1 From b19bf5e4c2a03a5ba54ddc0b664bd733c74640b4 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 17 Aug 2013 02:31:53 -0700 Subject: Use a known unique object for the dummy entry. This lets us run PyObject_RichCompareBool() without first needing to check whether the entry is a dummy. --- Objects/setobject.c | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index ac501b6000..da3b95555f 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -91,32 +91,27 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == NULL || entry->key == key) return entry; - if (entry->key == dummy) - freeslot = entry; - else { - if (entry->hash == hash) { - startkey = entry->key; - Py_INCREF(startkey); - cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); - Py_DECREF(startkey); - if (cmp < 0) - return NULL; - if (table == so->table && entry->key == startkey) { - if (cmp > 0) - return entry; - } - else { - /* The compare did major nasty stuff to the - * set: start over. 
- */ - return set_lookkey(so, key, hash); - } + if (entry->hash == hash) { + startkey = entry->key; + Py_INCREF(startkey); + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) + return NULL; + if (table == so->table && entry->key == startkey) { + if (cmp > 0) + return entry; + } + else { + /* Start over if the compare altered the set */ + return set_lookkey(so, key, hash); } - freeslot = NULL; } - /* In the loop, key == dummy is by far (factor of 100s) the - least likely outcome, so test for that last. */ + freeslot = (entry->key == dummy) ? entry : NULL; + + /* In the loop, key == dummy is by far (factor of 100s) + the least likely outcome, so test for that last. */ for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { i = i * 5 + perturb + 1; entry = &table[i & mask]; @@ -127,7 +122,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) } if (entry->key == key) break; - if (entry->hash == hash && entry->key != dummy) { + if (entry->hash == hash) { startkey = entry->key; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); @@ -1029,7 +1024,7 @@ make_new_set(PyTypeObject *type, PyObject *iterable) PySetObject *so = NULL; if (dummy == NULL) { /* Auto-initialize dummy */ - dummy = PyUnicode_FromString(""); + dummy = _PyObject_New(&PyBaseObject_Type); if (dummy == NULL) return NULL; } -- cgit v1.2.1 From 6a8989e6913b62b6dcda6b6aaa7eec7d8edda3e0 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 17 Aug 2013 02:39:46 -0700 Subject: Remove the else-clause because the conditions are no longer mutually exclusive. 
--- Objects/setobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index da3b95555f..0db7e885e9 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -140,7 +140,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return set_lookkey(so, key, hash); } } - else if (entry->key == dummy && freeslot == NULL) + if (entry->key == dummy && freeslot == NULL) freeslot = entry; } return entry; -- cgit v1.2.1 From 840c517e9e08e54c1c452286f5e31f4c624dfc20 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 19 Aug 2013 07:36:04 -0700 Subject: Issue18771: Reduce the cost of hash collisions for set objects. --- Objects/setobject.c | 106 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 86 insertions(+), 20 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 0db7e885e9..6327a312c9 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -68,6 +68,11 @@ chaining would be substantial (100% with typical malloc overhead). The initial probe index is computed as hash mod the table size. Subsequent probe indices are computed as explained in Objects/dictobject.c. +To improve cache locality, each probe is done in pairs. +After the probe is examined, an adjacent entry is then examined as well. +The likelihood is that an adjacent entry is in the same cache line and +can be examined more cheaply than another probe elsewhere in memory. + All arithmetic on hash should ignore overflow. Unlike the dictionary implementation, the lookkey functions can return @@ -77,7 +82,7 @@ NULL if the rich comparison returns an error. static setentry * set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) { - size_t i; /* Unsigned for defined overflow behavior. */ + size_t i, j; /* Unsigned for defined overflow behavior. 
*/ size_t perturb; setentry *freeslot; size_t mask = so->mask; @@ -90,7 +95,6 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) entry = &table[i]; if (entry->key == NULL || entry->key == key) return entry; - if (entry->hash == hash) { startkey = entry->key; Py_INCREF(startkey); @@ -107,14 +111,45 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return set_lookkey(so, key, hash); } } - freeslot = (entry->key == dummy) ? entry : NULL; /* In the loop, key == dummy is by far (factor of 100s) the least likely outcome, so test for that last. */ - for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { + j = i; + perturb = hash; + while (1) { + j ^= 1; + entry = &table[j]; + if (entry->key == NULL) { + if (freeslot != NULL) + entry = freeslot; + break; + } + if (entry->key == key) + break; + if (entry->hash == hash) { + startkey = entry->key; + Py_INCREF(startkey); + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) + return NULL; + if (table == so->table && entry->key == startkey) { + if (cmp > 0) + break; + } + else { + return set_lookkey(so, key, hash); + } + } + if (entry->key == dummy && freeslot == NULL) + freeslot = entry; + i = i * 5 + perturb + 1; - entry = &table[i & mask]; + j = i & mask; + perturb >>= PERTURB_SHIFT; + + entry = &table[j]; if (entry->key == NULL) { if (freeslot != NULL) entry = freeslot; @@ -134,14 +169,12 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) break; } else { - /* The compare did major nasty stuff to the - * set: start over. - */ return set_lookkey(so, key, hash); } } if (entry->key == dummy && freeslot == NULL) freeslot = entry; + } return entry; } @@ -154,7 +187,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) static setentry * set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) { - size_t i; /* Unsigned for defined overflow behavior. */ + size_t i, j; /* Unsigned for defined overflow behavior. 
*/ size_t perturb; setentry *freeslot; size_t mask = so->mask; @@ -169,6 +202,7 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) so->lookup = set_lookkey; return set_lookkey(so, key, hash); } + i = (size_t)hash & mask; entry = &table[i]; if (entry->key == NULL || entry->key == key) @@ -181,11 +215,37 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) freeslot = NULL; } - /* In the loop, key == dummy is by far (factor of 100s) the - least likely outcome, so test for that last. */ - for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { + entry = &table[i ^ 1]; + if (entry->key == NULL) + return freeslot == NULL ? entry : freeslot; + if (entry->key == key + || (entry->hash == hash + && entry->key != dummy + && unicode_eq(entry->key, key))) + return entry; + if (entry->key == dummy && freeslot == NULL) + freeslot = entry; + + j = i; + perturb = hash; + while (1) { + j ^= 1; + entry = &table[j]; + if (entry->key == NULL) + return freeslot == NULL ? entry : freeslot; + if (entry->key == key + || (entry->hash == hash + && entry->key != dummy + && unicode_eq(entry->key, key))) + return entry; + if (entry->key == dummy && freeslot == NULL) + freeslot = entry; + i = i * 5 + perturb + 1; - entry = &table[i & mask]; + j = i & mask; + perturb >>= PERTURB_SHIFT; + + entry = &table[j]; if (entry->key == NULL) return freeslot == NULL ? entry : freeslot; if (entry->key == key @@ -244,17 +304,23 @@ is responsible for incref'ing `key`. 
static void set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) { - size_t i; - size_t perturb; - size_t mask = (size_t)so->mask; setentry *table = so->table; setentry *entry; + size_t perturb = hash; + size_t mask = (size_t)so->mask; + size_t i, j; - i = (size_t)hash & mask; - entry = &table[i]; - for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) { + i = j = (size_t)hash & mask; + while (1) { + entry = &table[j]; + if (entry->key == NULL) + break; + entry = &table[j ^ 1]; + if (entry->key == NULL) + break; i = i * 5 + perturb + 1; - entry = &table[i & mask]; + j = i & mask; + perturb >>= PERTURB_SHIFT; } so->fill++; entry->key = key; -- cgit v1.2.1 From 6b24a69409b1e936686017064ea3afa1b3bd139e Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 20 Aug 2013 22:28:24 -0700 Subject: Issue 18772: Restore set dummy object back to unicode and restore the identity checks in lookkey(). The Gdb prettyprint plugin depended on the dummy object being displayable. Other solutions besides a unicode object are possible. For now, get it back up and running. The identity checks in lookkey() need to be there to prevent the dummy object from leaking through Py_RichCompareBool() into user code in the rare circumstance where the dummy's hash value exactly matches the hash value of the actual key being looked up. 
--- Objects/setobject.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 6327a312c9..3d9deac096 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -95,7 +95,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) entry = &table[i]; if (entry->key == NULL || entry->key == key) return entry; - if (entry->hash == hash) { + if (entry->hash == hash && entry->key != dummy) { startkey = entry->key; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); @@ -127,7 +127,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) } if (entry->key == key) break; - if (entry->hash == hash) { + if (entry->hash == hash && entry->key != dummy) { startkey = entry->key; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); @@ -157,7 +157,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) } if (entry->key == key) break; - if (entry->hash == hash) { + if (entry->hash == hash && entry->key != dummy) { startkey = entry->key; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); @@ -1090,7 +1090,7 @@ make_new_set(PyTypeObject *type, PyObject *iterable) PySetObject *so = NULL; if (dummy == NULL) { /* Auto-initialize dummy */ - dummy = _PyObject_New(&PyBaseObject_Type); + dummy = PyUnicode_FromString(""); if (dummy == NULL) return NULL; } -- cgit v1.2.1 From fb6ac62e3beb380a1becfe886d1716facc215fd6 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 20 Aug 2013 23:03:28 -0700 Subject: Remove a redundant hash table probe (this was artifact from an earlier draft of the patch). 
--- Objects/setobject.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 3d9deac096..9074a76125 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -215,17 +215,6 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) freeslot = NULL; } - entry = &table[i ^ 1]; - if (entry->key == NULL) - return freeslot == NULL ? entry : freeslot; - if (entry->key == key - || (entry->hash == hash - && entry->key != dummy - && unicode_eq(entry->key, key))) - return entry; - if (entry->key == dummy && freeslot == NULL) - freeslot = entry; - j = i; perturb = hash; while (1) { -- cgit v1.2.1 From 8d78b37b4eae9ab6191aacd9b885f32d134a70f6 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 21 Aug 2013 01:34:18 -0700 Subject: Hoist the global dummy lookup out of the inner loop for set_merge(). --- Objects/setobject.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 9074a76125..f84c929a47 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -683,6 +683,7 @@ set_merge(PySetObject *so, PyObject *otherset) { PySetObject *other; PyObject *key; + PyObject *dummy_entry; Py_hash_t hash; Py_ssize_t i; setentry *entry; @@ -702,12 +703,13 @@ set_merge(PySetObject *so, PyObject *otherset) if (set_table_resize(so, (so->used + other->used)*2) != 0) return -1; } + dummy_entry = dummy; for (i = 0; i <= other->mask; i++) { entry = &other->table[i]; key = entry->key; hash = entry->hash; if (key != NULL && - key != dummy) { + key != dummy_entry) { Py_INCREF(key); if (set_insert_key(so, key, hash) == -1) { Py_DECREF(key); -- cgit v1.2.1 From f25e3eb4e834fc78b2b447533cebd7358aebd5b4 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 22 Aug 2013 08:20:31 -0700 Subject: Issue 18797: Remove unneeded refcount adjustments for dummy objects. 
It suffices to keep just one reference when the object is created. --- Objects/setobject.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index f84c929a47..94789c6645 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -274,7 +274,6 @@ set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) entry->key = key; entry->hash = hash; so->used++; - Py_DECREF(dummy); } else { /* ACTIVE */ Py_DECREF(key); @@ -381,23 +380,15 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) so->table = newtable; so->mask = newsize - 1; memset(newtable, 0, sizeof(setentry) * newsize); + i = so->used; so->used = 0; - i = so->fill; so->fill = 0; /* Copy the data over; this is refcount-neutral for active entries; dummy entries aren't copied over, of course */ dummy_entry = dummy; for (entry = oldtable; i > 0; entry++) { - if (entry->key == NULL) { - /* UNUSED */ - ; - } else if (entry->key == dummy_entry) { - /* DUMMY */ - --i; - assert(entry->key == dummy); - Py_DECREF(entry->key); - } else { + if (entry->key != NULL && entry->key != dummy_entry) { /* ACTIVE */ --i; set_insert_clean(so, entry->key, entry->hash); @@ -468,7 +459,6 @@ set_discard_entry(PySetObject *so, setentry *oldentry) if (entry->key == NULL || entry->key == dummy) return DISCARD_NOTFOUND; old_key = entry->key; - Py_INCREF(dummy); entry->key = dummy; so->used--; Py_DECREF(old_key); @@ -496,7 +486,6 @@ set_discard_key(PySetObject *so, PyObject *key) if (entry->key == NULL || entry->key == dummy) return DISCARD_NOTFOUND; old_key = entry->key; - Py_INCREF(dummy); entry->key = dummy; so->used--; Py_DECREF(old_key); @@ -554,7 +543,8 @@ set_clear_internal(PySetObject *so) #endif if (entry->key) { --fill; - Py_DECREF(entry->key); + if (entry->key != dummy) + Py_DECREF(entry->key); } #ifdef Py_DEBUG else @@ -615,7 +605,8 @@ set_dealloc(PySetObject *so) for (entry = so->table; fill > 0; entry++) { if 
(entry->key) { --fill; - Py_DECREF(entry->key); + if (entry->key != dummy) + Py_DECREF(entry->key); } } if (so->table != so->smalltable) @@ -788,7 +779,6 @@ set_pop(PySetObject *so) } } key = entry->key; - Py_INCREF(dummy); entry->key = dummy; so->used--; so->table[0].hash = i + 1; /* next place to start */ -- cgit v1.2.1 From 3e709da88040952e79b512ed6eca83195224805c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 23 Aug 2013 03:22:15 -0500 Subject: Add the same dummy type that is used in dictionaries. --- Objects/setobject.c | 64 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 15 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 94789c6645..8a855a3bd9 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -29,7 +29,10 @@ set_key_error(PyObject *arg) #define PERTURB_SHIFT 5 /* Object used as dummy key to fill deleted entries */ -static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */ + +static PyObject _dummy_struct; + +#define dummy (&_dummy_struct) #ifdef Py_REF_DEBUG PyObject * @@ -329,7 +332,6 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) Py_ssize_t i; int is_oldtable_malloced; setentry small_copy[PySet_MINSIZE]; - PyObject *dummy_entry; assert(minused >= 0); @@ -386,10 +388,8 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) /* Copy the data over; this is refcount-neutral for active entries; dummy entries aren't copied over, of course */ - dummy_entry = dummy; for (entry = oldtable; i > 0; entry++) { - if (entry->key != NULL && entry->key != dummy_entry) { - /* ACTIVE */ + if (entry->key != NULL && entry->key != dummy) { --i; set_insert_clean(so, entry->key, entry->hash); } @@ -674,7 +674,6 @@ set_merge(PySetObject *so, PyObject *otherset) { PySetObject *other; PyObject *key; - PyObject *dummy_entry; Py_hash_t hash; Py_ssize_t i; setentry *entry; @@ -694,13 +693,12 @@ set_merge(PySetObject *so, PyObject *otherset) 
if (set_table_resize(so, (so->used + other->used)*2) != 0) return -1; } - dummy_entry = dummy; for (i = 0; i <= other->mask; i++) { entry = &other->table[i]; key = entry->key; hash = entry->hash; if (key != NULL && - key != dummy_entry) { + key != dummy) { Py_INCREF(key); if (set_insert_key(so, key, hash) == -1) { Py_DECREF(key); @@ -1070,12 +1068,6 @@ make_new_set(PyTypeObject *type, PyObject *iterable) { PySetObject *so = NULL; - if (dummy == NULL) { /* Auto-initialize dummy */ - dummy = PyUnicode_FromString(""); - if (dummy == NULL) - return NULL; - } - /* create PySetObject structure */ if (numfree && (type == &PySet_Type || type == &PyFrozenSet_Type)) { @@ -1172,7 +1164,6 @@ void PySet_Fini(void) { PySet_ClearFreeList(); - Py_CLEAR(dummy); Py_CLEAR(emptyfrozenset); } @@ -2581,3 +2572,46 @@ test_c_api(PySetObject *so) #undef assertRaises #endif + +/***** Dummy Struct *************************************************/ + +static PyObject * +dummy_repr(PyObject *op) +{ + return PyUnicode_FromString(""); +} + +static void +dummy_dealloc(PyObject* ignore) +{ + Py_FatalError("deallocating "); +} + +static PyTypeObject _PySetDummy_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + " type", + 0, + 0, + dummy_dealloc, /*tp_dealloc*/ /*never called*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + dummy_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call */ + 0, /*tp_str */ + 0, /*tp_getattro */ + 0, /*tp_setattro */ + 0, /*tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /*tp_flags */ +}; + +static PyObject _dummy_struct = { + _PyObject_EXTRA_INIT + 2, &_PySetDummy_Type +}; + -- cgit v1.2.1 From b86ea6424b692dcb175d187258ea8065b64728d7 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 23 Aug 2013 23:18:20 +0200 Subject: Back out 5bd9db528aed (issue #18408). It caused unsolved buildbot failures. 
--- Objects/object.c | 16 ---------------- Objects/typeobject.c | 7 ------- 2 files changed, 23 deletions(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index 8d4fe4248e..81d6d4b838 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -443,14 +443,6 @@ PyObject_Repr(PyObject *v) if (Py_TYPE(v)->tp_repr == NULL) return PyUnicode_FromFormat("<%s object at %p>", v->ob_type->tp_name, v); - -#ifdef Py_DEBUG - /* PyObject_Repr() must not be called with an exception set, - because it may clear it (directly or indirectly) and so the - caller looses its exception */ - assert(!PyErr_Occurred()); -#endif - res = (*v->ob_type->tp_repr)(v); if (res == NULL) return NULL; @@ -482,7 +474,6 @@ PyObject_Str(PyObject *v) #endif if (v == NULL) return PyUnicode_FromString(""); - if (PyUnicode_CheckExact(v)) { #ifndef Py_DEBUG if (PyUnicode_READY(v) < 0) @@ -494,13 +485,6 @@ PyObject_Str(PyObject *v) if (Py_TYPE(v)->tp_str == NULL) return PyObject_Repr(v); -#ifdef Py_DEBUG - /* PyObject_Str() must not be called with an exception set, - because it may clear it (directly or indirectly) and so the - caller looses its exception */ - assert(!PyErr_Occurred()); -#endif - /* It is possible for a type to have a tp_str representation that loops infinitely. 
*/ if (Py_EnterRecursiveCall(" while getting the str of an object")) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 3ff42da1dd..c6ff0193e7 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -736,13 +736,6 @@ type_call(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } -#ifdef Py_DEBUG - /* type_call() must not be called with an exception set, - because it may clear it (directly or indirectly) and so the - caller looses its exception */ - assert(!PyErr_Occurred()); -#endif - obj = type->tp_new(type, args, kwds); if (obj != NULL) { /* Ugly exception: when the call was type(something), -- cgit v1.2.1 From c120a5b619a30e764a8f03e321b308e717195b77 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sat, 24 Aug 2013 21:07:07 +0200 Subject: Issue #18772: fix the gdb plugin after the set implementation changes --- Objects/object.c | 2 +- Objects/setobject.c | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index 81d6d4b838..0561e09f86 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -22,7 +22,7 @@ _Py_GetRefTotal(void) o = _PyDict_Dummy(); if (o != NULL) total -= o->ob_refcnt; - o = _PySet_Dummy(); + o = _PySet_Dummy; if (o != NULL) total -= o->ob_refcnt; return total; diff --git a/Objects/setobject.c b/Objects/setobject.c index 8a855a3bd9..1ad78c4deb 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -29,18 +29,12 @@ set_key_error(PyObject *arg) #define PERTURB_SHIFT 5 /* Object used as dummy key to fill deleted entries */ - static PyObject _dummy_struct; #define dummy (&_dummy_struct) -#ifdef Py_REF_DEBUG -PyObject * -_PySet_Dummy(void) -{ - return dummy; -} -#endif +/* Exported for the gdb plugin's benefit. 
*/ +PyObject *_PySet_Dummy = dummy; #define INIT_NONZERO_SET_SLOTS(so) do { \ (so)->table = (so)->smalltable; \ -- cgit v1.2.1 From 05ac164b5aa7cc1a17a9f4cef26edbca83fbbd25 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 26 Aug 2013 13:49:06 +0200 Subject: Issue #18408: _PyObject_Dump() now saves/restores the current exception So it can be called even if an exception was raised --- Objects/object.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index 0561e09f86..006f0d4dbc 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -406,11 +406,17 @@ _PyObject_Dump(PyObject* op) #ifdef WITH_THREAD PyGILState_STATE gil; #endif + PyObject *error_type, *error_value, *error_traceback; + fprintf(stderr, "object : "); #ifdef WITH_THREAD gil = PyGILState_Ensure(); #endif + + PyErr_Fetch(&error_type, &error_value, &error_traceback); (void)PyObject_Print(op, stderr, 0); + PyErr_Restore(error_type, error_value, error_traceback); + #ifdef WITH_THREAD PyGILState_Release(gil); #endif -- cgit v1.2.1 From 7b89cc3b894c7eeb22c24a8df73683332509875b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 26 Aug 2013 14:05:19 +0200 Subject: Restore changeset 5bd9db528aed (issue #18408) "Issue #18408: PyObject_Str(), PyObject_Repr() and type_call() now fail with an assertion error if they are called with an exception set (PyErr_Occurred()). As PyEval_EvalFrameEx(), they may clear the current exception and so the caller looses its exception." 
--- Objects/object.c | 15 +++++++++++++++ Objects/typeobject.c | 7 +++++++ 2 files changed, 22 insertions(+) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index 006f0d4dbc..693d8c73b1 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -449,6 +449,14 @@ PyObject_Repr(PyObject *v) if (Py_TYPE(v)->tp_repr == NULL) return PyUnicode_FromFormat("<%s object at %p>", v->ob_type->tp_name, v); + +#ifdef Py_DEBUG + /* PyObject_Repr() must not be called with an exception set, + because it may clear it (directly or indirectly) and so the + caller looses its exception */ + assert(!PyErr_Occurred()); +#endif + res = (*v->ob_type->tp_repr)(v); if (res == NULL) return NULL; @@ -491,6 +499,13 @@ PyObject_Str(PyObject *v) if (Py_TYPE(v)->tp_str == NULL) return PyObject_Repr(v); +#ifdef Py_DEBUG + /* PyObject_Str() must not be called with an exception set, + because it may clear it (directly or indirectly) and so the + caller looses its exception */ + assert(!PyErr_Occurred()); +#endif + /* It is possible for a type to have a tp_str representation that loops infinitely. */ if (Py_EnterRecursiveCall(" while getting the str of an object")) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index c6ff0193e7..3ff42da1dd 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -736,6 +736,13 @@ type_call(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } +#ifdef Py_DEBUG + /* type_call() must not be called with an exception set, + because it may clear it (directly or indirectly) and so the + caller looses its exception */ + assert(!PyErr_Occurred()); +#endif + obj = type->tp_new(type, args, kwds); if (obj != NULL) { /* Ugly exception: when the call was type(something), -- cgit v1.2.1 From 1c5cb63b938b85ea512cf993f3867d8ef0f2ac3f Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 28 Aug 2013 20:59:31 -0700 Subject: Tighten-up the lookkey() logic and beautify the code a bit. 
Use less code by moving many of the steps from the initial lookup into the main search loop. Beautify the code but keep the overall logic unchanged. --- Objects/setobject.c | 131 +++++++++++++++++----------------------------------- 1 file changed, 43 insertions(+), 88 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 1ad78c4deb..98969f5c81 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -79,101 +79,66 @@ NULL if the rich comparison returns an error. static setentry * set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) { - size_t i, j; /* Unsigned for defined overflow behavior. */ - size_t perturb; - setentry *freeslot; - size_t mask = so->mask; setentry *table = so->table; + setentry *freeslot = NULL; setentry *entry; + size_t perturb = hash; + size_t mask = so->mask; + size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior. */ + size_t j = i; int cmp; - PyObject *startkey; - i = (size_t)hash & mask; entry = &table[i]; - if (entry->key == NULL || entry->key == key) + if (entry->key == NULL) return entry; - if (entry->hash == hash && entry->key != dummy) { - startkey = entry->key; - Py_INCREF(startkey); - cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); - Py_DECREF(startkey); - if (cmp < 0) - return NULL; - if (table == so->table && entry->key == startkey) { - if (cmp > 0) - return entry; - } - else { - /* Start over if the compare altered the set */ - return set_lookkey(so, key, hash); - } - } - freeslot = (entry->key == dummy) ? entry : NULL; - /* In the loop, key == dummy is by far (factor of 100s) - the least likely outcome, so test for that last. 
*/ - j = i; - perturb = hash; while (1) { - j ^= 1; - entry = &table[j]; - if (entry->key == NULL) { - if (freeslot != NULL) - entry = freeslot; - break; - } if (entry->key == key) - break; + return entry; if (entry->hash == hash && entry->key != dummy) { - startkey = entry->key; + PyObject *startkey = entry->key; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); Py_DECREF(startkey); if (cmp < 0) return NULL; - if (table == so->table && entry->key == startkey) { - if (cmp > 0) - break; - } - else { + if (table != so->table || entry->key != startkey) return set_lookkey(so, key, hash); - } + if (cmp > 0) + return entry; } if (entry->key == dummy && freeslot == NULL) freeslot = entry; - i = i * 5 + perturb + 1; - j = i & mask; - perturb >>= PERTURB_SHIFT; - - entry = &table[j]; - if (entry->key == NULL) { - if (freeslot != NULL) - entry = freeslot; + entry = &table[j ^ 1]; + if (entry->key == NULL) break; - } if (entry->key == key) - break; + return entry; if (entry->hash == hash && entry->key != dummy) { - startkey = entry->key; + PyObject *startkey = entry->key; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); Py_DECREF(startkey); if (cmp < 0) return NULL; - if (table == so->table && entry->key == startkey) { - if (cmp > 0) - break; - } - else { + if (table != so->table || entry->key != startkey) return set_lookkey(so, key, hash); - } + if (cmp > 0) + return entry; } if (entry->key == dummy && freeslot == NULL) freeslot = entry; + i = i * 5 + perturb + 1; + j = i & mask; + perturb >>= PERTURB_SHIFT; + + entry = &table[j]; + if (entry->key == NULL) + break; } - return entry; + return freeslot == NULL ? entry : freeslot; } /* @@ -184,12 +149,13 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) static setentry * set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) { - size_t i, j; /* Unsigned for defined overflow behavior. 
*/ - size_t perturb; - setentry *freeslot; - size_t mask = so->mask; setentry *table = so->table; + setentry *freeslot = NULL; setentry *entry; + size_t perturb = hash; + size_t mask = so->mask; + size_t i = (size_t)hash & mask; + size_t j = i; /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass @@ -200,25 +166,11 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) return set_lookkey(so, key, hash); } - i = (size_t)hash & mask; entry = &table[i]; - if (entry->key == NULL || entry->key == key) + if (entry->key == NULL) return entry; - if (entry->key == dummy) - freeslot = entry; - else { - if (entry->hash == hash && unicode_eq(entry->key, key)) - return entry; - freeslot = NULL; - } - j = i; - perturb = hash; while (1) { - j ^= 1; - entry = &table[j]; - if (entry->key == NULL) - return freeslot == NULL ? entry : freeslot; if (entry->key == key || (entry->hash == hash && entry->key != dummy @@ -227,13 +179,9 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; - i = i * 5 + perturb + 1; - j = i & mask; - perturb >>= PERTURB_SHIFT; - - entry = &table[j]; + entry = &table[j ^ 1]; if (entry->key == NULL) - return freeslot == NULL ? entry : freeslot; + break; if (entry->key == key || (entry->hash == hash && entry->key != dummy @@ -241,9 +189,16 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; if (entry->key == dummy && freeslot == NULL) freeslot = entry; + + i = i * 5 + perturb + 1; + j = i & mask; + perturb >>= PERTURB_SHIFT; + + entry = &table[j]; + if (entry->key == NULL) + break; } - assert(0); /* NOT REACHED */ - return 0; + return freeslot == NULL ? 
entry : freeslot; } /* -- cgit v1.2.1 From d42be4bfbe9b95b6f4b39ebc29e4fec7ab43b95a Mon Sep 17 00:00:00 2001 From: Ethan Furman Date: Sat, 31 Aug 2013 10:18:55 -0700 Subject: Close #18780: %-formatting now prints value for int subclasses with %d, %i, and %u codes. --- Objects/unicodeobject.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f1d687ae9d..6dc583517c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13566,11 +13566,9 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg) case 'd': case 'i': case 'u': - /* Special-case boolean: we want 0/1 */ - if (PyBool_Check(val)) - result = PyNumber_ToBase(val, 10); - else - result = Py_TYPE(val)->tp_str(val); + /* int and int subclasses should print numerically when a numeric */ + /* format code is used (see issue18780) */ + result = PyNumber_ToBase(val, 10); break; case 'o': numnondigits = 2; -- cgit v1.2.1 From 2e2e08c04af47046560573eee808cb7d81f599d6 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 31 Aug 2013 21:27:08 -0700 Subject: Further reduce the cost of hash collisions by inspecting an additional nearby entry. --- Objects/setobject.c | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 98969f5c81..51a1653c32 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -65,10 +65,11 @@ chaining would be substantial (100% with typical malloc overhead). The initial probe index is computed as hash mod the table size. Subsequent probe indices are computed as explained in Objects/dictobject.c. -To improve cache locality, each probe is done in pairs. -After the probe is examined, an adjacent entry is then examined as well. 
-The likelihood is that an adjacent entry is in the same cache line and -can be examined more cheaply than another probe elsewhere in memory. +To improve cache locality, each probe inspects nearby entries before +moving on to probes elsewhere in memory. Depending on alignment and the +size of a cache line, the nearby entries are cheaper to inspect than +other probes elsewhere in memory. This probe strategy reduces the cost +of hash collisions. All arithmetic on hash should ignore overflow. @@ -130,6 +131,26 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; + entry = &table[j ^ 2]; + if (entry->key == NULL) + break; + if (entry->key == key) + return entry; + if (entry->hash == hash && entry->key != dummy) { + PyObject *startkey = entry->key; + Py_INCREF(startkey); + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) + return NULL; + if (table != so->table || entry->key != startkey) + return set_lookkey(so, key, hash); + if (cmp > 0) + return entry; + } + if (entry->key == dummy && freeslot == NULL) + freeslot = entry; + i = i * 5 + perturb + 1; j = i & mask; perturb >>= PERTURB_SHIFT; @@ -190,6 +211,17 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; + entry = &table[j ^ 2]; + if (entry->key == NULL) + break; + if (entry->key == key + || (entry->hash == hash + && entry->key != dummy + && unicode_eq(entry->key, key))) + return entry; + if (entry->key == dummy && freeslot == NULL) + freeslot = entry; + i = i * 5 + perturb + 1; j = i & mask; perturb >>= PERTURB_SHIFT; @@ -256,6 +288,9 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == NULL) break; entry = &table[j ^ 1]; + if (entry->key == NULL) + break; + entry = &table[j ^ 2]; if (entry->key == NULL) break; i = i * 5 + perturb + 1; -- cgit v1.2.1 From 
5388cc35895cab9d30ffcb384f38c3b9bcbcee8c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 31 Aug 2013 21:34:24 -0700 Subject: Update copyright. --- Objects/setobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 51a1653c32..dabcc25292 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -3,7 +3,7 @@ Written and maintained by Raymond D. Hettinger Derived from Lib/sets.py and Objects/dictobject.c. - Copyright (c) 2003-2008 Python Software Foundation. + Copyright (c) 2003-2013 Python Software Foundation. All rights reserved. */ -- cgit v1.2.1 From 968fd0f6a4bea93321c592a159fb7ae2344fe3be Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 2 Sep 2013 03:23:21 -0700 Subject: Instead of XORed indicies, switch to a hybrid of linear probing and open addressing. Modern processors tend to make consecutive memory accesses cheaper than random probes into memory. Small sets can fit into L1 cache, so they get less benefit. But they do come out ahead because the consecutive probes don't probe the same key more than once and because the randomization step occurs less frequently (or not at all). For the open addressing step, putting the perturb shift before the index calculation gets the upper bits into play sooner. --- Objects/setobject.c | 159 ++++++++++++++++++++++------------------------------ 1 file changed, 68 insertions(+), 91 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index dabcc25292..8a3d4f2141 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -27,6 +27,7 @@ set_key_error(PyObject *arg) /* This must be >= 1. */ #define PERTURB_SHIFT 5 +#define LINEAR_PROBES 9 /* Object used as dummy key to fill deleted entries */ static PyObject _dummy_struct; @@ -59,17 +60,17 @@ static int numfree = 0; /* The basic lookup function used by all operations. This is based on Algorithm D from Knuth Vol. 3, Sec. 
6.4. -Open addressing is preferred over chaining since the link overhead for -chaining would be substantial (100% with typical malloc overhead). -The initial probe index is computed as hash mod the table size. Subsequent -probe indices are computed as explained in Objects/dictobject.c. +The initial probe index is computed as hash mod the table size. +Subsequent probe indices are computed as explained in Objects/dictobject.c. -To improve cache locality, each probe inspects nearby entries before -moving on to probes elsewhere in memory. Depending on alignment and the -size of a cache line, the nearby entries are cheaper to inspect than -other probes elsewhere in memory. This probe strategy reduces the cost -of hash collisions. +To improve cache locality, each probe inspects a series of consecutive +nearby entries before moving on to probes elsewhere in memory. This leaves +us with a hybrid of linear probing and open addressing. The linear probing +reduces the cost of hash collisions because consecutive memory accesses +tend to be much cheaper than scattered probes. After LINEAR_PROBES steps, +we then use open addressing with the upper bits from the hash value. This +helps break-up long chains of collisions. All arithmetic on hash should ignore overflow. @@ -83,13 +84,14 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) setentry *table = so->table; setentry *freeslot = NULL; setentry *entry; + setentry *limit; size_t perturb = hash; size_t mask = so->mask; - size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior. */ - size_t j = i; + size_t i = (size_t)hash; /* Unsigned for defined overflow behavior. 
*/ + size_t j; int cmp; - entry = &table[i]; + entry = &table[i & mask]; if (entry->key == NULL) return entry; @@ -111,54 +113,37 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; - entry = &table[j ^ 1]; - if (entry->key == NULL) - break; - if (entry->key == key) - return entry; - if (entry->hash == hash && entry->key != dummy) { - PyObject *startkey = entry->key; - Py_INCREF(startkey); - cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); - Py_DECREF(startkey); - if (cmp < 0) - return NULL; - if (table != so->table || entry->key != startkey) - return set_lookkey(so, key, hash); - if (cmp > 0) - return entry; - } - if (entry->key == dummy && freeslot == NULL) - freeslot = entry; - - entry = &table[j ^ 2]; - if (entry->key == NULL) - break; - if (entry->key == key) - return entry; - if (entry->hash == hash && entry->key != dummy) { - PyObject *startkey = entry->key; - Py_INCREF(startkey); - cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); - Py_DECREF(startkey); - if (cmp < 0) - return NULL; - if (table != so->table || entry->key != startkey) - return set_lookkey(so, key, hash); - if (cmp > 0) + limit = &table[mask]; + for (j = 0 ; j < LINEAR_PROBES ; j++) { + entry = (entry == limit) ? 
&table[0] : entry + 1; + if (entry->key == NULL) + goto found_null; + if (entry->key == key) return entry; + if (entry->hash == hash && entry->key != dummy) { + PyObject *startkey = entry->key; + Py_INCREF(startkey); + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) + return NULL; + if (table != so->table || entry->key != startkey) + return set_lookkey(so, key, hash); + if (cmp > 0) + return entry; + } + if (entry->key == dummy && freeslot == NULL) + freeslot = entry; } - if (entry->key == dummy && freeslot == NULL) - freeslot = entry; - i = i * 5 + perturb + 1; - j = i & mask; perturb >>= PERTURB_SHIFT; + i = i * 5 + perturb + 1; - entry = &table[j]; + entry = &table[i & mask]; if (entry->key == NULL) break; } + found_null: return freeslot == NULL ? entry : freeslot; } @@ -173,10 +158,11 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) setentry *table = so->table; setentry *freeslot = NULL; setentry *entry; + setentry *limit; size_t perturb = hash; size_t mask = so->mask; - size_t i = (size_t)hash & mask; - size_t j = i; + size_t i = (size_t)hash; + size_t j; /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass @@ -187,7 +173,7 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) return set_lookkey(so, key, hash); } - entry = &table[i]; + entry = &table[i & mask]; if (entry->key == NULL) return entry; @@ -200,36 +186,28 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; - entry = &table[j ^ 1]; - if (entry->key == NULL) - break; - if (entry->key == key - || (entry->hash == hash - && entry->key != dummy - && unicode_eq(entry->key, key))) - return entry; - if (entry->key == dummy && freeslot == NULL) - freeslot = entry; - - entry = &table[j ^ 2]; - if (entry->key == NULL) - break; - if (entry->key == key - || (entry->hash == hash 
- && entry->key != dummy - && unicode_eq(entry->key, key))) - return entry; - if (entry->key == dummy && freeslot == NULL) - freeslot = entry; + limit = &table[mask]; + for (j = 0 ; j < LINEAR_PROBES ; j++) { + entry = (entry == limit) ? &table[0] : entry + 1; + if (entry->key == NULL) + goto found_null; + if (entry->key == key + || (entry->hash == hash + && entry->key != dummy + && unicode_eq(entry->key, key))) + return entry; + if (entry->key == dummy && freeslot == NULL) + freeslot = entry; + } - i = i * 5 + perturb + 1; - j = i & mask; perturb >>= PERTURB_SHIFT; + i = i * 5 + perturb + 1; - entry = &table[j]; + entry = &table[i & mask]; if (entry->key == NULL) break; } + found_null: return freeslot == NULL ? entry : freeslot; } @@ -280,23 +258,22 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) setentry *entry; size_t perturb = hash; size_t mask = (size_t)so->mask; - size_t i, j; + size_t i = (size_t)hash; + size_t j; - i = j = (size_t)hash & mask; while (1) { - entry = &table[j]; + entry = &table[i & mask]; if (entry->key == NULL) - break; - entry = &table[j ^ 1]; - if (entry->key == NULL) - break; - entry = &table[j ^ 2]; - if (entry->key == NULL) - break; - i = i * 5 + perturb + 1; - j = i & mask; + goto found_null; + for (j = 1 ; j <= LINEAR_PROBES ; j++) { + entry = &table[(i + j) & mask]; + if (entry->key == NULL) + goto found_null; + } perturb >>= PERTURB_SHIFT; + i = i * 5 + perturb + 1; } + found_null: so->fill++; entry->key = key; entry->hash = hash; -- cgit v1.2.1 From 81ee0af7749af3994faad79de5e3092e5fb8ea49 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 2 Sep 2013 15:59:26 -0700 Subject: Factor-out the common code for setting a KeyError. 
--- Objects/dictobject.c | 22 ++++------------------ Objects/setobject.c | 16 +--------------- 2 files changed, 5 insertions(+), 33 deletions(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index b5cbfb1f25..bbee1a61a3 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -95,20 +95,6 @@ To avoid slowing down lookups on a near-full table, we resize the table when it's USABLE_FRACTION (currently two-thirds) full. */ -/* Set a key error with the specified argument, wrapping it in a - * tuple automatically so that tuple keys are not unpacked as the - * exception arguments. */ -static void -set_key_error(PyObject *arg) -{ - PyObject *tup; - tup = PyTuple_Pack(1, arg); - if (!tup) - return; /* caller will expect error to be set anyway */ - PyErr_SetObject(PyExc_KeyError, tup); - Py_DECREF(tup); -} - #define PERTURB_SHIFT 5 /* @@ -1241,7 +1227,7 @@ PyDict_DelItem(PyObject *op, PyObject *key) if (ep == NULL) return -1; if (*value_addr == NULL) { - set_key_error(key); + _PyErr_SetKeyError(key); return -1; } old_value = *value_addr; @@ -1530,7 +1516,7 @@ dict_subscript(PyDictObject *mp, PyObject *key) else if (PyErr_Occurred()) return NULL; } - set_key_error(key); + _PyErr_SetKeyError(key); return NULL; } else @@ -2302,7 +2288,7 @@ dict_pop(PyDictObject *mp, PyObject *args) Py_INCREF(deflt); return deflt; } - set_key_error(key); + _PyErr_SetKeyError(key); return NULL; } if (!PyUnicode_CheckExact(key) || @@ -2320,7 +2306,7 @@ dict_pop(PyDictObject *mp, PyObject *args) Py_INCREF(deflt); return deflt; } - set_key_error(key); + _PyErr_SetKeyError(key); return NULL; } *value_addr = NULL; diff --git a/Objects/setobject.c b/Objects/setobject.c index 8a3d4f2141..524bda9156 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -11,20 +11,6 @@ #include "structmember.h" #include "stringlib/eq.h" -/* Set a key error with the specified argument, wrapping it in a - * tuple automatically so that tuple keys are not unpacked as the - * 
exception arguments. */ -static void -set_key_error(PyObject *arg) -{ - PyObject *tup; - tup = PyTuple_Pack(1, arg); - if (!tup) - return; /* caller will expect error to be set anyway */ - PyErr_SetObject(PyExc_KeyError, tup); - Py_DECREF(tup); -} - /* This must be >= 1. */ #define PERTURB_SHIFT 5 #define LINEAR_PROBES 9 @@ -1948,7 +1934,7 @@ set_remove(PySetObject *so, PyObject *key) } if (rv == DISCARD_NOTFOUND) { - set_key_error(key); + _PyErr_SetKeyError(key); return NULL; } Py_RETURN_NONE; -- cgit v1.2.1 From d3e5ab4722a7da0edca4ed6812c297b8eab3154c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 2 Sep 2013 16:32:27 -0700 Subject: Minor touchups. --- Objects/setobject.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 524bda9156..0aec100294 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -11,8 +11,10 @@ #include "structmember.h" #include "stringlib/eq.h" -/* This must be >= 1. 
*/ +/* This must be >= 1 */ #define PERTURB_SHIFT 5 + +/* This should be >= PySet_MINSIZE - 1 */ #define LINEAR_PROBES 9 /* Object used as dummy key to fill deleted entries */ @@ -123,7 +125,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) } perturb >>= PERTURB_SHIFT; - i = i * 5 + perturb + 1; + i = i * 5 + 1 + perturb; entry = &table[i & mask]; if (entry->key == NULL) @@ -187,7 +189,7 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) } perturb >>= PERTURB_SHIFT; - i = i * 5 + perturb + 1; + i = i * 5 + 1 + perturb; entry = &table[i & mask]; if (entry->key == NULL) @@ -257,7 +259,7 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) goto found_null; } perturb >>= PERTURB_SHIFT; - i = i * 5 + perturb + 1; + i = i * 5 + 1 + perturb; } found_null: so->fill++; -- cgit v1.2.1 From 8b89c2b1b7cd01e2ca677c3d0d2d7f62484bef61 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 7 Sep 2013 15:05:00 -0700 Subject: Move the overview comment to the top of the file. --- Objects/setobject.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 0aec100294..b63a96684e 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1,10 +1,30 @@ /* set object implementation + Written and maintained by Raymond D. Hettinger Derived from Lib/sets.py and Objects/dictobject.c. Copyright (c) 2003-2013 Python Software Foundation. All rights reserved. + + The basic lookup function used by all operations. + This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. + + The initial probe index is computed as hash mod the table size. + Subsequent probe indices are computed as explained in Objects/dictobject.c. + + To improve cache locality, each probe inspects a series of consecutive + nearby entries before moving on to probes elsewhere in memory. 
This leaves + us with a hybrid of linear probing and open addressing. The linear probing + reduces the cost of hash collisions because consecutive memory accesses + tend to be much cheaper than scattered probes. After LINEAR_PROBES steps, + we then use open addressing with the upper bits from the hash value. This + helps break-up long chains of collisions. + + All arithmetic on hash should ignore overflow. + + Unlike the dictionary implementation, the lookkey functions can return + NULL if the rich comparison returns an error. */ #include "Python.h" @@ -44,28 +64,6 @@ PyObject *_PySet_Dummy = dummy; static PySetObject *free_list[PySet_MAXFREELIST]; static int numfree = 0; - -/* -The basic lookup function used by all operations. -This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. - -The initial probe index is computed as hash mod the table size. -Subsequent probe indices are computed as explained in Objects/dictobject.c. - -To improve cache locality, each probe inspects a series of consecutive -nearby entries before moving on to probes elsewhere in memory. This leaves -us with a hybrid of linear probing and open addressing. The linear probing -reduces the cost of hash collisions because consecutive memory accesses -tend to be much cheaper than scattered probes. After LINEAR_PROBES steps, -we then use open addressing with the upper bits from the hash value. This -helps break-up long chains of collisions. - -All arithmetic on hash should ignore overflow. - -Unlike the dictionary implementation, the lookkey functions can return -NULL if the rich comparison returns an error. -*/ - static setentry * set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) { -- cgit v1.2.1 From 1fd233c7ba839d5eb508e1214fcaeb6294df7ec7 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 7 Sep 2013 17:41:01 -0700 Subject: Small rearrangement to bring together the three functions for probing the hash table. 
--- Objects/setobject.c | 71 +++++++++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 32 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index b63a96684e..362273ab49 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -64,6 +64,9 @@ PyObject *_PySet_Dummy = dummy; static PySetObject *free_list[PySet_MAXFREELIST]; static int numfree = 0; +/* ======================================================================== */ +/* ======= Begin logic for probing the hash table ========================= */ + static setentry * set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) { @@ -197,38 +200,6 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) return freeslot == NULL ? entry : freeslot; } -/* -Internal routine to insert a new key into the table. -Used by the public insert routine. -Eats a reference to key. -*/ -static int -set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) -{ - setentry *entry; - - assert(so->lookup != NULL); - entry = so->lookup(so, key, hash); - if (entry == NULL) - return -1; - if (entry->key == NULL) { - /* UNUSED */ - so->fill++; - entry->key = key; - entry->hash = hash; - so->used++; - } else if (entry->key == dummy) { - /* DUMMY */ - entry->key = key; - entry->hash = hash; - so->used++; - } else { - /* ACTIVE */ - Py_DECREF(key); - } - return 0; -} - /* Internal routine used by set_table_resize() to insert an item which is known to be absent from the set. This routine also assumes that @@ -266,6 +237,42 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) so->used++; } +/* ======== End logic for probing the hash table ========================== */ +/* ======================================================================== */ + + +/* +Internal routine to insert a new key into the table. +Used by the public insert routine. +Eats a reference to key. 
+*/ +static int +set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) +{ + setentry *entry; + + assert(so->lookup != NULL); + entry = so->lookup(so, key, hash); + if (entry == NULL) + return -1; + if (entry->key == NULL) { + /* UNUSED */ + so->fill++; + entry->key = key; + entry->hash = hash; + so->used++; + } else if (entry->key == dummy) { + /* DUMMY */ + entry->key = key; + entry->hash = hash; + so->used++; + } else { + /* ACTIVE */ + Py_DECREF(key); + } + return 0; +} + /* Restructure the table by allocating a new table and reinserting all keys again. When entries have been deleted, the new table may -- cgit v1.2.1 From 16b839dd0dc07f48e3322b260525b60b298fcc83 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 7 Sep 2013 20:26:50 -0700 Subject: Remove the freelist scheme for setobjects. The setobject freelist was consuming memory but not providing much value. Even when a freelisted setobject was available, most of the setobject fields still needed to be initialized and the small table still required a memset(). This meant that the custom freelisting scheme for sets was providing almost no incremental benefit over the default Python freelist scheme used by _PyObject_Malloc() in Objects/obmalloc.c. 
--- Objects/object.c | 1 - Objects/setobject.c | 55 ++++++++--------------------------------------------- 2 files changed, 8 insertions(+), 48 deletions(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index 693d8c73b1..8018c6a58c 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1955,7 +1955,6 @@ _PyObject_DebugTypeStats(FILE *out) _PyFrame_DebugMallocStats(out); _PyList_DebugMallocStats(out); _PyMethod_DebugMallocStats(out); - _PySet_DebugMallocStats(out); _PyTuple_DebugMallocStats(out); } diff --git a/Objects/setobject.c b/Objects/setobject.c index 362273ab49..e2cb666b67 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -57,13 +57,6 @@ PyObject *_PySet_Dummy = dummy; INIT_NONZERO_SET_SLOTS(so); \ } while(0) -/* Reuse scheme to save calls to malloc, free, and memset */ -#ifndef PySet_MAXFREELIST -#define PySet_MAXFREELIST 80 -#endif -static PySetObject *free_list[PySet_MAXFREELIST]; -static int numfree = 0; - /* ======================================================================== */ /* ======= Begin logic for probing the hash table ========================= */ @@ -565,10 +558,7 @@ set_dealloc(PySetObject *so) } if (so->table != so->smalltable) PyMem_DEL(so->table); - if (numfree < PySet_MAXFREELIST && PyAnySet_CheckExact(so)) - free_list[numfree++] = so; - else - Py_TYPE(so)->tp_free(so); + Py_TYPE(so)->tp_free(so); Py_TRASHCAN_SAFE_END(so) } @@ -1023,22 +1013,12 @@ make_new_set(PyTypeObject *type, PyObject *iterable) PySetObject *so = NULL; /* create PySetObject structure */ - if (numfree && - (type == &PySet_Type || type == &PyFrozenSet_Type)) { - so = free_list[--numfree]; - assert (so != NULL && PyAnySet_CheckExact(so)); - Py_TYPE(so) = type; - _Py_NewReference((PyObject *)so); - EMPTY_TO_MINSIZE(so); - PyObject_GC_Track(so); - } else { - so = (PySetObject *)type->tp_alloc(type, 0); - if (so == NULL) - return NULL; - /* tp_alloc has already zeroed the structure */ - assert(so->table == NULL && so->fill == 0 && 
so->used == 0); - INIT_NONZERO_SET_SLOTS(so); - } + so = (PySetObject *)type->tp_alloc(type, 0); + if (so == NULL) + return NULL; + /* tp_alloc has already zeroed the structure */ + assert(so->table == NULL && so->fill == 0 && so->used == 0); + INIT_NONZERO_SET_SLOTS(so); so->lookup = set_lookkey_unicode; so->weakreflist = NULL; @@ -1103,34 +1083,15 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds) int PySet_ClearFreeList(void) { - int freelist_size = numfree; - PySetObject *so; - - while (numfree) { - numfree--; - so = free_list[numfree]; - PyObject_GC_Del(so); - } - return freelist_size; + return 0; } void PySet_Fini(void) { - PySet_ClearFreeList(); Py_CLEAR(emptyfrozenset); } -/* Print summary info about the state of the optimized allocator */ -void -_PySet_DebugMallocStats(FILE *out) -{ - _PyDebugAllocatorStats(out, - "free PySetObject", - numfree, sizeof(PySetObject)); -} - - static PyObject * set_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { -- cgit v1.2.1 From 69e774e541a199a32886b84d39fed83fd808969c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 7 Sep 2013 21:01:29 -0700 Subject: Improve code clarity by removing two unattractive macros. --- Objects/setobject.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index e2cb666b67..7364fca737 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -45,17 +45,6 @@ static PyObject _dummy_struct; /* Exported for the gdb plugin's benefit. 
*/ PyObject *_PySet_Dummy = dummy; -#define INIT_NONZERO_SET_SLOTS(so) do { \ - (so)->table = (so)->smalltable; \ - (so)->mask = PySet_MINSIZE - 1; \ - (so)->hash = -1; \ - } while(0) - -#define EMPTY_TO_MINSIZE(so) do { \ - memset((so)->smalltable, 0, sizeof((so)->smalltable)); \ - (so)->used = (so)->fill = 0; \ - INIT_NONZERO_SET_SLOTS(so); \ - } while(0) /* ======================================================================== */ /* ======= Begin logic for probing the hash table ========================= */ @@ -439,6 +428,17 @@ set_discard_key(PySetObject *so, PyObject *key) return DISCARD_FOUND; } +static void +set_empty_to_minsize(PySetObject *so) +{ + memset(so->smalltable, 0, sizeof(so->smalltable)); + so->fill = 0; + so->used = 0; + so->mask = PySet_MINSIZE - 1; + so->table = so->smalltable; + so->hash = -1; +} + static int set_clear_internal(PySetObject *so) { @@ -466,7 +466,7 @@ set_clear_internal(PySetObject *so) */ fill = so->fill; if (table_is_malloced) - EMPTY_TO_MINSIZE(so); + set_empty_to_minsize(so); else if (fill > 0) { /* It's a small table with something that needs to be cleared. 
@@ -475,7 +475,7 @@ set_clear_internal(PySetObject *so) */ memcpy(small_copy, table, sizeof(small_copy)); table = small_copy; - EMPTY_TO_MINSIZE(so); + set_empty_to_minsize(so); } /* else it's a small table that's already empty */ @@ -1016,11 +1016,13 @@ make_new_set(PyTypeObject *type, PyObject *iterable) so = (PySetObject *)type->tp_alloc(type, 0); if (so == NULL) return NULL; - /* tp_alloc has already zeroed the structure */ - assert(so->table == NULL && so->fill == 0 && so->used == 0); - INIT_NONZERO_SET_SLOTS(so); + so->fill = 0; + so->used = 0; + so->mask = PySet_MINSIZE - 1; + so->table = so->smalltable; so->lookup = set_lookkey_unicode; + so->hash = -1; so->weakreflist = NULL; if (iterable != NULL) { -- cgit v1.2.1 From 92c7acebc598c008e7badc37b3d9ddd9e9a6fa9b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 7 Sep 2013 22:06:35 -0700 Subject: Minor code beautification. --- Objects/setobject.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 7364fca737..9df2fd322f 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -446,14 +446,13 @@ set_clear_internal(PySetObject *so) int table_is_malloced; Py_ssize_t fill; setentry small_copy[PySet_MINSIZE]; -#ifdef Py_DEBUG - Py_ssize_t i, n; - assert (PyAnySet_Check(so)); - n = so->mask + 1; - i = 0; +#ifdef Py_DEBUG + Py_ssize_t i = 0; + Py_ssize_t n = so->mask + 1; #endif + assert (PyAnySet_Check(so)); table = so->table; assert(table != NULL); table_is_malloced = table != so->smalltable; @@ -2366,7 +2365,7 @@ test_c_api(PySetObject *so) Py_ssize_t count; char *s; Py_ssize_t i; - PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x; + PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL; PyObject *ob = (PyObject *)so; Py_hash_t hash; PyObject *str; -- cgit v1.2.1 From f5bb8353602df775f9622ee978d9ffcbe254681a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 8 Sep 2013 00:25:57 -0700 Subject: 
Put the defines in the logical section and fix indentation. --- Objects/setobject.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 9df2fd322f..23d624f915 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -31,12 +31,6 @@ #include "structmember.h" #include "stringlib/eq.h" -/* This must be >= 1 */ -#define PERTURB_SHIFT 5 - -/* This should be >= PySet_MINSIZE - 1 */ -#define LINEAR_PROBES 9 - /* Object used as dummy key to fill deleted entries */ static PyObject _dummy_struct; @@ -49,6 +43,12 @@ PyObject *_PySet_Dummy = dummy; /* ======================================================================== */ /* ======= Begin logic for probing the hash table ========================= */ +/* This should be >= PySet_MINSIZE - 1 */ +#define LINEAR_PROBES 9 + +/* This must be >= 1 */ +#define PERTURB_SHIFT 5 + static setentry * set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) { @@ -151,8 +151,8 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) while (1) { if (entry->key == key || (entry->hash == hash - && entry->key != dummy - && unicode_eq(entry->key, key))) + && entry->key != dummy + && unicode_eq(entry->key, key))) return entry; if (entry->key == dummy && freeslot == NULL) freeslot = entry; -- cgit v1.2.1 From 0e4f6599066500d0780e4ebf8f4c903c2ec0ad7d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 15 Sep 2013 14:57:15 -0700 Subject: Issue 18771: Make it possible to set the number linear probes at compile-time. 
--- Objects/setobject.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 23d624f915..ece76bfc25 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -44,7 +44,9 @@ PyObject *_PySet_Dummy = dummy; /* ======= Begin logic for probing the hash table ========================= */ /* This should be >= PySet_MINSIZE - 1 */ +#ifndef LINEAR_PROBES #define LINEAR_PROBES 9 +#endif /* This must be >= 1 */ #define PERTURB_SHIFT 5 @@ -55,12 +57,14 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) setentry *table = so->table; setentry *freeslot = NULL; setentry *entry; - setentry *limit; size_t perturb = hash; size_t mask = so->mask; size_t i = (size_t)hash; /* Unsigned for defined overflow behavior. */ - size_t j; int cmp; +#if LINEAR_PROBES + setentry *limit; + size_t j; +#endif entry = &table[i & mask]; if (entry->key == NULL) @@ -84,6 +88,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; +#if LINEAR_PROBES limit = &table[mask]; for (j = 0 ; j < LINEAR_PROBES ; j++) { entry = (entry == limit) ? &table[0] : entry + 1; @@ -106,13 +111,14 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; } +#endif perturb >>= PERTURB_SHIFT; i = i * 5 + 1 + perturb; entry = &table[i & mask]; if (entry->key == NULL) - break; + goto found_null; } found_null: return freeslot == NULL ? 
entry : freeslot; @@ -129,11 +135,13 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) setentry *table = so->table; setentry *freeslot = NULL; setentry *entry; - setentry *limit; size_t perturb = hash; size_t mask = so->mask; size_t i = (size_t)hash; +#if LINEAR_PROBES + setentry *limit; size_t j; +#endif /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass @@ -157,6 +165,7 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; +#if LINEAR_PROBES limit = &table[mask]; for (j = 0 ; j < LINEAR_PROBES ; j++) { entry = (entry == limit) ? &table[0] : entry + 1; @@ -170,13 +179,14 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; } +#endif perturb >>= PERTURB_SHIFT; i = i * 5 + 1 + perturb; entry = &table[i & mask]; if (entry->key == NULL) - break; + goto found_null; } found_null: return freeslot == NULL ? entry : freeslot; @@ -198,17 +208,21 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) size_t perturb = hash; size_t mask = (size_t)so->mask; size_t i = (size_t)hash; +#if LINEAR_PROBES size_t j; +#endif while (1) { entry = &table[i & mask]; if (entry->key == NULL) goto found_null; +#if LINEAR_PROBES for (j = 1 ; j <= LINEAR_PROBES ; j++) { entry = &table[(i + j) & mask]; if (entry->key == NULL) goto found_null; } +#endif perturb >>= PERTURB_SHIFT; i = i * 5 + 1 + perturb; } -- cgit v1.2.1 From f80a38e439814141f061632ea62934d6c7c6eeba Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Thu, 19 Sep 2013 21:06:37 -0500 Subject: Update internal comments to say _something_ about the "API ID". Best I can tell, the possible values for this aren't documented anywhere. 
--- Objects/obmalloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index db774b8423..2ab66a33b9 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1702,7 +1702,9 @@ pool_is_in_list(const poolp target, poolp list) p[0: S] Number of bytes originally asked for. This is a size_t, big-endian (easier to read in a memory dump). -p[S: 2*S] +p[S} + API ID. See PEP 445. This is a character, but seems undocumented. +p[S+1: 2*S] Copies of FORBIDDENBYTE. Used to catch under- writes and reads. p[2*S: 2*S+n] The requested memory, filled with copies of CLEANBYTE. -- cgit v1.2.1 From dede5d715b428a7fee94c232546b68175ac1e682 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 21 Sep 2013 14:02:55 -0700 Subject: Make the linear probe sequence clearer. --- Objects/setobject.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index ece76bfc25..05b672fbec 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -62,7 +62,6 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) size_t i = (size_t)hash; /* Unsigned for defined overflow behavior. */ int cmp; #if LINEAR_PROBES - setentry *limit; size_t j; #endif @@ -89,9 +88,8 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) freeslot = entry; #if LINEAR_PROBES - limit = &table[mask]; - for (j = 0 ; j < LINEAR_PROBES ; j++) { - entry = (entry == limit) ? 
&table[0] : entry + 1; + for (j = 1 ; j <= LINEAR_PROBES ; j++) { + entry = &table[(i + j) & mask]; if (entry->key == NULL) goto found_null; if (entry->key == key) @@ -139,7 +137,6 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) size_t mask = so->mask; size_t i = (size_t)hash; #if LINEAR_PROBES - setentry *limit; size_t j; #endif @@ -166,9 +163,8 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) freeslot = entry; #if LINEAR_PROBES - limit = &table[mask]; - for (j = 0 ; j < LINEAR_PROBES ; j++) { - entry = (entry == limit) ? &table[0] : entry + 1; + for (j = 1 ; j <= LINEAR_PROBES ; j++) { + entry = &table[(i + j) & mask]; if (entry->key == NULL) goto found_null; if (entry->key == key -- cgit v1.2.1 From ade76ffd6308462babe54800c3f3dc5b2a93f9cb Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 21 Sep 2013 14:07:18 -0700 Subject: When LINEAR_PROBES=0, let the compiler remove the dead code on its own. --- Objects/setobject.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 05b672fbec..017fcd88db 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -61,9 +61,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) size_t mask = so->mask; size_t i = (size_t)hash; /* Unsigned for defined overflow behavior. 
*/ int cmp; -#if LINEAR_PROBES size_t j; -#endif entry = &table[i & mask]; if (entry->key == NULL) @@ -87,7 +85,6 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; -#if LINEAR_PROBES for (j = 1 ; j <= LINEAR_PROBES ; j++) { entry = &table[(i + j) & mask]; if (entry->key == NULL) @@ -109,7 +106,6 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; } -#endif perturb >>= PERTURB_SHIFT; i = i * 5 + 1 + perturb; @@ -136,9 +132,7 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) size_t perturb = hash; size_t mask = so->mask; size_t i = (size_t)hash; -#if LINEAR_PROBES size_t j; -#endif /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass @@ -162,7 +156,6 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; -#if LINEAR_PROBES for (j = 1 ; j <= LINEAR_PROBES ; j++) { entry = &table[(i + j) & mask]; if (entry->key == NULL) @@ -175,7 +168,6 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; } -#endif perturb >>= PERTURB_SHIFT; i = i * 5 + 1 + perturb; @@ -204,21 +196,17 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) size_t perturb = hash; size_t mask = (size_t)so->mask; size_t i = (size_t)hash; -#if LINEAR_PROBES size_t j; -#endif while (1) { entry = &table[i & mask]; if (entry->key == NULL) goto found_null; -#if LINEAR_PROBES for (j = 1 ; j <= LINEAR_PROBES ; j++) { entry = &table[(i + j) & mask]; if (entry->key == NULL) goto found_null; } -#endif perturb >>= PERTURB_SHIFT; i = i * 5 + 1 + perturb; } -- cgit v1.2.1 From 040c04ea234c09a8acd780f1ae5aab3b25a9037d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 21 Sep 2013 15:39:49 -0700 
Subject: Minor beautification. Put updates and declarations in a more logical order. --- Objects/setobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 017fcd88db..22d9cb35cb 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -60,8 +60,8 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) size_t perturb = hash; size_t mask = so->mask; size_t i = (size_t)hash; /* Unsigned for defined overflow behavior. */ - int cmp; size_t j; + int cmp; entry = &table[i & mask]; if (entry->key == NULL) @@ -211,9 +211,9 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) i = i * 5 + 1 + perturb; } found_null: - so->fill++; entry->key = key; entry->hash = hash; + so->fill++; so->used++; } -- cgit v1.2.1 From 8988841db43c68214c76c0a03ce8e538b1a60b14 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 21 Sep 2013 20:17:31 -0700 Subject: Note that LINEAR_PROBES can be set to zero. --- Objects/setobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/setobject.c b/Objects/setobject.c index 22d9cb35cb..adc99da627 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -43,7 +43,7 @@ PyObject *_PySet_Dummy = dummy; /* ======================================================================== */ /* ======= Begin logic for probing the hash table ========================= */ -/* This should be >= PySet_MINSIZE - 1 */ +/* Set this to zero to turn-off linear probing */ #ifndef LINEAR_PROBES #define LINEAR_PROBES 9 #endif -- cgit v1.2.1 From 772eae5d61decf01ccb2de810635e97a57a6f6de Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Wed, 25 Sep 2013 09:04:23 +0200 Subject: Fix minor typo. 
--- Objects/obmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 2ab66a33b9..4437bf9a65 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1702,7 +1702,7 @@ pool_is_in_list(const poolp target, poolp list) p[0: S] Number of bytes originally asked for. This is a size_t, big-endian (easier to read in a memory dump). -p[S} +p[S] API ID. See PEP 445. This is a character, but seems undocumented. p[S+1: 2*S] Copies of FORBIDDENBYTE. Used to catch under- writes and reads. -- cgit v1.2.1 From 75a8814a123a1be57a57ddfd5d28e027a2a6b2b0 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sun, 29 Sep 2013 00:28:55 +1000 Subject: Close #18596: Support address sanity checking in clang/GCC This patch appropriately marks known false alarms in the small object allocator when address sanity checking is enabled (patch contributed by Dhiru Kholia). --- Objects/obmalloc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'Objects') diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 4437bf9a65..f7b3e491ca 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -12,6 +12,24 @@ static void _PyObject_DebugDumpAddress(const void *p); static void _PyMem_DebugCheckAddress(char api_id, const void *p); #endif +#if defined(__has_feature) /* Clang */ + #if __has_feature(address_sanitizer) /* is ASAN enabled? */ + #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \ + __attribute__((no_address_safety_analysis)) \ + __attribute__ ((noinline)) + #else + #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS + #endif +#else + #if defined(__SANITIZE_ADDRESS__) /* GCC 4.8.x, is ASAN enabled? 
*/ + #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \ + __attribute__((no_address_safety_analysis)) \ + __attribute__ ((noinline)) + #else + #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS + #endif +#endif + #ifdef WITH_PYMALLOC #ifdef MS_WINDOWS @@ -1300,6 +1318,7 @@ redirect: /* free */ +ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS static void _PyObject_Free(void *ctx, void *p) { @@ -1528,6 +1547,7 @@ redirect: * return a non-NULL result. */ +ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS static void * _PyObject_Realloc(void *ctx, void *p, size_t nbytes) { -- cgit v1.2.1 From ce17139e415e3ca37cf0125793c95e659596d65d Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Wed, 2 Oct 2013 22:06:54 +1000 Subject: Close #19078: memoryview now supports reversed Patch by Claudiu Popa --- Objects/memoryobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index abd069b324..e57af1832f 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -2402,7 +2402,7 @@ static PyMappingMethods memory_as_mapping = { /* As sequence */ static PySequenceMethods memory_as_sequence = { - 0, /* sq_length */ + (lenfunc)memory_length, /* sq_length */ 0, /* sq_concat */ 0, /* sq_repeat */ (ssizeargfunc)memory_item, /* sq_item */ -- cgit v1.2.1 From 07c0c2dac0e9a8f50472d0645713c07f3a0e8c8d Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sat, 5 Oct 2013 21:12:18 +0200 Subject: Issue #19087: Improve bytearray allocation in order to allow cheap popping of data at the front (slice deletion). 
--- Objects/bytearrayobject.c | 308 ++++++++++++++++++++++++---------------------- 1 file changed, 163 insertions(+), 145 deletions(-) (limited to 'Objects') diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 6ea654e4b6..c0f0819c12 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -150,6 +150,7 @@ PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size) } Py_SIZE(new) = size; new->ob_alloc = alloc; + new->ob_start = new->ob_bytes; new->ob_exports = 0; return (PyObject *)new; @@ -177,48 +178,70 @@ int PyByteArray_Resize(PyObject *self, Py_ssize_t size) { void *sval; - Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc; + PyByteArrayObject *obj = ((PyByteArrayObject *)self); + Py_ssize_t alloc = obj->ob_alloc; + Py_ssize_t logical_offset = obj->ob_start - obj->ob_bytes; assert(self != NULL); assert(PyByteArray_Check(self)); assert(size >= 0); + assert(logical_offset >= 0); + assert(logical_offset <= alloc); if (size == Py_SIZE(self)) { return 0; } - if (!_canresize((PyByteArrayObject *)self)) { + if (!_canresize(obj)) { return -1; } - if (size < alloc / 2) { - /* Major downsize; resize down to exact size */ - alloc = size + 1; - } - else if (size < alloc) { - /* Within allocated size; quick exit */ - Py_SIZE(self) = size; - ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */ - return 0; - } - else if (size <= alloc * 1.125) { - /* Moderate upsize; overallocate similar to list_resize() */ - alloc = size + (size >> 3) + (size < 9 ? 3 : 6); + if (size + logical_offset + 1 < alloc) { + /* Current buffer is large enough to host the requested size, + decide on a strategy. 
*/ + if (size < alloc / 2) { + /* Major downsize; resize down to exact size */ + alloc = size + 1; + } + else { + /* Minor downsize; quick exit */ + Py_SIZE(self) = size; + PyByteArray_AS_STRING(self)[size] = '\0'; /* Trailing null */ + return 0; + } } else { - /* Major upsize; resize up to exact size */ - alloc = size + 1; + /* Need growing, decide on a strategy */ + if (size <= alloc * 1.125) { + /* Moderate upsize; overallocate similar to list_resize() */ + alloc = size + (size >> 3) + (size < 9 ? 3 : 6); + } + else { + /* Major upsize; resize up to exact size */ + alloc = size + 1; + } } - sval = PyObject_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc); - if (sval == NULL) { - PyErr_NoMemory(); - return -1; + if (logical_offset > 0) { + sval = PyObject_Malloc(alloc); + if (sval == NULL) { + PyErr_NoMemory(); + return -1; + } + memcpy(sval, PyByteArray_AS_STRING(self), Py_MIN(size, Py_SIZE(self))); + PyObject_Free(obj->ob_bytes); + } + else { + sval = PyObject_Realloc(obj->ob_bytes, alloc); + if (sval == NULL) { + PyErr_NoMemory(); + return -1; + } } - ((PyByteArrayObject *)self)->ob_bytes = sval; + obj->ob_bytes = obj->ob_start = sval; Py_SIZE(self) = size; - ((PyByteArrayObject *)self)->ob_alloc = alloc; - ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */ + obj->ob_alloc = alloc; + obj->ob_bytes[size] = '\0'; /* Trailing null byte */ return 0; } @@ -288,13 +311,13 @@ bytearray_iconcat(PyByteArrayObject *self, PyObject *other) } if (size < self->ob_alloc) { Py_SIZE(self) = size; - self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */ + PyByteArray_AS_STRING(self)[Py_SIZE(self)] = '\0'; /* Trailing null byte */ } else if (PyByteArray_Resize((PyObject *)self, size) < 0) { PyBuffer_Release(&vo); return NULL; } - memcpy(self->ob_bytes + mysize, vo.buf, vo.len); + memcpy(PyByteArray_AS_STRING(self) + mysize, vo.buf, vo.len); PyBuffer_Release(&vo); Py_INCREF(self); return (PyObject *)self; @@ -331,6 +354,7 @@ 
bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count) { Py_ssize_t mysize; Py_ssize_t size; + char *buf; if (count < 0) count = 0; @@ -338,19 +362,16 @@ bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count) if (count > 0 && mysize > PY_SSIZE_T_MAX / count) return PyErr_NoMemory(); size = mysize * count; - if (size < self->ob_alloc) { - Py_SIZE(self) = size; - self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */ - } - else if (PyByteArray_Resize((PyObject *)self, size) < 0) + if (PyByteArray_Resize((PyObject *)self, size) < 0) return NULL; + buf = PyByteArray_AS_STRING(self); if (mysize == 1) - memset(self->ob_bytes, self->ob_bytes[0], size); + memset(buf, buf[0], size); else { Py_ssize_t i; for (i = 1; i < count; i++) - memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize); + memcpy(buf + i*mysize, buf, mysize); } Py_INCREF(self); @@ -366,7 +387,7 @@ bytearray_getitem(PyByteArrayObject *self, Py_ssize_t i) PyErr_SetString(PyExc_IndexError, "bytearray index out of range"); return NULL; } - return PyLong_FromLong((unsigned char)(self->ob_bytes[i])); + return PyLong_FromLong((unsigned char)(PyByteArray_AS_STRING(self)[i])); } static PyObject * @@ -385,7 +406,7 @@ bytearray_subscript(PyByteArrayObject *self, PyObject *index) PyErr_SetString(PyExc_IndexError, "bytearray index out of range"); return NULL; } - return PyLong_FromLong((unsigned char)(self->ob_bytes[i])); + return PyLong_FromLong((unsigned char)(PyByteArray_AS_STRING(self)[i])); } else if (PySlice_Check(index)) { Py_ssize_t start, stop, step, slicelength, cur, i; @@ -398,8 +419,8 @@ bytearray_subscript(PyByteArrayObject *self, PyObject *index) if (slicelength <= 0) return PyByteArray_FromStringAndSize("", 0); else if (step == 1) { - return PyByteArray_FromStringAndSize(self->ob_bytes + start, - slicelength); + return PyByteArray_FromStringAndSize( + PyByteArray_AS_STRING(self) + start, slicelength); } else { char *source_buf = PyByteArray_AS_STRING(self); @@ -424,11 +445,69 @@ 
bytearray_subscript(PyByteArrayObject *self, PyObject *index) } } +static int +bytearray_setslice_linear(PyByteArrayObject *self, + Py_ssize_t lo, Py_ssize_t hi, + char *bytes, Py_ssize_t bytes_len) +{ + Py_ssize_t avail = hi - lo; + char *buf = PyByteArray_AS_STRING(self); + Py_ssize_t growth = bytes_len - avail; + assert(avail >= 0); + + if (growth != 0) { + if (growth < 0) { + if (!_canresize(self)) + return -1; + if (lo == 0) { + /* Shrink the buffer by advancing its logical start */ + self->ob_start -= growth; + /* + 0 lo hi old_size + | |<----avail----->|<-----tail------>| + | |<-bytes_len->|<-----tail------>| + 0 new_lo new_hi new_size + */ + } + else { + /* + 0 lo hi old_size + | |<----avail----->|<-----tomove------>| + | |<-bytes_len->|<-----tomove------>| + 0 lo new_hi new_size + */ + memmove(buf + lo + bytes_len, buf + hi, + Py_SIZE(self) - hi); + } + } + /* XXX(nnorwitz): need to verify this can't overflow! */ + if (PyByteArray_Resize( + (PyObject *)self, Py_SIZE(self) + growth) < 0) + return -1; + buf = PyByteArray_AS_STRING(self); + if (growth > 0) { + /* Make the place for the additional bytes */ + /* + 0 lo hi old_size + | |<-avail->|<-----tomove------>| + | |<---bytes_len-->|<-----tomove------>| + 0 lo new_hi new_size + */ + memmove(buf + lo + bytes_len, buf + hi, + Py_SIZE(self) - lo - bytes_len); + } + } + + if (bytes_len > 0) + memcpy(buf + lo, bytes, bytes_len); + return 0; +} + static int bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi, PyObject *values) { - Py_ssize_t avail, needed; + Py_ssize_t needed; void *bytes; Py_buffer vbytes; int res = 0; @@ -467,50 +546,9 @@ bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi, if (hi > Py_SIZE(self)) hi = Py_SIZE(self); - avail = hi - lo; - if (avail < 0) - lo = hi = avail = 0; - - if (avail != needed) { - if (avail > needed) { - if (!_canresize(self)) { - res = -1; - goto finish; - } - /* - 0 lo hi old_size - | |<----avail----->|<-----tomove------>| - | 
|<-needed->|<-----tomove------>| - 0 lo new_hi new_size - */ - memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi, - Py_SIZE(self) - hi); - } - /* XXX(nnorwitz): need to verify this can't overflow! */ - if (PyByteArray_Resize((PyObject *)self, - Py_SIZE(self) + needed - avail) < 0) { - res = -1; - goto finish; - } - if (avail < needed) { - /* - 0 lo hi old_size - | |<-avail->|<-----tomove------>| - | |<----needed---->|<-----tomove------>| - 0 lo new_hi new_size - */ - memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi, - Py_SIZE(self) - lo - needed); - } - } - - if (needed > 0) - memcpy(self->ob_bytes + lo, bytes, needed); - - - finish: + res = bytearray_setslice_linear(self, lo, hi, bytes, needed); if (vbytes.len != -1) - PyBuffer_Release(&vbytes); + PyBuffer_Release(&vbytes); return res; } @@ -533,7 +571,7 @@ bytearray_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value) if (!_getbytevalue(value, &ival)) return -1; - self->ob_bytes[i] = ival; + PyByteArray_AS_STRING(self)[i] = ival; return 0; } @@ -541,7 +579,8 @@ static int bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values) { Py_ssize_t start, stop, step, slicelen, needed; - char *bytes; + char *buf, *bytes; + buf = PyByteArray_AS_STRING(self); if (PyIndex_Check(index)) { Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError); @@ -568,7 +607,7 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu int ival; if (!_getbytevalue(values, &ival)) return -1; - self->ob_bytes[i] = (char)ival; + buf[i] = (char)ival; return 0; } } @@ -606,7 +645,7 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu } else { assert(PyByteArray_Check(values)); - bytes = ((PyByteArrayObject *)values)->ob_bytes; + bytes = PyByteArray_AS_STRING(values); needed = Py_SIZE(values); } /* Make sure b[5:2] = ... inserts before 5, not before 2. 
*/ @@ -614,38 +653,7 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu (step > 0 && start > stop)) stop = start; if (step == 1) { - if (slicelen != needed) { - if (!_canresize(self)) - return -1; - if (slicelen > needed) { - /* - 0 start stop old_size - | |<---slicelen--->|<-----tomove------>| - | |<-needed->|<-----tomove------>| - 0 lo new_hi new_size - */ - memmove(self->ob_bytes + start + needed, self->ob_bytes + stop, - Py_SIZE(self) - stop); - } - if (PyByteArray_Resize((PyObject *)self, - Py_SIZE(self) + needed - slicelen) < 0) - return -1; - if (slicelen < needed) { - /* - 0 lo hi old_size - | |<-avail->|<-----tomove------>| - | |<----needed---->|<-----tomove------>| - 0 lo new_hi new_size - */ - memmove(self->ob_bytes + start + needed, self->ob_bytes + stop, - Py_SIZE(self) - start - needed); - } - } - - if (needed > 0) - memcpy(self->ob_bytes + start, bytes, needed); - - return 0; + return bytearray_setslice_linear(self, start, stop, bytes, needed); } else { if (needed == 0) { @@ -672,14 +680,14 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu if (cur + step >= (size_t)PyByteArray_GET_SIZE(self)) lim = PyByteArray_GET_SIZE(self) - cur - 1; - memmove(self->ob_bytes + cur - i, - self->ob_bytes + cur + 1, lim); + memmove(buf + cur - i, + buf + cur + 1, lim); } /* Move the tail of the bytes, in one chunk */ cur = start + (size_t)slicelen*step; if (cur < (size_t)PyByteArray_GET_SIZE(self)) { - memmove(self->ob_bytes + cur - slicelen, - self->ob_bytes + cur, + memmove(buf + cur - slicelen, + buf + cur, PyByteArray_GET_SIZE(self) - cur); } if (PyByteArray_Resize((PyObject *)self, @@ -701,7 +709,7 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu return -1; } for (cur = start, i = 0; i < slicelen; cur += step, i++) - self->ob_bytes[cur] = bytes[i]; + buf[cur] = bytes[i]; return 0; } } @@ -781,7 +789,7 @@ bytearray_init(PyByteArrayObject *self, PyObject *args, 
PyObject *kwds) if (count > 0) { if (PyByteArray_Resize((PyObject *)self, count)) return -1; - memset(self->ob_bytes, 0, count); + memset(PyByteArray_AS_STRING(self), 0, count); } return 0; } @@ -794,7 +802,8 @@ bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds) return -1; size = view.len; if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail; - if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0) + if (PyBuffer_ToContiguous(PyByteArray_AS_STRING(self), + &view, size, 'C') < 0) goto fail; PyBuffer_Release(&view); return 0; @@ -838,7 +847,7 @@ bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds) Py_SIZE(self)++; else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0) goto error; - self->ob_bytes[Py_SIZE(self)-1] = value; + PyByteArray_AS_STRING(self)[Py_SIZE(self)-1] = value; } /* Clean up and return success */ @@ -863,6 +872,7 @@ bytearray_repr(PyByteArrayObject *self) size_t newsize; PyObject *v; Py_ssize_t i; + char *bytes; char c; char *p; int quote; @@ -899,11 +909,12 @@ bytearray_repr(PyByteArrayObject *self) *p++ = *quote_prefix++; *p++ = quote; + bytes = PyByteArray_AS_STRING(self); for (i = 0; i < length; i++) { /* There's at least enough room for a hex escape and a closing quote. 
*/ assert(newsize - (p - buffer) >= 5); - c = self->ob_bytes[i]; + c = bytes[i]; if (c == '\'' || c == '\\') *p++ = '\\', *p++ = c; else if (c == '\t') @@ -2194,7 +2205,7 @@ bytearray_reverse(PyByteArrayObject *self, PyObject *unused) Py_ssize_t i, j, n = Py_SIZE(self); j = n / 2; - head = self->ob_bytes; + head = PyByteArray_AS_STRING(self); tail = head + n - 1; for (i = 0; i < j; i++) { swap = *head; @@ -2215,6 +2226,7 @@ bytearray_insert(PyByteArrayObject *self, PyObject *args) PyObject *value; int ival; Py_ssize_t where, n = Py_SIZE(self); + char *buf; if (!PyArg_ParseTuple(args, "nO:insert", &where, &value)) return NULL; @@ -2228,6 +2240,7 @@ bytearray_insert(PyByteArrayObject *self, PyObject *args) return NULL; if (PyByteArray_Resize((PyObject *)self, n + 1) < 0) return NULL; + buf = PyByteArray_AS_STRING(self); if (where < 0) { where += n; @@ -2236,8 +2249,8 @@ bytearray_insert(PyByteArrayObject *self, PyObject *args) } if (where > n) where = n; - memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where); - self->ob_bytes[where] = ival; + memmove(buf + where + 1, buf + where, n - where); + buf[where] = ival; Py_RETURN_NONE; } @@ -2262,7 +2275,7 @@ bytearray_append(PyByteArrayObject *self, PyObject *arg) if (PyByteArray_Resize((PyObject *)self, n + 1) < 0) return NULL; - self->ob_bytes[n] = value; + PyByteArray_AS_STRING(self)[n] = value; Py_RETURN_NONE; } @@ -2355,6 +2368,7 @@ bytearray_pop(PyByteArrayObject *self, PyObject *args) { int value; Py_ssize_t where = -1, n = Py_SIZE(self); + char *buf; if (!PyArg_ParseTuple(args, "|n:pop", &where)) return NULL; @@ -2373,8 +2387,9 @@ bytearray_pop(PyByteArrayObject *self, PyObject *args) if (!_canresize(self)) return NULL; - value = self->ob_bytes[where]; - memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where); + buf = PyByteArray_AS_STRING(self); + value = buf[where]; + memmove(buf + where, buf + where + 1, n - where); if (PyByteArray_Resize((PyObject *)self, n - 1) < 0) return NULL; 
@@ -2390,12 +2405,13 @@ bytearray_remove(PyByteArrayObject *self, PyObject *arg) { int value; Py_ssize_t where, n = Py_SIZE(self); + char *buf = PyByteArray_AS_STRING(self); if (! _getbytevalue(arg, &value)) return NULL; for (where = 0; where < n; where++) { - if (self->ob_bytes[where] == value) + if (buf[where] == value) break; } if (where == n) { @@ -2405,7 +2421,7 @@ bytearray_remove(PyByteArrayObject *self, PyObject *arg) if (!_canresize(self)) return NULL; - memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where); + memmove(buf + where, buf + where + 1, n - where); if (PyByteArray_Resize((PyObject *)self, n - 1) < 0) return NULL; @@ -2459,7 +2475,7 @@ bytearray_strip(PyByteArrayObject *self, PyObject *args) argptr = varg.buf; argsize = varg.len; } - myptr = self->ob_bytes; + myptr = PyByteArray_AS_STRING(self); mysize = Py_SIZE(self); left = lstrip_helper(myptr, mysize, argptr, argsize); if (left == mysize) @@ -2468,7 +2484,7 @@ bytearray_strip(PyByteArrayObject *self, PyObject *args) right = rstrip_helper(myptr, mysize, argptr, argsize); if (arg != Py_None) PyBuffer_Release(&varg); - return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left); + return PyByteArray_FromStringAndSize(myptr + left, right - left); } PyDoc_STRVAR(lstrip__doc__, @@ -2496,13 +2512,13 @@ bytearray_lstrip(PyByteArrayObject *self, PyObject *args) argptr = varg.buf; argsize = varg.len; } - myptr = self->ob_bytes; + myptr = PyByteArray_AS_STRING(self); mysize = Py_SIZE(self); left = lstrip_helper(myptr, mysize, argptr, argsize); right = mysize; if (arg != Py_None) PyBuffer_Release(&varg); - return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left); + return PyByteArray_FromStringAndSize(myptr + left, right - left); } PyDoc_STRVAR(rstrip__doc__, @@ -2530,12 +2546,12 @@ bytearray_rstrip(PyByteArrayObject *self, PyObject *args) argptr = varg.buf; argsize = varg.len; } - myptr = self->ob_bytes; + myptr = PyByteArray_AS_STRING(self); mysize = 
Py_SIZE(self); right = rstrip_helper(myptr, mysize, argptr, argsize); if (arg != Py_None) PyBuffer_Release(&varg); - return PyByteArray_FromStringAndSize(self->ob_bytes, right); + return PyByteArray_FromStringAndSize(myptr, right); } PyDoc_STRVAR(decode_doc, @@ -2686,6 +2702,7 @@ _common_reduce(PyByteArrayObject *self, int proto) { PyObject *dict; _Py_IDENTIFIER(__dict__); + char *buf; dict = _PyObject_GetAttrId((PyObject *)self, &PyId___dict__); if (dict == NULL) { @@ -2694,19 +2711,20 @@ _common_reduce(PyByteArrayObject *self, int proto) Py_INCREF(dict); } + buf = PyByteArray_AS_STRING(self); if (proto < 3) { /* use str based reduction for backwards compatibility with Python 2.x */ PyObject *latin1; - if (self->ob_bytes) - latin1 = PyUnicode_DecodeLatin1(self->ob_bytes, Py_SIZE(self), NULL); + if (Py_SIZE(self)) + latin1 = PyUnicode_DecodeLatin1(buf, Py_SIZE(self), NULL); else latin1 = PyUnicode_FromString(""); return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict); } else { /* use more efficient byte based reduction */ - if (self->ob_bytes) { - return Py_BuildValue("(O(y#)N)", Py_TYPE(self), self->ob_bytes, Py_SIZE(self), dict); + if (Py_SIZE(self)) { + return Py_BuildValue("(O(y#)N)", Py_TYPE(self), buf, Py_SIZE(self), dict); } else { return Py_BuildValue("(O()N)", Py_TYPE(self), dict); @@ -2938,7 +2956,7 @@ bytearrayiter_next(bytesiterobject *it) if (it->it_index < PyByteArray_GET_SIZE(seq)) { item = PyLong_FromLong( - (unsigned char)seq->ob_bytes[it->it_index]); + (unsigned char)PyByteArray_AS_STRING(seq)[it->it_index]); if (item != NULL) ++it->it_index; return item; -- cgit v1.2.1 From 1d56b3c8f2f9b2460d2bc62d12948c61f4e301a8 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sat, 5 Oct 2013 21:24:10 +0200 Subject: Hopefully fix Windows compilation error following 499a96611baa --- Objects/bytearrayobject.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'Objects') diff --git 
a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index c0f0819c12..2358e05d25 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2431,21 +2431,21 @@ bytearray_remove(PyByteArrayObject *self, PyObject *arg) /* XXX These two helpers could be optimized if argsize == 1 */ static Py_ssize_t -lstrip_helper(unsigned char *myptr, Py_ssize_t mysize, +lstrip_helper(char *myptr, Py_ssize_t mysize, void *argptr, Py_ssize_t argsize) { Py_ssize_t i = 0; - while (i < mysize && memchr(argptr, myptr[i], argsize)) + while (i < mysize && memchr(argptr, (unsigned char) myptr[i], argsize)) i++; return i; } static Py_ssize_t -rstrip_helper(unsigned char *myptr, Py_ssize_t mysize, +rstrip_helper(char *myptr, Py_ssize_t mysize, void *argptr, Py_ssize_t argsize) { Py_ssize_t i = mysize - 1; - while (i >= 0 && memchr(argptr, myptr[i], argsize)) + while (i >= 0 && memchr(argptr, (unsigned char) myptr[i], argsize)) i--; return i + 1; } @@ -2460,7 +2460,7 @@ static PyObject * bytearray_strip(PyByteArrayObject *self, PyObject *args) { Py_ssize_t left, right, mysize, argsize; - void *myptr, *argptr; + char *myptr, *argptr; PyObject *arg = Py_None; Py_buffer varg; if (!PyArg_ParseTuple(args, "|O:strip", &arg)) @@ -2472,7 +2472,7 @@ bytearray_strip(PyByteArrayObject *self, PyObject *args) else { if (_getbuffer(arg, &varg) < 0) return NULL; - argptr = varg.buf; + argptr = (char *) varg.buf; argsize = varg.len; } myptr = PyByteArray_AS_STRING(self); @@ -2497,7 +2497,7 @@ static PyObject * bytearray_lstrip(PyByteArrayObject *self, PyObject *args) { Py_ssize_t left, right, mysize, argsize; - void *myptr, *argptr; + char *myptr, *argptr; PyObject *arg = Py_None; Py_buffer varg; if (!PyArg_ParseTuple(args, "|O:lstrip", &arg)) @@ -2509,7 +2509,7 @@ bytearray_lstrip(PyByteArrayObject *self, PyObject *args) else { if (_getbuffer(arg, &varg) < 0) return NULL; - argptr = varg.buf; + argptr = (char *) varg.buf; argsize = varg.len; } myptr = PyByteArray_AS_STRING(self); @@ 
-2531,7 +2531,7 @@ static PyObject * bytearray_rstrip(PyByteArrayObject *self, PyObject *args) { Py_ssize_t right, mysize, argsize; - void *myptr, *argptr; + char *myptr, *argptr; PyObject *arg = Py_None; Py_buffer varg; if (!PyArg_ParseTuple(args, "|O:rstrip", &arg)) @@ -2543,7 +2543,7 @@ bytearray_rstrip(PyByteArrayObject *self, PyObject *args) else { if (_getbuffer(arg, &varg) < 0) return NULL; - argptr = varg.buf; + argptr = (char *) varg.buf; argsize = varg.len; } myptr = PyByteArray_AS_STRING(self); -- cgit v1.2.1 From 4bc6a340b1946d4947ad76d11dbd922529898968 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2013 15:55:14 +0200 Subject: Issue #18874: PyCode_New() now ensures that the filename is a ready Unicode string. This change does nothing in most cases, but it is useful on Windows in some cases. --- Objects/codeobject.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Objects') diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 9713f61b24..353f414a38 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -74,6 +74,11 @@ PyCode_New(int argcount, int kwonlyargcount, PyErr_BadInternalCall(); return NULL; } + + /* Ensure that the filename is a ready Unicode string */ + if (PyUnicode_READY(filename) < 0) + return NULL; + n_cellvars = PyTuple_GET_SIZE(cellvars); intern_strings(names); intern_strings(varnames); -- cgit v1.2.1 From 8737be3b0719a50db07ecb29bf7e87f63a151691 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2013 15:58:42 +0200 Subject: Issue #18874: _PyObject_Malloc/Realloc/Free() now falls back on _PyMem_RawMalloc/Realloc/Free, instead of _PyMem_Malloc/Realloc/Free. So it becomes possible to use the fast pymalloc allocator for the PYMEM_DOMAIN_MEM domain (PyMem_Malloc/Realloc/Free functions). 
--- Objects/obmalloc.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'Objects') diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index f7b3e491ca..004cfaac61 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -125,10 +125,11 @@ _PyObject_ArenaFree(void *ctx, void *ptr, size_t size) #define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawRealloc, _PyMem_RawFree #ifdef WITH_PYMALLOC -#define PYOBJECT_FUNCS _PyObject_Malloc, _PyObject_Realloc, _PyObject_Free +# define PYOBJ_FUNCS _PyObject_Malloc, _PyObject_Realloc, _PyObject_Free #else -#define PYOBJECT_FUNCS PYRAW_FUNCS +# define PYOBJ_FUNCS PYRAW_FUNCS #endif +#define PYMEM_FUNCS PYRAW_FUNCS #ifdef PYMALLOC_DEBUG typedef struct { @@ -142,16 +143,16 @@ static struct { debug_alloc_api_t obj; } _PyMem_Debug = { {'r', {NULL, PYRAW_FUNCS}}, - {'m', {NULL, PYRAW_FUNCS}}, - {'o', {NULL, PYOBJECT_FUNCS}} + {'m', {NULL, PYMEM_FUNCS}}, + {'o', {NULL, PYOBJ_FUNCS}} }; -#define PYDEBUG_FUNCS _PyMem_DebugMalloc, _PyMem_DebugRealloc, _PyMem_DebugFree +#define PYDBG_FUNCS _PyMem_DebugMalloc, _PyMem_DebugRealloc, _PyMem_DebugFree #endif static PyMemAllocator _PyMem_Raw = { #ifdef PYMALLOC_DEBUG - &_PyMem_Debug.raw, PYDEBUG_FUNCS + &_PyMem_Debug.raw, PYDBG_FUNCS #else NULL, PYRAW_FUNCS #endif @@ -159,23 +160,24 @@ static PyMemAllocator _PyMem_Raw = { static PyMemAllocator _PyMem = { #ifdef PYMALLOC_DEBUG - &_PyMem_Debug.mem, PYDEBUG_FUNCS + &_PyMem_Debug.mem, PYDBG_FUNCS #else - NULL, PYRAW_FUNCS + NULL, PYMEM_FUNCS #endif }; static PyMemAllocator _PyObject = { #ifdef PYMALLOC_DEBUG - &_PyMem_Debug.obj, PYDEBUG_FUNCS + &_PyMem_Debug.obj, PYDBG_FUNCS #else - NULL, PYOBJECT_FUNCS + NULL, PYOBJ_FUNCS #endif }; #undef PYRAW_FUNCS -#undef PYOBJECT_FUNCS -#undef PYDEBUG_FUNCS +#undef PYMEM_FUNCS +#undef PYOBJ_FUNCS +#undef PYDBG_FUNCS static PyObjectArenaAllocator _PyObject_Arena = {NULL, #ifdef MS_WINDOWS @@ -924,7 +926,7 @@ new_arena(void) return NULL; /* overflow */ #endif 
nbytes = numarenas * sizeof(*arenas); - arenaobj = (struct arena_object *)PyMem_Realloc(arenas, nbytes); + arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes); if (arenaobj == NULL) return NULL; arenas = arenaobj; @@ -1309,7 +1311,7 @@ redirect: * has been reached. */ { - void *result = PyMem_Malloc(nbytes); + void *result = PyMem_RawMalloc(nbytes); if (!result) _Py_AllocatedBlocks--; return result; @@ -1539,7 +1541,7 @@ _PyObject_Free(void *ctx, void *p) redirect: #endif /* We didn't allocate this address. */ - PyMem_Free(p); + PyMem_RawFree(p); } /* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0, @@ -1608,14 +1610,14 @@ _PyObject_Realloc(void *ctx, void *p, size_t nbytes) * at p. Instead we punt: let C continue to manage this block. */ if (nbytes) - return PyMem_Realloc(p, nbytes); + return PyMem_RawRealloc(p, nbytes); /* C doesn't define the result of realloc(p, 0) (it may or may not * return NULL then), but Python's docs promise that nbytes==0 never * returns NULL. We don't pass 0 to realloc(), to avoid that endcase * to begin with. Even then, we can't be sure that realloc() won't * return NULL. */ - bp = PyMem_Realloc(p, 1); + bp = PyMem_RawRealloc(p, 1); return bp ? 
bp : p; } -- cgit v1.2.1 From 0be244fd304f3f6a24bde0c5ac54e2cbac67adea Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Thu, 10 Oct 2013 17:24:45 -0400 Subject: upgrade unicode db to 6.3.0 (closes #19221) --- Objects/unicodetype_db.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodetype_db.h b/Objects/unicodetype_db.h index 57add8fac6..1fdc092a3d 100644 --- a/Objects/unicodetype_db.h +++ b/Objects/unicodetype_db.h @@ -1589,7 +1589,7 @@ static unsigned short index2[] = { 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 55, 55, 55, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 5, 0, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 25, 25, 25, 5, 21, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 96, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 7, 8, @@ -1801,7 +1801,7 @@ static unsigned short index2[] = { 25, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 96, 5, 5, 5, 5, 55, 25, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 25, 25, 25, 2, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, + 25, 25, 25, 21, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 96, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, @@ -1828,7 +1828,7 @@ static unsigned short index2[] = { 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 132, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 55, 55, 55, 
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, - 55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 18, 0, 0, 5, 5, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 25, 0, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 25, 18, 25, @@ -1915,7 +1915,7 @@ static unsigned short index2[] = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 6, 3, 3, 21, 21, 21, 21, 21, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 21, - 21, 21, 21, 21, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 245, 95, 0, 0, + 21, 21, 21, 21, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 245, 95, 0, 0, 246, 247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 95, 245, 26, 22, 23, 246, 247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 0, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -2925,9 +2925,6 @@ static unsigned short index2[] = { double _PyUnicode_ToNumeric(Py_UCS4 ch) { switch (ch) { - case 0x12456: - case 0x12457: - return (double) -1.0; case 0x0F33: return (double) -1.0/2.0; case 0x0030: @@ -3383,6 +3380,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch) case 0x12435: case 0x1244A: case 0x12450: + case 0x12456: case 0x12459: case 0x1D361: case 0x1D7D0: @@ -3539,6 +3537,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch) case 0x1243B: case 0x1244B: case 0x12451: + case 0x12457: case 0x1D362: case 0x1D7D1: case 0x1D7DB: @@ -4294,7 +4293,6 @@ int _PyUnicode_IsWhitespace(const Py_UCS4 ch) case 0x0085: case 0x00A0: case 0x1680: - case 0x180E: case 0x2000: case 0x2001: case 0x2002: -- cgit v1.2.1 From f79808079953a455962c39025c1d18fd239388a9 Mon Sep 17 00:00:00 2001 From: Larry Hastings Date: Sat, 19 Oct 2013 00:09:25 -0700 Subject: Issue #16612: Add "Argument Clinic", a compile-time preprocessor for C files to 
generate argument parsing code. (See PEP 436.) --- Objects/dictobject.c | 30 ++++++++++++++---- Objects/unicodeobject.c | 81 ++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 88 insertions(+), 23 deletions(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index bbee1a61a3..2424176b71 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2160,9 +2160,31 @@ dict_richcompare(PyObject *v, PyObject *w, int op) return res; } +/*[clinic] +module dict + +@coexist +dict.__contains__ + + key: object + / + +True if D has a key k, else False" +[clinic]*/ + +PyDoc_STRVAR(dict___contains____doc__, +"True if D has a key k, else False\"\n" +"\n" +"dict.__contains__(key)"); + +#define DICT___CONTAINS___METHODDEF \ + {"__contains__", (PyCFunction)dict___contains__, METH_O|METH_COEXIST, dict___contains____doc__}, + static PyObject * -dict_contains(PyDictObject *mp, PyObject *key) +dict___contains__(PyObject *self, PyObject *key) +/*[clinic checksum: 61c5c802ea1d35699a1a754f1f3538ea9b259cf4]*/ { + register PyDictObject *mp = (PyDictObject *)self; Py_hash_t hash; PyDictKeyEntry *ep; PyObject **value_addr; @@ -2447,9 +2469,6 @@ _PyDict_KeysSize(PyDictKeysObject *keys) return sizeof(PyDictKeysObject) + (DK_SIZE(keys)-1) * sizeof(PyDictKeyEntry); } -PyDoc_STRVAR(contains__doc__, -"D.__contains__(k) -> True if D has a key k, else False"); - PyDoc_STRVAR(getitem__doc__, "x.__getitem__(y) <==> x[y]"); PyDoc_STRVAR(sizeof__doc__, @@ -2498,8 +2517,7 @@ PyDoc_STRVAR(values__doc__, "D.values() -> an object providing a view on D's values"); static PyMethodDef mapp_methods[] = { - {"__contains__",(PyCFunction)dict_contains, METH_O | METH_COEXIST, - contains__doc__}, + DICT___CONTAINS___METHODDEF {"__getitem__", (PyCFunction)dict_subscript, METH_O | METH_COEXIST, getitem__doc__}, {"__sizeof__", (PyCFunction)dict_sizeof, METH_NOARGS, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 6dc583517c..5df4df68a5 100644 --- 
a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12656,28 +12656,76 @@ unicode_swapcase(PyObject *self) return case_operation(self, do_swapcase); } -PyDoc_STRVAR(maketrans__doc__, - "str.maketrans(x[, y[, z]]) -> dict (static method)\n\ -\n\ -Return a translation table usable for str.translate().\n\ -If there is only one argument, it must be a dictionary mapping Unicode\n\ -ordinals (integers) or characters to Unicode ordinals, strings or None.\n\ -Character keys will be then converted to ordinals.\n\ -If there are two arguments, they must be strings of equal length, and\n\ -in the resulting dictionary, each character in x will be mapped to the\n\ -character at the same position in y. If there is a third argument, it\n\ -must be a string, whose characters will be mapped to None in the result."); +/*[clinic] +module str -static PyObject* +@staticmethod +str.maketrans as unicode_maketrans + + x: object + + y: unicode=NULL + + z: unicode=NULL + + / + +Return a translation table usable for str.translate(). + +If there is only one argument, it must be a dictionary mapping Unicode +ordinals (integers) or characters to Unicode ordinals, strings or None. +Character keys will be then converted to ordinals. +If there are two arguments, they must be strings of equal length, and +in the resulting dictionary, each character in x will be mapped to the +character at the same position in y. If there is a third argument, it +must be a string, whose characters will be mapped to None in the result. 
+[clinic]*/ + +PyDoc_STRVAR(unicode_maketrans__doc__, +"Return a translation table usable for str.translate().\n" +"\n" +"str.maketrans(x, y=None, z=None)\n" +"\n" +"If there is only one argument, it must be a dictionary mapping Unicode\n" +"ordinals (integers) or characters to Unicode ordinals, strings or None.\n" +"Character keys will be then converted to ordinals.\n" +"If there are two arguments, they must be strings of equal length, and\n" +"in the resulting dictionary, each character in x will be mapped to the\n" +"character at the same position in y. If there is a third argument, it\n" +"must be a string, whose characters will be mapped to None in the result."); + +#define UNICODE_MAKETRANS_METHODDEF \ + {"maketrans", (PyCFunction)unicode_maketrans, METH_VARARGS|METH_STATIC, unicode_maketrans__doc__}, + +static PyObject * +unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z); + +static PyObject * unicode_maketrans(PyObject *null, PyObject *args) { - PyObject *x, *y = NULL, *z = NULL; + PyObject *return_value = NULL; + PyObject *x; + PyObject *y = NULL; + PyObject *z = NULL; + + if (!PyArg_ParseTuple(args, + "O|UU:maketrans", + &x, &y, &z)) + goto exit; + return_value = unicode_maketrans_impl(x, y, z); + +exit: + return return_value; +} + +static PyObject * +unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z) +/*[clinic checksum: 137db9c3199e7906b7967009f511c24fa3235b5f]*/ +{ PyObject *new = NULL, *key, *value; Py_ssize_t i = 0; int res; - if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z)) - return NULL; new = PyDict_New(); if (!new) return NULL; @@ -13317,8 +13365,7 @@ static PyMethodDef unicode_methods[] = { {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, {"format_map", (PyCFunction) do_string_format_map, METH_O, format_map__doc__}, {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__}, - {"maketrans", (PyCFunction) unicode_maketrans, - METH_VARARGS | METH_STATIC, 
maketrans__doc__}, + UNICODE_MAKETRANS_METHODDEF {"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__}, #if 0 /* These methods are just used for debugging the implementation. */ -- cgit v1.2.1 From 6e75c19396402b107f1dfffa8746c6b8b679a9a4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 19 Oct 2013 21:03:34 +0300 Subject: Issue #1772673: The type of `char*` arguments now changed to `const char*`. --- Objects/abstract.c | 8 ++++---- Objects/bytesobject.c | 2 +- Objects/exceptions.c | 8 ++++---- Objects/fileobject.c | 4 ++-- Objects/longobject.c | 16 ++++++++-------- Objects/unicodeobject.c | 2 +- 6 files changed, 20 insertions(+), 20 deletions(-) (limited to 'Objects') diff --git a/Objects/abstract.c b/Objects/abstract.c index 078b4bcac7..d937892781 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -223,7 +223,7 @@ PyObject_DelItem(PyObject *o, PyObject *key) } int -PyObject_DelItemString(PyObject *o, char *key) +PyObject_DelItemString(PyObject *o, const char *key) { PyObject *okey; int ret; @@ -1950,7 +1950,7 @@ PyMapping_Length(PyObject *o) #define PyMapping_Length PyMapping_Size PyObject * -PyMapping_GetItemString(PyObject *o, char *key) +PyMapping_GetItemString(PyObject *o, const char *key) { PyObject *okey, *r; @@ -1966,7 +1966,7 @@ PyMapping_GetItemString(PyObject *o, char *key) } int -PyMapping_SetItemString(PyObject *o, char *key, PyObject *value) +PyMapping_SetItemString(PyObject *o, const char *key, PyObject *value) { PyObject *okey; int r; @@ -1985,7 +1985,7 @@ PyMapping_SetItemString(PyObject *o, char *key, PyObject *value) } int -PyMapping_HasKeyString(PyObject *o, char *key) +PyMapping_HasKeyString(PyObject *o, const char *key) { PyObject *v; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 3a2906cf3c..613269045b 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1347,7 +1347,7 @@ do_argstrip(PyBytesObject *self, int striptype, PyObject *args) { PyObject *sep = NULL; - if 
(!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep)) + if (!PyArg_ParseTuple(args, stripformat[striptype], &sep)) return NULL; if (sep != NULL && sep != Py_None) { diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 79bbb8f2ff..de5d746858 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -121,11 +121,11 @@ BaseException_str(PyBaseExceptionObject *self) static PyObject * BaseException_repr(PyBaseExceptionObject *self) { - char *name; - char *dot; + const char *name; + const char *dot; - name = (char *)Py_TYPE(self)->tp_name; - dot = strrchr(name, '.'); + name = Py_TYPE(self)->tp_name; + dot = (const char *) strrchr(name, '.'); if (dot != NULL) name = dot+1; return PyUnicode_FromFormat("%s%R", name, self->args); diff --git a/Objects/fileobject.c b/Objects/fileobject.c index f273b0b44b..596f909755 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -26,8 +26,8 @@ extern "C" { /* External C interface */ PyObject * -PyFile_FromFd(int fd, char *name, char *mode, int buffering, char *encoding, - char *errors, char *newline, int closefd) +PyFile_FromFd(int fd, const char *name, const char *mode, int buffering, const char *encoding, + const char *errors, const char *newline, int closefd) { PyObject *io, *stream; _Py_IDENTIFIER(open); diff --git a/Objects/longobject.c b/Objects/longobject.c index 876cd19e61..a5c0d1b33f 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1941,10 +1941,10 @@ unsigned char _PyLong_DigitValue[256] = { * string characters. */ static PyLongObject * -long_from_binary_base(char **str, int base) +long_from_binary_base(const char **str, int base) { - char *p = *str; - char *start = p; + const char *p = *str; + const char *start = p; int bits_per_char; Py_ssize_t n; PyLongObject *z; @@ -2009,10 +2009,10 @@ long_from_binary_base(char **str, int base) * If unsuccessful, NULL will be returned. 
*/ PyObject * -PyLong_FromString(char *str, char **pend, int base) +PyLong_FromString(const char *str, char **pend, int base) { int sign = 1, error_if_nonzero = 0; - char *start, *orig_str = str; + const char *start, *orig_str = str; PyLongObject *z = NULL; PyObject *strobj; Py_ssize_t slen; @@ -2147,7 +2147,7 @@ digit beyond the first. int convwidth; twodigits convmultmax, convmult; digit *pz, *pzstop; - char* scan; + const char* scan; static double log_base_BASE[37] = {0.0e0,}; static int convwidth_base[37] = {0,}; @@ -2275,12 +2275,12 @@ digit beyond the first. if (z == NULL) return NULL; if (pend != NULL) - *pend = str; + *pend = (char *)str; return (PyObject *) z; onError: if (pend != NULL) - *pend = str; + *pend = (char *)str; Py_XDECREF(z); slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200; strobj = PyUnicode_FromStringAndSize(orig_str, slen); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b9e8e1e003..ff806cf1a1 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11874,7 +11874,7 @@ do_argstrip(PyObject *self, int striptype, PyObject *args) { PyObject *sep = NULL; - if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep)) + if (!PyArg_ParseTuple(args, stripformat[striptype], &sep)) return NULL; if (sep != NULL && sep != Py_None) { -- cgit v1.2.1 From 5f42d8cfe8f7e6db342147f4f3e83a6268adf4e9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 24 Oct 2013 23:19:51 +0300 Subject: Issue #19369: Optimized the usage of __length_hint__(). 
--- Objects/abstract.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'Objects') diff --git a/Objects/abstract.c b/Objects/abstract.c index d937892781..6c7a6cd226 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -82,15 +82,17 @@ PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue) PyObject *hint, *result; Py_ssize_t res; _Py_IDENTIFIER(__length_hint__); - res = PyObject_Length(o); - if (res < 0 && PyErr_Occurred()) { - if (!PyErr_ExceptionMatches(PyExc_TypeError)) { - return -1; + if (_PyObject_HasLen(o)) { + res = PyObject_Length(o); + if (res < 0 && PyErr_Occurred()) { + if (!PyErr_ExceptionMatches(PyExc_TypeError)) { + return -1; + } + PyErr_Clear(); + } + else { + return res; } - PyErr_Clear(); - } - else { - return res; } hint = _PyObject_LookupSpecial(o, &PyId___length_hint__); if (hint == NULL) { -- cgit v1.2.1 From ac1d52868507eab11a080e7600005366a5e1be26 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 Oct 2013 01:19:37 +0100 Subject: Issue #18408: Add a new PyFrame_FastToLocalsWithError() function to handle exceptions when merging fast locals into f_locals of a frame. PyEval_GetLocals() now raises an exception and returns NULL on failure. 
--- Objects/frameobject.c | 77 +++++++++++++++++++++++++++++++++------------------ Objects/object.c | 7 ++--- 2 files changed, 53 insertions(+), 31 deletions(-) (limited to 'Objects') diff --git a/Objects/frameobject.c b/Objects/frameobject.c index a62a45e1f6..76e77b8a9b 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -21,7 +21,8 @@ static PyMemberDef frame_memberlist[] = { static PyObject * frame_getlocals(PyFrameObject *f, void *closure) { - PyFrame_FastToLocals(f); + if (PyFrame_FastToLocalsWithError(f) < 0) + return NULL; Py_INCREF(f->f_locals); return f->f_locals; } @@ -772,12 +773,9 @@ PyFrame_BlockPop(PyFrameObject *f) If deref is true, then the values being copied are cell variables and the value is extracted from the cell variable before being put in dict. - - Exceptions raised while modifying the dict are silently ignored, - because there is no good way to report them. */ -static void +static int map_to_dict(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values, int deref) { @@ -794,14 +792,19 @@ map_to_dict(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values, value = PyCell_GET(value); } if (value == NULL) { - if (PyObject_DelItem(dict, key) != 0) - PyErr_Clear(); + if (PyObject_DelItem(dict, key) != 0) { + if (PyErr_ExceptionMatches(PyExc_KeyError)) + PyErr_Clear(); + else + return -1; + } } else { if (PyObject_SetItem(dict, key, value) != 0) - PyErr_Clear(); + return -1; } } + return 0; } /* Copy values from the "locals" dict into the fast locals. 
@@ -858,42 +861,49 @@ dict_to_map(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values, } } -void -PyFrame_FastToLocals(PyFrameObject *f) +int +PyFrame_FastToLocalsWithError(PyFrameObject *f) { /* Merge fast locals into f->f_locals */ PyObject *locals, *map; PyObject **fast; - PyObject *error_type, *error_value, *error_traceback; PyCodeObject *co; Py_ssize_t j; Py_ssize_t ncells, nfreevars; - if (f == NULL) - return; + + if (f == NULL) { + PyErr_BadInternalCall(); + return -1; + } locals = f->f_locals; if (locals == NULL) { locals = f->f_locals = PyDict_New(); - if (locals == NULL) { - PyErr_Clear(); /* Can't report it :-( */ - return; - } + if (locals == NULL) + return -1; } co = f->f_code; map = co->co_varnames; - if (!PyTuple_Check(map)) - return; - PyErr_Fetch(&error_type, &error_value, &error_traceback); + if (!PyTuple_Check(map)) { + PyErr_Format(PyExc_SystemError, + "co_varnames must be a tuple, not %s", + Py_TYPE(map)->tp_name); + return -1; + } fast = f->f_localsplus; j = PyTuple_GET_SIZE(map); if (j > co->co_nlocals) j = co->co_nlocals; - if (co->co_nlocals) - map_to_dict(map, j, locals, fast, 0); + if (co->co_nlocals) { + if (map_to_dict(map, j, locals, fast, 0) < 0) + return -1; + } ncells = PyTuple_GET_SIZE(co->co_cellvars); nfreevars = PyTuple_GET_SIZE(co->co_freevars); if (ncells || nfreevars) { - map_to_dict(co->co_cellvars, ncells, - locals, fast + co->co_nlocals, 1); + if (map_to_dict(co->co_cellvars, ncells, + locals, fast + co->co_nlocals, 1)) + return -1; + /* If the namespace is unoptimized, then one of the following cases applies: 1. It does not contain free variables, because it @@ -903,11 +913,24 @@ PyFrame_FastToLocals(PyFrameObject *f) into the locals dict used by the class. 
*/ if (co->co_flags & CO_OPTIMIZED) { - map_to_dict(co->co_freevars, nfreevars, - locals, fast + co->co_nlocals + ncells, 1); + if (map_to_dict(co->co_freevars, nfreevars, + locals, fast + co->co_nlocals + ncells, 1) < 0) + return -1; } } - PyErr_Restore(error_type, error_value, error_traceback); + return 0; +} + +void +PyFrame_FastToLocals(PyFrameObject *f) +{ + int res; + + assert(!PyErr_Occurred()); + + res = PyFrame_FastToLocalsWithError(f); + if (res < 0) + PyErr_Clear(); } void diff --git a/Objects/object.c b/Objects/object.c index 8018c6a58c..95a5334d8b 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1407,12 +1407,11 @@ static PyObject * _dir_locals(void) { PyObject *names; - PyObject *locals = PyEval_GetLocals(); + PyObject *locals; - if (locals == NULL) { - PyErr_SetString(PyExc_SystemError, "frame does not exist"); + locals = PyEval_GetLocals(); + if (locals == NULL) return NULL; - } names = PyMapping_Keys(locals); if (!names) -- cgit v1.2.1 From ecd6eadafde9968cc081538a7ee60d7202acc5e2 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 Oct 2013 01:28:23 +0100 Subject: Issue #18408: Fix PyUnicode_AsUTF8AndSize(), raise MemoryError exception on memory allocation failure --- Objects/unicodeobject.c | 1 + 1 file changed, 1 insertion(+) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ff806cf1a1..87c6472e6a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3766,6 +3766,7 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize) return NULL; _PyUnicode_UTF8(unicode) = PyObject_MALLOC(PyBytes_GET_SIZE(bytes) + 1); if (_PyUnicode_UTF8(unicode) == NULL) { + PyErr_NoMemory(); Py_DECREF(bytes); return NULL; } -- cgit v1.2.1 From 227726df36d539bdfe467d1d828824fab8538318 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 Oct 2013 03:14:22 +0100 Subject: Issue #18408: Fix error handling in PyBytes_FromObject() _PyBytes_Resize(&new) sets new to NULL on error, don't call 
Py_DECREF() with NULL. --- Objects/bytesobject.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 613269045b..529c634708 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2660,9 +2660,8 @@ PyBytes_FromObject(PyObject *x) return new; error: - /* Error handling when new != NULL */ Py_XDECREF(it); - Py_DECREF(new); + Py_XDECREF(new); return NULL; } -- cgit v1.2.1 From 187f86ed62b8ac1791c206d84a82f095485c1015 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 Oct 2013 03:15:37 +0100 Subject: Issue #18408: Fix bytearrayiter.partition()/rpartition(), handle PyByteArray_FromStringAndSize() failure (ex: on memory allocation failure) --- Objects/stringlib/partition.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'Objects') diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h index 40cb5129d5..ed32a6f2b3 100644 --- a/Objects/stringlib/partition.h +++ b/Objects/stringlib/partition.h @@ -29,6 +29,11 @@ STRINGLIB(partition)(PyObject* str_obj, PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len)); PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0)); PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0)); + + if (PyErr_Occurred()) { + Py_DECREF(out); + return NULL; + } #else Py_INCREF(str_obj); PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); @@ -79,6 +84,11 @@ STRINGLIB(rpartition)(PyObject* str_obj, PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0)); PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0)); PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len)); + + if (PyErr_Occurred()) { + Py_DECREF(out); + return NULL; + } #else Py_INCREF(STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY); -- cgit v1.2.1 From 53c398f943a0016726ebc8d85a52cebe81d28d73 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 Oct 2013 10:56:34 +0100 Subject: fix indent --- Objects/typeobject.c | 8 ++++---- 1 file changed, 4 insertions(+), 
4 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 6826cf9697..99fa899016 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5324,10 +5324,10 @@ slot_tp_str(PyObject *self) func = lookup_method(self, &PyId___str__); if (func == NULL) return NULL; - res = PyEval_CallObject(func, NULL); - Py_DECREF(func); - return res; - } + res = PyEval_CallObject(func, NULL); + Py_DECREF(func); + return res; +} static Py_hash_t slot_tp_hash(PyObject *self) -- cgit v1.2.1 From d9d79257770d34ff3040dd936e4f8f3fca346df7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 Oct 2013 11:34:05 +0100 Subject: Issue #18609: Add a fast-path for "iso8859-1" encoding On AIX, the locale encoding may be "iso8859-1", which was not a known syntax of the legacy ISO 8859-1 encoding. Using a C codec instead of a Python codec is faster but also avoids tricky issues during Python startup or complex code. --- Objects/unicodeobject.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 87c6472e6a..a7ea9c8597 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3021,7 +3021,8 @@ PyUnicode_Decode(const char *s, return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL); else if ((strcmp(lower, "latin-1") == 0) || (strcmp(lower, "latin1") == 0) || - (strcmp(lower, "iso-8859-1") == 0)) + (strcmp(lower, "iso-8859-1") == 0) || + (strcmp(lower, "iso8859-1") == 0)) return PyUnicode_DecodeLatin1(s, size, errors); #ifdef HAVE_MBCS else if (strcmp(lower, "mbcs") == 0) @@ -3392,7 +3393,8 @@ PyUnicode_AsEncodedString(PyObject *unicode, } else if ((strcmp(lower, "latin-1") == 0) || (strcmp(lower, "latin1") == 0) || - (strcmp(lower, "iso-8859-1") == 0)) + (strcmp(lower, "iso-8859-1") == 0) || + (strcmp(lower, "iso8859-1") == 0)) return _PyUnicode_AsLatin1String(unicode, errors); #ifdef HAVE_MBCS else if (strcmp(lower, "mbcs") == 0) -- 
cgit v1.2.1 From cb823bc9c2dc714636f5890b3bc374af0bc108b7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 Oct 2013 19:29:52 +0100 Subject: Issue #19437: Fix PyObject_CallFunction(), handle Py_VaBuildValue() and PyTuple_New() failure --- Objects/abstract.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Objects') diff --git a/Objects/abstract.c b/Objects/abstract.c index 6c7a6cd226..91df5da655 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -2144,6 +2144,8 @@ PyObject_CallFunction(PyObject *callable, const char *format, ...) } else args = PyTuple_New(0); + if (args == NULL) + return NULL; return call_function_tail(callable, args); } -- cgit v1.2.1 From 71dc7d3856ab046c1c81b74b064ea95c22868447 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 Oct 2013 19:31:43 +0100 Subject: Issue #19437: Fix _PyUnicode_New() (constructor of legacy string), set all attributes before checking for error. The destructor expects all attributes to be set. It is now safe to call Py_DECREF(unicode) in the constructor. 
--- Objects/unicodeobject.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a7ea9c8597..208e5e3db3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -896,6 +896,19 @@ _PyUnicode_New(Py_ssize_t length) if (unicode == NULL) return NULL; new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); + + _PyUnicode_WSTR_LENGTH(unicode) = length; + _PyUnicode_HASH(unicode) = -1; + _PyUnicode_STATE(unicode).interned = 0; + _PyUnicode_STATE(unicode).kind = 0; + _PyUnicode_STATE(unicode).compact = 0; + _PyUnicode_STATE(unicode).ready = 0; + _PyUnicode_STATE(unicode).ascii = 0; + _PyUnicode_DATA_ANY(unicode) = NULL; + _PyUnicode_LENGTH(unicode) = 0; + _PyUnicode_UTF8(unicode) = NULL; + _PyUnicode_UTF8_LENGTH(unicode) = 0; + _PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_MALLOC(new_size); if (!_PyUnicode_WSTR(unicode)) { Py_DECREF(unicode); @@ -912,17 +925,7 @@ _PyUnicode_New(Py_ssize_t length) */ _PyUnicode_WSTR(unicode)[0] = 0; _PyUnicode_WSTR(unicode)[length] = 0; - _PyUnicode_WSTR_LENGTH(unicode) = length; - _PyUnicode_HASH(unicode) = -1; - _PyUnicode_STATE(unicode).interned = 0; - _PyUnicode_STATE(unicode).kind = 0; - _PyUnicode_STATE(unicode).compact = 0; - _PyUnicode_STATE(unicode).ready = 0; - _PyUnicode_STATE(unicode).ascii = 0; - _PyUnicode_DATA_ANY(unicode) = NULL; - _PyUnicode_LENGTH(unicode) = 0; - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; + assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0)); return unicode; } -- cgit v1.2.1 From 957bc90c955cb90b3db9e084086d30e6a49b4a84 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 29 Oct 2013 21:31:25 +0100 Subject: Issue #17936: Fix O(n**2) behaviour when adding or removing many subclasses of a given type. 
--- Objects/typeobject.c | 122 ++++++++++++++++++++++++++------------------------- 1 file changed, 63 insertions(+), 59 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 99fa899016..309afa4b85 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -101,16 +101,17 @@ PyType_Modified(PyTypeObject *type) needed. */ PyObject *raw, *ref; - Py_ssize_t i, n; + Py_ssize_t i; if (!PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)) return; raw = type->tp_subclasses; if (raw != NULL) { - n = PyList_GET_SIZE(raw); - for (i = 0; i < n; i++) { - ref = PyList_GET_ITEM(raw, i); + assert(PyDict_CheckExact(raw)); + i = 0; + while (PyDict_Next(raw, &i, NULL, &ref)) { + assert(PyWeakref_CheckRef(ref)); ref = PyWeakref_GET_OBJECT(ref); if (ref != Py_None) { PyType_Modified((PyTypeObject *)ref); @@ -435,6 +436,7 @@ static int mro_internal(PyTypeObject *); static int compatible_for_assignment(PyTypeObject *, PyTypeObject *, char *); static int add_subclass(PyTypeObject*, PyTypeObject*); static void remove_subclass(PyTypeObject *, PyTypeObject *); +static void remove_all_subclasses(PyTypeObject *type, PyObject *bases); static void update_all_slots(PyTypeObject *); typedef int (*update_callback)(PyTypeObject *, void *); @@ -448,15 +450,15 @@ mro_subclasses(PyTypeObject *type, PyObject* temp) { PyTypeObject *subclass; PyObject *ref, *subclasses, *old_mro; - Py_ssize_t i, n; + Py_ssize_t i; subclasses = type->tp_subclasses; if (subclasses == NULL) return 0; - assert(PyList_Check(subclasses)); - n = PyList_GET_SIZE(subclasses); - for (i = 0; i < n; i++) { - ref = PyList_GET_ITEM(subclasses, i); + assert(PyDict_CheckExact(subclasses)); + i = 0; + + while (PyDict_Next(subclasses, &i, NULL, &ref)) { assert(PyWeakref_CheckRef(ref)); subclass = (PyTypeObject *)PyWeakref_GET_OBJECT(ref); assert(subclass != NULL); @@ -575,13 +577,7 @@ type_set_bases(PyTypeObject *type, PyObject *value, void *context) /* for now, sod that: just remove from 
all old_bases, add to all new_bases */ - for (i = PyTuple_GET_SIZE(old_bases) - 1; i >= 0; i--) { - ob = PyTuple_GET_ITEM(old_bases, i); - if (PyType_Check(ob)) { - remove_subclass( - (PyTypeObject*)ob, type); - } - } + remove_all_subclasses(type, old_bases); for (i = PyTuple_GET_SIZE(value) - 1; i >= 0; i--) { ob = PyTuple_GET_ITEM(value, i); @@ -2733,10 +2729,14 @@ static void type_dealloc(PyTypeObject *type) { PyHeapTypeObject *et; + PyObject *tp, *val, *tb; /* Assert this is a heap-allocated type object */ assert(type->tp_flags & Py_TPFLAGS_HEAPTYPE); _PyObject_GC_UNTRACK(type); + PyErr_Fetch(&tp, &val, &tb); + remove_all_subclasses(type, type->tp_bases); + PyErr_Restore(tp, val, tb); PyObject_ClearWeakRefs((PyObject *)type); et = (PyHeapTypeObject *)type; Py_XDECREF(type->tp_base); @@ -2761,7 +2761,7 @@ static PyObject * type_subclasses(PyTypeObject *type, PyObject *args_ignored) { PyObject *list, *raw, *ref; - Py_ssize_t i, n; + Py_ssize_t i; list = PyList_New(0); if (list == NULL) @@ -2769,10 +2769,9 @@ type_subclasses(PyTypeObject *type, PyObject *args_ignored) raw = type->tp_subclasses; if (raw == NULL) return list; - assert(PyList_Check(raw)); - n = PyList_GET_SIZE(raw); - for (i = 0; i < n; i++) { - ref = PyList_GET_ITEM(raw, i); + assert(PyDict_CheckExact(raw)); + i = 0; + while (PyDict_Next(raw, &i, NULL, &ref)) { assert(PyWeakref_CheckRef(ref)); ref = PyWeakref_GET_OBJECT(ref); if (ref != Py_None) { @@ -2961,8 +2960,8 @@ type_clear(PyTypeObject *type) class's dict; the cycle will be broken that way. tp_subclasses: - A list of weak references can't be part of a cycle; and - lists have their own tp_clear. + A dict of weak references can't be part of a cycle; and + dicts have their own tp_clear. slots (in PyHeapTypeObject): A tuple of strings can't be part of a cycle. 
@@ -4353,51 +4352,57 @@ PyType_Ready(PyTypeObject *type) static int add_subclass(PyTypeObject *base, PyTypeObject *type) { - Py_ssize_t i; - int result; - PyObject *list, *ref, *newobj; + int result = -1; + PyObject *dict, *key, *newobj; - list = base->tp_subclasses; - if (list == NULL) { - base->tp_subclasses = list = PyList_New(0); - if (list == NULL) + dict = base->tp_subclasses; + if (dict == NULL) { + base->tp_subclasses = dict = PyDict_New(); + if (dict == NULL) return -1; } - assert(PyList_Check(list)); - newobj = PyWeakref_NewRef((PyObject *)type, NULL); - if (newobj == NULL) + assert(PyDict_CheckExact(dict)); + key = PyLong_FromVoidPtr((void *) type); + if (key == NULL) return -1; - i = PyList_GET_SIZE(list); - while (--i >= 0) { - ref = PyList_GET_ITEM(list, i); - assert(PyWeakref_CheckRef(ref)); - if (PyWeakref_GET_OBJECT(ref) == Py_None) - return PyList_SetItem(list, i, newobj); + newobj = PyWeakref_NewRef((PyObject *)type, NULL); + if (newobj != NULL) { + result = PyDict_SetItem(dict, key, newobj); + Py_DECREF(newobj); } - result = PyList_Append(list, newobj); - Py_DECREF(newobj); + Py_DECREF(key); return result; } static void remove_subclass(PyTypeObject *base, PyTypeObject *type) { - Py_ssize_t i; - PyObject *list, *ref; + PyObject *dict, *key; - list = base->tp_subclasses; - if (list == NULL) { + dict = base->tp_subclasses; + if (dict == NULL) { return; } - assert(PyList_Check(list)); - i = PyList_GET_SIZE(list); - while (--i >= 0) { - ref = PyList_GET_ITEM(list, i); - assert(PyWeakref_CheckRef(ref)); - if (PyWeakref_GET_OBJECT(ref) == (PyObject*)type) { - /* this can't fail, right? */ - PySequence_DelItem(list, i); - return; + assert(PyDict_CheckExact(dict)); + key = PyLong_FromVoidPtr((void *) type); + if (key == NULL || PyDict_DelItem(dict, key)) { + /* This can happen if the type initialization errored out before + the base subclasses were updated (e.g. a non-str __qualname__ + was passed in the type dict). 
*/ + PyErr_Clear(); + } + Py_XDECREF(key); +} + +static void +remove_all_subclasses(PyTypeObject *type, PyObject *bases) +{ + if (bases) { + Py_ssize_t i; + for (i = 0; i < PyTuple_GET_SIZE(bases); i++) { + PyObject *base = PyTuple_GET_ITEM(bases, i); + if (PyType_Check(base)) + remove_subclass((PyTypeObject*) base, type); } } } @@ -6173,15 +6178,14 @@ recurse_down_subclasses(PyTypeObject *type, PyObject *name, { PyTypeObject *subclass; PyObject *ref, *subclasses, *dict; - Py_ssize_t i, n; + Py_ssize_t i; subclasses = type->tp_subclasses; if (subclasses == NULL) return 0; - assert(PyList_Check(subclasses)); - n = PyList_GET_SIZE(subclasses); - for (i = 0; i < n; i++) { - ref = PyList_GET_ITEM(subclasses, i); + assert(PyDict_CheckExact(subclasses)); + i = 0; + while (PyDict_Next(subclasses, &i, NULL, &ref)) { assert(PyWeakref_CheckRef(ref)); subclass = (PyTypeObject *)PyWeakref_GET_OBJECT(ref); assert(subclass != NULL); -- cgit v1.2.1 From b51933a70a76d37320ae3271334120b04945ab5c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 Oct 2013 23:31:50 +0100 Subject: Issue #19424: Optimize PyUnicode_CompareWithASCIIString() Use fast memcmp() instead of a loop using the slow PyUnicode_READ() macro. strlen() is still necessary to check Unicode string containing null bytes. 
--- Objects/unicodeobject.c | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 208e5e3db3..e6195fe53b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10573,25 +10573,42 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) { Py_ssize_t i; int kind; - void *data; Py_UCS4 chr; assert(_PyUnicode_CHECK(uni)); if (PyUnicode_READY(uni) == -1) return -1; kind = PyUnicode_KIND(uni); - data = PyUnicode_DATA(uni); - /* Compare Unicode string and source character set string */ - for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++) - if (chr != str[i]) - return (chr < (unsigned char)(str[i])) ? -1 : 1; - /* This check keeps Python strings that end in '\0' from comparing equal - to C strings identical up to that point. */ - if (PyUnicode_GET_LENGTH(uni) != i || chr) - return 1; /* uni is longer */ - if (str[i]) - return -1; /* str is longer */ - return 0; + if (kind == PyUnicode_1BYTE_KIND) { + char *data = PyUnicode_1BYTE_DATA(uni); + Py_ssize_t len1 = PyUnicode_GET_LENGTH(uni); + size_t len, len2 = strlen(str); + int cmp; + + len = Py_MIN(len1, len2); + cmp = memcmp(data, str, len); + if (cmp != 0) + return cmp; + if (len1 > len2) + return 1; /* uni is longer */ + if (len2 > len1) + return -1; /* str is longer */ + return 0; + } + else { + void *data = PyUnicode_DATA(uni); + /* Compare Unicode string and source character set string */ + for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++) + if (chr != str[i]) + return (chr < (unsigned char)(str[i])) ? -1 : 1; + /* This check keeps Python strings that end in '\0' from comparing equal + to C strings identical up to that point. 
*/ + if (PyUnicode_GET_LENGTH(uni) != i || chr) + return 1; /* uni is longer */ + if (str[i]) + return -1; /* str is longer */ + return 0; + } } -- cgit v1.2.1 From 209e15bf433577bc45b9f31a71d862d1121686c5 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 30 Oct 2013 18:27:13 +0100 Subject: Issue #19424: Fix a compiler warning memcmp() just takes raw pointers --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e6195fe53b..abaadcda9f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10580,7 +10580,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) return -1; kind = PyUnicode_KIND(uni); if (kind == PyUnicode_1BYTE_KIND) { - char *data = PyUnicode_1BYTE_DATA(uni); + const void *data = PyUnicode_1BYTE_DATA(uni); Py_ssize_t len1 = PyUnicode_GET_LENGTH(uni); size_t len, len2 = strlen(str); int cmp; -- cgit v1.2.1 From e0b216cf5b1ddd951c08d6a0da19ee0913d1496e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 3 Nov 2013 13:53:12 +0100 Subject: Issue #19424: Fix a compiler warning on comparing signed/unsigned size_t Patch written by Zachary Ware. 
--- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index abaadcda9f..01e5355449 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10581,7 +10581,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) kind = PyUnicode_KIND(uni); if (kind == PyUnicode_1BYTE_KIND) { const void *data = PyUnicode_1BYTE_DATA(uni); - Py_ssize_t len1 = PyUnicode_GET_LENGTH(uni); + size_t len1 = (size_t)PyUnicode_GET_LENGTH(uni); size_t len, len2 = strlen(str); int cmp; -- cgit v1.2.1 From fccc53e9c5cb955b0cc5cbfabd0667c32557e82a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 4 Nov 2013 11:08:10 +0100 Subject: Issue #16286: write a new subfunction bytes_compare_eq() * cleanup bytes_richcompare() * PyUnicode_RichCompare(): replace a test with a XOR --- Objects/bytesobject.c | 84 +++++++++++++++++++++++++++---------------------- Objects/unicodeobject.c | 8 ++--- 2 files changed, 50 insertions(+), 42 deletions(-) (limited to 'Objects') diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 529c634708..9aa3ee286b 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -802,6 +802,23 @@ bytes_item(PyBytesObject *a, Py_ssize_t i) return PyLong_FromLong((unsigned char)a->ob_sval[i]); } +Py_LOCAL(int) +bytes_compare_eq(PyBytesObject *a, PyBytesObject *b) +{ + int cmp; + Py_ssize_t len; + + len = Py_SIZE(a); + if (Py_SIZE(b) != len) + return 0; + + if (a->ob_sval[0] != b->ob_sval[0]) + return 0; + + cmp = memcmp(a->ob_sval, b->ob_sval, len); + return (cmp == 0); +} + static PyObject* bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op) { @@ -822,53 +839,46 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op) return NULL; } result = Py_NotImplemented; - goto out; } - if (a == b) { + else if (a == b) { switch (op) { case Py_EQ:case Py_LE:case Py_GE: result = Py_True; - goto out; + break; case 
Py_NE:case Py_LT:case Py_GT: result = Py_False; - goto out; + break; } } - if (op == Py_EQ) { - /* Supporting Py_NE here as well does not save - much time, since Py_NE is rarely used. */ - if (Py_SIZE(a) == Py_SIZE(b) - && (a->ob_sval[0] == b->ob_sval[0] - && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) { - result = Py_True; - } else { - result = Py_False; + else if (op == Py_EQ || op == Py_NE) { + int eq = bytes_compare_eq(a, b); + eq ^= (op == Py_NE); + result = eq ? Py_True : Py_False; + } + else { + len_a = Py_SIZE(a); len_b = Py_SIZE(b); + min_len = (len_a < len_b) ? len_a : len_b; + if (min_len > 0) { + c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval); + if (c==0) + c = memcmp(a->ob_sval, b->ob_sval, min_len); } - goto out; - } - len_a = Py_SIZE(a); len_b = Py_SIZE(b); - min_len = (len_a < len_b) ? len_a : len_b; - if (min_len > 0) { - c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval); - if (c==0) - c = memcmp(a->ob_sval, b->ob_sval, min_len); - } else - c = 0; - if (c == 0) - c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0; - switch (op) { - case Py_LT: c = c < 0; break; - case Py_LE: c = c <= 0; break; - case Py_EQ: assert(0); break; /* unreachable */ - case Py_NE: c = c != 0; break; - case Py_GT: c = c > 0; break; - case Py_GE: c = c >= 0; break; - default: - result = Py_NotImplemented; - goto out; + else + c = 0; + if (c == 0) + c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0; + switch (op) { + case Py_LT: c = c < 0; break; + case Py_LE: c = c <= 0; break; + case Py_GT: c = c > 0; break; + case Py_GE: c = c >= 0; break; + default: + assert(op != Py_EQ && op != Py_NE); + Py_RETURN_NOTIMPLEMENTED; + } + result = c ? Py_True : Py_False; } - result = c ? 
Py_True : Py_False; - out: + Py_INCREF(result); return result; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 01e5355449..17ae481aec 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10526,7 +10526,7 @@ unicode_compare(PyObject *str1, PyObject *str2) #undef COMPARE } -static int +Py_LOCAL(int) unicode_compare_eq(PyObject *str1, PyObject *str2) { int kind; @@ -10630,10 +10630,8 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) if (op == Py_EQ || op == Py_NE) { result = unicode_compare_eq(left, right); - if (op == Py_EQ) - v = TEST_COND(result); - else - v = TEST_COND(!result); + result ^= (op == Py_NE); + v = TEST_COND(result); } else { result = unicode_compare(left, right); -- cgit v1.2.1 From 944007fbc0dd42ac2e24a9d462e861234d109483 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 4 Nov 2013 11:23:05 +0100 Subject: Issue #16286: optimize PyUnicode_RichCompare() for identical strings (same pointer) for any operator, not only Py_EQ and Py_NE. Code of bytes_richcompare() and PyUnicode_RichCompare() is now closer. --- Objects/bytesobject.c | 23 ++++++++++++++++------- Objects/unicodeobject.c | 24 +++++++++++++++++++----- 2 files changed, 35 insertions(+), 12 deletions(-) (limited to 'Objects') diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 9aa3ee286b..0a9d04d6db 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -842,12 +842,20 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op) } else if (a == b) { switch (op) { - case Py_EQ:case Py_LE:case Py_GE: + case Py_EQ: + case Py_LE: + case Py_GE: + /* a string is equal to itself */ result = Py_True; break; - case Py_NE:case Py_LT:case Py_GT: + case Py_NE: + case Py_LT: + case Py_GT: result = Py_False; break; + default: + PyErr_BadArgument(); + return NULL; } } else if (op == Py_EQ || op == Py_NE) { @@ -856,11 +864,12 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op) result = eq ? 
Py_True : Py_False; } else { - len_a = Py_SIZE(a); len_b = Py_SIZE(b); - min_len = (len_a < len_b) ? len_a : len_b; + len_a = Py_SIZE(a); + len_b = Py_SIZE(b); + min_len = Py_MIN(len_a, len_b); if (min_len > 0) { c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval); - if (c==0) + if (c == 0) c = memcmp(a->ob_sval, b->ob_sval, min_len); } else @@ -873,8 +882,8 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op) case Py_GT: c = c > 0; break; case Py_GE: c = c >= 0; break; default: - assert(op != Py_EQ && op != Py_NE); - Py_RETURN_NOTIMPLEMENTED; + PyErr_BadArgument(); + return NULL; } result = c ? Py_True : Py_False; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 17ae481aec..f0aff5f16a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10534,10 +10534,6 @@ unicode_compare_eq(PyObject *str1, PyObject *str2) Py_ssize_t len; int cmp; - /* a string is equal to itself */ - if (str1 == str2) - return 1; - len = PyUnicode_GET_LENGTH(str1); if (PyUnicode_GET_LENGTH(str2) != len) return 0; @@ -10628,7 +10624,25 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) PyUnicode_READY(right) == -1) return NULL; - if (op == Py_EQ || op == Py_NE) { + if (left == right) { + switch (op) { + case Py_EQ: + case Py_LE: + case Py_GE: + /* a string is equal to itself */ + v = Py_True; + break; + case Py_NE: + case Py_LT: + case Py_GT: + v = Py_False; + break; + default: + PyErr_BadArgument(); + return NULL; + } + } + else if (op == Py_EQ || op == Py_NE) { result = unicode_compare_eq(left, right); result ^= (op == Py_NE); v = TEST_COND(result); -- cgit v1.2.1 From 6812d64f7bb4b91c6937ca0c9d84db70e18e98ac Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 4 Nov 2013 11:27:14 +0100 Subject: Issue #16286: remove duplicated identity check from unicode_compare() Move the test to PyUnicode_Compare() --- Objects/unicodeobject.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'Objects') diff 
--git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f0aff5f16a..154103dfea 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10428,10 +10428,6 @@ unicode_compare(PyObject *str1, PyObject *str2) void *data1, *data2; Py_ssize_t len1, len2, len; - /* a string is equal to itself */ - if (str1 == str2) - return 0; - kind1 = PyUnicode_KIND(str1); kind2 = PyUnicode_KIND(str2); data1 = PyUnicode_DATA(str1); @@ -10555,6 +10551,11 @@ PyUnicode_Compare(PyObject *left, PyObject *right) if (PyUnicode_READY(left) == -1 || PyUnicode_READY(right) == -1) return -1; + + /* a string is equal to itself */ + if (left == right) + return 0; + return unicode_compare(left, right); } PyErr_Format(PyExc_TypeError, -- cgit v1.2.1 From a8dd861b5e647a493bd8c10295d0bfcc52513411 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 4 Nov 2013 11:28:26 +0100 Subject: Issue #19424: PyUnicode_CompareWithASCIIString() normalizes memcmp() result to -1, 0, 1 --- Objects/unicodeobject.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 154103dfea..574b57a259 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10584,8 +10584,12 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) len = Py_MIN(len1, len2); cmp = memcmp(data, str, len); - if (cmp != 0) - return cmp; + if (cmp != 0) { + if (cmp < 0) + return -1; + else + return 1; + } if (len1 > len2) return 1; /* uni is longer */ if (len2 > len1) -- cgit v1.2.1 From 3f015d12ddbbb67cee527bf30110281390bb219a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 6 Nov 2013 18:57:29 +0100 Subject: Issue #19512: Py_ReprEnter() and Py_ReprLeave() now use an identifier for the "Py_Repr" dictionary key --- Objects/object.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index 95a5334d8b..80786239fe 100644 
--- a/Objects/object.c +++ b/Objects/object.c @@ -1969,7 +1969,7 @@ _PyObject_DebugTypeStats(FILE *out) See dictobject.c and listobject.c for examples of use. */ -#define KEY "Py_Repr" +_Py_IDENTIFIER(Py_Repr); int Py_ReprEnter(PyObject *obj) @@ -1981,12 +1981,12 @@ Py_ReprEnter(PyObject *obj) dict = PyThreadState_GetDict(); if (dict == NULL) return 0; - list = PyDict_GetItemString(dict, KEY); + list = _PyDict_GetItemId(dict, &PyId_Py_Repr); if (list == NULL) { list = PyList_New(0); if (list == NULL) return -1; - if (PyDict_SetItemString(dict, KEY, list) < 0) + if (_PyDict_SetItemId(dict, &PyId_Py_Repr, list) < 0) return -1; Py_DECREF(list); } @@ -2014,7 +2014,7 @@ Py_ReprLeave(PyObject *obj) if (dict == NULL) goto finally; - list = PyDict_GetItemString(dict, KEY); + list = _PyDict_GetItemId(dict, &PyId_Py_Repr); if (list == NULL || !PyList_Check(list)) goto finally; -- cgit v1.2.1 From bb96238c450ebdcdb364139e57e69eaecbf84062 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 6 Nov 2013 18:58:22 +0100 Subject: Issue #19512: Add a new _PyDict_DelItemId() function, similar to PyDict_DelItemString() but using an identifier for the key --- Objects/dictobject.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 2424176b71..a5072c8bd2 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2735,6 +2735,15 @@ PyDict_SetItemString(PyObject *v, const char *key, PyObject *item) return err; } +int +_PyDict_DelItemId(PyObject *v, _Py_Identifier *key) +{ + PyObject *kv = _PyUnicode_FromId(key); /* borrowed */ + if (kv == NULL) + return -1; + return PyDict_DelItem(v, kv); +} + int PyDict_DelItemString(PyObject *v, const char *key) { -- cgit v1.2.1 From a6a12381e9bc2d8652a1cfccb4a053d43072affe Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 6 Nov 2013 18:59:18 +0100 Subject: Issue #19512: type_abstractmethods() and type_set_abstractmethods() now use an identifier for the 
"__abstractmethods__" string --- Objects/typeobject.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 309afa4b85..02ad0b7e57 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -48,6 +48,7 @@ _Py_IDENTIFIER(__hash__); _Py_IDENTIFIER(__module__); _Py_IDENTIFIER(__name__); _Py_IDENTIFIER(__new__); +_Py_IDENTIFIER(__abstractmethods__); static PyObject * slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds); @@ -383,9 +384,11 @@ type_abstractmethods(PyTypeObject *type, void *context) /* type itself has an __abstractmethods__ descriptor (this). Don't return that. */ if (type != &PyType_Type) - mod = PyDict_GetItemString(type->tp_dict, "__abstractmethods__"); + mod = _PyDict_GetItemId(type->tp_dict, &PyId___abstractmethods__); if (!mod) { - PyErr_SetString(PyExc_AttributeError, "__abstractmethods__"); + PyObject *message = _PyUnicode_FromId(&PyId___abstractmethods__); + if (message) + PyErr_SetObject(PyExc_AttributeError, message); return NULL; } Py_XINCREF(mod); @@ -404,13 +407,15 @@ type_set_abstractmethods(PyTypeObject *type, PyObject *value, void *context) abstract = PyObject_IsTrue(value); if (abstract < 0) return -1; - res = PyDict_SetItemString(type->tp_dict, "__abstractmethods__", value); + res = _PyDict_SetItemId(type->tp_dict, &PyId___abstractmethods__, value); } else { abstract = 0; - res = PyDict_DelItemString(type->tp_dict, "__abstractmethods__"); + res = _PyDict_DelItemId(type->tp_dict, &PyId___abstractmethods__); if (res && PyErr_ExceptionMatches(PyExc_KeyError)) { - PyErr_SetString(PyExc_AttributeError, "__abstractmethods__"); + PyObject *message = _PyUnicode_FromId(&PyId___abstractmethods__); + if (message) + PyErr_SetObject(PyExc_AttributeError, message); return -1; } } -- cgit v1.2.1 From 36fcb30e4fb853c47112a363d46b9e77f3c5e97d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 7 Nov 2013 00:46:04 +0100 Subject: Issue 
#19512: add _PyUnicode_CompareWithId() function _PyUnicode_CompareWithId() is faster than PyUnicode_CompareWithASCIIString() when both strings are equal and interned. Add also _PyId_builtins identifier for "builtins" common string. --- Objects/typeobject.c | 23 ++++++++++++----------- Objects/unicodeobject.c | 9 +++++++++ 2 files changed, 21 insertions(+), 11 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 02ad0b7e57..299c6117c0 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -345,11 +345,10 @@ type_set_qualname(PyTypeObject *type, PyObject *value, void *context) static PyObject * type_module(PyTypeObject *type, void *context) { - PyObject *mod; char *s; if (type->tp_flags & Py_TPFLAGS_HEAPTYPE) { - mod = _PyDict_GetItemId(type->tp_dict, &PyId___module__); + PyObject *mod = _PyDict_GetItemId(type->tp_dict, &PyId___module__); if (!mod) { PyErr_Format(PyExc_AttributeError, "__module__"); return 0; @@ -358,11 +357,14 @@ type_module(PyTypeObject *type, void *context) return mod; } else { + PyObject *name; s = strrchr(type->tp_name, '.'); if (s != NULL) return PyUnicode_FromStringAndSize( type->tp_name, (Py_ssize_t)(s - type->tp_name)); - return PyUnicode_FromString("builtins"); + name = _PyUnicode_FromId(&_PyId_builtins); + Py_XINCREF(name); + return name; } } @@ -712,7 +714,7 @@ type_repr(PyTypeObject *type) return NULL; } - if (mod != NULL && PyUnicode_CompareWithASCIIString(mod, "builtins")) + if (mod != NULL && _PyUnicode_CompareWithId(mod, &_PyId_builtins)) rtn = PyUnicode_FromFormat("", mod, name); else rtn = PyUnicode_FromFormat("", type->tp_name); @@ -2143,7 +2145,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) if (!valid_identifier(tmp)) goto error; assert(PyUnicode_Check(tmp)); - if (PyUnicode_CompareWithASCIIString(tmp, "__dict__") == 0) { + if (_PyUnicode_CompareWithId(tmp, &PyId___dict__) == 0) { if (!may_add_dict || add_dict) { PyErr_SetString(PyExc_TypeError, 
"__dict__ slot disallowed: " @@ -2174,7 +2176,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) for (i = j = 0; i < nslots; i++) { tmp = PyTuple_GET_ITEM(slots, i); if ((add_dict && - PyUnicode_CompareWithASCIIString(tmp, "__dict__") == 0) || + _PyUnicode_CompareWithId(tmp, &PyId___dict__) == 0) || (add_weak && PyUnicode_CompareWithASCIIString(tmp, "__weakref__") == 0)) continue; @@ -3183,7 +3185,7 @@ object_repr(PyObject *self) Py_XDECREF(mod); return NULL; } - if (mod != NULL && PyUnicode_CompareWithASCIIString(mod, "builtins")) + if (mod != NULL && _PyUnicode_CompareWithId(mod, &_PyId_builtins)) rtn = PyUnicode_FromFormat("<%U.%U object at %p>", mod, name, self); else rtn = PyUnicode_FromFormat("<%s object at %p>", @@ -6336,8 +6338,8 @@ super_getattro(PyObject *self, PyObject *name) /* We want __class__ to return the class of the super object (i.e. super, or a subclass), not the class of su->obj. */ skip = (PyUnicode_Check(name) && - PyUnicode_GET_LENGTH(name) == 9 && - PyUnicode_CompareWithASCIIString(name, "__class__") == 0); + PyUnicode_GET_LENGTH(name) == 9 && + _PyUnicode_CompareWithId(name, &PyId___class__) == 0); } if (!skip) { @@ -6543,8 +6545,7 @@ super_init(PyObject *self, PyObject *args, PyObject *kwds) for (i = 0; i < n; i++) { PyObject *name = PyTuple_GET_ITEM(co->co_freevars, i); assert(PyUnicode_Check(name)); - if (!PyUnicode_CompareWithASCIIString(name, - "__class__")) { + if (!_PyUnicode_CompareWithId(name, &PyId___class__)) { Py_ssize_t index = co->co_nlocals + PyTuple_GET_SIZE(co->co_cellvars) + i; PyObject *cell = f->f_localsplus[index]; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 574b57a259..4ae73771f2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10565,6 +10565,15 @@ PyUnicode_Compare(PyObject *left, PyObject *right) return -1; } +int +_PyUnicode_CompareWithId(PyObject *left, _Py_Identifier *right) +{ + PyObject *right_str = _PyUnicode_FromId(right); /* borrowed */ + if 
(right_str == NULL) + return -1; + return PyUnicode_Compare(left, right_str); +} + int PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) { -- cgit v1.2.1 From b727bdb13eb840c0d5ed319c2ca8e4c84c429e7b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 7 Nov 2013 00:43:05 +0100 Subject: Issue #19512: Use the new _PyId_builtins identifier --- Objects/object.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index 80786239fe..9d96e86c58 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1122,8 +1122,12 @@ PyObject_SelfIter(PyObject *obj) PyObject * _PyObject_GetBuiltin(const char *name) { - PyObject *mod, *attr; - mod = PyImport_ImportModule("builtins"); + PyObject *mod_name, *mod, *attr; + + mod_name = _PyUnicode_FromId(&_PyId_builtins); /* borrowed */ + if (mod_name == NULL) + return NULL; + mod = PyImport_Import(mod_name); if (mod == NULL) return NULL; attr = PyObject_GetAttrString(mod, name); -- cgit v1.2.1 From 64d9c136cc42828c21bb728ab158e9ec6b3290aa Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Thu, 7 Nov 2013 19:18:34 +0200 Subject: #17080: improve error message of float/complex when the wrong type is passed. 
--- Objects/complexobject.c | 10 ++++++---- Objects/floatobject.c | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'Objects') diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 64e7b44577..60a388fa24 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -773,8 +773,9 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) goto error; } else if (PyObject_AsCharBuffer(v, &s, &len)) { - PyErr_SetString(PyExc_TypeError, - "complex() argument must be a string or a number"); + PyErr_Format(PyExc_TypeError, + "complex() argument must be a string or a number, not '%.200s'", + Py_TYPE(v)->tp_name); return NULL; } @@ -953,8 +954,9 @@ complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds) nbi = i->ob_type->tp_as_number; if (nbr == NULL || nbr->nb_float == NULL || ((i != NULL) && (nbi == NULL || nbi->nb_float == NULL))) { - PyErr_SetString(PyExc_TypeError, - "complex() argument must be a string or a number"); + PyErr_Format(PyExc_TypeError, + "complex() argument must be a string or a number, not '%.200s'", + Py_TYPE(r)->tp_name); if (own_r) { Py_DECREF(r); } diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 9771063644..abea975c59 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -144,8 +144,9 @@ PyFloat_FromString(PyObject *v) } } else if (PyObject_AsCharBuffer(v, &s, &len)) { - PyErr_SetString(PyExc_TypeError, - "float() argument must be a string or a number"); + PyErr_Format(PyExc_TypeError, + "float() argument must be a string or a number, not '%.200s'", + Py_TYPE(v)->tp_name); return NULL; } last = s + len; -- cgit v1.2.1 From ae9ee6f438d281574857937ce069f255669fd940 Mon Sep 17 00:00:00 2001 From: "Martin v. L?wis" Date: Thu, 7 Nov 2013 18:46:53 +0100 Subject: Issue #19514: Deduplicate some _Py_IDENTIFIER declarations. Patch by Andrei Dorian Duma. 
--- Objects/typeobject.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 299c6117c0..8bccb68692 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -39,16 +39,20 @@ struct method_cache_entry { static struct method_cache_entry method_cache[1 << MCACHE_SIZE_EXP]; static unsigned int next_version_tag = 0; +/* alphabetical order */ +_Py_IDENTIFIER(__abstractmethods__); _Py_IDENTIFIER(__class__); +_Py_IDENTIFIER(__delitem__); _Py_IDENTIFIER(__dict__); _Py_IDENTIFIER(__doc__); -_Py_IDENTIFIER(__getitem__); _Py_IDENTIFIER(__getattribute__); +_Py_IDENTIFIER(__getitem__); _Py_IDENTIFIER(__hash__); +_Py_IDENTIFIER(__len__); _Py_IDENTIFIER(__module__); _Py_IDENTIFIER(__name__); _Py_IDENTIFIER(__new__); -_Py_IDENTIFIER(__abstractmethods__); +_Py_IDENTIFIER(__setitem__); static PyObject * slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds); @@ -5068,7 +5072,6 @@ FUNCNAME(PyObject *self, ARG1TYPE arg1, ARG2TYPE arg2) \ static Py_ssize_t slot_sq_length(PyObject *self) { - _Py_IDENTIFIER(__len__); PyObject *res = call_method(self, &PyId___len__, "()"); Py_ssize_t len; @@ -5129,8 +5132,6 @@ static int slot_sq_ass_item(PyObject *self, Py_ssize_t index, PyObject *value) { PyObject *res; - _Py_IDENTIFIER(__delitem__); - _Py_IDENTIFIER(__setitem__); if (value == NULL) res = call_method(self, &PyId___delitem__, "(n)", index); @@ -5180,8 +5181,6 @@ static int slot_mp_ass_subscript(PyObject *self, PyObject *key, PyObject *value) { PyObject *res; - _Py_IDENTIFIER(__delitem__); - _Py_IDENTIFIER(__setitem__); if (value == NULL) res = call_method(self, &PyId___delitem__, "(O)", key); @@ -5232,7 +5231,6 @@ slot_nb_bool(PyObject *self) PyObject *func, *args; int result = -1; int using_len = 0; - _Py_IDENTIFIER(__len__); _Py_IDENTIFIER(__bool__); func = lookup_maybe(self, &PyId___bool__); -- cgit v1.2.1 From 8a00a8df9c8b876ab12a4402f639ac8f424a4420 Mon Sep 17 00:00:00 
2001 From: Victor Stinner Date: Thu, 7 Nov 2013 13:33:36 +0100 Subject: Fix _Py_normalize_encoding(): ensure that buffer is big enough to store "utf-8" if the input string is NULL --- Objects/unicodeobject.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4ae73771f2..1375ef3093 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2983,6 +2983,8 @@ _Py_normalize_encoding(const char *encoding, char *l_end; if (encoding == NULL) { + if (lower_len < 6) + return 0; strcpy(lower, "utf-8"); return 1; } -- cgit v1.2.1 From 0256aab9761d4459c9ceb8fdb11e4fe86f2bdb69 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 7 Nov 2013 22:22:39 +0100 Subject: frameobject.c: Use an identifier instead of creating explicitly an interned string for "__builtins__" literal string --- Objects/frameobject.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'Objects') diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 76e77b8a9b..63f03a64e0 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -601,13 +601,13 @@ PyTypeObject PyFrame_Type = { 0, /* tp_dict */ }; -static PyObject *builtin_object; +_Py_IDENTIFIER(__builtins__); int _PyFrame_Init() { - builtin_object = PyUnicode_InternFromString("__builtins__"); - if (builtin_object == NULL) - return 0; + /* Before, PyId___builtins__ was a string created explicitly in + this function. Now there is nothing to initialize anymore, but + the function is kept for backward compatibility.
*/ return 1; } @@ -628,7 +628,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, } #endif if (back == NULL || back->f_globals != globals) { - builtins = PyDict_GetItem(globals, builtin_object); + builtins = _PyDict_GetItemId(globals, &PyId___builtins__); if (builtins) { if (PyModule_Check(builtins)) { builtins = PyModule_GetDict(builtins); @@ -994,8 +994,6 @@ void PyFrame_Fini(void) { (void)PyFrame_ClearFreeList(); - Py_XDECREF(builtin_object); - builtin_object = NULL; } /* Print summary info about the state of the optimized allocator */ -- cgit v1.2.1 From 359ed22ea49d63923b451351ebf80decaaa7b79e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 7 Nov 2013 23:07:29 +0100 Subject: Issue #19512, #19515: remove shared identifiers, move identifiers where they are used. Move also _Py_IDENTIFIER() definitions to the top in modified files to remove identifiers duplicated in the same file. --- Objects/object.c | 13 +++++++------ Objects/typeobject.c | 7 ++++--- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index 9d96e86c58..acc34af349 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -8,6 +8,12 @@ extern "C" { #endif +_Py_IDENTIFIER(Py_Repr); +_Py_IDENTIFIER(__bytes__); +_Py_IDENTIFIER(__dir__); +_Py_IDENTIFIER(__isabstractmethod__); +_Py_IDENTIFIER(builtins); + #ifdef Py_REF_DEBUG Py_ssize_t _Py_RefTotal; @@ -560,7 +566,6 @@ PyObject * PyObject_Bytes(PyObject *v) { PyObject *result, *func; - _Py_IDENTIFIER(__bytes__); if (v == NULL) return PyBytes_FromString(""); @@ -949,7 +954,6 @@ _PyObject_IsAbstract(PyObject *obj) { int res; PyObject* isabstract; - _Py_IDENTIFIER(__isabstractmethod__); if (obj == NULL) return 0; @@ -1124,7 +1128,7 @@ _PyObject_GetBuiltin(const char *name) { PyObject *mod_name, *mod, *attr; - mod_name = _PyUnicode_FromId(&_PyId_builtins); /* borrowed */ + mod_name = _PyUnicode_FromId(&PyId_builtins); /* borrowed */ if (mod_name == NULL) 
return NULL; mod = PyImport_Import(mod_name); @@ -1440,7 +1444,6 @@ static PyObject * _dir_object(PyObject *obj) { PyObject *result, *sorted; - _Py_IDENTIFIER(__dir__); PyObject *dirfunc = _PyObject_LookupSpecial(obj, &PyId___dir__); assert(obj); @@ -1973,8 +1976,6 @@ _PyObject_DebugTypeStats(FILE *out) See dictobject.c and listobject.c for examples of use. */ -_Py_IDENTIFIER(Py_Repr); - int Py_ReprEnter(PyObject *obj) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 8bccb68692..0708d678e1 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -53,6 +53,7 @@ _Py_IDENTIFIER(__module__); _Py_IDENTIFIER(__name__); _Py_IDENTIFIER(__new__); _Py_IDENTIFIER(__setitem__); +_Py_IDENTIFIER(builtins); static PyObject * slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds); @@ -366,7 +367,7 @@ type_module(PyTypeObject *type, void *context) if (s != NULL) return PyUnicode_FromStringAndSize( type->tp_name, (Py_ssize_t)(s - type->tp_name)); - name = _PyUnicode_FromId(&_PyId_builtins); + name = _PyUnicode_FromId(&PyId_builtins); Py_XINCREF(name); return name; } @@ -718,7 +719,7 @@ type_repr(PyTypeObject *type) return NULL; } - if (mod != NULL && _PyUnicode_CompareWithId(mod, &_PyId_builtins)) + if (mod != NULL && _PyUnicode_CompareWithId(mod, &PyId_builtins)) rtn = PyUnicode_FromFormat("", mod, name); else rtn = PyUnicode_FromFormat("", type->tp_name); @@ -3189,7 +3190,7 @@ object_repr(PyObject *self) Py_XDECREF(mod); return NULL; } - if (mod != NULL && _PyUnicode_CompareWithId(mod, &_PyId_builtins)) + if (mod != NULL && _PyUnicode_CompareWithId(mod, &PyId_builtins)) rtn = PyUnicode_FromFormat("<%U.%U object at %p>", mod, name, self); else rtn = PyUnicode_FromFormat("<%s object at %p>", -- cgit v1.2.1 From 99263c04f3b1c36b261f8a24a02ef06d703e6909 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 7 Nov 2013 23:12:23 +0100 Subject: _Py_normalize_encoding(): explain how the value 6 was computed --- Objects/unicodeobject.c | 1 + 1 file 
changed, 1 insertion(+) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1375ef3093..224a80b45e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2983,6 +2983,7 @@ _Py_normalize_encoding(const char *encoding, char *l_end; if (encoding == NULL) { + /* 6 == strlen("utf-8") + 1 */ if (lower_len < 6) return 0; strcpy(lower, "utf-8"); -- cgit v1.2.1 From fb9987791e8c7266ae19cf54761130808e66538b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 12 Nov 2013 21:39:02 +0100 Subject: Issue #19515: Remove identifiers duplicated in the same file. Patch written by Andrei Dorian Duma. --- Objects/typeobject.c | 1 - 1 file changed, 1 deletion(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 0708d678e1..09f77fa53d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5633,7 +5633,6 @@ slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) PyObject *func; PyObject *newargs, *x; Py_ssize_t i, n; - _Py_IDENTIFIER(__new__); func = _PyObject_GetAttrId((PyObject *)type, &PyId___new__); if (func == NULL) -- cgit v1.2.1 From 04bc197701ee728c336b097129d912f2c94839f8 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 13 Nov 2013 14:17:30 +0100 Subject: Don't use deprecated function PyUnicode_GET_SIZE() Replace it with PyUnicode_GET_LENGTH() or PyUnicode_AsUnicodeAndSize() --- Objects/namespaceobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c index 9e950946aa..720ac0d30f 100644 --- a/Objects/namespaceobject.c +++ b/Objects/namespaceobject.c @@ -101,7 +101,7 @@ namespace_repr(PyObject *ns) goto error; while ((key = PyIter_Next(keys_iter)) != NULL) { - if (PyUnicode_Check(key) && PyUnicode_GET_SIZE(key) > 0) { + if (PyUnicode_Check(key) && PyUnicode_GET_LENGTH(key) > 0) { PyObject *value, *item; value = PyDict_GetItem(d, key); -- cgit v1.2.1 From 
644dcdc965cc26de0542280e625dbe6cd1a070ed Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Wed, 13 Nov 2013 23:49:21 +1000 Subject: Close #17828: better handling of codec errors - output type errors now redirect users to the type-neutral convenience functions in the codecs module - stateless errors that occur during encoding and decoding will now be automatically wrapped in exceptions that give the name of the codec involved --- Objects/exceptions.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++ Objects/unicodeobject.c | 27 ++++++++---- 2 files changed, 131 insertions(+), 9 deletions(-) (limited to 'Objects') diff --git a/Objects/exceptions.c b/Objects/exceptions.c index de5d746858..53d8b6668a 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2591,3 +2591,116 @@ _PyExc_Fini(void) free_preallocated_memerrors(); Py_CLEAR(errnomap); } + +/* Helper to do the equivalent of "raise X from Y" in C, but always using + * the current exception rather than passing one in. + * + * We currently limit this to *only* exceptions that use the BaseException + * tp_init and tp_new methods, since we can be reasonably sure we can wrap + * those correctly without losing data and without losing backwards + * compatibility. + * + * We also aim to rule out *all* exceptions that might be storing additional + * state, whether by having a size difference relative to BaseException, + * additional arguments passed in during construction or by having a + * non-empty instance dict. + * + * We need to be very careful with what we wrap, since changing types to + * a broader exception type would be backwards incompatible for + * existing codecs, and with different init or new method implementations + * may either not support instantiation with PyErr_Format or lose + * information when instantiated that way. + * + * XXX (ncoghlan): This could be made more comprehensive by exploiting the + * fact that exceptions are expected to support pickling. 
If more builtin + * exceptions (e.g. AttributeError) start to be converted to rich + * exceptions with additional attributes, that's probably a better approach + * to pursue over adding special cases for particular stateful subclasses. + * + * Returns a borrowed reference to the new exception (if any), NULL if the + * existing exception was left in place. + */ +PyObject * +_PyErr_TrySetFromCause(const char *format, ...) +{ + PyObject* msg_prefix; + PyObject *exc, *val, *tb; + PyTypeObject *caught_type; + PyObject *instance_dict; + PyObject *instance_args; + Py_ssize_t num_args; + PyObject *new_exc, *new_val, *new_tb; + va_list vargs; + +#ifdef HAVE_STDARG_PROTOTYPES + va_start(vargs, format); +#else + va_start(vargs); +#endif + + PyErr_Fetch(&exc, &val, &tb); + caught_type = (PyTypeObject *) exc; + /* Ensure type info indicates no extra state is stored at the C level */ + if (caught_type->tp_init != (initproc) BaseException_init || + caught_type->tp_new != BaseException_new || + caught_type->tp_basicsize != _PyExc_BaseException.tp_basicsize || + caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize + ) { + /* We can't be sure we can wrap this safely, since it may contain + * more state than just the exception type. Accordingly, we just + * leave it alone. 
+ */ + PyErr_Restore(exc, val, tb); + return NULL; + } + + /* Check the args are empty or contain a single string */ + PyErr_NormalizeException(&exc, &val, &tb); + instance_args = ((PyBaseExceptionObject *) val)->args; + num_args = PyTuple_GET_SIZE(instance_args); + if ((num_args > 1) || + (num_args == 1 && + !PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0)) + ) + ) { + /* More than 1 arg, or the one arg we do have isn't a string + */ + PyErr_Restore(exc, val, tb); + return NULL; + } + + /* Ensure the instance dict is also empty */ + instance_dict = *_PyObject_GetDictPtr(val); + if (instance_dict != NULL && PyObject_Length(instance_dict) > 0) { + /* While we could potentially copy a non-empty instance dictionary + * to the replacement exception, for now we take the more + * conservative path of leaving exceptions with attributes set + * alone. + */ + PyErr_Restore(exc, val, tb); + return NULL; + } + + /* For exceptions that we can wrap safely, we chain the original + * exception to a new one of the exact same type with an + * error message that mentions the additional details and the + * original exception. + * + * It would be nice to wrap OSError and various other exception + * types as well, but that's quite a bit trickier due to the extra + * state potentially stored on OSError instances. 
+ */ + msg_prefix = PyUnicode_FromFormatV(format, vargs); + if (msg_prefix == NULL) + return NULL; + + PyErr_Format(exc, "%U (%s: %S)", + msg_prefix, Py_TYPE(val)->tp_name, val); + Py_DECREF(exc); + Py_XDECREF(tb); + PyErr_Fetch(&new_exc, &new_val, &new_tb); + PyErr_NormalizeException(&new_exc, &new_val, &new_tb); + PyException_SetCause(new_val, val); + PyErr_Restore(new_exc, new_val, new_tb); + return new_val; +} diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 224a80b45e..77898165a7 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3054,8 +3054,10 @@ PyUnicode_Decode(const char *s, goto onError; if (!PyUnicode_Check(unicode)) { PyErr_Format(PyExc_TypeError, - "decoder did not return a str object (type=%.400s)", - Py_TYPE(unicode)->tp_name); + "'%.400s' decoder returned '%.400s' instead of 'str'; " + "use codecs.decode() to decode to arbitrary types", + encoding, + Py_TYPE(unicode)->tp_name, Py_TYPE(unicode)->tp_name); Py_DECREF(unicode); goto onError; } @@ -3113,8 +3115,10 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode, goto onError; if (!PyUnicode_Check(v)) { PyErr_Format(PyExc_TypeError, - "decoder did not return a str object (type=%.400s)", - Py_TYPE(v)->tp_name); + "'%.400s' decoder returned '%.400s' instead of 'str'; " + "use codecs.decode() to decode to arbitrary types", + encoding, + Py_TYPE(unicode)->tp_name, Py_TYPE(unicode)->tp_name); Py_DECREF(v); goto onError; } @@ -3425,7 +3429,8 @@ PyUnicode_AsEncodedString(PyObject *unicode, PyObject *b; error = PyErr_WarnFormat(PyExc_RuntimeWarning, 1, - "encoder %s returned bytearray instead of bytes", + "encoder %s returned bytearray instead of bytes; " + "use codecs.encode() to encode to arbitrary types", encoding); if (error) { Py_DECREF(v); @@ -3438,8 +3443,10 @@ PyUnicode_AsEncodedString(PyObject *unicode, } PyErr_Format(PyExc_TypeError, - "encoder did not return a bytes object (type=%.400s)", - Py_TYPE(v)->tp_name); + "'%.400s' encoder returned '%.400s' instead of 
'bytes'; " + "use codecs.encode() to encode to arbitrary types", + encoding, + Py_TYPE(v)->tp_name, Py_TYPE(v)->tp_name); Py_DECREF(v); return NULL; } @@ -3465,8 +3472,10 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode, goto onError; if (!PyUnicode_Check(v)) { PyErr_Format(PyExc_TypeError, - "encoder did not return an str object (type=%.400s)", - Py_TYPE(v)->tp_name); + "'%.400s' encoder returned '%.400s' instead of 'str'; " + "use codecs.encode() to encode to arbitrary types", + encoding, + Py_TYPE(v)->tp_name, Py_TYPE(v)->tp_name); Py_DECREF(v); goto onError; } -- cgit v1.2.1 From c30321c1e366a30afe4066a67cd60373f9de98cd Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Thu, 14 Nov 2013 01:39:35 +0100 Subject: Issue #17828: va_start() must be accompanied by va_end() CID 1128793: Missing varargs init or cleanup (VARARGS) --- Objects/exceptions.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'Objects') diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 53d8b6668a..2f0d5b6400 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2632,12 +2632,6 @@ _PyErr_TrySetFromCause(const char *format, ...) PyObject *new_exc, *new_val, *new_tb; va_list vargs; -#ifdef HAVE_STDARG_PROTOTYPES - va_start(vargs, format); -#else - va_start(vargs); -#endif - PyErr_Fetch(&exc, &val, &tb); caught_type = (PyTypeObject *) exc; /* Ensure type info indicates no extra state is stored at the C level */ @@ -2690,7 +2684,14 @@ _PyErr_TrySetFromCause(const char *format, ...) * types as well, but that's quite a bit trickier due to the extra * state potentially stored on OSError instances. 
*/ + +#ifdef HAVE_STDARG_PROTOTYPES + va_start(vargs, format); +#else + va_start(vargs); +#endif msg_prefix = PyUnicode_FromFormatV(format, vargs); + va_end(vargs); if (msg_prefix == NULL) return NULL; -- cgit v1.2.1 From 3c0deb2938edd28ed70ace91aceae1e9346458c3 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Thu, 14 Nov 2013 01:47:14 +0100 Subject: Issue #17828: _PyObject_GetDictPtr() may return NULL instead of a PyObject** CID 1128792: Dereference null return value (NULL_RETURNS) --- Objects/exceptions.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 2f0d5b6400..94f581b7b0 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2626,7 +2626,7 @@ _PyErr_TrySetFromCause(const char *format, ...) PyObject* msg_prefix; PyObject *exc, *val, *tb; PyTypeObject *caught_type; - PyObject *instance_dict; + PyObject **dictptr; PyObject *instance_args; Py_ssize_t num_args; PyObject *new_exc, *new_val, *new_tb; @@ -2664,8 +2664,10 @@ _PyErr_TrySetFromCause(const char *format, ...) 
} /* Ensure the instance dict is also empty */ - instance_dict = *_PyObject_GetDictPtr(val); - if (instance_dict != NULL && PyObject_Length(instance_dict) > 0) { + dictptr = _PyObject_GetDictPtr(val); + if ((dictptr != NULL) && (*dictptr != NULL) && + (PyObject_Length(*dictptr) > 0) + ) { /* While we could potentially copy a non-empty instance dictionary * to the replacement exception, for now we take the more * conservative path of leaving exceptions with attributes set -- cgit v1.2.1 From c1c90013b825a1cb6b90a2ea3b62f6d9165bd0d9 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Wed, 13 Nov 2013 23:25:01 -0500 Subject: adjust style --- Objects/exceptions.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'Objects') diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 94f581b7b0..53dab6224a 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2633,13 +2633,12 @@ _PyErr_TrySetFromCause(const char *format, ...) va_list vargs; PyErr_Fetch(&exc, &val, &tb); - caught_type = (PyTypeObject *) exc; + caught_type = (PyTypeObject *)exc; /* Ensure type info indicates no extra state is stored at the C level */ - if (caught_type->tp_init != (initproc) BaseException_init || + if (caught_type->tp_init != (initproc)BaseException_init || caught_type->tp_new != BaseException_new || caught_type->tp_basicsize != _PyExc_BaseException.tp_basicsize || - caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize - ) { + caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize) { /* We can't be sure we can wrap this safely, since it may contain * more state than just the exception type. Accordingly, we just * leave it alone. @@ -2650,13 +2649,11 @@ _PyErr_TrySetFromCause(const char *format, ...) 
/* Check the args are empty or contain a single string */ PyErr_NormalizeException(&exc, &val, &tb); - instance_args = ((PyBaseExceptionObject *) val)->args; + instance_args = ((PyBaseExceptionObject *)val)->args; num_args = PyTuple_GET_SIZE(instance_args); - if ((num_args > 1) || + if (num_args > 1 || (num_args == 1 && - !PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0)) - ) - ) { + !PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0)))) { /* More than 1 arg, or the one arg we do have isn't a string */ PyErr_Restore(exc, val, tb); @@ -2665,9 +2662,8 @@ _PyErr_TrySetFromCause(const char *format, ...) /* Ensure the instance dict is also empty */ dictptr = _PyObject_GetDictPtr(val); - if ((dictptr != NULL) && (*dictptr != NULL) && - (PyObject_Length(*dictptr) > 0) - ) { + if (dictptr != NULL && *dictptr != NULL && + PyObject_Length(*dictptr) > 0) { /* While we could potentially copy a non-empty instance dictionary * to the replacement exception, for now we take the more * conservative path of leaving exceptions with attributes set -- cgit v1.2.1 From 5bed619b5b6809c156f73856af3e1e8e1edd58e1 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Wed, 13 Nov 2013 23:49:49 -0500 Subject: fix refleaks --- Objects/exceptions.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 53dab6224a..8b109703f7 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2683,6 +2683,9 @@ _PyErr_TrySetFromCause(const char *format, ...) * state potentially stored on OSError instances. */ + Py_DECREF(exc); + Py_XDECREF(tb); + #ifdef HAVE_STDARG_PROTOTYPES va_start(vargs, format); #else @@ -2690,13 +2693,14 @@ _PyErr_TrySetFromCause(const char *format, ...) 
#endif msg_prefix = PyUnicode_FromFormatV(format, vargs); va_end(vargs); - if (msg_prefix == NULL) + if (msg_prefix == NULL) { + Py_DECREF(val); return NULL; + } PyErr_Format(exc, "%U (%s: %S)", msg_prefix, Py_TYPE(val)->tp_name, val); - Py_DECREF(exc); - Py_XDECREF(tb); + Py_DECREF(msg_prefix); PyErr_Fetch(&new_exc, &new_val, &new_tb); PyErr_NormalizeException(&new_exc, &new_val, &new_tb); PyException_SetCause(new_val, val); -- cgit v1.2.1 From 4c31587fa5c07ca821be423305695621d4ecb425 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 14 Nov 2013 22:31:41 +0100 Subject: Issue #19429, #19437: fix error handling in the OSError constructor --- Objects/exceptions.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 8b109703f7..bb61ea5a3c 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -845,7 +845,7 @@ oserror_init(PyOSErrorObject *self, PyObject **p_args, /* Steals the reference to args */ Py_CLEAR(self->args); self->args = args; - args = NULL; + *p_args = args = NULL; return 0; } @@ -885,11 +885,12 @@ OSError_new(PyTypeObject *type, PyObject *args, PyObject *kwds) PyObject *winerror = NULL; #endif + Py_INCREF(args); + if (!oserror_use_init(type)) { if (!_PyArg_NoKeywords(type->tp_name, kwds)) - return NULL; + goto error; - Py_INCREF(args); if (oserror_parse_args(&args, &myerrno, &strerror, &filename #ifdef MS_WINDOWS , &winerror @@ -932,6 +933,7 @@ OSError_new(PyTypeObject *type, PyObject *args, PyObject *kwds) goto error; } + Py_XDECREF(args); return (PyObject *) self; error: -- cgit v1.2.1 From 3b70e13c1794a8ae862bdd7b37163c754a2c7334 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sat, 16 Nov 2013 00:34:13 +1000 Subject: Don't decref exc too soon --- Objects/exceptions.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/exceptions.c b/Objects/exceptions.c index bb61ea5a3c..3476db02df 100644 --- 
a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2685,7 +2685,6 @@ _PyErr_TrySetFromCause(const char *format, ...) * state potentially stored on OSError instances. */ - Py_DECREF(exc); Py_XDECREF(tb); #ifdef HAVE_STDARG_PROTOTYPES @@ -2696,12 +2695,14 @@ _PyErr_TrySetFromCause(const char *format, ...) msg_prefix = PyUnicode_FromFormatV(format, vargs); va_end(vargs); if (msg_prefix == NULL) { + Py_DECREF(exc); Py_DECREF(val); return NULL; } PyErr_Format(exc, "%U (%s: %S)", msg_prefix, Py_TYPE(val)->tp_name, val); + Py_DECREF(exc); Py_DECREF(msg_prefix); PyErr_Fetch(&new_exc, &new_val, &new_tb); PyErr_NormalizeException(&new_exc, &new_val, &new_tb); -- cgit v1.2.1 From fc91271d4d79f1c398f2bbd56d59cea762554e91 Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Sat, 16 Nov 2013 19:10:57 +0200 Subject: #17806: Added keyword-argument support for "tabsize" to str/bytes.expandtabs(). --- Objects/bytearrayobject.c | 2 +- Objects/bytesobject.c | 2 +- Objects/stringlib/transmogrify.h | 8 +++++--- Objects/unicodeobject.c | 14 +++++++++----- 4 files changed, 16 insertions(+), 10 deletions(-) (limited to 'Objects') diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 2358e05d25..400da1c1ac 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2805,7 +2805,7 @@ bytearray_methods[] = { {"count", (PyCFunction)bytearray_count, METH_VARARGS, count__doc__}, {"decode", (PyCFunction)bytearray_decode, METH_VARARGS | METH_KEYWORDS, decode_doc}, {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS, endswith__doc__}, - {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS, + {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS, expandtabs__doc__}, {"extend", (PyCFunction)bytearray_extend, METH_O, extend__doc__}, {"find", (PyCFunction)bytearray_find, METH_VARARGS, find__doc__}, diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 0a9d04d6db..efa0192a01 100644 --- a/Objects/bytesobject.c +++ 
b/Objects/bytesobject.c @@ -2389,7 +2389,7 @@ bytes_methods[] = { {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__}, {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__}, - {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS, + {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS, expandtabs__doc__}, {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__}, {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h index 90fa129b32..dd00976eac 100644 --- a/Objects/stringlib/transmogrify.h +++ b/Objects/stringlib/transmogrify.h @@ -5,21 +5,23 @@ shared code in bytes_methods.c to cut down on duplicate code bloat. */ PyDoc_STRVAR(expandtabs__doc__, -"B.expandtabs([tabsize]) -> copy of B\n\ +"B.expandtabs(tabsize=8) -> copy of B\n\ \n\ Return a copy of B where all tab characters are expanded using spaces.\n\ If tabsize is not given, a tab size of 8 characters is assumed."); static PyObject* -stringlib_expandtabs(PyObject *self, PyObject *args) +stringlib_expandtabs(PyObject *self, PyObject *args, PyObject *kwds) { const char *e, *p; char *q; size_t i, j; PyObject *u; + static char *kwlist[] = {"tabsize", 0}; int tabsize = 8; - if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs", + kwlist, &tabsize)) return NULL; /* First pass: determine size of output string */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 77898165a7..925d86c6cf 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11010,23 +11010,25 @@ unicode_encode(PyObject *self, PyObject *args, PyObject *kwargs) } PyDoc_STRVAR(expandtabs__doc__, - "S.expandtabs([tabsize]) -> str\n\ + "S.expandtabs(tabsize=8) -> str\n\ \n\ Return a copy of S where all tab characters are expanded using spaces.\n\ If tabsize is not given, a tab size 
of 8 characters is assumed."); static PyObject* -unicode_expandtabs(PyObject *self, PyObject *args) +unicode_expandtabs(PyObject *self, PyObject *args, PyObject *kwds) { Py_ssize_t i, j, line_pos, src_len, incr; Py_UCS4 ch; PyObject *u; void *src_data, *dest_data; + static char *kwlist[] = {"tabsize", 0}; int tabsize = 8; int kind; int found; - if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs", + kwlist, &tabsize)) return NULL; if (PyUnicode_READY(self) == -1) @@ -13394,7 +13396,8 @@ static PyMethodDef unicode_methods[] = { {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__}, {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__}, {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__}, - {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__}, + {"expandtabs", (PyCFunction) unicode_expandtabs, + METH_VARARGS | METH_KEYWORDS, expandtabs__doc__}, {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__}, {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__}, {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__}, @@ -13406,7 +13409,8 @@ static PyMethodDef unicode_methods[] = { {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__}, {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__}, {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__}, - {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS | METH_KEYWORDS, splitlines__doc__}, + {"splitlines", (PyCFunction) unicode_splitlines, + METH_VARARGS | METH_KEYWORDS, splitlines__doc__}, {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__}, {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__}, {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__}, -- cgit v1.2.1 From 8dc755f298e73a0671e00a5ef39c3d6a9275b60a Mon Sep 17 00:00:00 2001 From: 
Larry Hastings Date: Mon, 18 Nov 2013 09:32:13 -0800 Subject: Argument Clinic: rename "self" to "module" for module-level functions. --- Objects/dictobject.c | 2 +- Objects/unicodeobject.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index a5072c8bd2..c224052b12 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2161,7 +2161,7 @@ dict_richcompare(PyObject *v, PyObject *w, int op) } /*[clinic] -module dict +class dict @coexist dict.__contains__ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 925d86c6cf..b2f488d1f2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12722,7 +12722,7 @@ unicode_swapcase(PyObject *self) } /*[clinic] -module str +class str @staticmethod str.maketrans as unicode_maketrans -- cgit v1.2.1 From 2253995da522bc69a0c69a410274be4625b4006a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 18 Nov 2013 21:08:39 +0100 Subject: Issue #19581: Change the overallocation factor of _PyUnicodeWriter on Windows On Windows, a factor of 50% gives best performances. 
--- Objects/unicodeobject.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b2f488d1f2..bddfafd90f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13106,6 +13106,13 @@ int _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, Py_ssize_t length, Py_UCS4 maxchar) { +#ifdef MS_WINDOWS + /* On Windows, overallocate by 50% is the best factor */ +# define OVERALLOCATE_FACTOR 2 +#else + /* On Linux, overallocate by 25% is the best factor */ +# define OVERALLOCATE_FACTOR 4 +#endif Py_ssize_t newlen; PyObject *newbuffer; @@ -13121,9 +13128,10 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, if (writer->buffer == NULL) { assert(!writer->readonly); - if (writer->overallocate && newlen <= (PY_SSIZE_T_MAX - newlen / 4)) { - /* overallocate 25% to limit the number of resize */ - newlen += newlen / 4; + if (writer->overallocate + && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) { + /* overallocate to limit the number of realloc() */ + newlen += newlen / OVERALLOCATE_FACTOR; } if (newlen < writer->min_length) newlen = writer->min_length; @@ -13133,9 +13141,10 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, return -1; } else if (newlen > writer->size) { - if (writer->overallocate && newlen <= (PY_SSIZE_T_MAX - newlen / 4)) { - /* overallocate 25% to limit the number of resize */ - newlen += newlen / 4; + if (writer->overallocate + && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) { + /* overallocate to limit the number of realloc() */ + newlen += newlen / OVERALLOCATE_FACTOR; } if (newlen < writer->min_length) newlen = writer->min_length; @@ -13169,6 +13178,8 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, } _PyUnicodeWriter_Update(writer); return 0; + +#undef OVERALLOCATE_FACTOR } Py_LOCAL_INLINE(int) -- cgit v1.2.1 From 1b7276a9fb0e6a7761e734af6c3a08ba8ce40761 Mon Sep 
17 00:00:00 2001 From: Victor Stinner Date: Mon, 18 Nov 2013 21:11:57 +0100 Subject: Issue #19513: repr(list) now uses the PyUnicodeWriter API, it is faster than the PyAccu API --- Objects/listobject.c | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) (limited to 'Objects') diff --git a/Objects/listobject.c b/Objects/listobject.c index c08c1f66ec..5e40667239 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -338,9 +338,9 @@ static PyObject * list_repr(PyListObject *v) { Py_ssize_t i; - PyObject *s = NULL; - _PyAccu acc; + PyObject *s; static PyObject *sep = NULL; + _PyUnicodeWriter writer; if (Py_SIZE(v) == 0) { return PyUnicode_FromString("[]"); @@ -357,38 +357,50 @@ list_repr(PyListObject *v) return i > 0 ? PyUnicode_FromString("[...]") : NULL; } - if (_PyAccu_Init(&acc)) - goto error; + _PyUnicodeWriter_Init(&writer); + writer.overallocate = 1; + if (Py_SIZE(v) > 1) { + /* "[" + "1" + ", 2" * (len - 1) + "]" */ + writer.min_length = 1 + 1 + (2 + 1) * (Py_SIZE(v) - 1) + 1; + } + else { + /* "[1]" */ + writer.min_length = 3; + } - s = PyUnicode_FromString("["); - if (s == NULL || _PyAccu_Accumulate(&acc, s)) + if (_PyUnicodeWriter_WriteChar(&writer, '[') < 0) goto error; - Py_CLEAR(s); /* Do repr() on each element. Note that this may mutate the list, so must refetch the list size on each iteration. 
*/ for (i = 0; i < Py_SIZE(v); ++i) { + if (i > 0) { + if (_PyUnicodeWriter_WriteStr(&writer, sep) < 0) + goto error; + } + if (Py_EnterRecursiveCall(" while getting the repr of a list")) goto error; s = PyObject_Repr(v->ob_item[i]); Py_LeaveRecursiveCall(); - if (i > 0 && _PyAccu_Accumulate(&acc, sep)) + if (s == NULL) goto error; - if (s == NULL || _PyAccu_Accumulate(&acc, s)) + + if (_PyUnicodeWriter_WriteStr(&writer, s) < 0) { + Py_DECREF(s); goto error; - Py_CLEAR(s); + } + Py_DECREF(s); } - s = PyUnicode_FromString("]"); - if (s == NULL || _PyAccu_Accumulate(&acc, s)) + + if (_PyUnicodeWriter_WriteChar(&writer, ']') < 0) goto error; - Py_CLEAR(s); Py_ReprLeave((PyObject *)v); - return _PyAccu_Finish(&acc); + return _PyUnicodeWriter_Finish(&writer); error: - _PyAccu_Destroy(&acc); - Py_XDECREF(s); + _PyUnicodeWriter_Dealloc(&writer); Py_ReprLeave((PyObject *)v); return NULL; } -- cgit v1.2.1 From ea29c2eb51334b29ea2ec82c27d2ac56504e9693 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 18 Nov 2013 22:15:44 +0100 Subject: Issue #19513: Simplify list_repr() --- Objects/listobject.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'Objects') diff --git a/Objects/listobject.c b/Objects/listobject.c index 5e40667239..50538e178a 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -359,14 +359,8 @@ list_repr(PyListObject *v) _PyUnicodeWriter_Init(&writer); writer.overallocate = 1; - if (Py_SIZE(v) > 1) { - /* "[" + "1" + ", 2" * (len - 1) + "]" */ - writer.min_length = 1 + 1 + (2 + 1) * (Py_SIZE(v) - 1) + 1; - } - else { - /* "[1]" */ - writer.min_length = 3; - } + /* "[" + "1" + ", 2" * (len - 1) + "]" */ + writer.min_length = 1 + 1 + (2 + 1) * (Py_SIZE(v) - 1) + 1; if (_PyUnicodeWriter_WriteChar(&writer, '[') < 0) goto error; -- cgit v1.2.1 From c1c39389d8a80767e05f84820e28caf528ba0f81 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 19 Nov 2013 11:32:41 +0200 Subject: Issue #12892: The utf-16* and utf-32* 
codecs now reject (lone) surrogates. The utf-16* and utf-32* encoders no longer allow surrogate code points (U+D800-U+DFFF) to be encoded. The utf-32* decoders no longer decode byte sequences that correspond to surrogate code points. The surrogatepass error handler now works with the utf-16* and utf-32* codecs. Based on patches by Victor Stinner and Kang-Hao (Kenny) Lu. --- Objects/stringlib/codecs.h | 198 +++++++++++++++++++++++++++++++++--- Objects/unicodeobject.c | 245 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 403 insertions(+), 40 deletions(-) (limited to 'Objects') diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index 57319c6572..14fdc6c083 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -596,66 +596,232 @@ IllegalSurrogate: #undef SWAB -Py_LOCAL_INLINE(void) -STRINGLIB(utf16_encode)(unsigned short *out, - const STRINGLIB_CHAR *in, +#if STRINGLIB_MAX_CHAR >= 0x80 +Py_LOCAL_INLINE(Py_ssize_t) +STRINGLIB(utf16_encode_)(const STRINGLIB_CHAR *in, Py_ssize_t len, + unsigned short **outptr, int native_ordering) { + unsigned short *out = *outptr; const STRINGLIB_CHAR *end = in + len; #if STRINGLIB_SIZEOF_CHAR == 1 # define SWAB2(CH) ((CH) << 8) #else # define SWAB2(CH) (((CH) << 8) | ((CH) >> 8)) #endif -#if STRINGLIB_MAX_CHAR < 0x10000 if (native_ordering) { -# if STRINGLIB_SIZEOF_CHAR == 2 - Py_MEMCPY(out, in, 2 * len); -# else - _PyUnicode_CONVERT_BYTES(STRINGLIB_CHAR, unsigned short, in, end, out); +#if STRINGLIB_MAX_CHAR < 0x10000 + const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); + while (in < unrolled_end) { +# if STRINGLIB_MAX_CHAR >= 0xd800 + if (((in[0] ^ 0xd800) & + (in[1] ^ 0xd800) & + (in[2] ^ 0xd800) & + (in[3] ^ 0xd800) & 0xf800) == 0) + break; # endif + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + in += 4; out += 4; + } +#endif + while (in < end) { + Py_UCS4 ch; + ch = *in++; +#if STRINGLIB_MAX_CHAR >= 0xd800 + if (ch < 0xd800) + 
*out++ = ch; + else if (ch < 0xe000) + /* reject surrogate characters (U+DC800-U+DFFF) */ + goto fail; +# if STRINGLIB_MAX_CHAR >= 0x10000 + else if (ch >= 0x10000) { + out[0] = Py_UNICODE_HIGH_SURROGATE(ch); + out[1] = Py_UNICODE_LOW_SURROGATE(ch); + out += 2; + } +# endif + else +#endif + *out++ = ch; + } } else { +#if STRINGLIB_MAX_CHAR < 0x10000 const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); while (in < unrolled_end) { +# if STRINGLIB_MAX_CHAR >= 0xd800 + if (((in[0] ^ 0xd800) & + (in[1] ^ 0xd800) & + (in[2] ^ 0xd800) & + (in[3] ^ 0xd800) & 0xf800) == 0) + break; +# endif out[0] = SWAB2(in[0]); out[1] = SWAB2(in[1]); out[2] = SWAB2(in[2]); out[3] = SWAB2(in[3]); in += 4; out += 4; } +#endif while (in < end) { - *out++ = SWAB2(*in); - ++in; + Py_UCS4 ch = *in++; +#if STRINGLIB_MAX_CHAR >= 0xd800 + if (ch < 0xd800) + *out++ = SWAB2((Py_UCS2)ch); + else if (ch < 0xe000) + /* reject surrogate characters (U+DC800-U+DFFF) */ + goto fail; +# if STRINGLIB_MAX_CHAR >= 0x10000 + else if (ch >= 0x10000) { + Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch); + Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch); + out[0] = SWAB2(ch1); + out[1] = SWAB2(ch2); + out += 2; + } +# endif + else +#endif + *out++ = SWAB2((Py_UCS2)ch); } } -#else + *outptr = out; + return len; +#if STRINGLIB_MAX_CHAR >= 0xd800 + fail: +#endif + *outptr = out; + return len - (end - in + 1); +} +#endif + +#undef SWAB2 + +#if STRINGLIB_MAX_CHAR >= 0x80 +Py_LOCAL_INLINE(Py_ssize_t) +STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in, + Py_ssize_t len, + unsigned short **outptr, + int native_ordering) +{ + unsigned short *out = *outptr; + const STRINGLIB_CHAR *end = in + len; +#if STRINGLIB_SIZEOF_CHAR == 1 if (native_ordering) { + const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); + while (in < unrolled_end) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + in += 4; out += 4; + } + while (in < end) { + *out++ = *in++; + } + } else { +# define 
SWAB2(CH) ((CH) << 8) /* high byte is zero */ + const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); + while (in < unrolled_end) { + out[0] = SWAB2(in[0]); + out[1] = SWAB2(in[1]); + out[2] = SWAB2(in[2]); + out[3] = SWAB2(in[3]); + in += 4; out += 4; + } while (in < end) { Py_UCS4 ch = *in++; - if (ch < 0x10000) + *out++ = SWAB2((Py_UCS2)ch); + } +#undef SWAB2 + } + *outptr = out; + return len; +#else + if (native_ordering) { +#if STRINGLIB_MAX_CHAR < 0x10000 + const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); + while (in < unrolled_end) { + /* check if any character is a surrogate character */ + if (((in[0] ^ 0xd800) & + (in[1] ^ 0xd800) & + (in[2] ^ 0xd800) & + (in[3] ^ 0xd800) & 0xf800) == 0) + break; + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + in += 4; out += 4; + } +#endif + while (in < end) { + Py_UCS4 ch; + ch = *in++; + if (ch < 0xd800) *out++ = ch; - else { + else if (ch < 0xe000) + /* reject surrogate characters (U+DC800-U+DFFF) */ + goto fail; +#if STRINGLIB_MAX_CHAR >= 0x10000 + else if (ch >= 0x10000) { out[0] = Py_UNICODE_HIGH_SURROGATE(ch); out[1] = Py_UNICODE_LOW_SURROGATE(ch); out += 2; } +#endif + else + *out++ = ch; } } else { +#define SWAB2(CH) (((CH) << 8) | ((CH) >> 8)) +#if STRINGLIB_MAX_CHAR < 0x10000 + const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); + while (in < unrolled_end) { + /* check if any character is a surrogate character */ + if (((in[0] ^ 0xd800) & + (in[1] ^ 0xd800) & + (in[2] ^ 0xd800) & + (in[3] ^ 0xd800) & 0xf800) == 0) + break; + out[0] = SWAB2(in[0]); + out[1] = SWAB2(in[1]); + out[2] = SWAB2(in[2]); + out[3] = SWAB2(in[3]); + in += 4; out += 4; + } +#endif while (in < end) { Py_UCS4 ch = *in++; - if (ch < 0x10000) + if (ch < 0xd800) *out++ = SWAB2((Py_UCS2)ch); - else { + else if (ch < 0xe000) + /* reject surrogate characters (U+DC800-U+DFFF) */ + goto fail; +#if STRINGLIB_MAX_CHAR >= 0x10000 + else if (ch >= 0x10000) { Py_UCS2 ch1 
= Py_UNICODE_HIGH_SURROGATE(ch); Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch); out[0] = SWAB2(ch1); out[1] = SWAB2(ch2); out += 2; } +#endif + else + *out++ = SWAB2((Py_UCS2)ch); } +#undef SWAB2 } + *outptr = out; + return len; + fail: + *outptr = out; + return len - (end - in + 1); #endif -#undef SWAB2 } +#endif + #endif /* STRINGLIB_IS_UNICODE */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index bddfafd90f..7114006d35 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4963,6 +4963,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, _PyUnicodeWriter writer; const unsigned char *q, *e; int le, bo = 0; /* assume native ordering by default */ + const char *encoding; const char *errmsg = ""; PyObject *errorHandler = NULL; PyObject *exc = NULL; @@ -5002,6 +5003,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, #else le = bo <= 0; #endif + encoding = le ? "utf-32-le" : "utf-32-be"; _PyUnicodeWriter_Init(&writer); writer.min_length = (e - q + 3) / 4; @@ -5022,6 +5024,9 @@ PyUnicode_DecodeUTF32Stateful(const char *s, ch = (q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0]; if (ch > maxch) break; + if (kind != PyUnicode_1BYTE_KIND && + Py_UNICODE_IS_SURROGATE(ch)) + break; PyUnicode_WRITE(kind, data, pos++, ch); q += 4; } while (q <= last); @@ -5031,6 +5036,9 @@ PyUnicode_DecodeUTF32Stateful(const char *s, ch = (q[0] << 24) | (q[1] << 16) | (q[2] << 8) | q[3]; if (ch > maxch) break; + if (kind != PyUnicode_1BYTE_KIND && + Py_UNICODE_IS_SURROGATE(ch)) + break; PyUnicode_WRITE(kind, data, pos++, ch); q += 4; } while (q <= last); @@ -5038,7 +5046,12 @@ PyUnicode_DecodeUTF32Stateful(const char *s, writer.pos = pos; } - if (ch <= maxch) { + if (Py_UNICODE_IS_SURROGATE(ch)) { + errmsg = "codepoint in surrogate code point range(0xd800, 0xe000)"; + startinpos = ((const char *)q) - starts; + endinpos = startinpos + 4; + } + else if (ch <= maxch) { if (q == e || consumed) break; /* remaining bytes at the end? 
(size should be divisible by 4) */ @@ -5062,7 +5075,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, chooses to skip the input */ if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, - "utf32", errmsg, + encoding, errmsg, &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q, &writer)) goto onError; @@ -5099,6 +5112,10 @@ _PyUnicode_EncodeUTF32(PyObject *str, #else int iorder[] = {3, 2, 1, 0}; #endif + const char *encoding; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + PyObject *rep = NULL; #define STORECHAR(CH) \ do { \ @@ -5130,7 +5147,7 @@ _PyUnicode_EncodeUTF32(PyObject *str, if (byteorder == 0) STORECHAR(0xFEFF); if (len == 0) - goto done; + return v; if (byteorder == -1) { /* force LE */ @@ -5138,6 +5155,7 @@ _PyUnicode_EncodeUTF32(PyObject *str, iorder[1] = 1; iorder[2] = 2; iorder[3] = 3; + encoding = "utf-32-le"; } else if (byteorder == 1) { /* force BE */ @@ -5145,13 +5163,103 @@ _PyUnicode_EncodeUTF32(PyObject *str, iorder[1] = 2; iorder[2] = 1; iorder[3] = 0; + encoding = "utf-32-be"; } + else + encoding = "utf-32"; - for (i = 0; i < len; i++) - STORECHAR(PyUnicode_READ(kind, data, i)); + if (kind == PyUnicode_1BYTE_KIND) { + for (i = 0; i < len; i++) + STORECHAR(PyUnicode_READ(kind, data, i)); + return v; + } - done: + for (i = 0; i < len;) { + Py_ssize_t repsize, moreunits; + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + i++; + assert(ch <= MAX_UNICODE); + if (!Py_UNICODE_IS_SURROGATE(ch)) { + STORECHAR(ch); + continue; + } + + rep = unicode_encode_call_errorhandler( + errors, &errorHandler, + encoding, "surrogates not allowed", + str, &exc, i-1, i, &i); + + if (!rep) + goto error; + + if (PyBytes_Check(rep)) { + repsize = PyBytes_GET_SIZE(rep); + if (repsize & 3) { + raise_encode_exception(&exc, encoding, + str, i - 1, i, + "surrogates not allowed"); + goto error; + } + moreunits = repsize / 4; + } + else { + assert(PyUnicode_Check(rep)); + if (PyUnicode_READY(rep) < 0) + goto error; + moreunits = 
repsize = PyUnicode_GET_LENGTH(rep); + if (!PyUnicode_IS_ASCII(rep)) { + raise_encode_exception(&exc, encoding, + str, i - 1, i, + "surrogates not allowed"); + goto error; + } + } + + /* four bytes are reserved for each surrogate */ + if (moreunits > 1) { + Py_ssize_t outpos = p - (unsigned char*) PyBytes_AS_STRING(v); + Py_ssize_t morebytes = 4 * (moreunits - 1); + if (PyBytes_GET_SIZE(v) > PY_SSIZE_T_MAX - morebytes) { + /* integer overflow */ + PyErr_NoMemory(); + goto error; + } + if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + morebytes) < 0) + goto error; + p = (unsigned char*) PyBytes_AS_STRING(v) + outpos; + } + + if (PyBytes_Check(rep)) { + Py_MEMCPY(p, PyBytes_AS_STRING(rep), repsize); + p += repsize; + } else /* rep is unicode */ { + const Py_UCS1 *repdata; + assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND); + repdata = PyUnicode_1BYTE_DATA(rep); + while (repsize--) { + Py_UCS4 ch = *repdata++; + STORECHAR(ch); + } + } + + Py_CLEAR(rep); + } + + /* Cut back to size actually needed. This is necessary for, for example, + encoding of a string containing isolated surrogates and the 'ignore' + handler is used. */ + nsize = p - (unsigned char*) PyBytes_AS_STRING(v); + if (nsize != PyBytes_GET_SIZE(v)) + _PyBytes_Resize(&v, nsize); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); return v; + error: + Py_XDECREF(rep); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + Py_XDECREF(v); + return NULL; #undef STORECHAR } @@ -5204,6 +5312,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s, const char *errmsg = ""; PyObject *errorHandler = NULL; PyObject *exc = NULL; + const char *encoding; q = (unsigned char *)s; e = q + size; @@ -5237,8 +5346,10 @@ PyUnicode_DecodeUTF16Stateful(const char *s, #if PY_LITTLE_ENDIAN native_ordering = bo <= 0; + encoding = bo <= 0 ? "utf-16-le" : "utf-16-be"; #else native_ordering = bo >= 0; + encoding = bo >= 0 ? 
"utf-16-be" : "utf-16-le"; #endif /* Note: size will always be longer than the resulting Unicode @@ -5312,7 +5423,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s, if (unicode_decode_call_errorhandler_writer( errors, &errorHandler, - "utf16", errmsg, + encoding, errmsg, &starts, (const char **)&e, &startinpos, @@ -5348,13 +5459,17 @@ _PyUnicode_EncodeUTF16(PyObject *str, Py_ssize_t len; PyObject *v; unsigned short *out; - Py_ssize_t bytesize; Py_ssize_t pairs; #if PY_BIG_ENDIAN int native_ordering = byteorder >= 0; #else int native_ordering = byteorder <= 0; #endif + const char *encoding; + Py_ssize_t nsize, pos; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + PyObject *rep = NULL; if (!PyUnicode_Check(str)) { PyErr_BadArgument(); @@ -5376,8 +5491,8 @@ _PyUnicode_EncodeUTF16(PyObject *str, } if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0)) return PyErr_NoMemory(); - bytesize = (len + pairs + (byteorder == 0)) * 2; - v = PyBytes_FromStringAndSize(NULL, bytesize); + nsize = len + pairs + (byteorder == 0); + v = PyBytes_FromStringAndSize(NULL, nsize * 2); if (v == NULL) return NULL; @@ -5389,25 +5504,107 @@ _PyUnicode_EncodeUTF16(PyObject *str, if (len == 0) goto done; - switch (kind) { - case PyUnicode_1BYTE_KIND: { - ucs1lib_utf16_encode(out, (const Py_UCS1 *)data, len, native_ordering); - break; - } - case PyUnicode_2BYTE_KIND: { - ucs2lib_utf16_encode(out, (const Py_UCS2 *)data, len, native_ordering); - break; - } - case PyUnicode_4BYTE_KIND: { - ucs4lib_utf16_encode(out, (const Py_UCS4 *)data, len, native_ordering); - break; + if (kind == PyUnicode_1BYTE_KIND) { + ucs1lib_utf16_encode((const Py_UCS1 *)data, len, &out, native_ordering); + goto done; } - default: - assert(0); + + if (byteorder < 0) + encoding = "utf-16-le"; + else if (byteorder > 0) + encoding = "utf-16-be"; + else + encoding = "utf-16"; + + pos = 0; + while (pos < len) { + Py_ssize_t repsize, moreunits; + + if (kind == PyUnicode_2BYTE_KIND) { + pos += ucs2lib_utf16_encode((const 
Py_UCS2 *)data + pos, len - pos, + &out, native_ordering); + } + else { + assert(kind == PyUnicode_4BYTE_KIND); + pos += ucs4lib_utf16_encode((const Py_UCS4 *)data + pos, len - pos, + &out, native_ordering); + } + if (pos == len) + break; + + rep = unicode_encode_call_errorhandler( + errors, &errorHandler, + encoding, "surrogates not allowed", + str, &exc, pos, pos + 1, &pos); + if (!rep) + goto error; + + if (PyBytes_Check(rep)) { + repsize = PyBytes_GET_SIZE(rep); + if (repsize & 1) { + raise_encode_exception(&exc, encoding, + str, pos - 1, pos, + "surrogates not allowed"); + goto error; + } + moreunits = repsize / 2; + } + else { + assert(PyUnicode_Check(rep)); + if (PyUnicode_READY(rep) < 0) + goto error; + moreunits = repsize = PyUnicode_GET_LENGTH(rep); + if (!PyUnicode_IS_ASCII(rep)) { + raise_encode_exception(&exc, encoding, + str, pos - 1, pos, + "surrogates not allowed"); + goto error; + } + } + + /* two bytes are reserved for each surrogate */ + if (moreunits > 1) { + Py_ssize_t outpos = out - (unsigned short*) PyBytes_AS_STRING(v); + Py_ssize_t morebytes = 2 * (moreunits - 1); + if (PyBytes_GET_SIZE(v) > PY_SSIZE_T_MAX - morebytes) { + /* integer overflow */ + PyErr_NoMemory(); + goto error; + } + if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + morebytes) < 0) + goto error; + out = (unsigned short*) PyBytes_AS_STRING(v) + outpos; + } + + if (PyBytes_Check(rep)) { + Py_MEMCPY(out, PyBytes_AS_STRING(rep), repsize); + out += moreunits; + } else /* rep is unicode */ { + assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND); + ucs1lib_utf16_encode(PyUnicode_1BYTE_DATA(rep), repsize, + &out, native_ordering); + } + + Py_CLEAR(rep); } + /* Cut back to size actually needed. This is necessary for, for example, + encoding of a string containing isolated surrogates and the 'ignore' handler + is used. 
*/ + nsize = (unsigned char*) out - (unsigned char*) PyBytes_AS_STRING(v); + if (nsize != PyBytes_GET_SIZE(v)) + _PyBytes_Resize(&v, nsize); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); done: return v; + error: + Py_XDECREF(rep); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + Py_XDECREF(v); + return NULL; +#undef STORECHAR } PyObject * -- cgit v1.2.1 From 3feeb3fb7fdf72151d01282c6ae176f1fe51b297 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 19 Nov 2013 12:09:00 +0100 Subject: Issue #19513: Disable overallocation of the PyUnicodeWriter before the last write --- Objects/listobject.c | 1 + 1 file changed, 1 insertion(+) (limited to 'Objects') diff --git a/Objects/listobject.c b/Objects/listobject.c index 50538e178a..45666fddfa 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -387,6 +387,7 @@ list_repr(PyListObject *v) Py_DECREF(s); } + writer.overallocate = 0; if (_PyUnicodeWriter_WriteChar(&writer, ']') < 0) goto error; -- cgit v1.2.1 From 4d014c8f2ccc06eb04f9963df910d9d2357fc096 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 19 Nov 2013 12:54:53 +0100 Subject: Add _PyUnicodeWriter_WriteASCIIString() function --- Objects/listobject.c | 9 +---- Objects/unicodeobject.c | 90 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 73 insertions(+), 26 deletions(-) (limited to 'Objects') diff --git a/Objects/listobject.c b/Objects/listobject.c index 45666fddfa..7d5674c9f4 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -339,19 +339,12 @@ list_repr(PyListObject *v) { Py_ssize_t i; PyObject *s; - static PyObject *sep = NULL; _PyUnicodeWriter writer; if (Py_SIZE(v) == 0) { return PyUnicode_FromString("[]"); } - if (sep == NULL) { - sep = PyUnicode_FromString(", "); - if (sep == NULL) - return NULL; - } - i = Py_ReprEnter((PyObject*)v); if (i != 0) { return i > 0 ? PyUnicode_FromString("[...]") : NULL; @@ -369,7 +362,7 @@ list_repr(PyListObject *v) so must refetch the list size on each iteration. 
*/ for (i = 0; i < Py_SIZE(v); ++i) { if (i > 0) { - if (_PyUnicodeWriter_WriteStr(&writer, sep) < 0) + if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) goto error; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7114006d35..880889e8a2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -140,9 +140,9 @@ extern "C" { buffer where the result characters are written to. */ #define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \ do { \ - to_type *_to = (to_type *) to; \ - const from_type *_iter = (begin); \ - const from_type *_end = (end); \ + to_type *_to = (to_type *)(to); \ + const from_type *_iter = (from_type *)(begin); \ + const from_type *_end = (from_type *)(end); \ Py_ssize_t n = (_end) - (_iter); \ const from_type *_unrolled_end = \ _iter + _Py_SIZE_ROUND_DOWN(n, 4); \ @@ -2562,7 +2562,6 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, precision = len; arglen = Py_MAX(precision, width); - assert(ucs1lib_find_max_char((Py_UCS1*)buffer, (Py_UCS1*)buffer + len) <= 127); if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1) return NULL; @@ -2581,8 +2580,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, writer->pos += fill; } - unicode_write_cstr(writer->buffer, writer->pos, buffer, len); - writer->pos += len; + if (_PyUnicodeWriter_WriteASCIIString(writer, buffer, len) < 0) + return NULL; break; } @@ -2604,11 +2603,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, len += 2; } - assert(ucs1lib_find_max_char((Py_UCS1*)number, (Py_UCS1*)number + len) <= 127); - if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) + if (_PyUnicodeWriter_WriteASCIIString(writer, number, len) < 0) return NULL; - unicode_write_cstr(writer->buffer, writer->pos, number, len); - writer->pos += len; break; } @@ -2707,7 +2703,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, skip the code, since there's no way to know what's in the argument list) */ len = strlen(p); - if (_PyUnicodeWriter_WriteCstr(writer, 
p, len) == -1) + if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1) return NULL; f = p+len; return f; @@ -2759,10 +2755,9 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) if (*p == '\0') writer.overallocate = 0; - if (_PyUnicodeWriter_Prepare(&writer, len, 127) == -1) + + if (_PyUnicodeWriter_WriteASCIIString(&writer, f, len) < 0) goto fail; - unicode_write_cstr(writer.buffer, writer.pos, f, len); - writer.pos += len; f = p; } @@ -13461,7 +13456,68 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str, } int -_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len) +_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, + const char *ascii, Py_ssize_t len) +{ + if (len == -1) + len = strlen(ascii); + + assert(ucs1lib_find_max_char((Py_UCS1*)ascii, (Py_UCS1*)ascii + len) < 128); + + if (writer->buffer == NULL && !writer->overallocate) { + PyObject *str; + + str = _PyUnicode_FromASCII(ascii, len); + if (str == NULL) + return -1; + + writer->readonly = 1; + writer->buffer = str; + _PyUnicodeWriter_Update(writer); + writer->pos += len; + return 0; + } + + if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) + return -1; + + switch (writer->kind) + { + case PyUnicode_1BYTE_KIND: + { + const Py_UCS1 *str = (const Py_UCS1 *)ascii; + Py_UCS1 *data = writer->data; + + Py_MEMCPY(data + writer->pos, str, len); + break; + } + case PyUnicode_2BYTE_KIND: + { + _PyUnicode_CONVERT_BYTES( + Py_UCS1, Py_UCS2, + ascii, ascii + len, + (Py_UCS2 *)writer->data + writer->pos); + break; + } + case PyUnicode_4BYTE_KIND: + { + _PyUnicode_CONVERT_BYTES( + Py_UCS1, Py_UCS4, + ascii, ascii + len, + (Py_UCS4 *)writer->data + writer->pos); + break; + } + default: + assert(0); + } + + writer->pos += len; + return 0; +} + +int +_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, + const char *str, Py_ssize_t len) { Py_UCS4 maxchar; @@ -13828,12 +13884,10 @@ formatfloat(PyObject *v, struct unicode_format_arg_t 
*arg, return -1; len = strlen(p); if (writer) { - if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) { + if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) { PyMem_Free(p); return -1; } - unicode_write_cstr(writer->buffer, writer->pos, p, len); - writer->pos += len; } else *p_output = _PyUnicode_FromASCII(p, len); -- cgit v1.2.1 From b130007f95de02932dc257740d2de278cd864e8a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 19 Nov 2013 12:59:46 +0100 Subject: Issue #19513: repr(tuple) now uses _PyUnicodeWriter for better performances --- Objects/tupleobject.c | 64 +++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 27 deletions(-) (limited to 'Objects') diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index a33d8c06ee..5625a6547c 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -255,20 +255,12 @@ static PyObject * tuplerepr(PyTupleObject *v) { Py_ssize_t i, n; - PyObject *s = NULL; - _PyAccu acc; - static PyObject *sep = NULL; + _PyUnicodeWriter writer; n = Py_SIZE(v); if (n == 0) return PyUnicode_FromString("()"); - if (sep == NULL) { - sep = PyUnicode_FromString(", "); - if (sep == NULL) - return NULL; - } - /* While not mutable, it is still possible to end up with a cycle in a tuple through an object that stores itself within a tuple (and thus infinitely asks for the repr of itself). This should only be @@ -278,40 +270,58 @@ tuplerepr(PyTupleObject *v) return i > 0 ? PyUnicode_FromString("(...)") : NULL; } - if (_PyAccu_Init(&acc)) - goto error; + _PyUnicodeWriter_Init(&writer); + writer.overallocate = 1; + if (Py_SIZE(v) > 1) { + /* "(" + "1" + ", 2" * (len - 1) + ")" */ + writer.min_length = 1 + 1 + (2 + 1) * (Py_SIZE(v) - 1) + 1; + } + else { + /* "(1,)" */ + writer.min_length = 4; + } - s = PyUnicode_FromString("("); - if (s == NULL || _PyAccu_Accumulate(&acc, s)) + if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) goto error; - Py_CLEAR(s); /* Do repr() on each element. 
*/ for (i = 0; i < n; ++i) { + PyObject *s; + + if (i > 0) { + if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) + goto error; + } + if (Py_EnterRecursiveCall(" while getting the repr of a tuple")) goto error; s = PyObject_Repr(v->ob_item[i]); Py_LeaveRecursiveCall(); - if (i > 0 && _PyAccu_Accumulate(&acc, sep)) + if (s == NULL) goto error; - if (s == NULL || _PyAccu_Accumulate(&acc, s)) + + if (_PyUnicodeWriter_WriteStr(&writer, s) < 0) { + Py_DECREF(s); + goto error; + } + Py_DECREF(s); + } + + writer.overallocate = 0; + if (n > 1) { + if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) + goto error; + } + else { + if (_PyUnicodeWriter_WriteASCIIString(&writer, ",)", 2) < 0) goto error; - Py_CLEAR(s); } - if (n > 1) - s = PyUnicode_FromString(")"); - else - s = PyUnicode_FromString(",)"); - if (s == NULL || _PyAccu_Accumulate(&acc, s)) - goto error; - Py_CLEAR(s); Py_ReprLeave((PyObject *)v); - return _PyAccu_Finish(&acc); + return _PyUnicodeWriter_Finish(&writer); error: - _PyAccu_Destroy(&acc); - Py_XDECREF(s); + _PyUnicodeWriter_Dealloc(&writer); Py_ReprLeave((PyObject *)v); return NULL; } -- cgit v1.2.1 From f55afc5b8d34fe56f504c701d128739563e285bc Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 19 Nov 2013 13:07:38 +0100 Subject: Issue #19646: repr(dict) now uses _PyUnicodeWriter API for better performances --- Objects/dictobject.c | 109 +++++++++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 55 deletions(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c224052b12..3c1b3bba8c 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1397,9 +1397,9 @@ static PyObject * dict_repr(PyDictObject *mp) { Py_ssize_t i; - PyObject *s, *temp, *colon = NULL; - PyObject *pieces = NULL, *result = NULL; - PyObject *key, *value; + PyObject *key = NULL, *value = NULL; + _PyUnicodeWriter writer; + int first; i = Py_ReprEnter((PyObject *)mp); if (i != 0) { @@ -1407,74 +1407,73 @@ 
dict_repr(PyDictObject *mp) } if (mp->ma_used == 0) { - result = PyUnicode_FromString("{}"); - goto Done; + Py_ReprLeave((PyObject *)mp); + return PyUnicode_FromString("{}"); } - pieces = PyList_New(0); - if (pieces == NULL) - goto Done; + _PyUnicodeWriter_Init(&writer); + writer.overallocate = 1; + /* "{" + "1: 2" + ", 3: 4" * (len - 1) + "}" */ + writer.min_length = 1 + 4 + (2 + 4) * (mp->ma_used - 1) + 1; - colon = PyUnicode_FromString(": "); - if (colon == NULL) - goto Done; + if (_PyUnicodeWriter_WriteChar(&writer, '{') < 0) + goto error; /* Do repr() on each key+value pair, and insert ": " between them. Note that repr may mutate the dict. */ i = 0; + first = 1; while (PyDict_Next((PyObject *)mp, &i, &key, &value)) { - int status; + PyObject *s; + int res; + /* Prevent repr from deleting key or value during key format. */ Py_INCREF(key); Py_INCREF(value); + + if (!first) { + if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) + goto error; + } + first = 0; + s = PyObject_Repr(key); - PyUnicode_Append(&s, colon); if (s == NULL) - goto Done; + goto error; + res = _PyUnicodeWriter_WriteStr(&writer, s); + Py_DECREF(s); + if (res < 0) + goto error; - PyUnicode_AppendAndDel(&s, PyObject_Repr(value)); - Py_DECREF(key); - Py_DECREF(value); + if (_PyUnicodeWriter_WriteASCIIString(&writer, ": ", 2) < 0) + goto error; + + s = PyObject_Repr(value); if (s == NULL) - goto Done; - status = PyList_Append(pieces, s); - Py_DECREF(s); /* append created a new ref */ - if (status < 0) - goto Done; - } - - /* Add "{}" decorations to the first and last items. 
*/ - assert(PyList_GET_SIZE(pieces) > 0); - s = PyUnicode_FromString("{"); - if (s == NULL) - goto Done; - temp = PyList_GET_ITEM(pieces, 0); - PyUnicode_AppendAndDel(&s, temp); - PyList_SET_ITEM(pieces, 0, s); - if (s == NULL) - goto Done; - - s = PyUnicode_FromString("}"); - if (s == NULL) - goto Done; - temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1); - PyUnicode_AppendAndDel(&temp, s); - PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp); - if (temp == NULL) - goto Done; - - /* Paste them all together with ", " between. */ - s = PyUnicode_FromString(", "); - if (s == NULL) - goto Done; - result = PyUnicode_Join(s, pieces); - Py_DECREF(s); - -Done: - Py_XDECREF(pieces); - Py_XDECREF(colon); + goto error; + res = _PyUnicodeWriter_WriteStr(&writer, s); + Py_DECREF(s); + if (res < 0) + goto error; + + Py_CLEAR(key); + Py_CLEAR(value); + } + + writer.overallocate = 0; + if (_PyUnicodeWriter_WriteChar(&writer, '}') < 0) + goto error; + Py_ReprLeave((PyObject *)mp); - return result; + + return _PyUnicodeWriter_Finish(&writer); + +error: + Py_ReprLeave((PyObject *)mp); + _PyUnicodeWriter_Dealloc(&writer); + Py_XDECREF(key); + Py_XDECREF(value); + return NULL; } static Py_ssize_t -- cgit v1.2.1 From e675bb67e3b11a47610e090b75389cc51907b973 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Tue, 19 Nov 2013 22:33:10 +1000 Subject: Also chain codec exceptions that allow weakrefs The zlib and hex codecs throw custom exception types with weakref support if the input type is valid, but the data fails validation. Make sure the exception chaining in the codec infrastructure can wrap those as well. --- Objects/exceptions.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'Objects') diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 3476db02df..af40bc8fea 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2630,16 +2630,27 @@ _PyErr_TrySetFromCause(const char *format, ...) 
PyTypeObject *caught_type; PyObject **dictptr; PyObject *instance_args; - Py_ssize_t num_args; + Py_ssize_t num_args, caught_type_size, base_exc_size; PyObject *new_exc, *new_val, *new_tb; va_list vargs; + int same_basic_size; PyErr_Fetch(&exc, &val, &tb); caught_type = (PyTypeObject *)exc; - /* Ensure type info indicates no extra state is stored at the C level */ + /* Ensure type info indicates no extra state is stored at the C level + * and that the type can be reinstantiated using PyErr_Format + */ + caught_type_size = caught_type->tp_basicsize; + base_exc_size = _PyExc_BaseException.tp_basicsize; + same_basic_size = ( + caught_type_size == base_exc_size || + (PyType_SUPPORTS_WEAKREFS(caught_type) && + (caught_type_size == base_exc_size + sizeof(PyObject *)) + ) + ); if (caught_type->tp_init != (initproc)BaseException_init || caught_type->tp_new != BaseException_new || - caught_type->tp_basicsize != _PyExc_BaseException.tp_basicsize || + !same_basic_size || caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize) { /* We can't be sure we can wrap this safely, since it may contain * more state than just the exception type. Accordingly, we just -- cgit v1.2.1 From b818839114b1b68e3d3c05be0608727a2b4ca9d8 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 19 Nov 2013 15:56:05 +0200 Subject: Remove dead code committed in issue #12892. 
--- Objects/stringlib/codecs.h | 104 --------------------------------------------- 1 file changed, 104 deletions(-) (limited to 'Objects') diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index 14fdc6c083..2eb2d1412f 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -596,110 +596,6 @@ IllegalSurrogate: #undef SWAB -#if STRINGLIB_MAX_CHAR >= 0x80 -Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(utf16_encode_)(const STRINGLIB_CHAR *in, - Py_ssize_t len, - unsigned short **outptr, - int native_ordering) -{ - unsigned short *out = *outptr; - const STRINGLIB_CHAR *end = in + len; -#if STRINGLIB_SIZEOF_CHAR == 1 -# define SWAB2(CH) ((CH) << 8) -#else -# define SWAB2(CH) (((CH) << 8) | ((CH) >> 8)) -#endif - if (native_ordering) { -#if STRINGLIB_MAX_CHAR < 0x10000 - const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); - while (in < unrolled_end) { -# if STRINGLIB_MAX_CHAR >= 0xd800 - if (((in[0] ^ 0xd800) & - (in[1] ^ 0xd800) & - (in[2] ^ 0xd800) & - (in[3] ^ 0xd800) & 0xf800) == 0) - break; -# endif - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; - in += 4; out += 4; - } -#endif - while (in < end) { - Py_UCS4 ch; - ch = *in++; -#if STRINGLIB_MAX_CHAR >= 0xd800 - if (ch < 0xd800) - *out++ = ch; - else if (ch < 0xe000) - /* reject surrogate characters (U+DC800-U+DFFF) */ - goto fail; -# if STRINGLIB_MAX_CHAR >= 0x10000 - else if (ch >= 0x10000) { - out[0] = Py_UNICODE_HIGH_SURROGATE(ch); - out[1] = Py_UNICODE_LOW_SURROGATE(ch); - out += 2; - } -# endif - else -#endif - *out++ = ch; - } - } else { -#if STRINGLIB_MAX_CHAR < 0x10000 - const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); - while (in < unrolled_end) { -# if STRINGLIB_MAX_CHAR >= 0xd800 - if (((in[0] ^ 0xd800) & - (in[1] ^ 0xd800) & - (in[2] ^ 0xd800) & - (in[3] ^ 0xd800) & 0xf800) == 0) - break; -# endif - out[0] = SWAB2(in[0]); - out[1] = SWAB2(in[1]); - out[2] = SWAB2(in[2]); - out[3] = SWAB2(in[3]); - in += 4; 
out += 4; - } -#endif - while (in < end) { - Py_UCS4 ch = *in++; -#if STRINGLIB_MAX_CHAR >= 0xd800 - if (ch < 0xd800) - *out++ = SWAB2((Py_UCS2)ch); - else if (ch < 0xe000) - /* reject surrogate characters (U+DC800-U+DFFF) */ - goto fail; -# if STRINGLIB_MAX_CHAR >= 0x10000 - else if (ch >= 0x10000) { - Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch); - Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch); - out[0] = SWAB2(ch1); - out[1] = SWAB2(ch2); - out += 2; - } -# endif - else -#endif - *out++ = SWAB2((Py_UCS2)ch); - } - } - *outptr = out; - return len; -#if STRINGLIB_MAX_CHAR >= 0xd800 - fail: -#endif - *outptr = out; - return len - (end - in + 1); -} -#endif - -#undef SWAB2 - #if STRINGLIB_MAX_CHAR >= 0x80 Py_LOCAL_INLINE(Py_ssize_t) STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in, -- cgit v1.2.1 From 1f4288b872047826fa7bc66ab5fe785e61a095b1 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 20 Nov 2013 11:46:18 +0100 Subject: ssue #19183: Implement PEP 456 'secure and interchangeable hash algorithm'. Python now uses SipHash24 on all major platforms. 
--- Objects/bytesobject.c | 2 +- Objects/memoryobject.c | 2 +- Objects/object.c | 146 ------------------------------------------------ Objects/unicodeobject.c | 35 +----------- 4 files changed, 4 insertions(+), 181 deletions(-) (limited to 'Objects') diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index efa0192a01..8217b1eab3 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -897,7 +897,7 @@ bytes_hash(PyBytesObject *a) { if (a->ob_shash == -1) { /* Can't fail */ - a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a)); + a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a)); } return a->ob_shash; } diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index 1d52d9dbd5..cb644b822b 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -2742,7 +2742,7 @@ memory_hash(PyMemoryViewObject *self) } /* Can't fail */ - self->hash = _Py_HashBytes((unsigned char *)mem, view->len); + self->hash = _Py_HashBytes(mem, view->len); if (mem != view->buf) PyMem_Free(mem); diff --git a/Objects/object.c b/Objects/object.c index acc34af349..395e28d63e 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -731,150 +731,6 @@ PyObject_RichCompareBool(PyObject *v, PyObject *w, int op) return ok; } -/* Set of hash utility functions to help maintaining the invariant that - if a==b then hash(a)==hash(b) - - All the utility functions (_Py_Hash*()) return "-1" to signify an error. -*/ - -/* For numeric types, the hash of a number x is based on the reduction - of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that - hash(x) == hash(y) whenever x and y are numerically equal, even if - x and y have different types. - - A quick summary of the hashing strategy: - - (1) First define the 'reduction of x modulo P' for any rational - number x; this is a standard extension of the usual notion of - reduction modulo P for integers. 
If x == p/q (written in lowest - terms), the reduction is interpreted as the reduction of p times - the inverse of the reduction of q, all modulo P; if q is exactly - divisible by P then define the reduction to be infinity. So we've - got a well-defined map - - reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }. - - (2) Now for a rational number x, define hash(x) by: - - reduce(x) if x >= 0 - -reduce(-x) if x < 0 - - If the result of the reduction is infinity (this is impossible for - integers, floats and Decimals) then use the predefined hash value - _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead. - _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the - hashes of float and Decimal infinities and nans. - - A selling point for the above strategy is that it makes it possible - to compute hashes of decimal and binary floating-point numbers - efficiently, even if the exponent of the binary or decimal number - is large. The key point is that - - reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS) - - provided that {reduce(x), reduce(y)} != {0, infinity}. The reduction of a - binary or decimal float is never infinity, since the denominator is a power - of 2 (for binary) or a divisor of a power of 10 (for decimal). So we have, - for nonnegative x, - - reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS - - reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS - - and reduce(10**e) can be computed efficiently by the usual modular - exponentiation algorithm. For reduce(2**e) it's even better: since - P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication - by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits. - - */ - -Py_hash_t -_Py_HashDouble(double v) -{ - int e, sign; - double m; - Py_uhash_t x, y; - - if (!Py_IS_FINITE(v)) { - if (Py_IS_INFINITY(v)) - return v > 0 ? 
_PyHASH_INF : -_PyHASH_INF; - else - return _PyHASH_NAN; - } - - m = frexp(v, &e); - - sign = 1; - if (m < 0) { - sign = -1; - m = -m; - } - - /* process 28 bits at a time; this should work well both for binary - and hexadecimal floating point. */ - x = 0; - while (m) { - x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28); - m *= 268435456.0; /* 2**28 */ - e -= 28; - y = (Py_uhash_t)m; /* pull out integer part */ - m -= y; - x += y; - if (x >= _PyHASH_MODULUS) - x -= _PyHASH_MODULUS; - } - - /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */ - e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS); - x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e); - - x = x * sign; - if (x == (Py_uhash_t)-1) - x = (Py_uhash_t)-2; - return (Py_hash_t)x; -} - -Py_hash_t -_Py_HashPointer(void *p) -{ - Py_hash_t x; - size_t y = (size_t)p; - /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid - excessive hash collisions for dicts and sets */ - y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4)); - x = (Py_hash_t)y; - if (x == -1) - x = -2; - return x; -} - -Py_hash_t -_Py_HashBytes(unsigned char *p, Py_ssize_t len) -{ - Py_uhash_t x; - Py_ssize_t i; - - /* - We make the hash of the empty string be 0, rather than using - (prefix ^ suffix), since this slightly obfuscates the hash secret - */ -#ifdef Py_DEBUG - assert(_Py_HashSecret_Initialized); -#endif - if (len == 0) { - return 0; - } - x = (Py_uhash_t) _Py_HashSecret.prefix; - x ^= (Py_uhash_t) *p << 7; - for (i = 0; i < len; i++) - x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++; - x ^= (Py_uhash_t) len; - x ^= (Py_uhash_t) _Py_HashSecret.suffix; - if (x == -1) - x = -2; - return x; -} - Py_hash_t PyObject_HashNotImplemented(PyObject *v) { @@ -883,8 +739,6 @@ PyObject_HashNotImplemented(PyObject *v) return -1; } -_Py_HashSecret_t _Py_HashSecret; - Py_hash_t PyObject_Hash(PyObject *v) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 
880889e8a2..3644db3b13 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11386,39 +11386,8 @@ unicode_hash(PyObject *self) _PyUnicode_HASH(self) = 0; return 0; } - - /* The hash function as a macro, gets expanded three times below. */ -#define HASH(P) \ - x ^= (Py_uhash_t) *P << 7; \ - while (--len >= 0) \ - x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *P++; \ - - x = (Py_uhash_t) _Py_HashSecret.prefix; - switch (PyUnicode_KIND(self)) { - case PyUnicode_1BYTE_KIND: { - const unsigned char *c = PyUnicode_1BYTE_DATA(self); - HASH(c); - break; - } - case PyUnicode_2BYTE_KIND: { - const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self); - HASH(s); - break; - } - default: { - Py_UCS4 *l; - assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND && - "Impossible switch case in unicode_hash"); - l = PyUnicode_4BYTE_DATA(self); - HASH(l); - break; - } - } - x ^= (Py_uhash_t) PyUnicode_GET_LENGTH(self); - x ^= (Py_uhash_t) _Py_HashSecret.suffix; - - if (x == -1) - x = -2; + x = _Py_HashBytes(PyUnicode_DATA(self), + PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self)); _PyUnicode_HASH(self) = x; return x; } -- cgit v1.2.1 From 85a662a719a506b3920a65b73c3d3e9e71990491 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 21 Nov 2013 12:16:35 +0100 Subject: Close #19578: Fix list_ass_subscript(), handle list_resize() failure Notify the caller of the failure (MemoryError exception). 
--- Objects/listobject.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/listobject.c b/Objects/listobject.c index 7d5674c9f4..5b75968fb3 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2483,6 +2483,7 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) PyObject **garbage; size_t cur; Py_ssize_t i; + int res; if (slicelength <= 0) return 0; @@ -2533,14 +2534,14 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) } Py_SIZE(self) -= slicelength; - list_resize(self, Py_SIZE(self)); + res = list_resize(self, Py_SIZE(self)); for (i = 0; i < slicelength; i++) { Py_DECREF(garbage[i]); } PyMem_FREE(garbage); - return 0; + return res; } else { /* assign slice */ -- cgit v1.2.1 From d84e0e2beba45d9524bfe5d9d3b6543bb0c5a243 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 21 Nov 2013 12:29:51 +0100 Subject: Close #19568: Fix bytearray_setslice_linear(), fix handling of PyByteArray_Resize() failure: leave the bytearray object in an consistent state. If growth < 0, handling the memory allocation failure is tricky here because the bytearray object has already been modified. If lo != 0, the operation is completed, but a MemoryError is still raised and the memory block is not shrinked. If lo == 0, the bytearray is restored in its previous state and a MemoryError is raised. 
--- Objects/bytearrayobject.c | 100 +++++++++++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 37 deletions(-) (limited to 'Objects') diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 400da1c1ac..31cc4ccebd 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -453,54 +453,80 @@ bytearray_setslice_linear(PyByteArrayObject *self, Py_ssize_t avail = hi - lo; char *buf = PyByteArray_AS_STRING(self); Py_ssize_t growth = bytes_len - avail; + int res = 0; assert(avail >= 0); - if (growth != 0) { - if (growth < 0) { - if (!_canresize(self)) - return -1; + if (growth < 0) { + if (!_canresize(self)) + return -1; + + if (lo == 0) { + /* Shrink the buffer by advancing its logical start */ + self->ob_start -= growth; + /* + 0 lo hi old_size + | |<----avail----->|<-----tail------>| + | |<-bytes_len->|<-----tail------>| + 0 new_lo new_hi new_size + */ + } + else { + /* + 0 lo hi old_size + | |<----avail----->|<-----tomove------>| + | |<-bytes_len->|<-----tomove------>| + 0 lo new_hi new_size + */ + memmove(buf + lo + bytes_len, buf + hi, + Py_SIZE(self) - hi); + } + if (PyByteArray_Resize((PyObject *)self, + Py_SIZE(self) + growth) < 0) { + /* Issue #19578: Handling the memory allocation failure here is + tricky here because the bytearray object has already been + modified. Depending on growth and lo, the behaviour is + different. + + If growth < 0 and lo != 0, the operation is completed, but a + MemoryError is still raised and the memory block is not + shrinked. Otherwise, the bytearray is restored in its previous + state and a MemoryError is raised. 
*/ if (lo == 0) { - /* Shrink the buffer by advancing its logical start */ - self->ob_start -= growth; - /* - 0 lo hi old_size - | |<----avail----->|<-----tail------>| - | |<-bytes_len->|<-----tail------>| - 0 new_lo new_hi new_size - */ - } - else { - /* - 0 lo hi old_size - | |<----avail----->|<-----tomove------>| - | |<-bytes_len->|<-----tomove------>| - 0 lo new_hi new_size - */ - memmove(buf + lo + bytes_len, buf + hi, - Py_SIZE(self) - hi); + self->ob_start += growth; + return -1; } + /* memmove() removed bytes, the bytearray object cannot be + restored in its previous state. */ + Py_SIZE(self) += growth; + res = -1; } - /* XXX(nnorwitz): need to verify this can't overflow! */ - if (PyByteArray_Resize( - (PyObject *)self, Py_SIZE(self) + growth) < 0) - return -1; buf = PyByteArray_AS_STRING(self); - if (growth > 0) { - /* Make the place for the additional bytes */ - /* - 0 lo hi old_size - | |<-avail->|<-----tomove------>| - | |<---bytes_len-->|<-----tomove------>| - 0 lo new_hi new_size - */ - memmove(buf + lo + bytes_len, buf + hi, - Py_SIZE(self) - lo - bytes_len); + } + else if (growth > 0) { + if (Py_SIZE(self) > (Py_ssize_t)PY_SSIZE_T_MAX - growth) { + PyErr_NoMemory(); + return -1; } + + if (PyByteArray_Resize((PyObject *)self, + Py_SIZE(self) + growth) < 0) { + return -1; + } + buf = PyByteArray_AS_STRING(self); + /* Make the place for the additional bytes */ + /* + 0 lo hi old_size + | |<-avail->|<-----tomove------>| + | |<---bytes_len-->|<-----tomove------>| + 0 lo new_hi new_size + */ + memmove(buf + lo + bytes_len, buf + hi, + Py_SIZE(self) - lo - bytes_len); } if (bytes_len > 0) memcpy(buf + lo, bytes, bytes_len); - return 0; + return res; } static int -- cgit v1.2.1 From 27e38b38ecb79431f7a7b33292399bfd6263924e Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Fri, 22 Nov 2013 22:39:36 +1000 Subject: Issue #19619: Blacklist non-text codecs in method API str.encode, bytes.decode and bytearray.decode now use an internal API to throw LookupError 
for known non-text encodings, rather than attempting the encoding or decoding operation and then throwing a TypeError for an unexpected output type. The latter mechanism remains in place for third party non-text encodings. --- Objects/unicodeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3644db3b13..7de5f1f40c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3044,7 +3044,7 @@ PyUnicode_Decode(const char *s, buffer = PyMemoryView_FromBuffer(&info); if (buffer == NULL) goto onError; - unicode = PyCodec_Decode(buffer, encoding, errors); + unicode = _PyCodec_DecodeText(buffer, encoding, errors); if (unicode == NULL) goto onError; if (!PyUnicode_Check(unicode)) { @@ -3410,7 +3410,7 @@ PyUnicode_AsEncodedString(PyObject *unicode, } /* Encode via the codec registry */ - v = PyCodec_Encode(unicode, encoding, errors); + v = _PyCodec_EncodeText(unicode, encoding, errors); if (v == NULL) return NULL; -- cgit v1.2.1 From af248a41919513d573cd57e6cb6cdfd4a9dde66b Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 22 Nov 2013 09:05:39 -0700 Subject: Implement PEP 451 (ModuleSpec). 
--- Objects/moduleobject.c | 53 +++++--------------------------------------------- 1 file changed, 5 insertions(+), 48 deletions(-) (limited to 'Objects') diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index 3ea3be8b82..d59475e2f3 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -45,6 +45,8 @@ module_init_dict(PyModuleObject *mod, PyObject *md_dict, return -1; if (PyDict_SetItemString(md_dict, "__loader__", Py_None) != 0) return -1; + if (PyDict_SetItemString(md_dict, "__spec__", Py_None) != 0) + return -1; if (PyUnicode_CheckExact(name)) { Py_INCREF(name); Py_XDECREF(mod->md_name); @@ -398,55 +400,10 @@ module_dealloc(PyModuleObject *m) static PyObject * module_repr(PyModuleObject *m) { - PyObject *name, *filename, *repr, *loader = NULL; + PyThreadState *tstate = PyThreadState_GET(); + PyInterpreterState *interp = tstate->interp; - /* See if the module has an __loader__. If it does, give the loader the - * first shot at producing a repr for the module. - */ - if (m->md_dict != NULL) { - loader = PyDict_GetItemString(m->md_dict, "__loader__"); - } - if (loader != NULL && loader != Py_None) { - repr = PyObject_CallMethod(loader, "module_repr", "(O)", - (PyObject *)m, NULL); - if (repr == NULL) { - PyErr_Clear(); - } - else { - return repr; - } - } - /* __loader__.module_repr(m) did not provide us with a repr. Next, see if - * the module has an __file__. If it doesn't then use repr(__loader__) if - * it exists, otherwise, just use module.__name__. 
- */ - name = PyModule_GetNameObject((PyObject *)m); - if (name == NULL) { - PyErr_Clear(); - name = PyUnicode_FromStringAndSize("?", 1); - if (name == NULL) - return NULL; - } - filename = PyModule_GetFilenameObject((PyObject *)m); - if (filename == NULL) { - PyErr_Clear(); - /* There's no m.__file__, so if there was a __loader__, use that in - * the repr, otherwise, the only thing you can use is m.__name__ - */ - if (loader == NULL || loader == Py_None) { - repr = PyUnicode_FromFormat("", name); - } - else { - repr = PyUnicode_FromFormat("", name, loader); - } - } - /* Finally, use m.__file__ */ - else { - repr = PyUnicode_FromFormat("", name, filename); - Py_DECREF(filename); - } - Py_DECREF(name); - return repr; + return PyObject_CallMethod(interp->importlib, "_module_repr", "O", m); } static int -- cgit v1.2.1 From 472532cb24e6ee8d18c52e2cb7c4d6b662c90285 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sat, 23 Nov 2013 18:59:12 +0100 Subject: Issue #17810: Implement PEP 3154, pickle protocol 4. Most of the work is by Alexandre. 
--- Objects/classobject.c | 26 ++- Objects/descrobject.c | 45 ++++- Objects/typeobject.c | 466 ++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 438 insertions(+), 99 deletions(-) (limited to 'Objects') diff --git a/Objects/classobject.c b/Objects/classobject.c index 27f7ef4522..272f575dba 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -69,6 +69,30 @@ PyMethod_New(PyObject *func, PyObject *self) return (PyObject *)im; } +static PyObject * +method_reduce(PyMethodObject *im) +{ + PyObject *self = PyMethod_GET_SELF(im); + PyObject *func = PyMethod_GET_FUNCTION(im); + PyObject *builtins; + PyObject *getattr; + PyObject *funcname; + _Py_IDENTIFIER(getattr); + + funcname = _PyObject_GetAttrId(func, &PyId___name__); + if (funcname == NULL) { + return NULL; + } + builtins = PyEval_GetBuiltins(); + getattr = _PyDict_GetItemId(builtins, &PyId_getattr); + return Py_BuildValue("O(ON)", getattr, self, funcname); +} + +static PyMethodDef method_methods[] = { + {"__reduce__", (PyCFunction)method_reduce, METH_NOARGS, NULL}, + {NULL, NULL} +}; + /* Descriptors for PyMethod attributes */ /* im_func and im_self are stored in the PyMethod object */ @@ -367,7 +391,7 @@ PyTypeObject PyMethod_Type = { offsetof(PyMethodObject, im_weakreflist), /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + method_methods, /* tp_methods */ method_memberlist, /* tp_members */ method_getset, /* tp_getset */ 0, /* tp_base */ diff --git a/Objects/descrobject.c b/Objects/descrobject.c index d4f8048fa6..da88f8668a 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -398,6 +398,24 @@ descr_get_qualname(PyDescrObject *descr) return descr->d_qualname; } +static PyObject * +descr_reduce(PyDescrObject *descr) +{ + PyObject *builtins; + PyObject *getattr; + _Py_IDENTIFIER(getattr); + + builtins = PyEval_GetBuiltins(); + getattr = _PyDict_GetItemId(builtins, &PyId_getattr); + return Py_BuildValue("O(OO)", getattr, 
PyDescr_TYPE(descr), + PyDescr_NAME(descr)); +} + +static PyMethodDef descr_methods[] = { + {"__reduce__", (PyCFunction)descr_reduce, METH_NOARGS, NULL}, + {NULL, NULL} +}; + static PyMemberDef descr_members[] = { {"__objclass__", T_OBJECT, offsetof(PyDescrObject, d_type), READONLY}, {"__name__", T_OBJECT, offsetof(PyDescrObject, d_name), READONLY}, @@ -494,7 +512,7 @@ PyTypeObject PyMethodDescr_Type = { 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + descr_methods, /* tp_methods */ descr_members, /* tp_members */ method_getset, /* tp_getset */ 0, /* tp_base */ @@ -532,7 +550,7 @@ PyTypeObject PyClassMethodDescr_Type = { 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + descr_methods, /* tp_methods */ descr_members, /* tp_members */ method_getset, /* tp_getset */ 0, /* tp_base */ @@ -569,7 +587,7 @@ PyTypeObject PyMemberDescr_Type = { 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + descr_methods, /* tp_methods */ descr_members, /* tp_members */ member_getset, /* tp_getset */ 0, /* tp_base */ @@ -643,7 +661,7 @@ PyTypeObject PyWrapperDescr_Type = { 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + descr_methods, /* tp_methods */ descr_members, /* tp_members */ wrapperdescr_getset, /* tp_getset */ 0, /* tp_base */ @@ -1085,6 +1103,23 @@ wrapper_repr(wrapperobject *wp) wp->self); } +static PyObject * +wrapper_reduce(wrapperobject *wp) +{ + PyObject *builtins; + PyObject *getattr; + _Py_IDENTIFIER(getattr); + + builtins = PyEval_GetBuiltins(); + getattr = _PyDict_GetItemId(builtins, &PyId_getattr); + return Py_BuildValue("O(OO)", getattr, wp->self, PyDescr_NAME(wp->descr)); +} + +static PyMethodDef wrapper_methods[] = { + {"__reduce__", (PyCFunction)wrapper_reduce, METH_NOARGS, NULL}, + {NULL, NULL} +}; + static PyMemberDef wrapper_members[] = { {"__self__", T_OBJECT, offsetof(wrapperobject, self), READONLY}, 
{0} @@ -1193,7 +1228,7 @@ PyTypeObject _PyMethodWrapper_Type = { 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + wrapper_methods, /* tp_methods */ wrapper_members, /* tp_members */ wrapper_getsets, /* tp_getset */ 0, /* tp_base */ diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 09f77fa53d..5e951de85e 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3405,150 +3405,429 @@ import_copyreg(void) return cached_copyreg_module; } -static PyObject * -slotnames(PyObject *cls) +Py_LOCAL(PyObject *) +_PyType_GetSlotNames(PyTypeObject *cls) { - PyObject *clsdict; PyObject *copyreg; PyObject *slotnames; _Py_IDENTIFIER(__slotnames__); _Py_IDENTIFIER(_slotnames); - clsdict = ((PyTypeObject *)cls)->tp_dict; - slotnames = _PyDict_GetItemId(clsdict, &PyId___slotnames__); - if (slotnames != NULL && PyList_Check(slotnames)) { + assert(PyType_Check(cls)); + + /* Get the slot names from the cache in the class if possible. */ + slotnames = _PyDict_GetItemIdWithError(cls->tp_dict, &PyId___slotnames__); + if (slotnames != NULL) { + if (slotnames != Py_None && !PyList_Check(slotnames)) { + PyErr_Format(PyExc_TypeError, + "%.200s.__slotnames__ should be a list or None, " + "not %.200s", + cls->tp_name, Py_TYPE(slotnames)->tp_name); + return NULL; + } Py_INCREF(slotnames); return slotnames; } + else { + if (PyErr_Occurred()) { + return NULL; + } + /* The class does not have the slot names cached yet. */ + } copyreg = import_copyreg(); if (copyreg == NULL) return NULL; - slotnames = _PyObject_CallMethodId(copyreg, &PyId__slotnames, "O", cls); + /* Use _slotnames function from the copyreg module to find the slots + by this class and its bases. This function will cache the result + in __slotnames__. 
*/ + slotnames = _PyObject_CallMethodIdObjArgs(copyreg, &PyId__slotnames, + cls, NULL); Py_DECREF(copyreg); - if (slotnames != NULL && - slotnames != Py_None && - !PyList_Check(slotnames)) - { + if (slotnames == NULL) + return NULL; + + if (slotnames != Py_None && !PyList_Check(slotnames)) { PyErr_SetString(PyExc_TypeError, - "copyreg._slotnames didn't return a list or None"); + "copyreg._slotnames didn't return a list or None"); Py_DECREF(slotnames); - slotnames = NULL; + return NULL; } return slotnames; } -static PyObject * -reduce_2(PyObject *obj) +Py_LOCAL(PyObject *) +_PyObject_GetState(PyObject *obj) { - PyObject *cls, *getnewargs; - PyObject *args = NULL, *args2 = NULL; - PyObject *getstate = NULL, *state = NULL, *names = NULL; - PyObject *slots = NULL, *listitems = NULL, *dictitems = NULL; - PyObject *copyreg = NULL, *newobj = NULL, *res = NULL; - Py_ssize_t i, n; - _Py_IDENTIFIER(__getnewargs__); + PyObject *state; + PyObject *getstate; _Py_IDENTIFIER(__getstate__); - _Py_IDENTIFIER(__newobj__); - cls = (PyObject *) Py_TYPE(obj); + getstate = _PyObject_GetAttrId(obj, &PyId___getstate__); + if (getstate == NULL) { + PyObject *slotnames; - getnewargs = _PyObject_GetAttrId(obj, &PyId___getnewargs__); - if (getnewargs != NULL) { - args = PyObject_CallObject(getnewargs, NULL); - Py_DECREF(getnewargs); - if (args != NULL && !PyTuple_Check(args)) { - PyErr_Format(PyExc_TypeError, - "__getnewargs__ should return a tuple, " - "not '%.200s'", Py_TYPE(args)->tp_name); - goto end; + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { + return NULL; } - } - else { PyErr_Clear(); - args = PyTuple_New(0); - } - if (args == NULL) - goto end; - getstate = _PyObject_GetAttrId(obj, &PyId___getstate__); - if (getstate != NULL) { - state = PyObject_CallObject(getstate, NULL); - Py_DECREF(getstate); - if (state == NULL) - goto end; - } - else { - PyObject **dict; - PyErr_Clear(); - dict = _PyObject_GetDictPtr(obj); - if (dict && *dict) - state = *dict; - else - state = 
Py_None; - Py_INCREF(state); - names = slotnames(cls); - if (names == NULL) - goto end; - if (names != Py_None && PyList_GET_SIZE(names) > 0) { - assert(PyList_Check(names)); + { + PyObject **dict; + dict = _PyObject_GetDictPtr(obj); + /* It is possible that the object's dict is not initialized + yet. In this case, we will return None for the state. + We also return None if the dict is empty to make the behavior + consistent regardless whether the dict was initialized or not. + This make unit testing easier. */ + if (dict != NULL && *dict != NULL && PyDict_Size(*dict) > 0) { + state = *dict; + } + else { + state = Py_None; + } + Py_INCREF(state); + } + + slotnames = _PyType_GetSlotNames(Py_TYPE(obj)); + if (slotnames == NULL) { + Py_DECREF(state); + return NULL; + } + + assert(slotnames == Py_None || PyList_Check(slotnames)); + if (slotnames != Py_None && Py_SIZE(slotnames) > 0) { + PyObject *slots; + Py_ssize_t slotnames_size, i; + slots = PyDict_New(); - if (slots == NULL) - goto end; - n = 0; - /* Can't pre-compute the list size; the list - is stored on the class so accessible to other - threads, which may be run by DECREF */ - for (i = 0; i < PyList_GET_SIZE(names); i++) { + if (slots == NULL) { + Py_DECREF(slotnames); + Py_DECREF(state); + return NULL; + } + + slotnames_size = Py_SIZE(slotnames); + for (i = 0; i < slotnames_size; i++) { PyObject *name, *value; - name = PyList_GET_ITEM(names, i); + + name = PyList_GET_ITEM(slotnames, i); value = PyObject_GetAttr(obj, name); - if (value == NULL) + if (value == NULL) { + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { + goto error; + } + /* It is not an error if the attribute is not present. 
*/ PyErr_Clear(); + } else { - int err = PyDict_SetItem(slots, name, - value); + int err = PyDict_SetItem(slots, name, value); Py_DECREF(value); - if (err) - goto end; - n++; + if (err) { + goto error; + } + } + + /* The list is stored on the class so it may mutates while we + iterate over it */ + if (slotnames_size != Py_SIZE(slotnames)) { + PyErr_Format(PyExc_RuntimeError, + "__slotsname__ changed size during iteration"); + goto error; + } + + /* We handle errors within the loop here. */ + if (0) { + error: + Py_DECREF(slotnames); + Py_DECREF(slots); + Py_DECREF(state); + return NULL; } } - if (n) { - state = Py_BuildValue("(NO)", state, slots); - if (state == NULL) - goto end; + + /* If we found some slot attributes, pack them in a tuple along + the orginal attribute dictionary. */ + if (PyDict_Size(slots) > 0) { + PyObject *state2; + + state2 = PyTuple_Pack(2, state, slots); + Py_DECREF(state); + if (state2 == NULL) { + Py_DECREF(slotnames); + Py_DECREF(slots); + return NULL; + } + state = state2; } + Py_DECREF(slots); + } + Py_DECREF(slotnames); + } + else { /* getstate != NULL */ + state = PyObject_CallObject(getstate, NULL); + Py_DECREF(getstate); + if (state == NULL) + return NULL; + } + + return state; +} + +Py_LOCAL(int) +_PyObject_GetNewArguments(PyObject *obj, PyObject **args, PyObject **kwargs) +{ + PyObject *getnewargs, *getnewargs_ex; + _Py_IDENTIFIER(__getnewargs_ex__); + _Py_IDENTIFIER(__getnewargs__); + + if (args == NULL || kwargs == NULL) { + PyErr_BadInternalCall(); + return -1; + } + + /* We first attempt to fetch the arguments for __new__ by calling + __getnewargs_ex__ on the object. 
*/ + getnewargs_ex = _PyObject_GetAttrId(obj, &PyId___getnewargs_ex__); + if (getnewargs_ex != NULL) { + PyObject *newargs = PyObject_CallObject(getnewargs_ex, NULL); + Py_DECREF(getnewargs_ex); + if (newargs == NULL) { + return -1; + } + if (!PyTuple_Check(newargs)) { + PyErr_Format(PyExc_TypeError, + "__getnewargs_ex__ should return a tuple, " + "not '%.200s'", Py_TYPE(newargs)->tp_name); + Py_DECREF(newargs); + return -1; + } + if (Py_SIZE(newargs) != 2) { + PyErr_Format(PyExc_ValueError, + "__getnewargs_ex__ should return a tuple of " + "length 2, not %zd", Py_SIZE(newargs)); + Py_DECREF(newargs); + return -1; + } + *args = PyTuple_GET_ITEM(newargs, 0); + Py_INCREF(*args); + *kwargs = PyTuple_GET_ITEM(newargs, 1); + Py_INCREF(*kwargs); + Py_DECREF(newargs); + + /* XXX We should perhaps allow None to be passed here. */ + if (!PyTuple_Check(*args)) { + PyErr_Format(PyExc_TypeError, + "first item of the tuple returned by " + "__getnewargs_ex__ must be a tuple, not '%.200s'", + Py_TYPE(*args)->tp_name); + Py_CLEAR(*args); + Py_CLEAR(*kwargs); + return -1; + } + if (!PyDict_Check(*kwargs)) { + PyErr_Format(PyExc_TypeError, + "second item of the tuple returned by " + "__getnewargs_ex__ must be a dict, not '%.200s'", + Py_TYPE(*kwargs)->tp_name); + Py_CLEAR(*args); + Py_CLEAR(*kwargs); + return -1; + } + return 0; + } else { + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { + return -1; + } + PyErr_Clear(); + } + + /* The object does not have __getnewargs_ex__ so we fallback on using + __getnewargs__ instead. 
*/ + getnewargs = _PyObject_GetAttrId(obj, &PyId___getnewargs__); + if (getnewargs != NULL) { + *args = PyObject_CallObject(getnewargs, NULL); + Py_DECREF(getnewargs); + if (*args == NULL) { + return -1; + } + if (!PyTuple_Check(*args)) { + PyErr_Format(PyExc_TypeError, + "__getnewargs__ should return a tuple, " + "not '%.200s'", Py_TYPE(*args)->tp_name); + Py_CLEAR(*args); + return -1; + } + *kwargs = NULL; + return 0; + } else { + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { + return -1; } + PyErr_Clear(); + } + + /* The object does not have __getnewargs_ex__ and __getnewargs__. This may + means __new__ does not takes any arguments on this object, or that the + object does not implement the reduce protocol for pickling or + copying. */ + *args = NULL; + *kwargs = NULL; + return 0; +} + +Py_LOCAL(int) +_PyObject_GetItemsIter(PyObject *obj, PyObject **listitems, + PyObject **dictitems) +{ + if (listitems == NULL || dictitems == NULL) { + PyErr_BadInternalCall(); + return -1; } if (!PyList_Check(obj)) { - listitems = Py_None; - Py_INCREF(listitems); + *listitems = Py_None; + Py_INCREF(*listitems); } else { - listitems = PyObject_GetIter(obj); + *listitems = PyObject_GetIter(obj); if (listitems == NULL) - goto end; + return -1; } if (!PyDict_Check(obj)) { - dictitems = Py_None; - Py_INCREF(dictitems); + *dictitems = Py_None; + Py_INCREF(*dictitems); } else { + PyObject *items; _Py_IDENTIFIER(items); - PyObject *items = _PyObject_CallMethodId(obj, &PyId_items, ""); - if (items == NULL) - goto end; - dictitems = PyObject_GetIter(items); + + items = _PyObject_CallMethodIdObjArgs(obj, &PyId_items, NULL); + if (items == NULL) { + Py_CLEAR(*listitems); + return -1; + } + *dictitems = PyObject_GetIter(items); Py_DECREF(items); - if (dictitems == NULL) - goto end; + if (*dictitems == NULL) { + Py_CLEAR(*listitems); + return -1; + } + } + + assert(*listitems != NULL && *dictitems != NULL); + + return 0; +} + +static PyObject * +reduce_4(PyObject *obj) +{ + PyObject 
*args = NULL, *kwargs = NULL; + PyObject *copyreg; + PyObject *newobj, *newargs, *state, *listitems, *dictitems; + PyObject *result; + _Py_IDENTIFIER(__newobj_ex__); + + if (_PyObject_GetNewArguments(obj, &args, &kwargs) < 0) { + return NULL; + } + if (args == NULL) { + args = PyTuple_New(0); + if (args == NULL) + return NULL; + } + if (kwargs == NULL) { + kwargs = PyDict_New(); + if (kwargs == NULL) + return NULL; } + copyreg = import_copyreg(); + if (copyreg == NULL) { + Py_DECREF(args); + Py_DECREF(kwargs); + return NULL; + } + newobj = _PyObject_GetAttrId(copyreg, &PyId___newobj_ex__); + Py_DECREF(copyreg); + if (newobj == NULL) { + Py_DECREF(args); + Py_DECREF(kwargs); + return NULL; + } + newargs = PyTuple_Pack(3, Py_TYPE(obj), args, kwargs); + Py_DECREF(args); + Py_DECREF(kwargs); + if (newargs == NULL) { + Py_DECREF(newobj); + return NULL; + } + state = _PyObject_GetState(obj); + if (state == NULL) { + Py_DECREF(newobj); + Py_DECREF(newargs); + return NULL; + } + if (_PyObject_GetItemsIter(obj, &listitems, &dictitems) < 0) { + Py_DECREF(newobj); + Py_DECREF(newargs); + Py_DECREF(state); + return NULL; + } + + result = PyTuple_Pack(5, newobj, newargs, state, listitems, dictitems); + Py_DECREF(newobj); + Py_DECREF(newargs); + Py_DECREF(state); + Py_DECREF(listitems); + Py_DECREF(dictitems); + return result; +} + +static PyObject * +reduce_2(PyObject *obj) +{ + PyObject *cls; + PyObject *args = NULL, *args2 = NULL, *kwargs = NULL; + PyObject *state = NULL, *listitems = NULL, *dictitems = NULL; + PyObject *copyreg = NULL, *newobj = NULL, *res = NULL; + Py_ssize_t i, n; + _Py_IDENTIFIER(__newobj__); + + if (_PyObject_GetNewArguments(obj, &args, &kwargs) < 0) { + return NULL; + } + if (args == NULL) { + assert(kwargs == NULL); + args = PyTuple_New(0); + if (args == NULL) { + return NULL; + } + } + else if (kwargs != NULL) { + if (PyDict_Size(kwargs) > 0) { + PyErr_SetString(PyExc_ValueError, + "must use protocol 4 or greater to copy this " + "object; since 
__getnewargs_ex__ returned " + "keyword arguments."); + Py_DECREF(args); + Py_DECREF(kwargs); + return NULL; + } + Py_CLEAR(kwargs); + } + + state = _PyObject_GetState(obj); + if (state == NULL) + goto end; + + if (_PyObject_GetItemsIter(obj, &listitems, &dictitems) < 0) + goto end; + copyreg = import_copyreg(); if (copyreg == NULL) goto end; @@ -3560,6 +3839,7 @@ reduce_2(PyObject *obj) args2 = PyTuple_New(n+1); if (args2 == NULL) goto end; + cls = (PyObject *) Py_TYPE(obj); Py_INCREF(cls); PyTuple_SET_ITEM(args2, 0, cls); for (i = 0; i < n; i++) { @@ -3573,9 +3853,7 @@ reduce_2(PyObject *obj) end: Py_XDECREF(args); Py_XDECREF(args2); - Py_XDECREF(slots); Py_XDECREF(state); - Py_XDECREF(names); Py_XDECREF(listitems); Py_XDECREF(dictitems); Py_XDECREF(copyreg); @@ -3603,7 +3881,9 @@ _common_reduce(PyObject *self, int proto) { PyObject *copyreg, *res; - if (proto >= 2) + if (proto >= 4) + return reduce_4(self); + else if (proto >= 2) return reduce_2(self); copyreg = import_copyreg(); -- cgit v1.2.1 From 71e68e4d985a0b17184b1978474db759021224a0 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sat, 23 Nov 2013 21:19:43 +0100 Subject: Issue #17810: Fixed NULL check in _PyObject_GetItemsIter() CID 1131948: Logically dead code (DEADCODE) --- Objects/typeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5e951de85e..42a0a58b76 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3693,7 +3693,7 @@ _PyObject_GetItemsIter(PyObject *obj, PyObject **listitems, } else { *listitems = PyObject_GetIter(obj); - if (listitems == NULL) + if (*listitems == NULL) return -1; } -- cgit v1.2.1 From 0d082abc13877d4cb597a46baff8c8a52a91fcd1 Mon Sep 17 00:00:00 2001 From: Larry Hastings Date: Sat, 23 Nov 2013 14:54:00 -0800 Subject: Issue #19730: Argument Clinic now supports all the existing PyArg "format units" as legacy converters, as well as two new features: "self converters" 
and the "version" directive. --- Objects/unicodeobject.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'Objects') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7de5f1f40c..1f3164c9cf 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12924,10 +12924,10 @@ PyDoc_STRVAR(unicode_maketrans__doc__, {"maketrans", (PyCFunction)unicode_maketrans, METH_VARARGS|METH_STATIC, unicode_maketrans__doc__}, static PyObject * -unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z); +unicode_maketrans_impl(void *null, PyObject *x, PyObject *y, PyObject *z); static PyObject * -unicode_maketrans(PyObject *null, PyObject *args) +unicode_maketrans(void *null, PyObject *args) { PyObject *return_value = NULL; PyObject *x; @@ -12938,15 +12938,15 @@ unicode_maketrans(PyObject *null, PyObject *args) "O|UU:maketrans", &x, &y, &z)) goto exit; - return_value = unicode_maketrans_impl(x, y, z); + return_value = unicode_maketrans_impl(null, x, y, z); exit: return return_value; } static PyObject * -unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z) -/*[clinic checksum: 137db9c3199e7906b7967009f511c24fa3235b5f]*/ +unicode_maketrans_impl(void *null, PyObject *x, PyObject *y, PyObject *z) +/*[clinic checksum: 6d522e3aea2f2e123da3c5d367132a99d803f9b9]*/ { PyObject *new = NULL, *key, *value; Py_ssize_t i = 0; -- cgit v1.2.1 From ed72aeef785d775a72872c2eeb68610a03468c2a Mon Sep 17 00:00:00 2001 From: Larry Hastings Date: Sat, 23 Nov 2013 15:37:55 -0800 Subject: Issue #19674: inspect.signature() now produces a correct signature for some builtins. 
--- Objects/dictobject.c | 13 +++++---- Objects/methodobject.c | 71 +++++++++++++++++++++++++++++++++++++++++++++---- Objects/unicodeobject.c | 11 +++++--- 3 files changed, 81 insertions(+), 14 deletions(-) (limited to 'Objects') diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 3c1b3bba8c..bfc730ba70 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -69,6 +69,11 @@ to the combined-table form. #include "Python.h" #include "stringlib/eq.h" +/*[clinic] +class dict +[clinic]*/ +/*[clinic checksum: da39a3ee5e6b4b0d3255bfef95601890afd80709]*/ + typedef struct { /* Cached hash code of me_key. */ Py_hash_t me_hash; @@ -2160,7 +2165,6 @@ dict_richcompare(PyObject *v, PyObject *w, int op) } /*[clinic] -class dict @coexist dict.__contains__ @@ -2172,16 +2176,15 @@ True if D has a key k, else False" [clinic]*/ PyDoc_STRVAR(dict___contains____doc__, -"True if D has a key k, else False\"\n" -"\n" -"dict.__contains__(key)"); +"__contains__(key)\n" +"True if D has a key k, else False\""); #define DICT___CONTAINS___METHODDEF \ {"__contains__", (PyCFunction)dict___contains__, METH_O|METH_COEXIST, dict___contains____doc__}, static PyObject * dict___contains__(PyObject *self, PyObject *key) -/*[clinic checksum: 61c5c802ea1d35699a1a754f1f3538ea9b259cf4]*/ +/*[clinic checksum: 3bbac5ce898ae630d9668fa1c8b3afb645ff22e8]*/ { register PyDictObject *mp = (PyDictObject *)self; Py_hash_t hash; diff --git a/Objects/methodobject.c b/Objects/methodobject.c index 11c8b6e9c1..ca21a68e2e 100644 --- a/Objects/methodobject.c +++ b/Objects/methodobject.c @@ -159,15 +159,75 @@ meth_dealloc(PyCFunctionObject *m) } } +/* + * finds the docstring's introspection signature. + * if present, returns a pointer pointing to the first '('. + * otherwise returns NULL. 
+ */ +static const char *find_signature(PyCFunctionObject *m) +{ + const char *trace = m->m_ml->ml_doc; + const char *name = m->m_ml->ml_name; + size_t length; + if (!trace || !name) + return NULL; + length = strlen(name); + if (strncmp(trace, name, length)) + return NULL; + trace += length; + if (*trace != '(') + return NULL; + return trace; +} + +/* + * skips to the end of the docstring's instrospection signature. + */ +static const char *skip_signature(const char *trace) +{ + while (*trace && *trace != '\n') + trace++; + return trace; +} + +static const char *skip_eols(const char *trace) +{ + while (*trace == '\n') + trace++; + return trace; +} + +static PyObject * +meth_get__text_signature__(PyCFunctionObject *m, void *closure) +{ + const char *start = find_signature(m); + const char *trace; + + if (!start) { + Py_INCREF(Py_None); + return Py_None; + } + + trace = skip_signature(start); + return PyUnicode_FromStringAndSize(start, trace - start); +} + static PyObject * meth_get__doc__(PyCFunctionObject *m, void *closure) { - const char *doc = m->m_ml->ml_doc; + const char *doc = find_signature(m); + + if (doc) + doc = skip_eols(skip_signature(doc)); + else + doc = m->m_ml->ml_doc; + + if (!doc) { + Py_INCREF(Py_None); + return Py_None; + } - if (doc != NULL) - return PyUnicode_FromString(doc); - Py_INCREF(Py_None); - return Py_None; + return PyUnicode_FromString(doc); } static PyObject * @@ -236,6 +296,7 @@ static PyGetSetDef meth_getsets [] = { {"__name__", (getter)meth_get__name__, NULL, NULL}, {"__qualname__", (getter)meth_get__qualname__, NULL, NULL}, {"__self__", (getter)meth_get__self__, NULL, NULL}, + {"__text_signature__", (getter)meth_get__text_signature__, NULL, NULL}, {0} }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1f3164c9cf..34d51e404c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -47,6 +47,11 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
#include #endif +/*[clinic] +class str +[clinic]*/ +/*[clinic checksum: da39a3ee5e6b4b0d3255bfef95601890afd80709]*/ + /* --- Globals ------------------------------------------------------------ NOTE: In the interpreter's initialization phase, some globals are currently @@ -12883,7 +12888,6 @@ unicode_swapcase(PyObject *self) } /*[clinic] -class str @staticmethod str.maketrans as unicode_maketrans @@ -12908,10 +12912,9 @@ must be a string, whose characters will be mapped to None in the result. [clinic]*/ PyDoc_STRVAR(unicode_maketrans__doc__, +"maketrans(x, y=None, z=None)\n" "Return a translation table usable for str.translate().\n" "\n" -"str.maketrans(x, y=None, z=None)\n" -"\n" "If there is only one argument, it must be a dictionary mapping Unicode\n" "ordinals (integers) or characters to Unicode ordinals, strings or None.\n" "Character keys will be then converted to ordinals.\n" @@ -12946,7 +12949,7 @@ exit: static PyObject * unicode_maketrans_impl(void *null, PyObject *x, PyObject *y, PyObject *z) -/*[clinic checksum: 6d522e3aea2f2e123da3c5d367132a99d803f9b9]*/ +/*[clinic checksum: 7f76f414a0dfd0c614e0d4717872eeb520516da7]*/ { PyObject *new = NULL, *key, *value; Py_ssize_t i = 0; -- cgit v1.2.1 From bcdcda30998daef4fe2d2ee78683553ec31ff7ae Mon Sep 17 00:00:00 2001 From: Zachary Ware Date: Sun, 24 Nov 2013 01:19:09 -0600 Subject: Issue #3158: doctest can now find doctests in functions and methods written in C. As a part of this, a few doctests have been added to the builtins module (on hex(), oct(), and bin()), a doctest has been fixed (hopefully on all platforms) on float, and test_builtins now runs doctests in builtins. 
--- Objects/floatobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/floatobject.c b/Objects/floatobject.c index abea975c59..29c3b32763 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -1417,7 +1417,7 @@ Create a floating-point number from a hexadecimal string.\n\ >>> float.fromhex('0x1.ffffp10')\n\ 2047.984375\n\ >>> float.fromhex('-0x1p-1074')\n\ --4.9406564584124654e-324"); +-5e-324"); static PyObject * -- cgit v1.2.1 From 7c9f988c2374e240b2bc65e6f30371b1b871f68b Mon Sep 17 00:00:00 2001 From: Alexandre Vassalotti Date: Sun, 24 Nov 2013 02:41:05 -0800 Subject: Make built-in methods picklable through the reduce protocol. --- Objects/methodobject.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'Objects') diff --git a/Objects/methodobject.c b/Objects/methodobject.c index ca21a68e2e..55a7d6a35e 100644 --- a/Objects/methodobject.c +++ b/Objects/methodobject.c @@ -159,6 +159,26 @@ meth_dealloc(PyCFunctionObject *m) } } +static PyObject * +meth_reduce(PyCFunctionObject *m) +{ + PyObject *builtins; + PyObject *getattr; + _Py_IDENTIFIER(getattr); + + if (m->m_self == NULL || PyModule_Check(m->m_self)) + return PyUnicode_FromString(m->m_ml->ml_name); + + builtins = PyEval_GetBuiltins(); + getattr = _PyDict_GetItemId(builtins, &PyId_getattr); + return Py_BuildValue("O(Os)", getattr, m->m_self, m->m_ml->ml_name); +} + +static PyMethodDef meth_methods[] = { + {"__reduce__", (PyCFunction)meth_reduce, METH_NOARGS, NULL}, + {NULL, NULL} +}; + /* * finds the docstring's introspection signature. * if present, returns a pointer pointing to the first '('. 
@@ -394,7 +414,7 @@ PyTypeObject PyCFunction_Type = { 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + meth_methods, /* tp_methods */ meth_members, /* tp_members */ meth_getsets, /* tp_getset */ 0, /* tp_base */ -- cgit v1.2.1 From 867af6717544e28f8fcae6c015b6bac238df1fc9 Mon Sep 17 00:00:00 2001 From: Alexandre Vassalotti Date: Sun, 24 Nov 2013 02:53:45 -0800 Subject: Make Ellipsis and NotImplemented picklable through the reduce protocol. --- Objects/object.c | 13 ++++++++++++- Objects/sliceobject.c | 13 ++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'Objects') diff --git a/Objects/object.c b/Objects/object.c index 395e28d63e..11718aaae2 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1464,6 +1464,17 @@ NotImplemented_repr(PyObject *op) return PyUnicode_FromString("NotImplemented"); } +static PyObject * +NotImplemented_reduce(PyObject *op) +{ + return PyUnicode_FromString("NotImplemented"); +} + +static PyMethodDef notimplemented_methods[] = { + {"__reduce__", (PyCFunction)NotImplemented_reduce, METH_NOARGS, NULL}, + {NULL, NULL} +}; + static PyObject * notimplemented_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { @@ -1511,7 +1522,7 @@ static PyTypeObject PyNotImplemented_Type = { 0, /*tp_weaklistoffset */ 0, /*tp_iter */ 0, /*tp_iternext */ - 0, /*tp_methods */ + notimplemented_methods, /*tp_methods */ 0, /*tp_members */ 0, /*tp_getset */ 0, /*tp_base */ diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 52f1c89ded..104952333a 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -33,6 +33,17 @@ ellipsis_repr(PyObject *op) return PyUnicode_FromString("Ellipsis"); } +static PyObject * +ellipsis_reduce(PyObject *op) +{ + return PyUnicode_FromString("Ellipsis"); +} + +static PyMethodDef ellipsis_methods[] = { + {"__reduce__", (PyCFunction)ellipsis_reduce, METH_NOARGS, NULL}, + {NULL, NULL} +}; + PyTypeObject PyEllipsis_Type = { 
PyVarObject_HEAD_INIT(&PyType_Type, 0) "ellipsis", /* tp_name */ @@ -61,7 +72,7 @@ PyTypeObject PyEllipsis_Type = { 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - 0, /* tp_methods */ + ellipsis_methods, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ -- cgit v1.2.1 From abb53cb53299418e4bdafe124e3b682ef00a02c1 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 4 Dec 2013 09:27:47 +0100 Subject: Silence expression result unused warnings with clang. The PyObject_INIT() macro returns obj: ../cpython/Objects/methodobject.c:32:23: warning: expression result unused [-Wunused-value] PyObject_INIT(op, &PyCFunction_Type); ^~ ../cpython/Include/objimpl.h:139:69: note: expanded from macro 'PyObject_INIT' ( Py_TYPE(op) = (typeobj), _Py_NewReference((PyObject *)(op)), (op) ) ^ 1 warning generated. --- Objects/bytesobject.c | 6 +++--- Objects/classobject.c | 2 +- Objects/complexobject.c | 2 +- Objects/floatobject.c | 2 +- Objects/longobject.c | 2 +- Objects/methodobject.c | 2 +- Objects/typeobject.c | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) (limited to 'Objects') diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 8217b1eab3..63c67f8479 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -107,7 +107,7 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size); if (op == NULL) return PyErr_NoMemory(); - PyObject_INIT_VAR(op, &PyBytes_Type, size); + (void)PyObject_INIT_VAR(op, &PyBytes_Type, size); op->ob_shash = -1; if (str != NULL) Py_MEMCPY(op->ob_sval, str, size); @@ -155,7 +155,7 @@ PyBytes_FromString(const char *str) op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size); if (op == NULL) return PyErr_NoMemory(); - PyObject_INIT_VAR(op, &PyBytes_Type, size); + (void)PyObject_INIT_VAR(op, &PyBytes_Type, size); op->ob_shash = -1; Py_MEMCPY(op->ob_sval, str, size+1); /* share short strings */ @@ -749,7
+749,7 @@ bytes_repeat(PyBytesObject *a, Py_ssize_t n) op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes); if (op == NULL) return PyErr_NoMemory(); - PyObject_INIT_VAR(op, &PyBytes_Type, size); + (void)PyObject_INIT_VAR(op, &PyBytes_Type, size); op->ob_shash = -1; op->ob_sval[size] = '\0'; if (Py_SIZE(a) == 1 && n > 0) { diff --git a/Objects/classobject.c b/Objects/classobject.c index 272f575dba..0c0bd47fbb 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -52,7 +52,7 @@ PyMethod_New(PyObject *func, PyObject *self) im = free_list; if (im != NULL) { free_list = (PyMethodObject *)(im->im_self); - PyObject_INIT(im, &PyMethod_Type); + (void)PyObject_INIT(im, &PyMethod_Type); numfree--; } else { diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 60a388fa24..a5b76f0460 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -217,7 +217,7 @@ PyComplex_FromCComplex(Py_complex cval) op = (PyComplexObject *) PyObject_MALLOC(sizeof(PyComplexObject)); if (op == NULL) return PyErr_NoMemory(); - PyObject_INIT(op, &PyComplex_Type); + (void)PyObject_INIT(op, &PyComplex_Type); op->cval = cval; return (PyObject *) op; } diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 29c3b32763..05b7679553 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -119,7 +119,7 @@ PyFloat_FromDouble(double fval) return PyErr_NoMemory(); } /* Inline PyObject_New */ - PyObject_INIT(op, &PyFloat_Type); + (void)PyObject_INIT(op, &PyFloat_Type); op->ob_fval = fval; return (PyObject *) op; } diff --git a/Objects/longobject.c b/Objects/longobject.c index a5c0d1b33f..68a667e771 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -5089,7 +5089,7 @@ _PyLong_Init(void) assert(v->ob_digit[0] == abs(ival)); } else { - PyObject_INIT(v, &PyLong_Type); + (void)PyObject_INIT(v, &PyLong_Type); } Py_SIZE(v) = size; v->ob_digit[0] = abs(ival); diff --git a/Objects/methodobject.c b/Objects/methodobject.c index 
55a7d6a35e..6179aeebd0 100644 --- a/Objects/methodobject.c +++ b/Objects/methodobject.c @@ -29,7 +29,7 @@ PyCFunction_NewEx(PyMethodDef *ml, PyObject *self, PyObject *module) op = free_list; if (op != NULL) { free_list = (PyCFunctionObject *)(op->m_self); - PyObject_INIT(op, &PyCFunction_Type); + (void)PyObject_INIT(op, &PyCFunction_Type); numfree--; } else { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 8b2ea1c1a6..530670f361 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -787,7 +787,7 @@ PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems) Py_INCREF(type); if (type->tp_itemsize == 0) - PyObject_INIT(obj, type); + (void)PyObject_INIT(obj, type); else (void) PyObject_INIT_VAR((PyVarObject *)obj, type, nitems); -- cgit v1.2.1 From ede64d46c3a6e8799206070d1c1e3120d0ca1df7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 13 Dec 2013 02:01:38 +0100 Subject: Issue #14432: Remove the thread state field from the frame structure. Fix a crash when a generator is created in a C thread that is destroyed while the generator is still used. The issue was that a generator contains a frame, and the frame kept a reference to the Python state of the destroyed C thread. The crash occurs when a trace function is set up. --- Objects/frameobject.c | 1 - 1 file changed, 1 deletion(-) (limited to 'Objects') diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 63f03a64e0..0d6229360e 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -726,7 +726,6 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, Py_INCREF(locals); f->f_locals = locals; } - f->f_tstate = tstate; f->f_lasti = -1; f->f_lineno = code->co_firstlineno; -- cgit v1.2.1