diff options
Diffstat (limited to 'Cython/Utility/StringTools.c')
-rw-r--r-- | Cython/Utility/StringTools.c | 129 |
1 files changed, 101 insertions, 28 deletions
diff --git a/Cython/Utility/StringTools.c b/Cython/Utility/StringTools.c index 98b5e260e..553585987 100644 --- a/Cython/Utility/StringTools.c +++ b/Cython/Utility/StringTools.c @@ -7,15 +7,73 @@ #include <string> + +//////////////////// ssize_strlen.proto //////////////////// + +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s);/*proto*/ + +//////////////////// ssize_strlen //////////////////// +//@requires: IncludeStringH + +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s) { + size_t len = strlen(s); + if (unlikely(len > PY_SSIZE_T_MAX)) { + PyErr_SetString(PyExc_OverflowError, "byte string is too long"); + return -1; + } + return (Py_ssize_t) len; +} + + +//////////////////// ssize_pyunicode_strlen.proto //////////////////// + +static CYTHON_INLINE Py_ssize_t __Pyx_Py_UNICODE_ssize_strlen(const Py_UNICODE *u);/*proto*/ + +//////////////////// ssize_pyunicode_strlen //////////////////// + +static CYTHON_INLINE Py_ssize_t __Pyx_Py_UNICODE_ssize_strlen(const Py_UNICODE *u) { + size_t len = __Pyx_Py_UNICODE_strlen(u); + if (unlikely(len > PY_SSIZE_T_MAX)) { + PyErr_SetString(PyExc_OverflowError, "Py_UNICODE string is too long"); + return -1; + } + return (Py_ssize_t) len; +} + + //////////////////// InitStrings.proto //////////////////// static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ //////////////////// InitStrings //////////////////// +#if PY_MAJOR_VERSION >= 3 +static int __Pyx_InitString(__Pyx_StringTabEntry t, PyObject **str) { + if (t.is_unicode | t.is_str) { + if (t.intern) { + *str = PyUnicode_InternFromString(t.s); + } else if (t.encoding) { + *str = PyUnicode_Decode(t.s, t.n - 1, t.encoding, NULL); + } else { + *str = PyUnicode_FromStringAndSize(t.s, t.n - 1); + } + } else { + *str = PyBytes_FromStringAndSize(t.s, t.n - 1); + } + if (!*str) + return -1; + // initialise cached hash value + if (PyObject_Hash(*str) == -1) + return -1; + return 0; +} +#endif + static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { while (t->p) { - #if PY_MAJOR_VERSION < 3 + #if PY_MAJOR_VERSION >= 3 /* Python 3+ has unicode identifiers */ + __Pyx_InitString(*t, t->p); + #else if (t->is_unicode) { *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); } else if (t->intern) { @@ -23,24 +81,12 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { } else { *t->p = PyString_FromStringAndSize(t->s, t->n - 1); } - #else /* Python 3+ has unicode identifiers */ - if (t->is_unicode | t->is_str) { - if (t->intern) { - *t->p = PyUnicode_InternFromString(t->s); - } else if (t->encoding) { - *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL); - } else { - *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); - } - } else { - *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1); - } - #endif if (!*t->p) return -1; // initialise cached hash value if (PyObject_Hash(*t->p) == -1) return -1; + #endif ++t; } return 0; @@ -183,7 +229,7 @@ static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int //@requires: BytesEquals static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { -#if CYTHON_COMPILING_IN_PYPY +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API return PyObject_RichCompareBool(s1, s2, equals); #else #if PY_MAJOR_VERSION < 3 @@ -294,7 +340,7 @@ static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int eq //@requires: IncludeStringH static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { -#if CYTHON_COMPILING_IN_PYPY +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API return PyObject_RichCompareBool(s1, s2, equals); #else if (s1 == s2) { @@ -591,6 +637,8 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring( stop = length; if (stop <= start) return __Pyx_NewRef($empty_unicode); + if (start == 0 && stop == length) + return __Pyx_NewRef(text); #if CYTHON_PEP393_ENABLED return PyUnicode_FromKindAndData(PyUnicode_KIND(text), PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start); @@ -835,25 +883,29 @@ static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_co //@substitute: naming static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength, - CYTHON_UNUSED Py_UCS4 max_char) { + Py_UCS4 max_char) { #if CYTHON_USE_UNICODE_INTERNALS && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS PyObject *result_uval; - int result_ukind; + int result_ukind, kind_shift; Py_ssize_t i, char_pos; void *result_udata; + CYTHON_MAYBE_UNUSED_VAR(max_char); #if CYTHON_PEP393_ENABLED // Py 3.3+ (post PEP-393) result_uval = PyUnicode_New(result_ulength, max_char); if (unlikely(!result_uval)) return NULL; result_ukind = (max_char <= 255) ? PyUnicode_1BYTE_KIND : (max_char <= 65535) ? PyUnicode_2BYTE_KIND : PyUnicode_4BYTE_KIND; + kind_shift = (result_ukind == PyUnicode_4BYTE_KIND) ? 2 : result_ukind - 1; result_udata = PyUnicode_DATA(result_uval); #else // Py 2.x/3.2 (pre PEP-393) result_uval = PyUnicode_FromUnicode(NULL, result_ulength); if (unlikely(!result_uval)) return NULL; result_ukind = sizeof(Py_UNICODE); + kind_shift = (result_ukind == 4) ? 2 : result_ukind - 1; result_udata = PyUnicode_AS_UNICODE(result_uval); #endif + assert(kind_shift == 2 || kind_shift == 1 || kind_shift == 0); char_pos = 0; for (i=0; i < value_count; i++) { @@ -866,12 +918,12 @@ static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_co ulength = __Pyx_PyUnicode_GET_LENGTH(uval); if (unlikely(!ulength)) continue; - if (unlikely(char_pos + ulength < 0)) + if (unlikely((PY_SSIZE_T_MAX >> kind_shift) - ulength < char_pos)) goto overflow; ukind = __Pyx_PyUnicode_KIND(uval); udata = __Pyx_PyUnicode_DATA(uval); if (!CYTHON_PEP393_ENABLED || ukind == result_ukind) { - memcpy((char *)result_udata + char_pos * result_ukind, udata, (size_t) (ulength * result_ukind)); + memcpy((char *)result_udata + (char_pos << kind_shift), udata, (size_t) (ulength << kind_shift)); } else { #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030300F0 || defined(_PyUnicode_FastCopyCharacters) _PyUnicode_FastCopyCharacters(result_uval, char_pos, uval, 0, ulength); @@ -893,8 +945,9 @@ bad: return NULL; #else // non-CPython fallback - result_ulength++; - value_count++; + CYTHON_UNUSED_VAR(max_char); + CYTHON_UNUSED_VAR(result_ulength); + CYTHON_UNUSED_VAR(value_count); return PyUnicode_Join($empty_unicode, value_tuple); #endif } @@ -1008,11 +1061,11 @@ static CYTHON_INLINE int __Pyx_PyByteArray_AppendObject(PyObject* bytearray, PyO } else #endif #if CYTHON_USE_PYLONG_INTERNALS - if (likely(PyLong_CheckExact(value)) && likely(Py_SIZE(value) == 1 || Py_SIZE(value) == 0)) { - if (Py_SIZE(value) == 0) { + if (likely(PyLong_CheckExact(value)) && likely(__Pyx_PyLong_IsCompact(value))) { + if (__Pyx_PyLong_IsZero(value)) { ival = 0; } else { - ival = ((PyLongObject*)value)->ob_digit[0]; + ival = __Pyx_PyLong_CompactValue(value); if (unlikely(ival > 255)) goto bad_range; } } else @@ -1130,11 +1183,12 @@ static PyObject* __Pyx_PyObject_Format(PyObject* obj, PyObject* format_spec) { likely(PyString_CheckExact(s)) ? PyUnicode_FromEncodedObject(s, NULL, "strict") : \ PyObject_Format(s, f)) #elif CYTHON_USE_TYPE_SLOTS - // Py3 nicely returns unicode strings from str() which makes this quite efficient for builtin types + // Py3 nicely returns unicode strings from str() and repr(), which makes this quite efficient for builtin types. + // In Py3.8+, tp_str() delegates to tp_repr(), so we call tp_repr() directly here. #define __Pyx_PyObject_FormatSimple(s, f) ( \ likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) : \ - likely(PyLong_CheckExact(s)) ? PyLong_Type.tp_str(s) : \ - likely(PyFloat_CheckExact(s)) ? PyFloat_Type.tp_str(s) : \ + likely(PyLong_CheckExact(s)) ? PyLong_Type.tp_repr(s) : \ + likely(PyFloat_CheckExact(s)) ? PyFloat_Type.tp_repr(s) : \ PyObject_Format(s, f)) #else #define __Pyx_PyObject_FormatSimple(s, f) ( \ @@ -1193,3 +1247,22 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Unicode(PyObject *obj) { #define __Pyx_PyObject_Unicode(obj) \ (likely(PyUnicode_CheckExact(obj)) ? __Pyx_NewRef(obj) : PyObject_Unicode(obj)) #endif + + +//////////////////// PyStr_Str.proto //////////////////// + +static CYTHON_INLINE PyObject* __Pyx_PyStr_Str(PyObject *obj);/*proto*/ + +//////////////////// PyStr_Str //////////////////// + +static CYTHON_INLINE PyObject* __Pyx_PyStr_Str(PyObject *obj) { + if (unlikely(obj == Py_None)) + obj = PYIDENT("None"); + return __Pyx_NewRef(obj); +} + + +//////////////////// PyObject_Str.proto //////////////////// + +#define __Pyx_PyObject_Str(obj) \ + (likely(PyString_CheckExact(obj)) ? __Pyx_NewRef(obj) : PyObject_Str(obj)) |