From a89a881286d3c709c35843642fbf5a02ca648418 Mon Sep 17 00:00:00 2001 From: scoder Date: Sun, 25 Apr 2021 07:12:44 +0200 Subject: Make "__Pyx_UnicodeContainsUCS4()" work for WCHAR unicode strings with Py3.9+. (GH-4135) * Use the same fallback as for missing PEP-393 support. * Prepare for "PyUnicode_READY()" and "PyUnicode_WCHAR_KIND" to be removed in Py3.12. See https://www.python.org/dev/peps/pep-0623/ * Avoid C compiler warnings about deprecated C-API functions in Py3.9+. Closes https://github.com/cython/cython/issues/3925 --- Cython/Utility/ModuleSetupCode.c | 13 +++++++++++++ Cython/Utility/StringTools.c | 35 ++++++++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/Cython/Utility/ModuleSetupCode.c b/Cython/Utility/ModuleSetupCode.c index aa3f368c7..52ec14f2b 100644 --- a/Cython/Utility/ModuleSetupCode.c +++ b/Cython/Utility/ModuleSetupCode.c @@ -523,8 +523,15 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { /* new Py3.3 unicode type (PEP 393) */ #if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) #define CYTHON_PEP393_ENABLED 1 + + #if defined(PyUnicode_IS_READY) #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? \ 0 : _PyUnicode_Ready((PyObject *)(op))) + #else + // Py3.12 / PEP-623 will remove wstr type unicode strings and all of the PyUnicode_READY() machinery. + #define __Pyx_PyUnicode_READY(op) (0) + #endif + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) @@ -533,7 +540,13 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch) #if defined(PyUnicode_IS_READY) && defined(PyUnicode_GET_SIZE) + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 + // Avoid calling deprecated C-API functions in Py3.9+ that PEP-623 schedules for removal in Py3.12. + // https://www.python.org/dev/peps/pep-0623/ + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) + #else #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) + #endif #else #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) #endif diff --git a/Cython/Utility/StringTools.c b/Cython/Utility/StringTools.c index 7a3426215..a7559ec03 100644 --- a/Cython/Utility/StringTools.c +++ b/Cython/Utility/StringTools.c @@ -66,8 +66,19 @@ static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 ch //////////////////// PyUCS4InUnicode //////////////////// +#if PY_VERSION_HEX < 0x03090000 || (defined(PyUnicode_WCHAR_KIND) && defined(PyUnicode_AS_UNICODE)) + #if PY_VERSION_HEX < 0x03090000 -#if Py_UNICODE_SIZE == 2 +#define __Pyx_PyUnicode_AS_UNICODE(op) PyUnicode_AS_UNICODE(op) +#define __Pyx_PyUnicode_GET_SIZE(op) PyUnicode_GET_SIZE(op) +#else +// Avoid calling deprecated C-API functions in Py3.9+ that PEP-623 schedules for removal in Py3.12. +// https://www.python.org/dev/peps/pep-0623/ +#define __Pyx_PyUnicode_AS_UNICODE(op) (((PyASCIIObject *)(op))->wstr) +#define __Pyx_PyUnicode_GET_SIZE(op) ((PyCompactUnicodeObject *)(op))->wstr_length +#endif + +#if !defined(Py_UNICODE_SIZE) || Py_UNICODE_SIZE == 2 static int __Pyx_PyUnicodeBufferContainsUCS4_SP(Py_UNICODE* buffer, Py_ssize_t length, Py_UCS4 character) { /* handle surrogate pairs for Py_UNICODE buffers in 16bit Unicode builds */ Py_UNICODE high_val, low_val; @@ -95,7 +106,10 @@ static int __Pyx_PyUnicodeBufferContainsUCS4_BMP(Py_UNICODE* buffer, Py_ssize_t static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 character) { #if CYTHON_PEP393_ENABLED const int kind = PyUnicode_KIND(unicode); - if (likely(kind != PyUnicode_WCHAR_KIND)) { + #ifdef PyUnicode_WCHAR_KIND + if (likely(kind != PyUnicode_WCHAR_KIND)) + #endif + { Py_ssize_t i; const void* udata = PyUnicode_DATA(unicode); const Py_ssize_t length = PyUnicode_GET_LENGTH(unicode); @@ -106,20 +120,23 @@ static CYTHON_INLINE int __Pyx_UnicodeContainsUCS4(PyObject* unicode, Py_UCS4 ch } #elif PY_VERSION_HEX >= 0x03090000 #error Cannot use "UChar in Unicode" in Python 3.9 without PEP-393 unicode strings. +#elif !defined(PyUnicode_AS_UNICODE) + #error Cannot use "UChar in Unicode" in Python < 3.9 without Py_UNICODE support. #endif -#if PY_VERSION_HEX < 0x03090000 -#if Py_UNICODE_SIZE == 2 - if (unlikely(character > 65535)) { + +#if PY_VERSION_HEX < 0x03090000 || (defined(PyUnicode_WCHAR_KIND) && defined(PyUnicode_AS_UNICODE)) +#if !defined(Py_UNICODE_SIZE) || Py_UNICODE_SIZE == 2 + if ((sizeof(Py_UNICODE) == 2) && unlikely(character > 65535)) { return __Pyx_PyUnicodeBufferContainsUCS4_SP( - PyUnicode_AS_UNICODE(unicode), - PyUnicode_GET_SIZE(unicode), + __Pyx_PyUnicode_AS_UNICODE(unicode), + __Pyx_PyUnicode_GET_SIZE(unicode), character); } else #endif { return __Pyx_PyUnicodeBufferContainsUCS4_BMP( - PyUnicode_AS_UNICODE(unicode), - PyUnicode_GET_SIZE(unicode), + __Pyx_PyUnicode_AS_UNICODE(unicode), + __Pyx_PyUnicode_GET_SIZE(unicode), character); } -- cgit v1.2.1