From 145f9db4a999a387c22e735e37918e3c0b28228a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 9 Oct 2016 23:44:48 +0300 Subject: Issue #28400: Removed unnecessary checks in unicode_char and resize_copy. 1. In resize_copy we don't need to PyUnicode_READY(unicode) since when it's not PyUnicode_WCHAR_KIND it should be ready. 2. In unicode_char, PyUnicode_1BYTE_KIND is handled by get_latin1_char. Patch by Xiang Zhang. --- Objects/unicodeobject.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b58cf02a8c..8f34e01232 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1029,8 +1029,7 @@ resize_copy(PyObject *unicode, Py_ssize_t length) if (_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND) { PyObject *copy; - if (PyUnicode_READY(unicode) == -1) - return NULL; + assert(PyUnicode_IS_READY(unicode)); copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); if (copy == NULL) @@ -1974,14 +1973,11 @@ unicode_char(Py_UCS4 ch) unicode = PyUnicode_New(1, ch); if (unicode == NULL) return NULL; - switch (PyUnicode_KIND(unicode)) { - case PyUnicode_1BYTE_KIND: - PyUnicode_1BYTE_DATA(unicode)[0] = (Py_UCS1)ch; - break; - case PyUnicode_2BYTE_KIND: + + assert(PyUnicode_KIND(unicode) != PyUnicode_1BYTE_KIND); + if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) { PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2)ch; - break; - default: + } else { assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND); PyUnicode_4BYTE_DATA(unicode)[0] = ch; } -- cgit v1.2.1 From 1838227e043f62a985ee112c3666b214696a27d7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 22 Oct 2016 23:18:31 +0300 Subject: Issue #28504: Cleanup unicode_decode_call_errorhandler_wchar/writer. Patch by Xiang Zhang. 
--- Objects/unicodeobject.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 05f4d30e08..af95bb6b4a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4240,7 +4240,7 @@ unicode_decode_call_errorhandler_wchar( Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, PyObject **output, Py_ssize_t *outpos) { - static const char *argparse = "O!n;decoding error handler must return (str, int) tuple"; + static const char *argparse = "Un;decoding error handler must return (str, int) tuple"; PyObject *restuple = NULL; PyObject *repunicode = NULL; @@ -4273,10 +4273,10 @@ unicode_decode_call_errorhandler_wchar( if (restuple == NULL) goto onError; if (!PyTuple_Check(restuple)) { - PyErr_SetString(PyExc_TypeError, &argparse[4]); + PyErr_SetString(PyExc_TypeError, &argparse[3]); goto onError; } - if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos)) + if (!PyArg_ParseTuple(restuple, argparse, &repunicode, &newpos)) goto onError; /* Copy back the bytes variables, which might have been modified by the @@ -4284,9 +4284,6 @@ unicode_decode_call_errorhandler_wchar( inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject); if (!inputobj) goto onError; - if (!PyBytes_Check(inputobj)) { - PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes"); - } *input = PyBytes_AS_STRING(inputobj); insize = PyBytes_GET_SIZE(inputobj); *inend = *input + insize; @@ -4327,7 +4324,7 @@ unicode_decode_call_errorhandler_wchar( *inptr = *input + newpos; /* we made it! 
*/ - Py_XDECREF(restuple); + Py_DECREF(restuple); return 0; overflow: @@ -4348,7 +4345,7 @@ unicode_decode_call_errorhandler_writer( Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, _PyUnicodeWriter *writer /* PyObject **output, Py_ssize_t *outpos */) { - static const char *argparse = "O!n;decoding error handler must return (str, int) tuple"; + static const char *argparse = "Un;decoding error handler must return (str, int) tuple"; PyObject *restuple = NULL; PyObject *repunicode = NULL; @@ -4375,10 +4372,10 @@ unicode_decode_call_errorhandler_writer( if (restuple == NULL) goto onError; if (!PyTuple_Check(restuple)) { - PyErr_SetString(PyExc_TypeError, &argparse[4]); + PyErr_SetString(PyExc_TypeError, &argparse[3]); goto onError; } - if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos)) + if (!PyArg_ParseTuple(restuple, argparse, &repunicode, &newpos)) goto onError; /* Copy back the bytes variables, which might have been modified by the @@ -4386,9 +4383,6 @@ unicode_decode_call_errorhandler_writer( inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject); if (!inputobj) goto onError; - if (!PyBytes_Check(inputobj)) { - PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes"); - } *input = PyBytes_AS_STRING(inputobj); insize = PyBytes_GET_SIZE(inputobj); *inend = *input + insize; @@ -4403,8 +4397,6 @@ unicode_decode_call_errorhandler_writer( goto onError; } - if (PyUnicode_READY(repunicode) < 0) - goto onError; replen = PyUnicode_GET_LENGTH(repunicode); if (replen > 1) { writer->min_length += replen - 1; @@ -4420,7 +4412,7 @@ unicode_decode_call_errorhandler_writer( *inptr = *input + newpos; /* we made it! */ - Py_XDECREF(restuple); + Py_DECREF(restuple); return 0; onError: -- cgit v1.2.1 From 6ed55b2d31ec9afacbecd309d2c3a4c099848a6c Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 23 Oct 2016 15:12:25 +0300 Subject: Issue #28511: Use the "U" format instead of "O!" in PyArg_Parse*. 
--- Objects/unicodeobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index af95bb6b4a..6ab691ff25 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8621,7 +8621,7 @@ unicode_translate_call_errorhandler(const char *errors, Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos) { - static const char *argparse = "O!n;translating error handler must return (str, int) tuple"; + static const char *argparse = "Un;translating error handler must return (str, int) tuple"; Py_ssize_t i_newpos; PyObject *restuple; @@ -8643,11 +8643,11 @@ unicode_translate_call_errorhandler(const char *errors, if (restuple == NULL) return NULL; if (!PyTuple_Check(restuple)) { - PyErr_SetString(PyExc_TypeError, &argparse[4]); + PyErr_SetString(PyExc_TypeError, &argparse[3]); Py_DECREF(restuple); return NULL; } - if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, + if (!PyArg_ParseTuple(restuple, argparse, &resunicode, &i_newpos)) { Py_DECREF(restuple); return NULL; -- cgit v1.2.1 From 5303b85718c671e17716d3ddfb4f1440ca4abef0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 23 Oct 2016 15:41:36 +0300 Subject: Issue #28439: Remove redundant checks in PyUnicode_EncodeLocale and PyUnicode_DecodeLocaleAndSize. Patch by Xiang Zhang. 
--- Objects/unicodeobject.c | 47 ++++++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 27 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 6ab691ff25..cd8b33c594 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3400,11 +3400,9 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) { Py_ssize_t wlen, wlen2; wchar_t *wstr; - PyObject *bytes = NULL; char *errmsg; - PyObject *reason = NULL; - PyObject *exc; - size_t error_pos; + PyObject *bytes, *reason, *exc; + size_t error_pos, errlen; int surrogateescape; if (locale_error_handler(errors, &surrogateescape) < 0) @@ -3459,6 +3457,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) len2 = wcstombs(PyBytes_AS_STRING(bytes), wstr, len+1); if (len2 == (size_t)-1 || len2 > len) { + Py_DECREF(bytes); error_pos = (size_t)-1; goto encode_error; } @@ -3474,17 +3473,15 @@ encode_error: error_pos = wcstombs_errorpos(wstr); PyMem_Free(wstr); - Py_XDECREF(bytes); - - if (errmsg != NULL) { - size_t errlen; - wstr = Py_DecodeLocale(errmsg, &errlen); - if (wstr != NULL) { - reason = PyUnicode_FromWideChar(wstr, errlen); - PyMem_RawFree(wstr); - } else - errmsg = NULL; + + wstr = Py_DecodeLocale(errmsg, &errlen); + if (wstr != NULL) { + reason = PyUnicode_FromWideChar(wstr, errlen); + PyMem_RawFree(wstr); + } else { + errmsg = NULL; } + if (errmsg == NULL) reason = PyUnicode_FromString( "wcstombs() encountered an unencodable " @@ -3500,7 +3497,7 @@ encode_error: Py_DECREF(reason); if (exc != NULL) { PyCodec_StrictErrors(exc); - Py_XDECREF(exc); + Py_DECREF(exc); } return NULL; } @@ -3702,10 +3699,9 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, size_t wlen, wlen2; PyObject *unicode; int surrogateescape; - size_t error_pos; + size_t error_pos, errlen; char *errmsg; - PyObject *reason = NULL; /* initialize to prevent gcc warning */ - PyObject *exc; + PyObject *exc, *reason = 
NULL; /* initialize to prevent gcc warning */ if (locale_error_handler(errors, &surrogateescape) < 0) return NULL; @@ -3763,19 +3759,16 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, return unicode; decode_error: - reason = NULL; errmsg = strerror(errno); assert(errmsg != NULL); error_pos = mbstowcs_errorpos(str, len); - if (errmsg != NULL) { - size_t errlen; - wstr = Py_DecodeLocale(errmsg, &errlen); - if (wstr != NULL) { - reason = PyUnicode_FromWideChar(wstr, errlen); - PyMem_RawFree(wstr); - } + wstr = Py_DecodeLocale(errmsg, &errlen); + if (wstr != NULL) { + reason = PyUnicode_FromWideChar(wstr, errlen); + PyMem_RawFree(wstr); } + if (reason == NULL) reason = PyUnicode_FromString( "mbstowcs() encountered an invalid multibyte sequence"); @@ -3790,7 +3783,7 @@ decode_error: Py_DECREF(reason); if (exc != NULL) { PyCodec_StrictErrors(exc); - Py_XDECREF(exc); + Py_DECREF(exc); } return NULL; } -- cgit v1.2.1 From 770891917b1ffe9eb66e72c61355c3a95d4a6db5 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Mon, 31 Oct 2016 09:22:08 -0400 Subject: Issue 28128: Print out better error/warning messages for invalid string escapes. 
--- Objects/unicodeobject.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 134fa07600..6e63e009a9 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5877,9 +5877,10 @@ PyUnicode_AsUTF16String(PyObject *unicode) static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; PyObject * -PyUnicode_DecodeUnicodeEscape(const char *s, - Py_ssize_t size, - const char *errors) +_PyUnicode_DecodeUnicodeEscape(const char *s, + Py_ssize_t size, + const char *errors, + const char **first_invalid_escape) { const char *starts = s; _PyUnicodeWriter writer; @@ -5887,6 +5888,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s, PyObject *errorHandler = NULL; PyObject *exc = NULL; + // so we can remember if we've seen an invalid escape char or not + *first_invalid_escape = NULL; + if (size == 0) { _Py_RETURN_UNICODE_EMPTY(); } @@ -6061,9 +6065,10 @@ PyUnicode_DecodeUnicodeEscape(const char *s, goto error; default: - if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "invalid escape sequence '\\%c'", c) < 0) - goto onError; + if (*first_invalid_escape == NULL) { + *first_invalid_escape = s-1; /* Back up one char, since we've + already incremented s. 
*/ + } WRITE_ASCII_CHAR('\\'); WRITE_CHAR(c); continue; @@ -6098,6 +6103,27 @@ PyUnicode_DecodeUnicodeEscape(const char *s, return NULL; } +PyObject * +PyUnicode_DecodeUnicodeEscape(const char *s, + Py_ssize_t size, + const char *errors) +{ + const char *first_invalid_escape; + PyObject *result = _PyUnicode_DecodeUnicodeEscape(s, size, errors, + &first_invalid_escape); + if (result == NULL) + return NULL; + if (first_invalid_escape != NULL) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "invalid escape sequence '\\%c'", + *first_invalid_escape) < 0) { + Py_DECREF(result); + return NULL; + } + } + return result; +} + /* Return a Unicode-Escape string version of the Unicode object. If quotes is true, the string is enclosed in u"" or u'' quotes as -- cgit v1.2.1 From ba86008aac6890c5c4a2c6d30ea8110f228d483e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 20 Nov 2016 12:16:46 +0200 Subject: Issue #19569: Compiler warnings are now emitted when most deprecated functions are used. 
--- Objects/unicodeobject.c | 94 +++++++++++++++++++++++-------------------------- 1 file changed, 45 insertions(+), 49 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 02f726d0ce..0e88fca43f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1987,13 +1987,33 @@ unicode_char(Py_UCS4 ch) PyObject * PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size) +{ + if (u == NULL) + return (PyObject*)_PyUnicode_New(size); + + if (size < 0) { + PyErr_BadInternalCall(); + return NULL; + } + + return PyUnicode_FromWideChar(u, size); +} + +PyObject * +PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size) { PyObject *unicode; Py_UCS4 maxchar = 0; Py_ssize_t num_surrogates; - if (u == NULL) - return (PyObject*)_PyUnicode_New(size); + if (u == NULL && size != 0) { + PyErr_BadInternalCall(); + return NULL; + } + + if (size == -1) { + size = wcslen(u); + } /* If the Unicode data is known at construction time, we can apply some optimizations which share commonly used objects. */ @@ -2478,27 +2498,6 @@ PyUnicode_AsUCS4Copy(PyObject *string) return as_ucs4(string, NULL, 0, 1); } -#ifdef HAVE_WCHAR_H - -PyObject * -PyUnicode_FromWideChar(const wchar_t *w, Py_ssize_t size) -{ - if (w == NULL) { - if (size == 0) - _Py_RETURN_UNICODE_EMPTY(); - PyErr_BadInternalCall(); - return NULL; - } - - if (size == -1) { - size = wcslen(w); - } - - return PyUnicode_FromUnicode(w, size); -} - -#endif /* HAVE_WCHAR_H */ - /* maximum number of characters required for output of %lld or %p. We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits, plus 1 for the sign. 53/22 is an upper bound for log10(256). 
*/ @@ -3296,7 +3295,7 @@ PyUnicode_Encode(const Py_UNICODE *s, { PyObject *v, *unicode; - unicode = PyUnicode_FromUnicode(s, size); + unicode = PyUnicode_FromWideChar(s, size); if (unicode == NULL) return NULL; v = PyUnicode_AsEncodedString(unicode, encoding, errors); @@ -4129,7 +4128,11 @@ PyUnicode_GetSize(PyObject *unicode) PyErr_BadArgument(); goto onError; } - return PyUnicode_GET_SIZE(unicode); + if (_PyUnicode_WSTR(unicode) == NULL) { + if (PyUnicode_AsUnicode(unicode) == NULL) + goto onError; + } + return PyUnicode_WSTR_LENGTH(unicode); onError: return -1; @@ -4815,7 +4818,7 @@ PyUnicode_EncodeUTF7(const Py_UNICODE *s, const char *errors) { PyObject *result; - PyObject *tmp = PyUnicode_FromUnicode(s, size); + PyObject *tmp = PyUnicode_FromWideChar(s, size); if (tmp == NULL) return NULL; result = _PyUnicode_EncodeUTF7(tmp, base64SetO, @@ -5171,7 +5174,7 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s, { PyObject *v, *unicode; - unicode = PyUnicode_FromUnicode(s, size); + unicode = PyUnicode_FromWideChar(s, size); if (unicode == NULL) return NULL; v = _PyUnicode_AsUTF8String(unicode, errors); @@ -5496,7 +5499,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s, int byteorder) { PyObject *result; - PyObject *tmp = PyUnicode_FromUnicode(s, size); + PyObject *tmp = PyUnicode_FromWideChar(s, size); if (tmp == NULL) return NULL; result = _PyUnicode_EncodeUTF32(tmp, errors, byteorder); @@ -5849,7 +5852,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s, int byteorder) { PyObject *result; - PyObject *tmp = PyUnicode_FromUnicode(s, size); + PyObject *tmp = PyUnicode_FromWideChar(s, size); if (tmp == NULL) return NULL; result = _PyUnicode_EncodeUTF16(tmp, errors, byteorder); @@ -6246,7 +6249,7 @@ PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, Py_ssize_t size) { PyObject *result; - PyObject *tmp = PyUnicode_FromUnicode(s, size); + PyObject *tmp = PyUnicode_FromWideChar(s, size); if (tmp == NULL) { return NULL; } @@ -6463,7 +6466,7 @@ PyUnicode_EncodeRawUnicodeEscape(const 
Py_UNICODE *s, Py_ssize_t size) { PyObject *result; - PyObject *tmp = PyUnicode_FromUnicode(s, size); + PyObject *tmp = PyUnicode_FromWideChar(s, size); if (tmp == NULL) return NULL; result = PyUnicode_AsRawUnicodeEscapeString(tmp); @@ -6874,7 +6877,7 @@ PyUnicode_EncodeLatin1(const Py_UNICODE *p, const char *errors) { PyObject *result; - PyObject *unicode = PyUnicode_FromUnicode(p, size); + PyObject *unicode = PyUnicode_FromWideChar(p, size); if (unicode == NULL) return NULL; result = unicode_encode_ucs1(unicode, errors, 256); @@ -7015,7 +7018,7 @@ PyUnicode_EncodeASCII(const Py_UNICODE *p, const char *errors) { PyObject *result; - PyObject *unicode = PyUnicode_FromUnicode(p, size); + PyObject *unicode = PyUnicode_FromWideChar(p, size); if (unicode == NULL) return NULL; result = unicode_encode_ucs1(unicode, errors, 128); @@ -7741,7 +7744,7 @@ PyUnicode_EncodeMBCS(const Py_UNICODE *p, const char *errors) { PyObject *unicode, *res; - unicode = PyUnicode_FromUnicode(p, size); + unicode = PyUnicode_FromWideChar(p, size); if (unicode == NULL) return NULL; res = encode_code_page(CP_ACP, unicode, errors); @@ -8589,7 +8592,7 @@ PyUnicode_EncodeCharmap(const Py_UNICODE *p, const char *errors) { PyObject *result; - PyObject *unicode = PyUnicode_FromUnicode(p, size); + PyObject *unicode = PyUnicode_FromWideChar(p, size); if (unicode == NULL) return NULL; result = _PyUnicode_EncodeCharmap(unicode, mapping, errors); @@ -9029,7 +9032,7 @@ PyUnicode_TranslateCharmap(const Py_UNICODE *p, const char *errors) { PyObject *result; - PyObject *unicode = PyUnicode_FromUnicode(p, size); + PyObject *unicode = PyUnicode_FromWideChar(p, size); if (!unicode) return NULL; result = _PyUnicode_TranslateCharmap(unicode, mapping, errors); @@ -9157,14 +9160,10 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, return -1; } - unicode = PyUnicode_FromUnicode(s, length); + unicode = PyUnicode_FromWideChar(s, length); if (unicode == NULL) return -1; - if (PyUnicode_READY(unicode) == -1) { - 
Py_DECREF(unicode); - return -1; - } kind = PyUnicode_KIND(unicode); data = PyUnicode_DATA(unicode); @@ -15332,7 +15331,7 @@ unicodeiter_reduce(unicodeiterobject *it) return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"), it->it_seq, it->it_index); } else { - PyObject *u = PyUnicode_FromUnicode(NULL, 0); + PyObject *u = (PyObject *)_PyUnicode_New(0); if (u == NULL) return NULL; return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u); @@ -15427,10 +15426,7 @@ unicode_iter(PyObject *seq) size_t Py_UNICODE_strlen(const Py_UNICODE *u) { - int res = 0; - while(*u++) - res++; - return res; + return wcslen(u); } Py_UNICODE* @@ -15455,8 +15451,8 @@ Py_UNICODE* Py_UNICODE_strcat(Py_UNICODE *s1, const Py_UNICODE *s2) { Py_UNICODE *u1 = s1; - u1 += Py_UNICODE_strlen(u1); - Py_UNICODE_strcpy(u1, s2); + u1 += wcslen(u1); + while ((*u1++ = *s2++)); return s1; } @@ -15505,7 +15501,7 @@ Py_UNICODE* Py_UNICODE_strrchr(const Py_UNICODE *s, Py_UNICODE c) { const Py_UNICODE *p; - p = s + Py_UNICODE_strlen(s); + p = s + wcslen(s); while (p != s) { p--; if (*p == c) -- cgit v1.2.1 From ac89262abd868aed4b06bb0ebf898e97fbab6293 Mon Sep 17 00:00:00 2001 From: Xiang Zhang Date: Wed, 23 Nov 2016 19:34:01 +0800 Subject: Issue #28774: Fix start/end pos in unicode_encode_ucs1(). Fix error position of the unicode error in ASCII and Latin1 encoders when a string returned by the error handler contains multiple non-encodable characters (non-ASCII for the ASCII codec, characters out of the U+0000-U+00FF range for Latin1). 
--- Objects/unicodeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e88a126eba..2bf48b756f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6798,7 +6798,7 @@ unicode_encode_ucs1(PyObject *unicode, goto onError; /* subtract preallocated bytes */ - writer.min_size -= 1; + writer.min_size -= newpos - collstart; if (PyBytes_Check(rep)) { /* Directly copy bytes result to output. */ @@ -6835,7 +6835,7 @@ unicode_encode_ucs1(PyObject *unicode, ch = PyUnicode_READ_CHAR(rep, i); if (ch >= limit) { raise_encode_exception(&exc, encoding, unicode, - pos, pos+1, reason); + collstart, collend, reason); goto onError; } *str = (char)ch; -- cgit v1.2.1 From 2e722071c4726e050d85fd842cd1120288bbbc56 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 23 Nov 2016 15:13:00 +0200 Subject: Issue #28774: Simplified encoding a str result of an error handler in ASCII and Latin1 encoders. 
--- Objects/unicodeobject.c | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2bf48b756f..1c2257e141 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6814,33 +6814,19 @@ unicode_encode_ucs1(PyObject *unicode, if (PyUnicode_READY(rep) < 0) goto onError; - if (PyUnicode_IS_ASCII(rep)) { - /* Fast path: all characters are smaller than limit */ - assert(limit >= 128); - assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND); - str = _PyBytesWriter_WriteBytes(&writer, str, - PyUnicode_DATA(rep), - PyUnicode_GET_LENGTH(rep)); - } - else { - Py_ssize_t repsize = PyUnicode_GET_LENGTH(rep); - - str = _PyBytesWriter_Prepare(&writer, str, repsize); - if (str == NULL) - goto onError; - - /* check if there is anything unencodable in the - replacement and copy it to the output */ - for (i = 0; repsize-->0; ++i, ++str) { - ch = PyUnicode_READ_CHAR(rep, i); - if (ch >= limit) { - raise_encode_exception(&exc, encoding, unicode, - collstart, collend, reason); - goto onError; - } - *str = (char)ch; - } + if (limit == 256 ? 
+ PyUnicode_KIND(rep) != PyUnicode_1BYTE_KIND : + !PyUnicode_IS_ASCII(rep)) + { + /* Not all characters are smaller than limit */ + raise_encode_exception(&exc, encoding, unicode, + collstart, collend, reason); + goto onError; } + assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND); + str = _PyBytesWriter_WriteBytes(&writer, str, + PyUnicode_DATA(rep), + PyUnicode_GET_LENGTH(rep)); } pos = newpos; Py_CLEAR(rep); -- cgit v1.2.1 From 366f4ee4da4420842eac26b3fa00c2b01d4515a6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 1 Dec 2016 14:43:22 +0100 Subject: Replace PyObject_CallFunctionObjArgs() with fastcall * PyObject_CallFunctionObjArgs(func, NULL) => _PyObject_CallNoArg(func) * PyObject_CallFunctionObjArgs(func, arg, NULL) => _PyObject_CallArg1(func, arg) PyObject_CallFunctionObjArgs() allocates 40 bytes on the C stack and requires extra work to "parse" C arguments to build a C array of PyObject*. _PyObject_CallNoArg() and _PyObject_CallArg1() are simpler and don't allocate memory on the C stack. This change is part of the fastcall project. The change on listsort() is related to the issue #23507. 
--- Objects/unicodeobject.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1c2257e141..8f6f6c675f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4269,7 +4269,7 @@ unicode_decode_call_errorhandler_wchar( if (*exceptionObject == NULL) goto onError; - restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL); + restuple = _PyObject_CallArg1(*errorHandler, *exceptionObject); if (restuple == NULL) goto onError; if (!PyTuple_Check(restuple)) { @@ -4368,7 +4368,7 @@ unicode_decode_call_errorhandler_writer( if (*exceptionObject == NULL) goto onError; - restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL); + restuple = _PyObject_CallArg1(*errorHandler, *exceptionObject); if (restuple == NULL) goto onError; if (!PyTuple_Check(restuple)) { @@ -6649,8 +6649,7 @@ unicode_encode_call_errorhandler(const char *errors, if (*exceptionObject == NULL) return NULL; - restuple = PyObject_CallFunctionObjArgs( - *errorHandler, *exceptionObject, NULL); + restuple = _PyObject_CallArg1(*errorHandler, *exceptionObject); if (restuple == NULL) return NULL; if (!PyTuple_Check(restuple)) { @@ -8644,8 +8643,7 @@ unicode_translate_call_errorhandler(const char *errors, if (*exceptionObject == NULL) return NULL; - restuple = PyObject_CallFunctionObjArgs( - *errorHandler, *exceptionObject, NULL); + restuple = _PyObject_CallArg1(*errorHandler, *exceptionObject); if (restuple == NULL) return NULL; if (!PyTuple_Check(restuple)) { -- cgit v1.2.1 From 39ae5bedd90f9caf9a78efa82f4d11e838933b3a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 4 Dec 2016 22:59:09 +0100 Subject: Backed out changeset b9c9691c72c5 Issue #28858: The change b9c9691c72c5 introduced a regression. It seems like _PyObject_CallArg1() uses more stack memory than PyObject_CallFunctionObjArgs(). 
--- Objects/unicodeobject.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 8f6f6c675f..1c2257e141 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4269,7 +4269,7 @@ unicode_decode_call_errorhandler_wchar( if (*exceptionObject == NULL) goto onError; - restuple = _PyObject_CallArg1(*errorHandler, *exceptionObject); + restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL); if (restuple == NULL) goto onError; if (!PyTuple_Check(restuple)) { @@ -4368,7 +4368,7 @@ unicode_decode_call_errorhandler_writer( if (*exceptionObject == NULL) goto onError; - restuple = _PyObject_CallArg1(*errorHandler, *exceptionObject); + restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL); if (restuple == NULL) goto onError; if (!PyTuple_Check(restuple)) { @@ -6649,7 +6649,8 @@ unicode_encode_call_errorhandler(const char *errors, if (*exceptionObject == NULL) return NULL; - restuple = _PyObject_CallArg1(*errorHandler, *exceptionObject); + restuple = PyObject_CallFunctionObjArgs( + *errorHandler, *exceptionObject, NULL); if (restuple == NULL) return NULL; if (!PyTuple_Check(restuple)) { @@ -8643,7 +8644,8 @@ unicode_translate_call_errorhandler(const char *errors, if (*exceptionObject == NULL) return NULL; - restuple = _PyObject_CallArg1(*errorHandler, *exceptionObject); + restuple = PyObject_CallFunctionObjArgs( + *errorHandler, *exceptionObject, NULL); if (restuple == NULL) return NULL; if (!PyTuple_Check(restuple)) { -- cgit v1.2.1 From bed50e1fd3e82021bd51be24920880518268372e Mon Sep 17 00:00:00 2001 From: Xiang Zhang Date: Tue, 20 Dec 2016 22:52:33 +0800 Subject: Issue #28822: Adjust indices handling of PyUnicode_FindChar(). 
--- Objects/unicodeobject.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3fdce82ae6..bbda4d884c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9461,16 +9461,12 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch, int direction) { int kind; - Py_ssize_t result; + Py_ssize_t len, result; if (PyUnicode_READY(str) == -1) return -2; - if (start < 0 || end < 0) { - PyErr_SetString(PyExc_IndexError, "string index out of range"); - return -2; - } - if (end > PyUnicode_GET_LENGTH(str)) - end = PyUnicode_GET_LENGTH(str); - if (start >= end) + len = PyUnicode_GET_LENGTH(str); + ADJUST_INDICES(start, end, len); + if (end - start < 1) return -1; kind = PyUnicode_KIND(str); result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start, -- cgit v1.2.1 From 70249059aaa99129a75a69f75dddfb5731a71585 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 16 Jan 2017 20:41:20 +0900 Subject: Issue #20180: convert unicode methods to AC. --- Objects/unicodeobject.c | 919 +++++++++++++++++++++++++++--------------------- 1 file changed, 511 insertions(+), 408 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e322c0cf25..acf59e4d13 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -51,7 +51,21 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
/*[clinic input] class str "PyUnicodeObject *" "&PyUnicode_Type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=604e916854800fa8]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4884c934de622cf6]*/ + +/*[python input] +class Py_UCS4_converter(CConverter): + type = 'Py_UCS4' + converter = 'convert_uc' + + def converter_init(self): + if self.default is not unspecified: + self.c_default = ascii(self.default) + if len(self.c_default) > 4 or self.c_default[0] != "'": + self.c_default = hex(ord(self.default)) + +[python start generated code]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=88f5dd06cd8e7a61]*/ /* --- Globals ------------------------------------------------------------ @@ -299,6 +313,8 @@ static const unsigned char ascii_linebreak[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; +static int convert_uc(PyObject *obj, void *addr); + #include "clinic/unicodeobject.c.h" typedef enum { @@ -10710,28 +10726,36 @@ replace(PyObject *self, PyObject *str1, /* --- Unicode Object Methods --------------------------------------------- */ -PyDoc_STRVAR(title__doc__, - "S.title() -> str\n\ -\n\ -Return a titlecased version of S, i.e. words start with title case\n\ -characters, all remaining cased characters have lower case."); +/*[clinic input] +str.title as unicode_title -static PyObject* -unicode_title(PyObject *self) +Return a version of the string where each word is titlecased. + +More specifically, words start with uppercased characters and all remaining +cased characters have lower case. +[clinic start generated code]*/ + +static PyObject * +unicode_title_impl(PyObject *self) +/*[clinic end generated code: output=c75ae03809574902 input=4eb12c1bb8642cb9]*/ { if (PyUnicode_READY(self) == -1) return NULL; return case_operation(self, do_title); } -PyDoc_STRVAR(capitalize__doc__, - "S.capitalize() -> str\n\ -\n\ -Return a capitalized version of S, i.e. 
make the first character\n\ -have upper case and the rest lower case."); +/*[clinic input] +str.capitalize as unicode_capitalize -static PyObject* -unicode_capitalize(PyObject *self) +Return a capitalized version of the string. + +More specifically, make the first character have upper case and the rest lower +case. +[clinic start generated code]*/ + +static PyObject * +unicode_capitalize_impl(PyObject *self) +/*[clinic end generated code: output=e49a4c333cdb7667 input=f4cbf1016938da6d]*/ { if (PyUnicode_READY(self) == -1) return NULL; @@ -10740,13 +10764,15 @@ unicode_capitalize(PyObject *self) return case_operation(self, do_capitalize); } -PyDoc_STRVAR(casefold__doc__, - "S.casefold() -> str\n\ -\n\ -Return a version of S suitable for caseless comparisons."); +/*[clinic input] +str.casefold as unicode_casefold + +Return a version of the string suitable for caseless comparisons. +[clinic start generated code]*/ static PyObject * -unicode_casefold(PyObject *self) +unicode_casefold_impl(PyObject *self) +/*[clinic end generated code: output=0120daf657ca40af input=a96f2b0d3daabd94]*/ { if (PyUnicode_READY(self) == -1) return NULL; @@ -10780,21 +10806,23 @@ convert_uc(PyObject *obj, void *addr) return 1; } -PyDoc_STRVAR(center__doc__, - "S.center(width[, fillchar]) -> str\n\ -\n\ -Return S centered in a string of length width. Padding is\n\ -done using the specified fill character (default is a space)"); +/*[clinic input] +str.center as unicode_center + + width: Py_ssize_t + fillchar: Py_UCS4 = ' ' + / + +Return a centered string of length width. + +Padding is done using the specified fill character (default is a space). 
+[clinic start generated code]*/ static PyObject * -unicode_center(PyObject *self, PyObject *args) +unicode_center_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar) +/*[clinic end generated code: output=420c8859effc7c0c input=b42b247eb26e6519]*/ { Py_ssize_t marg, left; - Py_ssize_t width; - Py_UCS4 fillchar = ' '; - - if (!PyArg_ParseTuple(args, "n|O&:center", &width, convert_uc, &fillchar)) - return NULL; if (PyUnicode_READY(self) == -1) return NULL; @@ -11465,51 +11493,49 @@ unicode_count(PyObject *self, PyObject *args) return result; } -PyDoc_STRVAR(encode__doc__, - "S.encode(encoding='utf-8', errors='strict') -> bytes\n\ -\n\ -Encode S using the codec registered for encoding. Default encoding\n\ -is 'utf-8'. errors may be given to set a different error\n\ -handling scheme. Default is 'strict' meaning that encoding errors raise\n\ -a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\ -'xmlcharrefreplace' as well as any other name registered with\n\ -codecs.register_error that can handle UnicodeEncodeErrors."); +/*[clinic input] +str.encode as unicode_encode + + encoding: str(c_default="NULL") = 'utf-8' + The encoding in which to encode the string. + errors: str(c_default="NULL") = 'strict' + The error handling scheme to use for encoding errors. + The default is 'strict' meaning that encoding errors raise a + UnicodeEncodeError. Other possible values are 'ignore', 'replace' and + 'xmlcharrefreplace' as well as any other name registered with + codecs.register_error that can handle UnicodeEncodeErrors. + +Encode the string using the codec registered for encoding. 
+[clinic start generated code]*/ static PyObject * -unicode_encode(PyObject *self, PyObject *args, PyObject *kwargs) +unicode_encode_impl(PyObject *self, const char *encoding, const char *errors) +/*[clinic end generated code: output=bf78b6e2a9470e3c input=12fcb2e5798e96dc]*/ { - static char *kwlist[] = {"encoding", "errors", 0}; - char *encoding = NULL; - char *errors = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode", - kwlist, &encoding, &errors)) - return NULL; return PyUnicode_AsEncodedString(self, encoding, errors); } -PyDoc_STRVAR(expandtabs__doc__, - "S.expandtabs(tabsize=8) -> str\n\ -\n\ -Return a copy of S where all tab characters are expanded using spaces.\n\ -If tabsize is not given, a tab size of 8 characters is assumed."); +/*[clinic input] +str.expandtabs as unicode_expandtabs -static PyObject* -unicode_expandtabs(PyObject *self, PyObject *args, PyObject *kwds) + tabsize: int = 8 + +Return a copy where all tab characters are expanded using spaces. + +If tabsize is not given, a tab size of 8 characters is assumed. 
+[clinic start generated code]*/ + +static PyObject * +unicode_expandtabs_impl(PyObject *self, int tabsize) +/*[clinic end generated code: output=3457c5dcee26928f input=8a01914034af4c85]*/ { Py_ssize_t i, j, line_pos, src_len, incr; Py_UCS4 ch; PyObject *u; void *src_data, *dest_data; - static char *kwlist[] = {"tabsize", 0}; - int tabsize = 8; int kind; int found; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs", - kwlist, &tabsize)) - return NULL; - if (PyUnicode_READY(self) == -1) return NULL; @@ -11693,14 +11719,18 @@ unicode_index(PyObject *self, PyObject *args) return PyLong_FromSsize_t(result); } -PyDoc_STRVAR(islower__doc__, - "S.islower() -> bool\n\ -\n\ -Return True if all cased characters in S are lowercase and there is\n\ -at least one cased character in S, False otherwise."); +/*[clinic input] +str.islower as unicode_islower -static PyObject* -unicode_islower(PyObject *self) +Return True if the string is a lowercase string, False otherwise. + +A string is lowercase if all cased characters in the string are lowercase and +there is at least one cased character in the string. +[clinic start generated code]*/ + +static PyObject * +unicode_islower_impl(PyObject *self) +/*[clinic end generated code: output=dbd41995bd005b81 input=46eeb20d935af050]*/ { Py_ssize_t i, length; int kind; @@ -11734,14 +11764,18 @@ unicode_islower(PyObject *self) return PyBool_FromLong(cased); } -PyDoc_STRVAR(isupper__doc__, - "S.isupper() -> bool\n\ -\n\ -Return True if all cased characters in S are uppercase and there is\n\ -at least one cased character in S, False otherwise."); +/*[clinic input] +str.isupper as unicode_isupper -static PyObject* -unicode_isupper(PyObject *self) +Return True if the string is an uppercase string, False otherwise. + +A string is uppercase if all cased characters in the string are uppercase and +there is at least one cased character in the string. 
+[clinic start generated code]*/ + +static PyObject * +unicode_isupper_impl(PyObject *self) +/*[clinic end generated code: output=049209c8e7f15f59 input=dd0a595fc871eee0]*/ { Py_ssize_t i, length; int kind; @@ -11775,16 +11809,18 @@ unicode_isupper(PyObject *self) return PyBool_FromLong(cased); } -PyDoc_STRVAR(istitle__doc__, - "S.istitle() -> bool\n\ -\n\ -Return True if S is a titlecased string and there is at least one\n\ -character in S, i.e. upper- and titlecase characters may only\n\ -follow uncased characters and lowercase characters only cased ones.\n\ -Return False otherwise."); +/*[clinic input] +str.istitle as unicode_istitle -static PyObject* -unicode_istitle(PyObject *self) +Return True if the string is a title-cased string, False otherwise. + +In a title-cased string, upper- and title-case characters may only +follow uncased characters and lowercase characters only cased ones. +[clinic start generated code]*/ + +static PyObject * +unicode_istitle_impl(PyObject *self) +/*[clinic end generated code: output=e9bf6eb91f5d3f0e input=2c56883d113d644d]*/ { Py_ssize_t i, length; int kind; @@ -11831,14 +11867,18 @@ unicode_istitle(PyObject *self) return PyBool_FromLong(cased); } -PyDoc_STRVAR(isspace__doc__, - "S.isspace() -> bool\n\ -\n\ -Return True if all characters in S are whitespace\n\ -and there is at least one character in S, False otherwise."); +/*[clinic input] +str.isspace as unicode_isspace -static PyObject* -unicode_isspace(PyObject *self) +Return True if the string is a whitespace string, False otherwise. + +A string is whitespace if all characters in the string are whitespace and there +is at least one character in the string. 
+[clinic start generated code]*/ + +static PyObject * +unicode_isspace_impl(PyObject *self) +/*[clinic end generated code: output=163a63bfa08ac2b9 input=b9506a23e312d203]*/ { Py_ssize_t i, length; int kind; @@ -11867,14 +11907,18 @@ unicode_isspace(PyObject *self) return PyBool_FromLong(1); } -PyDoc_STRVAR(isalpha__doc__, - "S.isalpha() -> bool\n\ -\n\ -Return True if all characters in S are alphabetic\n\ -and there is at least one character in S, False otherwise."); +/*[clinic input] +str.isalpha as unicode_isalpha -static PyObject* -unicode_isalpha(PyObject *self) +Return True if the string is an alphabetic string, False otherwise. + +A string is alphabetic if all characters in the string are alphabetic and there +is at least one character in the string. +[clinic start generated code]*/ + +static PyObject * +unicode_isalpha_impl(PyObject *self) +/*[clinic end generated code: output=cc81b9ac3883ec4f input=17e3788814472079]*/ { Py_ssize_t i, length; int kind; @@ -11902,14 +11946,18 @@ unicode_isalpha(PyObject *self) return PyBool_FromLong(1); } -PyDoc_STRVAR(isalnum__doc__, - "S.isalnum() -> bool\n\ -\n\ -Return True if all characters in S are alphanumeric\n\ -and there is at least one character in S, False otherwise."); +/*[clinic input] +str.isalnum as unicode_isalnum -static PyObject* -unicode_isalnum(PyObject *self) +Return True if the string is an alpha-numeric string, False otherwise. + +A string is alpha-numeric if all characters in the string are alpha-numeric and +there is at least one character in the string. 
+[clinic start generated code]*/ + +static PyObject * +unicode_isalnum_impl(PyObject *self) +/*[clinic end generated code: output=a5a23490ffc3660c input=d350c4f7c59b4758]*/ { int kind; void *data; @@ -11940,14 +11988,18 @@ unicode_isalnum(PyObject *self) return PyBool_FromLong(1); } -PyDoc_STRVAR(isdecimal__doc__, - "S.isdecimal() -> bool\n\ -\n\ -Return True if there are only decimal characters in S,\n\ -False otherwise."); +/*[clinic input] +str.isdecimal as unicode_isdecimal -static PyObject* -unicode_isdecimal(PyObject *self) +Return True if the string is a decimal string, False otherwise. + +A string is a decimal string if all characters in the string are decimal and +there is at least one character in the string. +[clinic start generated code]*/ + +static PyObject * +unicode_isdecimal_impl(PyObject *self) +/*[clinic end generated code: output=fb2dcdb62d3fc548 input=40536fb80e5f1dc1]*/ { Py_ssize_t i, length; int kind; @@ -11975,14 +12027,18 @@ unicode_isdecimal(PyObject *self) return PyBool_FromLong(1); } -PyDoc_STRVAR(isdigit__doc__, - "S.isdigit() -> bool\n\ -\n\ -Return True if all characters in S are digits\n\ -and there is at least one character in S, False otherwise."); +/*[clinic input] +str.isdigit as unicode_isdigit -static PyObject* -unicode_isdigit(PyObject *self) +Return True if the string is a digit string, False otherwise. + +A string is a digit string if all characters in the string are digits and there +is at least one character in the string. 
+[clinic start generated code]*/ + +static PyObject * +unicode_isdigit_impl(PyObject *self) +/*[clinic end generated code: output=10a6985311da6858 input=c6a222be1aaec2af]*/ { Py_ssize_t i, length; int kind; @@ -12011,14 +12067,18 @@ unicode_isdigit(PyObject *self) return PyBool_FromLong(1); } -PyDoc_STRVAR(isnumeric__doc__, - "S.isnumeric() -> bool\n\ -\n\ -Return True if there are only numeric characters in S,\n\ -False otherwise."); +/*[clinic input] +str.isnumeric as unicode_isnumeric -static PyObject* -unicode_isnumeric(PyObject *self) +Return True if the string is a numeric string, False otherwise. + +A string is numeric if all characters in the string are numeric and there is at +least one character in the string. +[clinic start generated code]*/ + +static PyObject * +unicode_isnumeric_impl(PyObject *self) +/*[clinic end generated code: output=9172a32d9013051a input=e3b37b2cc8854f35]*/ { Py_ssize_t i, length; int kind; @@ -12083,29 +12143,34 @@ PyUnicode_IsIdentifier(PyObject *self) return 1; } -PyDoc_STRVAR(isidentifier__doc__, - "S.isidentifier() -> bool\n\ -\n\ -Return True if S is a valid identifier according\n\ -to the language definition.\n\ -\n\ -Use keyword.iskeyword() to test for reserved identifiers\n\ -such as \"def\" and \"class\".\n"); +/*[clinic input] +str.isidentifier as unicode_isidentifier -static PyObject* -unicode_isidentifier(PyObject *self) +Return True if the string is a valid Python identifier, False otherwise. + +Use keyword.iskeyword() to test for reserved identifiers such as "def" and +"class". 
+[clinic start generated code]*/ + +static PyObject * +unicode_isidentifier_impl(PyObject *self) +/*[clinic end generated code: output=fe585a9666572905 input=95eebe40d6d91234]*/ { return PyBool_FromLong(PyUnicode_IsIdentifier(self)); } -PyDoc_STRVAR(isprintable__doc__, - "S.isprintable() -> bool\n\ -\n\ -Return True if all characters in S are considered\n\ -printable in repr() or S is empty, False otherwise."); +/*[clinic input] +str.isprintable as unicode_isprintable -static PyObject* -unicode_isprintable(PyObject *self) +Return True if the string is printable, False otherwise. + +A string is printable if all of its characters are considered printable in +repr() or if it is empty. +[clinic start generated code]*/ + +static PyObject * +unicode_isprintable_impl(PyObject *self) +/*[clinic end generated code: output=3ab9626cd32dd1a0 input=20c53134171af84e]*/ { Py_ssize_t i, length; int kind; @@ -12130,16 +12195,25 @@ unicode_isprintable(PyObject *self) Py_RETURN_TRUE; } -PyDoc_STRVAR(join__doc__, - "S.join(iterable) -> str\n\ -\n\ -Return a string which is the concatenation of the strings in the\n\ -iterable. The separator between elements is S."); +/*[clinic input] +str.join as unicode_join -static PyObject* -unicode_join(PyObject *self, PyObject *data) + iterable: object + / + +Concatenate any number of strings. + +The string whose method is called is inserted in between each given string. +The result is returned as a new string.
+ +Example: '.'.join(['ab', 'pq', 'rs']) -> 'ab.pq.rs' +[clinic start generated code]*/ + +static PyObject * +unicode_join(PyObject *self, PyObject *iterable) +/*[clinic end generated code: output=6857e7cecfe7bf98 input=465f62626109db6b]*/ { - return PyUnicode_Join(self, data); + return PyUnicode_Join(self, iterable); } static Py_ssize_t @@ -12150,21 +12224,22 @@ unicode_length(PyObject *self) return PyUnicode_GET_LENGTH(self); } -PyDoc_STRVAR(ljust__doc__, - "S.ljust(width[, fillchar]) -> str\n\ -\n\ -Return S left-justified in a Unicode string of length width. Padding is\n\ -done using the specified fill character (default is a space)."); +/*[clinic input] +str.ljust as unicode_ljust -static PyObject * -unicode_ljust(PyObject *self, PyObject *args) -{ - Py_ssize_t width; - Py_UCS4 fillchar = ' '; + width: Py_ssize_t + fillchar: Py_UCS4 = ' ' + / - if (!PyArg_ParseTuple(args, "n|O&:ljust", &width, convert_uc, &fillchar)) - return NULL; +Return a left-justified string of length width. +Padding is done using the specified fill character (default is a space). +[clinic start generated code]*/ + +static PyObject * +unicode_ljust_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar) +/*[clinic end generated code: output=1cce0e0e0a0b84b3 input=3ab599e335e60a32]*/ +{ if (PyUnicode_READY(self) == -1) return NULL; @@ -12174,13 +12249,15 @@ unicode_ljust(PyObject *self, PyObject *args) return pad(self, 0, width - PyUnicode_GET_LENGTH(self), fillchar); } -PyDoc_STRVAR(lower__doc__, - "S.lower() -> str\n\ -\n\ -Return a copy of the string S converted to lowercase."); +/*[clinic input] +str.lower as unicode_lower -static PyObject* -unicode_lower(PyObject *self) +Return a copy of the string converted to lowercase. 
+[clinic start generated code]*/ + +static PyObject * +unicode_lower_impl(PyObject *self) +/*[clinic end generated code: output=84ef9ed42efad663 input=60a2984b8beff23a]*/ { if (PyUnicode_READY(self) == -1) return NULL; @@ -12194,9 +12271,9 @@ unicode_lower(PyObject *self) #define BOTHSTRIP 2 /* Arrays indexed by above */ -static const char * const stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; +static const char *stripfuncnames[] = {"lstrip", "rstrip", "strip"}; -#define STRIPNAME(i) (stripformat[i]+3) +#define STRIPNAME(i) (stripfuncnames[i]) /* externally visible for str.strip(unicode) */ PyObject * @@ -12353,13 +12430,8 @@ do_strip(PyObject *self, int striptype) static PyObject * -do_argstrip(PyObject *self, int striptype, PyObject *args) +do_argstrip(PyObject *self, int striptype, PyObject *sep) { - PyObject *sep = NULL; - - if (!PyArg_ParseTuple(args, stripformat[striptype], &sep)) - return NULL; - if (sep != NULL && sep != Py_None) { if (PyUnicode_Check(sep)) return _PyUnicode_XStrip(self, striptype, sep); @@ -12375,52 +12447,60 @@ do_argstrip(PyObject *self, int striptype, PyObject *args) } -PyDoc_STRVAR(strip__doc__, - "S.strip([chars]) -> str\n\ -\n\ -Return a copy of the string S with leading and trailing\n\ -whitespace removed.\n\ -If chars is given and not None, remove characters in chars instead."); +/*[clinic input] +str.strip as unicode_strip + + chars: object = None + / + +Return a copy of the string with leading and trailing whitespace removed. + +If chars is given and not None, remove characters in chars instead. 
+[clinic start generated code]*/ static PyObject * -unicode_strip(PyObject *self, PyObject *args) +unicode_strip_impl(PyObject *self, PyObject *chars) +/*[clinic end generated code: output=ca19018454345d57 input=385289c6f423b954]*/ { - if (PyTuple_GET_SIZE(args) == 0) - return do_strip(self, BOTHSTRIP); /* Common case */ - else - return do_argstrip(self, BOTHSTRIP, args); + return do_argstrip(self, BOTHSTRIP, chars); } -PyDoc_STRVAR(lstrip__doc__, - "S.lstrip([chars]) -> str\n\ -\n\ -Return a copy of the string S with leading whitespace removed.\n\ -If chars is given and not None, remove characters in chars instead."); +/*[clinic input] +str.lstrip as unicode_lstrip + + chars: object = NULL + / + +Return a copy of the string with leading whitespace removed. + +If chars is given and not None, remove characters in chars instead. +[clinic start generated code]*/ static PyObject * -unicode_lstrip(PyObject *self, PyObject *args) +unicode_lstrip_impl(PyObject *self, PyObject *chars) +/*[clinic end generated code: output=3b43683251f79ca7 input=9e56f3c45f5ff4c3]*/ { - if (PyTuple_GET_SIZE(args) == 0) - return do_strip(self, LEFTSTRIP); /* Common case */ - else - return do_argstrip(self, LEFTSTRIP, args); + return do_argstrip(self, LEFTSTRIP, chars); } -PyDoc_STRVAR(rstrip__doc__, - "S.rstrip([chars]) -> str\n\ -\n\ -Return a copy of the string S with trailing whitespace removed.\n\ -If chars is given and not None, remove characters in chars instead."); +/*[clinic input] +str.rstrip as unicode_rstrip + + chars: object = NULL + / + +Return a copy of the string with trailing whitespace removed. + +If chars is given and not None, remove characters in chars instead. 
+[clinic start generated code]*/ static PyObject * -unicode_rstrip(PyObject *self, PyObject *args) +unicode_rstrip_impl(PyObject *self, PyObject *chars) +/*[clinic end generated code: output=4a59230017cc3b7a input=ac89d0219cb411ee]*/ { - if (PyTuple_GET_SIZE(args) == 0) - return do_strip(self, RIGHTSTRIP); /* Common case */ - else - return do_argstrip(self, RIGHTSTRIP, args); + return do_argstrip(self, RIGHTSTRIP, chars); } @@ -12500,25 +12580,30 @@ PyUnicode_Replace(PyObject *str, return replace(str, substr, replstr, maxcount); } -PyDoc_STRVAR(replace__doc__, - "S.replace(old, new[, count]) -> str\n\ -\n\ -Return a copy of S with all occurrences of substring\n\ -old replaced by new. If the optional argument count is\n\ -given, only the first count occurrences are replaced."); +/*[clinic input] +str.replace as unicode_replace -static PyObject* -unicode_replace(PyObject *self, PyObject *args) -{ - PyObject *str1; - PyObject *str2; - Py_ssize_t maxcount = -1; + old: unicode + new: unicode + count: Py_ssize_t = -1 + Maximum number of occurrences to replace. + -1 (the default value) means replace all occurrences. + / - if (!PyArg_ParseTuple(args, "UU|n:replace", &str1, &str2, &maxcount)) - return NULL; +Return a copy with all occurrences of substring old replaced by new. + +If the optional argument count is given, only the first count occurrences are +replaced. +[clinic start generated code]*/ + +static PyObject * +unicode_replace_impl(PyObject *self, PyObject *old, PyObject *new, + Py_ssize_t count) +/*[clinic end generated code: output=b63f1a8b5eebf448 input=147d12206276ebeb]*/ +{ if (PyUnicode_READY(self) == -1) return NULL; - return replace(self, str1, str2, maxcount); + return replace(self, old, new, count); } static PyObject * @@ -12750,21 +12835,22 @@ unicode_rindex(PyObject *self, PyObject *args) return PyLong_FromSsize_t(result); } -PyDoc_STRVAR(rjust__doc__, - "S.rjust(width[, fillchar]) -> str\n\ -\n\ -Return S right-justified in a string of length width. 
Padding is\n\ -done using the specified fill character (default is a space)."); +/*[clinic input] +str.rjust as unicode_rjust -static PyObject * -unicode_rjust(PyObject *self, PyObject *args) -{ - Py_ssize_t width; - Py_UCS4 fillchar = ' '; + width: Py_ssize_t + fillchar: Py_UCS4 = ' ' + / - if (!PyArg_ParseTuple(args, "n|O&:rjust", &width, convert_uc, &fillchar)) - return NULL; +Return a right-justified string of length width. + +Padding is done using the specified fill character (default is a space). +[clinic start generated code]*/ +static PyObject * +unicode_rjust_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar) +/*[clinic end generated code: output=804a1a57fbe8d5cf input=d05f550b5beb1f72]*/ +{ if (PyUnicode_READY(self) == -1) return NULL; @@ -12783,35 +12869,32 @@ PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) return split(s, sep, maxsplit); } -PyDoc_STRVAR(split__doc__, - "S.split(sep=None, maxsplit=-1) -> list of strings\n\ -\n\ -Return a list of the words in S, using sep as the\n\ -delimiter string. If maxsplit is given, at most maxsplit\n\ -splits are done. If sep is not specified or is None, any\n\ -whitespace string is a separator and empty strings are\n\ -removed from the result."); - -static PyObject* -unicode_split(PyObject *self, PyObject *args, PyObject *kwds) -{ - static char *kwlist[] = {"sep", "maxsplit", 0}; - PyObject *substring = Py_None; - Py_ssize_t maxcount = -1; +/*[clinic input] +str.split as unicode_split - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split", - kwlist, &substring, &maxcount)) - return NULL; + sep: object = None + The delimiter according to which to split the string. + None (the default value) means split according to any whitespace, + and discard empty strings from the result. + maxsplit: Py_ssize_t = -1 + Maximum number of splits to do. + -1 (the default value) means no limit.
- if (substring == Py_None) - return split(self, NULL, maxcount); +Return a list of the words in the string, using sep as the delimiter string. +[clinic start generated code]*/ - if (PyUnicode_Check(substring)) - return split(self, substring, maxcount); +static PyObject * +unicode_split_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit) +/*[clinic end generated code: output=3a65b1db356948dc input=606e750488a82359]*/ +{ + if (sep == Py_None) + return split(self, NULL, maxsplit); + if (PyUnicode_Check(sep)) + return split(self, sep, maxsplit); PyErr_Format(PyExc_TypeError, "must be str or None, not %.100s", - Py_TYPE(substring)->tp_name); + Py_TYPE(sep)->tp_name); return NULL; } @@ -12930,30 +13013,47 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj) return out; } -PyDoc_STRVAR(partition__doc__, - "S.partition(sep) -> (head, sep, tail)\n\ -\n\ -Search for the separator sep in S, and return the part before it,\n\ -the separator itself, and the part after it. If the separator is not\n\ -found, return S and two empty strings."); +/*[clinic input] +str.partition as unicode_partition -static PyObject* -unicode_partition(PyObject *self, PyObject *separator) + sep: object + / + +Partition the string into three parts using the given separator. + +This will search for the separator in the string. If the separator is found, +returns a 3-tuple containing the part before the separator, the separator +itself, and the part after it. + +If the separator is not found, returns a 3-tuple containing the original string +and two empty strings. 
+[clinic start generated code]*/ + +static PyObject * +unicode_partition(PyObject *self, PyObject *sep) +/*[clinic end generated code: output=e4ced7bd253ca3c4 input=f29b8d06c63e50be]*/ { - return PyUnicode_Partition(self, separator); + return PyUnicode_Partition(self, sep); } -PyDoc_STRVAR(rpartition__doc__, - "S.rpartition(sep) -> (head, sep, tail)\n\ -\n\ -Search for the separator sep in S, starting at the end of S, and return\n\ -the part before it, the separator itself, and the part after it. If the\n\ -separator is not found, return two empty strings and S."); +/*[clinic input] +str.rpartition as unicode_rpartition = str.partition -static PyObject* -unicode_rpartition(PyObject *self, PyObject *separator) +Partition the string into three parts using the given separator. + +This will search for the separator in the string, starting at the end. If +the separator is found, returns a 3-tuple containing the part before the +separator, the separator itself, and the part after it. + +If the separator is not found, returns a 3-tuple containing two empty strings +and the original string. +[clinic start generated code]*/ + +static PyObject * +unicode_rpartition(PyObject *self, PyObject *sep) +/*[clinic end generated code: output=1aa13cf1156572aa input=e77c7acb69bdfca6]*/ { - return PyUnicode_RPartition(self, separator); + return PyUnicode_RPartition(self, sep); } PyObject * @@ -12965,55 +13065,44 @@ PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) return rsplit(s, sep, maxsplit); } -PyDoc_STRVAR(rsplit__doc__, - "S.rsplit(sep=None, maxsplit=-1) -> list of strings\n\ -\n\ -Return a list of the words in S, using sep as the\n\ -delimiter string, starting at the end of the string and\n\ -working to the front. If maxsplit is given, at most maxsplit\n\ -splits are done.
If sep is not specified, any whitespace string\n\ -is a separator."); - -static PyObject* -unicode_rsplit(PyObject *self, PyObject *args, PyObject *kwds) -{ - static char *kwlist[] = {"sep", "maxsplit", 0}; - PyObject *substring = Py_None; - Py_ssize_t maxcount = -1; +/*[clinic input] +str.rsplit as unicode_rsplit = str.split - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit", - kwlist, &substring, &maxcount)) - return NULL; +Return a list of the words in the string, using sep as the delimiter string. - if (substring == Py_None) - return rsplit(self, NULL, maxcount); +Splits are done starting at the end of the string and working to the front. +[clinic start generated code]*/ - if (PyUnicode_Check(substring)) - return rsplit(self, substring, maxcount); +static PyObject * +unicode_rsplit_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit) +/*[clinic end generated code: output=c2b815c63bcabffc input=12ad4bf57dd35f15]*/ +{ + if (sep == Py_None) + return rsplit(self, NULL, maxsplit); + if (PyUnicode_Check(sep)) + return rsplit(self, sep, maxsplit); PyErr_Format(PyExc_TypeError, "must be str or None, not %.100s", - Py_TYPE(substring)->tp_name); + Py_TYPE(sep)->tp_name); return NULL; } -PyDoc_STRVAR(splitlines__doc__, - "S.splitlines([keepends]) -> list of strings\n\ -\n\ -Return a list of the lines in S, breaking at line boundaries.\n\ -Line breaks are not included in the resulting list unless keepends\n\ -is given and true."); +/*[clinic input] +str.splitlines as unicode_splitlines -static PyObject* -unicode_splitlines(PyObject *self, PyObject *args, PyObject *kwds) -{ - static char *kwlist[] = {"keepends", 0}; - int keepends = 0; + keepends: int(c_default="0") = False - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines", - kwlist, &keepends)) - return NULL; +Return a list of the lines in the string, breaking at line boundaries. + +Line breaks are not included in the resulting list unless keepends is given and +true. 
+[clinic start generated code]*/ +static PyObject * +unicode_splitlines_impl(PyObject *self, int keepends) +/*[clinic end generated code: output=f664dcdad153ec40 input=d6ff99fe43465b0f]*/ +{ return PyUnicode_Splitlines(self, keepends); } @@ -13023,14 +13112,15 @@ PyObject *unicode_str(PyObject *self) return unicode_result_unchanged(self); } -PyDoc_STRVAR(swapcase__doc__, - "S.swapcase() -> str\n\ -\n\ -Return a copy of S with uppercase characters converted to lowercase\n\ -and vice versa."); +/*[clinic input] +str.swapcase as unicode_swapcase -static PyObject* -unicode_swapcase(PyObject *self) +Convert uppercase characters to lowercase and lowercase characters to uppercase. +[clinic start generated code]*/ + +static PyObject * +unicode_swapcase_impl(PyObject *self) +/*[clinic end generated code: output=5d28966bf6d7b2af input=3f3ef96d5798a7bb]*/ { if (PyUnicode_READY(self) == -1) return NULL; @@ -13167,29 +13257,37 @@ unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z) return NULL; } -PyDoc_STRVAR(translate__doc__, - "S.translate(table) -> str\n\ -\n\ -Return a copy of the string S in which each character has been mapped\n\ -through the given translation table. The table must implement\n\ -lookup/indexing via __getitem__, for instance a dictionary or list,\n\ -mapping Unicode ordinals to Unicode ordinals, strings, or None. If\n\ -this operation raises LookupError, the character is left untouched.\n\ -Characters mapped to None are deleted."); +/*[clinic input] +str.translate as unicode_translate -static PyObject* + table: object + Translation table, which must be a mapping of Unicode ordinals to + Unicode ordinals, strings, or None. + / + +Replace each character in the string using the given translation table. + +The table must implement lookup/indexing via __getitem__, for instance a +dictionary or list. If this operation raises LookupError, the character is +left untouched. Characters mapped to None are deleted. 
+[clinic start generated code]*/ + +static PyObject * unicode_translate(PyObject *self, PyObject *table) +/*[clinic end generated code: output=3cb448ff2fd96bf3 input=6d38343db63d8eb0]*/ { return _PyUnicode_TranslateCharmap(self, table, "ignore"); } -PyDoc_STRVAR(upper__doc__, - "S.upper() -> str\n\ -\n\ -Return a copy of S converted to uppercase."); +/*[clinic input] +str.upper as unicode_upper -static PyObject* -unicode_upper(PyObject *self) +Return a copy of the string converted to uppercase. +[clinic start generated code]*/ + +static PyObject * +unicode_upper_impl(PyObject *self) +/*[clinic end generated code: output=1b7ddd16bbcdc092 input=db3d55682dfe2e6c]*/ { if (PyUnicode_READY(self) == -1) return NULL; @@ -13198,25 +13296,27 @@ unicode_upper(PyObject *self) return case_operation(self, do_upper); } -PyDoc_STRVAR(zfill__doc__, - "S.zfill(width) -> str\n\ -\n\ -Pad a numeric string S with zeros on the left, to fill a field\n\ -of the specified width. The string S is never truncated."); +/*[clinic input] +str.zfill as unicode_zfill + + width: Py_ssize_t + / + +Pad a numeric string with zeros on the left, to fill a field of the given width. + +The string is never truncated. +[clinic start generated code]*/ static PyObject * -unicode_zfill(PyObject *self, PyObject *args) +unicode_zfill_impl(PyObject *self, Py_ssize_t width) +/*[clinic end generated code: output=e13fb6bdf8e3b9df input=3559257ab7bfed6e]*/ { Py_ssize_t fill; PyObject *u; - Py_ssize_t width; int kind; void *data; Py_UCS4 chr; - if (!PyArg_ParseTuple(args, "n:zfill", &width)) - return NULL; - if (PyUnicode_READY(self) == -1) return NULL; @@ -13703,16 +13803,22 @@ PyDoc_STRVAR(format_map__doc__, Return a formatted version of S, using substitutions from mapping.\n\ The substitutions are identified by braces ('{' and '}')."); +/*[clinic input] +str.__format__ as unicode___format__ + + format_spec: unicode + / + +Return a formatted version of the string as described by format_spec. 
+[clinic start generated code]*/ + static PyObject * -unicode__format__(PyObject* self, PyObject* args) +unicode___format___impl(PyObject *self, PyObject *format_spec) +/*[clinic end generated code: output=45fceaca6d2ba4c8 input=1f0623ca7b7c5981]*/ { - PyObject *format_spec; _PyUnicodeWriter writer; int ret; - if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) - return NULL; - if (PyUnicode_READY(self) == -1) return NULL; _PyUnicodeWriter_Init(&writer); @@ -13726,44 +13832,43 @@ unicode__format__(PyObject* self, PyObject* args) return _PyUnicodeWriter_Finish(&writer); } -PyDoc_STRVAR(p_format__doc__, - "S.__format__(format_spec) -> str\n\ -\n\ -Return a formatted version of S as described by format_spec."); +/*[clinic input] +str.__sizeof__ as unicode_sizeof + +Return the size of the string in memory, in bytes. +[clinic start generated code]*/ static PyObject * -unicode__sizeof__(PyObject *v) +unicode_sizeof_impl(PyObject *self) +/*[clinic end generated code: output=6dbc2f5a408b6d4f input=6dd011c108e33fb0]*/ { Py_ssize_t size; /* If it's a compact object, account for base structure + character data. */ - if (PyUnicode_IS_COMPACT_ASCII(v)) - size = sizeof(PyASCIIObject) + PyUnicode_GET_LENGTH(v) + 1; - else if (PyUnicode_IS_COMPACT(v)) + if (PyUnicode_IS_COMPACT_ASCII(self)) + size = sizeof(PyASCIIObject) + PyUnicode_GET_LENGTH(self) + 1; + else if (PyUnicode_IS_COMPACT(self)) size = sizeof(PyCompactUnicodeObject) + - (PyUnicode_GET_LENGTH(v) + 1) * PyUnicode_KIND(v); + (PyUnicode_GET_LENGTH(self) + 1) * PyUnicode_KIND(self); else { /* If it is a two-block object, account for base object, and for character block if present. */ size = sizeof(PyUnicodeObject); - if (_PyUnicode_DATA_ANY(v)) - size += (PyUnicode_GET_LENGTH(v) + 1) * - PyUnicode_KIND(v); + if (_PyUnicode_DATA_ANY(self)) + size += (PyUnicode_GET_LENGTH(self) + 1) * + PyUnicode_KIND(self); } /* If the wstr pointer is present, account for it unless it is shared with the data pointer. 
Check if the data is not shared. */ - if (_PyUnicode_HAS_WSTR_MEMORY(v)) - size += (PyUnicode_WSTR_LENGTH(v) + 1) * sizeof(wchar_t); - if (_PyUnicode_HAS_UTF8_MEMORY(v)) - size += PyUnicode_UTF8_LENGTH(v) + 1; + if (_PyUnicode_HAS_WSTR_MEMORY(self)) + size += (PyUnicode_WSTR_LENGTH(self) + 1) * sizeof(wchar_t); + if (_PyUnicode_HAS_UTF8_MEMORY(self)) + size += PyUnicode_UTF8_LENGTH(self) + 1; return PyLong_FromSsize_t(size); } -PyDoc_STRVAR(sizeof__doc__, - "S.__sizeof__() -> size of S in memory, in bytes"); - static PyObject * unicode_getnewargs(PyObject *v) { @@ -13774,54 +13879,52 @@ unicode_getnewargs(PyObject *v) } static PyMethodDef unicode_methods[] = { - {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__}, - {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__}, - {"split", (PyCFunction) unicode_split, METH_VARARGS | METH_KEYWORDS, split__doc__}, - {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__}, - {"join", (PyCFunction) unicode_join, METH_O, join__doc__}, - {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__}, - {"casefold", (PyCFunction) unicode_casefold, METH_NOARGS, casefold__doc__}, - {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__}, - {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__}, + UNICODE_ENCODE_METHODDEF + UNICODE_REPLACE_METHODDEF + UNICODE_SPLIT_METHODDEF + UNICODE_RSPLIT_METHODDEF + UNICODE_JOIN_METHODDEF + UNICODE_CAPITALIZE_METHODDEF + UNICODE_CASEFOLD_METHODDEF + UNICODE_TITLE_METHODDEF + UNICODE_CENTER_METHODDEF {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__}, - {"expandtabs", (PyCFunction) unicode_expandtabs, - METH_VARARGS | METH_KEYWORDS, expandtabs__doc__}, + UNICODE_EXPANDTABS_METHODDEF {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__}, - {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__}, + UNICODE_PARTITION_METHODDEF 
{"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__}, - {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__}, - {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__}, - {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__}, + UNICODE_LJUST_METHODDEF + UNICODE_LOWER_METHODDEF + UNICODE_LSTRIP_METHODDEF {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__}, {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__}, - {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__}, - {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__}, - {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__}, - {"splitlines", (PyCFunction) unicode_splitlines, - METH_VARARGS | METH_KEYWORDS, splitlines__doc__}, - {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__}, - {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__}, - {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__}, - {"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__}, + UNICODE_RJUST_METHODDEF + UNICODE_RSTRIP_METHODDEF + UNICODE_RPARTITION_METHODDEF + UNICODE_SPLITLINES_METHODDEF + UNICODE_STRIP_METHODDEF + UNICODE_SWAPCASE_METHODDEF + UNICODE_TRANSLATE_METHODDEF + UNICODE_UPPER_METHODDEF {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__}, {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__}, - {"islower", (PyCFunction) unicode_islower, METH_NOARGS, islower__doc__}, - {"isupper", (PyCFunction) unicode_isupper, METH_NOARGS, isupper__doc__}, - {"istitle", (PyCFunction) unicode_istitle, METH_NOARGS, istitle__doc__}, - {"isspace", (PyCFunction) unicode_isspace, METH_NOARGS, isspace__doc__}, - {"isdecimal", (PyCFunction) unicode_isdecimal, METH_NOARGS, isdecimal__doc__}, - {"isdigit", (PyCFunction) unicode_isdigit, METH_NOARGS, isdigit__doc__}, - {"isnumeric", (PyCFunction) 
unicode_isnumeric, METH_NOARGS, isnumeric__doc__}, - {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__}, - {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__}, - {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__}, - {"isprintable", (PyCFunction) unicode_isprintable, METH_NOARGS, isprintable__doc__}, - {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__}, + UNICODE_ISLOWER_METHODDEF + UNICODE_ISUPPER_METHODDEF + UNICODE_ISTITLE_METHODDEF + UNICODE_ISSPACE_METHODDEF + UNICODE_ISDECIMAL_METHODDEF + UNICODE_ISDIGIT_METHODDEF + UNICODE_ISNUMERIC_METHODDEF + UNICODE_ISALPHA_METHODDEF + UNICODE_ISALNUM_METHODDEF + UNICODE_ISIDENTIFIER_METHODDEF + UNICODE_ISPRINTABLE_METHODDEF + UNICODE_ZFILL_METHODDEF {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, {"format_map", (PyCFunction) do_string_format_map, METH_O, format_map__doc__}, - {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__}, + UNICODE___FORMAT___METHODDEF UNICODE_MAKETRANS_METHODDEF - {"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__}, + UNICODE_SIZEOF_METHODDEF #if 0 /* These methods are just used for debugging the implementation. */ {"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS}, -- cgit v1.2.1 From 6c812b4a4ef712e449e029dc7253e34c29ec3cff Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 16 Jan 2017 21:49:13 +0900 Subject: Issue #20180: forgot to update AC output. --- Objects/unicodeobject.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index acf59e4d13..e2bdf088ab 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -49,7 +49,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
#endif /*[clinic input] -class str "PyUnicodeObject *" "&PyUnicode_Type" +class str "PyObject *" "&PyUnicode_Type" [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4884c934de622cf6]*/ @@ -10737,7 +10737,7 @@ cased characters have lower case. static PyObject * unicode_title_impl(PyObject *self) -/*[clinic end generated code: output=c75ae03809574902 input=4eb12c1bb8642cb9]*/ +/*[clinic end generated code: output=c75ae03809574902 input=fa945d669b26e683]*/ { if (PyUnicode_READY(self) == -1) return NULL; @@ -10772,7 +10772,7 @@ Return a version of the string suitable for caseless comparisons. static PyObject * unicode_casefold_impl(PyObject *self) -/*[clinic end generated code: output=0120daf657ca40af input=a96f2b0d3daabd94]*/ +/*[clinic end generated code: output=0120daf657ca40af input=384d66cc2ae30daf]*/ { if (PyUnicode_READY(self) == -1) return NULL; @@ -11510,7 +11510,7 @@ Encode the string using the codec registered for encoding. static PyObject * unicode_encode_impl(PyObject *self, const char *encoding, const char *errors) -/*[clinic end generated code: output=bf78b6e2a9470e3c input=12fcb2e5798e96dc]*/ +/*[clinic end generated code: output=bf78b6e2a9470e3c input=f0a9eb293d08fe02]*/ { return PyUnicode_AsEncodedString(self, encoding, errors); } @@ -11730,7 +11730,7 @@ there is at least one cased character in the string. static PyObject * unicode_islower_impl(PyObject *self) -/*[clinic end generated code: output=dbd41995bd005b81 input=46eeb20d935af050]*/ +/*[clinic end generated code: output=dbd41995bd005b81 input=acec65ac6821ae47]*/ { Py_ssize_t i, length; int kind; @@ -11775,7 +11775,7 @@ there is at least one cased character in the string. 
static PyObject * unicode_isupper_impl(PyObject *self) -/*[clinic end generated code: output=049209c8e7f15f59 input=dd0a595fc871eee0]*/ +/*[clinic end generated code: output=049209c8e7f15f59 input=e9b1feda5d17f2d3]*/ { Py_ssize_t i, length; int kind; @@ -11820,7 +11820,7 @@ follow uncased characters and lowercase characters only cased ones. static PyObject * unicode_istitle_impl(PyObject *self) -/*[clinic end generated code: output=e9bf6eb91f5d3f0e input=2c56883d113d644d]*/ +/*[clinic end generated code: output=e9bf6eb91f5d3f0e input=98d32bd2e1f06f8c]*/ { Py_ssize_t i, length; int kind; @@ -11878,7 +11878,7 @@ is at least one character in the string. static PyObject * unicode_isspace_impl(PyObject *self) -/*[clinic end generated code: output=163a63bfa08ac2b9 input=b9506a23e312d203]*/ +/*[clinic end generated code: output=163a63bfa08ac2b9 input=fe462cb74f8437d8]*/ { Py_ssize_t i, length; int kind; @@ -11918,7 +11918,7 @@ is at least one character in the string. static PyObject * unicode_isalpha_impl(PyObject *self) -/*[clinic end generated code: output=cc81b9ac3883ec4f input=17e3788814472079]*/ +/*[clinic end generated code: output=cc81b9ac3883ec4f input=d0fd18a96cbca5eb]*/ { Py_ssize_t i, length; int kind; @@ -11957,7 +11957,7 @@ there is at least one character in the string. static PyObject * unicode_isalnum_impl(PyObject *self) -/*[clinic end generated code: output=a5a23490ffc3660c input=d350c4f7c59b4758]*/ +/*[clinic end generated code: output=a5a23490ffc3660c input=5c6579bf2e04758c]*/ { int kind; void *data; @@ -11999,7 +11999,7 @@ there is at least one character in the string. static PyObject * unicode_isdecimal_impl(PyObject *self) -/*[clinic end generated code: output=fb2dcdb62d3fc548 input=40536fb80e5f1dc1]*/ +/*[clinic end generated code: output=fb2dcdb62d3fc548 input=336bc97ab4c8268f]*/ { Py_ssize_t i, length; int kind; @@ -12038,7 +12038,7 @@ is at least one character in the string. 
static PyObject * unicode_isdigit_impl(PyObject *self) -/*[clinic end generated code: output=10a6985311da6858 input=c6a222be1aaec2af]*/ +/*[clinic end generated code: output=10a6985311da6858 input=901116c31deeea4c]*/ { Py_ssize_t i, length; int kind; @@ -12078,7 +12078,7 @@ least one character in the string. static PyObject * unicode_isnumeric_impl(PyObject *self) -/*[clinic end generated code: output=9172a32d9013051a input=e3b37b2cc8854f35]*/ +/*[clinic end generated code: output=9172a32d9013051a input=722507db976f826c]*/ { Py_ssize_t i, length; int kind; @@ -12154,7 +12154,7 @@ Use keyword.iskeyword() to test for reserved identifiers such as "def" and static PyObject * unicode_isidentifier_impl(PyObject *self) -/*[clinic end generated code: output=fe585a9666572905 input=95eebe40d6d91234]*/ +/*[clinic end generated code: output=fe585a9666572905 input=916b0a3c9f57e919]*/ { return PyBool_FromLong(PyUnicode_IsIdentifier(self)); } @@ -12170,7 +12170,7 @@ repr() or if it is empty. static PyObject * unicode_isprintable_impl(PyObject *self) -/*[clinic end generated code: output=3ab9626cd32dd1a0 input=20c53134171af84e]*/ +/*[clinic end generated code: output=3ab9626cd32dd1a0 input=98a0e1c2c1813209]*/ { Py_ssize_t i, length; int kind; @@ -12211,7 +12211,7 @@ Example: '.'.join(['ab', 'pq', 'rs']) -> 'ab.pq.rs' static PyObject * unicode_join(PyObject *self, PyObject *iterable) -/*[clinic end generated code: output=6857e7cecfe7bf98 input=465f62626109db6b]*/ +/*[clinic end generated code: output=6857e7cecfe7bf98 input=d8311e5ccbafbeb6]*/ { return PyUnicode_Join(self, iterable); } @@ -13309,7 +13309,7 @@ The string is never truncated. 
static PyObject * unicode_zfill_impl(PyObject *self, Py_ssize_t width) -/*[clinic end generated code: output=e13fb6bdf8e3b9df input=3559257ab7bfed6e]*/ +/*[clinic end generated code: output=e13fb6bdf8e3b9df input=c6b2f772c6f27799]*/ { Py_ssize_t fill; PyObject *u; @@ -13814,7 +13814,7 @@ Return a formatted version of the string as described by format_spec. static PyObject * unicode___format___impl(PyObject *self, PyObject *format_spec) -/*[clinic end generated code: output=45fceaca6d2ba4c8 input=1f0623ca7b7c5981]*/ +/*[clinic end generated code: output=45fceaca6d2ba4c8 input=5e135645d167a214]*/ { _PyUnicodeWriter writer; int ret; -- cgit v1.2.1 From 104cb4a3bacdf9764f9f8670a80da16390231337 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 17 Jan 2017 02:21:47 +0100 Subject: Run Argument Clinic: METH_VARARGS=>METH_FASTCALL Issue #29286. Run Argument Clinic to get the new faster METH_FASTCALL calling convention for functions using "boring" positional arguments. Manually fix _elementtree: _elementtree_XMLParser_doctype() must remain consistent with the clinic code. --- Objects/unicodeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e2bdf088ab..5fbe56cce1 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12453,14 +12453,14 @@ str.strip as unicode_strip chars: object = None / -Return a copy of the string with leading and trailing whitespace removed. +Return a copy of the string with leading and trailing whitespace remove. If chars is given and not None, remove characters in chars instead. 
[clinic start generated code]*/ static PyObject * unicode_strip_impl(PyObject *self, PyObject *chars) -/*[clinic end generated code: output=ca19018454345d57 input=385289c6f423b954]*/ +/*[clinic end generated code: output=ca19018454345d57 input=eefe24a1059c352b]*/ { return do_argstrip(self, BOTHSTRIP, chars); } -- cgit v1.2.1 From 72ded8136b92c1b107f629021cf231fd3163f041 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 22 Jan 2017 23:07:07 +0200 Subject: Issue #28769: The result of PyUnicode_AsUTF8AndSize() and PyUnicode_AsUTF8() is now of type "const char *" rather than "char *". --- Objects/unicodeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5fbe56cce1..b711f0ccce 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3972,7 +3972,7 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr) } -char* +const char * PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize) { PyObject *bytes; @@ -4007,7 +4007,7 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize) return PyUnicode_UTF8(unicode); } -char* +const char * PyUnicode_AsUTF8(PyObject *unicode) { return PyUnicode_AsUTF8AndSize(unicode, NULL); -- cgit v1.2.1 From bfeec6d871e3db2e0ddfdef01387913bc19cadd4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 23 Jan 2017 09:47:21 +0200 Subject: Issue #28999: Use Py_RETURN_NONE, Py_RETURN_TRUE and Py_RETURN_FALSE wherever possible. Patch is written with Coccinelle.
--- Objects/unicodeobject.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b711f0ccce..0b79c5bd1a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8254,9 +8254,7 @@ charmapencode_lookup(Py_UCS4 c, PyObject *mapping) if (PyErr_ExceptionMatches(PyExc_LookupError)) { /* No mapping found means: mapping is undefined. */ PyErr_Clear(); - x = Py_None; - Py_INCREF(x); - return x; + Py_RETURN_NONE; } else return NULL; } -- cgit v1.2.1 From 8882e556eef08ade01461b9f4ccfe2d82bdae694 Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Tue, 24 Jan 2017 00:30:06 +0000 Subject: Fix grammar in doc string, RST markup --- Objects/unicodeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Objects/unicodeobject.c') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0b79c5bd1a..d3516fa45f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12201,7 +12201,7 @@ str.join as unicode_join Concatenate any number of strings. -The string whose method is called is inserted in between each given strings. +The string whose method is called is inserted in between each given string. The result is returned as a new string. Example: '.'.join(['ab', 'pq', 'rs']) -> 'ab.pq.rs' @@ -12209,7 +12209,7 @@ Example: '.'.join(['ab', 'pq', 'rs']) -> 'ab.pq.rs' static PyObject * unicode_join(PyObject *self, PyObject *iterable) -/*[clinic end generated code: output=6857e7cecfe7bf98 input=d8311e5ccbafbeb6]*/ +/*[clinic end generated code: output=6857e7cecfe7bf98 input=2f70422bfb8fa189]*/ { return PyUnicode_Join(self, iterable); } -- cgit v1.2.1