diff options
author | Berker Peksag <berker.peksag@gmail.com> | 2017-02-04 09:18:42 +0300 |
---|---|---|
committer | Berker Peksag <berker.peksag@gmail.com> | 2017-02-04 09:18:42 +0300 |
commit | 678487eb345f9f9dea3d3818ecad7d39145bdc65 (patch) | |
tree | 35c1a98d1ceadc6b67bf7119031d2a89f27f1cb1 /Include/unicodeobject.h | |
parent | 50a3761c130e9be725bacb5b99d624012c40414a (diff) | |
parent | 19d8ebcbbd396ec603ed03bc79add9b049c31919 (diff) | |
download | cpython-678487eb345f9f9dea3d3818ecad7d39145bdc65.tar.gz |
Issue #29198: Merge from 3.5
Diffstat (limited to 'Include/unicodeobject.h')
-rw-r--r-- | Include/unicodeobject.h | 138 |
1 files changed, 95 insertions, 43 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 9308a6aa96..587cf03e36 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -103,10 +103,6 @@ typedef wchar_t Py_UNICODE; # endif #endif -#if defined(MS_WINDOWS) -# define HAVE_MBCS -#endif - #ifdef HAVE_WCHAR_H /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */ # ifdef _HAVE_BSDI @@ -117,21 +113,9 @@ typedef wchar_t Py_UNICODE; /* Py_UCS4 and Py_UCS2 are typedefs for the respective unicode representations. */ -#if SIZEOF_INT == 4 -typedef unsigned int Py_UCS4; -#elif SIZEOF_LONG == 4 -typedef unsigned long Py_UCS4; -#else -#error "Could not find a proper typedef for Py_UCS4" -#endif - -#if SIZEOF_SHORT == 2 -typedef unsigned short Py_UCS2; -#else -#error "Could not find a proper typedef for Py_UCS2" -#endif - -typedef unsigned char Py_UCS1; +typedef uint32_t Py_UCS4; +typedef uint16_t Py_UCS2; +typedef uint8_t Py_UCS1; /* --- Internal Unicode Operations ---------------------------------------- */ @@ -172,7 +156,7 @@ typedef unsigned char Py_UCS1; Py_UNICODE_ISNUMERIC(ch)) #define Py_UNICODE_COPY(target, source, length) \ - Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE)) + memcpy((target), (source), (length)*sizeof(Py_UNICODE)) #define Py_UNICODE_FILL(target, value, length) \ do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ @@ -734,10 +718,12 @@ PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII( Py_ssize_t size); #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 PyAPI_FUNC(PyObject*) PyUnicode_Substring( PyObject *str, Py_ssize_t start, Py_ssize_t end); +#endif #ifndef Py_LIMITED_API /* Compute the maximum character of the substring unicode[start:end]. @@ -748,6 +734,7 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( Py_ssize_t end); #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 /* Copy the string into a UCS4 buffer including the null character if copy_null is set. Return NULL and raise an exception on error. Raise a SystemError if the buffer is smaller than the string. Return buffer on success. @@ -763,6 +750,7 @@ PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( * PyMem_Malloc; if this fails, NULL is returned with a memory error exception set. */ PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); +#endif /* Return a read-only pointer to the Unicode object's internal Py_UNICODE buffer. @@ -787,11 +775,13 @@ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( ); #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 /* Get the length of the Unicode object. */ PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength( PyObject *unicode ); +#endif /* Get the number of Py_UNICODE units in the string representation. */ @@ -800,6 +790,7 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( PyObject *unicode /* Unicode object */ ); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 /* Read a character from the string. */ PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar( @@ -817,6 +808,7 @@ PyAPI_FUNC(int) PyUnicode_WriteChar( Py_ssize_t index, Py_UCS4 character ); +#endif #ifndef Py_LIMITED_API /* Get the maximum ordinal for a Unicode character. */ @@ -900,7 +892,7 @@ typedef struct { /* minimum character (default: 127, ASCII) */ Py_UCS4 min_char; - /* If non-zero, overallocate the buffer by 25% (default: 0). */ + /* If non-zero, overallocate the buffer (default: 0). */ unsigned char overallocate; /* If readonly is 1, buffer is a shared string (cannot be modified) @@ -934,6 +926,23 @@ PyAPI_FUNC(int) _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, Py_ssize_t length, Py_UCS4 maxchar); +/* Prepare the buffer to have at least the kind KIND. + For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will + support characters in range U+000-U+FFFF. + + Return 0 on success, raise an exception and return -1 on error. */ +#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ + (assert((KIND) != PyUnicode_WCHAR_KIND), \ + (KIND) <= (WRITER)->kind \ + ? 0 \ + : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) + +/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind() + macro instead. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, + enum PyUnicode_Kind kind); + /* Append a Unicode character. Return 0 on success, raise an exception and return -1 on error. */ PyAPI_FUNC(int) @@ -1170,22 +1179,30 @@ PyAPI_FUNC(PyObject*) PyUnicode_Decode( ); /* Decode a Unicode object unicode and return the result as Python - object. */ + object. + + This API is DEPRECATED. The only supported standard encoding is rot13. + Use PyCodec_Decode() to decode with rot13 and non-standard codecs + that decode from str. */ PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject( PyObject *unicode, /* Unicode object */ const char *encoding, /* encoding */ const char *errors /* error handling */ - ); + ) Py_DEPRECATED(3.6); /* Decode a Unicode object unicode and return the result as Unicode - object. */ + object. + + This API is DEPRECATED. The only supported standard encoding is rot13. + Use PyCodec_Decode() to decode with rot13 and non-standard codecs + that decode from str to str. */ PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( PyObject *unicode, /* Unicode object */ const char *encoding, /* encoding */ const char *errors /* error handling */ - ); + ) Py_DEPRECATED(3.6); /* Encodes a Py_UNICODE buffer of the given size and returns a Python string object. */ @@ -1200,13 +1217,18 @@ PyAPI_FUNC(PyObject*) PyUnicode_Encode( #endif /* Encodes a Unicode object and returns the result as Python - object. */ + object. + + This API is DEPRECATED. It is superceeded by PyUnicode_AsEncodedString() + since all standard encodings (except rot13) encode str to bytes. + Use PyCodec_Encode() for encoding with rot13 and non-standard codecs + that encode form str to non-bytes. */ PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( PyObject *unicode, /* Unicode object */ const char *encoding, /* encoding */ const char *errors /* error handling */ - ); + ) Py_DEPRECATED(3.6); /* Encodes a Unicode object and returns the result as Python string object. */ @@ -1218,13 +1240,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( ); /* Encodes a Unicode object and returns the result as Unicode - object. */ + object. + + This API is DEPRECATED. The only supported standard encodings is rot13. + Use PyCodec_Encode() to encode with rot13 and non-standard codecs + that encode from str to str. */ PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode( PyObject *unicode, /* Unicode object */ const char *encoding, /* encoding */ const char *errors /* error handling */ - ); + ) Py_DEPRECATED(3.6); /* Build an encoding map. */ @@ -1468,6 +1494,19 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( const char *errors /* error handling */ ); +#ifndef Py_LIMITED_API +/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape + chars. */ +PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape( + const char *string, /* Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + const char **first_invalid_escape /* on return, points to first + invalid escaped char in + string. */ +); +#endif + PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( PyObject *unicode /* Unicode object */ ); @@ -1640,13 +1679,13 @@ PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( ); #endif -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS /* --- MBCS codecs for Windows -------------------------------------------- */ PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( const char *string, /* MBCS encoded string */ - Py_ssize_t length, /* size of string */ + Py_ssize_t length, /* size of string */ const char *errors /* error handling */ ); @@ -1657,6 +1696,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( Py_ssize_t *consumed /* bytes consumed */ ); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful( int code_page, /* code page number */ const char *string, /* encoded string */ @@ -1664,6 +1704,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful( const char *errors, /* error handling */ Py_ssize_t *consumed /* bytes consumed */ ); +#endif PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( PyObject *unicode /* Unicode object */ @@ -1677,13 +1718,15 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( ); #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( int code_page, /* code page number */ PyObject *unicode, /* Unicode object */ const char *errors /* error handling */ ); +#endif -#endif /* HAVE_MBCS */ +#endif /* MS_WINDOWS */ /* --- Decimal Encoder ---------------------------------------------------- */ @@ -1744,6 +1787,7 @@ PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( /* --- Locale encoding --------------------------------------------------- */ +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 /* Decode a string from the current locale encoding. The decoder is strict if *surrogateescape* is equal to zero, otherwise it uses the 'surrogateescape' error handler (PEP 383) to escape undecodable bytes. If a byte sequence can @@ -1773,6 +1817,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( PyObject *unicode, const char *errors ); +#endif /* --- File system encoding ---------------------------------------------- */ @@ -1938,6 +1983,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_Join( PyObject *seq /* Sequence object */ ); +#ifndef Py_LIMITED_API +PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray( + PyObject *separator, + PyObject **items, + Py_ssize_t seqlen + ); +#endif /* Py_LIMITED_API */ + /* Return 1 if substr matches str[start:end] at the given tail end, 0 otherwise. */ @@ -1961,6 +2014,7 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( int direction /* Find direction: +1 forward, -1 backward */ ); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 /* Like PyUnicode_Find, but search for single character only. */ PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar( PyObject *str, @@ -1969,6 +2023,7 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar( Py_ssize_t end, int direction ); +#endif /* Count the number of occurrences of substr in str[start:end]. */ @@ -2000,17 +2055,8 @@ PyAPI_FUNC(int) PyUnicode_Compare( ); #ifndef Py_LIMITED_API -/* Compare a string with an identifier and return -1, 0, 1 for less than, - equal, and greater than, respectively. - Raise an exception and return -1 on error. */ - -PyAPI_FUNC(int) _PyUnicode_CompareWithId( - PyObject *left, /* Left string */ - _Py_Identifier *right /* Right identifier */ - ); - /* Test whether a unicode is equal to ASCII identifier. Return 1 if true, - 0 otherwise. Return 0 if any argument contains non-ASCII characters. + 0 otherwise. The right argument must be ASCII identifier. Any error occurs inside will be cleared before return. */ PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId( @@ -2032,7 +2078,7 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( #ifndef Py_LIMITED_API /* Test whether a unicode is equal to ASCII string. Return 1 if true, - 0 otherwise. Return 0 if any argument contains non-ASCII characters. + 0 otherwise. The right argument must be ASCII-encoded string. Any error occurs inside will be cleared before return. */ PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString( @@ -2274,11 +2320,17 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency( int check_content); #endif +#ifndef Py_LIMITED_API /* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/ PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); /* Clear all static strings. */ PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void); +/* Fast equality check when the inputs are known to be exact unicode types + and where the hash values are equal (i.e. a very probable match) */ +PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); +#endif /* !Py_LIMITED_API */ + #ifdef __cplusplus } #endif |