summaryrefslogtreecommitdiff
path: root/Include/unicodeobject.h
diff options
context:
space:
mode:
Diffstat (limited to 'Include/unicodeobject.h')
-rw-r--r--Include/unicodeobject.h138
1 files changed, 95 insertions, 43 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 9308a6aa96..587cf03e36 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -103,10 +103,6 @@ typedef wchar_t Py_UNICODE;
# endif
#endif
-#if defined(MS_WINDOWS)
-# define HAVE_MBCS
-#endif
-
#ifdef HAVE_WCHAR_H
/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
# ifdef _HAVE_BSDI
@@ -117,21 +113,9 @@ typedef wchar_t Py_UNICODE;
/* Py_UCS4 and Py_UCS2 are typedefs for the respective
unicode representations. */
-#if SIZEOF_INT == 4
-typedef unsigned int Py_UCS4;
-#elif SIZEOF_LONG == 4
-typedef unsigned long Py_UCS4;
-#else
-#error "Could not find a proper typedef for Py_UCS4"
-#endif
-
-#if SIZEOF_SHORT == 2
-typedef unsigned short Py_UCS2;
-#else
-#error "Could not find a proper typedef for Py_UCS2"
-#endif
-
-typedef unsigned char Py_UCS1;
+typedef uint32_t Py_UCS4;
+typedef uint16_t Py_UCS2;
+typedef uint8_t Py_UCS1;
/* --- Internal Unicode Operations ---------------------------------------- */
@@ -172,7 +156,7 @@ typedef unsigned char Py_UCS1;
Py_UNICODE_ISNUMERIC(ch))
#define Py_UNICODE_COPY(target, source, length) \
- Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
+ memcpy((target), (source), (length)*sizeof(Py_UNICODE))
#define Py_UNICODE_FILL(target, value, length) \
do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
@@ -734,10 +718,12 @@ PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
Py_ssize_t size);
#endif
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
PyAPI_FUNC(PyObject*) PyUnicode_Substring(
PyObject *str,
Py_ssize_t start,
Py_ssize_t end);
+#endif
#ifndef Py_LIMITED_API
/* Compute the maximum character of the substring unicode[start:end].
@@ -748,6 +734,7 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
Py_ssize_t end);
#endif
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
/* Copy the string into a UCS4 buffer including the null character if copy_null
is set. Return NULL and raise an exception on error. Raise a SystemError if
the buffer is smaller than the string. Return buffer on success.
@@ -763,6 +750,7 @@ PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
* PyMem_Malloc; if this fails, NULL is returned with a memory error
exception set. */
PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
+#endif
/* Return a read-only pointer to the Unicode object's internal
Py_UNICODE buffer.
@@ -787,11 +775,13 @@ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
);
#endif
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
/* Get the length of the Unicode object. */
PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
PyObject *unicode
);
+#endif
/* Get the number of Py_UNICODE units in the
string representation. */
@@ -800,6 +790,7 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
PyObject *unicode /* Unicode object */
);
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
/* Read a character from the string. */
PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
@@ -817,6 +808,7 @@ PyAPI_FUNC(int) PyUnicode_WriteChar(
Py_ssize_t index,
Py_UCS4 character
);
+#endif
#ifndef Py_LIMITED_API
/* Get the maximum ordinal for a Unicode character. */
@@ -900,7 +892,7 @@ typedef struct {
/* minimum character (default: 127, ASCII) */
Py_UCS4 min_char;
- /* If non-zero, overallocate the buffer by 25% (default: 0). */
+ /* If non-zero, overallocate the buffer (default: 0). */
unsigned char overallocate;
/* If readonly is 1, buffer is a shared string (cannot be modified)
@@ -934,6 +926,23 @@ PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar);
+/* Prepare the buffer to have at least the kind KIND.
+ For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
+ support characters in range U+000-U+FFFF.
+
+ Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
+ (assert((KIND) != PyUnicode_WCHAR_KIND), \
+ (KIND) <= (WRITER)->kind \
+ ? 0 \
+ : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
+ macro instead. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
+ enum PyUnicode_Kind kind);
+
/* Append a Unicode character.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
@@ -1170,22 +1179,30 @@ PyAPI_FUNC(PyObject*) PyUnicode_Decode(
);
/* Decode a Unicode object unicode and return the result as Python
- object. */
+ object.
+
+ This API is DEPRECATED. The only supported standard encoding is rot13.
+ Use PyCodec_Decode() to decode with rot13 and non-standard codecs
+ that decode from str. */
PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
PyObject *unicode, /* Unicode object */
const char *encoding, /* encoding */
const char *errors /* error handling */
- );
+ ) Py_DEPRECATED(3.6);
/* Decode a Unicode object unicode and return the result as Unicode
- object. */
+ object.
+
+ This API is DEPRECATED. The only supported standard encoding is rot13.
+ Use PyCodec_Decode() to decode with rot13 and non-standard codecs
+ that decode from str to str. */
PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
PyObject *unicode, /* Unicode object */
const char *encoding, /* encoding */
const char *errors /* error handling */
- );
+ ) Py_DEPRECATED(3.6);
/* Encodes a Py_UNICODE buffer of the given size and returns a
Python string object. */
@@ -1200,13 +1217,18 @@ PyAPI_FUNC(PyObject*) PyUnicode_Encode(
#endif
/* Encodes a Unicode object and returns the result as Python
- object. */
+ object.
+
+ This API is DEPRECATED. It is superceeded by PyUnicode_AsEncodedString()
+ since all standard encodings (except rot13) encode str to bytes.
+ Use PyCodec_Encode() for encoding with rot13 and non-standard codecs
+ that encode form str to non-bytes. */
PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
PyObject *unicode, /* Unicode object */
const char *encoding, /* encoding */
const char *errors /* error handling */
- );
+ ) Py_DEPRECATED(3.6);
/* Encodes a Unicode object and returns the result as Python string
object. */
@@ -1218,13 +1240,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
);
/* Encodes a Unicode object and returns the result as Unicode
- object. */
+ object.
+
+ This API is DEPRECATED. The only supported standard encodings is rot13.
+ Use PyCodec_Encode() to encode with rot13 and non-standard codecs
+ that encode from str to str. */
PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
PyObject *unicode, /* Unicode object */
const char *encoding, /* encoding */
const char *errors /* error handling */
- );
+ ) Py_DEPRECATED(3.6);
/* Build an encoding map. */
@@ -1468,6 +1494,19 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
const char *errors /* error handling */
);
+#ifndef Py_LIMITED_API
+/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
+ chars. */
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
+ const char *string, /* Unicode-Escape encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors, /* error handling */
+ const char **first_invalid_escape /* on return, points to first
+ invalid escaped char in
+ string. */
+);
+#endif
+
PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
PyObject *unicode /* Unicode object */
);
@@ -1640,13 +1679,13 @@ PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
);
#endif
-#ifdef HAVE_MBCS
+#ifdef MS_WINDOWS
/* --- MBCS codecs for Windows -------------------------------------------- */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
const char *string, /* MBCS encoded string */
- Py_ssize_t length, /* size of string */
+ Py_ssize_t length, /* size of string */
const char *errors /* error handling */
);
@@ -1657,6 +1696,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
Py_ssize_t *consumed /* bytes consumed */
);
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
int code_page, /* code page number */
const char *string, /* encoded string */
@@ -1664,6 +1704,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
const char *errors, /* error handling */
Py_ssize_t *consumed /* bytes consumed */
);
+#endif
PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
PyObject *unicode /* Unicode object */
@@ -1677,13 +1718,15 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
);
#endif
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
int code_page, /* code page number */
PyObject *unicode, /* Unicode object */
const char *errors /* error handling */
);
+#endif
-#endif /* HAVE_MBCS */
+#endif /* MS_WINDOWS */
/* --- Decimal Encoder ---------------------------------------------------- */
@@ -1744,6 +1787,7 @@ PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
/* --- Locale encoding --------------------------------------------------- */
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
/* Decode a string from the current locale encoding. The decoder is strict if
*surrogateescape* is equal to zero, otherwise it uses the 'surrogateescape'
error handler (PEP 383) to escape undecodable bytes. If a byte sequence can
@@ -1773,6 +1817,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
PyObject *unicode,
const char *errors
);
+#endif
/* --- File system encoding ---------------------------------------------- */
@@ -1938,6 +1983,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_Join(
PyObject *seq /* Sequence object */
);
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
+ PyObject *separator,
+ PyObject **items,
+ Py_ssize_t seqlen
+ );
+#endif /* Py_LIMITED_API */
+
/* Return 1 if substr matches str[start:end] at the given tail end, 0
otherwise. */
@@ -1961,6 +2014,7 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
int direction /* Find direction: +1 forward, -1 backward */
);
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
/* Like PyUnicode_Find, but search for single character only. */
PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
PyObject *str,
@@ -1969,6 +2023,7 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
Py_ssize_t end,
int direction
);
+#endif
/* Count the number of occurrences of substr in str[start:end]. */
@@ -2000,17 +2055,8 @@ PyAPI_FUNC(int) PyUnicode_Compare(
);
#ifndef Py_LIMITED_API
-/* Compare a string with an identifier and return -1, 0, 1 for less than,
- equal, and greater than, respectively.
- Raise an exception and return -1 on error. */
-
-PyAPI_FUNC(int) _PyUnicode_CompareWithId(
- PyObject *left, /* Left string */
- _Py_Identifier *right /* Right identifier */
- );
-
/* Test whether a unicode is equal to ASCII identifier. Return 1 if true,
- 0 otherwise. Return 0 if any argument contains non-ASCII characters.
+ 0 otherwise. The right argument must be ASCII identifier.
Any error occurs inside will be cleared before return. */
PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
@@ -2032,7 +2078,7 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
#ifndef Py_LIMITED_API
/* Test whether a unicode is equal to ASCII string. Return 1 if true,
- 0 otherwise. Return 0 if any argument contains non-ASCII characters.
+ 0 otherwise. The right argument must be ASCII-encoded string.
Any error occurs inside will be cleared before return. */
PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
@@ -2274,11 +2320,17 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
int check_content);
#endif
+#ifndef Py_LIMITED_API
/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
/* Clear all static strings. */
PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
+/* Fast equality check when the inputs are known to be exact unicode types
+ and where the hash values are equal (i.e. a very probable match) */
+PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
+#endif /* !Py_LIMITED_API */
+
#ifdef __cplusplus
}
#endif