summary | refs | log | tree | commit | diff
path: root/Python/codecs.c
diff options
context:
space:
mode:
Diffstat (limited to 'Python/codecs.c')
-rw-r--r-- Python/codecs.c | 440
1 files changed, 284 insertions, 156 deletions
diff --git a/Python/codecs.c b/Python/codecs.c
index e21834a5c1..ea33c49f20 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -11,6 +11,8 @@ Copyright (c) Corporation for National Research Initiatives.
#include "Python.h"
#include <ctype.h>
+const char *Py_hexdigits = "0123456789abcdef";
+
/* --- Codec Registry ----------------------------------------------------- */
/* Import the standard encodings package which will register the first
@@ -241,20 +243,15 @@ PyObject *codec_getitem(const char *encoding, int index)
return v;
}
-/* Helper function to create an incremental codec. */
-
+/* Helper functions to create an incremental codec. */
static
-PyObject *codec_getincrementalcodec(const char *encoding,
- const char *errors,
- const char *attrname)
+PyObject *codec_makeincrementalcodec(PyObject *codec_info,
+ const char *errors,
+ const char *attrname)
{
- PyObject *codecs, *ret, *inccodec;
+ PyObject *ret, *inccodec;
- codecs = _PyCodec_Lookup(encoding);
- if (codecs == NULL)
- return NULL;
- inccodec = PyObject_GetAttrString(codecs, attrname);
- Py_DECREF(codecs);
+ inccodec = PyObject_GetAttrString(codec_info, attrname);
if (inccodec == NULL)
return NULL;
if (errors)
@@ -265,6 +262,21 @@ PyObject *codec_getincrementalcodec(const char *encoding,
return ret;
}
+static
+PyObject *codec_getincrementalcodec(const char *encoding,
+ const char *errors,
+ const char *attrname)
+{
+ PyObject *codec_info, *ret;
+
+ codec_info = _PyCodec_Lookup(encoding);
+ if (codec_info == NULL)
+ return NULL;
+ ret = codec_makeincrementalcodec(codec_info, errors, attrname);
+ Py_DECREF(codec_info);
+ return ret;
+}
+
/* Helper function to create a stream codec. */
static
@@ -288,6 +300,24 @@ PyObject *codec_getstreamcodec(const char *encoding,
return streamcodec;
}
+/* Helpers to work with the result of _PyCodec_Lookup
+
+ */
+PyObject *_PyCodecInfo_GetIncrementalDecoder(PyObject *codec_info,
+ const char *errors)
+{
+ return codec_makeincrementalcodec(codec_info, errors,
+ "incrementaldecoder");
+}
+
+PyObject *_PyCodecInfo_GetIncrementalEncoder(PyObject *codec_info,
+ const char *errors)
+{
+ return codec_makeincrementalcodec(codec_info, errors,
+ "incrementalencoder");
+}
+
+
/* Convenience APIs to query the Codec registry.
All APIs return a codec object with incremented refcount.
@@ -335,18 +365,15 @@ PyObject *PyCodec_StreamWriter(const char *encoding,
errors is passed to the encoder factory as argument if non-NULL. */
-PyObject *PyCodec_Encode(PyObject *object,
- const char *encoding,
- const char *errors)
+static PyObject *
+_PyCodec_EncodeInternal(PyObject *object,
+ PyObject *encoder,
+ const char *encoding,
+ const char *errors)
{
- PyObject *encoder = NULL;
PyObject *args = NULL, *result = NULL;
PyObject *v = NULL;
- encoder = PyCodec_Encoder(encoding);
- if (encoder == NULL)
- goto onError;
-
args = args_tuple(object, errors);
if (args == NULL)
goto onError;
@@ -382,18 +409,15 @@ PyObject *PyCodec_Encode(PyObject *object,
errors is passed to the decoder factory as argument if non-NULL. */
-PyObject *PyCodec_Decode(PyObject *object,
- const char *encoding,
- const char *errors)
+static PyObject *
+_PyCodec_DecodeInternal(PyObject *object,
+ PyObject *decoder,
+ const char *encoding,
+ const char *errors)
{
- PyObject *decoder = NULL;
PyObject *args = NULL, *result = NULL;
PyObject *v;
- decoder = PyCodec_Decoder(encoding);
- if (decoder == NULL)
- goto onError;
-
args = args_tuple(object, errors);
if (args == NULL)
goto onError;
@@ -423,6 +447,132 @@ PyObject *PyCodec_Decode(PyObject *object,
return NULL;
}
+/* Generic encoding/decoding API */
+PyObject *PyCodec_Encode(PyObject *object,
+ const char *encoding,
+ const char *errors)
+{
+ PyObject *encoder;
+
+ encoder = PyCodec_Encoder(encoding);
+ if (encoder == NULL)
+ return NULL;
+
+ return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
+}
+
+PyObject *PyCodec_Decode(PyObject *object,
+ const char *encoding,
+ const char *errors)
+{
+ PyObject *decoder;
+
+ decoder = PyCodec_Decoder(encoding);
+ if (decoder == NULL)
+ return NULL;
+
+ return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
+}
+
+/* Text encoding/decoding API */
+PyObject * _PyCodec_LookupTextEncoding(const char *encoding,
+ const char *alternate_command)
+{
+ _Py_IDENTIFIER(_is_text_encoding);
+ PyObject *codec;
+ PyObject *attr;
+ int is_text_codec;
+
+ codec = _PyCodec_Lookup(encoding);
+ if (codec == NULL)
+ return NULL;
+
+ /* Backwards compatibility: assume any raw tuple describes a text
+ * encoding, and the same for anything lacking the private
+ * attribute.
+ */
+ if (!PyTuple_CheckExact(codec)) {
+ attr = _PyObject_GetAttrId(codec, &PyId__is_text_encoding);
+ if (attr == NULL) {
+ if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
+ PyErr_Clear();
+ } else {
+ Py_DECREF(codec);
+ return NULL;
+ }
+ } else {
+ is_text_codec = PyObject_IsTrue(attr);
+ Py_DECREF(attr);
+ if (!is_text_codec) {
+ Py_DECREF(codec);
+ PyErr_Format(PyExc_LookupError,
+ "'%.400s' is not a text encoding; "
+ "use %s to handle arbitrary codecs",
+ encoding, alternate_command);
+ return NULL;
+ }
+ }
+ }
+
+ /* This appears to be a valid text encoding */
+ return codec;
+}
+
+
+static
+PyObject *codec_getitem_checked(const char *encoding,
+ const char *alternate_command,
+ int index)
+{
+ PyObject *codec;
+ PyObject *v;
+
+ codec = _PyCodec_LookupTextEncoding(encoding, alternate_command);
+ if (codec == NULL)
+ return NULL;
+
+ v = PyTuple_GET_ITEM(codec, index);
+ Py_INCREF(v);
+ Py_DECREF(codec);
+ return v;
+}
+
+static PyObject * _PyCodec_TextEncoder(const char *encoding)
+{
+ return codec_getitem_checked(encoding, "codecs.encode()", 0);
+}
+
+static PyObject * _PyCodec_TextDecoder(const char *encoding)
+{
+ return codec_getitem_checked(encoding, "codecs.decode()", 1);
+}
+
+PyObject *_PyCodec_EncodeText(PyObject *object,
+ const char *encoding,
+ const char *errors)
+{
+ PyObject *encoder;
+
+ encoder = _PyCodec_TextEncoder(encoding);
+ if (encoder == NULL)
+ return NULL;
+
+ return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
+}
+
+PyObject *_PyCodec_DecodeText(PyObject *object,
+ const char *encoding,
+ const char *errors)
+{
+ PyObject *decoder;
+
+ decoder = _PyCodec_TextDecoder(encoding);
+ if (decoder == NULL)
+ return NULL;
+
+ return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
+}
+
/* Register the error handling callback function error under the name
name. This function will be called by the codec when it encounters
an unencodable characters/undecodable bytes and doesn't know the
@@ -465,9 +615,11 @@ PyObject *PyCodec_LookupError(const char *name)
static void wrong_exception_type(PyObject *exc)
{
- PyObject *type = PyObject_GetAttrString(exc, "__class__");
+ _Py_IDENTIFIER(__class__);
+ _Py_IDENTIFIER(__name__);
+ PyObject *type = _PyObject_GetAttrId(exc, &PyId___class__);
if (type != NULL) {
- PyObject *name = PyObject_GetAttrString(type, "__name__");
+ PyObject *name = _PyObject_GetAttrId(type, &PyId___name__);
Py_DECREF(type);
if (name != NULL) {
PyErr_Format(PyExc_TypeError,
@@ -506,57 +658,58 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
wrong_exception_type(exc);
return NULL;
}
- /* ouch: passing NULL, 0, pos gives None instead of u'' */
- return Py_BuildValue("(u#n)", &end, 0, end);
+ return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end);
}
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
{
- PyObject *restuple;
- Py_ssize_t start;
- Py_ssize_t end;
- Py_ssize_t i;
+ Py_ssize_t start, end, i, len;
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
PyObject *res;
- Py_UNICODE *p;
+ int kind;
+ void *data;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
if (PyUnicodeEncodeError_GetEnd(exc, &end))
return NULL;
- res = PyUnicode_FromUnicode(NULL, end-start);
+ len = end - start;
+ res = PyUnicode_New(len, '?');
if (res == NULL)
return NULL;
- for (p = PyUnicode_AS_UNICODE(res), i = start;
- i<end; ++p, ++i)
- *p = '?';
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
- return restuple;
+ kind = PyUnicode_KIND(res);
+ data = PyUnicode_DATA(res);
+ for (i = 0; i < len; ++i)
+ PyUnicode_WRITE(kind, data, i, '?');
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ return Py_BuildValue("(Nn)", res, end);
}
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
- Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
if (PyUnicodeDecodeError_GetEnd(exc, &end))
return NULL;
- return Py_BuildValue("(u#n)", &res, 1, end);
+ return Py_BuildValue("(Cn)",
+ (int)Py_UNICODE_REPLACEMENT_CHARACTER,
+ end);
}
else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
PyObject *res;
- Py_UNICODE *p;
+ int kind;
+ void *data;
if (PyUnicodeTranslateError_GetStart(exc, &start))
return NULL;
if (PyUnicodeTranslateError_GetEnd(exc, &end))
return NULL;
- res = PyUnicode_FromUnicode(NULL, end-start);
+ len = end - start;
+ res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
if (res == NULL)
return NULL;
- for (p = PyUnicode_AS_UNICODE(res), i = start;
- i<end; ++p, ++i)
- *p = Py_UNICODE_REPLACEMENT_CHARACTER;
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
- return restuple;
+ kind = PyUnicode_KIND(res);
+ data = PyUnicode_DATA(res);
+ for (i=0; i < len; i++)
+ PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ return Py_BuildValue("(Nn)", res, end);
}
else {
wrong_exception_type(exc);
@@ -569,82 +722,74 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
PyObject *restuple;
PyObject *object;
+ Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
- Py_UNICODE *p;
- Py_UNICODE *startp;
- Py_UNICODE *outp;
- int ressize;
+ unsigned char *outp;
+ Py_ssize_t ressize;
+ Py_UCS4 ch;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
if (PyUnicodeEncodeError_GetEnd(exc, &end))
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- startp = PyUnicode_AS_UNICODE(object);
- for (p = startp+start, ressize = 0; p < startp+end; ++p) {
- if (*p<10)
+ if (end - start > PY_SSIZE_T_MAX / (2+7+1))
+ end = start + PY_SSIZE_T_MAX / (2+7+1);
+ for (i = start, ressize = 0; i < end; ++i) {
+ /* object is guaranteed to be "ready" */
+ ch = PyUnicode_READ_CHAR(object, i);
+ if (ch<10)
ressize += 2+1+1;
- else if (*p<100)
+ else if (ch<100)
ressize += 2+2+1;
- else if (*p<1000)
+ else if (ch<1000)
ressize += 2+3+1;
- else if (*p<10000)
+ else if (ch<10000)
ressize += 2+4+1;
-#ifndef Py_UNICODE_WIDE
- else
+ else if (ch<100000)
ressize += 2+5+1;
-#else
- else if (*p<100000)
- ressize += 2+5+1;
- else if (*p<1000000)
+ else if (ch<1000000)
ressize += 2+6+1;
else
ressize += 2+7+1;
-#endif
}
/* allocate replacement */
- res = PyUnicode_FromUnicode(NULL, ressize);
+ res = PyUnicode_New(ressize, 127);
if (res == NULL) {
Py_DECREF(object);
return NULL;
}
+ outp = PyUnicode_1BYTE_DATA(res);
/* generate replacement */
- for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
- p < startp+end; ++p) {
- Py_UNICODE c = *p;
+ for (i = start; i < end; ++i) {
int digits;
int base;
+ ch = PyUnicode_READ_CHAR(object, i);
*outp++ = '&';
*outp++ = '#';
- if (*p<10) {
+ if (ch<10) {
digits = 1;
base = 1;
}
- else if (*p<100) {
+ else if (ch<100) {
digits = 2;
base = 10;
}
- else if (*p<1000) {
+ else if (ch<1000) {
digits = 3;
base = 100;
}
- else if (*p<10000) {
+ else if (ch<10000) {
digits = 4;
base = 1000;
}
-#ifndef Py_UNICODE_WIDE
- else {
+ else if (ch<100000) {
digits = 5;
base = 10000;
}
-#else
- else if (*p<100000) {
- digits = 5;
- base = 10000;
- }
- else if (*p<1000000) {
+ else if (ch<1000000) {
digits = 6;
base = 100000;
}
@@ -652,16 +797,15 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
digits = 7;
base = 1000000;
}
-#endif
while (digits-->0) {
- *outp++ = '0' + c/base;
- c %= base;
+ *outp++ = '0' + ch/base;
+ ch %= base;
base /= 10;
}
*outp++ = ';';
}
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ restuple = Py_BuildValue("(Nn)", res, end);
Py_DECREF(object);
return restuple;
}
@@ -671,87 +815,67 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
}
}
-static Py_UNICODE hexdigits[] = {
- '0', '1', '2', '3', '4', '5', '6', '7',
- '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
-};
-
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
-#ifndef Py_UNICODE_WIDE
-#define IS_SURROGATE_PAIR(p, end) \
- (*p >= 0xD800 && *p <= 0xDBFF && (p + 1) < end && \
- *(p + 1) >= 0xDC00 && *(p + 1) <= 0xDFFF)
-#else
-#define IS_SURROGATE_PAIR(p, end) 0
-#endif
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
PyObject *restuple;
PyObject *object;
+ Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
- Py_UNICODE *p;
- Py_UNICODE *startp;
- Py_UNICODE *outp;
- int ressize;
+ unsigned char *outp;
+ Py_ssize_t ressize;
+ Py_UCS4 c;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
if (PyUnicodeEncodeError_GetEnd(exc, &end))
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- startp = PyUnicode_AS_UNICODE(object);
- for (p = startp+start, ressize = 0; p < startp+end; ++p) {
-#ifdef Py_UNICODE_WIDE
- if (*p >= 0x00010000)
+ if (end - start > PY_SSIZE_T_MAX / (1+1+8))
+ end = start + PY_SSIZE_T_MAX / (1+1+8);
+ for (i = start, ressize = 0; i < end; ++i) {
+ /* object is guaranteed to be "ready" */
+ c = PyUnicode_READ_CHAR(object, i);
+ if (c >= 0x10000) {
ressize += 1+1+8;
- else
-#endif
- if (*p >= 0x100) {
- if (IS_SURROGATE_PAIR(p, startp+end)) {
- ressize += 1+1+8;
- ++p;
- }
- else
- ressize += 1+1+4;
+ }
+ else if (c >= 0x100) {
+ ressize += 1+1+4;
}
else
ressize += 1+1+2;
}
- res = PyUnicode_FromUnicode(NULL, ressize);
+ res = PyUnicode_New(ressize, 127);
if (res==NULL)
return NULL;
- for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
- p < startp+end; ++p) {
- Py_UCS4 c = (Py_UCS4) *p;
+ for (i = start, outp = PyUnicode_1BYTE_DATA(res);
+ i < end; ++i) {
+ c = PyUnicode_READ_CHAR(object, i);
*outp++ = '\\';
- if (IS_SURROGATE_PAIR(p, startp+end)) {
- c = ((*p & 0x3FF) << 10) + (*(p + 1) & 0x3FF) + 0x10000;
- ++p;
- }
if (c >= 0x00010000) {
*outp++ = 'U';
- *outp++ = hexdigits[(c>>28)&0xf];
- *outp++ = hexdigits[(c>>24)&0xf];
- *outp++ = hexdigits[(c>>20)&0xf];
- *outp++ = hexdigits[(c>>16)&0xf];
- *outp++ = hexdigits[(c>>12)&0xf];
- *outp++ = hexdigits[(c>>8)&0xf];
+ *outp++ = Py_hexdigits[(c>>28)&0xf];
+ *outp++ = Py_hexdigits[(c>>24)&0xf];
+ *outp++ = Py_hexdigits[(c>>20)&0xf];
+ *outp++ = Py_hexdigits[(c>>16)&0xf];
+ *outp++ = Py_hexdigits[(c>>12)&0xf];
+ *outp++ = Py_hexdigits[(c>>8)&0xf];
}
else if (c >= 0x100) {
*outp++ = 'u';
- *outp++ = hexdigits[(c>>12)&0xf];
- *outp++ = hexdigits[(c>>8)&0xf];
+ *outp++ = Py_hexdigits[(c>>12)&0xf];
+ *outp++ = Py_hexdigits[(c>>8)&0xf];
}
else
*outp++ = 'x';
- *outp++ = hexdigits[(c>>4)&0xf];
- *outp++ = hexdigits[c&0xf];
+ *outp++ = Py_hexdigits[(c>>4)&0xf];
+ *outp++ = Py_hexdigits[c&0xf];
}
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ restuple = Py_BuildValue("(Nn)", res, end);
Py_DECREF(object);
return restuple;
}
@@ -759,7 +883,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
wrong_exception_type(exc);
return NULL;
}
-#undef IS_SURROGATE_PAIR
}
/* This handler is declared static until someone demonstrates
@@ -769,12 +892,11 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
{
PyObject *restuple;
PyObject *object;
+ Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
- Py_UNICODE *p;
- Py_UNICODE *startp;
char *outp;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
@@ -782,15 +904,15 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- startp = PyUnicode_AS_UNICODE(object);
res = PyBytes_FromStringAndSize(NULL, 3*(end-start));
if (!res) {
Py_DECREF(object);
return NULL;
}
outp = PyBytes_AsString(res);
- for (p = startp+start; p < startp+end; p++) {
- Py_UNICODE ch = *p;
+ for (i = start; i < end; i++) {
+ /* object is guaranteed to be "ready" */
+ Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
if (ch < 0xd800 || ch > 0xdfff) {
/* Not a surrogate, fail with original exception */
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
@@ -809,7 +931,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
}
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
unsigned char *p;
- Py_UNICODE ch = 0;
+ Py_UCS4 ch = 0;
if (PyUnicodeDecodeError_GetStart(exc, &start))
return NULL;
if (!(object = PyUnicodeDecodeError_GetObject(exc)))
@@ -836,7 +958,10 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
return NULL;
}
- return Py_BuildValue("(u#n)", &ch, 1, start+3);
+ res = PyUnicode_FromOrdinal(ch);
+ if (res == NULL)
+ return NULL;
+ return Py_BuildValue("(Nn)", res, start+3);
}
else {
wrong_exception_type(exc);
@@ -849,12 +974,11 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
{
PyObject *restuple;
PyObject *object;
+ Py_ssize_t i;
Py_ssize_t start;
Py_ssize_t end;
PyObject *res;
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
- Py_UNICODE *p;
- Py_UNICODE *startp;
char *outp;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
@@ -862,15 +986,15 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
return NULL;
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
- startp = PyUnicode_AS_UNICODE(object);
res = PyBytes_FromStringAndSize(NULL, end-start);
if (!res) {
Py_DECREF(object);
return NULL;
}
outp = PyBytes_AsString(res);
- for (p = startp+start; p < startp+end; p++) {
- Py_UNICODE ch = *p;
+ for (i = start; i < end; i++) {
+ /* object is guaranteed to be "ready" */
+ Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
if (ch < 0xdc80 || ch > 0xdcff) {
/* Not a UTF-8b surrogate, fail with original exception */
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
@@ -886,8 +1010,9 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
return restuple;
}
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+ PyObject *str;
unsigned char *p;
- Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */
+ Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */
int consumed = 0;
if (PyUnicodeDecodeError_GetStart(exc, &start))
return NULL;
@@ -912,7 +1037,10 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
return NULL;
}
- return Py_BuildValue("(u#n)", ch, consumed, start+consumed);
+ str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
+ if (str == NULL)
+ return NULL;
+ return Py_BuildValue("(Nn)", str, start+consumed);
}
else {
wrong_exception_type(exc);
@@ -1049,7 +1177,7 @@ static int _PyCodecRegistry_Init(void)
interp->codec_error_registry = PyDict_New();
if (interp->codec_error_registry) {
- for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
+ for (i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
PyObject *func = PyCFunction_New(&methods[i].def, NULL);
int res;
if (!func)