From d1d013c01c268d869597b35cbcd8b5d7c5baf2ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= <martin@v.loewis.de>
Date: Wed, 28 Sep 2011 07:41:54 +0200
Subject: Implement PEP 393.

---
 Python/codecs.c | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 1a3e45774c..be2e8334c6 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -513,27 +513,25 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
 
 PyObject *PyCodec_ReplaceErrors(PyObject *exc)
 {
-    PyObject *restuple;
-    Py_ssize_t start;
-    Py_ssize_t end;
-    Py_ssize_t i;
+    Py_ssize_t start, end, i, len;
 
     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
         PyObject *res;
-        Py_UNICODE *p;
+        int kind;
+        void *data;
         if (PyUnicodeEncodeError_GetStart(exc, &start))
             return NULL;
         if (PyUnicodeEncodeError_GetEnd(exc, &end))
             return NULL;
-        res = PyUnicode_FromUnicode(NULL, end-start);
+        len = end - start;
+        res = PyUnicode_New(len, '?');
         if (res == NULL)
             return NULL;
-        for (p = PyUnicode_AS_UNICODE(res), i = start;
-            i<end; ++p, ++i)
-            *p = '?';
-        restuple = Py_BuildValue("(On)", res, end);
-        Py_DECREF(res);
-        return restuple;
+        kind = PyUnicode_KIND(res);
+        data = PyUnicode_DATA(res);
+        for (i = 0; i < len; ++i)
+            PyUnicode_WRITE(kind, data, i, '?');
+        return Py_BuildValue("(Nn)", res, end);
     }
     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
         Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
@@ -543,20 +541,21 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
     }
     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
         PyObject *res;
-        Py_UNICODE *p;
+        int kind;
+        void *data;
         if (PyUnicodeTranslateError_GetStart(exc, &start))
             return NULL;
         if (PyUnicodeTranslateError_GetEnd(exc, &end))
             return NULL;
-        res = PyUnicode_FromUnicode(NULL, end-start);
+        len = end - start;
+        res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
         if (res == NULL)
             return NULL;
-        for (p = PyUnicode_AS_UNICODE(res), i = start;
-            i<end; ++p, ++i)
-            *p = Py_UNICODE_REPLACEMENT_CHARACTER;
-        restuple = Py_BuildValue("(On)", res, end);
-        Py_DECREF(res);
-        return restuple;
+        kind = PyUnicode_KIND(res);
+        data = PyUnicode_DATA(res);
+        for (i=0; i < len; i++)
+            PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
+        return Py_BuildValue("(Nn)", res, end);
     }
     else {
         wrong_exception_type(exc);
@@ -671,10 +670,7 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
     }
 }
 
-static Py_UNICODE hexdigits[] = {
-    '0', '1', '2', '3', '4', '5', '6', '7',
-    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
-};
+static const char *hexdigits = "0123456789abcdef";
 
 PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
 {
-- 
cgit v1.2.1


From 687c3b16f72a6b2b64971c1c27e33e27f001ede8 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@haypocalc.com>
Date: Thu, 29 Sep 2011 00:42:28 +0200
Subject: Use the new Py_ARRAY_LENGTH macro

---
 Python/codecs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index be2e8334c6..4c2fc5d722 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -1044,7 +1044,7 @@ static int _PyCodecRegistry_Init(void)
     interp->codec_error_registry = PyDict_New();
 
     if (interp->codec_error_registry) {
-        for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
+        for (i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
             PyObject *func = PyCFunction_New(&methods[i].def, NULL);
             int res;
             if (!func)
-- 
cgit v1.2.1


From ca68997fb8ada5c2a95caaf984f29bf02732336a Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@haypocalc.com>
Date: Sun, 2 Oct 2011 19:00:15 +0200
Subject: PyCodec_ReplaceErrors() uses "C" format instead of "u#" to build
 result

---
 Python/codecs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 4c2fc5d722..0008bdcbdc 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -534,10 +534,11 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
         return Py_BuildValue("(Nn)", res, end);
     }
     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
-        Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
         if (PyUnicodeDecodeError_GetEnd(exc, &end))
             return NULL;
-        return Py_BuildValue("(u#n)", &res, 1, end);
+        return Py_BuildValue("(Cn)",
+                             (int)Py_UNICODE_REPLACEMENT_CHARACTER,
+                             end);
     }
     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
         PyObject *res;
-- 
cgit v1.2.1


From c3eb93fe1ce5f5ff2666fb9f34a6fd7c46279554 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= <martin@v.loewis.de>
Date: Mon, 10 Oct 2011 18:11:30 +0200
Subject: Use identifier API for PyObject_GetAttrString.

---
 Python/codecs.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 0008bdcbdc..79dfe89aba 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -465,9 +465,11 @@ PyObject *PyCodec_LookupError(const char *name)
 
 static void wrong_exception_type(PyObject *exc)
 {
-    PyObject *type = PyObject_GetAttrString(exc, "__class__");
+    _Py_identifier(__class__);
+    _Py_identifier(__name__);
+    PyObject *type = _PyObject_GetAttrId(exc, &PyId___class__);
     if (type != NULL) {
-        PyObject *name = PyObject_GetAttrString(type, "__name__");
+        PyObject *name = _PyObject_GetAttrId(type, &PyId___name__);
         Py_DECREF(type);
         if (name != NULL) {
             PyErr_Format(PyExc_TypeError,
-- 
cgit v1.2.1


From da328a42858356139f9e6de860b802408ed76f5d Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@haypocalc.com>
Date: Fri, 14 Oct 2011 02:13:11 +0200
Subject: Issue #13088: Add shared Py_hexdigits constant to format a number
 into base 16

---
 Python/codecs.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 79dfe89aba..006d288b70 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -11,6 +11,8 @@ Copyright (c) Corporation for National Research Initiatives.
 #include "Python.h"
 #include <ctype.h>
 
+const char *Py_hexdigits = "0123456789abcdef";
+
 /* --- Codec Registry ----------------------------------------------------- */
 
 /* Import the standard encodings package which will register the first
@@ -673,8 +675,6 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
     }
 }
 
-static const char *hexdigits = "0123456789abcdef";
-
 PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
 {
 #ifndef Py_UNICODE_WIDE
@@ -731,22 +731,22 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
             }
             if (c >= 0x00010000) {
                 *outp++ = 'U';
-                *outp++ = hexdigits[(c>>28)&0xf];
-                *outp++ = hexdigits[(c>>24)&0xf];
-                *outp++ = hexdigits[(c>>20)&0xf];
-                *outp++ = hexdigits[(c>>16)&0xf];
-                *outp++ = hexdigits[(c>>12)&0xf];
-                *outp++ = hexdigits[(c>>8)&0xf];
+                *outp++ = Py_hexdigits[(c>>28)&0xf];
+                *outp++ = Py_hexdigits[(c>>24)&0xf];
+                *outp++ = Py_hexdigits[(c>>20)&0xf];
+                *outp++ = Py_hexdigits[(c>>16)&0xf];
+                *outp++ = Py_hexdigits[(c>>12)&0xf];
+                *outp++ = Py_hexdigits[(c>>8)&0xf];
             }
             else if (c >= 0x100) {
                 *outp++ = 'u';
-                *outp++ = hexdigits[(c>>12)&0xf];
-                *outp++ = hexdigits[(c>>8)&0xf];
+                *outp++ = Py_hexdigits[(c>>12)&0xf];
+                *outp++ = Py_hexdigits[(c>>8)&0xf];
             }
             else
                 *outp++ = 'x';
-            *outp++ = hexdigits[(c>>4)&0xf];
-            *outp++ = hexdigits[c&0xf];
+            *outp++ = Py_hexdigits[(c>>4)&0xf];
+            *outp++ = Py_hexdigits[c&0xf];
         }
 
         restuple = Py_BuildValue("(On)", res, end);
-- 
cgit v1.2.1


From f5ad3b280b43227eb0e3fa63d89490a58ba9c28b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= <martin@v.loewis.de>
Date: Fri, 14 Oct 2011 10:20:37 +0200
Subject: Rename _Py_identifier to _Py_IDENTIFIER.

---
 Python/codecs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 006d288b70..eca2b88bfb 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -467,8 +467,8 @@ PyObject *PyCodec_LookupError(const char *name)
 
 static void wrong_exception_type(PyObject *exc)
 {
-    _Py_identifier(__class__);
-    _Py_identifier(__name__);
+    _Py_IDENTIFIER(__class__);
+    _Py_IDENTIFIER(__name__);
     PyObject *type = _PyObject_GetAttrId(exc, &PyId___class__);
     if (type != NULL) {
         PyObject *name = _PyObject_GetAttrId(type, &PyId___name__);
-- 
cgit v1.2.1


From e30694e4817ae7ff2c1502e57ad2ea7f293dfe21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= <martin@v.loewis.de>
Date: Fri, 4 Nov 2011 11:16:41 +0100
Subject: Port error handlers from Py_UNICODE indexing to code point indexing.

---
 Python/codecs.c | 123 +++++++++++++++++++++-----------------------------------
 1 file changed, 46 insertions(+), 77 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index eca2b88bfb..47cdb20add 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -573,82 +573,72 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
         PyObject *restuple;
         PyObject *object;
+        Py_ssize_t i, o;
         Py_ssize_t start;
         Py_ssize_t end;
         PyObject *res;
-        Py_UNICODE *p;
-        Py_UNICODE *startp;
-        Py_UNICODE *outp;
+        unsigned char *outp;
         int ressize;
+        Py_UCS4 ch;
         if (PyUnicodeEncodeError_GetStart(exc, &start))
             return NULL;
         if (PyUnicodeEncodeError_GetEnd(exc, &end))
             return NULL;
         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
             return NULL;
-        startp = PyUnicode_AS_UNICODE(object);
-        for (p = startp+start, ressize = 0; p < startp+end; ++p) {
-            if (*p<10)
+        for (i = start, ressize = 0; i < end; ++i) {
+            /* object is guaranteed to be "ready" */
+            ch = PyUnicode_READ_CHAR(object, i);
+            if (ch<10)
                 ressize += 2+1+1;
-            else if (*p<100)
+            else if (ch<100)
                 ressize += 2+2+1;
-            else if (*p<1000)
+            else if (ch<1000)
                 ressize += 2+3+1;
-            else if (*p<10000)
+            else if (ch<10000)
                 ressize += 2+4+1;
-#ifndef Py_UNICODE_WIDE
-            else
-                ressize += 2+5+1;
-#else
-            else if (*p<100000)
+            else if (ch<100000)
                 ressize += 2+5+1;
-            else if (*p<1000000)
+            else if (ch<1000000)
                 ressize += 2+6+1;
             else
                 ressize += 2+7+1;
-#endif
         }
         /* allocate replacement */
-        res = PyUnicode_FromUnicode(NULL, ressize);
+        res = PyUnicode_New(ressize, 127);
         if (res == NULL) {
             Py_DECREF(object);
             return NULL;
         }
+        outp = PyUnicode_1BYTE_DATA(res);
         /* generate replacement */
-        for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
-            p < startp+end; ++p) {
-            Py_UNICODE c = *p;
+        for (i = start, o = 0; i < end; ++i) {
+            ch = PyUnicode_READ_CHAR(object, i);
             int digits;
             int base;
             *outp++ = '&';
             *outp++ = '#';
-            if (*p<10) {
+            if (ch<10) {
                 digits = 1;
                 base = 1;
             }
-            else if (*p<100) {
+            else if (ch<100) {
                 digits = 2;
                 base = 10;
             }
-            else if (*p<1000) {
+            else if (ch<1000) {
                 digits = 3;
                 base = 100;
             }
-            else if (*p<10000) {
+            else if (ch<10000) {
                 digits = 4;
                 base = 1000;
             }
-#ifndef Py_UNICODE_WIDE
-            else {
-                digits = 5;
-                base = 10000;
-            }
-#else
-            else if (*p<100000) {
+            else if (ch<100000) {
                 digits = 5;
                 base = 10000;
             }
-            else if (*p<1000000) {
+            else if (ch<1000000) {
                 digits = 6;
                 base = 100000;
             }
@@ -656,10 +646,9 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
                 digits = 7;
                 base = 1000000;
             }
-#endif
             while (digits-->0) {
-                *outp++ = '0' + c/base;
-                c %= base;
+                *outp++ = '0' + ch/base;
+                ch %= base;
                 base /= 10;
             }
             *outp++ = ';';
@@ -677,58 +666,41 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
 
 PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
 {
-#ifndef Py_UNICODE_WIDE
-#define IS_SURROGATE_PAIR(p, end) \
-    (*p >= 0xD800 && *p <= 0xDBFF && (p + 1) < end && \
-     *(p + 1) >= 0xDC00 && *(p + 1) <= 0xDFFF)
-#else
-#define IS_SURROGATE_PAIR(p, end) 0
-#endif
     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
         PyObject *restuple;
         PyObject *object;
+        Py_ssize_t i;
         Py_ssize_t start;
         Py_ssize_t end;
         PyObject *res;
-        Py_UNICODE *p;
-        Py_UNICODE *startp;
-        Py_UNICODE *outp;
+        unsigned char *outp;
         int ressize;
+        Py_UCS4 c;
         if (PyUnicodeEncodeError_GetStart(exc, &start))
             return NULL;
         if (PyUnicodeEncodeError_GetEnd(exc, &end))
             return NULL;
         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
             return NULL;
-        startp = PyUnicode_AS_UNICODE(object);
-        for (p = startp+start, ressize = 0; p < startp+end; ++p) {
-#ifdef Py_UNICODE_WIDE
-            if (*p >= 0x00010000)
+        for (i = start, ressize = 0; i < end; ++i) {
+            /* object is guaranteed to be "ready" */
+            c = PyUnicode_READ_CHAR(object, i);
+            if (c >= 0x10000) {
                 ressize += 1+1+8;
-            else
-#endif
-            if (*p >= 0x100) {
-                if (IS_SURROGATE_PAIR(p, startp+end)) {
-                    ressize += 1+1+8;
-                    ++p;
-                }
-                else
-                    ressize += 1+1+4;
+            }
+            else if (c >= 0x100) {
+                ressize += 1+1+4;
             }
             else
                 ressize += 1+1+2;
         }
-        res = PyUnicode_FromUnicode(NULL, ressize);
+        res = PyUnicode_New(ressize, 127);
         if (res==NULL)
             return NULL;
-        for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
-            p < startp+end; ++p) {
-            Py_UCS4 c = (Py_UCS4) *p;
+        for (i = start, outp = PyUnicode_1BYTE_DATA(res);
+            i < end; ++i) {
+            c = PyUnicode_READ_CHAR(object, i);
             *outp++ = '\\';
-            if (IS_SURROGATE_PAIR(p, startp+end)) {
-                c = ((*p & 0x3FF) << 10) + (*(p + 1) & 0x3FF) + 0x10000;
-                ++p;
-            }
             if (c >= 0x00010000) {
                 *outp++ = 'U';
                 *outp++ = Py_hexdigits[(c>>28)&0xf];
@@ -758,7 +730,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
         wrong_exception_type(exc);
         return NULL;
     }
-#undef IS_SURROGATE_PAIR
 }
 
 /* This handler is declared static until someone demonstrates
@@ -768,12 +739,11 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
 {
     PyObject *restuple;
     PyObject *object;
+    Py_ssize_t i;
     Py_ssize_t start;
     Py_ssize_t end;
     PyObject *res;
     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
-        Py_UNICODE *p;
-        Py_UNICODE *startp;
         char *outp;
         if (PyUnicodeEncodeError_GetStart(exc, &start))
             return NULL;
@@ -781,15 +751,15 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
             return NULL;
         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
             return NULL;
-        startp = PyUnicode_AS_UNICODE(object);
         res = PyBytes_FromStringAndSize(NULL, 3*(end-start));
         if (!res) {
             Py_DECREF(object);
             return NULL;
         }
         outp = PyBytes_AsString(res);
-        for (p = startp+start; p < startp+end; p++) {
-            Py_UNICODE ch = *p;
+        for (i = start; i < end; i++) {
+            /* object is guaranteed to be "ready" */
+            Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
             if (ch < 0xd800 || ch > 0xdfff) {
                 /* Not a surrogate, fail with original exception */
                 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
@@ -847,12 +817,11 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
 {
     PyObject *restuple;
     PyObject *object;
+    Py_ssize_t i;
     Py_ssize_t start;
     Py_ssize_t end;
     PyObject *res;
     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
-        Py_UNICODE *p;
-        Py_UNICODE *startp;
         char *outp;
         if (PyUnicodeEncodeError_GetStart(exc, &start))
             return NULL;
@@ -860,15 +829,15 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
             return NULL;
         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
             return NULL;
-        startp = PyUnicode_AS_UNICODE(object);
         res = PyBytes_FromStringAndSize(NULL, end-start);
         if (!res) {
             Py_DECREF(object);
             return NULL;
         }
         outp = PyBytes_AsString(res);
-        for (p = startp+start; p < startp+end; p++) {
-            Py_UNICODE ch = *p;
+        for (i = start; i < end; i++) {
+            /* object is guaranteed to be "ready" */
+            Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
             if (ch < 0xdc80 || ch > 0xdcff) {
                 /* Not a UTF-8b surrogate, fail with original exception */
                 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
-- 
cgit v1.2.1


From 762eb600184fb9894e88be2d36aaeecc8d8d5c6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= <martin@v.loewis.de>
Date: Fri, 4 Nov 2011 12:26:49 +0100
Subject: Fix C89 incompatibility.

---
 Python/codecs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 47cdb20add..dcc7c988d6 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -613,9 +613,9 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
         outp = PyUnicode_1BYTE_DATA(res);
         /* generate replacement */
         for (i = start, o = 0; i < end; ++i) {
-            ch = PyUnicode_READ_CHAR(object, i);
             int digits;
             int base;
+            ch = PyUnicode_READ_CHAR(object, i);
             *outp++ = '&';
             *outp++ = '#';
             if (ch<10) {
-- 
cgit v1.2.1


From caec74946ceabba5ddd3fa3d57eae053ca2f3767 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@haypocalc.com>
Date: Fri, 4 Nov 2011 21:29:10 +0100
Subject: PyCodec_XMLCharRefReplaceErrors(): Remove unused variable

---
 Python/codecs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index dcc7c988d6..07fe2eee2c 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -573,7 +573,7 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
         PyObject *restuple;
         PyObject *object;
-        Py_ssize_t i, o;
+        Py_ssize_t i;
         Py_ssize_t start;
         Py_ssize_t end;
         PyObject *res;
@@ -612,7 +612,7 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
         }
         outp = PyUnicode_1BYTE_DATA(res);
         /* generate replacement */
-        for (i = start, o = 0; i < end; ++i) {
+        for (i = start; i < end; ++i) {
             int digits;
             int base;
             ch = PyUnicode_READ_CHAR(object, i);
-- 
cgit v1.2.1


From 5e0e358d806886b550069eaa2235d8654ac762db Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@haypocalc.com>
Date: Fri, 4 Nov 2011 21:36:35 +0100
Subject: Avoid the Py_UNICODE type in codecs.c

---
 Python/codecs.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 07fe2eee2c..cdf2c4455a 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -778,7 +778,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
     }
     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
         unsigned char *p;
-        Py_UNICODE ch = 0;
+        Py_UCS4 ch = 0;
         if (PyUnicodeDecodeError_GetStart(exc, &start))
             return NULL;
         if (!(object = PyUnicodeDecodeError_GetObject(exc)))
@@ -804,7 +804,10 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
             PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
             return NULL;
         }
-        return Py_BuildValue("(u#n)", &ch, 1, start+3);
+        res = PyUnicode_FromOrdinal(ch);
+        if (res == NULL)
+            return NULL;
+        return Py_BuildValue("(Nn)", res, start+3);
     }
     else {
         wrong_exception_type(exc);
@@ -853,8 +856,9 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
         return restuple;
     }
     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+        PyObject *str;
         unsigned char *p;
-        Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */
+        Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */
         int consumed = 0;
         if (PyUnicodeDecodeError_GetStart(exc, &start))
             return NULL;
@@ -879,7 +883,10 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
             PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
             return NULL;
         }
-        return Py_BuildValue("(u#n)", ch, consumed, start+consumed);
+        str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
+        if (str == NULL)
+            return NULL;
+        return Py_BuildValue("(Nn)", str, start+consumed);
     }
     else {
         wrong_exception_type(exc);
-- 
cgit v1.2.1


From ba2c39f9d3821cb88b0ebab7bb40fa48405e78a5 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@haypocalc.com>
Date: Thu, 1 Dec 2011 02:52:11 +0100
Subject: PyCodec_IgnoreErrors() avoids the deprecated "u#" format

---
 Python/codecs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index cdf2c4455a..9b0c4b2f44 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -510,8 +510,7 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
         wrong_exception_type(exc);
         return NULL;
     }
-    /* ouch: passing NULL, 0, pos gives None instead of u'' */
-    return Py_BuildValue("(u#n)", &end, 0, end);
+    return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end);
 }
 
 
-- 
cgit v1.2.1


From 2555301e0aafefea124fe79f268207bc752a4722 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Fri, 27 Apr 2012 13:55:39 +0200
Subject: Check newly created string consistency using
 _PyUnicode_CheckConsistency(str, 1)

 * In debug mode, fill the string data with invalid characters
 * Also simplify reference counting in PyCodec_BackslashReplaceErrors()
   and PyCodec_XMLCharRefReplaceErrors()
---
 Python/codecs.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 607feea81c..797a45f5a1 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -534,6 +534,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
         data = PyUnicode_DATA(res);
         for (i = 0; i < len; ++i)
             PyUnicode_WRITE(kind, data, i, '?');
+        assert(_PyUnicode_CheckConsistency(res, 1));
         return Py_BuildValue("(Nn)", res, end);
     }
     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
@@ -559,6 +560,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
         data = PyUnicode_DATA(res);
         for (i=0; i < len; i++)
             PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
+        assert(_PyUnicode_CheckConsistency(res, 1));
         return Py_BuildValue("(Nn)", res, end);
     }
     else {
@@ -652,8 +654,8 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
             }
             *outp++ = ';';
         }
-        restuple = Py_BuildValue("(On)", res, end);
-        Py_DECREF(res);
+        assert(_PyUnicode_CheckConsistency(res, 1));
+        restuple = Py_BuildValue("(Nn)", res, end);
         Py_DECREF(object);
         return restuple;
     }
@@ -720,8 +722,8 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
             *outp++ = Py_hexdigits[c&0xf];
         }
 
-        restuple = Py_BuildValue("(On)", res, end);
-        Py_DECREF(res);
+        assert(_PyUnicode_CheckConsistency(res, 1));
+        restuple = Py_BuildValue("(Nn)", res, end);
         Py_DECREF(object);
         return restuple;
     }
-- 
cgit v1.2.1