From 4d0ebf454346e70e5b7c7803ab7939f7c12a2b39 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Tue, 30 Oct 2012 01:42:39 +0100
Subject: Issue #16330: Use surrogate-related macros

Patch written by Serhiy Storchaka.
---
 Python/codecs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 5470500577..5cfb1c9001 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -761,7 +761,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
         for (i = start; i < end; i++) {
             /* object is guaranteed to be "ready" */
             Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
-            if (ch < 0xd800 || ch > 0xdfff) {
+            if (!Py_UNICODE_IS_SURROGATE(ch)) {
                 /* Not a surrogate, fail with original exception */
                 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
                 Py_DECREF(res);
@@ -797,7 +797,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
              (p[2] & 0xc0) == 0x80)) {
             /* it's a three-byte code */
             ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
-            if (ch < 0xd800 || ch > 0xdfff)
+            if (!Py_UNICODE_IS_SURROGATE(ch))
                 /* it's not a surrogate - fail */
                 ch = 0;
         }
-- 
cgit v1.2.1


From a40d6e3ce9422ce472ac31cc4b8eabf66e5b0bac Mon Sep 17 00:00:00 2001
From: Andrew Svetlov <andrew.svetlov@gmail.com>
Date: Tue, 25 Dec 2012 13:32:35 +0200
Subject: Issue #15422: get rid of PyCFunction_New macro

---
 Python/codecs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 37ae41b1ca..8d9ce6f496 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -1026,7 +1026,7 @@ static int _PyCodecRegistry_Init(void)
 
     if (interp->codec_error_registry) {
         for (i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
-            PyObject *func = PyCFunction_New(&methods[i].def, NULL);
+            PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL);
             int res;
             if (!func)
                 Py_FatalError("can't initialize codec error registry");
-- 
cgit v1.2.1


From d8f2b1a5c5a9bdf30a0950ee96de960449346355 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Fri, 12 Jul 2013 00:02:55 +0200
Subject: Issue #18408: normalizestring() now raises MemoryError on memory
 allocation failure

---
 Python/codecs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 8d9ce6f496..899f0aa748 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -65,7 +65,7 @@ PyObject *normalizestring(const char *string)
 
     p = PyMem_Malloc(len + 1);
     if (p == NULL)
-        return NULL;
+        return PyErr_NoMemory();
     for (i = 0; i < len; i++) {
         register char ch = string[i];
         if (ch == ' ')
-- 
cgit v1.2.1


From a12be6ae60b6a559296d637e558408d4bc2092a5 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <solipsis@pitrou.net>
Date: Tue, 13 Aug 2013 20:18:52 +0200
Subject: Issue #18722: Remove uses of the "register" keyword in C code.

---
 Python/codecs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 899f0aa748..cb9f0d8a70 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -53,7 +53,7 @@ int PyCodec_Register(PyObject *search_function)
 static
 PyObject *normalizestring(const char *string)
 {
-    register size_t i;
+    size_t i;
     size_t len = strlen(string);
     char *p;
     PyObject *v;
@@ -67,7 +67,7 @@ PyObject *normalizestring(const char *string)
     if (p == NULL)
         return PyErr_NoMemory();
     for (i = 0; i < len; i++) {
-        register char ch = string[i];
+        char ch = string[i];
         if (ch == ' ')
             ch = '-';
         else
-- 
cgit v1.2.1


From 6e75c19396402b107f1dfffa8746c6b8b679a9a4 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 19 Oct 2013 21:03:34 +0300
Subject: Issue #1772673: The type of `char*` arguments now changed to `const
 char*`.

---
 Python/codecs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index cb9f0d8a70..c541ba02b1 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -441,7 +441,7 @@ int PyCodec_RegisterError(const char *name, PyObject *error)
         return -1;
     }
     return PyDict_SetItemString(interp->codec_error_registry,
-                                (char *)name, error);
+                                name, error);
 }
 
 /* Lookup the error handling callback function registered under the
@@ -457,7 +457,7 @@ PyObject *PyCodec_LookupError(const char *name)
 
     if (name==NULL)
         name = "strict";
-    handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
+    handler = PyDict_GetItemString(interp->codec_error_registry, name);
     if (!handler)
         PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
     else
-- 
cgit v1.2.1


From 644dcdc965cc26de0542280e625dbe6cd1a070ed Mon Sep 17 00:00:00 2001
From: Nick Coghlan <ncoghlan@gmail.com>
Date: Wed, 13 Nov 2013 23:49:21 +1000
Subject: Close #17828: better handling of codec errors

- output type errors now redirect users to the type-neutral
  convenience functions in the codecs module
- stateless errors that occur during encoding and decoding
  will now be automatically wrapped in exceptions that give
  the name of the codec involved
---
 Python/codecs.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index c541ba02b1..e2edc269a8 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -332,6 +332,22 @@ PyObject *PyCodec_StreamWriter(const char *encoding,
     return codec_getstreamcodec(encoding, stream, errors, 3);
 }
 
+/* Helper that tries to ensure the reported exception chain indicates the
+ * codec that was invoked to trigger the failure without changing the type
+ * of the exception raised.
+ */
+static void
+wrap_codec_error(const char *operation,
+                 const char *encoding)
+{
+    /* TrySetFromCause will replace the active exception with a suitably
+     * updated clone if it can, otherwise it will leave the original
+     * exception alone.
+     */
+    _PyErr_TrySetFromCause("%s with '%s' codec failed",
+                           operation, encoding);
+}
+
 /* Encode an object (e.g. an Unicode object) using the given encoding
    and return the resulting encoded object (usually a Python string).
 
@@ -376,6 +392,7 @@ PyObject *PyCodec_Encode(PyObject *object,
     Py_XDECREF(result);
     Py_XDECREF(args);
     Py_XDECREF(encoder);
+    wrap_codec_error("encoding", encoding);
     return NULL;
 }
 
@@ -422,6 +439,7 @@ PyObject *PyCodec_Decode(PyObject *object,
     Py_XDECREF(args);
     Py_XDECREF(decoder);
     Py_XDECREF(result);
+    wrap_codec_error("decoding", encoding);
     return NULL;
 }
 
-- 
cgit v1.2.1


From a3a1673d825f9d6711c1fe1571d575dc4934358b Mon Sep 17 00:00:00 2001
From: Nick Coghlan <ncoghlan@gmail.com>
Date: Fri, 15 Nov 2013 21:47:37 +1000
Subject: Close #19609: narrow scope of codec exc chaining

---
 Python/codecs.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index e2edc269a8..fe0cab4f7f 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -370,8 +370,10 @@ PyObject *PyCodec_Encode(PyObject *object,
         goto onError;
 
     result = PyEval_CallObject(encoder, args);
-    if (result == NULL)
+    if (result == NULL) {
+        wrap_codec_error("encoding", encoding);
         goto onError;
+    }
 
     if (!PyTuple_Check(result) ||
         PyTuple_GET_SIZE(result) != 2) {
@@ -392,7 +394,6 @@ PyObject *PyCodec_Encode(PyObject *object,
     Py_XDECREF(result);
     Py_XDECREF(args);
     Py_XDECREF(encoder);
-    wrap_codec_error("encoding", encoding);
     return NULL;
 }
 
@@ -418,8 +419,10 @@ PyObject *PyCodec_Decode(PyObject *object,
         goto onError;
 
     result = PyEval_CallObject(decoder,args);
-    if (result == NULL)
+    if (result == NULL) {
+        wrap_codec_error("decoding", encoding);
         goto onError;
+    }
     if (!PyTuple_Check(result) ||
         PyTuple_GET_SIZE(result) != 2) {
         PyErr_SetString(PyExc_TypeError,
@@ -439,7 +442,6 @@ PyObject *PyCodec_Decode(PyObject *object,
     Py_XDECREF(args);
     Py_XDECREF(decoder);
     Py_XDECREF(result);
-    wrap_codec_error("decoding", encoding);
     return NULL;
 }
 
-- 
cgit v1.2.1


From c1c39389d8a80767e05f84820e28caf528ba0f81 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Tue, 19 Nov 2013 11:32:41 +0200
Subject: Issue #12892: The utf-16* and utf-32* codecs now reject (lone)
 surrogates.

The utf-16* and utf-32* encoders no longer allow surrogate code points
(U+D800-U+DFFF) to be encoded.
The utf-32* decoders no longer decode byte sequences that correspond to
surrogate code points.
The surrogatepass error handler now works with the utf-16* and utf-32* codecs.

Based on patches by Victor Stinner and Kang-Hao (Kenny) Lu.
---
 Python/codecs.c | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 146 insertions(+), 17 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index fe0cab4f7f..8fe0af7bf0 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -753,6 +753,65 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
     }
 }
 
+#define ENC_UTF8        0
+#define ENC_UTF16BE     1
+#define ENC_UTF16LE     2
+#define ENC_UTF32BE     3
+#define ENC_UTF32LE     4
+
+static int
+get_standard_encoding(const char *encoding, int *bytelength)
+{
+    if (Py_TOLOWER(encoding[0]) == 'u' &&
+        Py_TOLOWER(encoding[1]) == 't' &&
+        Py_TOLOWER(encoding[2]) == 'f') {
+        encoding += 3;
+        if (*encoding == '-' || *encoding == '_' )
+            encoding++;
+        if (encoding[0] == '1' && encoding[1] == '6') {
+            encoding += 2;
+            *bytelength = 2;
+            if (*encoding == '\0') {
+#ifdef WORDS_BIGENDIAN
+                return ENC_UTF16BE;
+#else
+                return ENC_UTF16LE;
+#endif
+            }
+            if (*encoding == '-' || *encoding == '_' )
+                encoding++;
+            if (Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
+                if (Py_TOLOWER(encoding[0]) == 'b')
+                    return ENC_UTF16BE;
+                if (Py_TOLOWER(encoding[0]) == 'l')
+                    return ENC_UTF16LE;
+            }
+        }
+        else if (encoding[0] == '3' && encoding[1] == '2') {
+            encoding += 2;
+            *bytelength = 4;
+            if (*encoding == '\0') {
+#ifdef WORDS_BIGENDIAN
+                return ENC_UTF32BE;
+#else
+                return ENC_UTF32LE;
+#endif
+            }
+            if (*encoding == '-' || *encoding == '_' )
+                encoding++;
+            if (Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
+                if (Py_TOLOWER(encoding[0]) == 'b')
+                    return ENC_UTF32BE;
+                if (Py_TOLOWER(encoding[0]) == 'l')
+                    return ENC_UTF32LE;
+            }
+        }
+    }
+    /* utf-8 */
+    *bytelength = 3;
+    return ENC_UTF8;
+}
+
 /* This handler is declared static until someone demonstrates
    a need to call it directly. */
 static PyObject *
@@ -760,24 +819,40 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
 {
     PyObject *restuple;
     PyObject *object;
+    PyObject *encode;
+    char *encoding;
+    int code;
+    int bytelength;
     Py_ssize_t i;
     Py_ssize_t start;
     Py_ssize_t end;
     PyObject *res;
     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
-        char *outp;
+        unsigned char *outp;
         if (PyUnicodeEncodeError_GetStart(exc, &start))
             return NULL;
         if (PyUnicodeEncodeError_GetEnd(exc, &end))
             return NULL;
         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
             return NULL;
-        res = PyBytes_FromStringAndSize(NULL, 3*(end-start));
+        if (!(encode = PyUnicodeEncodeError_GetEncoding(exc))) {
+            Py_DECREF(object);
+            return NULL;
+        }
+        if (!(encoding = PyUnicode_AsUTF8(encode))) {
+            Py_DECREF(object);
+            Py_DECREF(encode);
+            return NULL;
+        }
+        code = get_standard_encoding(encoding, &bytelength);
+        Py_DECREF(encode);
+
+        res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start));
         if (!res) {
             Py_DECREF(object);
             return NULL;
         }
-        outp = PyBytes_AsString(res);
+        outp = (unsigned char*)PyBytes_AsString(res);
         for (i = start; i < end; i++) {
             /* object is guaranteed to be "ready" */
             Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
@@ -788,9 +863,33 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
                 Py_DECREF(object);
                 return NULL;
             }
-            *outp++ = (char)(0xe0 | (ch >> 12));
-            *outp++ = (char)(0x80 | ((ch >> 6) & 0x3f));
-            *outp++ = (char)(0x80 | (ch & 0x3f));
+            switch (code) {
+            case ENC_UTF8:
+                *outp++ = (unsigned char)(0xe0 | (ch >> 12));
+                *outp++ = (unsigned char)(0x80 | ((ch >> 6) & 0x3f));
+                *outp++ = (unsigned char)(0x80 | (ch & 0x3f));
+                break;
+            case ENC_UTF16LE:
+                *outp++ = (unsigned char) ch;
+                *outp++ = (unsigned char)(ch >> 8);
+                break;
+            case ENC_UTF16BE:
+                *outp++ = (unsigned char)(ch >> 8);
+                *outp++ = (unsigned char) ch;
+                break;
+            case ENC_UTF32LE:
+                *outp++ = (unsigned char) ch;
+                *outp++ = (unsigned char)(ch >> 8);
+                *outp++ = (unsigned char)(ch >> 16);
+                *outp++ = (unsigned char)(ch >> 24);
+                break;
+            case ENC_UTF32BE:
+                *outp++ = (unsigned char)(ch >> 24);
+                *outp++ = (unsigned char)(ch >> 16);
+                *outp++ = (unsigned char)(ch >> 8);
+                *outp++ = (unsigned char) ch;
+                break;
+            }
         }
         restuple = Py_BuildValue("(On)", res, end);
         Py_DECREF(res);
@@ -802,34 +901,64 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
         Py_UCS4 ch = 0;
         if (PyUnicodeDecodeError_GetStart(exc, &start))
             return NULL;
+        if (PyUnicodeDecodeError_GetEnd(exc, &end))
+            return NULL;
         if (!(object = PyUnicodeDecodeError_GetObject(exc)))
             return NULL;
         if (!(p = (unsigned char*)PyBytes_AsString(object))) {
             Py_DECREF(object);
             return NULL;
         }
+        if (!(encode = PyUnicodeDecodeError_GetEncoding(exc))) {
+            Py_DECREF(object);
+            return NULL;
+        }
+        if (!(encoding = PyUnicode_AsUTF8(encode))) {
+            Py_DECREF(object);
+            Py_DECREF(encode);
+            return NULL;
+        }
+        code = get_standard_encoding(encoding, &bytelength);
+        Py_DECREF(encode);
+
         /* Try decoding a single surrogate character. If
            there are more, let the codec call us again. */
         p += start;
-        if (PyBytes_GET_SIZE(object) - start >= 3 &&
-            (p[0] & 0xf0) == 0xe0 &&
-            (p[1] & 0xc0) == 0x80 &&
-            (p[2] & 0xc0) == 0x80) {
-            /* it's a three-byte code */
-            ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
-            if (!Py_UNICODE_IS_SURROGATE(ch))
-                /* it's not a surrogate - fail */
-                ch = 0;
+        if (PyBytes_GET_SIZE(object) - start >= bytelength) {
+            switch (code) {
+            case ENC_UTF8:
+                if ((p[0] & 0xf0) == 0xe0 &&
+                    (p[1] & 0xc0) == 0x80 &&
+                    (p[2] & 0xc0) == 0x80) {
+                    /* it's a three-byte code */
+                    ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
+                }
+                break;
+            case ENC_UTF16LE:
+                ch = p[1] << 8 | p[0];
+                break;
+            case ENC_UTF16BE:
+                ch = p[0] << 8 | p[1];
+                break;
+            case ENC_UTF32LE:
+                ch = (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0];
+                break;
+            case ENC_UTF32BE:
+                ch = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
+                break;
+            }
         }
+
         Py_DECREF(object);
-        if (ch == 0) {
+        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+            /* it's not a surrogate - fail */
             PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
             return NULL;
         }
         res = PyUnicode_FromOrdinal(ch);
         if (res == NULL)
             return NULL;
-        return Py_BuildValue("(Nn)", res, start+3);
+        return Py_BuildValue("(Nn)", res, start + bytelength);
     }
     else {
         wrong_exception_type(exc);
-- 
cgit v1.2.1


From 27e38b38ecb79431f7a7b33292399bfd6263924e Mon Sep 17 00:00:00 2001
From: Nick Coghlan <ncoghlan@gmail.com>
Date: Fri, 22 Nov 2013 22:39:36 +1000
Subject: Issue #19619: Blacklist non-text codecs in method API

str.encode, bytes.decode and bytearray.decode now use an
internal API to throw LookupError for known non-text encodings,
rather than attempting the encoding or decoding operation and
then throwing a TypeError for an unexpected output type.

The latter mechanism remains in place for third party non-text
encodings.
---
 Python/codecs.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 122 insertions(+), 16 deletions(-)

(limited to 'Python/codecs.c')

diff --git a/Python/codecs.c b/Python/codecs.c
index 8fe0af7bf0..5ff41b57df 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -353,18 +353,15 @@ wrap_codec_error(const char *operation,
 
    errors is passed to the encoder factory as argument if non-NULL. */
 
-PyObject *PyCodec_Encode(PyObject *object,
-                         const char *encoding,
-                         const char *errors)
+static PyObject *
+_PyCodec_EncodeInternal(PyObject *object,
+                        PyObject *encoder,
+                        const char *encoding,
+                        const char *errors)
 {
-    PyObject *encoder = NULL;
     PyObject *args = NULL, *result = NULL;
     PyObject *v = NULL;
 
-    encoder = PyCodec_Encoder(encoding);
-    if (encoder == NULL)
-        goto onError;
-
     args = args_tuple(object, errors);
     if (args == NULL)
         goto onError;
@@ -402,18 +399,15 @@ PyObject *PyCodec_Encode(PyObject *object,
 
    errors is passed to the decoder factory as argument if non-NULL. */
 
-PyObject *PyCodec_Decode(PyObject *object,
-                         const char *encoding,
-                         const char *errors)
+static PyObject *
+_PyCodec_DecodeInternal(PyObject *object,
+                        PyObject *decoder,
+                        const char *encoding,
+                        const char *errors)
 {
-    PyObject *decoder = NULL;
     PyObject *args = NULL, *result = NULL;
     PyObject *v;
 
-    decoder = PyCodec_Decoder(encoding);
-    if (decoder == NULL)
-        goto onError;
-
     args = args_tuple(object, errors);
     if (args == NULL)
         goto onError;
@@ -445,6 +439,118 @@ PyObject *PyCodec_Decode(PyObject *object,
     return NULL;
 }
 
+/* Generic encoding/decoding API */
+PyObject *PyCodec_Encode(PyObject *object,
+                         const char *encoding,
+                         const char *errors)
+{
+    PyObject *encoder;
+
+    encoder = PyCodec_Encoder(encoding);
+    if (encoder == NULL)
+        return NULL;
+
+    return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
+}
+
+PyObject *PyCodec_Decode(PyObject *object,
+                         const char *encoding,
+                         const char *errors)
+{
+    PyObject *decoder;
+
+    decoder = PyCodec_Decoder(encoding);
+    if (decoder == NULL)
+        return NULL;
+
+    return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
+}
+
+/* Text encoding/decoding API */
+static
+PyObject *codec_getitem_checked(const char *encoding,
+                                const char *operation_name,
+                                int index)
+{
+    _Py_IDENTIFIER(_is_text_encoding);
+    PyObject *codec;
+    PyObject *attr;
+    PyObject *v;
+    int is_text_codec;
+
+    codec = _PyCodec_Lookup(encoding);
+    if (codec == NULL)
+        return NULL;
+
+    /* Backwards compatibility: assume any raw tuple describes a text
+     * encoding, and the same for anything lacking the private
+     * attribute.
+     */
+    if (!PyTuple_CheckExact(codec)) {
+        attr = _PyObject_GetAttrId(codec, &PyId__is_text_encoding);
+        if (attr == NULL) {
+            if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
+                PyErr_Clear();
+            } else {
+                Py_DECREF(codec);
+                return NULL;
+            }
+        } else {
+            is_text_codec = PyObject_IsTrue(attr);
+            Py_DECREF(attr);
+            if (!is_text_codec) {
+                Py_DECREF(codec);
+                PyErr_Format(PyExc_LookupError,
+                             "'%.400s' is not a text encoding; "
+                             "use codecs.%s() to handle arbitrary codecs",
+                             encoding, operation_name);
+                return NULL;
+            }
+        }
+    }
+
+    v = PyTuple_GET_ITEM(codec, index);
+    Py_DECREF(codec);
+    Py_INCREF(v);
+    return v;
+}
+
+static PyObject * _PyCodec_TextEncoder(const char *encoding)
+{
+    return codec_getitem_checked(encoding, "encode", 0);
+}
+
+static PyObject * _PyCodec_TextDecoder(const char *encoding)
+{
+    return codec_getitem_checked(encoding, "decode", 1);
+}
+
+PyObject *_PyCodec_EncodeText(PyObject *object,
+                              const char *encoding,
+                              const char *errors)
+{
+    PyObject *encoder;
+
+    encoder = _PyCodec_TextEncoder(encoding);
+    if (encoder == NULL)
+        return NULL;
+
+    return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
+}
+
+PyObject *_PyCodec_DecodeText(PyObject *object,
+                              const char *encoding,
+                              const char *errors)
+{
+    PyObject *decoder;
+
+    decoder = _PyCodec_TextDecoder(encoding);
+    if (decoder == NULL)
+        return NULL;
+
+    return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
+}
+
 /* Register the error handling callback function error under the name
    name. This function will be called by the codec when it encounters
    an unencodable characters/undecodable bytes and doesn't know the
-- 
cgit v1.2.1