summary | refs | log | tree | commit | diff
path: root/Modules/_codecsmodule.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_codecsmodule.c')
-rw-r--r-- Modules/_codecsmodule.c 259
1 files changed, 159 insertions, 100 deletions
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index eb740f95fd..40037b1dc6 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -38,6 +38,10 @@ Copyright (c) Corporation for National Research Initiatives.
#define PY_SSIZE_T_CLEAN
#include "Python.h"
+#ifdef MS_WINDOWS
+#include <windows.h>
+#endif
+
/* --- Registry ----------------------------------------------------------- */
PyDoc_STRVAR(register__doc__,
@@ -162,7 +166,6 @@ static PyObject *
escape_encode(PyObject *self,
PyObject *args)
{
- static const char *hexdigits = "0123456789abcdef";
PyObject *str;
Py_ssize_t size;
Py_ssize_t newsize;
@@ -174,12 +177,12 @@ escape_encode(PyObject *self,
return NULL;
size = PyBytes_GET_SIZE(str);
- newsize = 4*size;
- if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) {
+ if (size > PY_SSIZE_T_MAX / 4) {
PyErr_SetString(PyExc_OverflowError,
"string is too large to encode");
return NULL;
}
+ newsize = 4*size;
v = PyBytes_FromStringAndSize(NULL, newsize);
if (v == NULL) {
@@ -205,8 +208,8 @@ escape_encode(PyObject *self,
else if (c < ' ' || c >= 0x7f) {
*p++ = '\\';
*p++ = 'x';
- *p++ = hexdigits[(c & 0xf0) >> 4];
- *p++ = hexdigits[c & 0xf];
+ *p++ = Py_hexdigits[(c & 0xf0) >> 4];
+ *p++ = Py_hexdigits[c & 0xf];
}
else
*p++ = c;
@@ -236,8 +239,10 @@ unicode_internal_decode(PyObject *self,
return NULL;
if (PyUnicode_Check(obj)) {
+ if (PyUnicode_READY(obj) < 0)
+ return NULL;
Py_INCREF(obj);
- return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
+ return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
}
else {
if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
@@ -588,7 +593,7 @@ charmap_decode(PyObject *self,
return codec_tuple(unicode, pbuf.len);
}
-#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+#ifdef HAVE_MBCS
static PyObject *
mbcs_decode(PyObject *self,
@@ -613,7 +618,32 @@ mbcs_decode(PyObject *self,
return codec_tuple(decoded, consumed);
}
-#endif /* MS_WINDOWS */
+static PyObject * /* code_page_decode(code_page, data[, errors[, final]]): decode a buffer using the given integer code page */
+code_page_decode(PyObject *self,
+ PyObject *args)
+{
+ Py_buffer pbuf;
+ const char *errors = NULL; /* optional; NULL unless the caller supplies one */
+ int final = 0; /* optional flag; defaults to 0 */
+ Py_ssize_t consumed;
+ PyObject *decoded = NULL;
+ int code_page;
+
+ if (!PyArg_ParseTuple(args, "iy*|zi:code_page_decode", /* int code page, buffer, then optional errors and final */
+ &code_page, &pbuf, &errors, &final))
+ return NULL;
+ consumed = pbuf.len; /* default: report the whole buffer as consumed */
+
+ decoded = PyUnicode_DecodeCodePageStateful(code_page,
+ pbuf.buf, pbuf.len, errors,
+ final ? NULL : &consumed); /* the decoder only receives &consumed when final is 0 */
+ PyBuffer_Release(&pbuf); /* done before the NULL check, so the buffer is released on both paths */
+ if (decoded == NULL)
+ return NULL;
+ return codec_tuple(decoded, consumed); /* codec_tuple is defined elsewhere; presumably builds the (decoded, consumed) result - confirm */
+}
+
+#endif /* HAVE_MBCS */
/* --- Encoder ------------------------------------------------------------ */
@@ -645,17 +675,31 @@ unicode_internal_encode(PyObject *self,
PyObject *obj;
const char *errors = NULL;
const char *data;
- Py_ssize_t size;
+ Py_ssize_t len, size;
+
+ if (PyErr_WarnEx(PyExc_DeprecationWarning,
+ "unicode_internal codec has been deprecated",
+ 1))
+ return NULL;
if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
&obj, &errors))
return NULL;
if (PyUnicode_Check(obj)) {
- data = PyUnicode_AS_DATA(obj);
- size = PyUnicode_GET_DATA_SIZE(obj);
- return codec_tuple(PyBytes_FromStringAndSize(data, size),
- PyUnicode_GET_SIZE(obj));
+ Py_UNICODE *u;
+
+ if (PyUnicode_READY(obj) < 0)
+ return NULL;
+
+ u = PyUnicode_AsUnicodeAndSize(obj, &len);
+ if (u == NULL)
+ return NULL;
+ if (len > PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
+ return PyErr_NoMemory();
+ size = len * sizeof(Py_UNICODE);
+ return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
+ PyUnicode_GET_LENGTH(obj));
}
else {
if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
@@ -676,14 +720,12 @@ utf_7_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
- return NULL;
- v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- 0,
- 0,
- errors),
- PyUnicode_GET_SIZE(str));
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
+ return NULL;
+ }
+ v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -700,12 +742,12 @@ utf_8_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
return NULL;
- v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- errors),
- PyUnicode_GET_SIZE(str));
+ }
+ v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -730,13 +772,12 @@ utf_16_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
return NULL;
- v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- errors,
- byteorder),
- PyUnicode_GET_SIZE(str));
+ }
+ v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -753,13 +794,12 @@ utf_16_le_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
return NULL;
- v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- errors,
- -1),
- PyUnicode_GET_SIZE(str));
+ }
+ v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -776,13 +816,12 @@ utf_16_be_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
return NULL;
- v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- errors,
- +1),
- PyUnicode_GET_SIZE(str));
+ }
+ v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -807,13 +846,12 @@ utf_32_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
return NULL;
- v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- errors,
- byteorder),
- PyUnicode_GET_SIZE(str));
+ }
+ v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -830,13 +868,12 @@ utf_32_le_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
return NULL;
- v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- errors,
- -1),
- PyUnicode_GET_SIZE(str));
+ }
+ v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -853,13 +890,12 @@ utf_32_be_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
return NULL;
- v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- errors,
- +1),
- PyUnicode_GET_SIZE(str));
+ }
+ v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -876,11 +912,12 @@ unicode_escape_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
return NULL;
- v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str)),
- PyUnicode_GET_SIZE(str));
+ }
+ v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -897,12 +934,12 @@ raw_unicode_escape_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
return NULL;
- v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
- PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str)),
- PyUnicode_GET_SIZE(str));
+ }
+ v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -919,13 +956,12 @@ latin_1_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
return NULL;
- v = codec_tuple(PyUnicode_EncodeLatin1(
- PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- errors),
- PyUnicode_GET_SIZE(str));
+ }
+ v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -942,13 +978,12 @@ ascii_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
return NULL;
- v = codec_tuple(PyUnicode_EncodeASCII(
- PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- errors),
- PyUnicode_GET_SIZE(str));
+ }
+ v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -968,14 +1003,12 @@ charmap_encode(PyObject *self,
mapping = NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
- return NULL;
- v = codec_tuple(PyUnicode_EncodeCharmap(
- PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- mapping,
- errors),
- PyUnicode_GET_SIZE(str));
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
+ return NULL;
+ }
+ v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
+ PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
}
@@ -989,7 +1022,7 @@ charmap_build(PyObject *self, PyObject *args)
return PyUnicode_BuildEncodingMap(map);
}
-#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+#ifdef HAVE_MBCS
static PyObject *
mbcs_encode(PyObject *self,
@@ -1003,18 +1036,42 @@ mbcs_encode(PyObject *self,
return NULL;
str = PyUnicode_FromObject(str);
- if (str == NULL)
+ if (str == NULL || PyUnicode_READY(str) < 0) {
+ Py_XDECREF(str);
+ return NULL;
+ }
+ v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
+ PyUnicode_GET_LENGTH(str));
+ Py_DECREF(str);
+ return v;
+}
+
+static PyObject * /* code_page_encode(code_page, str[, errors]): encode str using the given integer code page */
+code_page_encode(PyObject *self,
+ PyObject *args)
+{
+ PyObject *str, *v;
+ const char *errors = NULL; /* optional; NULL unless the caller supplies one */
+ int code_page;
+
+ if (!PyArg_ParseTuple(args, "iO|z:code_page_encode", /* int code page, arbitrary object, optional errors */
+ &code_page, &str, &errors))
+ return NULL;
+
+ str = PyUnicode_FromObject(str); /* rebinds str to the call's result, which this function releases below */
+ if (str == NULL || PyUnicode_READY(str) < 0) { /* bail out if either step failed */
+ Py_XDECREF(str); /* X variant because str may be NULL on this path */
return NULL;
- v = codec_tuple(PyUnicode_EncodeMBCS(
- PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- errors),
- PyUnicode_GET_SIZE(str));
+ }
+ v = codec_tuple(PyUnicode_EncodeCodePage(code_page, /* NOTE(review): encode result is passed on unchecked; presumably codec_tuple copes with NULL - confirm */
+ str,
+ errors),
+ PyUnicode_GET_LENGTH(str)); /* second argument: length of str per PyUnicode_GET_LENGTH */
Py_DECREF(str);
return v;
}
-#endif /* MS_WINDOWS */
+#endif /* HAVE_MBCS */
/* --- Error handler registry --------------------------------------------- */
@@ -1101,9 +1158,11 @@ static PyMethodDef _codecs_functions[] = {
{"charmap_decode", charmap_decode, METH_VARARGS},
{"charmap_build", charmap_build, METH_VARARGS},
{"readbuffer_encode", readbuffer_encode, METH_VARARGS},
-#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+#ifdef HAVE_MBCS
{"mbcs_encode", mbcs_encode, METH_VARARGS},
{"mbcs_decode", mbcs_decode, METH_VARARGS},
+ {"code_page_encode", code_page_encode, METH_VARARGS},
+ {"code_page_decode", code_page_decode, METH_VARARGS},
#endif
{"register_error", register_error, METH_VARARGS,
register_error__doc__},