summaryrefslogtreecommitdiff
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2009-05-05 04:43:17 +0000
committerMartin v. Löwis <martin@v.loewis.de>2009-05-05 04:43:17 +0000
commit032767018d78e6e0fe3136110e43f85d8df53290 (patch)
treecbde219181e89bacedae751572f1f9b43546e2e0 /Objects/unicodeobject.c
parent994b2b59666ac255f43d7d8069711001ded59c7f (diff)
downloadcpython-032767018d78e6e0fe3136110e43f85d8df53290.tar.gz
Issue #5915: Implement PEP 383, Non-decodable Bytes in
System Character Interfaces.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c89
1 files changed, 80 insertions, 9 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 18b6fa2680..218e70b093 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1530,6 +1530,53 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
}
}
+/* Convert the argument to a bytes object, according to the file
+ system encoding */
+
+int
+PyUnicode_FSConverter(PyObject* arg, void* addr)
+{
+ PyObject *output = NULL;
+ Py_ssize_t size;
+ void *data;
+ if (PyBytes_Check(arg) || PyByteArray_Check(arg)) {
+ output = arg;
+ Py_INCREF(output);
+ }
+ else {
+ arg = PyUnicode_FromObject(arg);
+ if (!arg)
+ return 0;
+ output = PyUnicode_AsEncodedObject(arg,
+ Py_FileSystemDefaultEncoding,
+ "utf8b");
+ Py_DECREF(arg);
+ if (!output)
+ return 0;
+ if (!PyBytes_Check(output)) {
+ Py_DECREF(output);
+ PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes");
+ return 0;
+ }
+ }
+ if (PyBytes_Check(output)) {
+ size = PyBytes_GET_SIZE(output);
+ data = PyBytes_AS_STRING(output);
+ }
+ else {
+ size = PyByteArray_GET_SIZE(output);
+ data = PyByteArray_AS_STRING(output);
+ }
+ if (size != strlen(data)) {
+ PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+ Py_DECREF(output);
+ return 0;
+ }
+ *(PyObject**)addr = output;
+ return 1;
+}
+
+
char*
_PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
{
@@ -4154,11 +4201,22 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
collstart-startp, collend-startp, &newpos);
if (repunicode == NULL)
goto onError;
- if (!PyUnicode_Check(repunicode)) {
- /* Implementation limitation: byte results not supported yet. */
- PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
+ if (PyBytes_Check(repunicode)) {
+ /* Directly copy bytes result to output. */
+ repsize = PyBytes_Size(repunicode);
+ if (repsize > 1) {
+ /* Make room for all additional bytes. */
+ if (_PyBytes_Resize(&res, ressize+repsize-1)) {
+ Py_DECREF(repunicode);
+ goto onError;
+ }
+ ressize += repsize-1;
+ }
+ memcpy(str, PyBytes_AsString(repunicode), repsize);
+ str += repsize;
+ p = startp + newpos;
Py_DECREF(repunicode);
- goto onError;
+ break;
}
/* need more space? (at least enough for what we
have+the replacement+the rest of the string, so
@@ -5123,11 +5181,24 @@ int charmap_encoding_error(
collstartpos, collendpos, &newpos);
if (repunicode == NULL)
return -1;
- if (!PyUnicode_Check(repunicode)) {
- /* Implementation limitation: byte results not supported yet. */
- PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
+ if (PyBytes_Check(repunicode)) {
+ /* Directly copy bytes result to output. */
+ Py_ssize_t outsize = PyBytes_Size(*res);
+ Py_ssize_t requiredsize;
+ repsize = PyBytes_Size(repunicode);
+ requiredsize = *respos + repsize;
+ if (requiredsize > outsize)
+ /* Make room for all additional bytes. */
+ if (charmapencode_resize(res, respos, requiredsize)) {
+ Py_DECREF(repunicode);
+ return -1;
+ }
+ memcpy(PyBytes_AsString(*res) + *respos,
+ PyBytes_AsString(repunicode), repsize);
+ *respos += repsize;
+ *inpos = newpos;
Py_DECREF(repunicode);
- return -1;
+ break;
}
/* generate replacement */
repsize = PyUnicode_GET_SIZE(repunicode);
@@ -5691,7 +5762,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
if (repunicode == NULL)
goto onError;
if (!PyUnicode_Check(repunicode)) {
- /* Implementation limitation: byte results not supported yet. */
+ /* Byte results not supported, since they have no decimal property. */
PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
Py_DECREF(repunicode);
goto onError;