summary | refs | log | tree | commit | diff
path: root/Objects
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2016-10-30 18:25:27 +0200
committer: Serhiy Storchaka <storchaka@gmail.com> 2016-10-30 18:25:27 +0200
commit: c5a455c69bc256848218daec3909c6d34fa40b36 (patch)
tree: 11fb9595943206cbcdd9866e196e91a1c70e107d /Objects
parent: 56b5bad0729a5fc8b685fbdd1958c87d603daa6a (diff)
download: cpython-c5a455c69bc256848218daec3909c6d34fa40b36.tar.gz
Issue #28561: Clean up UTF-8 encoder: remove dead code, update comments, etc.
Patch by Xiang Zhang.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/stringlib/codecs.h 14
1 file changed, 4 insertions, 10 deletions
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index a9d0a349d9..43f2f3266f 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -262,9 +262,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
Py_ssize_t size,
const char *errors)
{
-#define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */
-
- Py_ssize_t i; /* index into s of next input byte */
+ Py_ssize_t i; /* index into data of next input character */
char *p; /* next free byte in output buffer */
#if STRINGLIB_SIZEOF_CHAR > 1
PyObject *error_handler_obj = NULL;
@@ -389,7 +387,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
goto error;
/* subtract preallocated bytes */
- writer.min_size -= max_char_size;
+ writer.min_size -= max_char_size * (newpos - startpos);
if (PyBytes_Check(rep)) {
p = _PyBytesWriter_WriteBytes(&writer, p,
@@ -402,14 +400,12 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
goto error;
if (!PyUnicode_IS_ASCII(rep)) {
- raise_encode_exception(&exc, "utf-8",
- unicode,
- i-1, i,
+ raise_encode_exception(&exc, "utf-8", unicode,
+ startpos, endpos,
"surrogates not allowed");
goto error;
}
- assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
p = _PyBytesWriter_WriteBytes(&writer, p,
PyUnicode_DATA(rep),
PyUnicode_GET_LENGTH(rep));
@@ -463,8 +459,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
_PyBytesWriter_Dealloc(&writer);
return NULL;
#endif
-
-#undef MAX_SHORT_UNICHARS
}
/* The pattern for constructing UCS2-repeated masks. */