summaryrefslogtreecommitdiff
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com> 2015-09-24 14:45:00 +0200
committer: Victor Stinner <victor.stinner@gmail.com> 2015-09-24 14:45:00 +0200
commit: 3ed999f66ffb4d216070c1537b7c05bf148d9d99 (patch)
tree: 218add45eb26d512d848684a42bacc5787333877 /Objects/unicodeobject.c
parent: 5729648ef29d129fdb24cb15c51cbf859d682187 (diff)
download: cpython-3ed999f66ffb4d216070c1537b7c05bf148d9d99.tar.gz
Issue #25227: Cleanup unicode_encode_ucs1() error handler
* Change the type of limit from unsigned int to Py_UCS4, so that it has the same type as the "ch" variable (a Unicode character).
* Reuse the ch variable for _Py_ERROR_XMLCHARREFREPLACE.
* Add some newlines for readability.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c | 22
1 files changed, 13 insertions, 9 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d0b285abac..da2aac7dc1 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6415,7 +6415,7 @@ unicode_encode_call_errorhandler(const char *errors,
static PyObject *
unicode_encode_ucs1(PyObject *unicode,
const char *errors,
- unsigned int limit)
+ const Py_UCS4 limit)
{
/* input state */
Py_ssize_t pos=0, size;
@@ -6449,12 +6449,12 @@ unicode_encode_ucs1(PyObject *unicode,
ressize = size;
while (pos < size) {
- Py_UCS4 c = PyUnicode_READ(kind, data, pos);
+ Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
/* can we encode this? */
- if (c<limit) {
+ if (ch < limit) {
/* no overflow check, because we know that the space is enough */
- *str++ = (char)c;
+ *str++ = (char)ch;
++pos;
}
else {
@@ -6481,7 +6481,7 @@ unicode_encode_ucs1(PyObject *unicode,
case _Py_ERROR_REPLACE:
while (collstart++ < collend)
*str++ = '?';
- /* fall through */
+ /* fall through ignore error handler */
case _Py_ERROR_IGNORE:
pos = collend;
break;
@@ -6491,8 +6491,9 @@ unicode_encode_ucs1(PyObject *unicode,
requiredsize = respos;
/* determine replacement size */
for (i = collstart; i < collend; ++i) {
- Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Py_ssize_t incr;
+
+ ch = PyUnicode_READ(kind, data, i);
if (ch < 10)
incr = 2+1+1;
else if (ch < 100)
@@ -6538,6 +6539,7 @@ unicode_encode_ucs1(PyObject *unicode,
if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
PyUnicode_READY(repunicode) == -1))
goto onError;
+
if (PyBytes_Check(repunicode)) {
/* Directly copy bytes result to output. */
repsize = PyBytes_Size(repunicode);
@@ -6561,6 +6563,7 @@ unicode_encode_ucs1(PyObject *unicode,
Py_DECREF(repunicode);
break;
}
+
/* need more space? (at least enough for what we
have+the replacement+the rest of the string, so
we won't have to check space for encodable characters) */
@@ -6583,17 +6586,18 @@ unicode_encode_ucs1(PyObject *unicode,
str = PyBytes_AS_STRING(res) + respos;
ressize = requiredsize;
}
+
/* check if there is anything unencodable in the replacement
and copy it to the output */
for (i = 0; repsize-->0; ++i, ++str) {
- c = PyUnicode_READ_CHAR(repunicode, i);
- if (c >= limit) {
+ ch = PyUnicode_READ_CHAR(repunicode, i);
+ if (ch >= limit) {
raise_encode_exception(&exc, encoding, unicode,
pos, pos+1, reason);
Py_DECREF(repunicode);
goto onError;
}
- *str = (char)c;
+ *str = (char)ch;
}
pos = newpos;
Py_DECREF(repunicode);