summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2012-04-27 13:55:39 +0200
committerVictor Stinner <victor.stinner@gmail.com>2012-04-27 13:55:39 +0200
commit2555301e0aafefea124fe79f268207bc752a4722 (patch)
tree1cb1f2fa6b5cc325c50217c3d77c28c8fbcf62ec
parentef1380cc8725ace887ac0fd6ed53cf05230f674b (diff)
downloadcpython-2555301e0aafefea124fe79f268207bc752a4722.tar.gz
Check the consistency of newly created strings using _PyUnicode_CheckConsistency(str, 1)
* In debug mode, fill the string data with invalid characters
* Also simplify reference counting in PyCodec_BackslashReplaceErrors() and PyCodec_XMLCharRefReplaceErrors()
-rw-r--r--Modules/_json.c1
-rw-r--r--Modules/md5module.c1
-rw-r--r--Modules/sha1module.c1
-rw-r--r--Modules/sha256module.c1
-rw-r--r--Modules/sha512module.c1
-rw-r--r--Objects/bytesobject.c1
-rw-r--r--Objects/unicodeobject.c27
-rw-r--r--Python/codecs.c10
-rw-r--r--Python/compile.c1
-rw-r--r--Python/import.c1
10 files changed, 31 insertions, 14 deletions
diff --git a/Modules/_json.c b/Modules/_json.c
index 95c658ca7c..40c2ced502 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -246,6 +246,7 @@ ascii_escape_unicode(PyObject *pystr)
}
}
output[chars++] = '"';
+ assert(_PyUnicode_CheckConsistency(rval, 1));
return rval;
}
diff --git a/Modules/md5module.c b/Modules/md5module.c
index 86f602ebe5..ee44c4878d 100644
--- a/Modules/md5module.c
+++ b/Modules/md5module.c
@@ -397,6 +397,7 @@ MD5_hexdigest(MD5object *self, PyObject *unused)
c = (digest[i] & 0xf);
hex_digest[j++] = Py_hexdigits[c];
}
+ assert(_PyUnicode_CheckConsistency(retval, 1));
return retval;
}
diff --git a/Modules/sha1module.c b/Modules/sha1module.c
index 30e5c5018a..daea887960 100644
--- a/Modules/sha1module.c
+++ b/Modules/sha1module.c
@@ -373,6 +373,7 @@ SHA1_hexdigest(SHA1object *self, PyObject *unused)
c = (digest[i] & 0xf);
hex_digest[j++] = Py_hexdigits[c];
}
+ assert(_PyUnicode_CheckConsistency(retval, 1));
return retval;
}
diff --git a/Modules/sha256module.c b/Modules/sha256module.c
index f1ef329366..76d91afda3 100644
--- a/Modules/sha256module.c
+++ b/Modules/sha256module.c
@@ -466,6 +466,7 @@ SHA256_hexdigest(SHAobject *self, PyObject *unused)
c = (digest[i] & 0xf);
hex_digest[j++] = Py_hexdigits[c];
}
+ assert(_PyUnicode_CheckConsistency(retval, 1));
return retval;
}
diff --git a/Modules/sha512module.c b/Modules/sha512module.c
index 4f5a1139ee..88f8a64d06 100644
--- a/Modules/sha512module.c
+++ b/Modules/sha512module.c
@@ -532,6 +532,7 @@ SHA512_hexdigest(SHAobject *self, PyObject *unused)
c = (digest[i] & 0xf);
hex_digest[j++] = Py_hexdigits[c];
}
+ assert(_PyUnicode_CheckConsistency(retval, 1));
return retval;
}
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 2e6be431c9..b07be26896 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -626,6 +626,7 @@ PyBytes_Repr(PyObject *obj, int smartquotes)
*p++ = c;
}
*p++ = quote;
+ assert(_PyUnicode_CheckConsistency(v, 1));
return v;
}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 364de90877..60b0a1fbbd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -967,7 +967,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
PyObject *obj;
PyCompactUnicodeObject *unicode;
void *data;
- int kind_state;
+ enum PyUnicode_Kind kind;
int is_sharing, is_ascii;
Py_ssize_t char_size;
Py_ssize_t struct_size;
@@ -986,17 +986,17 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
is_sharing = 0;
struct_size = sizeof(PyCompactUnicodeObject);
if (maxchar < 128) {
- kind_state = PyUnicode_1BYTE_KIND;
+ kind = PyUnicode_1BYTE_KIND;
char_size = 1;
is_ascii = 1;
struct_size = sizeof(PyASCIIObject);
}
else if (maxchar < 256) {
- kind_state = PyUnicode_1BYTE_KIND;
+ kind = PyUnicode_1BYTE_KIND;
char_size = 1;
}
else if (maxchar < 65536) {
- kind_state = PyUnicode_2BYTE_KIND;
+ kind = PyUnicode_2BYTE_KIND;
char_size = 2;
if (sizeof(wchar_t) == 2)
is_sharing = 1;
@@ -1007,7 +1007,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
"invalid maximum character passed to PyUnicode_New");
return NULL;
}
- kind_state = PyUnicode_4BYTE_KIND;
+ kind = PyUnicode_4BYTE_KIND;
char_size = 4;
if (sizeof(wchar_t) == 4)
is_sharing = 1;
@@ -1041,7 +1041,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
_PyUnicode_LENGTH(unicode) = size;
_PyUnicode_HASH(unicode) = -1;
_PyUnicode_STATE(unicode).interned = 0;
- _PyUnicode_STATE(unicode).kind = kind_state;
+ _PyUnicode_STATE(unicode).kind = kind;
_PyUnicode_STATE(unicode).compact = 1;
_PyUnicode_STATE(unicode).ready = 1;
_PyUnicode_STATE(unicode).ascii = is_ascii;
@@ -1049,19 +1049,19 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
((char*)data)[size] = 0;
_PyUnicode_WSTR(unicode) = NULL;
}
- else if (kind_state == PyUnicode_1BYTE_KIND) {
+ else if (kind == PyUnicode_1BYTE_KIND) {
((char*)data)[size] = 0;
_PyUnicode_WSTR(unicode) = NULL;
_PyUnicode_WSTR_LENGTH(unicode) = 0;
unicode->utf8 = NULL;
unicode->utf8_length = 0;
- }
+ }
else {
unicode->utf8 = NULL;
unicode->utf8_length = 0;
- if (kind_state == PyUnicode_2BYTE_KIND)
+ if (kind == PyUnicode_2BYTE_KIND)
((Py_UCS2*)data)[size] = 0;
- else /* kind_state == PyUnicode_4BYTE_KIND */
+ else /* kind == PyUnicode_4BYTE_KIND */
((Py_UCS4*)data)[size] = 0;
if (is_sharing) {
_PyUnicode_WSTR_LENGTH(unicode) = size;
@@ -1072,6 +1072,13 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
_PyUnicode_WSTR(unicode) = NULL;
}
}
+#ifdef Py_DEBUG
+ /* Fill the data with invalid characters to detect bugs earlier.
+ _PyUnicode_CheckConsistency(str, 1) detects invalid characters,
+ at least for ASCII and UCS-4 strings. U+00FF is invalid in ASCII
+ and U+FFFFFFFF is an invalid character in Unicode 6.0. */
+ memset(data, 0xff, size * kind);
+#endif
assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
return obj;
}
diff --git a/Python/codecs.c b/Python/codecs.c
index 607feea81c..797a45f5a1 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -534,6 +534,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
data = PyUnicode_DATA(res);
for (i = 0; i < len; ++i)
PyUnicode_WRITE(kind, data, i, '?');
+ assert(_PyUnicode_CheckConsistency(res, 1));
return Py_BuildValue("(Nn)", res, end);
}
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
@@ -559,6 +560,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
data = PyUnicode_DATA(res);
for (i=0; i < len; i++)
PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
+ assert(_PyUnicode_CheckConsistency(res, 1));
return Py_BuildValue("(Nn)", res, end);
}
else {
@@ -652,8 +654,8 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
}
*outp++ = ';';
}
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ restuple = Py_BuildValue("(Nn)", res, end);
Py_DECREF(object);
return restuple;
}
@@ -720,8 +722,8 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
*outp++ = Py_hexdigits[c&0xf];
}
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ restuple = Py_BuildValue("(Nn)", res, end);
Py_DECREF(object);
return restuple;
}
diff --git a/Python/compile.c b/Python/compile.c
index 79d1d21670..10e9ad27f5 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -263,6 +263,7 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident)
Py_DECREF(result);
return NULL;
}
+ assert(_PyUnicode_CheckConsistency(result, 1));
return result;
}
diff --git a/Python/import.c b/Python/import.c
index 8cf10e658c..103e7de439 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -992,6 +992,7 @@ make_source_pathname(PyObject *path)
(j = dot0-right));
PyUnicode_WRITE(kind, data, i+j, 'p');
PyUnicode_WRITE(kind, data, i+j+1, 'y');
+ assert(_PyUnicode_CheckConsistency(result, 1));
return result;
}