summaryrefslogtreecommitdiff
path: root/Objects
diff options
context:
space:
mode:
author: Eric V. Smith <eric@trueblade.com> 2016-10-31 14:46:26 -0400
committer: Eric V. Smith <eric@trueblade.com> 2016-10-31 14:46:26 -0400
commit: ef9f0416d8818ee0728b56b29ff148623101784a (patch)
tree: 69adb07bd422aeea61803d66028eea6ce5dc8786 /Objects
parent: 045241f1668aea1bbb77705a8c434d991947685b (diff)
download: cpython-ef9f0416d8818ee0728b56b29ff148623101784a.tar.gz
Issue 28128: Print out better error/warning messages for invalid string escapes. Backport to 3.6.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/bytesobject.c 37
-rw-r--r-- Objects/unicodeobject.c 38
2 files changed, 65 insertions, 10 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 598f6a13cf..779fe295db 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1105,11 +1105,12 @@ _PyBytes_DecodeEscapeRecode(const char **s, const char *end,
return p;
}
-PyObject *PyBytes_DecodeEscape(const char *s,
+PyObject *_PyBytes_DecodeEscape(const char *s,
Py_ssize_t len,
const char *errors,
Py_ssize_t unicode,
- const char *recode_encoding)
+ const char *recode_encoding,
+ const char **first_invalid_escape)
{
int c;
char *p;
@@ -1123,6 +1124,8 @@ PyObject *PyBytes_DecodeEscape(const char *s,
return NULL;
writer.overallocate = 1;
+ *first_invalid_escape = NULL;
+
end = s + len;
while (s < end) {
if (*s != '\\') {
@@ -1207,9 +1210,12 @@ PyObject *PyBytes_DecodeEscape(const char *s,
break;
default:
- if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0)
- goto failed;
+ if (*first_invalid_escape == NULL) {
+ *first_invalid_escape = s-1; /* Back up one char, since we've
+ already incremented s. */
+ }
*p++ = '\\';
+ s--;
goto non_esc; /* an arbitrary number of unescaped
UTF-8 bytes may follow. */
}
@@ -1222,6 +1228,29 @@ PyObject *PyBytes_DecodeEscape(const char *s,
return NULL;
}
+/* Decode the backslash escapes in the len bytes starting at s.
+
+   Public wrapper around _PyBytes_DecodeEscape(): every argument is passed
+   through unchanged.  When the decoder reports an unrecognized escape, one
+   DeprecationWarning naming the first such character is issued after
+   decoding has succeeded.
+
+   Returns the decoded object, or NULL if decoding fails or if issuing the
+   warning fails (in which case the decoded object is released). */
+PyObject *PyBytes_DecodeEscape(const char *s,
+                               Py_ssize_t len,
+                               const char *errors,
+                               Py_ssize_t unicode,
+                               const char *recode_encoding)
+{
+    const char *first_invalid_escape = NULL;
+    PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
+                                             recode_encoding,
+                                             &first_invalid_escape);
+    if (result == NULL)
+        return NULL;
+    if (first_invalid_escape != NULL) {
+        /* %c consumes an int; convert through unsigned char so that a byte
+           >= 0x80 is not sign-extended to a negative value on platforms
+           where plain char is signed. */
+        if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                             "invalid escape sequence '\\%c'",
+                             (unsigned char)*first_invalid_escape) < 0) {
+            Py_DECREF(result);
+            return NULL;
+        }
+    }
+    return result;
+}
/* -------------------------------------------------------------------- */
/* object api */
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e45f3d7c27..50b21cf9e6 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5896,9 +5896,10 @@ PyUnicode_AsUTF16String(PyObject *unicode)
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
PyObject *
-PyUnicode_DecodeUnicodeEscape(const char *s,
- Py_ssize_t size,
- const char *errors)
+_PyUnicode_DecodeUnicodeEscape(const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ const char **first_invalid_escape)
{
const char *starts = s;
_PyUnicodeWriter writer;
@@ -5906,6 +5907,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
+ // so we can remember if we've seen an invalid escape char or not
+ *first_invalid_escape = NULL;
+
if (size == 0) {
_Py_RETURN_UNICODE_EMPTY();
}
@@ -6080,9 +6084,10 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
goto error;
default:
- if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "invalid escape sequence '\\%c'", c) < 0)
- goto onError;
+ if (*first_invalid_escape == NULL) {
+ *first_invalid_escape = s-1; /* Back up one char, since we've
+ already incremented s. */
+ }
WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c);
continue;
@@ -6117,6 +6122,27 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
return NULL;
}
+/* Decode the Unicode-Escape encoded buffer of size bytes starting at s.
+
+   Public wrapper around _PyUnicode_DecodeUnicodeEscape(): s, size and errors
+   are passed through unchanged.  When the decoder reports an unrecognized
+   escape, one DeprecationWarning naming the first such character is issued
+   after decoding has succeeded.
+
+   Returns the decoded object, or NULL if decoding fails or if issuing the
+   warning fails (in which case the decoded object is released). */
+PyObject *
+PyUnicode_DecodeUnicodeEscape(const char *s,
+                              Py_ssize_t size,
+                              const char *errors)
+{
+    const char *first_invalid_escape = NULL;
+    PyObject *result = _PyUnicode_DecodeUnicodeEscape(s, size, errors,
+                                                      &first_invalid_escape);
+    if (result == NULL)
+        return NULL;
+    if (first_invalid_escape != NULL) {
+        /* %c consumes an int; convert through unsigned char so that a byte
+           >= 0x80 is not sign-extended to a negative value on platforms
+           where plain char is signed. */
+        if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                             "invalid escape sequence '\\%c'",
+                             (unsigned char)*first_invalid_escape) < 0) {
+            Py_DECREF(result);
+            return NULL;
+        }
+    }
+    return result;
+}
+
/* Return a Unicode-Escape string version of the Unicode object.
If quotes is true, the string is enclosed in u"" or u'' quotes as