From 2c626a37d1481035019162e951c748cb854696c7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 15 May 2014 14:37:42 +0300 Subject: Issue #13916: Disallowed the surrogatepass error handler for non UTF-* encodings. --- Python/codecs.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'Python/codecs.c') diff --git a/Python/codecs.c b/Python/codecs.c index e06d6e0922..700313633e 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -901,6 +901,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) } } +#define ENC_UNKNOWN -1 #define ENC_UTF8 0 #define ENC_UTF16BE 1 #define ENC_UTF16LE 2 @@ -916,7 +917,11 @@ get_standard_encoding(const char *encoding, int *bytelength) encoding += 3; if (*encoding == '-' || *encoding == '_' ) encoding++; - if (encoding[0] == '1' && encoding[1] == '6') { + if (encoding[0] == '8' && encoding[1] == '\0') { + *bytelength = 3; + return ENC_UTF8; + } + else if (encoding[0] == '1' && encoding[1] == '6') { encoding += 2; *bytelength = 2; if (*encoding == '\0') { @@ -955,9 +960,7 @@ get_standard_encoding(const char *encoding, int *bytelength) } } } - /* utf-8 */ - *bytelength = 3; - return ENC_UTF8; + return ENC_UNKNOWN; } /* This handler is declared static until someone demonstrates @@ -994,6 +997,12 @@ PyCodec_SurrogatePassErrors(PyObject *exc) } code = get_standard_encoding(encoding, &bytelength); Py_DECREF(encode); + if (code == ENC_UNKNOWN) { + /* Not supported, fail with original exception */ + PyErr_SetObject(PyExceptionInstance_Class(exc), exc); + Py_DECREF(object); + return NULL; + } res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start)); if (!res) { @@ -1068,6 +1077,12 @@ PyCodec_SurrogatePassErrors(PyObject *exc) } code = get_standard_encoding(encoding, &bytelength); Py_DECREF(encode); + if (code == ENC_UNKNOWN) { + /* Not supported, fail with original exception */ + PyErr_SetObject(PyExceptionInstance_Class(exc), exc); + Py_DECREF(object); + return NULL; + } /* Try decoding a single surrogate character. If there are more, let the codec call us again. */ -- cgit v1.2.1 From 569a55317259d71ebde1c85ef48874eba3824a5e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 16 May 2014 14:46:20 +0200 Subject: Issue #13916: Fix surrogatepass error handler on Windows --- Python/codecs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Python/codecs.c') diff --git a/Python/codecs.c b/Python/codecs.c index 700313633e..4c2ae381b3 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -960,6 +960,10 @@ get_standard_encoding(const char *encoding, int *bytelength) } } } + else if (strcmp(encoding, "CP_UTF8") == 0) { + *bytelength = 3; + return ENC_UTF8; + } return ENC_UNKNOWN; } -- cgit v1.2.1