From 4d0ebf454346e70e5b7c7803ab7939f7c12a2b39 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 30 Oct 2012 01:42:39 +0100 Subject: Issue #16330: Use surrogate-related macros Patch written by Serhiy Storchaka. --- Python/fileutils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Python/fileutils.c') diff --git a/Python/fileutils.c b/Python/fileutils.c index 501cb8c8d6..526751d5ad 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -85,7 +85,7 @@ _Py_char2wchar(const char* arg, size_t *size) /* Only use the result if it contains no surrogate characters. */ for (tmp = res; *tmp != 0 && - (*tmp < 0xd800 || *tmp > 0xdfff); tmp++) + !Py_UNICODE_IS_SURROGATE(*tmp); tmp++) ; if (*tmp == 0) { if (size != NULL) @@ -131,7 +131,7 @@ _Py_char2wchar(const char* arg, size_t *size) memset(&mbs, 0, sizeof mbs); continue; } - if (*out >= 0xd800 && *out <= 0xdfff) { + if (Py_UNICODE_IS_SURROGATE(*out)) { /* Surrogate character. Escape the original byte sequence with surrogateescape. */ argsize -= converted; -- cgit v1.2.1 From 99f96c8a9dd3ab52c622c7fe6eb7173da86f3fa3 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 12 Nov 2012 23:04:02 +0100 Subject: Issue #16416: OS data are now always encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding (which may be ASCII if no locale environment variable is set), to avoid inconsistencies with os.fsencode() and os.fsdecode() functions which are already using UTF-8/surrogateescape. 
--- Python/fileutils.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) (limited to 'Python/fileutils.c') diff --git a/Python/fileutils.c b/Python/fileutils.c index 2e25aae075..42a532d9fa 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -8,6 +8,10 @@ #include #endif +#ifdef __APPLE__ +extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size); +#endif + PyObject * _Py_device_encoding(int fd) { @@ -60,6 +64,15 @@ _Py_device_encoding(int fd) wchar_t* _Py_char2wchar(const char* arg, size_t *size) { +#ifdef __APPLE__ + wchar_t *wstr; + wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg)); + if (wstr == NULL) + return NULL; + if (size != NULL) + *size = wcslen(wstr); + return wstr; +#else wchar_t *res; #ifdef HAVE_BROKEN_MBSTOWCS /* Some platforms have a broken implementation of @@ -145,7 +158,7 @@ _Py_char2wchar(const char* arg, size_t *size) argsize -= converted; out++; } -#else +#else /* HAVE_MBRTOWC */ /* Cannot use C locale for escaping; manually escape as if charset is ASCII (i.e. escape all bytes > 128. This will still roundtrip correctly in the locale's charset, which must be an ASCII superset. 
*/ @@ -160,7 +173,7 @@ _Py_char2wchar(const char* arg, size_t *size) else *out++ = 0xdc00 + *in++; *out = 0; -#endif +#endif /* HAVE_MBRTOWC */ if (size != NULL) *size = out - res; return res; @@ -168,6 +181,7 @@ oom: if (size != NULL) *size = (size_t)-1; return NULL; +#endif /* __APPLE__ */ } /* Encode a (wide) character string to the locale encoding with the @@ -184,6 +198,34 @@ oom: char* _Py_wchar2char(const wchar_t *text, size_t *error_pos) { +#ifdef __APPLE__ + Py_ssize_t len; + PyObject *unicode, *bytes = NULL; + char *cpath; + + unicode = PyUnicode_FromWideChar(text, wcslen(text)); + if (unicode == NULL) { + Py_DECREF(unicode); + return NULL; + } + + bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape"); + Py_DECREF(unicode); + if (bytes == NULL) { + PyErr_Clear(); + return NULL; + } + + len = PyBytes_GET_SIZE(bytes); + cpath = PyMem_Malloc(len+1); + if (cpath == NULL) { + Py_DECREF(bytes); + return NULL; + } + memcpy(cpath, PyBytes_AsString(bytes), len + 1); + Py_DECREF(bytes); + return cpath; +#else /* __APPLE__ */ const size_t len = wcslen(text); char *result = NULL, *bytes = NULL; size_t i, size, converted; @@ -243,6 +285,7 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos) bytes = result; } return result; +#endif /* __APPLE__ */ } /* In principle, this should use HAVE__WSTAT, and _wstat -- cgit v1.2.1 From e037652273f080b41545e058575097645959b804 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 12 Nov 2012 23:32:21 +0100 Subject: Issue #16416: Fix error handling in _Py_wchar2char() and _Py_char2wchar() functions --- Python/fileutils.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'Python/fileutils.c') diff --git a/Python/fileutils.c b/Python/fileutils.c index 42a532d9fa..2cd75ce163 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -67,10 +67,12 @@ _Py_char2wchar(const char* arg, size_t *size) #ifdef __APPLE__ wchar_t *wstr; wstr = _Py_DecodeUTF8_surrogateescape(arg, 
strlen(arg)); - if (wstr == NULL) - return NULL; - if (size != NULL) - *size = wcslen(wstr); + if (size != NULL) { + if (wstr != NULL) + *size = wcslen(wstr); + else + *size = (size_t)-1; + } return wstr; #else wchar_t *res; @@ -204,22 +206,25 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos) char *cpath; unicode = PyUnicode_FromWideChar(text, wcslen(text)); - if (unicode == NULL) { - Py_DECREF(unicode); + if (unicode == NULL) return NULL; - } bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape"); Py_DECREF(unicode); if (bytes == NULL) { PyErr_Clear(); + if (error_pos != NULL) + *error_pos = (size_t)-1; return NULL; } len = PyBytes_GET_SIZE(bytes); cpath = PyMem_Malloc(len+1); if (cpath == NULL) { + PyErr_Clear(); Py_DECREF(bytes); + if (error_pos != NULL) + *error_pos = (size_t)-1; return NULL; } memcpy(cpath, PyBytes_AsString(bytes), len + 1); @@ -231,9 +236,6 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos) size_t i, size, converted; wchar_t c, buf[2]; - if (error_pos != NULL) - *error_pos = (size_t)-1; - /* The function works in two steps: 1. compute the length of the output buffer in bytes (size) 2. outputs the bytes */ @@ -280,8 +282,11 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos) size += 1; /* nul byte at the end */ result = PyMem_Malloc(size); - if (result == NULL) + if (result == NULL) { + if (error_pos != NULL) + *error_pos = (size_t)-1; return NULL; + } bytes = result; } return result; -- cgit v1.2.1