diff options
-rw-r--r-- | Doc/c-api/unicode.rst | 11 | ||||
-rw-r--r-- | Include/unicodeobject.h | 10 | ||||
-rw-r--r-- | Misc/NEWS | 5 | ||||
-rw-r--r-- | Modules/_io/fileio.c | 3 | ||||
-rw-r--r-- | Modules/_tkinter.c | 4 | ||||
-rw-r--r-- | Modules/grpmodule.c | 3 | ||||
-rw-r--r-- | Modules/pwdmodule.c | 4 | ||||
-rw-r--r-- | Modules/spwdmodule.c | 4 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 16 | ||||
-rw-r--r-- | Python/import.c | 12 |
10 files changed, 46 insertions, 26 deletions
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 6e163d6ec7..4222a054a9 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -396,6 +396,7 @@ used, passsing :func:PyUnicode_FSConverter as the conversion function: Use :func:`PyUnicode_DecodeFSDefaultAndSize` if you know the string length. + .. cfunction:: PyObject* PyUnicode_DecodeFSDefault(const char *s) Decode a string using :cdata:`Py_FileSystemDefaultEncoding` and @@ -404,6 +405,16 @@ used, passsing :func:PyUnicode_FSConverter as the conversion function: If :cdata:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8. +.. cfunction:: PyObject* PyUnicode_EncodeFSDefault(PyObject *unicode) + + Encode a Unicode object to :cdata:`Py_FileSystemDefaultEncoding` with the + ``'surrogateescape'`` error handler, return a :func:`bytes` object. + + If :cdata:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8. + + .. versionadded:: 3.2 + + wchar_t Support """"""""""""""" diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 383187b1cc..ddc9000828 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -1268,6 +1268,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( Py_ssize_t size /* size */ ); +/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the + "surrogateescape" error handler, return a bytes object. + + If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8. +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault( + PyObject *unicode + ); + /* --- Methods & Slots ---------------------------------------------------- These are capable of handling Unicode objects and strings on input diff --git a/Misc/NEWS b/Misc/NEWS --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,11 @@ What's New in Python 3.2 Alpha 1? Core and Builtins ----------------- +- Issue #8715: Create PyUnicode_EncodeFSDefault() function: Encode a Unicode + object to Py_FileSystemDefaultEncoding with the "surrogateescape" error + handler, return a bytes object. 
If Py_FileSystemDefaultEncoding is not set, + fall back to UTF-8. + - Enable shortcuts for common encodings in PyUnicode_AsEncodedString() for any error handler, not only the default error handler (strict) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 6ecce1b520..4f450da636 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -247,8 +247,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) if (u == NULL) return -1; - stringobj = PyUnicode_AsEncodedString( - u, Py_FileSystemDefaultEncoding, "surrogateescape"); + stringobj = PyUnicode_EncodeFSDefault(u); Py_DECREF(u); if (stringobj == NULL) return -1; diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 8552575f40..c7c1530545 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -3147,9 +3147,7 @@ PyInit__tkinter(void) it also helps Tcl find its encodings. */ uexe = PyUnicode_FromWideChar(Py_GetProgramName(), -1); if (uexe) { - cexe = PyUnicode_AsEncodedString(uexe, - Py_FileSystemDefaultEncoding, - NULL); + cexe = PyUnicode_EncodeFSDefault(uexe); if (cexe) Tcl_FindExecutable(PyBytes_AsString(cexe)); Py_XDECREF(cexe); diff --git a/Modules/grpmodule.c b/Modules/grpmodule.c index d10a79d800..d64c142857 100644 --- a/Modules/grpmodule.c +++ b/Modules/grpmodule.c @@ -111,8 +111,7 @@ grp_getgrnam(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "U:getgrnam", &arg)) return NULL; - if ((bytes = PyUnicode_AsEncodedString(arg, Py_FileSystemDefaultEncoding, - "surrogateescape")) == NULL) + if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL) return NULL; if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1) goto out; diff --git a/Modules/pwdmodule.c b/Modules/pwdmodule.c index 35a387ad52..b303f95f31 100644 --- a/Modules/pwdmodule.c +++ b/Modules/pwdmodule.c @@ -132,9 +132,7 @@ pwd_getpwnam(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "U:getpwnam", &arg)) return NULL; - if ((bytes = PyUnicode_AsEncodedString(arg, - Py_FileSystemDefaultEncoding, - 
"surrogateescape")) == NULL) + if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL) return NULL; if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1) goto out; diff --git a/Modules/spwdmodule.c b/Modules/spwdmodule.c index da452e9ff0..96707b4ada 100644 --- a/Modules/spwdmodule.c +++ b/Modules/spwdmodule.c @@ -118,9 +118,7 @@ static PyObject* spwd_getspnam(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "U:getspnam", &arg)) return NULL; - if ((bytes = PyUnicode_AsEncodedString(arg, - Py_FileSystemDefaultEncoding, - "surrogateescape")) == NULL) + if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL) return NULL; if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1) goto out; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 307027a811..b97621b934 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1461,6 +1461,18 @@ PyObject *PyUnicode_AsEncodedObject(PyObject *unicode, return NULL; } +PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode) +{ + if (Py_FileSystemDefaultEncoding) + return PyUnicode_AsEncodedString(unicode, + Py_FileSystemDefaultEncoding, + "surrogateescape"); + else + return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), + PyUnicode_GET_SIZE(unicode), + "surrogateescape"); +} + PyObject *PyUnicode_AsEncodedString(PyObject *unicode, const char *encoding, const char *errors) @@ -1646,9 +1658,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr) arg = PyUnicode_FromObject(arg); if (!arg) return 0; - output = PyUnicode_AsEncodedObject(arg, - Py_FileSystemDefaultEncoding, - "surrogateescape"); + output = PyUnicode_EncodeFSDefault(arg); Py_DECREF(arg); if (!output) return 0; diff --git a/Python/import.c b/Python/import.c index 923888d5df..d23eb6a941 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1633,8 +1633,7 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf, if (!v) return NULL; if (PyUnicode_Check(v)) { - v = PyUnicode_AsEncodedString(v, - 
Py_FileSystemDefaultEncoding, NULL); + v = PyUnicode_EncodeFSDefault(v); if (v == NULL) return NULL; } @@ -2752,14 +2751,7 @@ ensure_fromlist(PyObject *mod, PyObject *fromlist, char *buf, Py_ssize_t buflen, char *subname; PyObject *submod; char *p; - if (!Py_FileSystemDefaultEncoding) { - item8 = PyUnicode_EncodeASCII(PyUnicode_AsUnicode(item), - PyUnicode_GetSize(item), - NULL); - } else { - item8 = PyUnicode_AsEncodedString(item, - Py_FileSystemDefaultEncoding, NULL); - } + item8 = PyUnicode_EncodeFSDefault(item); if (!item8) { PyErr_SetString(PyExc_ValueError, "Cannot encode path item"); return 0; |