diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2013-01-03 01:21:07 +0100 |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2013-01-03 01:21:07 +0100 |
commit | dc717e129e792c67293e3600b0ef279a255333bb (patch) | |
tree | f8fc99604c6dab9148866e480510988f9f7bc00b /Python/fileutils.c | |
parent | fddd1bd42cfdf9043da96f8ff669502d8d36d897 (diff) | |
parent | 84b680095b570964ec78ae51a10d33bd55b708d9 (diff) | |
download | cpython-dc717e129e792c67293e3600b0ef279a255333bb.tar.gz |
(Merge 3.2) Issue #16455: On FreeBSD and Solaris, if the locale is C, the
ASCII/surrogateescape codec is now used, instead of the locale encoding, to
decode the command line arguments. This change fixes inconsistencies with
os.fsencode() and os.fsdecode() because these operating systems announces an
ASCII locale encoding, whereas the ISO-8859-1 encoding is used in practice.
Diffstat (limited to 'Python/fileutils.c')
-rw-r--r-- | Python/fileutils.c | 86 |
1 files changed, 68 insertions, 18 deletions
diff --git a/Python/fileutils.c b/Python/fileutils.c index 53e8a470e9..b7c42e8e85 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1,4 +1,5 @@ #include "Python.h" +#include "osdefs.h" #ifdef MS_WINDOWS # include <windows.h> #endif @@ -12,6 +13,36 @@ extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size); #endif +PyObject * +_Py_device_encoding(int fd) +{ +#if defined(MS_WINDOWS) || defined(MS_WIN64) + UINT cp; +#endif + if (!_PyVerify_fd(fd) || !isatty(fd)) { + Py_RETURN_NONE; + } +#if defined(MS_WINDOWS) || defined(MS_WIN64) + if (fd == 0) + cp = GetConsoleCP(); + else if (fd == 1 || fd == 2) + cp = GetConsoleOutputCP(); + else + cp = 0; + /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application + has no console */ + if (cp != 0) + return PyUnicode_FromFormat("cp%u", (unsigned int)cp); +#elif defined(CODESET) + { + char *codeset = nl_langinfo(CODESET); + if (codeset != NULL && codeset[0] != 0) + return PyUnicode_FromString(codeset); + } +#endif + Py_RETURN_NONE; +} + #if !defined(__APPLE__) && !defined(MS_WINDOWS) extern int _Py_normalize_encoding(const char *, char *, size_t); @@ -200,7 +231,9 @@ decode_ascii_surrogateescape(const char *arg, size_t *size) Return a pointer to a newly allocated wide character string (use PyMem_Free() to free the memory) and write the number of written wide characters excluding the null character into *size if size is not NULL, or - NULL on error (conversion or memory allocation error). + NULL on error (decoding or memory allocation error). If size is not NULL, + *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding + error. Conversion errors should never happen, unless there is a bug in the C library. */ @@ -292,8 +325,9 @@ _Py_char2wchar(const char* arg, size_t *size) since we provide everything that we have - unless there is a bug in the C library, or I misunderstood how mbrtowc works. */ - fprintf(stderr, "unexpected mbrtowc result -2\n"); PyMem_Free(res); + if (size != NULL) + *size = (size_t)-2; return NULL; } if (converted == (size_t)-1) { @@ -329,7 +363,8 @@ _Py_char2wchar(const char* arg, size_t *size) #endif /* HAVE_MBRTOWC */ return res; oom: - fprintf(stderr, "out of memory\n"); + if (size != NULL) + *size = (size_t)-1; return NULL; #endif /* __APPLE__ */ } @@ -341,10 +376,10 @@ oom: This function is the reverse of _Py_char2wchar(). Return a pointer to a newly allocated byte string (use PyMem_Free() to free - the memory), or NULL on conversion or memory allocation error. + the memory), or NULL on encoding or memory allocation error. If error_pos is not NULL: *error_pos is the index of the invalid character - on conversion error, or (size_t)-1 otherwise. */ + on encoding error, or (size_t)-1 otherwise. */ char* _Py_wchar2char(const wchar_t *text, size_t *error_pos) { @@ -357,9 +392,7 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos) if (unicode == NULL) return NULL; - bytes = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), - PyUnicode_GET_SIZE(unicode), - "surrogateescape"); + bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape"); Py_DECREF(unicode); if (bytes == NULL) { PyErr_Clear(); @@ -483,8 +516,8 @@ _Py_wstat(const wchar_t* path, struct stat *buf) /* Call _wstat() on Windows, or encode the path to the filesystem encoding and call stat() otherwise. Only fill st_mode attribute on Windows. - Return 0 on success, -1 on _wstat() / stat() error or (if PyErr_Occurred()) - unicode error. */ + Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was + raised. */ int _Py_stat(PyObject *path, struct stat *statbuf) @@ -492,8 +525,12 @@ _Py_stat(PyObject *path, struct stat *statbuf) #ifdef MS_WINDOWS int err; struct _stat wstatbuf; + wchar_t *wpath; - err = _wstat(PyUnicode_AS_UNICODE(path), &wstatbuf); + wpath = PyUnicode_AsUnicode(path); + if (wpath == NULL) + return -2; + err = _wstat(wpath, &wstatbuf); if (!err) statbuf->st_mode = wstatbuf.st_mode; return err; @@ -501,7 +538,7 @@ _Py_stat(PyObject *path, struct stat *statbuf) int ret; PyObject *bytes = PyUnicode_EncodeFSDefault(path); if (bytes == NULL) - return -1; + return -2; ret = stat(PyBytes_AS_STRING(bytes), statbuf); Py_DECREF(bytes); return ret; @@ -547,18 +584,29 @@ FILE* _Py_fopen(PyObject *path, const char *mode) { #ifdef MS_WINDOWS + wchar_t *wpath; wchar_t wmode[10]; int usize; + if (!PyUnicode_Check(path)) { + PyErr_Format(PyExc_TypeError, + "str file path expected under Windows, got %R", + Py_TYPE(path)); + return NULL; + } + wpath = PyUnicode_AsUnicode(path); + if (wpath == NULL) + return NULL; + usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode)); if (usize == 0) return NULL; - return _wfopen(PyUnicode_AS_UNICODE(path), wmode); + return _wfopen(wpath, wmode); #else FILE *f; - PyObject *bytes = PyUnicode_EncodeFSDefault(path); - if (bytes == NULL) + PyObject *bytes; + if (!PyUnicode_FSConverter(path, &bytes)) return NULL; f = fopen(PyBytes_AS_STRING(bytes), mode); Py_DECREF(bytes); @@ -569,7 +617,7 @@ _Py_fopen(PyObject *path, const char *mode) #ifdef HAVE_READLINK /* Read value of symbolic link. Encode the path to the locale encoding, decode - the result from the locale encoding. */ + the result from the locale encoding. Return -1 on error. */ int _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz) @@ -613,7 +661,8 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz) #ifdef HAVE_REALPATH /* Return the canonicalized absolute pathname. Encode path to the locale - encoding, decode the result from the locale encoding. */ + encoding, decode the result from the locale encoding. + Return NULL on error. */ wchar_t* _Py_wrealpath(const wchar_t *path, @@ -651,7 +700,8 @@ _Py_wrealpath(const wchar_t *path, #endif /* Get the current directory. size is the buffer size in wide characters - including the null character. Decode the path from the locale encoding. */ + including the null character. Decode the path from the locale encoding. + Return NULL on error. */ wchar_t* _Py_wgetcwd(wchar_t *buf, size_t size) |