(Merge 3.2) Issue #16455: On FreeBSD and Solaris, if the locale is C, the

ASCII/surrogateescape codec is now used, instead of the locale encoding, to decode the command line arguments. This change fixes inconsistencies with os.fsencode() and os.fsdecode() because these operating systems announce an ASCII locale encoding, whereas the ISO-8859-1 encoding is used in practice.
author: Victor Stinner <victor.stinner@gmail.com> 2013-01-03 01:21:07 +0100
committer: Victor Stinner <victor.stinner@gmail.com> 2013-01-03 01:21:07 +0100
commit: dc717e129e792c67293e3600b0ef279a255333bb (patch)
tree: f8fc99604c6dab9148866e480510988f9f7bc00b /Python/fileutils.c
parent: fddd1bd42cfdf9043da96f8ff669502d8d36d897 (diff)
parent: 84b680095b570964ec78ae51a10d33bd55b708d9 (diff)
download: cpython-dc717e129e792c67293e3600b0ef279a255333bb.tar.gz
1 files changed, 68 insertions, 18 deletions
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 53e8a470e9..b7c42e8e85 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -1,4 +1,5 @@
 #include "Python.h"
+#include "osdefs.h"
 #ifdef MS_WINDOWS
 #  include <windows.h>
 #endif
@@ -12,6 +13,36 @@
 extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
 #endif
 
+PyObject *
+_Py_device_encoding(int fd)
+{
+#if defined(MS_WINDOWS) || defined(MS_WIN64)
+    UINT cp;
+#endif
+    if (!_PyVerify_fd(fd) || !isatty(fd)) {
+        Py_RETURN_NONE;
+    }
+#if defined(MS_WINDOWS) || defined(MS_WIN64)
+    if (fd == 0)
+        cp = GetConsoleCP();
+    else if (fd == 1 || fd == 2)
+        cp = GetConsoleOutputCP();
+    else
+        cp = 0;
+    /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
+       has no console */
+    if (cp != 0)
+        return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
+#elif defined(CODESET)
+    {
+        char *codeset = nl_langinfo(CODESET);
+        if (codeset != NULL && codeset[0] != 0)
+            return PyUnicode_FromString(codeset);
+    }
+#endif
+    Py_RETURN_NONE;
+}
+
 #if !defined(__APPLE__) && !defined(MS_WINDOWS)
 extern int _Py_normalize_encoding(const char *, char *, size_t);
 
@@ -200,7 +231,9 @@ decode_ascii_surrogateescape(const char *arg, size_t *size)
    Return a pointer to a newly allocated wide character string (use
    PyMem_Free() to free the memory) and write the number of written wide
    characters excluding the null character into *size if size is not NULL, or
-   NULL on error (conversion or memory allocation error).
+   NULL on error (decoding or memory allocation error). If size is not NULL,
+   *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
+   error.
 
    Conversion errors should never happen, unless there is a bug in the C
    library. */
@@ -292,8 +325,9 @@ _Py_char2wchar(const char* arg, size_t *size)
                since we provide everything that we have -
                unless there is a bug in the C library, or I
                misunderstood how mbrtowc works. */
-            fprintf(stderr, "unexpected mbrtowc result -2\n");
             PyMem_Free(res);
+            if (size != NULL)
+                *size = (size_t)-2;
             return NULL;
         }
         if (converted == (size_t)-1) {
@@ -329,7 +363,8 @@ _Py_char2wchar(const char* arg, size_t *size)
 #endif   /* HAVE_MBRTOWC */
     return res;
 oom:
-    fprintf(stderr, "out of memory\n");
+    if (size != NULL)
+        *size = (size_t)-1;
     return NULL;
 #endif   /* __APPLE__ */
 }
@@ -341,10 +376,10 @@ oom:
    This function is the reverse of _Py_char2wchar().
 
    Return a pointer to a newly allocated byte string (use PyMem_Free() to free
-   the memory), or NULL on conversion or memory allocation error.
+   the memory), or NULL on encoding or memory allocation error.
 
    If error_pos is not NULL: *error_pos is the index of the invalid character
-   on conversion error, or (size_t)-1 otherwise. */
+   on encoding error, or (size_t)-1 otherwise. */
 char*
 _Py_wchar2char(const wchar_t *text, size_t *error_pos)
 {
@@ -357,9 +392,7 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos)
     if (unicode == NULL)
         return NULL;
 
-    bytes = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
-                                 PyUnicode_GET_SIZE(unicode),
-                                 "surrogateescape");
+    bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
     Py_DECREF(unicode);
     if (bytes == NULL) {
         PyErr_Clear();
@@ -483,8 +516,8 @@ _Py_wstat(const wchar_t* path, struct stat *buf)
 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
    call stat() otherwise. Only fill st_mode attribute on Windows.
 
-   Return 0 on success, -1 on _wstat() / stat() error or (if PyErr_Occurred())
-   unicode error. */
+   Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
+   raised. */
 
 int
 _Py_stat(PyObject *path, struct stat *statbuf)
@@ -492,8 +525,12 @@ _Py_stat(PyObject *path, struct stat *statbuf)
 #ifdef MS_WINDOWS
     int err;
     struct _stat wstatbuf;
+    wchar_t *wpath;
 
-    err = _wstat(PyUnicode_AS_UNICODE(path), &wstatbuf);
+    wpath = PyUnicode_AsUnicode(path);
+    if (wpath == NULL)
+        return -2;
+    err = _wstat(wpath, &wstatbuf);
     if (!err)
         statbuf->st_mode = wstatbuf.st_mode;
     return err;
@@ -501,7 +538,7 @@ _Py_stat(PyObject *path, struct stat *statbuf)
     int ret;
     PyObject *bytes = PyUnicode_EncodeFSDefault(path);
     if (bytes == NULL)
-        return -1;
+        return -2;
     ret = stat(PyBytes_AS_STRING(bytes), statbuf);
     Py_DECREF(bytes);
     return ret;
@@ -547,18 +584,29 @@ FILE*
 _Py_fopen(PyObject *path, const char *mode)
 {
 #ifdef MS_WINDOWS
+    wchar_t *wpath;
     wchar_t wmode[10];
     int usize;
 
+    if (!PyUnicode_Check(path)) {
+        PyErr_Format(PyExc_TypeError,
+                     "str file path expected under Windows, got %R",
+                     Py_TYPE(path));
+        return NULL;
+    }
+    wpath = PyUnicode_AsUnicode(path);
+    if (wpath == NULL)
+        return NULL;
+
     usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
     if (usize == 0)
         return NULL;
 
-    return _wfopen(PyUnicode_AS_UNICODE(path), wmode);
+    return _wfopen(wpath, wmode);
 #else
     FILE *f;
-    PyObject *bytes = PyUnicode_EncodeFSDefault(path);
-    if (bytes == NULL)
+    PyObject *bytes;
+    if (!PyUnicode_FSConverter(path, &bytes))
         return NULL;
     f = fopen(PyBytes_AS_STRING(bytes), mode);
     Py_DECREF(bytes);
@@ -569,7 +617,7 @@ _Py_fopen(PyObject *path, const char *mode)
 #ifdef HAVE_READLINK
 
 /* Read value of symbolic link. Encode the path to the locale encoding, decode
-   the result from the locale encoding. */
+   the result from the locale encoding. Return -1 on error. */
 
 int
 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
@@ -613,7 +661,8 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
 #ifdef HAVE_REALPATH
 
 /* Return the canonicalized absolute pathname. Encode path to the locale
-   encoding, decode the result from the locale encoding. */
+   encoding, decode the result from the locale encoding.
+   Return NULL on error. */
 
 wchar_t*
 _Py_wrealpath(const wchar_t *path,
@@ -651,7 +700,8 @@ _Py_wrealpath(const wchar_t *path,
 #endif
 
 /* Get the current directory. size is the buffer size in wide characters
-   including the null character. Decode the path from the locale encoding. */
+   including the null character. Decode the path from the locale encoding.
+   Return NULL on error. */
 
 wchar_t*
 _Py_wgetcwd(wchar_t *buf, size_t size)
author	Victor Stinner <victor.stinner@gmail.com>	2013-01-03 01:21:07 +0100
committer	Victor Stinner <victor.stinner@gmail.com>	2013-01-03 01:21:07 +0100
commit	dc717e129e792c67293e3600b0ef279a255333bb (patch)
tree	f8fc99604c6dab9148866e480510988f9f7bc00b /Python/fileutils.c
parent	fddd1bd42cfdf9043da96f8ff669502d8d36d897 (diff)
parent	84b680095b570964ec78ae51a10d33bd55b708d9 (diff)
download	cpython-dc717e129e792c67293e3600b0ef279a255333bb.tar.gz