1 files changed, 76 insertions, 25 deletions
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 7d08e0726a..5c66ecf0b5 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -1,10 +1,12 @@
 #include "Python.h"
+#include "osdefs.h"
+#include <locale.h>
+
 #ifdef MS_WINDOWS
 #  include <windows.h>
 #endif
 
 #ifdef HAVE_LANGINFO_H
-#include <locale.h>
 #include <langinfo.h>
 #endif
 
@@ -12,6 +14,36 @@
 extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
 #endif
 
+PyObject *
+_Py_device_encoding(int fd)
+{
+#if defined(MS_WINDOWS) || defined(MS_WIN64)
+    UINT cp;
+#endif
+    if (!_PyVerify_fd(fd) || !isatty(fd)) {
+        Py_RETURN_NONE;
+    }
+#if defined(MS_WINDOWS) || defined(MS_WIN64)
+    if (fd == 0)
+        cp = GetConsoleCP();
+    else if (fd == 1 || fd == 2)
+        cp = GetConsoleOutputCP();
+    else
+        cp = 0;
+    /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
+       has no console */
+    if (cp != 0)
+        return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
+#elif defined(CODESET)
+    {
+        char *codeset = nl_langinfo(CODESET);
+        if (codeset != NULL && codeset[0] != 0)
+            return PyUnicode_FromString(codeset);
+    }
+#endif
+    Py_RETURN_NONE;
+}
+
 #if !defined(__APPLE__) && !defined(MS_WINDOWS)
 extern int _Py_normalize_encoding(const char *, char *, size_t);
 
@@ -203,7 +235,9 @@ decode_ascii_surrogateescape(const char *arg, size_t *size)
    Return a pointer to a newly allocated wide character string (use
    PyMem_Free() to free the memory) and write the number of written wide
    characters excluding the null character into *size if size is not NULL, or
-   NULL on error (conversion or memory allocation error).
+   NULL on error (decoding or memory allocation error). If size is not NULL,
+   *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
+   error.
 
    Conversion errors should never happen, unless there is a bug in the C
    library. */
@@ -302,8 +336,9 @@ _Py_char2wchar(const char* arg, size_t *size)
                since we provide everything that we have -
                unless there is a bug in the C library, or I
                misunderstood how mbrtowc works. */
-            fprintf(stderr, "unexpected mbrtowc result -2\n");
             PyMem_Free(res);
+            if (size != NULL)
+                *size = (size_t)-2;
             return NULL;
         }
         if (converted == (size_t)-1) {
@@ -339,7 +374,8 @@ _Py_char2wchar(const char* arg, size_t *size)
 #endif   /* HAVE_MBRTOWC */
     return res;
 oom:
-    fprintf(stderr, "out of memory\n");
+    if (size != NULL)
+        *size = (size_t)-1;
     return NULL;
 #endif   /* __APPLE__ */
 }
@@ -351,10 +387,10 @@ oom:
    This function is the reverse of _Py_char2wchar().
 
    Return a pointer to a newly allocated byte string (use PyMem_Free() to free
-   the memory), or NULL on conversion or memory allocation error.
+   the memory), or NULL on encoding or memory allocation error.
 
    If error_pos is not NULL: *error_pos is the index of the invalid character
-   on conversion error, or (size_t)-1 otherwise. */
+   on encoding error, or (size_t)-1 otherwise. */
 char*
 _Py_wchar2char(const wchar_t *text, size_t *error_pos)
 {
@@ -367,9 +403,7 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos)
     if (unicode == NULL)
         return NULL;
 
-    bytes = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
-                                 PyUnicode_GET_SIZE(unicode),
-                                 "surrogateescape");
+    bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
     Py_DECREF(unicode);
     if (bytes == NULL) {
         PyErr_Clear();
@@ -493,8 +527,8 @@ _Py_wstat(const wchar_t* path, struct stat *buf)
 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
    call stat() otherwise. Only fill st_mode attribute on Windows.
 
-   Return 0 on success, -1 on _wstat() / stat() error or (if PyErr_Occurred())
-   unicode error. */
+   Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
+   raised. */
 
 int
 _Py_stat(PyObject *path, struct stat *statbuf)
@@ -502,8 +536,12 @@ _Py_stat(PyObject *path, struct stat *statbuf)
 #ifdef MS_WINDOWS
     int err;
     struct _stat wstatbuf;
+    wchar_t *wpath;
 
-    err = _wstat(PyUnicode_AS_UNICODE(path), &wstatbuf);
+    wpath = PyUnicode_AsUnicode(path);
+    if (wpath == NULL)
+        return -2;
+    err = _wstat(wpath, &wstatbuf);
     if (!err)
         statbuf->st_mode = wstatbuf.st_mode;
     return err;
@@ -511,7 +549,7 @@ _Py_stat(PyObject *path, struct stat *statbuf)
     int ret;
     PyObject *bytes = PyUnicode_EncodeFSDefault(path);
     if (bytes == NULL)
-        return -1;
+        return -2;
     ret = stat(PyBytes_AS_STRING(bytes), statbuf);
     Py_DECREF(bytes);
     return ret;
@@ -557,18 +595,29 @@ FILE*
 _Py_fopen(PyObject *path, const char *mode)
 {
 #ifdef MS_WINDOWS
+    wchar_t *wpath;
     wchar_t wmode[10];
     int usize;
 
+    if (!PyUnicode_Check(path)) {
+        PyErr_Format(PyExc_TypeError,
+                     "str file path expected under Windows, got %R",
+                     Py_TYPE(path));
+        return NULL;
+    }
+    wpath = PyUnicode_AsUnicode(path);
+    if (wpath == NULL)
+        return NULL;
+
     usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
     if (usize == 0)
         return NULL;
 
-    return _wfopen(PyUnicode_AS_UNICODE(path), wmode);
+    return _wfopen(wpath, wmode);
 #else
     FILE *f;
-    PyObject *bytes = PyUnicode_EncodeFSDefault(path);
-    if (bytes == NULL)
+    PyObject *bytes;
+    if (!PyUnicode_FSConverter(path, &bytes))
         return NULL;
     f = fopen(PyBytes_AS_STRING(bytes), mode);
     Py_DECREF(bytes);
@@ -579,13 +628,13 @@ _Py_fopen(PyObject *path, const char *mode)
 #ifdef HAVE_READLINK
 
 /* Read value of symbolic link. Encode the path to the locale encoding, decode
-   the result from the locale encoding. */
+   the result from the locale encoding. Return -1 on error. */
 
 int
 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
 {
     char *cpath;
-    char cbuf[PATH_MAX];
+    char cbuf[MAXPATHLEN];
     wchar_t *wbuf;
     int res;
     size_t r1;
@@ -595,11 +644,11 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
         errno = EINVAL;
         return -1;
     }
-    res = (int)readlink(cpath, cbuf, PATH_MAX);
+    res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
     PyMem_Free(cpath);
     if (res == -1)
         return -1;
-    if (res == PATH_MAX) {
+    if (res == Py_ARRAY_LENGTH(cbuf)) {
         errno = EINVAL;
         return -1;
     }
@@ -623,14 +672,15 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
 #ifdef HAVE_REALPATH
 
 /* Return the canonicalized absolute pathname. Encode path to the locale
-   encoding, decode the result from the locale encoding. */
+   encoding, decode the result from the locale encoding.
+   Return NULL on error. */
 
 wchar_t*
 _Py_wrealpath(const wchar_t *path,
               wchar_t *resolved_path, size_t resolved_path_size)
 {
     char *cpath;
-    char cresolved_path[PATH_MAX];
+    char cresolved_path[MAXPATHLEN];
     wchar_t *wresolved_path;
     char *res;
     size_t r;
@@ -661,7 +711,8 @@ _Py_wrealpath(const wchar_t *path,
 #endif
 
 /* Get the current directory. size is the buffer size in wide characters
-   including the null character. Decode the path from the locale encoding. */
+   including the null character. Decode the path from the locale encoding.
+   Return NULL on error. */
 
 wchar_t*
 _Py_wgetcwd(wchar_t *buf, size_t size)
@@ -669,11 +720,11 @@ _Py_wgetcwd(wchar_t *buf, size_t size)
 #ifdef MS_WINDOWS
     return _wgetcwd(buf, size);
 #else
-    char fname[PATH_MAX];
+    char fname[MAXPATHLEN];
     wchar_t *wname;
     size_t len;
 
-    if (getcwd(fname, PATH_MAX) == NULL)
+    if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
         return NULL;
     wname = _Py_char2wchar(fname, &len);
     if (wname == NULL)