summary | refs | log | tree | commit | diff
path: root/Python
diff options
context:
space:
mode:
author: Steve Dower <steve.dower@microsoft.com> 2016-09-08 10:35:16 -0700
committer: Steve Dower <steve.dower@microsoft.com> 2016-09-08 10:35:16 -0700
commit: 9c9a45c1f0a264d3006971b8211d948d6883a05b (patch)
tree: 7ee9acddb557610e2bbc9f2199d1e492282d7496 /Python
parent: 7b6b990256e32e7e88446ca2859bcee46070e92e (diff)
download: cpython-9c9a45c1f0a264d3006971b8211d948d6883a05b.tar.gz
Issue #27781: Change file system encoding on Windows to UTF-8 (PEP 529)
Diffstat (limited to 'Python')
-rw-r--r-- Python/bltinmodule.c 8
-rw-r--r-- Python/pylifecycle.c 20
-rw-r--r-- Python/sysmodule.c 50
3 files changed, 73 insertions, 5 deletions
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index be145609dd..252c0a7b89 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -21,16 +21,18 @@
Don't forget to modify PyUnicode_DecodeFSDefault() if you touch any of the
values for Py_FileSystemDefaultEncoding!
*/
-#ifdef HAVE_MBCS
-const char *Py_FileSystemDefaultEncoding = "mbcs";
+#if defined(__APPLE__)
+const char *Py_FileSystemDefaultEncoding = "utf-8";
int Py_HasFileSystemDefaultEncoding = 1;
-#elif defined(__APPLE__)
+#elif defined(MS_WINDOWS)
+/* may be changed by initfsencoding(), but should never be free()d */
const char *Py_FileSystemDefaultEncoding = "utf-8";
int Py_HasFileSystemDefaultEncoding = 1;
#else
const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
int Py_HasFileSystemDefaultEncoding = 0;
#endif
+const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
_Py_IDENTIFIER(__builtins__);
_Py_IDENTIFIER(__dict__);
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 1896888916..3f3b614a47 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -90,6 +90,9 @@ int Py_NoUserSiteDirectory = 0; /* for -s and site.py */
int Py_UnbufferedStdioFlag = 0; /* Unbuffered binary std{in,out,err} */
int Py_HashRandomizationFlag = 0; /* for -R and PYTHONHASHSEED */
int Py_IsolatedFlag = 0; /* for -I, isolate from user's env */
+#ifdef MS_WINDOWS
+int Py_LegacyWindowsFSEncodingFlag = 0; /* Uses mbcs instead of utf-8 */
+#endif
PyThreadState *_Py_Finalizing = NULL;
@@ -321,6 +324,10 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib)
check its value further. */
if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0')
Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p);
+#ifdef MS_WINDOWS
+ if ((p = Py_GETENV("PYTHONLEGACYWINDOWSFSENCODING")) && *p != '\0')
+ Py_LegacyWindowsFSEncodingFlag = add_flag(Py_LegacyWindowsFSEncodingFlag, p);
+#endif
_PyRandom_Init();
@@ -958,6 +965,18 @@ initfsencoding(PyInterpreterState *interp)
{
PyObject *codec;
+#ifdef MS_WINDOWS
+ if (Py_LegacyWindowsFSEncodingFlag)
+ {
+ Py_FileSystemDefaultEncoding = "mbcs";
+ Py_FileSystemDefaultEncodeErrors = "replace";
+ }
+ else
+ {
+ Py_FileSystemDefaultEncoding = "utf-8";
+ Py_FileSystemDefaultEncodeErrors = "surrogatepass";
+ }
+#else
if (Py_FileSystemDefaultEncoding == NULL)
{
Py_FileSystemDefaultEncoding = get_locale_encoding();
@@ -968,6 +987,7 @@ initfsencoding(PyInterpreterState *interp)
interp->fscodec_initialized = 1;
return 0;
}
+#endif
/* the encoding is mbcs, utf-8 or ascii */
codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index a54f266030..0fe76b7a74 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -311,6 +311,23 @@ operating system filenames."
);
static PyObject *
+sys_getfilesystemencodeerrors(PyObject *self) /* sys.getfilesystemencodeerrors(): return the current file system error-handler name as a str; registered METH_NOARGS, so no arguments are parsed */
+{
+ if (Py_FileSystemDefaultEncodeErrors) /* global C string; statically initialized to "surrogateescape" in bltinmodule.c and reassigned by initfsencoding() on Windows */
+ return PyUnicode_FromString(Py_FileSystemDefaultEncodeErrors); /* new str object, or NULL with an exception set if the conversion fails */
+ PyErr_SetString(PyExc_RuntimeError, /* defensive path: only reachable if the global has been set to NULL */
+ "filesystem encoding is not initialized");
+ return NULL; /* NULL return signals the RuntimeError set above */
+}
+
+PyDoc_STRVAR(getfilesystemencodeerrors_doc, /* docstring attached to the "getfilesystemencodeerrors" entry in sys_methods */
+ "getfilesystemencodeerrors() -> string\n\
+\n\
+Return the error mode used to convert Unicode filenames in\n\
+operating system filenames."
+);
+
+static PyObject *
sys_intern(PyObject *self, PyObject *args)
{
PyObject *s;
@@ -866,6 +883,24 @@ sys_getwindowsversion(PyObject *self)
#pragma warning(pop)
+PyDoc_STRVAR(enablelegacywindowsfsencoding_doc, /* docstring attached to the "_enablelegacywindowsfsencoding" entry in sys_methods (Windows builds only) */
+"_enablelegacywindowsfsencoding()\n\
+\n\
+Changes the default filesystem encoding to mbcs:replace for consistency\n\
+with earlier versions of Python. See PEP 529 for more information.\n\
+\n\
+This is equivalent to defining the PYTHONLEGACYWINDOWSFSENCODING \n\
+environment variable before launching Python."
+);
+
+static PyObject * /* sys._enablelegacywindowsfsencoding(): switch the file system encoding globals to the legacy mbcs/replace pair at runtime */
+sys_enablelegacywindowsfsencoding(PyObject *self) /* registered METH_NOARGS; self is not used */
+{
+ Py_FileSystemDefaultEncoding = "mbcs"; /* same values initfsencoding() selects when Py_LegacyWindowsFSEncodingFlag is set; string literals, so nothing to free */
+ Py_FileSystemDefaultEncodeErrors = "replace"; /* NOTE(review): Py_LegacyWindowsFSEncodingFlag is left unchanged and the codec lookup from initfsencoding() is not repeated here - confirm this is intended */
+ Py_RETURN_NONE; /* always succeeds; no error path */
+}
+
#endif /* MS_WINDOWS */
#ifdef HAVE_DLOPEN
@@ -1225,6 +1260,8 @@ static PyMethodDef sys_methods[] = {
#endif
{"getfilesystemencoding", (PyCFunction)sys_getfilesystemencoding,
METH_NOARGS, getfilesystemencoding_doc},
+ { "getfilesystemencodeerrors", (PyCFunction)sys_getfilesystemencodeerrors,
+ METH_NOARGS, getfilesystemencodeerrors_doc },
#ifdef Py_TRACE_REFS
{"getobjects", _Py_GetObjects, METH_VARARGS},
#endif
@@ -1240,6 +1277,8 @@ static PyMethodDef sys_methods[] = {
#ifdef MS_WINDOWS
{"getwindowsversion", (PyCFunction)sys_getwindowsversion, METH_NOARGS,
getwindowsversion_doc},
+ {"_enablelegacywindowsfsencoding", (PyCFunction)sys_enablelegacywindowsfsencoding,
+ METH_NOARGS, enablelegacywindowsfsencoding_doc },
#endif /* MS_WINDOWS */
{"intern", sys_intern, METH_VARARGS, intern_doc},
{"is_finalizing", sys_is_finalizing, METH_NOARGS, is_finalizing_doc},
@@ -1456,14 +1495,21 @@ version -- the version of this interpreter as a string\n\
version_info -- version information as a named tuple\n\
"
)
-#ifdef MS_WINDOWS
+#ifdef MS_COREDLL
/* concatenating string here */
PyDoc_STR(
"dllhandle -- [Windows only] integer handle of the Python DLL\n\
winver -- [Windows only] version number of the Python DLL\n\
"
)
-#endif /* MS_WINDOWS */
+#endif /* MS_COREDLL */
+#ifdef MS_WINDOWS
+/* concatenating string here */
+PyDoc_STR(
+"_enablelegacywindowsfsencoding -- [Windows only] \n\
+"
+)
+#endif
PyDoc_STR(
"__stdin__ -- the original stdin; don't touch!\n\
__stdout__ -- the original stdout; don't touch!\n\