summaryrefslogtreecommitdiff
path: root/rts
diff options
context:
space:
mode:
authorAndreas Klebinger <klebinger.andreas@gmx.at>2017-07-27 18:16:09 +0100
committerTamar Christina <tamar@zhox.com>2017-07-27 21:16:02 +0100
commit7af0b906116e13fbd90f43f2f6c6b826df2dca77 (patch)
tree4da8912ab0408e22b119098dd64260b32e935bd9 /rts
parent791947db6db32ef7d4772a821a0823e558e3c05b (diff)
downloadhaskell-7af0b906116e13fbd90f43f2f6c6b826df2dca77.tar.gz
Initialize hs_init with UTF8 encoded arguments on Windows.
Summary: Get utf8 encoded arguments before we call hs_init and use them instead of ignoring hs_init arguments. This reduces differing behaviour of the RTS between windows and linux and simplifies the code involved. A few testcases were changed to expect the same result on windows as on linux after the changes. This fixes #13940. Test Plan: ./validate Reviewers: austin, hvr, bgamari, erikd, simonmar, Phyx Subscribers: Phyx, rwbarton, thomie GHC Trac Issues: #13940 Differential Revision: https://phabricator.haskell.org/D3739
Diffstat (limited to 'rts')
-rw-r--r--rts/RtsFlags.c127
-rw-r--r--rts/RtsFlags.h5
-rw-r--r--rts/RtsMain.c11
-rw-r--r--rts/RtsStartup.c26
-rw-r--r--rts/RtsSymbols.c2
5 files changed, 120 insertions, 51 deletions
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 7b10d2a67d..80bfa56f73 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -46,12 +46,11 @@ int rts_argc = 0; /* ditto */
char **rts_argv = NULL;
int rts_argv_size = 0;
#if defined(mingw32_HOST_OS)
-// On Windows, we want to use GetCommandLineW rather than argc/argv,
-// but we need to mutate the command line arguments for withProgName and
-// friends. The System.Environment module achieves that using this bit of
-// shared state:
-int win32_prog_argc = 0;
-wchar_t **win32_prog_argv = NULL;
+// On Windows hs_main uses GetCommandLineW to get Unicode arguments and
+// passes them along UTF8 encoded as argv. We store them here in order to
+// free them on exit.
+int win32_full_utf8_argc = 0;
+char** win32_utf8_argv = NULL;
#endif
// The global rtsConfig, set from the RtsConfig supplied by the call
@@ -111,6 +110,9 @@ static void read_trace_flags(const char *arg);
static void errorUsage (void) GNU_ATTRIBUTE(__noreturn__);
+#if defined(mingw32_HOST_OS)
+static char** win32_full_utf8_argv;
+#endif
static char * copyArg (char *arg);
static char ** copyArgv (int argc, char *argv[]);
static void freeArgv (int argc, char *argv[]);
@@ -446,6 +448,66 @@ usage_text[] = {
0
};
+/**
+Note [Windows Unicode Arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+On Windows argv is usually encoded in the current Codepage which might not
+support unicode.
+
+Instead of ignoring the arguments to hs_init we expect them to be utf-8
+encoded when coming from a custom main function. In the regular hs_main we
+get the unicode arguments from the windows API and pass them along utf8
+encoded instead.
+
+This reduces special casing of arguments in later parts of the RTS and base
+libraries to dealing with slash differences and using utf8 instead of the
+current locale on Windows when decoding arguments.
+
+*/
+
+#if defined(mingw32_HOST_OS)
+//Allocate a buffer and return the string utf8 encoded.
+char* lpcwstrToUTF8(const wchar_t* utf16_str)
+{
+ //Check the utf8 encoded size first
+ int res = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, NULL, 0,
+ NULL, NULL);
+ if (res == 0) {
+ return NULL;
+ }
+ char* buffer = (char*) stgMallocBytes((size_t)res, "getUTF8Args 2");
+ res = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, buffer, res,
+ NULL, NULL);
+ return buffer;
+}
+
+char** getUTF8Args(int* argc)
+{
+ LPCWSTR cmdLine = GetCommandLineW();
+ LPWSTR* argvw = CommandLineToArgvW(cmdLine, argc);
+
+ // We create two argument arrays, one which is later permutated by the RTS
+ // instead of the main argv.
+ // The other one is used to free the allocted memory later.
+ char** argv = (char**) stgMallocBytes(sizeof(char*) * (*argc + 1),
+ "getUTF8Args 1");
+ win32_full_utf8_argv = (char**) stgMallocBytes(sizeof(char*) * (*argc + 1),
+ "getUTF8Args 1");
+
+ for (int i = 0; i < *argc; i++)
+ {
+ argv[i] = lpcwstrToUTF8(argvw[i]);
+ }
+ argv[*argc] = NULL;
+ memcpy(win32_full_utf8_argv, argv, sizeof(char*) * (*argc + 1));
+
+ LocalFree(argvw);
+ win32_utf8_argv = argv;
+ win32_full_utf8_argc = *argc;
+ return argv;
+}
+#endif
+
STATIC_INLINE bool strequal(const char *a, const char * b)
{
return(strcmp(a, b) == 0);
@@ -514,12 +576,8 @@ static void errorRtsOptsDisabled(const char *s)
- rtsConfig (global) contains the supplied RtsConfig
- On Windows getArgs ignores argv and instead takes the arguments directly
- from the WinAPI and removes any which would have been parsed by the RTS.
-
- If the handling of which arguments are passed to the Haskell side changes
- these changes have to be synchronized with getArgs in base. See #13287 and
- Note [Ignore hs_init argv] in System.Environment.
+ On Windows argv is assumed to be utf8 encoded for unicode compatibility.
+ See Note [Windows Unicode Arguments]
-------------------------------------------------------------------------- */
@@ -579,6 +637,7 @@ void setupRtsFlags (int *argc, char *argv[], RtsConfig rts_config)
for (mode = PGM; arg < total_arg; arg++) {
// The '--RTS' argument disables all future +RTS ... -RTS processing.
if (strequal("--RTS", argv[arg])) {
+
arg++;
break;
}
@@ -2040,48 +2099,18 @@ void freeWin32ProgArgv (void);
void
freeWin32ProgArgv (void)
{
- int i;
-
- if (win32_prog_argv != NULL) {
- for (i = 0; i < win32_prog_argc; i++) {
- stgFree(win32_prog_argv[i]);
- }
- stgFree(win32_prog_argv);
+ if(win32_utf8_argv == NULL) {
+ return;
+ }
+ else
+ {
+ freeArgv(win32_full_utf8_argc, win32_full_utf8_argv);
+ stgFree(win32_utf8_argv);
}
- win32_prog_argc = 0;
- win32_prog_argv = NULL;
-}
-void
-getWin32ProgArgv(int *argc, wchar_t **argv[])
-{
- *argc = win32_prog_argc;
- *argv = win32_prog_argv;
}
-void
-setWin32ProgArgv(int argc, wchar_t *argv[])
-{
- int i;
-
- freeWin32ProgArgv();
-
- win32_prog_argc = argc;
- if (argv == NULL) {
- win32_prog_argv = NULL;
- return;
- }
-
- win32_prog_argv = stgCallocBytes(argc + 1, sizeof (wchar_t *),
- "setWin32ProgArgv 1");
- for (i = 0; i < argc; i++) {
- win32_prog_argv[i] = stgMallocBytes((wcslen(argv[i]) + 1) * sizeof(wchar_t),
- "setWin32ProgArgv 2");
- wcscpy(win32_prog_argv[i], argv[i]);
- }
- win32_prog_argv[argc] = NULL;
-}
#endif
/* ----------------------------------------------------------------------------
diff --git a/rts/RtsFlags.h b/rts/RtsFlags.h
index 71ad219d29..c36c64a63b 100644
--- a/rts/RtsFlags.h
+++ b/rts/RtsFlags.h
@@ -13,6 +13,11 @@
/* Routines that operate-on/to-do-with RTS flags: */
+#if defined(mingw32_HOST_OS)
+//The returned buffer has to be freed with stgFree()
+char* lpcwstrToUTF8(const wchar_t* utf16_str);
+char** getUTF8Args(int* argc);
+#endif
void initRtsFlagsDefaults (void);
void setupRtsFlags (int *argc, char *argv[], RtsConfig rtsConfig);
void freeRtsArgs (void);
diff --git a/rts/RtsMain.c b/rts/RtsMain.c
index d9f05576a0..57c38742b6 100644
--- a/rts/RtsMain.c
+++ b/rts/RtsMain.c
@@ -13,6 +13,7 @@
#include "RtsAPI.h"
#include "RtsUtils.h"
+#include "RtsFlags.h"
#include "Prelude.h"
#include "Task.h"
#include "Excn.h"
@@ -48,6 +49,16 @@ int hs_main ( int argc, char *argv[], // program args
int exit_status;
SchedulerStatus status;
+ // See Note: [Windows Unicode Arguments] in rts/RtsFlags.c
+ #if defined(mingw32_HOST_OS)
+ {
+ argv = getUTF8Args(&argc);
+ }
+ #endif
+
+
+
+
hs_init_ghc(&argc, &argv, rts_config);
// kick off the computation by creating the main thread with a pointer
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index 71a842d0a9..e4ca6b906d 100644
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -179,7 +179,33 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
if (argc == NULL || argv == NULL) {
// Use a default for argc & argv if either is not supplied
int my_argc = 1;
+ #if defined(mingw32_HOST_OS)
+ //Retry larger buffer sizes on error up to about the NTFS length limit.
+ wchar_t* pathBuf;
+ char *my_argv[2] = { NULL, NULL };
+ for(DWORD maxLength = MAX_PATH; maxLength <= 33280; maxLength *= 2)
+ {
+ pathBuf = (wchar_t*) stgMallocBytes(sizeof(wchar_t) * maxLength,
+ "hs_init_ghc: GetModuleFileName");
+ DWORD pathLength = GetModuleFileNameW(NULL, pathBuf, maxLength);
+ if(GetLastError() == ERROR_INSUFFICIENT_BUFFER || pathLength == 0) {
+ stgFree(pathBuf);
+ pathBuf = NULL;
+ } else {
+ break;
+ }
+ }
+ if(pathBuf == NULL) {
+ my_argv[0] = "<unknown>";
+ } else {
+ my_argv[0] = lpcwstrToUTF8(pathBuf);
+ stgFree(pathBuf);
+ }
+
+
+ #else
char *my_argv[] = { "<unknown>", NULL };
+ #endif
setFullProgArgv(my_argc,my_argv);
setupRtsFlags(&my_argc, my_argv, rts_config);
} else {
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index 11b1437f77..e80a4955f0 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -97,8 +97,6 @@
SymI_HasProto(stg_asyncReadzh) \
SymI_HasProto(stg_asyncWritezh) \
SymI_HasProto(stg_asyncDoProczh) \
- SymI_HasProto(getWin32ProgArgv) \
- SymI_HasProto(setWin32ProgArgv) \
SymI_HasProto(rts_InstallConsoleEvent) \
SymI_HasProto(rts_ConsoleHandlerDone) \
SymI_HasProto(atexit) \