summaryrefslogtreecommitdiff
path: root/rts/RtsFlags.c
diff options
context:
space:
mode:
authorAndreas Klebinger <klebinger.andreas@gmx.at>2017-07-27 18:16:09 +0100
committerTamar Christina <tamar@zhox.com>2017-07-27 21:16:02 +0100
commit7af0b906116e13fbd90f43f2f6c6b826df2dca77 (patch)
tree4da8912ab0408e22b119098dd64260b32e935bd9 /rts/RtsFlags.c
parent791947db6db32ef7d4772a821a0823e558e3c05b (diff)
downloadhaskell-7af0b906116e13fbd90f43f2f6c6b826df2dca77.tar.gz
Initialize hs_init with UTF8 encoded arguments on Windows.
Summary: Get utf8 encoded arguments before we call hs_init and use them instead of ignoring hs_init arguments. This reduces differing behaviour of the RTS between windows and linux and simplifies the code involved. A few testcases were changed to expect the same result on windows as on linux after the changes. This fixes #13940. Test Plan: ./validate Reviewers: austin, hvr, bgamari, erikd, simonmar, Phyx Subscribers: Phyx, rwbarton, thomie GHC Trac Issues: #13940 Differential Revision: https://phabricator.haskell.org/D3739
Diffstat (limited to 'rts/RtsFlags.c')
-rw-r--r--rts/RtsFlags.c127
1 files changed, 78 insertions, 49 deletions
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 7b10d2a67d..80bfa56f73 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -46,12 +46,11 @@ int rts_argc = 0; /* ditto */
char **rts_argv = NULL;
int rts_argv_size = 0;
#if defined(mingw32_HOST_OS)
-// On Windows, we want to use GetCommandLineW rather than argc/argv,
-// but we need to mutate the command line arguments for withProgName and
-// friends. The System.Environment module achieves that using this bit of
-// shared state:
-int win32_prog_argc = 0;
-wchar_t **win32_prog_argv = NULL;
+// On Windows hs_main uses GetCommandLineW to get Unicode arguments and
+// passes them along UTF8 encoded as argv. We store them here in order to
+// free them on exit.
+int win32_full_utf8_argc = 0;
+char** win32_utf8_argv = NULL;
#endif
// The global rtsConfig, set from the RtsConfig supplied by the call
@@ -111,6 +110,9 @@ static void read_trace_flags(const char *arg);
static void errorUsage (void) GNU_ATTRIBUTE(__noreturn__);
+#if defined(mingw32_HOST_OS)
+static char** win32_full_utf8_argv;
+#endif
static char * copyArg (char *arg);
static char ** copyArgv (int argc, char *argv[]);
static void freeArgv (int argc, char *argv[]);
@@ -446,6 +448,66 @@ usage_text[] = {
0
};
+/**
+Note [Windows Unicode Arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+On Windows argv is usually encoded in the current Codepage which might not
+support unicode.
+
+Instead of ignoring the arguments to hs_init we expect them to be utf-8
+encoded when coming from a custom main function. In the regular hs_main we
+get the unicode arguments from the windows API and pass them along utf8
+encoded instead.
+
+This reduces special casing of arguments in later parts of the RTS and base
+libraries to dealing with slash differences and using utf8 instead of the
+current locale on Windows when decoding arguments.
+
+*/
+
+#if defined(mingw32_HOST_OS)
+//Allocate a buffer and return the string utf8 encoded.
+char* lpcwstrToUTF8(const wchar_t* utf16_str)
+{
+ //Check the utf8 encoded size first
+ int res = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, NULL, 0,
+ NULL, NULL);
+ if (res == 0) {
+ return NULL;
+ }
+ char* buffer = (char*) stgMallocBytes((size_t)res, "getUTF8Args 2");
+ res = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, buffer, res,
+ NULL, NULL);
+ return buffer;
+}
+
+char** getUTF8Args(int* argc)
+{
+ LPCWSTR cmdLine = GetCommandLineW();
+ LPWSTR* argvw = CommandLineToArgvW(cmdLine, argc);
+
+ // We create two argument arrays, one which is later permutated by the RTS
+ // instead of the main argv.
+ // The other one is used to free the allocted memory later.
+ char** argv = (char**) stgMallocBytes(sizeof(char*) * (*argc + 1),
+ "getUTF8Args 1");
+ win32_full_utf8_argv = (char**) stgMallocBytes(sizeof(char*) * (*argc + 1),
+ "getUTF8Args 1");
+
+ for (int i = 0; i < *argc; i++)
+ {
+ argv[i] = lpcwstrToUTF8(argvw[i]);
+ }
+ argv[*argc] = NULL;
+ memcpy(win32_full_utf8_argv, argv, sizeof(char*) * (*argc + 1));
+
+ LocalFree(argvw);
+ win32_utf8_argv = argv;
+ win32_full_utf8_argc = *argc;
+ return argv;
+}
+#endif
+
STATIC_INLINE bool strequal(const char *a, const char * b)
{
return(strcmp(a, b) == 0);
@@ -514,12 +576,8 @@ static void errorRtsOptsDisabled(const char *s)
- rtsConfig (global) contains the supplied RtsConfig
- On Windows getArgs ignores argv and instead takes the arguments directly
- from the WinAPI and removes any which would have been parsed by the RTS.
-
- If the handling of which arguments are passed to the Haskell side changes
- these changes have to be synchronized with getArgs in base. See #13287 and
- Note [Ignore hs_init argv] in System.Environment.
+ On Windows argv is assumed to be utf8 encoded for unicode compatibility.
+ See Note [Windows Unicode Arguments]
-------------------------------------------------------------------------- */
@@ -579,6 +637,7 @@ void setupRtsFlags (int *argc, char *argv[], RtsConfig rts_config)
for (mode = PGM; arg < total_arg; arg++) {
// The '--RTS' argument disables all future +RTS ... -RTS processing.
if (strequal("--RTS", argv[arg])) {
+
arg++;
break;
}
@@ -2040,48 +2099,18 @@ void freeWin32ProgArgv (void);
void
freeWin32ProgArgv (void)
{
- int i;
-
- if (win32_prog_argv != NULL) {
- for (i = 0; i < win32_prog_argc; i++) {
- stgFree(win32_prog_argv[i]);
- }
- stgFree(win32_prog_argv);
+ if(win32_utf8_argv == NULL) {
+ return;
+ }
+ else
+ {
+ freeArgv(win32_full_utf8_argc, win32_full_utf8_argv);
+ stgFree(win32_utf8_argv);
}
- win32_prog_argc = 0;
- win32_prog_argv = NULL;
-}
-void
-getWin32ProgArgv(int *argc, wchar_t **argv[])
-{
- *argc = win32_prog_argc;
- *argv = win32_prog_argv;
}
-void
-setWin32ProgArgv(int argc, wchar_t *argv[])
-{
- int i;
-
- freeWin32ProgArgv();
-
- win32_prog_argc = argc;
- if (argv == NULL) {
- win32_prog_argv = NULL;
- return;
- }
-
- win32_prog_argv = stgCallocBytes(argc + 1, sizeof (wchar_t *),
- "setWin32ProgArgv 1");
- for (i = 0; i < argc; i++) {
- win32_prog_argv[i] = stgMallocBytes((wcslen(argv[i]) + 1) * sizeof(wchar_t),
- "setWin32ProgArgv 2");
- wcscpy(win32_prog_argv[i], argv[i]);
- }
- win32_prog_argv[argc] = NULL;
-}
#endif
/* ----------------------------------------------------------------------------