diff options
author | Andreas Klebinger <klebinger.andreas@gmx.at> | 2017-07-27 18:16:09 +0100 |
---|---|---|
committer | Tamar Christina <tamar@zhox.com> | 2017-07-27 21:16:02 +0100 |
commit | 7af0b906116e13fbd90f43f2f6c6b826df2dca77 (patch) | |
tree | 4da8912ab0408e22b119098dd64260b32e935bd9 /rts | |
parent | 791947db6db32ef7d4772a821a0823e558e3c05b (diff) | |
download | haskell-7af0b906116e13fbd90f43f2f6c6b826df2dca77.tar.gz |
Initialize hs_init with UTF8 encoded arguments on Windows.
Summary:
Get utf8 encoded arguments before we call hs_init and use them
instead of ignoring hs_init arguments. This reduces differing
behaviour of the RTS between windows and linux and simplifies
the code involved.
A few testcases were changed to expect the same result on windows
as on linux after the changes.
This fixes #13940.
Test Plan: ./validate
Reviewers: austin, hvr, bgamari, erikd, simonmar, Phyx
Subscribers: Phyx, rwbarton, thomie
GHC Trac Issues: #13940
Differential Revision: https://phabricator.haskell.org/D3739
Diffstat (limited to 'rts')
-rw-r--r-- | rts/RtsFlags.c | 127 | ||||
-rw-r--r-- | rts/RtsFlags.h | 5 | ||||
-rw-r--r-- | rts/RtsMain.c | 11 | ||||
-rw-r--r-- | rts/RtsStartup.c | 26 | ||||
-rw-r--r-- | rts/RtsSymbols.c | 2 |
5 files changed, 120 insertions, 51 deletions
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index 7b10d2a67d..80bfa56f73 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -46,12 +46,11 @@ int rts_argc = 0; /* ditto */ char **rts_argv = NULL; int rts_argv_size = 0; #if defined(mingw32_HOST_OS) -// On Windows, we want to use GetCommandLineW rather than argc/argv, -// but we need to mutate the command line arguments for withProgName and -// friends. The System.Environment module achieves that using this bit of -// shared state: -int win32_prog_argc = 0; -wchar_t **win32_prog_argv = NULL; +// On Windows hs_main uses GetCommandLineW to get Unicode arguments and +// passes them along UTF8 encoded as argv. We store them here in order to +// free them on exit. +int win32_full_utf8_argc = 0; +char** win32_utf8_argv = NULL; #endif // The global rtsConfig, set from the RtsConfig supplied by the call @@ -111,6 +110,9 @@ static void read_trace_flags(const char *arg); static void errorUsage (void) GNU_ATTRIBUTE(__noreturn__); +#if defined(mingw32_HOST_OS) +static char** win32_full_utf8_argv; +#endif static char * copyArg (char *arg); static char ** copyArgv (int argc, char *argv[]); static void freeArgv (int argc, char *argv[]); @@ -446,6 +448,66 @@ usage_text[] = { 0 }; +/** +Note [Windows Unicode Arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +On Windows argv is usually encoded in the current Codepage which might not +support unicode. + +Instead of ignoring the arguments to hs_init we expect them to be utf-8 +encoded when coming from a custom main function. In the regular hs_main we +get the unicode arguments from the windows API and pass them along utf8 +encoded instead. + +This reduces special casing of arguments in later parts of the RTS and base +libraries to dealing with slash differences and using utf8 instead of the +current locale on Windows when decoding arguments. + +*/ + +#if defined(mingw32_HOST_OS) +//Allocate a buffer and return the string utf8 encoded. +char* lpcwstrToUTF8(const wchar_t* utf16_str) +{ + //Check the utf8 encoded size first + int res = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, NULL, 0, + NULL, NULL); + if (res == 0) { + return NULL; + } + char* buffer = (char*) stgMallocBytes((size_t)res, "getUTF8Args 2"); + res = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, buffer, res, + NULL, NULL); + return buffer; +} + +char** getUTF8Args(int* argc) +{ + LPCWSTR cmdLine = GetCommandLineW(); + LPWSTR* argvw = CommandLineToArgvW(cmdLine, argc); + + // We create two argument arrays, one which is later permutated by the RTS + // instead of the main argv. + // The other one is used to free the allocted memory later. + char** argv = (char**) stgMallocBytes(sizeof(char*) * (*argc + 1), + "getUTF8Args 1"); + win32_full_utf8_argv = (char**) stgMallocBytes(sizeof(char*) * (*argc + 1), + "getUTF8Args 1"); + + for (int i = 0; i < *argc; i++) + { + argv[i] = lpcwstrToUTF8(argvw[i]); + } + argv[*argc] = NULL; + memcpy(win32_full_utf8_argv, argv, sizeof(char*) * (*argc + 1)); + + LocalFree(argvw); + win32_utf8_argv = argv; + win32_full_utf8_argc = *argc; + return argv; +} +#endif + STATIC_INLINE bool strequal(const char *a, const char * b) { return(strcmp(a, b) == 0); @@ -514,12 +576,8 @@ static void errorRtsOptsDisabled(const char *s) - rtsConfig (global) contains the supplied RtsConfig - On Windows getArgs ignores argv and instead takes the arguments directly - from the WinAPI and removes any which would have been parsed by the RTS. - - If the handling of which arguments are passed to the Haskell side changes - these changes have to be synchronized with getArgs in base. See #13287 and - Note [Ignore hs_init argv] in System.Environment. + On Windows argv is assumed to be utf8 encoded for unicode compatibility. + See Note [Windows Unicode Arguments] -------------------------------------------------------------------------- */ @@ -579,6 +637,7 @@ void setupRtsFlags (int *argc, char *argv[], RtsConfig rts_config) for (mode = PGM; arg < total_arg; arg++) { // The '--RTS' argument disables all future +RTS ... -RTS processing. if (strequal("--RTS", argv[arg])) { + arg++; break; } @@ -2040,48 +2099,18 @@ void freeWin32ProgArgv (void); void freeWin32ProgArgv (void) { - int i; - - if (win32_prog_argv != NULL) { - for (i = 0; i < win32_prog_argc; i++) { - stgFree(win32_prog_argv[i]); - } - stgFree(win32_prog_argv); + if(win32_utf8_argv == NULL) { + return; + } + else + { + freeArgv(win32_full_utf8_argc, win32_full_utf8_argv); + stgFree(win32_utf8_argv); } - win32_prog_argc = 0; - win32_prog_argv = NULL; -} -void -getWin32ProgArgv(int *argc, wchar_t **argv[]) -{ - *argc = win32_prog_argc; - *argv = win32_prog_argv; } -void -setWin32ProgArgv(int argc, wchar_t *argv[]) -{ - int i; - - freeWin32ProgArgv(); - - win32_prog_argc = argc; - if (argv == NULL) { - win32_prog_argv = NULL; - return; - } - - win32_prog_argv = stgCallocBytes(argc + 1, sizeof (wchar_t *), - "setWin32ProgArgv 1"); - for (i = 0; i < argc; i++) { - win32_prog_argv[i] = stgMallocBytes((wcslen(argv[i]) + 1) * sizeof(wchar_t), - "setWin32ProgArgv 2"); - wcscpy(win32_prog_argv[i], argv[i]); - } - win32_prog_argv[argc] = NULL; -} #endif /* ---------------------------------------------------------------------------- diff --git a/rts/RtsFlags.h b/rts/RtsFlags.h index 71ad219d29..c36c64a63b 100644 --- a/rts/RtsFlags.h +++ b/rts/RtsFlags.h @@ -13,6 +13,11 @@ /* Routines that operate-on/to-do-with RTS flags: */ +#if defined(mingw32_HOST_OS) +//The returned buffer has to be freed with stgFree() +char* lpcwstrToUTF8(const wchar_t* utf16_str); +char** getUTF8Args(int* argc); +#endif void initRtsFlagsDefaults (void); void setupRtsFlags (int *argc, char *argv[], RtsConfig rtsConfig); void freeRtsArgs (void); diff --git a/rts/RtsMain.c b/rts/RtsMain.c index d9f05576a0..57c38742b6 100644 --- a/rts/RtsMain.c +++ b/rts/RtsMain.c @@ -13,6 +13,7 @@ #include "RtsAPI.h" #include "RtsUtils.h" +#include "RtsFlags.h" #include "Prelude.h" #include "Task.h" #include "Excn.h" @@ -48,6 +49,16 @@ int hs_main ( int argc, char *argv[], // program args int exit_status; SchedulerStatus status; + // See Note: [Windows Unicode Arguments] in rts/RtsFlags.c + #if defined(mingw32_HOST_OS) + { + argv = getUTF8Args(&argc); + } + #endif + + + + hs_init_ghc(&argc, &argv, rts_config); // kick off the computation by creating the main thread with a pointer diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c index 71a842d0a9..e4ca6b906d 100644 --- a/rts/RtsStartup.c +++ b/rts/RtsStartup.c @@ -179,7 +179,33 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config) if (argc == NULL || argv == NULL) { // Use a default for argc & argv if either is not supplied int my_argc = 1; + #if defined(mingw32_HOST_OS) + //Retry larger buffer sizes on error up to about the NTFS length limit. + wchar_t* pathBuf; + char *my_argv[2] = { NULL, NULL }; + for(DWORD maxLength = MAX_PATH; maxLength <= 33280; maxLength *= 2) + { + pathBuf = (wchar_t*) stgMallocBytes(sizeof(wchar_t) * maxLength, + "hs_init_ghc: GetModuleFileName"); + DWORD pathLength = GetModuleFileNameW(NULL, pathBuf, maxLength); + if(GetLastError() == ERROR_INSUFFICIENT_BUFFER || pathLength == 0) { + stgFree(pathBuf); + pathBuf = NULL; + } else { + break; + } + } + if(pathBuf == NULL) { + my_argv[0] = "<unknown>"; + } else { + my_argv[0] = lpcwstrToUTF8(pathBuf); + stgFree(pathBuf); + } + + + #else char *my_argv[] = { "<unknown>", NULL }; + #endif setFullProgArgv(my_argc,my_argv); setupRtsFlags(&my_argc, my_argv, rts_config); } else { diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index 11b1437f77..e80a4955f0 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -97,8 +97,6 @@ SymI_HasProto(stg_asyncReadzh) \ SymI_HasProto(stg_asyncWritezh) \ SymI_HasProto(stg_asyncDoProczh) \ - SymI_HasProto(getWin32ProgArgv) \ - SymI_HasProto(setWin32ProgArgv) \ SymI_HasProto(rts_InstallConsoleEvent) \ SymI_HasProto(rts_ConsoleHandlerDone) \ SymI_HasProto(atexit) \ |