diff options
-rw-r--r-- | docs/users_guide/ffi-chap.rst | 6 | ||||
-rw-r--r-- | includes/Rts.h | 6 | ||||
-rw-r--r-- | libraries/base/GHC/Environment.hs | 45 | ||||
-rw-r--r-- | libraries/base/GHC/IO/Encoding.hs | 12 | ||||
-rw-r--r-- | libraries/base/System/Environment.hs | 92 | ||||
-rw-r--r-- | rts/RtsFlags.c | 127 | ||||
-rw-r--r-- | rts/RtsFlags.h | 5 | ||||
-rw-r--r-- | rts/RtsMain.c | 11 | ||||
-rw-r--r-- | rts/RtsStartup.c | 26 | ||||
-rw-r--r-- | rts/RtsSymbols.c | 2 | ||||
-rw-r--r-- | testsuite/tests/ghci.debugger/scripts/all.T | 3 | ||||
-rw-r--r-- | testsuite/tests/rts/T6006.stdout-mingw32 | 2 |
12 files changed, 155 insertions, 182 deletions
diff --git a/docs/users_guide/ffi-chap.rst b/docs/users_guide/ffi-chap.rst index 311146c4d9..320a3a6e70 100644 --- a/docs/users_guide/ffi-chap.rst +++ b/docs/users_guide/ffi-chap.rst @@ -337,6 +337,12 @@ reliably re-initialise after this has happened; see :ref:`infelicities-ffi`. don't forget the flag :ghc-flag:`-no-hs-main`, otherwise GHC will try to link to the ``Main`` Haskell module. +.. note:: + On Windows hs_init treats argv as UTF8-encoded. Passing other encodings + might lead to unexpected results. Passing NULL as argv is valid but can + lead to <unknown> showing up in error messages instead of the name of the + executable. + To use ``+RTS`` flags with ``hs_init()``, we have to modify the example slightly. By default, GHC's RTS will only accept "safe" ``+RTS`` flags (see :ref:`options-linker`), and the :ghc-flag:`-rtsopts[=⟨none|some|all⟩]` diff --git a/includes/Rts.h b/includes/Rts.h index aca24e4f28..a59a8ca432 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -211,12 +211,6 @@ DLL_IMPORT_RTS extern char **prog_argv; /* so we can get at these from Haskell * DLL_IMPORT_RTS extern int prog_argc; DLL_IMPORT_RTS extern char *prog_name; -#if defined(mingw32_HOST_OS) -// We need these two from Haskell too -void getWin32ProgArgv(int *argc, wchar_t **argv[]); -void setWin32ProgArgv(int argc, wchar_t *argv[]); -#endif - void reportStackOverflow(StgTSO* tso); void reportHeapOverflow(void); diff --git a/libraries/base/GHC/Environment.hs b/libraries/base/GHC/Environment.hs index a077f6f8c4..0270aedf55 100644 --- a/libraries/base/GHC/Environment.hs +++ b/libraries/base/GHC/Environment.hs @@ -8,11 +8,10 @@ import Foreign import Foreign.C import GHC.Base import GHC.Real ( fromIntegral ) +import GHC.IO.Encoding +import qualified GHC.Foreign as GHC #if defined(mingw32_HOST_OS) -import GHC.IO (finally) -import GHC.Windows - # if defined(i386_HOST_ARCH) # define WINDOWS_CCONV stdcall # elif defined(x86_64_HOST_ARCH) @@ -20,9 +19,6 @@ import GHC.Windows # else # error Unknown mingw32 arch # endif -#else -import GHC.IO.Encoding -import qualified GHC.Foreign as GHC #endif -- | Computation 'getFullArgs' is the "raw" version of 'getArgs', similar @@ -30,37 +26,14 @@ import qualified GHC.Foreign as GHC -- command line arguments, starting with the program name, and -- including those normally eaten by the RTS (+RTS ... -RTS). getFullArgs :: IO [String] -#if defined(mingw32_HOST_OS) --- Ignore the arguments to hs_init on Windows for the sake of Unicode compat getFullArgs = do - p_arg_string <- c_GetCommandLine - alloca $ \p_argc -> do - p_argv <- c_CommandLineToArgv p_arg_string p_argc - if p_argv == nullPtr - then throwGetLastError "getFullArgs" - else flip finally (c_LocalFree p_argv) $ do - argc <- peek p_argc - p_argvs <- peekArray (fromIntegral argc) p_argv - mapM peekCWString p_argvs - -foreign import WINDOWS_CCONV unsafe "windows.h GetCommandLineW" - c_GetCommandLine :: IO (Ptr CWString) - -foreign import WINDOWS_CCONV unsafe "windows.h CommandLineToArgvW" - c_CommandLineToArgv :: Ptr CWString -> Ptr CInt -> IO (Ptr CWString) - -foreign import WINDOWS_CCONV unsafe "Windows.h LocalFree" - c_LocalFree :: Ptr a -> IO (Ptr a) -#else -getFullArgs = - alloca $ \ p_argc -> - alloca $ \ p_argv -> do - getFullProgArgv p_argc p_argv - p <- fromIntegral `liftM` peek p_argc - argv <- peek p_argv - enc <- getFileSystemEncoding - peekArray p argv >>= mapM (GHC.peekCString enc) + alloca $ \ p_argc -> do + alloca $ \ p_argv -> do + getFullProgArgv p_argc p_argv + p <- fromIntegral `liftM` peek p_argc + argv <- peek p_argv + enc <- argvEncoding + peekArray p argv >>= mapM (GHC.peekCString enc) foreign import ccall unsafe "getFullProgArgv" getFullProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO () -#endif diff --git a/libraries/base/GHC/IO/Encoding.hs b/libraries/base/GHC/IO/Encoding.hs index 578a420faf..daff97e560 100644 --- a/libraries/base/GHC/IO/Encoding.hs +++ b/libraries/base/GHC/IO/Encoding.hs @@ -27,6 +27,7 @@ module GHC.IO.Encoding ( setLocaleEncoding, setFileSystemEncoding, setForeignEncoding, char8, mkTextEncoding, + argvEncoding ) where import GHC.Base @@ -161,6 +162,17 @@ initFileSystemEncoding = CodePage.mkLocaleEncoding RoundtripFailure initForeignEncoding = CodePage.mkLocaleEncoding IgnoreCodingFailure #endif +-- See Note [Windows Unicode Arguments] in rts/RtsFlags.c +-- On Windows we assume hs_init argv is in utf8 encoding. + +-- | Internal encoding of argv +argvEncoding :: IO TextEncoding +#if defined(mingw32_HOST_OS) +argvEncoding = return utf8 +#else +argvEncoding = getFileSystemEncoding +#endif + -- | An encoding in which Unicode code points are translated to bytes -- by taking the code point modulo 256. When decoding, bytes are -- translated directly into the equivalent code point. diff --git a/libraries/base/System/Environment.hs b/libraries/base/System/Environment.hs index 56e6961f8a..ff085462e6 100644 --- a/libraries/base/System/Environment.hs +++ b/libraries/base/System/Environment.hs @@ -38,13 +38,13 @@ import Control.Exception.Base (bracket) #endif -- import GHC.IO import GHC.IO.Exception -import GHC.IO.Encoding (getFileSystemEncoding) import qualified GHC.Foreign as GHC import Control.Monad #if defined(mingw32_HOST_OS) -import GHC.Environment +import GHC.IO.Encoding (argvEncoding) import GHC.Windows #else +import GHC.IO.Encoding (getFileSystemEncoding, argvEncoding) import System.Posix.Internals (withFilePath) #endif @@ -65,89 +65,21 @@ import System.Environment.ExecutablePath -- --------------------------------------------------------------------------- -- getArgs, getProgName, getEnv -#if defined(mingw32_HOST_OS) - -{- -Note [Ignore hs_init argv] -~~~~~~~~~~~~~~~~~~~~~~~~~~ -Ignore the arguments to hs_init on Windows for the sake of Unicode compat - -Instead on Windows we get the list of arguments from getCommandLineW and -filter out arguments which the RTS would not have passed along. - -This is done to ensure we get the arguments in proper Unicode Encoding which -the RTS at this moment does not seem provide. The filtering has to match the -one done by the RTS to avoid inconsistencies like #13287. --} - -getWin32ProgArgv_certainly :: IO [String] -getWin32ProgArgv_certainly = do - mb_argv <- getWin32ProgArgv - case mb_argv of - -- see Note [Ignore hs_init argv] - Nothing -> fmap dropRTSArgs getFullArgs - Just argv -> return argv - -withWin32ProgArgv :: [String] -> IO a -> IO a -withWin32ProgArgv argv act = bracket begin setWin32ProgArgv (\_ -> act) - where - begin = do - mb_old_argv <- getWin32ProgArgv - setWin32ProgArgv (Just argv) - return mb_old_argv - -getWin32ProgArgv :: IO (Maybe [String]) -getWin32ProgArgv = alloca $ \p_argc -> alloca $ \p_argv -> do - c_getWin32ProgArgv p_argc p_argv - argc <- peek p_argc - argv_p <- peek p_argv - if argv_p == nullPtr - then return Nothing - else do - argv_ps <- peekArray (fromIntegral argc) argv_p - fmap Just $ mapM peekCWString argv_ps - -setWin32ProgArgv :: Maybe [String] -> IO () -setWin32ProgArgv Nothing = c_setWin32ProgArgv 0 nullPtr -setWin32ProgArgv (Just argv) = withMany withCWString argv $ \argv_ps -> withArrayLen argv_ps $ \argc argv_p -> do - c_setWin32ProgArgv (fromIntegral argc) argv_p - -foreign import ccall unsafe "getWin32ProgArgv" - c_getWin32ProgArgv :: Ptr CInt -> Ptr (Ptr CWString) -> IO () - -foreign import ccall unsafe "setWin32ProgArgv" - c_setWin32ProgArgv :: CInt -> Ptr CWString -> IO () - --- See Note [Ignore hs_init argv] -dropRTSArgs :: [String] -> [String] -dropRTSArgs [] = [] -dropRTSArgs rest@("--":_) = rest -dropRTSArgs ("+RTS":rest) = dropRTSArgs (dropWhile (/= "-RTS") rest) -dropRTSArgs ("--RTS":rest) = rest -dropRTSArgs ("-RTS":rest) = dropRTSArgs rest -dropRTSArgs (arg:rest) = arg : dropRTSArgs rest - -#endif - -- | Computation 'getArgs' returns a list of the program's command -- line arguments (not including the program name). getArgs :: IO [String] - -#if defined(mingw32_HOST_OS) -getArgs = fmap tail getWin32ProgArgv_certainly -#else getArgs = alloca $ \ p_argc -> alloca $ \ p_argv -> do getProgArgv p_argc p_argv p <- fromIntegral `liftM` peek p_argc argv <- peek p_argv - enc <- getFileSystemEncoding + enc <- argvEncoding peekArray (p - 1) (advancePtr argv 1) >>= mapM (GHC.peekCString enc) + foreign import ccall unsafe "getProgArgv" getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO () -#endif {-| Computation 'getProgName' returns the name of the program as it was @@ -160,10 +92,7 @@ between platforms: on Windows, for example, a program invoked as foo is probably really @FOO.EXE@, and that is what 'getProgName' will return. -} getProgName :: IO String -#if defined(mingw32_HOST_OS) -- Ignore the arguments to hs_init on Windows for the sake of Unicode compat -getProgName = fmap (basename . head) getWin32ProgArgv_certainly -#else getProgName = alloca $ \ p_argc -> alloca $ \ p_argv -> do @@ -173,10 +102,9 @@ getProgName = unpackProgName :: Ptr (Ptr CChar) -> IO String -- argv[0] unpackProgName argv = do - enc <- getFileSystemEncoding + enc <- argvEncoding s <- peekElemOff argv 0 >>= GHC.peekCString enc return (basename s) -#endif basename :: FilePath -> FilePath basename f = go f f @@ -371,15 +299,7 @@ withProgName nm act = do -- the duration of an action. withArgv :: [String] -> IO a -> IO a - -#if defined(mingw32_HOST_OS) --- We have to reflect the updated arguments in the RTS-side variables as --- well, because the RTS still consults them for error messages and the like. --- If we don't do this then ghc-e005 fails. -withArgv new_args act = withWin32ProgArgv new_args $ withProgArgv new_args act -#else withArgv = withProgArgv -#endif withProgArgv :: [String] -> IO a -> IO a withProgArgv new_args act = do @@ -391,7 +311,7 @@ withProgArgv new_args act = do setProgArgv :: [String] -> IO () setProgArgv argv = do - enc <- getFileSystemEncoding + enc <- argvEncoding GHC.withCStringsLen enc argv $ \len css -> c_setProgArgv (fromIntegral len) css diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index 7b10d2a67d..80bfa56f73 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -46,12 +46,11 @@ int rts_argc = 0; /* ditto */ char **rts_argv = NULL; int rts_argv_size = 0; #if defined(mingw32_HOST_OS) -// On Windows, we want to use GetCommandLineW rather than argc/argv, -// but we need to mutate the command line arguments for withProgName and -// friends. The System.Environment module achieves that using this bit of -// shared state: -int win32_prog_argc = 0; -wchar_t **win32_prog_argv = NULL; +// On Windows hs_main uses GetCommandLineW to get Unicode arguments and +// passes them along UTF8 encoded as argv. We store them here in order to +// free them on exit. +int win32_full_utf8_argc = 0; +char** win32_utf8_argv = NULL; #endif // The global rtsConfig, set from the RtsConfig supplied by the call @@ -111,6 +110,9 @@ static void read_trace_flags(const char *arg); static void errorUsage (void) GNU_ATTRIBUTE(__noreturn__); +#if defined(mingw32_HOST_OS) +static char** win32_full_utf8_argv; +#endif static char * copyArg (char *arg); static char ** copyArgv (int argc, char *argv[]); static void freeArgv (int argc, char *argv[]); @@ -446,6 +448,66 @@ usage_text[] = { 0 }; +/** +Note [Windows Unicode Arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +On Windows argv is usually encoded in the current Codepage which might not +support unicode. + +Instead of ignoring the arguments to hs_init we expect them to be utf-8 +encoded when coming from a custom main function. In the regular hs_main we +get the unicode arguments from the windows API and pass them along utf8 +encoded instead. + +This reduces special casing of arguments in later parts of the RTS and base +libraries to dealing with slash differences and using utf8 instead of the +current locale on Windows when decoding arguments. + +*/ + +#if defined(mingw32_HOST_OS) +//Allocate a buffer and return the string utf8 encoded. +char* lpcwstrToUTF8(const wchar_t* utf16_str) +{ + //Check the utf8 encoded size first + int res = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, NULL, 0, + NULL, NULL); + if (res == 0) { + return NULL; + } + char* buffer = (char*) stgMallocBytes((size_t)res, "getUTF8Args 2"); + res = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, buffer, res, + NULL, NULL); + return buffer; +} + +char** getUTF8Args(int* argc) +{ + LPCWSTR cmdLine = GetCommandLineW(); + LPWSTR* argvw = CommandLineToArgvW(cmdLine, argc); + + // We create two argument arrays, one which is later permutated by the RTS + // instead of the main argv. + // The other one is used to free the allocted memory later. + char** argv = (char**) stgMallocBytes(sizeof(char*) * (*argc + 1), + "getUTF8Args 1"); + win32_full_utf8_argv = (char**) stgMallocBytes(sizeof(char*) * (*argc + 1), + "getUTF8Args 1"); + + for (int i = 0; i < *argc; i++) + { + argv[i] = lpcwstrToUTF8(argvw[i]); + } + argv[*argc] = NULL; + memcpy(win32_full_utf8_argv, argv, sizeof(char*) * (*argc + 1)); + + LocalFree(argvw); + win32_utf8_argv = argv; + win32_full_utf8_argc = *argc; + return argv; +} +#endif + STATIC_INLINE bool strequal(const char *a, const char * b) { return(strcmp(a, b) == 0); @@ -514,12 +576,8 @@ static void errorRtsOptsDisabled(const char *s) - rtsConfig (global) contains the supplied RtsConfig - On Windows getArgs ignores argv and instead takes the arguments directly - from the WinAPI and removes any which would have been parsed by the RTS. - - If the handling of which arguments are passed to the Haskell side changes - these changes have to be synchronized with getArgs in base. See #13287 and - Note [Ignore hs_init argv] in System.Environment. + On Windows argv is assumed to be utf8 encoded for unicode compatibility. + See Note [Windows Unicode Arguments] -------------------------------------------------------------------------- */ @@ -579,6 +637,7 @@ void setupRtsFlags (int *argc, char *argv[], RtsConfig rts_config) for (mode = PGM; arg < total_arg; arg++) { // The '--RTS' argument disables all future +RTS ... -RTS processing. if (strequal("--RTS", argv[arg])) { + arg++; break; } @@ -2040,48 +2099,18 @@ void freeWin32ProgArgv (void); void freeWin32ProgArgv (void) { - int i; - - if (win32_prog_argv != NULL) { - for (i = 0; i < win32_prog_argc; i++) { - stgFree(win32_prog_argv[i]); - } - stgFree(win32_prog_argv); + if(win32_utf8_argv == NULL) { + return; + } + else + { + freeArgv(win32_full_utf8_argc, win32_full_utf8_argv); + stgFree(win32_utf8_argv); } - win32_prog_argc = 0; - win32_prog_argv = NULL; -} -void -getWin32ProgArgv(int *argc, wchar_t **argv[]) -{ - *argc = win32_prog_argc; - *argv = win32_prog_argv; } -void -setWin32ProgArgv(int argc, wchar_t *argv[]) -{ - int i; - - freeWin32ProgArgv(); - - win32_prog_argc = argc; - if (argv == NULL) { - win32_prog_argv = NULL; - return; - } - - win32_prog_argv = stgCallocBytes(argc + 1, sizeof (wchar_t *), - "setWin32ProgArgv 1"); - for (i = 0; i < argc; i++) { - win32_prog_argv[i] = stgMallocBytes((wcslen(argv[i]) + 1) * sizeof(wchar_t), - "setWin32ProgArgv 2"); - wcscpy(win32_prog_argv[i], argv[i]); - } - win32_prog_argv[argc] = NULL; -} #endif /* ---------------------------------------------------------------------------- diff --git a/rts/RtsFlags.h b/rts/RtsFlags.h index 71ad219d29..c36c64a63b 100644 --- a/rts/RtsFlags.h +++ b/rts/RtsFlags.h @@ -13,6 +13,11 @@ /* Routines that operate-on/to-do-with RTS flags: */ +#if defined(mingw32_HOST_OS) +//The returned buffer has to be freed with stgFree() +char* lpcwstrToUTF8(const wchar_t* utf16_str); +char** getUTF8Args(int* argc); +#endif void initRtsFlagsDefaults (void); void setupRtsFlags (int *argc, char *argv[], RtsConfig rtsConfig); void freeRtsArgs (void); diff --git a/rts/RtsMain.c b/rts/RtsMain.c index d9f05576a0..57c38742b6 100644 --- a/rts/RtsMain.c +++ b/rts/RtsMain.c @@ -13,6 +13,7 @@ #include "RtsAPI.h" #include "RtsUtils.h" +#include "RtsFlags.h" #include "Prelude.h" #include "Task.h" #include "Excn.h" @@ -48,6 +49,16 @@ int hs_main ( int argc, char *argv[], // program args int exit_status; SchedulerStatus status; + // See Note: [Windows Unicode Arguments] in rts/RtsFlags.c + #if defined(mingw32_HOST_OS) + { + argv = getUTF8Args(&argc); + } + #endif + + + + hs_init_ghc(&argc, &argv, rts_config); // kick off the computation by creating the main thread with a pointer diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c index 71a842d0a9..e4ca6b906d 100644 --- a/rts/RtsStartup.c +++ b/rts/RtsStartup.c @@ -179,7 +179,33 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config) if (argc == NULL || argv == NULL) { // Use a default for argc & argv if either is not supplied int my_argc = 1; + #if defined(mingw32_HOST_OS) + //Retry larger buffer sizes on error up to about the NTFS length limit. + wchar_t* pathBuf; + char *my_argv[2] = { NULL, NULL }; + for(DWORD maxLength = MAX_PATH; maxLength <= 33280; maxLength *= 2) + { + pathBuf = (wchar_t*) stgMallocBytes(sizeof(wchar_t) * maxLength, + "hs_init_ghc: GetModuleFileName"); + DWORD pathLength = GetModuleFileNameW(NULL, pathBuf, maxLength); + if(GetLastError() == ERROR_INSUFFICIENT_BUFFER || pathLength == 0) { + stgFree(pathBuf); + pathBuf = NULL; + } else { + break; + } + } + if(pathBuf == NULL) { + my_argv[0] = "<unknown>"; + } else { + my_argv[0] = lpcwstrToUTF8(pathBuf); + stgFree(pathBuf); + } + + + #else char *my_argv[] = { "<unknown>", NULL }; + #endif setFullProgArgv(my_argc,my_argv); setupRtsFlags(&my_argc, my_argv, rts_config); } else { diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index 11b1437f77..e80a4955f0 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -97,8 +97,6 @@ SymI_HasProto(stg_asyncReadzh) \ SymI_HasProto(stg_asyncWritezh) \ SymI_HasProto(stg_asyncDoProczh) \ - SymI_HasProto(getWin32ProgArgv) \ - SymI_HasProto(setWin32ProgArgv) \ SymI_HasProto(rts_InstallConsoleEvent) \ SymI_HasProto(rts_ConsoleHandlerDone) \ SymI_HasProto(atexit) \ diff --git a/testsuite/tests/ghci.debugger/scripts/all.T b/testsuite/tests/ghci.debugger/scripts/all.T index 96de3a334b..9e533aa192 100644 --- a/testsuite/tests/ghci.debugger/scripts/all.T +++ b/testsuite/tests/ghci.debugger/scripts/all.T @@ -50,8 +50,7 @@ test('break009', [extra_files(['../Test6.hs']), test('break010', extra_files(['../Test6.hs']), ghci_script, ['break010.script']) test('break011', [extra_files(['../Test7.hs']), - combined_output, - when(msys(), expect_broken(12712))], + combined_output], ghci_script, ['break011.script']) test('break012', normal, ghci_script, ['break012.script']) test('break013', normal, ghci_script, ['break013.script']) diff --git a/testsuite/tests/rts/T6006.stdout-mingw32 b/testsuite/tests/rts/T6006.stdout-mingw32 index 42e57fde57..962ec4b280 100644 --- a/testsuite/tests/rts/T6006.stdout-mingw32 +++ b/testsuite/tests/rts/T6006.stdout-mingw32 @@ -1,2 +1,2 @@ -"T6006.exe" +"T6006" [] |