summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/users_guide/ffi-chap.rst6
-rw-r--r--includes/Rts.h6
-rw-r--r--libraries/base/GHC/Environment.hs45
-rw-r--r--libraries/base/GHC/IO/Encoding.hs12
-rw-r--r--libraries/base/System/Environment.hs92
-rw-r--r--rts/RtsFlags.c127
-rw-r--r--rts/RtsFlags.h5
-rw-r--r--rts/RtsMain.c11
-rw-r--r--rts/RtsStartup.c26
-rw-r--r--rts/RtsSymbols.c2
-rw-r--r--testsuite/tests/ghci.debugger/scripts/all.T3
-rw-r--r--testsuite/tests/rts/T6006.stdout-mingw322
12 files changed, 155 insertions, 182 deletions
diff --git a/docs/users_guide/ffi-chap.rst b/docs/users_guide/ffi-chap.rst
index 311146c4d9..320a3a6e70 100644
--- a/docs/users_guide/ffi-chap.rst
+++ b/docs/users_guide/ffi-chap.rst
@@ -337,6 +337,12 @@ reliably re-initialise after this has happened; see :ref:`infelicities-ffi`.
don't forget the flag :ghc-flag:`-no-hs-main`, otherwise GHC
will try to link to the ``Main`` Haskell module.
+.. note::
+ On Windows hs_init treats argv as UTF8-encoded. Passing other encodings
+ might lead to unexpected results. Passing NULL as argv is valid but can
+ lead to <unknown> showing up in error messages instead of the name of the
+ executable.
+
To use ``+RTS`` flags with ``hs_init()``, we have to modify the example
slightly. By default, GHC's RTS will only accept "safe" ``+RTS`` flags (see
:ref:`options-linker`), and the :ghc-flag:`-rtsopts[=⟨none|some|all⟩]`
diff --git a/includes/Rts.h b/includes/Rts.h
index aca24e4f28..a59a8ca432 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -211,12 +211,6 @@ DLL_IMPORT_RTS extern char **prog_argv; /* so we can get at these from Haskell *
DLL_IMPORT_RTS extern int prog_argc;
DLL_IMPORT_RTS extern char *prog_name;
-#if defined(mingw32_HOST_OS)
-// We need these two from Haskell too
-void getWin32ProgArgv(int *argc, wchar_t **argv[]);
-void setWin32ProgArgv(int argc, wchar_t *argv[]);
-#endif
-
void reportStackOverflow(StgTSO* tso);
void reportHeapOverflow(void);
diff --git a/libraries/base/GHC/Environment.hs b/libraries/base/GHC/Environment.hs
index a077f6f8c4..0270aedf55 100644
--- a/libraries/base/GHC/Environment.hs
+++ b/libraries/base/GHC/Environment.hs
@@ -8,11 +8,10 @@ import Foreign
import Foreign.C
import GHC.Base
import GHC.Real ( fromIntegral )
+import GHC.IO.Encoding
+import qualified GHC.Foreign as GHC
#if defined(mingw32_HOST_OS)
-import GHC.IO (finally)
-import GHC.Windows
-
# if defined(i386_HOST_ARCH)
# define WINDOWS_CCONV stdcall
# elif defined(x86_64_HOST_ARCH)
@@ -20,9 +19,6 @@ import GHC.Windows
# else
# error Unknown mingw32 arch
# endif
-#else
-import GHC.IO.Encoding
-import qualified GHC.Foreign as GHC
#endif
-- | Computation 'getFullArgs' is the "raw" version of 'getArgs', similar
@@ -30,37 +26,14 @@ import qualified GHC.Foreign as GHC
-- command line arguments, starting with the program name, and
-- including those normally eaten by the RTS (+RTS ... -RTS).
getFullArgs :: IO [String]
-#if defined(mingw32_HOST_OS)
--- Ignore the arguments to hs_init on Windows for the sake of Unicode compat
getFullArgs = do
- p_arg_string <- c_GetCommandLine
- alloca $ \p_argc -> do
- p_argv <- c_CommandLineToArgv p_arg_string p_argc
- if p_argv == nullPtr
- then throwGetLastError "getFullArgs"
- else flip finally (c_LocalFree p_argv) $ do
- argc <- peek p_argc
- p_argvs <- peekArray (fromIntegral argc) p_argv
- mapM peekCWString p_argvs
-
-foreign import WINDOWS_CCONV unsafe "windows.h GetCommandLineW"
- c_GetCommandLine :: IO (Ptr CWString)
-
-foreign import WINDOWS_CCONV unsafe "windows.h CommandLineToArgvW"
- c_CommandLineToArgv :: Ptr CWString -> Ptr CInt -> IO (Ptr CWString)
-
-foreign import WINDOWS_CCONV unsafe "Windows.h LocalFree"
- c_LocalFree :: Ptr a -> IO (Ptr a)
-#else
-getFullArgs =
- alloca $ \ p_argc ->
- alloca $ \ p_argv -> do
- getFullProgArgv p_argc p_argv
- p <- fromIntegral `liftM` peek p_argc
- argv <- peek p_argv
- enc <- getFileSystemEncoding
- peekArray p argv >>= mapM (GHC.peekCString enc)
+ alloca $ \ p_argc -> do
+ alloca $ \ p_argv -> do
+ getFullProgArgv p_argc p_argv
+ p <- fromIntegral `liftM` peek p_argc
+ argv <- peek p_argv
+ enc <- argvEncoding
+ peekArray p argv >>= mapM (GHC.peekCString enc)
foreign import ccall unsafe "getFullProgArgv"
getFullProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
-#endif
diff --git a/libraries/base/GHC/IO/Encoding.hs b/libraries/base/GHC/IO/Encoding.hs
index 578a420faf..daff97e560 100644
--- a/libraries/base/GHC/IO/Encoding.hs
+++ b/libraries/base/GHC/IO/Encoding.hs
@@ -27,6 +27,7 @@ module GHC.IO.Encoding (
setLocaleEncoding, setFileSystemEncoding, setForeignEncoding,
char8,
mkTextEncoding,
+ argvEncoding
) where
import GHC.Base
@@ -161,6 +162,17 @@ initFileSystemEncoding = CodePage.mkLocaleEncoding RoundtripFailure
initForeignEncoding = CodePage.mkLocaleEncoding IgnoreCodingFailure
#endif
+-- See Note [Windows Unicode Arguments] in rts/RtsFlags.c
+-- On Windows we assume hs_init argv is in utf8 encoding.
+
+-- | Internal encoding of argv
+argvEncoding :: IO TextEncoding
+#if defined(mingw32_HOST_OS)
+argvEncoding = return utf8
+#else
+argvEncoding = getFileSystemEncoding
+#endif
+
-- | An encoding in which Unicode code points are translated to bytes
-- by taking the code point modulo 256. When decoding, bytes are
-- translated directly into the equivalent code point.
diff --git a/libraries/base/System/Environment.hs b/libraries/base/System/Environment.hs
index 56e6961f8a..ff085462e6 100644
--- a/libraries/base/System/Environment.hs
+++ b/libraries/base/System/Environment.hs
@@ -38,13 +38,13 @@ import Control.Exception.Base (bracket)
#endif
-- import GHC.IO
import GHC.IO.Exception
-import GHC.IO.Encoding (getFileSystemEncoding)
import qualified GHC.Foreign as GHC
import Control.Monad
#if defined(mingw32_HOST_OS)
-import GHC.Environment
+import GHC.IO.Encoding (argvEncoding)
import GHC.Windows
#else
+import GHC.IO.Encoding (getFileSystemEncoding, argvEncoding)
import System.Posix.Internals (withFilePath)
#endif
@@ -65,89 +65,21 @@ import System.Environment.ExecutablePath
-- ---------------------------------------------------------------------------
-- getArgs, getProgName, getEnv
-#if defined(mingw32_HOST_OS)
-
-{-
-Note [Ignore hs_init argv]
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-Ignore the arguments to hs_init on Windows for the sake of Unicode compat
-
-Instead on Windows we get the list of arguments from getCommandLineW and
-filter out arguments which the RTS would not have passed along.
-
-This is done to ensure we get the arguments in proper Unicode Encoding which
-the RTS at this moment does not seem provide. The filtering has to match the
-one done by the RTS to avoid inconsistencies like #13287.
--}
-
-getWin32ProgArgv_certainly :: IO [String]
-getWin32ProgArgv_certainly = do
- mb_argv <- getWin32ProgArgv
- case mb_argv of
- -- see Note [Ignore hs_init argv]
- Nothing -> fmap dropRTSArgs getFullArgs
- Just argv -> return argv
-
-withWin32ProgArgv :: [String] -> IO a -> IO a
-withWin32ProgArgv argv act = bracket begin setWin32ProgArgv (\_ -> act)
- where
- begin = do
- mb_old_argv <- getWin32ProgArgv
- setWin32ProgArgv (Just argv)
- return mb_old_argv
-
-getWin32ProgArgv :: IO (Maybe [String])
-getWin32ProgArgv = alloca $ \p_argc -> alloca $ \p_argv -> do
- c_getWin32ProgArgv p_argc p_argv
- argc <- peek p_argc
- argv_p <- peek p_argv
- if argv_p == nullPtr
- then return Nothing
- else do
- argv_ps <- peekArray (fromIntegral argc) argv_p
- fmap Just $ mapM peekCWString argv_ps
-
-setWin32ProgArgv :: Maybe [String] -> IO ()
-setWin32ProgArgv Nothing = c_setWin32ProgArgv 0 nullPtr
-setWin32ProgArgv (Just argv) = withMany withCWString argv $ \argv_ps -> withArrayLen argv_ps $ \argc argv_p -> do
- c_setWin32ProgArgv (fromIntegral argc) argv_p
-
-foreign import ccall unsafe "getWin32ProgArgv"
- c_getWin32ProgArgv :: Ptr CInt -> Ptr (Ptr CWString) -> IO ()
-
-foreign import ccall unsafe "setWin32ProgArgv"
- c_setWin32ProgArgv :: CInt -> Ptr CWString -> IO ()
-
--- See Note [Ignore hs_init argv]
-dropRTSArgs :: [String] -> [String]
-dropRTSArgs [] = []
-dropRTSArgs rest@("--":_) = rest
-dropRTSArgs ("+RTS":rest) = dropRTSArgs (dropWhile (/= "-RTS") rest)
-dropRTSArgs ("--RTS":rest) = rest
-dropRTSArgs ("-RTS":rest) = dropRTSArgs rest
-dropRTSArgs (arg:rest) = arg : dropRTSArgs rest
-
-#endif
-
-- | Computation 'getArgs' returns a list of the program's command
-- line arguments (not including the program name).
getArgs :: IO [String]
-
-#if defined(mingw32_HOST_OS)
-getArgs = fmap tail getWin32ProgArgv_certainly
-#else
getArgs =
alloca $ \ p_argc ->
alloca $ \ p_argv -> do
getProgArgv p_argc p_argv
p <- fromIntegral `liftM` peek p_argc
argv <- peek p_argv
- enc <- getFileSystemEncoding
+ enc <- argvEncoding
peekArray (p - 1) (advancePtr argv 1) >>= mapM (GHC.peekCString enc)
+
foreign import ccall unsafe "getProgArgv"
getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
-#endif
{-|
Computation 'getProgName' returns the name of the program as it was
@@ -160,10 +92,7 @@ between platforms: on Windows, for example, a program invoked as foo
is probably really @FOO.EXE@, and that is what 'getProgName' will return.
-}
getProgName :: IO String
-#if defined(mingw32_HOST_OS)
-- Ignore the arguments to hs_init on Windows for the sake of Unicode compat
-getProgName = fmap (basename . head) getWin32ProgArgv_certainly
-#else
getProgName =
alloca $ \ p_argc ->
alloca $ \ p_argv -> do
@@ -173,10 +102,9 @@ getProgName =
unpackProgName :: Ptr (Ptr CChar) -> IO String -- argv[0]
unpackProgName argv = do
- enc <- getFileSystemEncoding
+ enc <- argvEncoding
s <- peekElemOff argv 0 >>= GHC.peekCString enc
return (basename s)
-#endif
basename :: FilePath -> FilePath
basename f = go f f
@@ -371,15 +299,7 @@ withProgName nm act = do
-- the duration of an action.
withArgv :: [String] -> IO a -> IO a
-
-#if defined(mingw32_HOST_OS)
--- We have to reflect the updated arguments in the RTS-side variables as
--- well, because the RTS still consults them for error messages and the like.
--- If we don't do this then ghc-e005 fails.
-withArgv new_args act = withWin32ProgArgv new_args $ withProgArgv new_args act
-#else
withArgv = withProgArgv
-#endif
withProgArgv :: [String] -> IO a -> IO a
withProgArgv new_args act = do
@@ -391,7 +311,7 @@ withProgArgv new_args act = do
setProgArgv :: [String] -> IO ()
setProgArgv argv = do
- enc <- getFileSystemEncoding
+ enc <- argvEncoding
GHC.withCStringsLen enc argv $ \len css ->
c_setProgArgv (fromIntegral len) css
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 7b10d2a67d..80bfa56f73 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -46,12 +46,11 @@ int rts_argc = 0; /* ditto */
char **rts_argv = NULL;
int rts_argv_size = 0;
#if defined(mingw32_HOST_OS)
-// On Windows, we want to use GetCommandLineW rather than argc/argv,
-// but we need to mutate the command line arguments for withProgName and
-// friends. The System.Environment module achieves that using this bit of
-// shared state:
-int win32_prog_argc = 0;
-wchar_t **win32_prog_argv = NULL;
+// On Windows hs_main uses GetCommandLineW to get Unicode arguments and
+// passes them along UTF8 encoded as argv. We store them here in order to
+// free them on exit.
+int win32_full_utf8_argc = 0;
+char** win32_utf8_argv = NULL;
#endif
// The global rtsConfig, set from the RtsConfig supplied by the call
@@ -111,6 +110,9 @@ static void read_trace_flags(const char *arg);
static void errorUsage (void) GNU_ATTRIBUTE(__noreturn__);
+#if defined(mingw32_HOST_OS)
+static char** win32_full_utf8_argv;
+#endif
static char * copyArg (char *arg);
static char ** copyArgv (int argc, char *argv[]);
static void freeArgv (int argc, char *argv[]);
@@ -446,6 +448,66 @@ usage_text[] = {
0
};
+/**
+Note [Windows Unicode Arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+On Windows argv is usually encoded in the current Codepage which might not
+support unicode.
+
+Instead of ignoring the arguments to hs_init we expect them to be utf-8
+encoded when coming from a custom main function. In the regular hs_main we
+get the unicode arguments from the windows API and pass them along utf8
+encoded instead.
+
+This reduces special casing of arguments in later parts of the RTS and base
+libraries to dealing with slash differences and using utf8 instead of the
+current locale on Windows when decoding arguments.
+
+*/
+
+#if defined(mingw32_HOST_OS)
+//Allocate a buffer and return the string utf8 encoded.
+char* lpcwstrToUTF8(const wchar_t* utf16_str)
+{
+ //Check the utf8 encoded size first
+ int res = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, NULL, 0,
+ NULL, NULL);
+ if (res == 0) {
+ return NULL;
+ }
+ char* buffer = (char*) stgMallocBytes((size_t)res, "getUTF8Args 2");
+ res = WideCharToMultiByte(CP_UTF8, 0, utf16_str, -1, buffer, res,
+ NULL, NULL);
+ return buffer;
+}
+
+char** getUTF8Args(int* argc)
+{
+ LPCWSTR cmdLine = GetCommandLineW();
+ LPWSTR* argvw = CommandLineToArgvW(cmdLine, argc);
+
+ // We create two argument arrays, one which is later permutated by the RTS
+ // instead of the main argv.
+ // The other one is used to free the allocted memory later.
+ char** argv = (char**) stgMallocBytes(sizeof(char*) * (*argc + 1),
+ "getUTF8Args 1");
+ win32_full_utf8_argv = (char**) stgMallocBytes(sizeof(char*) * (*argc + 1),
+ "getUTF8Args 1");
+
+ for (int i = 0; i < *argc; i++)
+ {
+ argv[i] = lpcwstrToUTF8(argvw[i]);
+ }
+ argv[*argc] = NULL;
+ memcpy(win32_full_utf8_argv, argv, sizeof(char*) * (*argc + 1));
+
+ LocalFree(argvw);
+ win32_utf8_argv = argv;
+ win32_full_utf8_argc = *argc;
+ return argv;
+}
+#endif
+
STATIC_INLINE bool strequal(const char *a, const char * b)
{
return(strcmp(a, b) == 0);
@@ -514,12 +576,8 @@ static void errorRtsOptsDisabled(const char *s)
- rtsConfig (global) contains the supplied RtsConfig
- On Windows getArgs ignores argv and instead takes the arguments directly
- from the WinAPI and removes any which would have been parsed by the RTS.
-
- If the handling of which arguments are passed to the Haskell side changes
- these changes have to be synchronized with getArgs in base. See #13287 and
- Note [Ignore hs_init argv] in System.Environment.
+ On Windows argv is assumed to be utf8 encoded for unicode compatibility.
+ See Note [Windows Unicode Arguments]
-------------------------------------------------------------------------- */
@@ -579,6 +637,7 @@ void setupRtsFlags (int *argc, char *argv[], RtsConfig rts_config)
for (mode = PGM; arg < total_arg; arg++) {
// The '--RTS' argument disables all future +RTS ... -RTS processing.
if (strequal("--RTS", argv[arg])) {
+
arg++;
break;
}
@@ -2040,48 +2099,18 @@ void freeWin32ProgArgv (void);
void
freeWin32ProgArgv (void)
{
- int i;
-
- if (win32_prog_argv != NULL) {
- for (i = 0; i < win32_prog_argc; i++) {
- stgFree(win32_prog_argv[i]);
- }
- stgFree(win32_prog_argv);
+ if(win32_utf8_argv == NULL) {
+ return;
+ }
+ else
+ {
+ freeArgv(win32_full_utf8_argc, win32_full_utf8_argv);
+ stgFree(win32_utf8_argv);
}
- win32_prog_argc = 0;
- win32_prog_argv = NULL;
-}
-void
-getWin32ProgArgv(int *argc, wchar_t **argv[])
-{
- *argc = win32_prog_argc;
- *argv = win32_prog_argv;
}
-void
-setWin32ProgArgv(int argc, wchar_t *argv[])
-{
- int i;
-
- freeWin32ProgArgv();
-
- win32_prog_argc = argc;
- if (argv == NULL) {
- win32_prog_argv = NULL;
- return;
- }
-
- win32_prog_argv = stgCallocBytes(argc + 1, sizeof (wchar_t *),
- "setWin32ProgArgv 1");
- for (i = 0; i < argc; i++) {
- win32_prog_argv[i] = stgMallocBytes((wcslen(argv[i]) + 1) * sizeof(wchar_t),
- "setWin32ProgArgv 2");
- wcscpy(win32_prog_argv[i], argv[i]);
- }
- win32_prog_argv[argc] = NULL;
-}
#endif
/* ----------------------------------------------------------------------------
diff --git a/rts/RtsFlags.h b/rts/RtsFlags.h
index 71ad219d29..c36c64a63b 100644
--- a/rts/RtsFlags.h
+++ b/rts/RtsFlags.h
@@ -13,6 +13,11 @@
/* Routines that operate-on/to-do-with RTS flags: */
+#if defined(mingw32_HOST_OS)
+//The returned buffer has to be freed with stgFree()
+char* lpcwstrToUTF8(const wchar_t* utf16_str);
+char** getUTF8Args(int* argc);
+#endif
void initRtsFlagsDefaults (void);
void setupRtsFlags (int *argc, char *argv[], RtsConfig rtsConfig);
void freeRtsArgs (void);
diff --git a/rts/RtsMain.c b/rts/RtsMain.c
index d9f05576a0..57c38742b6 100644
--- a/rts/RtsMain.c
+++ b/rts/RtsMain.c
@@ -13,6 +13,7 @@
#include "RtsAPI.h"
#include "RtsUtils.h"
+#include "RtsFlags.h"
#include "Prelude.h"
#include "Task.h"
#include "Excn.h"
@@ -48,6 +49,16 @@ int hs_main ( int argc, char *argv[], // program args
int exit_status;
SchedulerStatus status;
+ // See Note: [Windows Unicode Arguments] in rts/RtsFlags.c
+ #if defined(mingw32_HOST_OS)
+ {
+ argv = getUTF8Args(&argc);
+ }
+ #endif
+
+
+
+
hs_init_ghc(&argc, &argv, rts_config);
// kick off the computation by creating the main thread with a pointer
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index 71a842d0a9..e4ca6b906d 100644
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -179,7 +179,33 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
if (argc == NULL || argv == NULL) {
// Use a default for argc & argv if either is not supplied
int my_argc = 1;
+ #if defined(mingw32_HOST_OS)
+ //Retry larger buffer sizes on error up to about the NTFS length limit.
+ wchar_t* pathBuf;
+ char *my_argv[2] = { NULL, NULL };
+ for(DWORD maxLength = MAX_PATH; maxLength <= 33280; maxLength *= 2)
+ {
+ pathBuf = (wchar_t*) stgMallocBytes(sizeof(wchar_t) * maxLength,
+ "hs_init_ghc: GetModuleFileName");
+ DWORD pathLength = GetModuleFileNameW(NULL, pathBuf, maxLength);
+ if(GetLastError() == ERROR_INSUFFICIENT_BUFFER || pathLength == 0) {
+ stgFree(pathBuf);
+ pathBuf = NULL;
+ } else {
+ break;
+ }
+ }
+ if(pathBuf == NULL) {
+ my_argv[0] = "<unknown>";
+ } else {
+ my_argv[0] = lpcwstrToUTF8(pathBuf);
+ stgFree(pathBuf);
+ }
+
+
+ #else
char *my_argv[] = { "<unknown>", NULL };
+ #endif
setFullProgArgv(my_argc,my_argv);
setupRtsFlags(&my_argc, my_argv, rts_config);
} else {
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index 11b1437f77..e80a4955f0 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -97,8 +97,6 @@
SymI_HasProto(stg_asyncReadzh) \
SymI_HasProto(stg_asyncWritezh) \
SymI_HasProto(stg_asyncDoProczh) \
- SymI_HasProto(getWin32ProgArgv) \
- SymI_HasProto(setWin32ProgArgv) \
SymI_HasProto(rts_InstallConsoleEvent) \
SymI_HasProto(rts_ConsoleHandlerDone) \
SymI_HasProto(atexit) \
diff --git a/testsuite/tests/ghci.debugger/scripts/all.T b/testsuite/tests/ghci.debugger/scripts/all.T
index 96de3a334b..9e533aa192 100644
--- a/testsuite/tests/ghci.debugger/scripts/all.T
+++ b/testsuite/tests/ghci.debugger/scripts/all.T
@@ -50,8 +50,7 @@ test('break009', [extra_files(['../Test6.hs']),
test('break010', extra_files(['../Test6.hs']), ghci_script, ['break010.script'])
test('break011',
[extra_files(['../Test7.hs']),
- combined_output,
- when(msys(), expect_broken(12712))],
+ combined_output],
ghci_script, ['break011.script'])
test('break012', normal, ghci_script, ['break012.script'])
test('break013', normal, ghci_script, ['break013.script'])
diff --git a/testsuite/tests/rts/T6006.stdout-mingw32 b/testsuite/tests/rts/T6006.stdout-mingw32
index 42e57fde57..962ec4b280 100644
--- a/testsuite/tests/rts/T6006.stdout-mingw32
+++ b/testsuite/tests/rts/T6006.stdout-mingw32
@@ -1,2 +1,2 @@
-"T6006.exe"
+"T6006"
[]