summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChaitanya Koparkar <ckoparkar@gmail.com>2018-05-03 12:38:36 -0400
committerBen Gamari <ben@smart-cactus.org>2018-05-03 12:39:16 -0400
commit866525a1765715b8b9902e1bd53b9af1c7a93a30 (patch)
treec9b4ffb66b24ef633783fc193505a655406bf67b
parent6132d7c5e6404936ef281a6f3be333fea780906e (diff)
downloadhaskell-866525a1765715b8b9902e1bd53b9af1c7a93a30.tar.gz
Move the ResponseFile module from haddock into base
GHC and the build tools use "response files" to work around the limit on the length of command line arguments on Windows. Haddock's implementation of parsing response files (i.e escaping/unescaping the appropriate characters) seems complete, is well tested, and also closely matches the GCC version. This patch moves the relevant bits into `base` so that it's easier for other libraries to reuse it. Test Plan: make test TEST=T13896 Reviewers: bgamari, RyanGlScott, 23Skidoo, hvr Reviewed By: RyanGlScott Subscribers: thomie, carter GHC Trac Issues: #13896 Differential Revision: https://phabricator.haskell.org/D4612
-rw-r--r--libraries/base/GHC/ResponseFile.hs159
-rw-r--r--libraries/base/base.cabal1
-rw-r--r--libraries/base/changelog.md3
-rw-r--r--libraries/base/tests/T13896.hs75
-rw-r--r--libraries/base/tests/all.T1
5 files changed, 239 insertions, 0 deletions
diff --git a/libraries/base/GHC/ResponseFile.hs b/libraries/base/GHC/ResponseFile.hs
new file mode 100644
index 0000000000..3c2f64894b
--- /dev/null
+++ b/libraries/base/GHC/ResponseFile.hs
@@ -0,0 +1,159 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : GHC.ResponseFile
+-- License : BSD-style (see the file LICENSE)
+--
+-- Maintainer : libraries@haskell.org
+-- Stability : internal
+-- Portability : portable
+--
+-- GCC style response files.
+--
+-- @since 4.12.0.0
+----------------------------------------------------------------------------
+
+-- Migrated from Haddock.
+
+module GHC.ResponseFile (
+ getArgsWithResponseFiles,
+ unescapeArgs,
+ escapeArgs,
+ expandResponse
+ ) where
+
+import Control.Exception
+import Data.Char (isSpace)
+import Data.Foldable (foldl')
+import System.Environment (getArgs)
+import System.Exit (exitFailure)
+import System.IO
+
+{-|
+Like 'getArgs', but can also read arguments supplied via response files.
+
+
+For example, consider a program @foo@:
+
+@
+main :: IO ()
+main = do
+ args <- getArgsWithResponseFiles
+ putStrLn (show args)
+@
+
+
+And a response file @args.txt@:
+
+@
+--one 1
+--'two' 2
+--"three" 3
+@
+
+Then the result of invoking @foo@ with @args.txt@ is:
+
+> > ./foo @args.txt
+> ["--one","1","--two","2","--three","3"]
+
+-}
+getArgsWithResponseFiles :: IO [String]
+getArgsWithResponseFiles = getArgs >>= expandResponse
+
+-- | Given a string of concatenated strings, separate each by removing
+-- a layer of /quoting/ and\/or /escaping/ of certain characters.
+--
+-- These characters are: any whitespace, single quote, double quote,
+-- and the backslash character. The backslash character always
+-- escapes (i.e., passes through without further consideration) the
+-- character which follows. Characters can also be escaped in blocks
+-- by quoting (i.e., surrounding the blocks with matching pairs of
+-- either single- or double-quotes which are not themselves escaped).
+--
+-- Any whitespace which appears outside of either of the quoting and
+-- escaping mechanisms, is interpreted as having been added by this
+-- special concatenation process to designate where the boundaries
+-- are between the original, un-concatenated list of strings. These
+-- added whitespace characters are removed from the output.
+--
+-- > unescapeArgs "hello\\ \\\"world\\\"\n" == escapeArgs "hello \"world\""
+unescapeArgs :: String -> [String]
+unescapeArgs = filter (not . null) . unescape
+
+-- | Given a list of strings, concatenate them into a single string
+-- with escaping of certain characters, and the addition of a newline
+-- between each string. The escaping is done by adding a single
+-- backslash character before any whitespace, single quote, double
+-- quote, or backslash character, so this escaping character must be
+-- removed. Unescaped whitespace (in this case, newline) is part
+-- of this "transport" format to indicate the end of the previous
+-- string and the start of a new string.
+--
+-- While 'unescapeArgs' allows using quoting (i.e., convenient
+-- escaping of many characters) by having matching sets of single- or
+-- double-quotes,'escapeArgs' does not use the quoting mechasnism,
+-- and thus will always escape any whitespace, quotes, and
+-- backslashes.
+--
+-- > unescapeArgs "hello\\ \\\"world\\\"\\n" == escapeArgs "hello \"world\""
+escapeArgs :: [String] -> String
+escapeArgs = unlines . map escapeArg
+
+-- | Arguments which look like '@foo' will be replaced with the
+-- contents of file @foo@. A gcc-like syntax for response files arguments
+-- is expected. This must re-constitute the argument list by doing an
+-- inverse of the escaping mechanism done by the calling-program side.
+--
+-- We quit if the file is not found or reading somehow fails.
+-- (A convenience routine for haddock or possibly other clients)
+expandResponse :: [String] -> IO [String]
+expandResponse = fmap concat . mapM expand
+ where
+ expand :: String -> IO [String]
+ expand ('@':f) = readFileExc f >>= return . unescapeArgs
+ expand x = return [x]
+
+ readFileExc f =
+ readFile f `catch` \(e :: IOException) -> do
+ hPutStrLn stderr $ "Error while expanding response file: " ++ show e
+ exitFailure
+
+data Quoting = NoneQ | SngQ | DblQ
+
+unescape :: String -> [String]
+unescape args = reverse . map reverse $ go args NoneQ False [] []
+ where
+ -- n.b., the order of these cases matters; these are cribbed from gcc
+ -- case 1: end of input
+ go [] _q _bs a as = a:as
+ -- case 2: back-slash escape in progress
+ go (c:cs) q True a as = go cs q False (c:a) as
+ -- case 3: no back-slash escape in progress, but got a back-slash
+ go (c:cs) q False a as
+ | '\\' == c = go cs q True a as
+ -- case 4: single-quote escaping in progress
+ go (c:cs) SngQ False a as
+ | '\'' == c = go cs NoneQ False a as
+ | otherwise = go cs SngQ False (c:a) as
+ -- case 5: double-quote escaping in progress
+ go (c:cs) DblQ False a as
+ | '"' == c = go cs NoneQ False a as
+ | otherwise = go cs DblQ False (c:a) as
+ -- case 6: no escaping is in progress
+ go (c:cs) NoneQ False a as
+ | isSpace c = go cs NoneQ False [] (a:as)
+ | '\'' == c = go cs SngQ False a as
+ | '"' == c = go cs DblQ False a as
+ | otherwise = go cs NoneQ False (c:a) as
+
+escapeArg :: String -> String
+escapeArg = reverse . foldl' escape []
+
+escape :: String -> Char -> String
+escape cs c
+ | isSpace c
+ || '\\' == c
+ || '\'' == c
+ || '"' == c = c:'\\':cs -- n.b., our caller must reverse the result
+ | otherwise = c:cs
diff --git a/libraries/base/base.cabal b/libraries/base/base.cabal
index a104f524d0..6da95b070f 100644
--- a/libraries/base/base.cabal
+++ b/libraries/base/base.cabal
@@ -270,6 +270,7 @@ Library
GHC.Read
GHC.Real
GHC.Records
+ GHC.ResponseFile
GHC.RTS.Flags
GHC.ST
GHC.StaticPtr
diff --git a/libraries/base/changelog.md b/libraries/base/changelog.md
index 157c7dfb2e..0bca2649d4 100644
--- a/libraries/base/changelog.md
+++ b/libraries/base/changelog.md
@@ -3,6 +3,9 @@
## 4.12.0.0 *TBA*
* Bundled with GHC *TBA*
+ * Add a new module `GHC.ResponseFile` (previously defined in the `haddock`
+ package). (#13896)
+
* Move the module `Data.Functor.Contravariant` from the
`contravariant` package to `base`.
diff --git a/libraries/base/tests/T13896.hs b/libraries/base/tests/T13896.hs
new file mode 100644
index 0000000000..9e269a4a7c
--- /dev/null
+++ b/libraries/base/tests/T13896.hs
@@ -0,0 +1,75 @@
+import GHC.ResponseFile
+
+assertEqual :: (Eq a, Show a) => a -> a -> IO ()
+assertEqual x y = if x == y
+ then return ()
+ else error $ "assertEqual: " ++ show x ++ " /= " ++ show y
+
+-- Migrated from Haddock.
+
+-- The first two elements are
+-- 1) a list of 'args' to encode and
+-- 2) a single string of the encoded args
+-- The 3rd element is just a description for the tests.
+testStrs :: [(([String], String), String)]
+testStrs =
+ [ ((["a simple command line"],
+ "a\\ simple\\ command\\ line\n"),
+ "the white-space, end with newline")
+
+ , ((["arg 'foo' is single quoted"],
+ "arg\\ \\'foo\\'\\ is\\ single\\ quoted\n"),
+ "the single quotes as well")
+
+ , ((["arg \"bar\" is double quoted"],
+ "arg\\ \\\"bar\\\"\\ is\\ double\\ quoted\n"),
+ "the double quotes as well" )
+
+ , ((["arg \"foo bar\" has embedded whitespace"],
+ "arg\\ \\\"foo\\ bar\\\"\\ has\\ embedded\\ whitespace\n"),
+ "the quote-embedded whitespace")
+
+ , ((["arg 'Jack said \\'hi\\'' has single quotes"],
+ "arg\\ \\'Jack\\ said\\ \\\\\\'hi\\\\\\'\\'\\ has\\ single\\ quotes\n"),
+ "the escaped single quotes")
+
+ , ((["arg 'Jack said \\\"hi\\\"' has double quotes"],
+ "arg\\ \\'Jack\\ said\\ \\\\\\\"hi\\\\\\\"\\'\\ has\\ double\\ quotes\n"),
+ "the escaped double quotes")
+
+ , ((["arg 'Jack said\\r\\n\\t \\\"hi\\\"' has other whitespace"],
+ "arg\\ \\'Jack\\ said\\\\r\\\\n\\\\t\\ \\\\\\\"hi\\\\\\\"\\'\\ has\\ \
+ \other\\ whitespace\n"),
+ "the other whitespace")
+
+ , (([ "--prologue=.\\dist\\.\\haddock-prologue3239114604.txt"
+ , "--title=HaddockNewline-0.1.0.0: This has a\n\
+ \newline yo."
+ , "-BC:\\Program Files\\Haskell Platform\\lib"],
+ "--prologue=.\\\\dist\\\\.\\\\haddock-prologue3239114604.txt\n\
+ \--title=HaddockNewline-0.1.0.0:\\ This\\ has\\ a\\\n\
+ \newline\\ yo.\n\
+ \-BC:\\\\Program\\ Files\\\\Haskell\\ Platform\\\\lib\n"),
+ "an actual haddock response file snippet with embedded newlines")
+ ]
+
+main :: IO ()
+main = do
+ -- Test escapeArgs
+ mapM_ (\((ss1,s2),des) -> escapeArgs ss1 `assertEqual` s2) testStrs
+
+ -- Test unescapeArgs
+ mapM_ (\((ss1,s2),des) -> unescapeArgs s2 `assertEqual` ss1) testStrs
+
+ -- Given unescaped quotes, it should pass-through,
+ -- without escaping everything inside
+
+ (filter (not . null) $
+ unescapeArgs "this\\ is\\ 'not escape\\d \"inside\"'\\ yo\n")
+ `assertEqual`
+ ["this is not escaped \"inside\" yo"]
+
+ (filter (not . null) $
+ unescapeArgs "this\\ is\\ \"not escape\\d 'inside'\"\\ yo\n")
+ `assertEqual`
+ ["this is not escaped 'inside' yo"]
diff --git a/libraries/base/tests/all.T b/libraries/base/tests/all.T
index 0db147ea1e..491df2fd7e 100644
--- a/libraries/base/tests/all.T
+++ b/libraries/base/tests/all.T
@@ -223,3 +223,4 @@ test('T3474',
compile_and_run, ['-O'])
test('T14425', normal, compile_and_run, [''])
test('T10412', normal, compile_and_run, [''])
+test('T13896', normal, compile_and_run, [''])