diff options
author | Chaitanya Koparkar <ckoparkar@gmail.com> | 2018-05-03 12:38:36 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2018-05-03 12:39:16 -0400 |
commit | 866525a1765715b8b9902e1bd53b9af1c7a93a30 (patch) | |
tree | c9b4ffb66b24ef633783fc193505a655406bf67b | |
parent | 6132d7c5e6404936ef281a6f3be333fea780906e (diff) | |
download | haskell-866525a1765715b8b9902e1bd53b9af1c7a93a30.tar.gz |
Move the ResponseFile module from haddock into base
GHC and the build tools use "response files" to work around the limit
on the length of command line arguments on Windows. Haddock's
implementation of parsing response files (i.e escaping/unescaping the
appropriate characters) seems complete, is well tested, and also
closely matches the GCC version. This patch moves the relevant bits
into `base` so that it's easier for other libraries to reuse it.
Test Plan: make test TEST=T13896
Reviewers: bgamari, RyanGlScott, 23Skidoo, hvr
Reviewed By: RyanGlScott
Subscribers: thomie, carter
GHC Trac Issues: #13896
Differential Revision: https://phabricator.haskell.org/D4612
-rw-r--r-- | libraries/base/GHC/ResponseFile.hs | 159 | ||||
-rw-r--r-- | libraries/base/base.cabal | 1 | ||||
-rw-r--r-- | libraries/base/changelog.md | 3 | ||||
-rw-r--r-- | libraries/base/tests/T13896.hs | 75 | ||||
-rw-r--r-- | libraries/base/tests/all.T | 1 |
5 files changed, 239 insertions, 0 deletions
diff --git a/libraries/base/GHC/ResponseFile.hs b/libraries/base/GHC/ResponseFile.hs new file mode 100644 index 0000000000..3c2f64894b --- /dev/null +++ b/libraries/base/GHC/ResponseFile.hs @@ -0,0 +1,159 @@ +{-# LANGUAGE ScopedTypeVariables #-} + +----------------------------------------------------------------------------- +-- | +-- Module : GHC.ResponseFile +-- License : BSD-style (see the file LICENSE) +-- +-- Maintainer : libraries@haskell.org +-- Stability : internal +-- Portability : portable +-- +-- GCC style response files. +-- +-- @since 4.12.0.0 +---------------------------------------------------------------------------- + +-- Migrated from Haddock. + +module GHC.ResponseFile ( + getArgsWithResponseFiles, + unescapeArgs, + escapeArgs, + expandResponse + ) where + +import Control.Exception +import Data.Char (isSpace) +import Data.Foldable (foldl') +import System.Environment (getArgs) +import System.Exit (exitFailure) +import System.IO + +{-| +Like 'getArgs', but can also read arguments supplied via response files. + + +For example, consider a program @foo@: + +@ +main :: IO () +main = do + args <- getArgsWithResponseFiles + putStrLn (show args) +@ + + +And a response file @args.txt@: + +@ +--one 1 +--'two' 2 +--"three" 3 +@ + +Then the result of invoking @foo@ with @args.txt@ is: + +> > ./foo @args.txt +> ["--one","1","--two","2","--three","3"] + +-} +getArgsWithResponseFiles :: IO [String] +getArgsWithResponseFiles = getArgs >>= expandResponse + +-- | Given a string of concatenated strings, separate each by removing +-- a layer of /quoting/ and\/or /escaping/ of certain characters. +-- +-- These characters are: any whitespace, single quote, double quote, +-- and the backslash character. The backslash character always +-- escapes (i.e., passes through without further consideration) the +-- character which follows. Characters can also be escaped in blocks +-- by quoting (i.e., surrounding the blocks with matching pairs of +-- either single- or double-quotes which are not themselves escaped). +-- +-- Any whitespace which appears outside of either of the quoting and +-- escaping mechanisms, is interpreted as having been added by this +-- special concatenation process to designate where the boundaries +-- are between the original, un-concatenated list of strings. These +-- added whitespace characters are removed from the output. +-- +-- > unescapeArgs "hello\\ \\\"world\\\"\n" == escapeArgs "hello \"world\"" +unescapeArgs :: String -> [String] +unescapeArgs = filter (not . null) . unescape + +-- | Given a list of strings, concatenate them into a single string +-- with escaping of certain characters, and the addition of a newline +-- between each string. The escaping is done by adding a single +-- backslash character before any whitespace, single quote, double +-- quote, or backslash character, so this escaping character must be +-- removed. Unescaped whitespace (in this case, newline) is part +-- of this "transport" format to indicate the end of the previous +-- string and the start of a new string. +-- +-- While 'unescapeArgs' allows using quoting (i.e., convenient +-- escaping of many characters) by having matching sets of single- or +-- double-quotes,'escapeArgs' does not use the quoting mechasnism, +-- and thus will always escape any whitespace, quotes, and +-- backslashes. +-- +-- > unescapeArgs "hello\\ \\\"world\\\"\\n" == escapeArgs "hello \"world\"" +escapeArgs :: [String] -> String +escapeArgs = unlines . map escapeArg + +-- | Arguments which look like '@foo' will be replaced with the +-- contents of file @foo@. A gcc-like syntax for response files arguments +-- is expected. This must re-constitute the argument list by doing an +-- inverse of the escaping mechanism done by the calling-program side. +-- +-- We quit if the file is not found or reading somehow fails. +-- (A convenience routine for haddock or possibly other clients) +expandResponse :: [String] -> IO [String] +expandResponse = fmap concat . mapM expand + where + expand :: String -> IO [String] + expand ('@':f) = readFileExc f >>= return . unescapeArgs + expand x = return [x] + + readFileExc f = + readFile f `catch` \(e :: IOException) -> do + hPutStrLn stderr $ "Error while expanding response file: " ++ show e + exitFailure + +data Quoting = NoneQ | SngQ | DblQ + +unescape :: String -> [String] +unescape args = reverse . map reverse $ go args NoneQ False [] [] + where + -- n.b., the order of these cases matters; these are cribbed from gcc + -- case 1: end of input + go [] _q _bs a as = a:as + -- case 2: back-slash escape in progress + go (c:cs) q True a as = go cs q False (c:a) as + -- case 3: no back-slash escape in progress, but got a back-slash + go (c:cs) q False a as + | '\\' == c = go cs q True a as + -- case 4: single-quote escaping in progress + go (c:cs) SngQ False a as + | '\'' == c = go cs NoneQ False a as + | otherwise = go cs SngQ False (c:a) as + -- case 5: double-quote escaping in progress + go (c:cs) DblQ False a as + | '"' == c = go cs NoneQ False a as + | otherwise = go cs DblQ False (c:a) as + -- case 6: no escaping is in progress + go (c:cs) NoneQ False a as + | isSpace c = go cs NoneQ False [] (a:as) + | '\'' == c = go cs SngQ False a as + | '"' == c = go cs DblQ False a as + | otherwise = go cs NoneQ False (c:a) as + +escapeArg :: String -> String +escapeArg = reverse . foldl' escape [] + +escape :: String -> Char -> String +escape cs c + | isSpace c + || '\\' == c + || '\'' == c + || '"' == c = c:'\\':cs -- n.b., our caller must reverse the result + | otherwise = c:cs diff --git a/libraries/base/base.cabal b/libraries/base/base.cabal index a104f524d0..6da95b070f 100644 --- a/libraries/base/base.cabal +++ b/libraries/base/base.cabal @@ -270,6 +270,7 @@ Library GHC.Read GHC.Real GHC.Records + GHC.ResponseFile GHC.RTS.Flags GHC.ST GHC.StaticPtr diff --git a/libraries/base/changelog.md b/libraries/base/changelog.md index 157c7dfb2e..0bca2649d4 100644 --- a/libraries/base/changelog.md +++ b/libraries/base/changelog.md @@ -3,6 +3,9 @@ ## 4.12.0.0 *TBA* * Bundled with GHC *TBA* + * Add a new module `GHC.ResponseFile` (previously defined in the `haddock` + package). (#13896) + * Move the module `Data.Functor.Contravariant` from the `contravariant` package to `base`. diff --git a/libraries/base/tests/T13896.hs b/libraries/base/tests/T13896.hs new file mode 100644 index 0000000000..9e269a4a7c --- /dev/null +++ b/libraries/base/tests/T13896.hs @@ -0,0 +1,75 @@ +import GHC.ResponseFile + +assertEqual :: (Eq a, Show a) => a -> a -> IO () +assertEqual x y = if x == y + then return () + else error $ "assertEqual: " ++ show x ++ " /= " ++ show y + +-- Migrated from Haddock. + +-- The first two elements are +-- 1) a list of 'args' to encode and +-- 2) a single string of the encoded args +-- The 3rd element is just a description for the tests. +testStrs :: [(([String], String), String)] +testStrs = + [ ((["a simple command line"], + "a\\ simple\\ command\\ line\n"), + "the white-space, end with newline") + + , ((["arg 'foo' is single quoted"], + "arg\\ \\'foo\\'\\ is\\ single\\ quoted\n"), + "the single quotes as well") + + , ((["arg \"bar\" is double quoted"], + "arg\\ \\\"bar\\\"\\ is\\ double\\ quoted\n"), + "the double quotes as well" ) + + , ((["arg \"foo bar\" has embedded whitespace"], + "arg\\ \\\"foo\\ bar\\\"\\ has\\ embedded\\ whitespace\n"), + "the quote-embedded whitespace") + + , ((["arg 'Jack said \\'hi\\'' has single quotes"], + "arg\\ \\'Jack\\ said\\ \\\\\\'hi\\\\\\'\\'\\ has\\ single\\ quotes\n"), + "the escaped single quotes") + + , ((["arg 'Jack said \\\"hi\\\"' has double quotes"], + "arg\\ \\'Jack\\ said\\ \\\\\\\"hi\\\\\\\"\\'\\ has\\ double\\ quotes\n"), + "the escaped double quotes") + + , ((["arg 'Jack said\\r\\n\\t \\\"hi\\\"' has other whitespace"], + "arg\\ \\'Jack\\ said\\\\r\\\\n\\\\t\\ \\\\\\\"hi\\\\\\\"\\'\\ has\\ \ + \other\\ whitespace\n"), + "the other whitespace") + + , (([ "--prologue=.\\dist\\.\\haddock-prologue3239114604.txt" + , "--title=HaddockNewline-0.1.0.0: This has a\n\ + \newline yo." + , "-BC:\\Program Files\\Haskell Platform\\lib"], + "--prologue=.\\\\dist\\\\.\\\\haddock-prologue3239114604.txt\n\ + \--title=HaddockNewline-0.1.0.0:\\ This\\ has\\ a\\\n\ + \newline\\ yo.\n\ + \-BC:\\\\Program\\ Files\\\\Haskell\\ Platform\\\\lib\n"), + "an actual haddock response file snippet with embedded newlines") + ] + +main :: IO () +main = do + -- Test escapeArgs + mapM_ (\((ss1,s2),des) -> escapeArgs ss1 `assertEqual` s2) testStrs + + -- Test unescapeArgs + mapM_ (\((ss1,s2),des) -> unescapeArgs s2 `assertEqual` ss1) testStrs + + -- Given unescaped quotes, it should pass-through, + -- without escaping everything inside + + (filter (not . null) $ + unescapeArgs "this\\ is\\ 'not escape\\d \"inside\"'\\ yo\n") + `assertEqual` + ["this is not escaped \"inside\" yo"] + + (filter (not . null) $ + unescapeArgs "this\\ is\\ \"not escape\\d 'inside'\"\\ yo\n") + `assertEqual` + ["this is not escaped 'inside' yo"] diff --git a/libraries/base/tests/all.T b/libraries/base/tests/all.T index 0db147ea1e..491df2fd7e 100644 --- a/libraries/base/tests/all.T +++ b/libraries/base/tests/all.T @@ -223,3 +223,4 @@ test('T3474', compile_and_run, ['-O']) test('T14425', normal, compile_and_run, ['']) test('T10412', normal, compile_and_run, ['']) +test('T13896', normal, compile_and_run, ['']) |