summaryrefslogtreecommitdiff
path: root/compiler/GHC/Parser/Header.hs
blob: daa8bc78a58a2de0ad0471c4fbd169e77198baea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471

{-# LANGUAGE TypeFamilies #-}

-----------------------------------------------------------------------------
--
-- | Parsing the top of a Haskell source file to get its module name,
-- imports and options.
--
-- (c) Simon Marlow 2005
-- (c) Lemmih 2006
--
-----------------------------------------------------------------------------

module GHC.Parser.Header
   ( getImports
   , mkPrelImports -- used by the renamer too
   , getOptionsFromFile
   , getOptions
   , toArgs
   , checkProcessArgsResult
   )
where

import GHC.Prelude

import GHC.Platform

import GHC.Driver.Session
import GHC.Driver.Config
import GHC.Driver.Errors.Types -- Unfortunate, needed due to the fact we throw exceptions!

import GHC.Parser.Errors.Types
import GHC.Parser           ( parseHeader )
import GHC.Parser.Lexer

import GHC.Hs
import GHC.Unit.Module
import GHC.Builtin.Names

import GHC.Types.Error hiding ( getErrorMessages, getWarningMessages, getMessages )
import GHC.Types.SrcLoc
import GHC.Types.SourceError
import GHC.Types.SourceText

import GHC.Utils.Misc
import GHC.Utils.Outputable as Outputable
import GHC.Utils.Panic
import GHC.Utils.Monad
import GHC.Utils.Error
import GHC.Utils.Exception as Exception

import GHC.Data.StringBuffer
import GHC.Data.Maybe
import GHC.Data.FastString
import qualified GHC.Data.Strict as Strict

import Control.Monad
import System.IO
import System.IO.Unsafe
import Data.List (partition)
import Data.Char (isSpace)
import Text.ParserCombinators.ReadP (readP_to_S, gather)
import Text.ParserCombinators.ReadPrec (readPrec_to_P)
import Text.Read (readPrec)

------------------------------------------------------------------------------

-- | Parse the imports of a source file.
--
-- Throws a 'SourceError' if parsing fails.
getImports :: ParserOpts   -- ^ Parser options
           -> Bool         -- ^ Implicit Prelude?
           -> StringBuffer -- ^ Parse this.
           -> FilePath     -- ^ Filename the buffer came from.  Used for
                           --   reporting parse error locations.
           -> FilePath     -- ^ The original source filename (used for locations
                           --   in the function result)
           -> IO (Either
               (Messages PsMessage)
               ([(Maybe FastString, Located ModuleName)],
                [(Maybe FastString, Located ModuleName)],
                Located ModuleName))
              -- ^ The source imports and normal imports (with optional package
              -- names from -XPackageImports), and the module name.
getImports popts implicit_prelude buf filename source_filename = do
  let loc  = mkRealSrcLoc (mkFastString filename) 1 1
  case unP parseHeader (initParserState popts buf loc) of
    PFailed pst ->
        -- assuming we're not logging warnings here as per below
      return $ Left $ getErrorMessages pst
    POk pst rdr_module -> fmap Right $ do
      let (_warns, errs) = getMessages pst
      -- don't log warnings: they'll be reported when we parse the file
      -- for real.  See #2500.
      if not (isEmptyMessages errs)
        then throwErrors (GhcPsMessage <$> errs)
        else
          let   hsmod = unLoc rdr_module
                mb_mod = hsmodName hsmod
                imps = hsmodImports hsmod
                main_loc = srcLocSpan (mkSrcLoc (mkFastString source_filename)
                                       1 1)
                mod = mb_mod `orElse` L main_loc mAIN_NAME
                (src_idecls, ord_idecls) = partition ((== IsBoot) . ideclSource . unLoc) imps

               -- GHC.Prim doesn't exist physically, so don't go looking for it.
                ordinary_imps = filter ((/= moduleName gHC_PRIM) . unLoc
                                        . ideclName . unLoc)
                                       ord_idecls

                implicit_imports = mkPrelImports (unLoc mod) main_loc
                                                 implicit_prelude imps
                convImport (L _ i) = (fmap sl_fs (ideclPkgQual i), ideclName i)
              in
              return (map convImport src_idecls,
                      map convImport (implicit_imports ++ ordinary_imps),
                      mod)

mkPrelImports :: ModuleName
              -> SrcSpan    -- Attribute the "import Prelude" to this location
              -> Bool -> [LImportDecl GhcPs]
              -> [LImportDecl GhcPs]
-- Construct the implicit declaration "import Prelude" (or not)
--
-- NB: opt_NoImplicitPrelude is slightly different to import Prelude ();
-- because the former doesn't even look at Prelude.hi for instance
-- declarations, whereas the latter does.
mkPrelImports this_mod loc implicit_prelude import_decls
  | this_mod == pRELUDE_NAME
   || explicit_prelude_import
   || not implicit_prelude
  = []
  | otherwise = [preludeImportDecl]
  where
      explicit_prelude_import = any is_prelude_import import_decls

      is_prelude_import (L _ decl) =
        unLoc (ideclName decl) == pRELUDE_NAME
        -- allow explicit "base" package qualifier (#19082, #17045)
        && case ideclPkgQual decl of
            Nothing -> True
            Just b  -> sl_fs b == unitIdFS baseUnitId


      loc' = noAnnSrcSpan loc
      preludeImportDecl :: LImportDecl GhcPs
      preludeImportDecl
        = L loc' $ ImportDecl { ideclExt       = noAnn,
                                ideclSourceSrc = NoSourceText,
                                ideclName      = L loc pRELUDE_NAME,
                                ideclPkgQual   = Nothing,
                                ideclSource    = NotBoot,
                                ideclSafe      = False,  -- Not a safe import
                                ideclQualified = NotQualified,
                                ideclImplicit  = True,   -- Implicit!
                                ideclAs        = Nothing,
                                ideclHiding    = Nothing  }

--------------------------------------------------------------
-- Get options
--------------------------------------------------------------

-- | Parse OPTIONS and LANGUAGE pragmas of the source file.
--
-- Throws a 'SourceError' if flag parsing fails (including unsupported flags.)
getOptionsFromFile :: DynFlags
                   -> FilePath            -- ^ Input file
                   -> IO [Located String] -- ^ Parsed options, if any.
getOptionsFromFile dflags filename
    = Exception.bracket
              (openBinaryFile filename ReadMode)
              (hClose)
              (\handle -> do
                  opts <- fmap (getOptions' dflags)
                               (lazyGetToks (initParserOpts dflags') filename handle)
                  seqList opts $ return opts)
    where -- We don't need to get haddock doc tokens when we're just
          -- getting the options from pragmas, and lazily lexing them
          -- correctly is a little tricky: If there is "\n" or "\n-"
          -- left at the end of a buffer then the haddock doc may
          -- continue past the end of the buffer, despite the fact that
          -- we already have an apparently-complete token.
          -- We therefore just turn Opt_Haddock off when doing the lazy
          -- lex.
          dflags' = gopt_unset dflags Opt_Haddock

blockSize :: Int
-- blockSize = 17 -- for testing :-)
blockSize = 1024

lazyGetToks :: ParserOpts -> FilePath -> Handle -> IO [Located Token]
lazyGetToks popts filename handle = do
  buf <- hGetStringBufferBlock handle blockSize
  let prag_state = initPragState popts buf loc
  unsafeInterleaveIO $ lazyLexBuf handle prag_state False blockSize
 where
  loc  = mkRealSrcLoc (mkFastString filename) 1 1

  lazyLexBuf :: Handle -> PState -> Bool -> Int -> IO [Located Token]
  lazyLexBuf handle state eof size =
    case unP (lexer False return) state of
      POk state' t -> do
        -- pprTrace "lazyLexBuf" (text (show (buffer state'))) (return ())
        if atEnd (buffer state') && not eof
           -- if this token reached the end of the buffer, and we haven't
           -- necessarily read up to the end of the file, then the token might
           -- be truncated, so read some more of the file and lex it again.
           then getMore handle state size
           else case unLoc t of
                  ITeof  -> return [t]
                  _other -> do rest <- lazyLexBuf handle state' eof size
                               return (t : rest)
      _ | not eof   -> getMore handle state size
        | otherwise -> return [L (mkSrcSpanPs (last_loc state)) ITeof]
                         -- parser assumes an ITeof sentinel at the end

  getMore :: Handle -> PState -> Int -> IO [Located Token]
  getMore handle state size = do
     -- pprTrace "getMore" (text (show (buffer state))) (return ())
     let new_size = size * 2
       -- double the buffer size each time we read a new block.  This
       -- counteracts the quadratic slowdown we otherwise get for very
       -- large module names (#5981)
     nextbuf <- hGetStringBufferBlock handle new_size
     if (len nextbuf == 0) then lazyLexBuf handle state True new_size else do
       newbuf <- appendStringBuffers (buffer state) nextbuf
       unsafeInterleaveIO $ lazyLexBuf handle state{buffer=newbuf} False new_size


getToks :: ParserOpts -> FilePath -> StringBuffer -> [Located Token]
getToks popts filename buf = lexAll pstate
 where
  pstate = initPragState popts buf loc
  loc  = mkRealSrcLoc (mkFastString filename) 1 1

  lexAll state = case unP (lexer False return) state of
                   POk _      t@(L _ ITeof) -> [t]
                   POk state' t -> t : lexAll state'
                   _ -> [L (mkSrcSpanPs (last_loc state)) ITeof]


-- | Parse OPTIONS and LANGUAGE pragmas of the source file.
--
-- Throws a 'SourceError' if flag parsing fails (including unsupported flags.)
getOptions :: DynFlags
           -> StringBuffer -- ^ Input Buffer
           -> FilePath     -- ^ Source filename.  Used for location info.
           -> [Located String] -- ^ Parsed options.
getOptions dflags buf filename
    = getOptions' dflags (getToks (initParserOpts dflags) filename buf)

-- The token parser is written manually because Happy can't
-- return a partial result when it encounters a lexer error.
-- We want to extract options before the buffer is passed through
-- CPP, so we can't use the same trick as 'getImports'.
getOptions' :: DynFlags
            -> [Located Token]      -- Input buffer
            -> [Located String]     -- Options.
getOptions' dflags toks
    = parseToks toks
    where
          parseToks (open:close:xs)
              | IToptions_prag str <- unLoc open
              , ITclose_prag       <- unLoc close
              = case toArgs starting_loc str of
                  Left _err -> optionsParseError str $   -- #15053
                                 combineSrcSpans (getLoc open) (getLoc close)
                  Right args -> args ++ parseToks xs
            where
              src_span      = getLoc open
              real_src_span = expectJust "getOptions'" (srcSpanToRealSrcSpan src_span)
              starting_loc  = realSrcSpanStart real_src_span
          parseToks (open:close:xs)
              | ITinclude_prag str <- unLoc open
              , ITclose_prag       <- unLoc close
              = map (L (getLoc open)) ["-#include",removeSpaces str] ++
                parseToks xs
          parseToks (open:close:xs)
              | ITdocOptions str _ <- unLoc open
              , ITclose_prag       <- unLoc close
              = map (L (getLoc open)) ["-haddock-opts", removeSpaces str]
                ++ parseToks xs
          parseToks (open:xs)
              | ITlanguage_prag <- unLoc open
              = parseLanguage xs
          parseToks (comment:xs) -- Skip over comments
              | isComment (unLoc comment)
              = parseToks xs
          parseToks _ = []
          parseLanguage ((L loc (ITconid fs)):rest)
              = checkExtension dflags (L loc fs) :
                case rest of
                  (L _loc ITcomma):more -> parseLanguage more
                  (L _loc ITclose_prag):more -> parseToks more
                  (L loc _):_ -> languagePragParseError loc
                  [] -> panic "getOptions'.parseLanguage(1) went past eof token"
          parseLanguage (tok:_)
              = languagePragParseError (getLoc tok)
          parseLanguage []
              = panic "getOptions'.parseLanguage(2) went past eof token"

          isComment :: Token -> Bool
          isComment c =
            case c of
              (ITlineComment {})     -> True
              (ITblockComment {})    -> True
              (ITdocCommentNext {})  -> True
              (ITdocCommentPrev {})  -> True
              (ITdocCommentNamed {}) -> True
              (ITdocSection {})      -> True
              _                      -> False

toArgs :: RealSrcLoc
       -> String -> Either String   -- Error
                           [Located String] -- Args
toArgs starting_loc orig_str
    = let (after_spaces_loc, after_spaces_str) = consume_spaces starting_loc orig_str in
      case after_spaces_str of
      '[':after_bracket ->
        let after_bracket_loc = advanceSrcLoc after_spaces_loc '['
            (after_bracket_spaces_loc, after_bracket_spaces_str)
              = consume_spaces after_bracket_loc after_bracket in
        case after_bracket_spaces_str of
          ']':rest | all isSpace rest -> Right []
          _ -> readAsList after_bracket_spaces_loc after_bracket_spaces_str

      _ -> toArgs' after_spaces_loc after_spaces_str
 where
  consume_spaces :: RealSrcLoc -> String -> (RealSrcLoc, String)
  consume_spaces loc [] = (loc, [])
  consume_spaces loc (c:cs)
    | isSpace c = consume_spaces (advanceSrcLoc loc c) cs
    | otherwise = (loc, c:cs)

  break_with_loc :: (Char -> Bool) -> RealSrcLoc -> String
                 -> (String, RealSrcLoc, String)  -- location is start of second string
  break_with_loc p = go []
    where
      go reversed_acc loc [] = (reverse reversed_acc, loc, [])
      go reversed_acc loc (c:cs)
        | p c       = (reverse reversed_acc, loc, c:cs)
        | otherwise = go (c:reversed_acc) (advanceSrcLoc loc c) cs

  advance_src_loc_many :: RealSrcLoc -> String -> RealSrcLoc
  advance_src_loc_many = foldl' advanceSrcLoc

  locate :: RealSrcLoc -> RealSrcLoc -> a -> Located a
  locate begin end x = L (RealSrcSpan (mkRealSrcSpan begin end) Strict.Nothing) x

  toArgs' :: RealSrcLoc -> String -> Either String [Located String]
  -- Remove outer quotes:
  -- > toArgs' "\"foo\" \"bar baz\""
  -- Right ["foo", "bar baz"]
  --
  -- Keep inner quotes:
  -- > toArgs' "-DFOO=\"bar baz\""
  -- Right ["-DFOO=\"bar baz\""]
  toArgs' loc s =
    let (after_spaces_loc, after_spaces_str) = consume_spaces loc s in
    case after_spaces_str of
      [] -> Right []
      '"' : _ -> do
        -- readAsString removes outer quotes
        (arg, new_loc, rest) <- readAsString after_spaces_loc after_spaces_str
        check_for_space rest
        (locate after_spaces_loc new_loc arg:)
          `fmap` toArgs' new_loc rest
      _ -> case break_with_loc (isSpace <||> (== '"')) after_spaces_loc after_spaces_str of
            (argPart1, loc2, s''@('"':_)) -> do
                (argPart2, loc3, rest) <- readAsString loc2 s''
                check_for_space rest
                -- show argPart2 to keep inner quotes
                (locate after_spaces_loc loc3 (argPart1 ++ show argPart2):)
                  `fmap` toArgs' loc3 rest
            (arg, loc2, s'') -> (locate after_spaces_loc loc2 arg:)
                                  `fmap` toArgs' loc2 s''

  check_for_space :: String -> Either String ()
  check_for_space [] = Right ()
  check_for_space (c:_)
    | isSpace c = Right ()
    | otherwise = Left ("Whitespace expected after string in " ++ show orig_str)

  reads_with_consumed :: Read a => String
                      -> [((String, a), String)]
                        -- ((consumed string, parsed result), remainder of input)
  reads_with_consumed = readP_to_S (gather (readPrec_to_P readPrec 0))

  readAsString :: RealSrcLoc
               -> String
               -> Either String (String, RealSrcLoc, String)
  readAsString loc s = case reads_with_consumed s of
                [((consumed, arg), rest)] ->
                    Right (arg, advance_src_loc_many loc consumed, rest)
                _ ->
                    Left ("Couldn't read " ++ show s ++ " as String")

   -- input has had the '[' stripped off
  readAsList :: RealSrcLoc -> String -> Either String [Located String]
  readAsList loc s = do
    let (after_spaces_loc, after_spaces_str) = consume_spaces loc s
    (arg, after_arg_loc, after_arg_str) <- readAsString after_spaces_loc after_spaces_str
    let (after_arg_spaces_loc, after_arg_spaces_str)
          = consume_spaces after_arg_loc after_arg_str
    (locate after_spaces_loc after_arg_loc arg :) <$>
      case after_arg_spaces_str of
        ',':after_comma -> readAsList (advanceSrcLoc after_arg_spaces_loc ',') after_comma
        ']':after_bracket
          | all isSpace after_bracket
          -> Right []
        _ -> Left ("Couldn't read " ++ show ('[' : s) ++ " as [String]")
             -- reinsert missing '[' for clarity.

-----------------------------------------------------------------------------

-- | Complain about non-dynamic flags in OPTIONS pragmas.
--
-- Throws a 'SourceError' if the input list is non-empty claiming that the
-- input flags are unknown.
checkProcessArgsResult :: MonadIO m => [Located String] -> m ()
checkProcessArgsResult flags
  = when (notNull flags) $
      liftIO $ throwErrors $ foldMap (singleMessage . mkMsg) flags
    where mkMsg (L loc flag)
              = mkPlainErrorMsgEnvelope loc $
                GhcDriverMessage $ DriverUnknownMessage $ mkPlainError noHints $
                  text "unknown flag in  {-# OPTIONS_GHC #-} pragma:" <+>
                  text flag

-----------------------------------------------------------------------------

checkExtension :: DynFlags -> Located FastString -> Located String
checkExtension dflags (L l ext)
-- Checks if a given extension is valid, and if so returns
-- its corresponding flag. Otherwise it throws an exception.
  = if ext' `elem` supported
    then L l ("-X"++ext')
    else unsupportedExtnError dflags l ext'
  where
    ext' = unpackFS ext
    supported = supportedLanguagesAndExtensions $ platformArchOS $ targetPlatform dflags

languagePragParseError :: SrcSpan -> a
languagePragParseError loc =
    throwErr loc $
       vcat [ text "Cannot parse LANGUAGE pragma"
            , text "Expecting comma-separated list of language options,"
            , text "each starting with a capital letter"
            , nest 2 (text "E.g. {-# LANGUAGE TemplateHaskell, GADTs #-}") ]

unsupportedExtnError :: DynFlags -> SrcSpan -> String -> a
unsupportedExtnError dflags loc unsup =
    throwErr loc $
        text "Unsupported extension: " <> text unsup $$
        if null suggestions then Outputable.empty else text "Perhaps you meant" <+> quotedListWithOr (map text suggestions)
  where
     supported = supportedLanguagesAndExtensions $ platformArchOS $ targetPlatform dflags
     suggestions = fuzzyMatch unsup supported

optionsParseError :: String -> SrcSpan -> a     -- #15053
optionsParseError str loc =
  throwErr loc $
      vcat [ text "Error while parsing OPTIONS_GHC pragma."
           , text "Expecting whitespace-separated list of GHC options."
           , text "  E.g. {-# OPTIONS_GHC -Wall -O2 #-}"
           , text ("Input was: " ++ show str) ]

throwErr :: SrcSpan -> SDoc -> a                -- #15053
throwErr loc doc =
  let msg = mkPlainErrorMsgEnvelope loc $ GhcPsMessage $ PsUnknownMessage $ mkPlainError noHints doc
  in throw $ mkSrcErr $ singleMessage msg