summaryrefslogtreecommitdiff
path: root/compiler/GHC/Cmm/Lexer.x
diff options
context:
space:
mode:
authorVladislav Zavialov <vlad.z.4096@gmail.com>2020-01-23 23:03:04 +0300
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-02-29 05:06:31 -0500
commit327b29e1a05d9f1ea04465c9b23aed92473dd453 (patch)
tree0b6db26b4677c2677a32754de523eb842f9cb849 /compiler/GHC/Cmm/Lexer.x
parent37f126033f1e5bf0331143f005ef90ba6e2e02cd (diff)
downloadhaskell-327b29e1a05d9f1ea04465c9b23aed92473dd453.tar.gz
Monotonic locations (#17632)
When GHC is parsing a file generated by a tool, e.g. by the C preprocessor, the tool may insert #line pragmas to adjust the locations reported to the user. As the result, the locations recorded in RealSrcLoc are not monotonic. Elements that appear later in the StringBuffer are not guaranteed to have a higher line/column number. In fact, there are no guarantees whatsoever, as #line pragmas can arbitrarily modify locations. This lack of guarantees makes ideas such as #17544 infeasible. This patch adds an additional bit of information to every SrcLoc: newtype BufPos = BufPos { bufPos :: Int } A BufPos represents the location in the StringBuffer, unaffected by any pragmas. Updates haddock submodule. Metric Increase: haddock.Cabal haddock.base haddock.compiler MultiLayerModules Naperian parsing001 T12150
Diffstat (limited to 'compiler/GHC/Cmm/Lexer.x')
-rw-r--r--compiler/GHC/Cmm/Lexer.x20
1 files changed, 10 insertions, 10 deletions
diff --git a/compiler/GHC/Cmm/Lexer.x b/compiler/GHC/Cmm/Lexer.x
index d8f15b916c..be2f676608 100644
--- a/compiler/GHC/Cmm/Lexer.x
+++ b/compiler/GHC/Cmm/Lexer.x
@@ -185,7 +185,7 @@ data CmmToken
-- -----------------------------------------------------------------------------
-- Lexer actions
-type Action = RealSrcSpan -> StringBuffer -> Int -> PD (RealLocated CmmToken)
+type Action = PsSpan -> StringBuffer -> Int -> PD (PsLocated CmmToken)
begin :: Int -> Action
begin code _span _str _len = do liftP (pushLexState code); lexToken
@@ -290,7 +290,7 @@ tok_string str = CmmT_String (read str)
-- Line pragmas
setLine :: Int -> Action
-setLine code span buf len = do
+setLine code (PsSpan span _) buf len = do
let line = parseUnsignedInteger buf len 10 octDecDigit
liftP $ do
setSrcLoc (mkRealSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
@@ -300,7 +300,7 @@ setLine code span buf len = do
lexToken
setFile :: Int -> Action
-setFile code span buf len = do
+setFile code (PsSpan span _) buf len = do
let file = lexemeToFastString (stepOn buf) (len-2)
liftP $ do
setSrcLoc (mkRealSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
@@ -315,23 +315,23 @@ cmmlex :: (Located CmmToken -> PD a) -> PD a
cmmlex cont = do
(L span tok) <- lexToken
--trace ("token: " ++ show tok) $ do
- cont (L (RealSrcSpan span) tok)
+ cont (L (mkSrcSpanPs span) tok)
-lexToken :: PD (RealLocated CmmToken)
+lexToken :: PD (PsLocated CmmToken)
lexToken = do
inp@(loc1,buf) <- getInput
sc <- liftP getLexState
case alexScan inp sc of
- AlexEOF -> do let span = mkRealSrcSpan loc1 loc1
+ AlexEOF -> do let span = mkPsSpan loc1 loc1
liftP (setLastToken span 0)
return (L span CmmT_EOF)
- AlexError (loc2,_) -> liftP $ failLocMsgP loc1 loc2 "lexical error"
+ AlexError (loc2,_) -> liftP $ failLocMsgP (psRealLoc loc1) (psRealLoc loc2) "lexical error"
AlexSkip inp2 _ -> do
setInput inp2
lexToken
AlexToken inp2@(end,_buf2) len t -> do
setInput inp2
- let span = mkRealSrcSpan loc1 end
+ let span = mkPsSpan loc1 end
span `seq` liftP (setLastToken span len)
t span buf len
@@ -339,7 +339,7 @@ lexToken = do
-- Monad stuff
-- Stuff that Alex needs to know about our input type:
-type AlexInput = (RealSrcLoc,StringBuffer)
+type AlexInput = (PsLoc,StringBuffer)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (_,s) = prevChar s '\n'
@@ -357,7 +357,7 @@ alexGetByte (loc,s)
| otherwise = b `seq` loc' `seq` s' `seq` Just (b, (loc', s'))
where c = currentChar s
b = fromIntegral $ ord $ c
- loc' = advanceSrcLoc loc c
+ loc' = advancePsLoc loc c
s' = stepOn s
getInput :: PD AlexInput