summaryrefslogtreecommitdiff
path: root/compiler/parser/LexCore.hs
diff options
context:
space:
mode:
authorSimon Marlow <simonmar@microsoft.com>2006-04-07 02:05:11 +0000
committerSimon Marlow <simonmar@microsoft.com>2006-04-07 02:05:11 +0000
commit0065d5ab628975892cea1ec7303f968c3338cbe1 (patch)
tree8e2afe0ab48ee33cf95009809d67c9649573ef92 /compiler/parser/LexCore.hs
parent28a464a75e14cece5db40f2765a29348273ff2d2 (diff)
downloadhaskell-0065d5ab628975892cea1ec7303f968c3338cbe1.tar.gz
Reorganisation of the source tree
Most of the other users of the fptools build system have migrated to Cabal, and with the move to darcs we can now flatten the source tree without losing history, so here goes. The main change is that the ghc/ subdir is gone, and most of what it contained is now at the top level. The build system now makes no pretense at being multi-project, it is just the GHC build system. No doubt this will break many things, and there will be a period of instability while we fix the dependencies. A straightforward build should work, but I haven't yet fixed binary/source distributions. Changes to the Building Guide will follow, too.
Diffstat (limited to 'compiler/parser/LexCore.hs')
-rw-r--r--compiler/parser/LexCore.hs130
1 files changed, 130 insertions, 0 deletions
diff --git a/compiler/parser/LexCore.hs b/compiler/parser/LexCore.hs
new file mode 100644
index 0000000000..1a545a3e43
--- /dev/null
+++ b/compiler/parser/LexCore.hs
@@ -0,0 +1,130 @@
+module LexCore where
+
+import ParserCoreUtils
+import Ratio
+import Char
+import qualified Numeric( readFloat, readDec )
+
+isNameChar c = isAlpha c || isDigit c || (c == '_') || (c == '\'')
+isKeywordChar c = isAlpha c || (c == '_')
+
+lexer :: (Token -> P a) -> P a
+lexer cont [] = cont TKEOF []
+lexer cont ('\n':cs) = \line -> lexer cont cs (line+1)
+lexer cont ('-':'>':cs) = cont TKrarrow cs
+
+lexer cont (c:cs)
+ | isSpace c = lexer cont cs
+ | isLower c || (c == '_') = lexName cont TKname (c:cs)
+ | isUpper c = lexName cont TKcname (c:cs)
+ | isDigit c || (c == '-') = lexNum cont (c:cs)
+
+lexer cont ('%':cs) = lexKeyword cont cs
+lexer cont ('\'':cs) = lexChar cont cs
+lexer cont ('\"':cs) = lexString [] cont cs
+lexer cont ('#':cs) = cont TKhash cs
+lexer cont ('(':cs) = cont TKoparen cs
+lexer cont (')':cs) = cont TKcparen cs
+lexer cont ('{':cs) = cont TKobrace cs
+lexer cont ('}':cs) = cont TKcbrace cs
+lexer cont ('=':cs) = cont TKeq cs
+lexer cont (':':':':cs) = cont TKcoloncolon cs
+lexer cont ('*':cs) = cont TKstar cs
+lexer cont ('.':cs) = cont TKdot cs
+lexer cont ('\\':cs) = cont TKlambda cs
+lexer cont ('@':cs) = cont TKat cs
+lexer cont ('?':cs) = cont TKquestion cs
+lexer cont (';':cs) = cont TKsemicolon cs
+lexer cont (c:cs) = failP "invalid character" [c]
+
+
+
+lexChar cont ('\\':'x':h1:h0:'\'':cs)
+ | isHexEscape [h1,h0] = cont (TKchar (hexToChar h1 h0)) cs
+lexChar cont ('\\':cs) = failP "invalid char character" ('\\':(take 10 cs))
+lexChar cont ('\'':cs) = failP "invalid char character" ['\'']
+lexChar cont ('\"':cs) = failP "invalid char character" ['\"']
+lexChar cont (c:'\'':cs) = cont (TKchar c) cs
+
+
+lexString s cont ('\\':'x':h1:h0:cs)
+ | isHexEscape [h1,h0] = lexString (s++[hexToChar h1 h0]) cont cs
+lexString s cont ('\\':cs) = failP "invalid string character" ['\\']
+lexString s cont ('\'':cs) = failP "invalid string character" ['\'']
+lexString s cont ('\"':cs) = cont (TKstring s) cs
+lexString s cont (c:cs) = lexString (s++[c]) cont cs
+
+isHexEscape = all (\c -> isHexDigit c && (isDigit c || isLower c))
+
+hexToChar h1 h0 = chr (digitToInt h1 * 16 + digitToInt h0)
+
+
+lexNum cont cs =
+ case cs of
+ ('-':cs) -> f (-1) cs
+ _ -> f 1 cs
+ where f sgn cs =
+ case span isDigit cs of
+ (digits,'.':c:rest)
+ | isDigit c -> cont (TKrational (fromInteger sgn * r)) rest'
+ where ((r,rest'):_) = readFloat (digits ++ ('.':c:rest))
+ -- When reading a floating-point number, which is
+ -- a bit complicated, use the Haskell 98 library function
+ (digits,rest) -> cont (TKinteger (sgn * (read digits))) rest
+
+lexName cont cstr cs = cont (cstr name) rest
+ where (name,rest) = span isNameChar cs
+
+lexKeyword cont cs =
+ case span isKeywordChar cs of
+ ("module",rest) -> cont TKmodule rest
+ ("data",rest) -> cont TKdata rest
+ ("newtype",rest) -> cont TKnewtype rest
+ ("forall",rest) -> cont TKforall rest
+ ("rec",rest) -> cont TKrec rest
+ ("let",rest) -> cont TKlet rest
+ ("in",rest) -> cont TKin rest
+ ("case",rest) -> cont TKcase rest
+ ("of",rest) -> cont TKof rest
+ ("coerce",rest) -> cont TKcoerce rest
+ ("note",rest) -> cont TKnote rest
+ ("external",rest) -> cont TKexternal rest
+ ("_",rest) -> cont TKwild rest
+ _ -> failP "invalid keyword" ('%':cs)
+
+
+#if __GLASGOW_HASKELL__ >= 504
+-- The readFloat in the Numeric library will do the job
+
+readFloat :: (RealFrac a) => ReadS a
+readFloat = Numeric.readFloat
+
+#else
+-- Haskell 98's Numeric.readFloat used to have a bogusly restricted signature
+-- so it was incapable of reading a rational.
+-- So for GHCs that have that old bogus library, here is the code, written out longhand.
+
+readFloat r = [(fromRational ((n%1)*10^^(k-d)),t) | (n,d,s) <- readFix r,
+ (k,t) <- readExp s] ++
+ [ (0/0, t) | ("NaN",t) <- lex r] ++
+ [ (1/0, t) | ("Infinity",t) <- lex r]
+ where
+ readFix r = [(read (ds++ds'), length ds', t)
+ | (ds,d) <- lexDigits r,
+ (ds',t) <- lexFrac d ]
+
+ lexFrac ('.':ds) = lexDigits ds
+ lexFrac s = [("",s)]
+
+ readExp (e:s) | e `elem` "eE" = readExp' s
+ readExp s = [(0,s)]
+
+ readExp' ('-':s) = [(-k,t) | (k,t) <- Numeric.readDec s]
+ readExp' ('+':s) = Numeric.readDec s
+ readExp' s = Numeric.readDec s
+
+lexDigits :: ReadS String
+lexDigits s = case span isDigit s of
+ (cs,s') | not (null cs) -> [(cs,s')]
+ otherwise -> []
+#endif