summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorTakenobu Tani <takenobu.hs@gmail.com>2018-01-21 12:08:59 -0500
committerBen Gamari <ben@smart-cactus.org>2018-01-21 12:09:00 -0500
commit4a13c5b1f4beb53cbf1f3529acdf3ba37528e694 (patch)
treecfce02f26d602175f99699763b0c654b4b65033e /compiler
parent180ca65ff6d1b4f3f4cdadc569fd4de107be14db (diff)
downloadhaskell-4a13c5b1f4beb53cbf1f3529acdf3ba37528e694.tar.gz
Implement underscores in numeric literals (NumericUnderscores extension)
Implement the proposal of underscores in numeric literals. Underscores in numeric literals are simply ignored. The specification of the feature is available here: https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/000 9-numeric-underscores.rst For a discussion of the various choices: https://github.com/ghc-proposals/ghc-proposals/pull/76 Implementation detail: * Added dynamic flag * `NumericUnderscores` extension flag is added for this feature. * Alex "Regular expression macros" in Lexer.x * Add `@numspc` (numeric spacer) macro to represent multiple underscores. * Modify `@decimal`, `@decimal`, `@binary`, `@octal`, `@hexadecimal`, `@exponent`, and `@bin_exponent` macros to include `@numspc`. * Alex "Rules" in Lexer.x * To be simpler, we have only the definitions with underscores. And then we have a separate function (`tok_integral` and `tok_frac`) that validates the literals. * Validation functions in Lexer.x * `tok_integral` and `tok_frac` functions validate whether contain underscores or not. If `NumericUnderscores` extensions are not enabled, check that there are no underscores. * `tok_frac` function is created by merging `strtoken` and `init_strtoken`. * `init_strtoken` is deleted. Because it is no longer used. * Remove underscores from target literal string * `parseUnsignedInteger`, `readRational__`, and `readHexRational} use the customized `span'` function to remove underscores. * Added Testcase * testcase for NumericUnderscores enabled. NumericUnderscores0.hs and NumericUnderscores1.hs * testcase for NumericUnderscores disabled. NoNumericUnderscores0.hs and NoNumericUnderscores1.hs * testcase to invalid pattern for NumericUnderscores enabled. NumericUnderscoresFail0.hs and NumericUnderscoresFail1.hs Test Plan: `validate` including the above testcase Reviewers: goldfire, bgamari Reviewed By: bgamari Subscribers: carter, rwbarton, thomie GHC Trac Issues: #14473 Differential Revision: https://phabricator.haskell.org/D4235
Diffstat (limited to 'compiler')
-rw-r--r--compiler/main/DynFlags.hs1
-rw-r--r--compiler/parser/Lexer.x101
-rw-r--r--compiler/utils/StringBuffer.hs1
-rw-r--r--compiler/utils/Util.hs21
4 files changed, 80 insertions, 44 deletions
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 3324d5532e..77837e6d3d 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -4057,6 +4057,7 @@ xFlagsDeps = [
flagSpec "MonomorphismRestriction" LangExt.MonomorphismRestriction,
flagSpec "MultiParamTypeClasses" LangExt.MultiParamTypeClasses,
flagSpec "MultiWayIf" LangExt.MultiWayIf,
+ flagSpec "NumericUnderscores" LangExt.NumericUnderscores,
flagSpec "NPlusKPatterns" LangExt.NPlusKPatterns,
flagSpec "NamedFieldPuns" LangExt.RecordPuns,
flagSpec "NamedWildCards" LangExt.NamedWildCards,
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 76cc4ee6e6..d8a670e7fb 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -177,12 +177,14 @@ $docsym = [\| \^ \* \$]
@varsym = ($symbol # \:) $symbol* -- variable (operator) symbol
@consym = \: $symbol* -- constructor (operator) symbol
-@decimal = $decdigit+
-@binary = $binit+
-@octal = $octit+
-@hexadecimal = $hexit+
-@exponent = [eE] [\-\+]? @decimal
-@bin_exponent = [pP] [\-\+]? @decimal
+-- See Note [Lexing NumericUnderscores extension] and #14473
+@numspc = _* -- numeric spacer (#14473)
+@decimal = $decdigit(@numspc $decdigit)*
+@binary = $binit(@numspc $binit)*
+@octal = $octit(@numspc $octit)*
+@hexadecimal = $hexit(@numspc $hexit)*
+@exponent = @numspc [eE] [\-\+]? @decimal
+@bin_exponent = @numspc [pP] [\-\+]? @decimal
@qual = (@conid \.)+
@qvarid = @qual @varid
@@ -190,8 +192,8 @@ $docsym = [\| \^ \* \$]
@qvarsym = @qual @varsym
@qconsym = @qual @consym
-@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
-@hex_floating_point = @hexadecimal \. @hexadecimal @bin_exponent? | @hexadecimal @bin_exponent
+@floating_point = @numspc @decimal \. @decimal @exponent? | @numspc @decimal @exponent
+@hex_floating_point = @numspc @hexadecimal \. @hexadecimal @bin_exponent? | @numspc @hexadecimal @bin_exponent
-- normal signed numerical literals can only be explicitly negative,
-- not explicitly positive (contrast @exponent)
@@ -485,24 +487,34 @@ $tab { warnTab }
-- For the normal boxed literals we need to be careful
-- when trying to be close to Haskell98
+
+-- Note [Lexing NumericUnderscores extension] (#14473)
+--
+-- NumericUnderscores extension allows underscores in numeric literals.
+-- Multiple underscores are represented with @numspc macro.
+-- To be simpler, we have only the definitions with underscores.
+-- And then we have a separate function (tok_integral and tok_frac)
+-- that validates the literals.
+-- If extensions are not enabled, check that there are no underscores.
+--
<0> {
-- Normal integral literals (:: Num a => a, from Integer)
@decimal { tok_num positive 0 0 decimal }
- 0[bB] @binary / { ifExtension binaryLiteralsEnabled } { tok_num positive 2 2 binary }
- 0[oO] @octal { tok_num positive 2 2 octal }
- 0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
+ 0[bB] @numspc @binary / { ifExtension binaryLiteralsEnabled } { tok_num positive 2 2 binary }
+ 0[oO] @numspc @octal { tok_num positive 2 2 octal }
+ 0[xX] @numspc @hexadecimal { tok_num positive 2 2 hexadecimal }
@negative @decimal / { ifExtension negativeLiteralsEnabled } { tok_num negative 1 1 decimal }
- @negative 0[bB] @binary / { ifExtension negativeLiteralsEnabled `alexAndPred`
- ifExtension binaryLiteralsEnabled } { tok_num negative 3 3 binary }
- @negative 0[oO] @octal / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 octal }
- @negative 0[xX] @hexadecimal / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 hexadecimal }
+ @negative 0[bB] @numspc @binary / { ifExtension negativeLiteralsEnabled `alexAndPred`
+ ifExtension binaryLiteralsEnabled } { tok_num negative 3 3 binary }
+ @negative 0[oO] @numspc @octal / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 octal }
+ @negative 0[xX] @numspc @hexadecimal / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 hexadecimal }
-- Normal rational literals (:: Fractional a => a, from Rational)
- @floating_point { strtoken tok_float }
- @negative @floating_point / { ifExtension negativeLiteralsEnabled } { strtoken tok_float }
- 0[xX] @hex_floating_point / { ifExtension hexFloatLiteralsEnabled } { strtoken tok_hex_float }
- @negative 0[xX]@hex_floating_point / { ifExtension hexFloatLiteralsEnabled `alexAndPred`
- ifExtension negativeLiteralsEnabled } { strtoken tok_hex_float }
+ @floating_point { tok_frac 0 tok_float }
+ @negative @floating_point / { ifExtension negativeLiteralsEnabled } { tok_frac 0 tok_float }
+ 0[xX] @numspc @hex_floating_point / { ifExtension hexFloatLiteralsEnabled } { tok_frac 0 tok_hex_float }
+ @negative 0[xX] @numspc @hex_floating_point / { ifExtension hexFloatLiteralsEnabled `alexAndPred`
+ ifExtension negativeLiteralsEnabled } { tok_frac 0 tok_hex_float }
}
<0> {
@@ -510,26 +522,26 @@ $tab { warnTab }
-- It's simpler (and faster?) to give separate cases to the negatives,
-- especially considering octal/hexadecimal prefixes.
@decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
- 0[bB] @binary \# / { ifExtension magicHashEnabled `alexAndPred`
+ 0[bB] @numspc @binary \# / { ifExtension magicHashEnabled `alexAndPred`
ifExtension binaryLiteralsEnabled } { tok_primint positive 2 3 binary }
- 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
- 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
+ 0[oO] @numspc @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
+ 0[xX] @numspc @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
@negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
- @negative 0[bB] @binary \# / { ifExtension magicHashEnabled `alexAndPred`
- ifExtension binaryLiteralsEnabled } { tok_primint negative 3 4 binary }
- @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
- @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
+ @negative 0[bB] @numspc @binary \# / { ifExtension magicHashEnabled `alexAndPred`
+ ifExtension binaryLiteralsEnabled } { tok_primint negative 3 4 binary }
+ @negative 0[oO] @numspc @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
+ @negative 0[xX] @numspc @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
@decimal \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
- 0[bB] @binary \# \# / { ifExtension magicHashEnabled `alexAndPred`
+ 0[bB] @numspc @binary \# \# / { ifExtension magicHashEnabled `alexAndPred`
ifExtension binaryLiteralsEnabled } { tok_primword 2 4 binary }
- 0[oO] @octal \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
- 0[xX] @hexadecimal \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
+ 0[oO] @numspc @octal \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
+ 0[xX] @numspc @hexadecimal \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
-- Unboxed floats and doubles (:: Float#, :: Double#)
-- prim_{float,double} work with signed literals
- @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
- @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
+ @signed @floating_point \# / { ifExtension magicHashEnabled } { tok_frac 1 tok_primfloat }
+ @signed @floating_point \# \# / { ifExtension magicHashEnabled } { tok_frac 2 tok_primdouble }
}
-- Strings and chars are lexed by hand-written code. The reason is
@@ -943,11 +955,6 @@ strtoken :: (String -> Token) -> Action
strtoken f span buf len =
return (L span $! (f $! lexemeToString buf len))
-init_strtoken :: Int -> (String -> Token) -> Action
--- like strtoken, but drops the last N character(s)
-init_strtoken drop f span buf len =
- return (L span $! (f $! lexemeToString buf (len-drop)))
-
begin :: Int -> Action
begin code _span _str _len = do pushLexState code; lexToken
@@ -1277,8 +1284,12 @@ tok_integral :: (SourceText -> Integer -> Token)
-> Int -> Int
-> (Integer, (Char -> Int))
-> Action
-tok_integral itint transint transbuf translen (radix,char_to_int) span buf len
- = return $ L span $ itint (SourceText $ lexemeToString buf len)
+tok_integral itint transint transbuf translen (radix,char_to_int) span buf len = do
+ numericUnderscores <- extension numericUnderscoresEnabled -- #14473
+ let src = lexemeToString buf len
+ if (not numericUnderscores) && ('_' `elem` src)
+ then failMsgP "Use NumericUnderscores to allow underscores in integer literals"
+ else return $ L span $ itint (SourceText src)
$! transint $ parseUnsignedInteger
(offsetBytes transbuf buf) (subtract translen len) radix char_to_int
@@ -1310,6 +1321,14 @@ octal = (8,octDecDigit)
hexadecimal = (16,hexDigit)
-- readRational can understand negative rationals, exponents, everything.
+tok_frac :: Int -> (String -> Token) -> Action
+tok_frac drop f span buf len = do
+ numericUnderscores <- extension numericUnderscoresEnabled -- #14473
+ let src = lexemeToString buf (len-drop)
+ if (not numericUnderscores) && ('_' `elem` src)
+ then failMsgP "Use NumericUnderscores to allow underscores in floating literals"
+ else return (L span $! (f $! src))
+
tok_float, tok_primfloat, tok_primdouble :: String -> Token
tok_float str = ITrational $! readFractionalLit str
tok_hex_float str = ITrational $! readHexFractionalLit str
@@ -2221,6 +2240,7 @@ data ExtBits
| HexFloatLiteralsBit
| TypeApplicationsBit
| StaticPointersBit
+ | NumericUnderscoresBit
deriving Enum
@@ -2289,6 +2309,8 @@ typeApplicationEnabled :: ExtsBitmap -> Bool
typeApplicationEnabled = xtest TypeApplicationsBit
staticPointersEnabled :: ExtsBitmap -> Bool
staticPointersEnabled = xtest StaticPointersBit
+numericUnderscoresEnabled :: ExtsBitmap -> Bool
+numericUnderscoresEnabled = xtest NumericUnderscoresBit
-- PState for parsing options pragmas
--
@@ -2344,6 +2366,7 @@ mkParserFlags flags =
.|. PatternSynonymsBit `setBitIf` xopt LangExt.PatternSynonyms flags
.|. TypeApplicationsBit `setBitIf` xopt LangExt.TypeApplications flags
.|. StaticPointersBit `setBitIf` xopt LangExt.StaticPointers flags
+ .|. NumericUnderscoresBit `setBitIf` xopt LangExt.NumericUnderscores flags
setBitIf :: ExtBits -> Bool -> ExtsBitmap
b `setBitIf` cond | cond = xbit b
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index 39941e2f7a..a5fc4e7f12 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -323,5 +323,6 @@ parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int
= inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
go i x | i == len = x
| otherwise = case fst (utf8DecodeChar (ptr `plusPtr` (cur + i))) of
+ '_' -> go (i + 1) x -- skip "_" (#14473)
char -> go (i + 1) (x * radix + toInteger (char_to_int char))
in go 0 0
diff --git a/compiler/utils/Util.hs b/compiler/utils/Util.hs
index 7a46db7665..a4520ed679 100644
--- a/compiler/utils/Util.hs
+++ b/compiler/utils/Util.hs
@@ -1142,12 +1142,18 @@ readRational__ r = do
lexDecDigits = nonnull isDigit
- lexDotDigits ('.':s) = return (span isDigit s)
+ lexDotDigits ('.':s) = return (span' isDigit s)
lexDotDigits s = return ("",s)
- nonnull p s = do (cs@(_:_),t) <- return (span p s)
+ nonnull p s = do (cs@(_:_),t) <- return (span' p s)
return (cs,t)
+ span' _ xs@[] = (xs, xs)
+ span' p xs@(x:xs')
+ | x == '_' = span' p xs' -- skip "_" (#14473)
+ | p x = let (ys,zs) = span' p xs' in (x:ys,zs)
+ | otherwise = ([],xs)
+
readRational :: String -> Rational -- NB: *does* handle a leading "-"
readRational top_s
= case top_s of
@@ -1176,12 +1182,12 @@ readHexRational str =
readHexRational__ :: String -> Maybe Rational
readHexRational__ ('0' : x : rest)
| x == 'X' || x == 'x' =
- do let (front,rest2) = span isHexDigit rest
+ do let (front,rest2) = span' isHexDigit rest
guard (not (null front))
let frontNum = steps 16 0 front
case rest2 of
'.' : rest3 ->
- do let (back,rest4) = span isHexDigit rest3
+ do let (back,rest4) = span' isHexDigit rest3
guard (not (null back))
let backNum = steps 16 frontNum back
exp1 = -4 * length back
@@ -1201,13 +1207,18 @@ readHexRational__ ('0' : x : rest)
mk :: Integer -> Int -> Rational
mk n e = fromInteger n * 2^^e
- dec cs = case span isDigit cs of
+ dec cs = case span' isDigit cs of
(ds,"") | not (null ds) -> Just (steps 10 0 ds)
_ -> Nothing
steps base n ds = foldl' (step base) n ds
step base n d = base * n + fromIntegral (digitToInt d)
+ span' _ xs@[] = (xs, xs)
+ span' p xs@(x:xs')
+ | x == '_' = span' p xs' -- skip "_" (#14473)
+ | p x = let (ys,zs) = span' p xs' in (x:ys,zs)
+ | otherwise = ([],xs)
readHexRational__ _ = Nothing