diff options
author | Sylvain Henry <sylvain@haskus.fr> | 2019-02-05 15:55:03 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2019-02-08 11:00:24 -0500 |
commit | ced729f6f4651b67151015e25a98c93792794aee (patch) | |
tree | 72c16533c7f1d6b1eb20b61b2a09dfe1070354e3 | |
parent | 7ff127f9e455402da183d1bb1808db024d1fa944 (diff) | |
download | haskell-ced729f6f4651b67151015e25a98c93792794aee.tar.gz |
Cleanup in parser/Ctype.hs
* GHC now performs constant folding on bit operations like (.|.), so we
use them instead of (+) and remove the misleading comment
* we use Word8 instead of Int and we remove the useless conversion to
Int32. Hopefully future releases of GHC will be able to transform the big
case in `charType` into a value-table lookup instead of a jump table;
Word8 would make that table smaller.
* we use the INLINABLE pragma instead of INLINE on `is_ctype`: in my test,
the latter *prevents* `is_ctype` from being inlined because `charType` is
inlined into `is_ctype` (to call `charType`'s worker on the unboxed Char
directly).
-rw-r--r-- | compiler/parser/Ctype.hs | 273 |
1 files changed, 135 insertions, 138 deletions
diff --git a/compiler/parser/Ctype.hs b/compiler/parser/Ctype.hs index 9c3988e869..57721da94d 100644 --- a/compiler/parser/Ctype.hs +++ b/compiler/parser/Ctype.hs @@ -18,14 +18,14 @@ module Ctype import GhcPrelude -import Data.Int ( Int32 ) -import Data.Bits ( Bits((.&.)) ) +import Data.Bits ( Bits((.&.),(.|.)) ) import Data.Char ( ord, chr ) +import Data.Word import Panic -- Bit masks -cIdent, cSymbol, cAny, cSpace, cLower, cUpper, cDigit :: Int +cIdent, cSymbol, cAny, cSpace, cLower, cUpper, cDigit :: Word8 cIdent = 1 cSymbol = 2 cAny = 4 @@ -37,9 +37,9 @@ cDigit = 64 -- | The predicates below look costly, but aren't, GHC+GCC do a great job -- at the big case below. -{-# INLINE is_ctype #-} -is_ctype :: Int -> Char -> Bool -is_ctype mask c = (fromIntegral (charType c) .&. fromIntegral mask) /= (0::Int32) +{-# INLINABLE is_ctype #-} +is_ctype :: Word8 -> Char -> Bool +is_ctype mask c = (charType c .&. mask) /= 0 is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit, is_alphanum :: Char -> Bool @@ -82,137 +82,134 @@ to_lower c | c >= 'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a')) | otherwise = c --- | We really mean .|. instead of + below, but GHC currently doesn't do --- any constant folding with bitops. 
*sigh* - -charType :: Char -> Int +charType :: Char -> Word8 charType c = case c of - '\0' -> 0 -- \000 - '\1' -> 0 -- \001 - '\2' -> 0 -- \002 - '\3' -> 0 -- \003 - '\4' -> 0 -- \004 - '\5' -> 0 -- \005 - '\6' -> 0 -- \006 - '\7' -> 0 -- \007 - '\8' -> 0 -- \010 - '\9' -> cSpace -- \t (not allowed in strings, so !cAny) - '\10' -> cSpace -- \n (ditto) - '\11' -> cSpace -- \v (ditto) - '\12' -> cSpace -- \f (ditto) - '\13' -> cSpace -- ^M (ditto) - '\14' -> 0 -- \016 - '\15' -> 0 -- \017 - '\16' -> 0 -- \020 - '\17' -> 0 -- \021 - '\18' -> 0 -- \022 - '\19' -> 0 -- \023 - '\20' -> 0 -- \024 - '\21' -> 0 -- \025 - '\22' -> 0 -- \026 - '\23' -> 0 -- \027 - '\24' -> 0 -- \030 - '\25' -> 0 -- \031 - '\26' -> 0 -- \032 - '\27' -> 0 -- \033 - '\28' -> 0 -- \034 - '\29' -> 0 -- \035 - '\30' -> 0 -- \036 - '\31' -> 0 -- \037 - '\32' -> cAny + cSpace -- - '\33' -> cAny + cSymbol -- ! - '\34' -> cAny -- " - '\35' -> cAny + cSymbol -- # - '\36' -> cAny + cSymbol -- $ - '\37' -> cAny + cSymbol -- % - '\38' -> cAny + cSymbol -- & - '\39' -> cAny + cIdent -- ' - '\40' -> cAny -- ( - '\41' -> cAny -- ) - '\42' -> cAny + cSymbol -- * - '\43' -> cAny + cSymbol -- + - '\44' -> cAny -- , - '\45' -> cAny + cSymbol -- - - '\46' -> cAny + cSymbol -- . - '\47' -> cAny + cSymbol -- / - '\48' -> cAny + cIdent + cDigit -- 0 - '\49' -> cAny + cIdent + cDigit -- 1 - '\50' -> cAny + cIdent + cDigit -- 2 - '\51' -> cAny + cIdent + cDigit -- 3 - '\52' -> cAny + cIdent + cDigit -- 4 - '\53' -> cAny + cIdent + cDigit -- 5 - '\54' -> cAny + cIdent + cDigit -- 6 - '\55' -> cAny + cIdent + cDigit -- 7 - '\56' -> cAny + cIdent + cDigit -- 8 - '\57' -> cAny + cIdent + cDigit -- 9 - '\58' -> cAny + cSymbol -- : - '\59' -> cAny -- ; - '\60' -> cAny + cSymbol -- < - '\61' -> cAny + cSymbol -- = - '\62' -> cAny + cSymbol -- > - '\63' -> cAny + cSymbol -- ? 
- '\64' -> cAny + cSymbol -- @ - '\65' -> cAny + cIdent + cUpper -- A - '\66' -> cAny + cIdent + cUpper -- B - '\67' -> cAny + cIdent + cUpper -- C - '\68' -> cAny + cIdent + cUpper -- D - '\69' -> cAny + cIdent + cUpper -- E - '\70' -> cAny + cIdent + cUpper -- F - '\71' -> cAny + cIdent + cUpper -- G - '\72' -> cAny + cIdent + cUpper -- H - '\73' -> cAny + cIdent + cUpper -- I - '\74' -> cAny + cIdent + cUpper -- J - '\75' -> cAny + cIdent + cUpper -- K - '\76' -> cAny + cIdent + cUpper -- L - '\77' -> cAny + cIdent + cUpper -- M - '\78' -> cAny + cIdent + cUpper -- N - '\79' -> cAny + cIdent + cUpper -- O - '\80' -> cAny + cIdent + cUpper -- P - '\81' -> cAny + cIdent + cUpper -- Q - '\82' -> cAny + cIdent + cUpper -- R - '\83' -> cAny + cIdent + cUpper -- S - '\84' -> cAny + cIdent + cUpper -- T - '\85' -> cAny + cIdent + cUpper -- U - '\86' -> cAny + cIdent + cUpper -- V - '\87' -> cAny + cIdent + cUpper -- W - '\88' -> cAny + cIdent + cUpper -- X - '\89' -> cAny + cIdent + cUpper -- Y - '\90' -> cAny + cIdent + cUpper -- Z - '\91' -> cAny -- [ - '\92' -> cAny + cSymbol -- backslash - '\93' -> cAny -- ] - '\94' -> cAny + cSymbol -- ^ - '\95' -> cAny + cIdent + cLower -- _ - '\96' -> cAny -- ` - '\97' -> cAny + cIdent + cLower -- a - '\98' -> cAny + cIdent + cLower -- b - '\99' -> cAny + cIdent + cLower -- c - '\100' -> cAny + cIdent + cLower -- d - '\101' -> cAny + cIdent + cLower -- e - '\102' -> cAny + cIdent + cLower -- f - '\103' -> cAny + cIdent + cLower -- g - '\104' -> cAny + cIdent + cLower -- h - '\105' -> cAny + cIdent + cLower -- i - '\106' -> cAny + cIdent + cLower -- j - '\107' -> cAny + cIdent + cLower -- k - '\108' -> cAny + cIdent + cLower -- l - '\109' -> cAny + cIdent + cLower -- m - '\110' -> cAny + cIdent + cLower -- n - '\111' -> cAny + cIdent + cLower -- o - '\112' -> cAny + cIdent + cLower -- p - '\113' -> cAny + cIdent + cLower -- q - '\114' -> cAny + cIdent + cLower -- r - '\115' -> cAny + cIdent + cLower -- s - '\116' -> cAny + cIdent 
+ cLower -- t - '\117' -> cAny + cIdent + cLower -- u - '\118' -> cAny + cIdent + cLower -- v - '\119' -> cAny + cIdent + cLower -- w - '\120' -> cAny + cIdent + cLower -- x - '\121' -> cAny + cIdent + cLower -- y - '\122' -> cAny + cIdent + cLower -- z - '\123' -> cAny -- { - '\124' -> cAny + cSymbol -- | - '\125' -> cAny -- } - '\126' -> cAny + cSymbol -- ~ - '\127' -> 0 -- \177 + '\0' -> 0 -- \000 + '\1' -> 0 -- \001 + '\2' -> 0 -- \002 + '\3' -> 0 -- \003 + '\4' -> 0 -- \004 + '\5' -> 0 -- \005 + '\6' -> 0 -- \006 + '\7' -> 0 -- \007 + '\8' -> 0 -- \010 + '\9' -> cSpace -- \t (not allowed in strings, so !cAny) + '\10' -> cSpace -- \n (ditto) + '\11' -> cSpace -- \v (ditto) + '\12' -> cSpace -- \f (ditto) + '\13' -> cSpace -- ^M (ditto) + '\14' -> 0 -- \016 + '\15' -> 0 -- \017 + '\16' -> 0 -- \020 + '\17' -> 0 -- \021 + '\18' -> 0 -- \022 + '\19' -> 0 -- \023 + '\20' -> 0 -- \024 + '\21' -> 0 -- \025 + '\22' -> 0 -- \026 + '\23' -> 0 -- \027 + '\24' -> 0 -- \030 + '\25' -> 0 -- \031 + '\26' -> 0 -- \032 + '\27' -> 0 -- \033 + '\28' -> 0 -- \034 + '\29' -> 0 -- \035 + '\30' -> 0 -- \036 + '\31' -> 0 -- \037 + '\32' -> cAny .|. cSpace -- + '\33' -> cAny .|. cSymbol -- ! + '\34' -> cAny -- " + '\35' -> cAny .|. cSymbol -- # + '\36' -> cAny .|. cSymbol -- $ + '\37' -> cAny .|. cSymbol -- % + '\38' -> cAny .|. cSymbol -- & + '\39' -> cAny .|. cIdent -- ' + '\40' -> cAny -- ( + '\41' -> cAny -- ) + '\42' -> cAny .|. cSymbol -- * + '\43' -> cAny .|. cSymbol -- + + '\44' -> cAny -- , + '\45' -> cAny .|. cSymbol -- - + '\46' -> cAny .|. cSymbol -- . + '\47' -> cAny .|. cSymbol -- / + '\48' -> cAny .|. cIdent .|. cDigit -- 0 + '\49' -> cAny .|. cIdent .|. cDigit -- 1 + '\50' -> cAny .|. cIdent .|. cDigit -- 2 + '\51' -> cAny .|. cIdent .|. cDigit -- 3 + '\52' -> cAny .|. cIdent .|. cDigit -- 4 + '\53' -> cAny .|. cIdent .|. cDigit -- 5 + '\54' -> cAny .|. cIdent .|. cDigit -- 6 + '\55' -> cAny .|. cIdent .|. cDigit -- 7 + '\56' -> cAny .|. cIdent .|. 
cDigit -- 8 + '\57' -> cAny .|. cIdent .|. cDigit -- 9 + '\58' -> cAny .|. cSymbol -- : + '\59' -> cAny -- ; + '\60' -> cAny .|. cSymbol -- < + '\61' -> cAny .|. cSymbol -- = + '\62' -> cAny .|. cSymbol -- > + '\63' -> cAny .|. cSymbol -- ? + '\64' -> cAny .|. cSymbol -- @ + '\65' -> cAny .|. cIdent .|. cUpper -- A + '\66' -> cAny .|. cIdent .|. cUpper -- B + '\67' -> cAny .|. cIdent .|. cUpper -- C + '\68' -> cAny .|. cIdent .|. cUpper -- D + '\69' -> cAny .|. cIdent .|. cUpper -- E + '\70' -> cAny .|. cIdent .|. cUpper -- F + '\71' -> cAny .|. cIdent .|. cUpper -- G + '\72' -> cAny .|. cIdent .|. cUpper -- H + '\73' -> cAny .|. cIdent .|. cUpper -- I + '\74' -> cAny .|. cIdent .|. cUpper -- J + '\75' -> cAny .|. cIdent .|. cUpper -- K + '\76' -> cAny .|. cIdent .|. cUpper -- L + '\77' -> cAny .|. cIdent .|. cUpper -- M + '\78' -> cAny .|. cIdent .|. cUpper -- N + '\79' -> cAny .|. cIdent .|. cUpper -- O + '\80' -> cAny .|. cIdent .|. cUpper -- P + '\81' -> cAny .|. cIdent .|. cUpper -- Q + '\82' -> cAny .|. cIdent .|. cUpper -- R + '\83' -> cAny .|. cIdent .|. cUpper -- S + '\84' -> cAny .|. cIdent .|. cUpper -- T + '\85' -> cAny .|. cIdent .|. cUpper -- U + '\86' -> cAny .|. cIdent .|. cUpper -- V + '\87' -> cAny .|. cIdent .|. cUpper -- W + '\88' -> cAny .|. cIdent .|. cUpper -- X + '\89' -> cAny .|. cIdent .|. cUpper -- Y + '\90' -> cAny .|. cIdent .|. cUpper -- Z + '\91' -> cAny -- [ + '\92' -> cAny .|. cSymbol -- backslash + '\93' -> cAny -- ] + '\94' -> cAny .|. cSymbol -- ^ + '\95' -> cAny .|. cIdent .|. cLower -- _ + '\96' -> cAny -- ` + '\97' -> cAny .|. cIdent .|. cLower -- a + '\98' -> cAny .|. cIdent .|. cLower -- b + '\99' -> cAny .|. cIdent .|. cLower -- c + '\100' -> cAny .|. cIdent .|. cLower -- d + '\101' -> cAny .|. cIdent .|. cLower -- e + '\102' -> cAny .|. cIdent .|. cLower -- f + '\103' -> cAny .|. cIdent .|. cLower -- g + '\104' -> cAny .|. cIdent .|. cLower -- h + '\105' -> cAny .|. cIdent .|. cLower -- i + '\106' -> cAny .|. cIdent .|. 
cLower -- j + '\107' -> cAny .|. cIdent .|. cLower -- k + '\108' -> cAny .|. cIdent .|. cLower -- l + '\109' -> cAny .|. cIdent .|. cLower -- m + '\110' -> cAny .|. cIdent .|. cLower -- n + '\111' -> cAny .|. cIdent .|. cLower -- o + '\112' -> cAny .|. cIdent .|. cLower -- p + '\113' -> cAny .|. cIdent .|. cLower -- q + '\114' -> cAny .|. cIdent .|. cLower -- r + '\115' -> cAny .|. cIdent .|. cLower -- s + '\116' -> cAny .|. cIdent .|. cLower -- t + '\117' -> cAny .|. cIdent .|. cLower -- u + '\118' -> cAny .|. cIdent .|. cLower -- v + '\119' -> cAny .|. cIdent .|. cLower -- w + '\120' -> cAny .|. cIdent .|. cLower -- x + '\121' -> cAny .|. cIdent .|. cLower -- y + '\122' -> cAny .|. cIdent .|. cLower -- z + '\123' -> cAny -- { + '\124' -> cAny .|. cSymbol -- | + '\125' -> cAny -- } + '\126' -> cAny .|. cSymbol -- ~ + '\127' -> 0 -- \177 _ -> panic ("charType: " ++ show c) |