diff options
author | Ian Lynagh <igloo@earth.li> | 2012-07-14 20:48:42 +0100 |
---|---|---|
committer | Ian Lynagh <igloo@earth.li> | 2012-07-14 20:57:37 +0100 |
commit | 7ae1bec53801069661e249e47ebd6998d6450093 (patch) | |
tree | 3bf00efb097a569173b2c2e9fe1d10347508c6ba | |
parent | 18f82197efbc2b930b123032fd7828626c04ee43 (diff) | |
download | haskell-7ae1bec53801069661e249e47ebd6998d6450093.tar.gz |
Implement FastBytes, and use it for MachStr
This is a first step on the way to refactoring the FastString type.
FastBytes currently has no unique, mainly because there isn't currently
a nice way to produce them in Binary.
Also, we don't currently do the "Dictionary" thing with FastBytes in
Binary. I'm not sure whether this is important.
We can change both decisions later, but in the meantime this gets the
refactoring underway.
-rw-r--r-- | compiler/basicTypes/Literal.lhs | 9 | ||||
-rw-r--r-- | compiler/codeGen/CgUtils.hs | 3 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmUtils.hs | 2 | ||||
-rw-r--r-- | compiler/coreSyn/CoreUnfold.lhs | 2 | ||||
-rw-r--r-- | compiler/coreSyn/ExternalCore.lhs | 4 | ||||
-rw-r--r-- | compiler/coreSyn/MkCore.lhs | 4 | ||||
-rw-r--r-- | compiler/coreSyn/MkExternalCore.lhs | 2 | ||||
-rw-r--r-- | compiler/coreSyn/PprExternalCore.lhs | 4 | ||||
-rw-r--r-- | compiler/deSugar/DsBinds.lhs | 2 | ||||
-rw-r--r-- | compiler/deSugar/MatchLit.lhs | 14 | ||||
-rw-r--r-- | compiler/ghci/ByteCodeGen.lhs | 2 | ||||
-rw-r--r-- | compiler/parser/ParserCore.y | 2 | ||||
-rw-r--r-- | compiler/prelude/PrelRules.lhs | 2 | ||||
-rw-r--r-- | compiler/utils/Binary.hs | 23 | ||||
-rw-r--r-- | compiler/utils/FastString.lhs | 75 | ||||
-rw-r--r-- | compiler/utils/Outputable.lhs | 12 |
16 files changed, 129 insertions, 33 deletions
diff --git a/compiler/basicTypes/Literal.lhs b/compiler/basicTypes/Literal.lhs index bbc70551f9..8fbcbb7a88 100644 --- a/compiler/basicTypes/Literal.lhs +++ b/compiler/basicTypes/Literal.lhs @@ -84,7 +84,7 @@ data Literal -- First the primitive guys MachChar Char -- ^ @Char#@ - at least 31 bits. Create with 'mkMachChar' - | MachStr FastString -- ^ A string-literal: stored and emitted + | MachStr FastBytes -- ^ A string-literal: stored and emitted -- UTF-8 encoded, we'll arrange to decode it -- at runtime. Also emitted with a @'\0'@ -- terminator. Create with 'mkMachString' @@ -248,7 +248,8 @@ mkMachChar = MachChar -- | Creates a 'Literal' of type @Addr#@, which is appropriate for passing to -- e.g. some of the \"error\" functions in GHC.Err such as @GHC.Err.runtimeError@ mkMachString :: String -> Literal -mkMachString s = MachStr (mkFastString s) -- stored UTF-8 encoded +-- stored UTF-8 encoded +mkMachString s = MachStr (fastStringToFastBytes $ mkFastString s) mkLitInteger :: Integer -> Type -> Literal mkLitInteger = LitInteger @@ -436,7 +437,7 @@ pprLiteral :: (SDoc -> SDoc) -> Literal -> SDoc -- to wrap parens around literals that occur in -- a context requiring an atomic thing pprLiteral _ (MachChar ch) = pprHsChar ch -pprLiteral _ (MachStr s) = pprHsString s +pprLiteral _ (MachStr s) = pprHsBytes s pprLiteral _ (MachInt i) = pprIntVal i pprLiteral _ (MachDouble d) = double (fromRat d) pprLiteral _ (MachNullAddr) = ptext (sLit "__NULL") @@ -469,7 +470,7 @@ Hash values should be zero or a positive integer. No negatives please. \begin{code} hashLiteral :: Literal -> Int hashLiteral (MachChar c) = ord c + 1000 -- Keep it out of range of common ints -hashLiteral (MachStr s) = hashFS s +hashLiteral (MachStr s) = hashFB s hashLiteral (MachNullAddr) = 0 hashLiteral (MachInt i) = hashInteger i hashLiteral (MachInt64 i) = hashInteger i diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index e7d17c1f03..08b6fb8939 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -92,8 +92,7 @@ addIdReps ids = [(idCgRep id, id) | id <- ids] ------------------------------------------------------------------------- cgLit :: Literal -> FCode CmmLit -cgLit (MachStr s) = newByteStringCLit (bytesFS s) - -- not unpackFS; we want the UTF-8 byte stream. +cgLit (MachStr s) = newByteStringCLit (bytesFB s) cgLit other_lit = return (mkSimpleLit other_lit) mkSimpleLit :: Literal -> CmmLit diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index 733c2d4692..ab44888597 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -90,7 +90,7 @@ import Data.Maybe ------------------------------------------------------------------------- cgLit :: Literal -> FCode CmmLit -cgLit (MachStr s) = newByteStringCLit (bytesFS s) +cgLit (MachStr s) = newByteStringCLit (bytesFB s) -- not unpackFS; we want the UTF-8 byte stream. cgLit other_lit = return (mkSimpleLit other_lit) diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index 816d34e87b..6c61f4294c 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -508,7 +508,7 @@ sizeExpr bOMB_OUT_SIZE top_args expr litSize :: Literal -> Int -- Used by CoreUnfold.sizeExpr litSize (LitInteger {}) = 100 -- Note [Size of literal integers] -litSize (MachStr str) = 10 + 10 * ((lengthFS str + 3) `div` 4) +litSize (MachStr str) = 10 + 10 * ((lengthFB str + 3) `div` 4) -- If size could be 0 then @f "x"@ might be too small -- [Sept03: make literal strings a bit bigger to avoid fruitless -- duplication of little strings] diff --git a/compiler/coreSyn/ExternalCore.lhs b/compiler/coreSyn/ExternalCore.lhs index 3d416f78a4..d2f6691a7c 100644 --- a/compiler/coreSyn/ExternalCore.lhs +++ b/compiler/coreSyn/ExternalCore.lhs @@ -11,6 +11,8 @@ module ExternalCore where +import Data.Word + data Module = Module Mname [Tdef] [Vdefg] @@ -84,7 +86,7 @@ data Lit = Lint Integer Ty | Lrational Rational Ty | Lchar Char Ty - | Lstring String Ty + | Lstring [Word8] Ty type Mname = Id diff --git a/compiler/coreSyn/MkCore.lhs b/compiler/coreSyn/MkCore.lhs index 410d62db7d..3a696d1914 100644 --- a/compiler/coreSyn/MkCore.lhs +++ b/compiler/coreSyn/MkCore.lhs @@ -283,11 +283,11 @@ mkStringExprFS str | all safeChar chars = do unpack_id <- lookupId unpackCStringName - return (App (Var unpack_id) (Lit (MachStr str))) + return (App (Var unpack_id) (Lit (MachStr (fastStringToFastBytes str)))) | otherwise = do unpack_id <- lookupId unpackCStringUtf8Name - return (App (Var unpack_id) (Lit (MachStr str))) + return (App (Var unpack_id) (Lit (MachStr (fastStringToFastBytes str)))) where chars = unpackFS str diff --git a/compiler/coreSyn/MkExternalCore.lhs b/compiler/coreSyn/MkExternalCore.lhs index b6c682ffc0..d05da2a420 100644 --- a/compiler/coreSyn/MkExternalCore.lhs +++ b/compiler/coreSyn/MkExternalCore.lhs @@ -221,7 +221,7 @@ make_lit dflags l = -- For a character bigger than 0xff, we represent it in ext-core -- as an int lit with a char type. MachChar i -> C.Lint (fromIntegral $ ord i) t - MachStr s -> C.Lstring (unpackFS s) t + MachStr s -> C.Lstring (bytesFB s) t MachNullAddr -> C.Lint 0 t MachInt i -> C.Lint i t MachInt64 i -> C.Lint i t diff --git a/compiler/coreSyn/PprExternalCore.lhs b/compiler/coreSyn/PprExternalCore.lhs index 571b816e59..9c6846c494 100644 --- a/compiler/coreSyn/PprExternalCore.lhs +++ b/compiler/coreSyn/PprExternalCore.lhs @@ -199,7 +199,9 @@ plit (Lint i t) = parens (integer i <> text "::" <> pty t) plit (Lrational r t) = parens (text (show (numerator r)) <+> char '%' <+> text (show (denominator r)) <> text "::" <> pty t) plit (Lchar c t) = parens (text ("\'" ++ escape [c] ++ "\'") <> text "::" <> pty t) -plit (Lstring s t) = parens (pstring s <> text "::" <> pty t) +-- This is a little messy. We shouldn't really be going via String. +plit (Lstring bs t) = parens (pstring str <> text "::" <> pty t) + where str = map (chr . fromIntegral) bs pstring :: String -> Doc pstring s = doubleQuotes(text (escape s)) diff --git a/compiler/deSugar/DsBinds.lhs b/compiler/deSugar/DsBinds.lhs index 8949387aae..75680bcf3a 100644 --- a/compiler/deSugar/DsBinds.lhs +++ b/compiler/deSugar/DsBinds.lhs @@ -775,7 +775,7 @@ dsEvTerm (EvSuperClass d n) dsEvTerm (EvDelayedError ty msg) = return $ Var errorId `mkTyApps` [ty] `mkApps` [litMsg] where errorId = rUNTIME_ERROR_ID - litMsg = Lit (MachStr msg) + litMsg = Lit (MachStr (fastStringToFastBytes msg)) dsEvTerm (EvLit l) = case l of diff --git a/compiler/deSugar/MatchLit.lhs b/compiler/deSugar/MatchLit.lhs index 84ec342473..a3fe356660 100644 --- a/compiler/deSugar/MatchLit.lhs +++ b/compiler/deSugar/MatchLit.lhs @@ -39,6 +39,7 @@ import TysWiredIn import Literal import SrcLoc import Data.Ratio +import MonadUtils import Outputable import BasicTypes import Util @@ -68,7 +69,7 @@ See also below where we look for @DictApps@ for \tr{plusInt}, etc. \begin{code} dsLit :: HsLit -> DsM CoreExpr -dsLit (HsStringPrim s) = return (Lit (MachStr s)) +dsLit (HsStringPrim s) = return (Lit (MachStr (fastStringToFastBytes s))) dsLit (HsCharPrim c) = return (Lit (MachChar c)) dsLit (HsIntPrim i) = return (Lit (MachInt i)) dsLit (HsWordPrim w) = return (Lit (MachWord w)) @@ -123,10 +124,10 @@ hsLitKey (HsWordPrim w) = mkMachWord w hsLitKey (HsInt64Prim i) = mkMachInt64 i hsLitKey (HsWord64Prim w) = mkMachWord64 w hsLitKey (HsCharPrim c) = MachChar c -hsLitKey (HsStringPrim s) = MachStr s +hsLitKey (HsStringPrim s) = MachStr (fastStringToFastBytes s) hsLitKey (HsFloatPrim f) = MachFloat (fl_value f) hsLitKey (HsDoublePrim d) = MachDouble (fl_value d) -hsLitKey (HsString s) = MachStr s +hsLitKey (HsString s) = MachStr (fastStringToFastBytes s) hsLitKey l = pprPanic "hsLitKey" (ppr l) hsOverLitKey :: OutputableBndr a => HsOverLit a -> Bool -> Literal @@ -138,7 +139,7 @@ litValKey (HsIntegral i) False = MachInt i litValKey (HsIntegral i) True = MachInt (-i) litValKey (HsFractional r) False = MachFloat (fl_value r) litValKey (HsFractional r) True = MachFloat (negate (fl_value r)) -litValKey (HsIsString s) neg = ASSERT( not neg) MachStr s +litValKey (HsIsString s) neg = ASSERT( not neg) MachStr (fastStringToFastBytes s) \end{code} %************************************************************************ @@ -253,7 +254,10 @@ matchLiterals (var:vars) ty sub_groups wrap_str_guard :: Id -> (Literal,MatchResult) -> DsM MatchResult -- Equality check for string literals wrap_str_guard eq_str (MachStr s, mr) - = do { lit <- mkStringExprFS s + = do { -- We now have to convert back to FastString. Perhaps there + -- should be separate MachBytes and MachStr constructors? + s' <- liftIO $ mkFastStringFastBytes s + ; lit <- mkStringExprFS s' ; let pred = mkApps (Var eq_str) [Var var, lit] ; return (mkGuardedMatchResult pred mr) } wrap_str_guard _ (l, _) = pprPanic "matchLiterals/wrap_str_guard" (ppr l) diff --git a/compiler/ghci/ByteCodeGen.lhs b/compiler/ghci/ByteCodeGen.lhs index d722964bcd..a19d2ecf0b 100644 --- a/compiler/ghci/ByteCodeGen.lhs +++ b/compiler/ghci/ByteCodeGen.lhs @@ -1259,7 +1259,7 @@ pushAtom _ _ (AnnLit lit) pushStr s = let getMallocvilleAddr = case s of - FastString _ n _ fp _ -> + FastBytes n fp -> -- we could grab the Ptr from the ForeignPtr, -- but then we have no way to control its lifetime. -- In reality it'll probably stay alive long enoungh diff --git a/compiler/parser/ParserCore.y b/compiler/parser/ParserCore.y index edb8b50864..7f9a49a2b7 100644 --- a/compiler/parser/ParserCore.y +++ b/compiler/parser/ParserCore.y @@ -303,7 +303,7 @@ lit :: { Literal } : '(' INTEGER '::' aty ')' { convIntLit $2 $4 } | '(' RATIONAL '::' aty ')' { convRatLit $2 $4 } | '(' CHAR '::' aty ')' { MachChar $2 } - | '(' STRING '::' aty ')' { MachStr (mkFastString $2) } + | '(' STRING '::' aty ')' { MachStr (fastStringToFastBytes (mkFastString $2)) } fs_var_occ :: { FastString } : NAME { mkFastString $1 } diff --git a/compiler/prelude/PrelRules.lhs b/compiler/prelude/PrelRules.lhs index 7aeb920e0c..713726288f 100644 --- a/compiler/prelude/PrelRules.lhs +++ b/compiler/prelude/PrelRules.lhs @@ -737,7 +737,7 @@ match_append_lit _ [Type ty1, c1 `cheapEqExpr` c2 = ASSERT( ty1 `eqType` ty2 ) Just (Var unpk `App` Type ty1 - `App` Lit (MachStr (s1 `appendFS` s2)) + `App` Lit (MachStr (s1 `appendFB` s2)) `App` c1 `App` n) diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs index 77bd190fa9..bf24b09b71 100644 --- a/compiler/utils/Binary.hs +++ b/compiler/utils/Binary.hs @@ -725,7 +725,14 @@ type SymbolTable = Array Int Name --------------------------------------------------------- putFS :: BinHandle -> FastString -> IO () -putFS bh (FastString _ l _ buf _) = do +putFS bh fs = putFB bh $ fastStringToFastBytes fs + +getFS :: BinHandle -> IO FastString +getFS bh = do fb <- getFB bh + mkFastStringFastBytes fb + +putFB :: BinHandle -> FastBytes -> IO () +putFB bh (FastBytes l buf) = do put_ bh l withForeignPtr buf $ \ptr -> let @@ -738,19 +745,19 @@ putFS bh (FastString _ l _ buf _) = do go 0 {- -- possible faster version, not quite there yet: -getFS bh@BinMem{} = do +getFB bh@BinMem{} = do (I# l) <- get bh arr <- readIORef (arr_r bh) off <- readFastMutInt (off_r bh) - return $! (mkFastSubStringBA# arr off l) + return $! (mkFastSubBytesBA# arr off l) -} -getFS :: BinHandle -> IO FastString -getFS bh = do +getFB :: BinHandle -> IO FastBytes +getFB bh = do l <- get bh fp <- mallocForeignPtrBytes l withForeignPtr fp $ \ptr -> do let - go n | n == l = mkFastStringForeignPtr ptr fp l + go n | n == l = return $ foreignPtrToFastBytes fp l | otherwise = do b <- getByte bh pokeElemOff ptr n b @@ -758,6 +765,10 @@ getFS bh = do -- go 0 +instance Binary FastBytes where + put_ bh f = putFB bh f + get bh = getFB bh + instance Binary FastString where put_ bh f = case getUserData bh of diff --git a/compiler/utils/FastString.lhs b/compiler/utils/FastString.lhs index 2c94de75f9..05a379808e 100644 --- a/compiler/utils/FastString.lhs +++ b/compiler/utils/FastString.lhs @@ -26,6 +26,16 @@ -- Use 'LitString' unless you want the facilities of 'FastString'. module FastString ( + -- * FastBytes + FastBytes(..), + mkFastStringFastBytes, + foreignPtrToFastBytes, + fastStringToFastBytes, + bytesFB, + hashFB, + lengthFB, + appendFB, + -- * FastStrings FastString(..), -- not abstract, for now. @@ -117,6 +127,61 @@ import GHC.Base ( unpackCString# ) #define hASH_TBL_SIZE_UNBOXED 4091# +data FastBytes = FastBytes { + fb_n_bytes :: {-# UNPACK #-} !Int, -- number of bytes + fb_buf :: {-# UNPACK #-} !(ForeignPtr Word8) + } deriving Typeable + +instance Data FastBytes where + -- don't traverse? + toConstr _ = abstractConstr "FastBytes" + gunfold _ _ = error "gunfold" + dataTypeOf _ = mkNoRepType "FastBytes" + +instance Eq FastBytes where + x == y = (x `compare` y) == EQ + +instance Ord FastBytes where + compare = cmpFB + +foreignPtrToFastBytes :: ForeignPtr Word8 -> Int -> FastBytes +foreignPtrToFastBytes fp len = FastBytes len fp + +mkFastStringFastBytes :: FastBytes -> IO FastString +mkFastStringFastBytes (FastBytes len fp) + = withForeignPtr fp $ \ptr -> mkFastStringForeignPtr ptr fp len + +fastStringToFastBytes :: FastString -> FastBytes +fastStringToFastBytes f = FastBytes (n_bytes f) (buf f) + +-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString' +bytesFB :: FastBytes -> [Word8] +bytesFB (FastBytes n_bytes buf) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> + peekArray n_bytes ptr + +hashFB :: FastBytes -> Int +hashFB (FastBytes len buf) + = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $ hashStr ptr len + +lengthFB :: FastBytes -> Int +lengthFB f = fb_n_bytes f + +appendFB :: FastBytes -> FastBytes -> FastBytes +appendFB fb1 fb2 = + inlinePerformIO $ do + r <- mallocForeignPtrBytes len + withForeignPtr r $ \ r' -> do + withForeignPtr (fb_buf fb1) $ \ fb1Ptr -> do + withForeignPtr (fb_buf fb2) $ \ fb2Ptr -> do + copyBytes r' fb1Ptr len1 + copyBytes (advancePtr r' len1) fb2Ptr len2 + return $ foreignPtrToFastBytes r len + where len = len1 + len2 + len1 = fb_n_bytes fb1 + len2 = fb_n_bytes fb2 + + {-| A 'FastString' is an array of bytes, hashed to support fast O(1) comparison. It is also associated with a character encoding, so that @@ -165,8 +230,12 @@ instance Data FastString where dataTypeOf _ = mkNoRepType "FastString" cmpFS :: FastString -> FastString -> Ordering -cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) = +cmpFS f1@(FastString u1 _ _ _ _) f2@(FastString u2 _ _ _ _) = if u1 == u2 then EQ else + cmpFB (fastStringToFastBytes f1) (fastStringToFastBytes f2) + +cmpFB :: FastBytes -> FastBytes -> Ordering +cmpFB (FastBytes l1 buf1) (FastBytes l2 buf2) = case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of LT -> LT EQ -> compare l1 l2 @@ -431,9 +500,7 @@ unpackFS (FastString _ n_bytes _ buf enc) = -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString' bytesFS :: FastString -> [Word8] -bytesFS (FastString _ n_bytes _ buf _) = - inlinePerformIO $ withForeignPtr buf $ \ptr -> - peekArray n_bytes ptr +bytesFS fs = bytesFB $ fastStringToFastBytes fs -- | Returns a Z-encoded version of a 'FastString'. This might be the -- original, if it was already Z-encoded. The first time this diff --git a/compiler/utils/Outputable.lhs b/compiler/utils/Outputable.lhs index f74aaa84fe..8d97de8394 100644 --- a/compiler/utils/Outputable.lhs +++ b/compiler/utils/Outputable.lhs @@ -48,7 +48,7 @@ module Outputable ( renderWithStyle, pprInfixVar, pprPrefixVar, - pprHsChar, pprHsString, + pprHsChar, pprHsString, pprHsBytes, pprFastFilePath, -- * Controlling the style in which output is printed @@ -743,6 +743,16 @@ pprHsChar c | c > '\x10ffff' = char '\\' <> text (show (fromIntegral (ord c) :: pprHsString :: FastString -> SDoc pprHsString fs = vcat (map text (showMultiLineString (unpackFS fs))) +-- | Special combinator for showing string literals. +pprHsBytes :: FastBytes -> SDoc +pprHsBytes fb = let escaped = concatMap escape $ bytesFB fb + in vcat (map text (showMultiLineString escaped)) <> char '#' + where escape :: Word8 -> String + escape w = let c = chr (fromIntegral w) + in if isAscii c + then [c] + else '\\' : show w + --------------------- -- Put a name in parens if it's an operator pprPrefixVar :: Bool -> SDoc -> SDoc |