diff options
-rw-r--r-- | compiler/basicTypes/Literal.lhs | 9 | ||||
-rw-r--r-- | compiler/codeGen/CgUtils.hs | 3 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmUtils.hs | 2 | ||||
-rw-r--r-- | compiler/coreSyn/CoreUnfold.lhs | 2 | ||||
-rw-r--r-- | compiler/coreSyn/ExternalCore.lhs | 4 | ||||
-rw-r--r-- | compiler/coreSyn/MkCore.lhs | 4 | ||||
-rw-r--r-- | compiler/coreSyn/MkExternalCore.lhs | 2 | ||||
-rw-r--r-- | compiler/coreSyn/PprExternalCore.lhs | 4 | ||||
-rw-r--r-- | compiler/deSugar/DsBinds.lhs | 2 | ||||
-rw-r--r-- | compiler/deSugar/MatchLit.lhs | 14 | ||||
-rw-r--r-- | compiler/ghci/ByteCodeGen.lhs | 2 | ||||
-rw-r--r-- | compiler/parser/ParserCore.y | 2 | ||||
-rw-r--r-- | compiler/prelude/PrelRules.lhs | 2 | ||||
-rw-r--r-- | compiler/utils/Binary.hs | 23 | ||||
-rw-r--r-- | compiler/utils/FastString.lhs | 75 | ||||
-rw-r--r-- | compiler/utils/Outputable.lhs | 12 |
16 files changed, 129 insertions, 33 deletions
diff --git a/compiler/basicTypes/Literal.lhs b/compiler/basicTypes/Literal.lhs index bbc70551f9..8fbcbb7a88 100644 --- a/compiler/basicTypes/Literal.lhs +++ b/compiler/basicTypes/Literal.lhs @@ -84,7 +84,7 @@ data Literal -- First the primitive guys MachChar Char -- ^ @Char#@ - at least 31 bits. Create with 'mkMachChar' - | MachStr FastString -- ^ A string-literal: stored and emitted + | MachStr FastBytes -- ^ A string-literal: stored and emitted -- UTF-8 encoded, we'll arrange to decode it -- at runtime. Also emitted with a @'\0'@ -- terminator. Create with 'mkMachString' @@ -248,7 +248,8 @@ mkMachChar = MachChar -- | Creates a 'Literal' of type @Addr#@, which is appropriate for passing to -- e.g. some of the \"error\" functions in GHC.Err such as @GHC.Err.runtimeError@ mkMachString :: String -> Literal -mkMachString s = MachStr (mkFastString s) -- stored UTF-8 encoded +-- stored UTF-8 encoded +mkMachString s = MachStr (fastStringToFastBytes $ mkFastString s) mkLitInteger :: Integer -> Type -> Literal mkLitInteger = LitInteger @@ -436,7 +437,7 @@ pprLiteral :: (SDoc -> SDoc) -> Literal -> SDoc -- to wrap parens around literals that occur in -- a context requiring an atomic thing pprLiteral _ (MachChar ch) = pprHsChar ch -pprLiteral _ (MachStr s) = pprHsString s +pprLiteral _ (MachStr s) = pprHsBytes s pprLiteral _ (MachInt i) = pprIntVal i pprLiteral _ (MachDouble d) = double (fromRat d) pprLiteral _ (MachNullAddr) = ptext (sLit "__NULL") @@ -469,7 +470,7 @@ Hash values should be zero or a positive integer. No negatives please. \begin{code} hashLiteral :: Literal -> Int hashLiteral (MachChar c) = ord c + 1000 -- Keep it out of range of common ints -hashLiteral (MachStr s) = hashFS s +hashLiteral (MachStr s) = hashFB s hashLiteral (MachNullAddr) = 0 hashLiteral (MachInt i) = hashInteger i hashLiteral (MachInt64 i) = hashInteger i diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index e7d17c1f03..08b6fb8939 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -92,8 +92,7 @@ addIdReps ids = [(idCgRep id, id) | id <- ids] ------------------------------------------------------------------------- cgLit :: Literal -> FCode CmmLit -cgLit (MachStr s) = newByteStringCLit (bytesFS s) - -- not unpackFS; we want the UTF-8 byte stream. +cgLit (MachStr s) = newByteStringCLit (bytesFB s) cgLit other_lit = return (mkSimpleLit other_lit) mkSimpleLit :: Literal -> CmmLit diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index 733c2d4692..ab44888597 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -90,7 +90,7 @@ import Data.Maybe ------------------------------------------------------------------------- cgLit :: Literal -> FCode CmmLit -cgLit (MachStr s) = newByteStringCLit (bytesFS s) +cgLit (MachStr s) = newByteStringCLit (bytesFB s) -- not unpackFS; we want the UTF-8 byte stream. cgLit other_lit = return (mkSimpleLit other_lit) diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index 816d34e87b..6c61f4294c 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -508,7 +508,7 @@ sizeExpr bOMB_OUT_SIZE top_args expr litSize :: Literal -> Int -- Used by CoreUnfold.sizeExpr litSize (LitInteger {}) = 100 -- Note [Size of literal integers] -litSize (MachStr str) = 10 + 10 * ((lengthFS str + 3) `div` 4) +litSize (MachStr str) = 10 + 10 * ((lengthFB str + 3) `div` 4) -- If size could be 0 then @f "x"@ might be too small -- [Sept03: make literal strings a bit bigger to avoid fruitless -- duplication of little strings] diff --git a/compiler/coreSyn/ExternalCore.lhs b/compiler/coreSyn/ExternalCore.lhs index 3d416f78a4..d2f6691a7c 100644 --- a/compiler/coreSyn/ExternalCore.lhs +++ b/compiler/coreSyn/ExternalCore.lhs @@ -11,6 +11,8 @@ module ExternalCore where +import Data.Word + data Module = Module Mname [Tdef] [Vdefg] @@ -84,7 +86,7 @@ data Lit = Lint Integer Ty | Lrational Rational Ty | Lchar Char Ty - | Lstring String Ty + | Lstring [Word8] Ty type Mname = Id diff --git a/compiler/coreSyn/MkCore.lhs b/compiler/coreSyn/MkCore.lhs index 410d62db7d..3a696d1914 100644 --- a/compiler/coreSyn/MkCore.lhs +++ b/compiler/coreSyn/MkCore.lhs @@ -283,11 +283,11 @@ mkStringExprFS str | all safeChar chars = do unpack_id <- lookupId unpackCStringName - return (App (Var unpack_id) (Lit (MachStr str))) + return (App (Var unpack_id) (Lit (MachStr (fastStringToFastBytes str)))) | otherwise = do unpack_id <- lookupId unpackCStringUtf8Name - return (App (Var unpack_id) (Lit (MachStr str))) + return (App (Var unpack_id) (Lit (MachStr (fastStringToFastBytes str)))) where chars = unpackFS str diff --git a/compiler/coreSyn/MkExternalCore.lhs b/compiler/coreSyn/MkExternalCore.lhs index b6c682ffc0..d05da2a420 100644 --- a/compiler/coreSyn/MkExternalCore.lhs +++ b/compiler/coreSyn/MkExternalCore.lhs @@ -221,7 +221,7 @@ make_lit dflags l = -- For a character bigger than 0xff, we represent it in ext-core -- as an int lit with a char type. MachChar i -> C.Lint (fromIntegral $ ord i) t - MachStr s -> C.Lstring (unpackFS s) t + MachStr s -> C.Lstring (bytesFB s) t MachNullAddr -> C.Lint 0 t MachInt i -> C.Lint i t MachInt64 i -> C.Lint i t diff --git a/compiler/coreSyn/PprExternalCore.lhs b/compiler/coreSyn/PprExternalCore.lhs index 571b816e59..9c6846c494 100644 --- a/compiler/coreSyn/PprExternalCore.lhs +++ b/compiler/coreSyn/PprExternalCore.lhs @@ -199,7 +199,9 @@ plit (Lint i t) = parens (integer i <> text "::" <> pty t) plit (Lrational r t) = parens (text (show (numerator r)) <+> char '%' <+> text (show (denominator r)) <> text "::" <> pty t) plit (Lchar c t) = parens (text ("\'" ++ escape [c] ++ "\'") <> text "::" <> pty t) -plit (Lstring s t) = parens (pstring s <> text "::" <> pty t) +-- This is a little messy. We shouldn't really be going via String. +plit (Lstring bs t) = parens (pstring str <> text "::" <> pty t) + where str = map (chr . fromIntegral) bs pstring :: String -> Doc pstring s = doubleQuotes(text (escape s)) diff --git a/compiler/deSugar/DsBinds.lhs b/compiler/deSugar/DsBinds.lhs index 8949387aae..75680bcf3a 100644 --- a/compiler/deSugar/DsBinds.lhs +++ b/compiler/deSugar/DsBinds.lhs @@ -775,7 +775,7 @@ dsEvTerm (EvSuperClass d n) dsEvTerm (EvDelayedError ty msg) = return $ Var errorId `mkTyApps` [ty] `mkApps` [litMsg] where errorId = rUNTIME_ERROR_ID - litMsg = Lit (MachStr msg) + litMsg = Lit (MachStr (fastStringToFastBytes msg)) dsEvTerm (EvLit l) = case l of diff --git a/compiler/deSugar/MatchLit.lhs b/compiler/deSugar/MatchLit.lhs index 84ec342473..a3fe356660 100644 --- a/compiler/deSugar/MatchLit.lhs +++ b/compiler/deSugar/MatchLit.lhs @@ -39,6 +39,7 @@ import TysWiredIn import Literal import SrcLoc import Data.Ratio +import MonadUtils import Outputable import BasicTypes import Util @@ -68,7 +69,7 @@ See also below where we look for @DictApps@ for \tr{plusInt}, etc. \begin{code} dsLit :: HsLit -> DsM CoreExpr -dsLit (HsStringPrim s) = return (Lit (MachStr s)) +dsLit (HsStringPrim s) = return (Lit (MachStr (fastStringToFastBytes s))) dsLit (HsCharPrim c) = return (Lit (MachChar c)) dsLit (HsIntPrim i) = return (Lit (MachInt i)) dsLit (HsWordPrim w) = return (Lit (MachWord w)) @@ -123,10 +124,10 @@ hsLitKey (HsWordPrim w) = mkMachWord w hsLitKey (HsInt64Prim i) = mkMachInt64 i hsLitKey (HsWord64Prim w) = mkMachWord64 w hsLitKey (HsCharPrim c) = MachChar c -hsLitKey (HsStringPrim s) = MachStr s +hsLitKey (HsStringPrim s) = MachStr (fastStringToFastBytes s) hsLitKey (HsFloatPrim f) = MachFloat (fl_value f) hsLitKey (HsDoublePrim d) = MachDouble (fl_value d) -hsLitKey (HsString s) = MachStr s +hsLitKey (HsString s) = MachStr (fastStringToFastBytes s) hsLitKey l = pprPanic "hsLitKey" (ppr l) hsOverLitKey :: OutputableBndr a => HsOverLit a -> Bool -> Literal @@ -138,7 +139,7 @@ litValKey (HsIntegral i) False = MachInt i litValKey (HsIntegral i) True = MachInt (-i) litValKey (HsFractional r) False = MachFloat (fl_value r) litValKey (HsFractional r) True = MachFloat (negate (fl_value r)) -litValKey (HsIsString s) neg = ASSERT( not neg) MachStr s +litValKey (HsIsString s) neg = ASSERT( not neg) MachStr (fastStringToFastBytes s) \end{code} %************************************************************************ @@ -253,7 +254,10 @@ matchLiterals (var:vars) ty sub_groups wrap_str_guard :: Id -> (Literal,MatchResult) -> DsM MatchResult -- Equality check for string literals wrap_str_guard eq_str (MachStr s, mr) - = do { lit <- mkStringExprFS s + = do { -- We now have to convert back to FastString. Perhaps there + -- should be separate MachBytes and MachStr constructors? + s' <- liftIO $ mkFastStringFastBytes s + ; lit <- mkStringExprFS s' ; let pred = mkApps (Var eq_str) [Var var, lit] ; return (mkGuardedMatchResult pred mr) } wrap_str_guard _ (l, _) = pprPanic "matchLiterals/wrap_str_guard" (ppr l) diff --git a/compiler/ghci/ByteCodeGen.lhs b/compiler/ghci/ByteCodeGen.lhs index d722964bcd..a19d2ecf0b 100644 --- a/compiler/ghci/ByteCodeGen.lhs +++ b/compiler/ghci/ByteCodeGen.lhs @@ -1259,7 +1259,7 @@ pushAtom _ _ (AnnLit lit) pushStr s = let getMallocvilleAddr = case s of - FastString _ n _ fp _ -> + FastBytes n fp -> -- we could grab the Ptr from the ForeignPtr, -- but then we have no way to control its lifetime. -- In reality it'll probably stay alive long enoungh diff --git a/compiler/parser/ParserCore.y b/compiler/parser/ParserCore.y index edb8b50864..7f9a49a2b7 100644 --- a/compiler/parser/ParserCore.y +++ b/compiler/parser/ParserCore.y @@ -303,7 +303,7 @@ lit :: { Literal } : '(' INTEGER '::' aty ')' { convIntLit $2 $4 } | '(' RATIONAL '::' aty ')' { convRatLit $2 $4 } | '(' CHAR '::' aty ')' { MachChar $2 } - | '(' STRING '::' aty ')' { MachStr (mkFastString $2) } + | '(' STRING '::' aty ')' { MachStr (fastStringToFastBytes (mkFastString $2)) } fs_var_occ :: { FastString } : NAME { mkFastString $1 } diff --git a/compiler/prelude/PrelRules.lhs b/compiler/prelude/PrelRules.lhs index 7aeb920e0c..713726288f 100644 --- a/compiler/prelude/PrelRules.lhs +++ b/compiler/prelude/PrelRules.lhs @@ -737,7 +737,7 @@ match_append_lit _ [Type ty1, c1 `cheapEqExpr` c2 = ASSERT( ty1 `eqType` ty2 ) Just (Var unpk `App` Type ty1 - `App` Lit (MachStr (s1 `appendFS` s2)) + `App` Lit (MachStr (s1 `appendFB` s2)) `App` c1 `App` n) diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs index 77bd190fa9..bf24b09b71 100644 --- a/compiler/utils/Binary.hs +++ b/compiler/utils/Binary.hs @@ -725,7 +725,14 @@ type SymbolTable = Array Int Name --------------------------------------------------------- putFS :: BinHandle -> FastString -> IO () -putFS bh (FastString _ l _ buf _) = do +putFS bh fs = putFB bh $ fastStringToFastBytes fs + +getFS :: BinHandle -> IO FastString +getFS bh = do fb <- getFB bh + mkFastStringFastBytes fb + +putFB :: BinHandle -> FastBytes -> IO () +putFB bh (FastBytes l buf) = do put_ bh l withForeignPtr buf $ \ptr -> let @@ -738,19 +745,19 @@ putFS bh (FastString _ l _ buf _) = do go 0 {- -- possible faster version, not quite there yet: -getFS bh@BinMem{} = do +getFB bh@BinMem{} = do (I# l) <- get bh arr <- readIORef (arr_r bh) off <- readFastMutInt (off_r bh) - return $! (mkFastSubStringBA# arr off l) + return $! (mkFastSubBytesBA# arr off l) -} -getFS :: BinHandle -> IO FastString -getFS bh = do +getFB :: BinHandle -> IO FastBytes +getFB bh = do l <- get bh fp <- mallocForeignPtrBytes l withForeignPtr fp $ \ptr -> do let - go n | n == l = mkFastStringForeignPtr ptr fp l + go n | n == l = return $ foreignPtrToFastBytes fp l | otherwise = do b <- getByte bh pokeElemOff ptr n b @@ -758,6 +765,10 @@ getFS bh = do -- go 0 +instance Binary FastBytes where + put_ bh f = putFB bh f + get bh = getFB bh + instance Binary FastString where put_ bh f = case getUserData bh of diff --git a/compiler/utils/FastString.lhs b/compiler/utils/FastString.lhs index 2c94de75f9..05a379808e 100644 --- a/compiler/utils/FastString.lhs +++ b/compiler/utils/FastString.lhs @@ -26,6 +26,16 @@ -- Use 'LitString' unless you want the facilities of 'FastString'. module FastString ( + -- * FastBytes + FastBytes(..), + mkFastStringFastBytes, + foreignPtrToFastBytes, + fastStringToFastBytes, + bytesFB, + hashFB, + lengthFB, + appendFB, + -- * FastStrings FastString(..), -- not abstract, for now. @@ -117,6 +127,61 @@ import GHC.Base ( unpackCString# ) #define hASH_TBL_SIZE_UNBOXED 4091# +data FastBytes = FastBytes { + fb_n_bytes :: {-# UNPACK #-} !Int, -- number of bytes + fb_buf :: {-# UNPACK #-} !(ForeignPtr Word8) + } deriving Typeable + +instance Data FastBytes where + -- don't traverse? + toConstr _ = abstractConstr "FastBytes" + gunfold _ _ = error "gunfold" + dataTypeOf _ = mkNoRepType "FastBytes" + +instance Eq FastBytes where + x == y = (x `compare` y) == EQ + +instance Ord FastBytes where + compare = cmpFB + +foreignPtrToFastBytes :: ForeignPtr Word8 -> Int -> FastBytes +foreignPtrToFastBytes fp len = FastBytes len fp + +mkFastStringFastBytes :: FastBytes -> IO FastString +mkFastStringFastBytes (FastBytes len fp) + = withForeignPtr fp $ \ptr -> mkFastStringForeignPtr ptr fp len + +fastStringToFastBytes :: FastString -> FastBytes +fastStringToFastBytes f = FastBytes (n_bytes f) (buf f) + +-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString' +bytesFB :: FastBytes -> [Word8] +bytesFB (FastBytes n_bytes buf) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> + peekArray n_bytes ptr + +hashFB :: FastBytes -> Int +hashFB (FastBytes len buf) + = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $ hashStr ptr len + +lengthFB :: FastBytes -> Int +lengthFB f = fb_n_bytes f + +appendFB :: FastBytes -> FastBytes -> FastBytes +appendFB fb1 fb2 = + inlinePerformIO $ do + r <- mallocForeignPtrBytes len + withForeignPtr r $ \ r' -> do + withForeignPtr (fb_buf fb1) $ \ fb1Ptr -> do + withForeignPtr (fb_buf fb2) $ \ fb2Ptr -> do + copyBytes r' fb1Ptr len1 + copyBytes (advancePtr r' len1) fb2Ptr len2 + return $ foreignPtrToFastBytes r len + where len = len1 + len2 + len1 = fb_n_bytes fb1 + len2 = fb_n_bytes fb2 + + {-| A 'FastString' is an array of bytes, hashed to support fast O(1) comparison. It is also associated with a character encoding, so that @@ -165,8 +230,12 @@ instance Data FastString where dataTypeOf _ = mkNoRepType "FastString" cmpFS :: FastString -> FastString -> Ordering -cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) = +cmpFS f1@(FastString u1 _ _ _ _) f2@(FastString u2 _ _ _ _) = if u1 == u2 then EQ else + cmpFB (fastStringToFastBytes f1) (fastStringToFastBytes f2) + +cmpFB :: FastBytes -> FastBytes -> Ordering +cmpFB (FastBytes l1 buf1) (FastBytes l2 buf2) = case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of LT -> LT EQ -> compare l1 l2 @@ -431,9 +500,7 @@ unpackFS (FastString _ n_bytes _ buf enc) = -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString' bytesFS :: FastString -> [Word8] -bytesFS (FastString _ n_bytes _ buf _) = - inlinePerformIO $ withForeignPtr buf $ \ptr -> - peekArray n_bytes ptr +bytesFS fs = bytesFB $ fastStringToFastBytes fs -- | Returns a Z-encoded version of a 'FastString'. This might be the -- original, if it was already Z-encoded. The first time this diff --git a/compiler/utils/Outputable.lhs b/compiler/utils/Outputable.lhs index f74aaa84fe..8d97de8394 100644 --- a/compiler/utils/Outputable.lhs +++ b/compiler/utils/Outputable.lhs @@ -48,7 +48,7 @@ module Outputable ( renderWithStyle, pprInfixVar, pprPrefixVar, - pprHsChar, pprHsString, + pprHsChar, pprHsString, pprHsBytes, pprFastFilePath, -- * Controlling the style in which output is printed @@ -743,6 +743,16 @@ pprHsChar c | c > '\x10ffff' = char '\\' <> text (show (fromIntegral (ord c) :: pprHsString :: FastString -> SDoc pprHsString fs = vcat (map text (showMultiLineString (unpackFS fs))) +-- | Special combinator for showing string literals. +pprHsBytes :: FastBytes -> SDoc +pprHsBytes fb = let escaped = concatMap escape $ bytesFB fb + in vcat (map text (showMultiLineString escaped)) <> char '#' + where escape :: Word8 -> String + escape w = let c = chr (fromIntegral w) + in if isAscii c + then [c] + else '\\' : show w + --------------------- -- Put a name in parens if it's an operator pprPrefixVar :: Bool -> SDoc -> SDoc |