summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Lynagh <igloo@earth.li>2012-07-14 20:48:42 +0100
committerIan Lynagh <igloo@earth.li>2012-07-14 20:57:37 +0100
commit7ae1bec53801069661e249e47ebd6998d6450093 (patch)
tree3bf00efb097a569173b2c2e9fe1d10347508c6ba
parent18f82197efbc2b930b123032fd7828626c04ee43 (diff)
downloadhaskell-7ae1bec53801069661e249e47ebd6998d6450093.tar.gz
Implement FastBytes, and use it for MachStr
This is a first step on the way to refactoring the FastString type. FastBytes currently has no unique, mainly because there isn't currently a nice way to produce them in Binary. Also, we don't currently do the "Dictionary" thing with FastBytes in Binary. I'm not sure whether this is important. We can change both decisions later, but in the meantime this gets the refactoring underway.
-rw-r--r--compiler/basicTypes/Literal.lhs9
-rw-r--r--compiler/codeGen/CgUtils.hs3
-rw-r--r--compiler/codeGen/StgCmmUtils.hs2
-rw-r--r--compiler/coreSyn/CoreUnfold.lhs2
-rw-r--r--compiler/coreSyn/ExternalCore.lhs4
-rw-r--r--compiler/coreSyn/MkCore.lhs4
-rw-r--r--compiler/coreSyn/MkExternalCore.lhs2
-rw-r--r--compiler/coreSyn/PprExternalCore.lhs4
-rw-r--r--compiler/deSugar/DsBinds.lhs2
-rw-r--r--compiler/deSugar/MatchLit.lhs14
-rw-r--r--compiler/ghci/ByteCodeGen.lhs2
-rw-r--r--compiler/parser/ParserCore.y2
-rw-r--r--compiler/prelude/PrelRules.lhs2
-rw-r--r--compiler/utils/Binary.hs23
-rw-r--r--compiler/utils/FastString.lhs75
-rw-r--r--compiler/utils/Outputable.lhs12
16 files changed, 129 insertions, 33 deletions
diff --git a/compiler/basicTypes/Literal.lhs b/compiler/basicTypes/Literal.lhs
index bbc70551f9..8fbcbb7a88 100644
--- a/compiler/basicTypes/Literal.lhs
+++ b/compiler/basicTypes/Literal.lhs
@@ -84,7 +84,7 @@ data Literal
-- First the primitive guys
MachChar Char -- ^ @Char#@ - at least 31 bits. Create with 'mkMachChar'
- | MachStr FastString -- ^ A string-literal: stored and emitted
+ | MachStr FastBytes -- ^ A string-literal: stored and emitted
-- UTF-8 encoded, we'll arrange to decode it
-- at runtime. Also emitted with a @'\0'@
-- terminator. Create with 'mkMachString'
@@ -248,7 +248,8 @@ mkMachChar = MachChar
-- | Creates a 'Literal' of type @Addr#@, which is appropriate for passing to
-- e.g. some of the \"error\" functions in GHC.Err such as @GHC.Err.runtimeError@
mkMachString :: String -> Literal
-mkMachString s = MachStr (mkFastString s) -- stored UTF-8 encoded
+-- stored UTF-8 encoded
+mkMachString s = MachStr (fastStringToFastBytes $ mkFastString s)
mkLitInteger :: Integer -> Type -> Literal
mkLitInteger = LitInteger
@@ -436,7 +437,7 @@ pprLiteral :: (SDoc -> SDoc) -> Literal -> SDoc
-- to wrap parens around literals that occur in
-- a context requiring an atomic thing
pprLiteral _ (MachChar ch) = pprHsChar ch
-pprLiteral _ (MachStr s) = pprHsString s
+pprLiteral _ (MachStr s) = pprHsBytes s
pprLiteral _ (MachInt i) = pprIntVal i
pprLiteral _ (MachDouble d) = double (fromRat d)
pprLiteral _ (MachNullAddr) = ptext (sLit "__NULL")
@@ -469,7 +470,7 @@ Hash values should be zero or a positive integer. No negatives please.
\begin{code}
hashLiteral :: Literal -> Int
hashLiteral (MachChar c) = ord c + 1000 -- Keep it out of range of common ints
-hashLiteral (MachStr s) = hashFS s
+hashLiteral (MachStr s) = hashFB s
hashLiteral (MachNullAddr) = 0
hashLiteral (MachInt i) = hashInteger i
hashLiteral (MachInt64 i) = hashInteger i
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index e7d17c1f03..08b6fb8939 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -92,8 +92,7 @@ addIdReps ids = [(idCgRep id, id) | id <- ids]
-------------------------------------------------------------------------
cgLit :: Literal -> FCode CmmLit
-cgLit (MachStr s) = newByteStringCLit (bytesFS s)
- -- not unpackFS; we want the UTF-8 byte stream.
+cgLit (MachStr s) = newByteStringCLit (bytesFB s)
cgLit other_lit = return (mkSimpleLit other_lit)
mkSimpleLit :: Literal -> CmmLit
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index 733c2d4692..ab44888597 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -90,7 +90,7 @@ import Data.Maybe
-------------------------------------------------------------------------
cgLit :: Literal -> FCode CmmLit
-cgLit (MachStr s) = newByteStringCLit (bytesFS s)
+cgLit (MachStr s) = newByteStringCLit (bytesFB s)
-- not unpackFS; we want the UTF-8 byte stream.
cgLit other_lit = return (mkSimpleLit other_lit)
diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs
index 816d34e87b..6c61f4294c 100644
--- a/compiler/coreSyn/CoreUnfold.lhs
+++ b/compiler/coreSyn/CoreUnfold.lhs
@@ -508,7 +508,7 @@ sizeExpr bOMB_OUT_SIZE top_args expr
litSize :: Literal -> Int
-- Used by CoreUnfold.sizeExpr
litSize (LitInteger {}) = 100 -- Note [Size of literal integers]
-litSize (MachStr str) = 10 + 10 * ((lengthFS str + 3) `div` 4)
+litSize (MachStr str) = 10 + 10 * ((lengthFB str + 3) `div` 4)
-- If size could be 0 then @f "x"@ might be too small
-- [Sept03: make literal strings a bit bigger to avoid fruitless
-- duplication of little strings]
diff --git a/compiler/coreSyn/ExternalCore.lhs b/compiler/coreSyn/ExternalCore.lhs
index 3d416f78a4..d2f6691a7c 100644
--- a/compiler/coreSyn/ExternalCore.lhs
+++ b/compiler/coreSyn/ExternalCore.lhs
@@ -11,6 +11,8 @@
module ExternalCore where
+import Data.Word
+
data Module
= Module Mname [Tdef] [Vdefg]
@@ -84,7 +86,7 @@ data Lit
= Lint Integer Ty
| Lrational Rational Ty
| Lchar Char Ty
- | Lstring String Ty
+ | Lstring [Word8] Ty
type Mname = Id
diff --git a/compiler/coreSyn/MkCore.lhs b/compiler/coreSyn/MkCore.lhs
index 410d62db7d..3a696d1914 100644
--- a/compiler/coreSyn/MkCore.lhs
+++ b/compiler/coreSyn/MkCore.lhs
@@ -283,11 +283,11 @@ mkStringExprFS str
| all safeChar chars
= do unpack_id <- lookupId unpackCStringName
- return (App (Var unpack_id) (Lit (MachStr str)))
+ return (App (Var unpack_id) (Lit (MachStr (fastStringToFastBytes str))))
| otherwise
= do unpack_id <- lookupId unpackCStringUtf8Name
- return (App (Var unpack_id) (Lit (MachStr str)))
+ return (App (Var unpack_id) (Lit (MachStr (fastStringToFastBytes str))))
where
chars = unpackFS str
diff --git a/compiler/coreSyn/MkExternalCore.lhs b/compiler/coreSyn/MkExternalCore.lhs
index b6c682ffc0..d05da2a420 100644
--- a/compiler/coreSyn/MkExternalCore.lhs
+++ b/compiler/coreSyn/MkExternalCore.lhs
@@ -221,7 +221,7 @@ make_lit dflags l =
-- For a character bigger than 0xff, we represent it in ext-core
-- as an int lit with a char type.
MachChar i -> C.Lint (fromIntegral $ ord i) t
- MachStr s -> C.Lstring (unpackFS s) t
+ MachStr s -> C.Lstring (bytesFB s) t
MachNullAddr -> C.Lint 0 t
MachInt i -> C.Lint i t
MachInt64 i -> C.Lint i t
diff --git a/compiler/coreSyn/PprExternalCore.lhs b/compiler/coreSyn/PprExternalCore.lhs
index 571b816e59..9c6846c494 100644
--- a/compiler/coreSyn/PprExternalCore.lhs
+++ b/compiler/coreSyn/PprExternalCore.lhs
@@ -199,7 +199,9 @@ plit (Lint i t) = parens (integer i <> text "::" <> pty t)
plit (Lrational r t) = parens (text (show (numerator r)) <+> char '%'
<+> text (show (denominator r)) <> text "::" <> pty t)
plit (Lchar c t) = parens (text ("\'" ++ escape [c] ++ "\'") <> text "::" <> pty t)
-plit (Lstring s t) = parens (pstring s <> text "::" <> pty t)
+-- This is a little messy. We shouldn't really be going via String.
+plit (Lstring bs t) = parens (pstring str <> text "::" <> pty t)
+ where str = map (chr . fromIntegral) bs
pstring :: String -> Doc
pstring s = doubleQuotes(text (escape s))
diff --git a/compiler/deSugar/DsBinds.lhs b/compiler/deSugar/DsBinds.lhs
index 8949387aae..75680bcf3a 100644
--- a/compiler/deSugar/DsBinds.lhs
+++ b/compiler/deSugar/DsBinds.lhs
@@ -775,7 +775,7 @@ dsEvTerm (EvSuperClass d n)
dsEvTerm (EvDelayedError ty msg) = return $ Var errorId `mkTyApps` [ty] `mkApps` [litMsg]
where
errorId = rUNTIME_ERROR_ID
- litMsg = Lit (MachStr msg)
+ litMsg = Lit (MachStr (fastStringToFastBytes msg))
dsEvTerm (EvLit l) =
case l of
diff --git a/compiler/deSugar/MatchLit.lhs b/compiler/deSugar/MatchLit.lhs
index 84ec342473..a3fe356660 100644
--- a/compiler/deSugar/MatchLit.lhs
+++ b/compiler/deSugar/MatchLit.lhs
@@ -39,6 +39,7 @@ import TysWiredIn
import Literal
import SrcLoc
import Data.Ratio
+import MonadUtils
import Outputable
import BasicTypes
import Util
@@ -68,7 +69,7 @@ See also below where we look for @DictApps@ for \tr{plusInt}, etc.
\begin{code}
dsLit :: HsLit -> DsM CoreExpr
-dsLit (HsStringPrim s) = return (Lit (MachStr s))
+dsLit (HsStringPrim s) = return (Lit (MachStr (fastStringToFastBytes s)))
dsLit (HsCharPrim c) = return (Lit (MachChar c))
dsLit (HsIntPrim i) = return (Lit (MachInt i))
dsLit (HsWordPrim w) = return (Lit (MachWord w))
@@ -123,10 +124,10 @@ hsLitKey (HsWordPrim w) = mkMachWord w
hsLitKey (HsInt64Prim i) = mkMachInt64 i
hsLitKey (HsWord64Prim w) = mkMachWord64 w
hsLitKey (HsCharPrim c) = MachChar c
-hsLitKey (HsStringPrim s) = MachStr s
+hsLitKey (HsStringPrim s) = MachStr (fastStringToFastBytes s)
hsLitKey (HsFloatPrim f) = MachFloat (fl_value f)
hsLitKey (HsDoublePrim d) = MachDouble (fl_value d)
-hsLitKey (HsString s) = MachStr s
+hsLitKey (HsString s) = MachStr (fastStringToFastBytes s)
hsLitKey l = pprPanic "hsLitKey" (ppr l)
hsOverLitKey :: OutputableBndr a => HsOverLit a -> Bool -> Literal
@@ -138,7 +139,7 @@ litValKey (HsIntegral i) False = MachInt i
litValKey (HsIntegral i) True = MachInt (-i)
litValKey (HsFractional r) False = MachFloat (fl_value r)
litValKey (HsFractional r) True = MachFloat (negate (fl_value r))
-litValKey (HsIsString s) neg = ASSERT( not neg) MachStr s
+litValKey (HsIsString s) neg = ASSERT( not neg) MachStr (fastStringToFastBytes s)
\end{code}
%************************************************************************
@@ -253,7 +254,10 @@ matchLiterals (var:vars) ty sub_groups
wrap_str_guard :: Id -> (Literal,MatchResult) -> DsM MatchResult
-- Equality check for string literals
wrap_str_guard eq_str (MachStr s, mr)
- = do { lit <- mkStringExprFS s
+ = do { -- We now have to convert back to FastString. Perhaps there
+ -- should be separate MachBytes and MachStr constructors?
+ s' <- liftIO $ mkFastStringFastBytes s
+ ; lit <- mkStringExprFS s'
; let pred = mkApps (Var eq_str) [Var var, lit]
; return (mkGuardedMatchResult pred mr) }
wrap_str_guard _ (l, _) = pprPanic "matchLiterals/wrap_str_guard" (ppr l)
diff --git a/compiler/ghci/ByteCodeGen.lhs b/compiler/ghci/ByteCodeGen.lhs
index d722964bcd..a19d2ecf0b 100644
--- a/compiler/ghci/ByteCodeGen.lhs
+++ b/compiler/ghci/ByteCodeGen.lhs
@@ -1259,7 +1259,7 @@ pushAtom _ _ (AnnLit lit)
pushStr s
= let getMallocvilleAddr
= case s of
- FastString _ n _ fp _ ->
+ FastBytes n fp ->
-- we could grab the Ptr from the ForeignPtr,
-- but then we have no way to control its lifetime.
-- In reality it'll probably stay alive long enoungh
diff --git a/compiler/parser/ParserCore.y b/compiler/parser/ParserCore.y
index edb8b50864..7f9a49a2b7 100644
--- a/compiler/parser/ParserCore.y
+++ b/compiler/parser/ParserCore.y
@@ -303,7 +303,7 @@ lit :: { Literal }
: '(' INTEGER '::' aty ')' { convIntLit $2 $4 }
| '(' RATIONAL '::' aty ')' { convRatLit $2 $4 }
| '(' CHAR '::' aty ')' { MachChar $2 }
- | '(' STRING '::' aty ')' { MachStr (mkFastString $2) }
+ | '(' STRING '::' aty ')' { MachStr (fastStringToFastBytes (mkFastString $2)) }
fs_var_occ :: { FastString }
: NAME { mkFastString $1 }
diff --git a/compiler/prelude/PrelRules.lhs b/compiler/prelude/PrelRules.lhs
index 7aeb920e0c..713726288f 100644
--- a/compiler/prelude/PrelRules.lhs
+++ b/compiler/prelude/PrelRules.lhs
@@ -737,7 +737,7 @@ match_append_lit _ [Type ty1,
c1 `cheapEqExpr` c2
= ASSERT( ty1 `eqType` ty2 )
Just (Var unpk `App` Type ty1
- `App` Lit (MachStr (s1 `appendFS` s2))
+ `App` Lit (MachStr (s1 `appendFB` s2))
`App` c1
`App` n)
diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs
index 77bd190fa9..bf24b09b71 100644
--- a/compiler/utils/Binary.hs
+++ b/compiler/utils/Binary.hs
@@ -725,7 +725,14 @@ type SymbolTable = Array Int Name
---------------------------------------------------------
putFS :: BinHandle -> FastString -> IO ()
-putFS bh (FastString _ l _ buf _) = do
+putFS bh fs = putFB bh $ fastStringToFastBytes fs
+
+getFS :: BinHandle -> IO FastString
+getFS bh = do fb <- getFB bh
+ mkFastStringFastBytes fb
+
+putFB :: BinHandle -> FastBytes -> IO ()
+putFB bh (FastBytes l buf) = do
put_ bh l
withForeignPtr buf $ \ptr ->
let
@@ -738,19 +745,19 @@ putFS bh (FastString _ l _ buf _) = do
go 0
{- -- possible faster version, not quite there yet:
-getFS bh@BinMem{} = do
+getFB bh@BinMem{} = do
(I# l) <- get bh
arr <- readIORef (arr_r bh)
off <- readFastMutInt (off_r bh)
- return $! (mkFastSubStringBA# arr off l)
+ return $! (mkFastSubBytesBA# arr off l)
-}
-getFS :: BinHandle -> IO FastString
-getFS bh = do
+getFB :: BinHandle -> IO FastBytes
+getFB bh = do
l <- get bh
fp <- mallocForeignPtrBytes l
withForeignPtr fp $ \ptr -> do
let
- go n | n == l = mkFastStringForeignPtr ptr fp l
+ go n | n == l = return $ foreignPtrToFastBytes fp l
| otherwise = do
b <- getByte bh
pokeElemOff ptr n b
@@ -758,6 +765,10 @@ getFS bh = do
--
go 0
+instance Binary FastBytes where
+ put_ bh f = putFB bh f
+ get bh = getFB bh
+
instance Binary FastString where
put_ bh f =
case getUserData bh of
diff --git a/compiler/utils/FastString.lhs b/compiler/utils/FastString.lhs
index 2c94de75f9..05a379808e 100644
--- a/compiler/utils/FastString.lhs
+++ b/compiler/utils/FastString.lhs
@@ -26,6 +26,16 @@
-- Use 'LitString' unless you want the facilities of 'FastString'.
module FastString
(
+ -- * FastBytes
+ FastBytes(..),
+ mkFastStringFastBytes,
+ foreignPtrToFastBytes,
+ fastStringToFastBytes,
+ bytesFB,
+ hashFB,
+ lengthFB,
+ appendFB,
+
-- * FastStrings
FastString(..), -- not abstract, for now.
@@ -117,6 +127,61 @@ import GHC.Base ( unpackCString# )
#define hASH_TBL_SIZE_UNBOXED 4091#
+data FastBytes = FastBytes {
+ fb_n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
+ fb_buf :: {-# UNPACK #-} !(ForeignPtr Word8)
+ } deriving Typeable
+
+instance Data FastBytes where
+ -- don't traverse?
+ toConstr _ = abstractConstr "FastBytes"
+ gunfold _ _ = error "gunfold"
+ dataTypeOf _ = mkNoRepType "FastBytes"
+
+instance Eq FastBytes where
+ x == y = (x `compare` y) == EQ
+
+instance Ord FastBytes where
+ compare = cmpFB
+
+foreignPtrToFastBytes :: ForeignPtr Word8 -> Int -> FastBytes
+foreignPtrToFastBytes fp len = FastBytes len fp
+
+mkFastStringFastBytes :: FastBytes -> IO FastString
+mkFastStringFastBytes (FastBytes len fp)
+ = withForeignPtr fp $ \ptr -> mkFastStringForeignPtr ptr fp len
+
+fastStringToFastBytes :: FastString -> FastBytes
+fastStringToFastBytes f = FastBytes (n_bytes f) (buf f)
+
+-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
+bytesFB :: FastBytes -> [Word8]
+bytesFB (FastBytes n_bytes buf) =
+ inlinePerformIO $ withForeignPtr buf $ \ptr ->
+ peekArray n_bytes ptr
+
+hashFB :: FastBytes -> Int
+hashFB (FastBytes len buf)
+ = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $ hashStr ptr len
+
+lengthFB :: FastBytes -> Int
+lengthFB f = fb_n_bytes f
+
+appendFB :: FastBytes -> FastBytes -> FastBytes
+appendFB fb1 fb2 =
+ inlinePerformIO $ do
+ r <- mallocForeignPtrBytes len
+ withForeignPtr r $ \ r' -> do
+ withForeignPtr (fb_buf fb1) $ \ fb1Ptr -> do
+ withForeignPtr (fb_buf fb2) $ \ fb2Ptr -> do
+ copyBytes r' fb1Ptr len1
+ copyBytes (advancePtr r' len1) fb2Ptr len2
+ return $ foreignPtrToFastBytes r len
+ where len = len1 + len2
+ len1 = fb_n_bytes fb1
+ len2 = fb_n_bytes fb2
+
+
{-|
A 'FastString' is an array of bytes, hashed to support fast O(1)
comparison. It is also associated with a character encoding, so that
@@ -165,8 +230,12 @@ instance Data FastString where
dataTypeOf _ = mkNoRepType "FastString"
cmpFS :: FastString -> FastString -> Ordering
-cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
+cmpFS f1@(FastString u1 _ _ _ _) f2@(FastString u2 _ _ _ _) =
if u1 == u2 then EQ else
+ cmpFB (fastStringToFastBytes f1) (fastStringToFastBytes f2)
+
+cmpFB :: FastBytes -> FastBytes -> Ordering
+cmpFB (FastBytes l1 buf1) (FastBytes l2 buf2) =
case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of
LT -> LT
EQ -> compare l1 l2
@@ -431,9 +500,7 @@ unpackFS (FastString _ n_bytes _ buf enc) =
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS :: FastString -> [Word8]
-bytesFS (FastString _ n_bytes _ buf _) =
- inlinePerformIO $ withForeignPtr buf $ \ptr ->
- peekArray n_bytes ptr
+bytesFS fs = bytesFB $ fastStringToFastBytes fs
-- | Returns a Z-encoded version of a 'FastString'. This might be the
-- original, if it was already Z-encoded. The first time this
diff --git a/compiler/utils/Outputable.lhs b/compiler/utils/Outputable.lhs
index f74aaa84fe..8d97de8394 100644
--- a/compiler/utils/Outputable.lhs
+++ b/compiler/utils/Outputable.lhs
@@ -48,7 +48,7 @@ module Outputable (
renderWithStyle,
pprInfixVar, pprPrefixVar,
- pprHsChar, pprHsString,
+ pprHsChar, pprHsString, pprHsBytes,
pprFastFilePath,
-- * Controlling the style in which output is printed
@@ -743,6 +743,16 @@ pprHsChar c | c > '\x10ffff' = char '\\' <> text (show (fromIntegral (ord c) ::
pprHsString :: FastString -> SDoc
pprHsString fs = vcat (map text (showMultiLineString (unpackFS fs)))
+-- | Special combinator for showing string literals.
+pprHsBytes :: FastBytes -> SDoc
+pprHsBytes fb = let escaped = concatMap escape $ bytesFB fb
+ in vcat (map text (showMultiLineString escaped)) <> char '#'
+ where escape :: Word8 -> String
+ escape w = let c = chr (fromIntegral w)
+ in if isAscii c
+ then [c]
+ else '\\' : show w
+
---------------------
-- Put a name in parens if it's an operator
pprPrefixVar :: Bool -> SDoc -> SDoc