summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/basicTypes/Literal.lhs9
-rw-r--r--compiler/codeGen/CgUtils.hs3
-rw-r--r--compiler/codeGen/StgCmmUtils.hs2
-rw-r--r--compiler/coreSyn/CoreUnfold.lhs2
-rw-r--r--compiler/coreSyn/ExternalCore.lhs4
-rw-r--r--compiler/coreSyn/MkCore.lhs4
-rw-r--r--compiler/coreSyn/MkExternalCore.lhs2
-rw-r--r--compiler/coreSyn/PprExternalCore.lhs4
-rw-r--r--compiler/deSugar/DsBinds.lhs2
-rw-r--r--compiler/deSugar/MatchLit.lhs14
-rw-r--r--compiler/ghci/ByteCodeGen.lhs2
-rw-r--r--compiler/parser/ParserCore.y2
-rw-r--r--compiler/prelude/PrelRules.lhs2
-rw-r--r--compiler/utils/Binary.hs23
-rw-r--r--compiler/utils/FastString.lhs75
-rw-r--r--compiler/utils/Outputable.lhs12
16 files changed, 129 insertions, 33 deletions
diff --git a/compiler/basicTypes/Literal.lhs b/compiler/basicTypes/Literal.lhs
index bbc70551f9..8fbcbb7a88 100644
--- a/compiler/basicTypes/Literal.lhs
+++ b/compiler/basicTypes/Literal.lhs
@@ -84,7 +84,7 @@ data Literal
-- First the primitive guys
MachChar Char -- ^ @Char#@ - at least 31 bits. Create with 'mkMachChar'
- | MachStr FastString -- ^ A string-literal: stored and emitted
+ | MachStr FastBytes -- ^ A string-literal: stored and emitted
-- UTF-8 encoded, we'll arrange to decode it
-- at runtime. Also emitted with a @'\0'@
-- terminator. Create with 'mkMachString'
@@ -248,7 +248,8 @@ mkMachChar = MachChar
-- | Creates a 'Literal' of type @Addr#@, which is appropriate for passing to
-- e.g. some of the \"error\" functions in GHC.Err such as @GHC.Err.runtimeError@
mkMachString :: String -> Literal
-mkMachString s = MachStr (mkFastString s) -- stored UTF-8 encoded
+-- stored UTF-8 encoded
+mkMachString s = MachStr (fastStringToFastBytes $ mkFastString s)
mkLitInteger :: Integer -> Type -> Literal
mkLitInteger = LitInteger
@@ -436,7 +437,7 @@ pprLiteral :: (SDoc -> SDoc) -> Literal -> SDoc
-- to wrap parens around literals that occur in
-- a context requiring an atomic thing
pprLiteral _ (MachChar ch) = pprHsChar ch
-pprLiteral _ (MachStr s) = pprHsString s
+pprLiteral _ (MachStr s) = pprHsBytes s
pprLiteral _ (MachInt i) = pprIntVal i
pprLiteral _ (MachDouble d) = double (fromRat d)
pprLiteral _ (MachNullAddr) = ptext (sLit "__NULL")
@@ -469,7 +470,7 @@ Hash values should be zero or a positive integer. No negatives please.
\begin{code}
hashLiteral :: Literal -> Int
hashLiteral (MachChar c) = ord c + 1000 -- Keep it out of range of common ints
-hashLiteral (MachStr s) = hashFS s
+hashLiteral (MachStr s) = hashFB s
hashLiteral (MachNullAddr) = 0
hashLiteral (MachInt i) = hashInteger i
hashLiteral (MachInt64 i) = hashInteger i
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index e7d17c1f03..08b6fb8939 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -92,8 +92,7 @@ addIdReps ids = [(idCgRep id, id) | id <- ids]
-------------------------------------------------------------------------
cgLit :: Literal -> FCode CmmLit
-cgLit (MachStr s) = newByteStringCLit (bytesFS s)
- -- not unpackFS; we want the UTF-8 byte stream.
+cgLit (MachStr s) = newByteStringCLit (bytesFB s)
cgLit other_lit = return (mkSimpleLit other_lit)
mkSimpleLit :: Literal -> CmmLit
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index 733c2d4692..ab44888597 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -90,7 +90,7 @@ import Data.Maybe
-------------------------------------------------------------------------
cgLit :: Literal -> FCode CmmLit
-cgLit (MachStr s) = newByteStringCLit (bytesFS s)
+cgLit (MachStr s) = newByteStringCLit (bytesFB s)
-- not unpackFS; we want the UTF-8 byte stream.
cgLit other_lit = return (mkSimpleLit other_lit)
diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs
index 816d34e87b..6c61f4294c 100644
--- a/compiler/coreSyn/CoreUnfold.lhs
+++ b/compiler/coreSyn/CoreUnfold.lhs
@@ -508,7 +508,7 @@ sizeExpr bOMB_OUT_SIZE top_args expr
litSize :: Literal -> Int
-- Used by CoreUnfold.sizeExpr
litSize (LitInteger {}) = 100 -- Note [Size of literal integers]
-litSize (MachStr str) = 10 + 10 * ((lengthFS str + 3) `div` 4)
+litSize (MachStr str) = 10 + 10 * ((lengthFB str + 3) `div` 4)
-- If size could be 0 then @f "x"@ might be too small
-- [Sept03: make literal strings a bit bigger to avoid fruitless
-- duplication of little strings]
diff --git a/compiler/coreSyn/ExternalCore.lhs b/compiler/coreSyn/ExternalCore.lhs
index 3d416f78a4..d2f6691a7c 100644
--- a/compiler/coreSyn/ExternalCore.lhs
+++ b/compiler/coreSyn/ExternalCore.lhs
@@ -11,6 +11,8 @@
module ExternalCore where
+import Data.Word
+
data Module
= Module Mname [Tdef] [Vdefg]
@@ -84,7 +86,7 @@ data Lit
= Lint Integer Ty
| Lrational Rational Ty
| Lchar Char Ty
- | Lstring String Ty
+ | Lstring [Word8] Ty
type Mname = Id
diff --git a/compiler/coreSyn/MkCore.lhs b/compiler/coreSyn/MkCore.lhs
index 410d62db7d..3a696d1914 100644
--- a/compiler/coreSyn/MkCore.lhs
+++ b/compiler/coreSyn/MkCore.lhs
@@ -283,11 +283,11 @@ mkStringExprFS str
| all safeChar chars
= do unpack_id <- lookupId unpackCStringName
- return (App (Var unpack_id) (Lit (MachStr str)))
+ return (App (Var unpack_id) (Lit (MachStr (fastStringToFastBytes str))))
| otherwise
= do unpack_id <- lookupId unpackCStringUtf8Name
- return (App (Var unpack_id) (Lit (MachStr str)))
+ return (App (Var unpack_id) (Lit (MachStr (fastStringToFastBytes str))))
where
chars = unpackFS str
diff --git a/compiler/coreSyn/MkExternalCore.lhs b/compiler/coreSyn/MkExternalCore.lhs
index b6c682ffc0..d05da2a420 100644
--- a/compiler/coreSyn/MkExternalCore.lhs
+++ b/compiler/coreSyn/MkExternalCore.lhs
@@ -221,7 +221,7 @@ make_lit dflags l =
-- For a character bigger than 0xff, we represent it in ext-core
-- as an int lit with a char type.
MachChar i -> C.Lint (fromIntegral $ ord i) t
- MachStr s -> C.Lstring (unpackFS s) t
+ MachStr s -> C.Lstring (bytesFB s) t
MachNullAddr -> C.Lint 0 t
MachInt i -> C.Lint i t
MachInt64 i -> C.Lint i t
diff --git a/compiler/coreSyn/PprExternalCore.lhs b/compiler/coreSyn/PprExternalCore.lhs
index 571b816e59..9c6846c494 100644
--- a/compiler/coreSyn/PprExternalCore.lhs
+++ b/compiler/coreSyn/PprExternalCore.lhs
@@ -199,7 +199,9 @@ plit (Lint i t) = parens (integer i <> text "::" <> pty t)
plit (Lrational r t) = parens (text (show (numerator r)) <+> char '%'
<+> text (show (denominator r)) <> text "::" <> pty t)
plit (Lchar c t) = parens (text ("\'" ++ escape [c] ++ "\'") <> text "::" <> pty t)
-plit (Lstring s t) = parens (pstring s <> text "::" <> pty t)
+-- This is a little messy. We shouldn't really be going via String.
+plit (Lstring bs t) = parens (pstring str <> text "::" <> pty t)
+ where str = map (chr . fromIntegral) bs
pstring :: String -> Doc
pstring s = doubleQuotes(text (escape s))
diff --git a/compiler/deSugar/DsBinds.lhs b/compiler/deSugar/DsBinds.lhs
index 8949387aae..75680bcf3a 100644
--- a/compiler/deSugar/DsBinds.lhs
+++ b/compiler/deSugar/DsBinds.lhs
@@ -775,7 +775,7 @@ dsEvTerm (EvSuperClass d n)
dsEvTerm (EvDelayedError ty msg) = return $ Var errorId `mkTyApps` [ty] `mkApps` [litMsg]
where
errorId = rUNTIME_ERROR_ID
- litMsg = Lit (MachStr msg)
+ litMsg = Lit (MachStr (fastStringToFastBytes msg))
dsEvTerm (EvLit l) =
case l of
diff --git a/compiler/deSugar/MatchLit.lhs b/compiler/deSugar/MatchLit.lhs
index 84ec342473..a3fe356660 100644
--- a/compiler/deSugar/MatchLit.lhs
+++ b/compiler/deSugar/MatchLit.lhs
@@ -39,6 +39,7 @@ import TysWiredIn
import Literal
import SrcLoc
import Data.Ratio
+import MonadUtils
import Outputable
import BasicTypes
import Util
@@ -68,7 +69,7 @@ See also below where we look for @DictApps@ for \tr{plusInt}, etc.
\begin{code}
dsLit :: HsLit -> DsM CoreExpr
-dsLit (HsStringPrim s) = return (Lit (MachStr s))
+dsLit (HsStringPrim s) = return (Lit (MachStr (fastStringToFastBytes s)))
dsLit (HsCharPrim c) = return (Lit (MachChar c))
dsLit (HsIntPrim i) = return (Lit (MachInt i))
dsLit (HsWordPrim w) = return (Lit (MachWord w))
@@ -123,10 +124,10 @@ hsLitKey (HsWordPrim w) = mkMachWord w
hsLitKey (HsInt64Prim i) = mkMachInt64 i
hsLitKey (HsWord64Prim w) = mkMachWord64 w
hsLitKey (HsCharPrim c) = MachChar c
-hsLitKey (HsStringPrim s) = MachStr s
+hsLitKey (HsStringPrim s) = MachStr (fastStringToFastBytes s)
hsLitKey (HsFloatPrim f) = MachFloat (fl_value f)
hsLitKey (HsDoublePrim d) = MachDouble (fl_value d)
-hsLitKey (HsString s) = MachStr s
+hsLitKey (HsString s) = MachStr (fastStringToFastBytes s)
hsLitKey l = pprPanic "hsLitKey" (ppr l)
hsOverLitKey :: OutputableBndr a => HsOverLit a -> Bool -> Literal
@@ -138,7 +139,7 @@ litValKey (HsIntegral i) False = MachInt i
litValKey (HsIntegral i) True = MachInt (-i)
litValKey (HsFractional r) False = MachFloat (fl_value r)
litValKey (HsFractional r) True = MachFloat (negate (fl_value r))
-litValKey (HsIsString s) neg = ASSERT( not neg) MachStr s
+litValKey (HsIsString s) neg = ASSERT( not neg) MachStr (fastStringToFastBytes s)
\end{code}
%************************************************************************
@@ -253,7 +254,10 @@ matchLiterals (var:vars) ty sub_groups
wrap_str_guard :: Id -> (Literal,MatchResult) -> DsM MatchResult
-- Equality check for string literals
wrap_str_guard eq_str (MachStr s, mr)
- = do { lit <- mkStringExprFS s
+ = do { -- We now have to convert back to FastString. Perhaps there
+ -- should be separate MachBytes and MachStr constructors?
+ s' <- liftIO $ mkFastStringFastBytes s
+ ; lit <- mkStringExprFS s'
; let pred = mkApps (Var eq_str) [Var var, lit]
; return (mkGuardedMatchResult pred mr) }
wrap_str_guard _ (l, _) = pprPanic "matchLiterals/wrap_str_guard" (ppr l)
diff --git a/compiler/ghci/ByteCodeGen.lhs b/compiler/ghci/ByteCodeGen.lhs
index d722964bcd..a19d2ecf0b 100644
--- a/compiler/ghci/ByteCodeGen.lhs
+++ b/compiler/ghci/ByteCodeGen.lhs
@@ -1259,7 +1259,7 @@ pushAtom _ _ (AnnLit lit)
pushStr s
= let getMallocvilleAddr
= case s of
- FastString _ n _ fp _ ->
+ FastBytes n fp ->
-- we could grab the Ptr from the ForeignPtr,
-- but then we have no way to control its lifetime.
-- In reality it'll probably stay alive long enoungh
diff --git a/compiler/parser/ParserCore.y b/compiler/parser/ParserCore.y
index edb8b50864..7f9a49a2b7 100644
--- a/compiler/parser/ParserCore.y
+++ b/compiler/parser/ParserCore.y
@@ -303,7 +303,7 @@ lit :: { Literal }
: '(' INTEGER '::' aty ')' { convIntLit $2 $4 }
| '(' RATIONAL '::' aty ')' { convRatLit $2 $4 }
| '(' CHAR '::' aty ')' { MachChar $2 }
- | '(' STRING '::' aty ')' { MachStr (mkFastString $2) }
+ | '(' STRING '::' aty ')' { MachStr (fastStringToFastBytes (mkFastString $2)) }
fs_var_occ :: { FastString }
: NAME { mkFastString $1 }
diff --git a/compiler/prelude/PrelRules.lhs b/compiler/prelude/PrelRules.lhs
index 7aeb920e0c..713726288f 100644
--- a/compiler/prelude/PrelRules.lhs
+++ b/compiler/prelude/PrelRules.lhs
@@ -737,7 +737,7 @@ match_append_lit _ [Type ty1,
c1 `cheapEqExpr` c2
= ASSERT( ty1 `eqType` ty2 )
Just (Var unpk `App` Type ty1
- `App` Lit (MachStr (s1 `appendFS` s2))
+ `App` Lit (MachStr (s1 `appendFB` s2))
`App` c1
`App` n)
diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs
index 77bd190fa9..bf24b09b71 100644
--- a/compiler/utils/Binary.hs
+++ b/compiler/utils/Binary.hs
@@ -725,7 +725,14 @@ type SymbolTable = Array Int Name
---------------------------------------------------------
putFS :: BinHandle -> FastString -> IO ()
-putFS bh (FastString _ l _ buf _) = do
+putFS bh fs = putFB bh $ fastStringToFastBytes fs
+
+getFS :: BinHandle -> IO FastString
+getFS bh = do fb <- getFB bh
+ mkFastStringFastBytes fb
+
+putFB :: BinHandle -> FastBytes -> IO ()
+putFB bh (FastBytes l buf) = do
put_ bh l
withForeignPtr buf $ \ptr ->
let
@@ -738,19 +745,19 @@ putFS bh (FastString _ l _ buf _) = do
go 0
{- -- possible faster version, not quite there yet:
-getFS bh@BinMem{} = do
+getFB bh@BinMem{} = do
(I# l) <- get bh
arr <- readIORef (arr_r bh)
off <- readFastMutInt (off_r bh)
- return $! (mkFastSubStringBA# arr off l)
+ return $! (mkFastSubBytesBA# arr off l)
-}
-getFS :: BinHandle -> IO FastString
-getFS bh = do
+getFB :: BinHandle -> IO FastBytes
+getFB bh = do
l <- get bh
fp <- mallocForeignPtrBytes l
withForeignPtr fp $ \ptr -> do
let
- go n | n == l = mkFastStringForeignPtr ptr fp l
+ go n | n == l = return $ foreignPtrToFastBytes fp l
| otherwise = do
b <- getByte bh
pokeElemOff ptr n b
@@ -758,6 +765,10 @@ getFS bh = do
--
go 0
+instance Binary FastBytes where
+ put_ bh f = putFB bh f
+ get bh = getFB bh
+
instance Binary FastString where
put_ bh f =
case getUserData bh of
diff --git a/compiler/utils/FastString.lhs b/compiler/utils/FastString.lhs
index 2c94de75f9..05a379808e 100644
--- a/compiler/utils/FastString.lhs
+++ b/compiler/utils/FastString.lhs
@@ -26,6 +26,16 @@
-- Use 'LitString' unless you want the facilities of 'FastString'.
module FastString
(
+ -- * FastBytes
+ FastBytes(..),
+ mkFastStringFastBytes,
+ foreignPtrToFastBytes,
+ fastStringToFastBytes,
+ bytesFB,
+ hashFB,
+ lengthFB,
+ appendFB,
+
-- * FastStrings
FastString(..), -- not abstract, for now.
@@ -117,6 +127,61 @@ import GHC.Base ( unpackCString# )
#define hASH_TBL_SIZE_UNBOXED 4091#
+data FastBytes = FastBytes {
+ fb_n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
+ fb_buf :: {-# UNPACK #-} !(ForeignPtr Word8)
+ } deriving Typeable
+
+instance Data FastBytes where
+ -- don't traverse?
+ toConstr _ = abstractConstr "FastBytes"
+ gunfold _ _ = error "gunfold"
+ dataTypeOf _ = mkNoRepType "FastBytes"
+
+instance Eq FastBytes where
+ x == y = (x `compare` y) == EQ
+
+instance Ord FastBytes where
+ compare = cmpFB
+
+foreignPtrToFastBytes :: ForeignPtr Word8 -> Int -> FastBytes
+foreignPtrToFastBytes fp len = FastBytes len fp
+
+mkFastStringFastBytes :: FastBytes -> IO FastString
+mkFastStringFastBytes (FastBytes len fp)
+ = withForeignPtr fp $ \ptr -> mkFastStringForeignPtr ptr fp len
+
+fastStringToFastBytes :: FastString -> FastBytes
+fastStringToFastBytes f = FastBytes (n_bytes f) (buf f)
+
+-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
+bytesFB :: FastBytes -> [Word8]
+bytesFB (FastBytes n_bytes buf) =
+ inlinePerformIO $ withForeignPtr buf $ \ptr ->
+ peekArray n_bytes ptr
+
+hashFB :: FastBytes -> Int
+hashFB (FastBytes len buf)
+ = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $ hashStr ptr len
+
+lengthFB :: FastBytes -> Int
+lengthFB f = fb_n_bytes f
+
+appendFB :: FastBytes -> FastBytes -> FastBytes
+appendFB fb1 fb2 =
+ inlinePerformIO $ do
+ r <- mallocForeignPtrBytes len
+ withForeignPtr r $ \ r' -> do
+ withForeignPtr (fb_buf fb1) $ \ fb1Ptr -> do
+ withForeignPtr (fb_buf fb2) $ \ fb2Ptr -> do
+ copyBytes r' fb1Ptr len1
+ copyBytes (advancePtr r' len1) fb2Ptr len2
+ return $ foreignPtrToFastBytes r len
+ where len = len1 + len2
+ len1 = fb_n_bytes fb1
+ len2 = fb_n_bytes fb2
+
+
{-|
A 'FastString' is an array of bytes, hashed to support fast O(1)
comparison. It is also associated with a character encoding, so that
@@ -165,8 +230,12 @@ instance Data FastString where
dataTypeOf _ = mkNoRepType "FastString"
cmpFS :: FastString -> FastString -> Ordering
-cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
+cmpFS f1@(FastString u1 _ _ _ _) f2@(FastString u2 _ _ _ _) =
if u1 == u2 then EQ else
+ cmpFB (fastStringToFastBytes f1) (fastStringToFastBytes f2)
+
+cmpFB :: FastBytes -> FastBytes -> Ordering
+cmpFB (FastBytes l1 buf1) (FastBytes l2 buf2) =
case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of
LT -> LT
EQ -> compare l1 l2
@@ -431,9 +500,7 @@ unpackFS (FastString _ n_bytes _ buf enc) =
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS :: FastString -> [Word8]
-bytesFS (FastString _ n_bytes _ buf _) =
- inlinePerformIO $ withForeignPtr buf $ \ptr ->
- peekArray n_bytes ptr
+bytesFS fs = bytesFB $ fastStringToFastBytes fs
-- | Returns a Z-encoded version of a 'FastString'. This might be the
-- original, if it was already Z-encoded. The first time this
diff --git a/compiler/utils/Outputable.lhs b/compiler/utils/Outputable.lhs
index f74aaa84fe..8d97de8394 100644
--- a/compiler/utils/Outputable.lhs
+++ b/compiler/utils/Outputable.lhs
@@ -48,7 +48,7 @@ module Outputable (
renderWithStyle,
pprInfixVar, pprPrefixVar,
- pprHsChar, pprHsString,
+ pprHsChar, pprHsString, pprHsBytes,
pprFastFilePath,
-- * Controlling the style in which output is printed
@@ -743,6 +743,16 @@ pprHsChar c | c > '\x10ffff' = char '\\' <> text (show (fromIntegral (ord c) ::
pprHsString :: FastString -> SDoc
pprHsString fs = vcat (map text (showMultiLineString (unpackFS fs)))
+-- | Special combinator for showing string literals.
+pprHsBytes :: FastBytes -> SDoc
+pprHsBytes fb = let escaped = concatMap escape $ bytesFB fb
+ in vcat (map text (showMultiLineString escaped)) <> char '#'
+ where escape :: Word8 -> String
+ escape w = let c = chr (fromIntegral w)
+ in if isAscii c
+ then [c]
+ else '\\' : show w
+
---------------------
-- Put a name in parens if it's an operator
pprPrefixVar :: Bool -> SDoc -> SDoc