diff options
Diffstat (limited to 'compiler/utils')
-rw-r--r-- | compiler/utils/Bag.lhs | 177 | ||||
-rw-r--r-- | compiler/utils/Binary.hs | 756 | ||||
-rw-r--r-- | compiler/utils/BitSet.lhs | 205 | ||||
-rw-r--r-- | compiler/utils/BufWrite.hs | 124 | ||||
-rw-r--r-- | compiler/utils/Digraph.lhs | 426 | ||||
-rw-r--r-- | compiler/utils/Encoding.hs | 373 | ||||
-rw-r--r-- | compiler/utils/FastMutInt.lhs | 54 | ||||
-rw-r--r-- | compiler/utils/FastString.lhs | 499 | ||||
-rw-r--r-- | compiler/utils/FastTypes.lhs | 65 | ||||
-rw-r--r-- | compiler/utils/FiniteMap.lhs | 749 | ||||
-rw-r--r-- | compiler/utils/IOEnv.hs | 208 | ||||
-rw-r--r-- | compiler/utils/ListSetOps.lhs | 227 | ||||
-rw-r--r-- | compiler/utils/Maybes.lhs | 123 | ||||
-rw-r--r-- | compiler/utils/OrdList.lhs | 83 | ||||
-rw-r--r-- | compiler/utils/Outputable.lhs | 540 | ||||
-rw-r--r-- | compiler/utils/Panic.lhs | 250 | ||||
-rw-r--r-- | compiler/utils/Pretty.lhs | 1075 | ||||
-rw-r--r-- | compiler/utils/StringBuffer.lhs | 240 | ||||
-rw-r--r-- | compiler/utils/UniqFM.lhs | 847 | ||||
-rw-r--r-- | compiler/utils/UniqSet.lhs | 138 | ||||
-rw-r--r-- | compiler/utils/Util.lhs | 1029 |
21 files changed, 8188 insertions, 0 deletions
diff --git a/compiler/utils/Bag.lhs b/compiler/utils/Bag.lhs new file mode 100644 index 0000000000..b107f84a3a --- /dev/null +++ b/compiler/utils/Bag.lhs @@ -0,0 +1,177 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[Bags]{@Bag@: an unordered collection with duplicates} + +\begin{code} +module Bag ( + Bag, -- abstract type + + emptyBag, unitBag, unionBags, unionManyBags, + mapBag, + elemBag, + filterBag, partitionBag, concatBag, foldBag, foldrBag, foldlBag, + isEmptyBag, isSingletonBag, consBag, snocBag, anyBag, + listToBag, bagToList, + mapBagM, mapAndUnzipBagM + ) where + +#include "HsVersions.h" + +import Outputable +import Util ( isSingleton ) +import List ( partition ) +\end{code} + + +\begin{code} +data Bag a + = EmptyBag + | UnitBag a + | TwoBags (Bag a) (Bag a) -- INVARIANT: neither branch is empty + | ListBag [a] -- INVARIANT: the list is non-empty + +emptyBag = EmptyBag +unitBag = UnitBag + +elemBag :: Eq a => a -> Bag a -> Bool + +elemBag x EmptyBag = False +elemBag x (UnitBag y) = x==y +elemBag x (TwoBags b1 b2) = x `elemBag` b1 || x `elemBag` b2 +elemBag x (ListBag ys) = any (x ==) ys + +unionManyBags :: [Bag a] -> Bag a +unionManyBags xs = foldr unionBags EmptyBag xs + +-- This one is a bit stricter! The bag will get completely evaluated. + +unionBags :: Bag a -> Bag a -> Bag a +unionBags EmptyBag b = b +unionBags b EmptyBag = b +unionBags b1 b2 = TwoBags b1 b2 + +consBag :: a -> Bag a -> Bag a +snocBag :: Bag a -> a -> Bag a + +consBag elt bag = (unitBag elt) `unionBags` bag +snocBag bag elt = bag `unionBags` (unitBag elt) + +isEmptyBag EmptyBag = True +isEmptyBag other = False -- NB invariants + +isSingletonBag :: Bag a -> Bool +isSingletonBag EmptyBag = False +isSingletonBag (UnitBag x) = True +isSingletonBag (TwoBags b1 b2) = False -- Neither is empty +isSingletonBag (ListBag xs) = isSingleton xs + +filterBag :: (a -> Bool) -> Bag a -> Bag a +filterBag pred EmptyBag = EmptyBag +filterBag pred b@(UnitBag val) = if pred val then b else EmptyBag +filterBag pred (TwoBags b1 b2) = sat1 `unionBags` sat2 + where + sat1 = filterBag pred b1 + sat2 = filterBag pred b2 +filterBag pred (ListBag vs) = listToBag (filter pred vs) + +anyBag :: (a -> Bool) -> Bag a -> Bool +anyBag p EmptyBag = False +anyBag p (UnitBag v) = p v +anyBag p (TwoBags b1 b2) = anyBag p b1 || anyBag p b2 +anyBag p (ListBag xs) = any p xs + +concatBag :: Bag (Bag a) -> Bag a +concatBag EmptyBag = EmptyBag +concatBag (UnitBag b) = b +concatBag (TwoBags b1 b2) = concatBag b1 `unionBags` concatBag b2 +concatBag (ListBag bs) = unionManyBags bs + +partitionBag :: (a -> Bool) -> Bag a -> (Bag a {- Satisfy predictate -}, + Bag a {- Don't -}) +partitionBag pred EmptyBag = (EmptyBag, EmptyBag) +partitionBag pred b@(UnitBag val) = if pred val then (b, EmptyBag) else (EmptyBag, b) +partitionBag pred (TwoBags b1 b2) = (sat1 `unionBags` sat2, fail1 `unionBags` fail2) + where + (sat1,fail1) = partitionBag pred b1 + (sat2,fail2) = partitionBag pred b2 +partitionBag pred (ListBag vs) = (listToBag sats, listToBag fails) + where + (sats,fails) = partition pred vs + + +foldBag :: (r -> r -> r) -- Replace TwoBags with this; should be associative + -> (a -> r) -- Replace UnitBag with this + -> r -- Replace EmptyBag with this + -> Bag a + -> r + +{- Standard definition +foldBag t u e EmptyBag = e +foldBag t u e (UnitBag x) = u x +foldBag t u e (TwoBags b1 b2) = (foldBag t u e b1) `t` (foldBag t u e b2) +foldBag t u e (ListBag xs) = foldr (t.u) e xs +-} + +-- More tail-recursive definition, exploiting associativity of "t" +foldBag t u e EmptyBag = e +foldBag t u e (UnitBag x) = u x `t` e +foldBag t u e (TwoBags b1 b2) = foldBag t u (foldBag t u e b2) b1 +foldBag t u e (ListBag xs) = foldr (t.u) e xs + +foldrBag :: (a -> r -> r) -> r + -> Bag a + -> r + +foldrBag k z EmptyBag = z +foldrBag k z (UnitBag x) = k x z +foldrBag k z (TwoBags b1 b2) = foldrBag k (foldrBag k z b2) b1 +foldrBag k z (ListBag xs) = foldr k z xs + +foldlBag :: (r -> a -> r) -> r + -> Bag a + -> r + +foldlBag k z EmptyBag = z +foldlBag k z (UnitBag x) = k z x +foldlBag k z (TwoBags b1 b2) = foldlBag k (foldlBag k z b1) b2 +foldlBag k z (ListBag xs) = foldl k z xs + + +mapBag :: (a -> b) -> Bag a -> Bag b +mapBag f EmptyBag = EmptyBag +mapBag f (UnitBag x) = UnitBag (f x) +mapBag f (TwoBags b1 b2) = TwoBags (mapBag f b1) (mapBag f b2) +mapBag f (ListBag xs) = ListBag (map f xs) + +mapBagM :: Monad m => (a -> m b) -> Bag a -> m (Bag b) +mapBagM f EmptyBag = return EmptyBag +mapBagM f (UnitBag x) = do { r <- f x; return (UnitBag r) } +mapBagM f (TwoBags b1 b2) = do { r1 <- mapBagM f b1; r2 <- mapBagM f b2; return (TwoBags r1 r2) } +mapBagM f (ListBag xs) = do { rs <- mapM f xs; return (ListBag rs) } + +mapAndUnzipBagM :: Monad m => (a -> m (b,c)) -> Bag a -> m (Bag b, Bag c) +mapAndUnzipBagM f EmptyBag = return (EmptyBag, EmptyBag) +mapAndUnzipBagM f (UnitBag x) = do { (r,s) <- f x; return (UnitBag r, UnitBag s) } +mapAndUnzipBagM f (TwoBags b1 b2) = do { (r1,s1) <- mapAndUnzipBagM f b1 + ; (r2,s2) <- mapAndUnzipBagM f b2 + ; return (TwoBags r1 r2, TwoBags s1 s2) } +mapAndUnzipBagM f (ListBag xs) = do { ts <- mapM f xs + ; let (rs,ss) = unzip ts + ; return (ListBag rs, ListBag ss) } + +listToBag :: [a] -> Bag a +listToBag [] = EmptyBag +listToBag vs = ListBag vs + +bagToList :: Bag a -> [a] +bagToList b = foldrBag (:) [] b +\end{code} + +\begin{code} +instance (Outputable a) => Outputable (Bag a) where + ppr EmptyBag = ptext SLIT("emptyBag") + ppr (UnitBag a) = ppr a + ppr (TwoBags b1 b2) = hsep [ppr b1 <> comma, ppr b2] + ppr (ListBag as) = interpp'SP as +\end{code} diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs new file mode 100644 index 0000000000..7a1ca515b7 --- /dev/null +++ b/compiler/utils/Binary.hs @@ -0,0 +1,756 @@ +{-# OPTIONS -cpp #-} +-- +-- (c) The University of Glasgow 2002 +-- +-- Binary I/O library, with special tweaks for GHC +-- +-- Based on the nhc98 Binary library, which is copyright +-- (c) Malcolm Wallace and Colin Runciman, University of York, 1998. +-- Under the terms of the license for that software, we must tell you +-- where you can obtain the original version of the Binary library, namely +-- http://www.cs.york.ac.uk/fp/nhc98/ + +module Binary + ( {-type-} Bin, + {-class-} Binary(..), + {-type-} BinHandle, + + openBinIO, openBinIO_, + openBinMem, +-- closeBin, + + seekBin, + tellBin, + castBin, + + writeBinMem, + readBinMem, + + isEOFBin, + + -- for writing instances: + putByte, + getByte, + + -- lazy Bin I/O + lazyGet, + lazyPut, + + -- GHC only: + ByteArray(..), + getByteArray, + putByteArray, + + getBinFileWithDict, -- :: Binary a => FilePath -> IO a + putBinFileWithDict, -- :: Binary a => FilePath -> ModuleName -> a -> IO () + + ) where + +#include "HsVersions.h" + +-- The *host* architecture version: +#include "MachDeps.h" + +import FastString +import Unique +import Panic +import UniqFM +import FastMutInt +import PackageConfig ( PackageId, packageIdFS, fsToPackageId ) + +import Foreign +import Data.Array.IO +import Data.Array +import Data.Bits +import Data.Int +import Data.Word +import Data.IORef +import Data.Char ( ord, chr ) +import Data.Array.Base ( unsafeRead, unsafeWrite ) +import Control.Monad ( when ) +import Control.Exception ( throwDyn ) +import System.IO as IO +import System.IO.Unsafe ( unsafeInterleaveIO ) +import System.IO.Error ( mkIOError, eofErrorType ) +import GHC.Real ( Ratio(..) ) +import GHC.Exts +import GHC.IOBase ( IO(..) ) +import GHC.Word ( Word8(..) ) +#if __GLASGOW_HASKELL__ < 601 +-- openFileEx is available from the lang package, but we want to +-- be independent of hslibs libraries. +import GHC.Handle ( openFileEx, IOModeEx(..) ) +#else +import System.IO ( openBinaryFile ) +#endif + +#if __GLASGOW_HASKELL__ < 601 +openBinaryFile f mode = openFileEx f (BinaryMode mode) +#endif + +type BinArray = IOUArray Int Word8 + +--------------------------------------------------------------- +-- BinHandle +--------------------------------------------------------------- + +data BinHandle + = BinMem { -- binary data stored in an unboxed array + bh_usr :: UserData, -- sigh, need parameterized modules :-) + off_r :: !FastMutInt, -- the current offset + sz_r :: !FastMutInt, -- size of the array (cached) + arr_r :: !(IORef BinArray) -- the array (bounds: (0,size-1)) + } + -- XXX: should really store a "high water mark" for dumping out + -- the binary data to a file. + + | BinIO { -- binary data stored in a file + bh_usr :: UserData, + off_r :: !FastMutInt, -- the current offset (cached) + hdl :: !IO.Handle -- the file handle (must be seekable) + } + -- cache the file ptr in BinIO; using hTell is too expensive + -- to call repeatedly. If anyone else is modifying this Handle + -- at the same time, we'll be screwed. + +getUserData :: BinHandle -> UserData +getUserData bh = bh_usr bh + +setUserData :: BinHandle -> UserData -> BinHandle +setUserData bh us = bh { bh_usr = us } + + +--------------------------------------------------------------- +-- Bin +--------------------------------------------------------------- + +newtype Bin a = BinPtr Int + deriving (Eq, Ord, Show, Bounded) + +castBin :: Bin a -> Bin b +castBin (BinPtr i) = BinPtr i + +--------------------------------------------------------------- +-- class Binary +--------------------------------------------------------------- + +class Binary a where + put_ :: BinHandle -> a -> IO () + put :: BinHandle -> a -> IO (Bin a) + get :: BinHandle -> IO a + + -- define one of put_, put. Use of put_ is recommended because it + -- is more likely that tail-calls can kick in, and we rarely need the + -- position return value. + put_ bh a = do put bh a; return () + put bh a = do p <- tellBin bh; put_ bh a; return p + +putAt :: Binary a => BinHandle -> Bin a -> a -> IO () +putAt bh p x = do seekBin bh p; put bh x; return () + +getAt :: Binary a => BinHandle -> Bin a -> IO a +getAt bh p = do seekBin bh p; get bh + +openBinIO_ :: IO.Handle -> IO BinHandle +openBinIO_ h = openBinIO h + +openBinIO :: IO.Handle -> IO BinHandle +openBinIO h = do + r <- newFastMutInt + writeFastMutInt r 0 + return (BinIO noUserData r h) + +openBinMem :: Int -> IO BinHandle +openBinMem size + | size <= 0 = error "Data.Binary.openBinMem: size must be >= 0" + | otherwise = do + arr <- newArray_ (0,size-1) + arr_r <- newIORef arr + ix_r <- newFastMutInt + writeFastMutInt ix_r 0 + sz_r <- newFastMutInt + writeFastMutInt sz_r size + return (BinMem noUserData ix_r sz_r arr_r) + +tellBin :: BinHandle -> IO (Bin a) +tellBin (BinIO _ r _) = do ix <- readFastMutInt r; return (BinPtr ix) +tellBin (BinMem _ r _ _) = do ix <- readFastMutInt r; return (BinPtr ix) + +seekBin :: BinHandle -> Bin a -> IO () +seekBin (BinIO _ ix_r h) (BinPtr p) = do + writeFastMutInt ix_r p + hSeek h AbsoluteSeek (fromIntegral p) +seekBin h@(BinMem _ ix_r sz_r a) (BinPtr p) = do + sz <- readFastMutInt sz_r + if (p >= sz) + then do expandBin h p; writeFastMutInt ix_r p + else writeFastMutInt ix_r p + +isEOFBin :: BinHandle -> IO Bool +isEOFBin (BinMem _ ix_r sz_r a) = do + ix <- readFastMutInt ix_r + sz <- readFastMutInt sz_r + return (ix >= sz) +isEOFBin (BinIO _ ix_r h) = hIsEOF h + +writeBinMem :: BinHandle -> FilePath -> IO () +writeBinMem (BinIO _ _ _) _ = error "Data.Binary.writeBinMem: not a memory handle" +writeBinMem (BinMem _ ix_r sz_r arr_r) fn = do + h <- openBinaryFile fn WriteMode + arr <- readIORef arr_r + ix <- readFastMutInt ix_r + hPutArray h arr ix +#if __GLASGOW_HASKELL__ <= 500 + -- workaround a bug in old implementation of hPutBuf (it doesn't + -- set the FILEOBJ_RW_WRITTEN flag on the file object, so the file doens't + -- get flushed properly). Adding an extra '\0' doens't do any harm. + hPutChar h '\0' +#endif + hClose h + +readBinMem :: FilePath -> IO BinHandle +-- Return a BinHandle with a totally undefined State +readBinMem filename = do + h <- openBinaryFile filename ReadMode + filesize' <- hFileSize h + let filesize = fromIntegral filesize' + arr <- newArray_ (0,filesize-1) + count <- hGetArray h arr filesize + when (count /= filesize) + (error ("Binary.readBinMem: only read " ++ show count ++ " bytes")) + hClose h + arr_r <- newIORef arr + ix_r <- newFastMutInt + writeFastMutInt ix_r 0 + sz_r <- newFastMutInt + writeFastMutInt sz_r filesize + return (BinMem noUserData ix_r sz_r arr_r) + +-- expand the size of the array to include a specified offset +expandBin :: BinHandle -> Int -> IO () +expandBin (BinMem _ ix_r sz_r arr_r) off = do + sz <- readFastMutInt sz_r + let sz' = head (dropWhile (<= off) (iterate (* 2) sz)) + arr <- readIORef arr_r + arr' <- newArray_ (0,sz'-1) + sequence_ [ unsafeRead arr i >>= unsafeWrite arr' i + | i <- [ 0 .. sz-1 ] ] + writeFastMutInt sz_r sz' + writeIORef arr_r arr' +#ifdef DEBUG + hPutStrLn stderr ("Binary: expanding to size: " ++ show sz') +#endif + return () +expandBin (BinIO _ _ _) _ = return () + -- no need to expand a file, we'll assume they expand by themselves. + +-- ----------------------------------------------------------------------------- +-- Low-level reading/writing of bytes + +putWord8 :: BinHandle -> Word8 -> IO () +putWord8 h@(BinMem _ ix_r sz_r arr_r) w = do + ix <- readFastMutInt ix_r + sz <- readFastMutInt sz_r + -- double the size of the array if it overflows + if (ix >= sz) + then do expandBin h ix + putWord8 h w + else do arr <- readIORef arr_r + unsafeWrite arr ix w + writeFastMutInt ix_r (ix+1) + return () +putWord8 (BinIO _ ix_r h) w = do + ix <- readFastMutInt ix_r + hPutChar h (chr (fromIntegral w)) -- XXX not really correct + writeFastMutInt ix_r (ix+1) + return () + +getWord8 :: BinHandle -> IO Word8 +getWord8 (BinMem _ ix_r sz_r arr_r) = do + ix <- readFastMutInt ix_r + sz <- readFastMutInt sz_r + when (ix >= sz) $ +#if __GLASGOW_HASKELL__ <= 408 + throw (mkIOError eofErrorType "Data.Binary.getWord8" Nothing Nothing) +#else + ioError (mkIOError eofErrorType "Data.Binary.getWord8" Nothing Nothing) +#endif + arr <- readIORef arr_r + w <- unsafeRead arr ix + writeFastMutInt ix_r (ix+1) + return w +getWord8 (BinIO _ ix_r h) = do + ix <- readFastMutInt ix_r + c <- hGetChar h + writeFastMutInt ix_r (ix+1) + return $! (fromIntegral (ord c)) -- XXX not really correct + +putByte :: BinHandle -> Word8 -> IO () +putByte bh w = put_ bh w + +getByte :: BinHandle -> IO Word8 +getByte = getWord8 + +-- ----------------------------------------------------------------------------- +-- Primitve Word writes + +instance Binary Word8 where + put_ = putWord8 + get = getWord8 + +instance Binary Word16 where + put_ h w = do -- XXX too slow.. inline putWord8? + putByte h (fromIntegral (w `shiftR` 8)) + putByte h (fromIntegral (w .&. 0xff)) + get h = do + w1 <- getWord8 h + w2 <- getWord8 h + return $! ((fromIntegral w1 `shiftL` 8) .|. fromIntegral w2) + + +instance Binary Word32 where + put_ h w = do + putByte h (fromIntegral (w `shiftR` 24)) + putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff)) + putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff)) + putByte h (fromIntegral (w .&. 0xff)) + get h = do + w1 <- getWord8 h + w2 <- getWord8 h + w3 <- getWord8 h + w4 <- getWord8 h + return $! ((fromIntegral w1 `shiftL` 24) .|. + (fromIntegral w2 `shiftL` 16) .|. + (fromIntegral w3 `shiftL` 8) .|. + (fromIntegral w4)) + + +instance Binary Word64 where + put_ h w = do + putByte h (fromIntegral (w `shiftR` 56)) + putByte h (fromIntegral ((w `shiftR` 48) .&. 0xff)) + putByte h (fromIntegral ((w `shiftR` 40) .&. 0xff)) + putByte h (fromIntegral ((w `shiftR` 32) .&. 0xff)) + putByte h (fromIntegral ((w `shiftR` 24) .&. 0xff)) + putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff)) + putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff)) + putByte h (fromIntegral (w .&. 0xff)) + get h = do + w1 <- getWord8 h + w2 <- getWord8 h + w3 <- getWord8 h + w4 <- getWord8 h + w5 <- getWord8 h + w6 <- getWord8 h + w7 <- getWord8 h + w8 <- getWord8 h + return $! ((fromIntegral w1 `shiftL` 56) .|. + (fromIntegral w2 `shiftL` 48) .|. + (fromIntegral w3 `shiftL` 40) .|. + (fromIntegral w4 `shiftL` 32) .|. + (fromIntegral w5 `shiftL` 24) .|. + (fromIntegral w6 `shiftL` 16) .|. + (fromIntegral w7 `shiftL` 8) .|. + (fromIntegral w8)) + +-- ----------------------------------------------------------------------------- +-- Primitve Int writes + +instance Binary Int8 where + put_ h w = put_ h (fromIntegral w :: Word8) + get h = do w <- get h; return $! (fromIntegral (w::Word8)) + +instance Binary Int16 where + put_ h w = put_ h (fromIntegral w :: Word16) + get h = do w <- get h; return $! (fromIntegral (w::Word16)) + +instance Binary Int32 where + put_ h w = put_ h (fromIntegral w :: Word32) + get h = do w <- get h; return $! (fromIntegral (w::Word32)) + +instance Binary Int64 where + put_ h w = put_ h (fromIntegral w :: Word64) + get h = do w <- get h; return $! (fromIntegral (w::Word64)) + +-- ----------------------------------------------------------------------------- +-- Instances for standard types + +instance Binary () where + put_ bh () = return () + get _ = return () +-- getF bh p = case getBitsF bh 0 p of (_,b) -> ((),b) + +instance Binary Bool where + put_ bh b = putByte bh (fromIntegral (fromEnum b)) + get bh = do x <- getWord8 bh; return $! (toEnum (fromIntegral x)) +-- getF bh p = case getBitsF bh 1 p of (x,b) -> (toEnum x,b) + +instance Binary Char where + put_ bh c = put_ bh (fromIntegral (ord c) :: Word32) + get bh = do x <- get bh; return $! (chr (fromIntegral (x :: Word32))) +-- getF bh p = case getBitsF bh 8 p of (x,b) -> (toEnum x,b) + +instance Binary Int where +#if SIZEOF_HSINT == 4 + put_ bh i = put_ bh (fromIntegral i :: Int32) + get bh = do + x <- get bh + return $! (fromIntegral (x :: Int32)) +#elif SIZEOF_HSINT == 8 + put_ bh i = put_ bh (fromIntegral i :: Int64) + get bh = do + x <- get bh + return $! (fromIntegral (x :: Int64)) +#else +#error "unsupported sizeof(HsInt)" +#endif +-- getF bh = getBitsF bh 32 + +instance Binary a => Binary [a] where + put_ bh l = do + let len = length l + if (len < 0xff) + then putByte bh (fromIntegral len :: Word8) + else do putByte bh 0xff; put_ bh (fromIntegral len :: Word32) + mapM_ (put_ bh) l + get bh = do + b <- getByte bh + len <- if b == 0xff + then get bh + else return (fromIntegral b :: Word32) + let loop 0 = return [] + loop n = do a <- get bh; as <- loop (n-1); return (a:as) + loop len + +instance (Binary a, Binary b) => Binary (a,b) where + put_ bh (a,b) = do put_ bh a; put_ bh b + get bh = do a <- get bh + b <- get bh + return (a,b) + +instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where + put_ bh (a,b,c) = do put_ bh a; put_ bh b; put_ bh c + get bh = do a <- get bh + b <- get bh + c <- get bh + return (a,b,c) + +instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where + put_ bh (a,b,c,d) = do put_ bh a; put_ bh b; put_ bh c; put_ bh d + get bh = do a <- get bh + b <- get bh + c <- get bh + d <- get bh + return (a,b,c,d) + +instance Binary a => Binary (Maybe a) where + put_ bh Nothing = putByte bh 0 + put_ bh (Just a) = do putByte bh 1; put_ bh a + get bh = do h <- getWord8 bh + case h of + 0 -> return Nothing + _ -> do x <- get bh; return (Just x) + +instance (Binary a, Binary b) => Binary (Either a b) where + put_ bh (Left a) = do putByte bh 0; put_ bh a + put_ bh (Right b) = do putByte bh 1; put_ bh b + get bh = do h <- getWord8 bh + case h of + 0 -> do a <- get bh ; return (Left a) + _ -> do b <- get bh ; return (Right b) + +#ifdef __GLASGOW_HASKELL__ +instance Binary Integer where + put_ bh (S# i#) = do putByte bh 0; put_ bh (I# i#) + put_ bh (J# s# a#) = do + p <- putByte bh 1; + put_ bh (I# s#) + let sz# = sizeofByteArray# a# -- in *bytes* + put_ bh (I# sz#) -- in *bytes* + putByteArray bh a# sz# + + get bh = do + b <- getByte bh + case b of + 0 -> do (I# i#) <- get bh + return (S# i#) + _ -> do (I# s#) <- get bh + sz <- get bh + (BA a#) <- getByteArray bh sz + return (J# s# a#) + +putByteArray :: BinHandle -> ByteArray# -> Int# -> IO () +putByteArray bh a s# = loop 0# + where loop n# + | n# ==# s# = return () + | otherwise = do + putByte bh (indexByteArray a n#) + loop (n# +# 1#) + +getByteArray :: BinHandle -> Int -> IO ByteArray +getByteArray bh (I# sz) = do + (MBA arr) <- newByteArray sz + let loop n + | n ==# sz = return () + | otherwise = do + w <- getByte bh + writeByteArray arr n w + loop (n +# 1#) + loop 0# + freezeByteArray arr + + +data ByteArray = BA ByteArray# +data MBA = MBA (MutableByteArray# RealWorld) + +newByteArray :: Int# -> IO MBA +newByteArray sz = IO $ \s -> + case newByteArray# sz s of { (# s, arr #) -> + (# s, MBA arr #) } + +freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray +freezeByteArray arr = IO $ \s -> + case unsafeFreezeByteArray# arr s of { (# s, arr #) -> + (# s, BA arr #) } + +writeByteArray :: MutableByteArray# RealWorld -> Int# -> Word8 -> IO () + +#if __GLASGOW_HASKELL__ < 503 +writeByteArray arr i w8 = IO $ \s -> + case word8ToWord w8 of { W# w# -> + case writeCharArray# arr i (chr# (word2Int# w#)) s of { s -> + (# s , () #) }} +#else +writeByteArray arr i (W8# w) = IO $ \s -> + case writeWord8Array# arr i w s of { s -> + (# s, () #) } +#endif + +#if __GLASGOW_HASKELL__ < 503 +indexByteArray a# n# = fromIntegral (I# (ord# (indexCharArray# a# n#))) +#else +indexByteArray a# n# = W8# (indexWord8Array# a# n#) +#endif + +instance (Integral a, Binary a) => Binary (Ratio a) where + put_ bh (a :% b) = do put_ bh a; put_ bh b + get bh = do a <- get bh; b <- get bh; return (a :% b) +#endif + +instance Binary (Bin a) where + put_ bh (BinPtr i) = put_ bh i + get bh = do i <- get bh; return (BinPtr i) + +-- ----------------------------------------------------------------------------- +-- Lazy reading/writing + +lazyPut :: Binary a => BinHandle -> a -> IO () +lazyPut bh a = do + -- output the obj with a ptr to skip over it: + pre_a <- tellBin bh + put_ bh pre_a -- save a slot for the ptr + put_ bh a -- dump the object + q <- tellBin bh -- q = ptr to after object + putAt bh pre_a q -- fill in slot before a with ptr to q + seekBin bh q -- finally carry on writing at q + +lazyGet :: Binary a => BinHandle -> IO a +lazyGet bh = do + p <- get bh -- a BinPtr + p_a <- tellBin bh + a <- unsafeInterleaveIO (getAt bh p_a) + seekBin bh p -- skip over the object for now + return a + +-- -------------------------------------------------------------- +-- Main wrappers: getBinFileWithDict, putBinFileWithDict +-- +-- This layer is built on top of the stuff above, +-- and should not know anything about BinHandles +-- -------------------------------------------------------------- + +initBinMemSize = (1024*1024) :: Int + +#if WORD_SIZE_IN_BITS == 32 +binaryInterfaceMagic = 0x1face :: Word32 +#elif WORD_SIZE_IN_BITS == 64 +binaryInterfaceMagic = 0x1face64 :: Word32 +#endif + +getBinFileWithDict :: Binary a => FilePath -> IO a +getBinFileWithDict file_path = do + bh <- Binary.readBinMem file_path + + -- Read the magic number to check that this really is a GHC .hi file + -- (This magic number does not change when we change + -- GHC interface file format) + magic <- get bh + when (magic /= binaryInterfaceMagic) $ + throwDyn (ProgramError ( + "magic number mismatch: old/corrupt interface file?")) + + -- Read the dictionary + -- The next word in the file is a pointer to where the dictionary is + -- (probably at the end of the file) + dict_p <- Binary.get bh -- Get the dictionary ptr + data_p <- tellBin bh -- Remember where we are now + seekBin bh dict_p + dict <- getDictionary bh + seekBin bh data_p -- Back to where we were before + + -- Initialise the user-data field of bh + let bh' = setUserData bh (initReadState dict) + + -- At last, get the thing + get bh' + +putBinFileWithDict :: Binary a => FilePath -> a -> IO () +putBinFileWithDict file_path the_thing = do + bh <- openBinMem initBinMemSize + put_ bh binaryInterfaceMagic + + -- Remember where the dictionary pointer will go + dict_p_p <- tellBin bh + put_ bh dict_p_p -- Placeholder for ptr to dictionary + + -- Make some intial state + usr_state <- newWriteState + + -- Put the main thing, + put_ (setUserData bh usr_state) the_thing + + -- Get the final-state + j <- readIORef (ud_next usr_state) + fm <- readIORef (ud_map usr_state) + dict_p <- tellBin bh -- This is where the dictionary will start + + -- Write the dictionary pointer at the fornt of the file + putAt bh dict_p_p dict_p -- Fill in the placeholder + seekBin bh dict_p -- Seek back to the end of the file + + -- Write the dictionary itself + putDictionary bh j (constructDictionary j fm) + + -- And send the result to the file + writeBinMem bh file_path + +-- ----------------------------------------------------------------------------- +-- UserData +-- ----------------------------------------------------------------------------- + +data UserData = + UserData { -- This field is used only when reading + ud_dict :: Dictionary, + + -- The next two fields are only used when writing + ud_next :: IORef Int, -- The next index to use + ud_map :: IORef (UniqFM (Int,FastString)) + } + +noUserData = error "Binary.UserData: no user data" + +initReadState :: Dictionary -> UserData +initReadState dict = UserData{ ud_dict = dict, + ud_next = undef "next", + ud_map = undef "map" } + +newWriteState :: IO UserData +newWriteState = do + j_r <- newIORef 0 + out_r <- newIORef emptyUFM + return (UserData { ud_dict = panic "dict", + ud_next = j_r, + ud_map = out_r }) + + +undef s = panic ("Binary.UserData: no " ++ s) + +--------------------------------------------------------- +-- The Dictionary +--------------------------------------------------------- + +type Dictionary = Array Int FastString -- The dictionary + -- Should be 0-indexed + +putDictionary :: BinHandle -> Int -> Dictionary -> IO () +putDictionary bh sz dict = do + put_ bh sz + mapM_ (putFS bh) (elems dict) + +getDictionary :: BinHandle -> IO Dictionary +getDictionary bh = do + sz <- get bh + elems <- sequence (take sz (repeat (getFS bh))) + return (listArray (0,sz-1) elems) + +constructDictionary :: Int -> UniqFM (Int,FastString) -> Dictionary +constructDictionary j fm = array (0,j-1) (eltsUFM fm) + +--------------------------------------------------------- +-- Reading and writing FastStrings +--------------------------------------------------------- + +putFS bh (FastString id l _ buf _) = do + put_ bh l + withForeignPtr buf $ \ptr -> + let + go n | n == l = return () + | otherwise = do + b <- peekElemOff ptr n + putByte bh b + go (n+1) + in + go 0 + +{- -- possible faster version, not quite there yet: +getFS bh@BinMem{} = do + (I# l) <- get bh + arr <- readIORef (arr_r bh) + off <- readFastMutInt (off_r bh) + return $! (mkFastSubStringBA# arr off l) +-} +getFS bh = do + l <- get bh + fp <- mallocForeignPtrBytes l + withForeignPtr fp $ \ptr -> do + let + go n | n == l = mkFastStringForeignPtr ptr fp l + | otherwise = do + b <- getByte bh + pokeElemOff ptr n b + go (n+1) + -- + go 0 + +#if __GLASGOW_HASKELL__ < 600 +mallocForeignPtrBytes :: Int -> IO (ForeignPtr a) +mallocForeignPtrBytes n = do + r <- mallocBytes n + newForeignPtr r (finalizerFree r) + +foreign import ccall unsafe "stdlib.h free" + finalizerFree :: Ptr a -> IO () +#endif + +instance Binary PackageId where + put_ bh pid = put_ bh (packageIdFS pid) + get bh = do { fs <- get bh; return (fsToPackageId fs) } + +instance Binary FastString where + put_ bh f@(FastString id l _ fp _) = + case getUserData bh of { + UserData { ud_next = j_r, ud_map = out_r, ud_dict = dict} -> do + out <- readIORef out_r + let uniq = getUnique f + case lookupUFM out uniq of + Just (j,f) -> put_ bh j + Nothing -> do + j <- readIORef j_r + put_ bh j + writeIORef j_r (j+1) + writeIORef out_r (addToUFM out uniq (j,f)) + } + + get bh = do + j <- get bh + return $! (ud_dict (getUserData bh) ! j) diff --git a/compiler/utils/BitSet.lhs b/compiler/utils/BitSet.lhs new file mode 100644 index 0000000000..a108136af3 --- /dev/null +++ b/compiler/utils/BitSet.lhs @@ -0,0 +1,205 @@ +% +% (c) The GRASP Project, Glasgow University, 1994-1998 +% +\section[BitSet]{An implementation of very small sets} + +Bit sets are a fast implementation of sets of integers ranging from 0 +to one less than the number of bits in a machine word (typically 31). +If any element exceeds the maximum value for a particular machine +architecture, the results of these operations are undefined. You have +been warned. If you put any safety checks in this code, I will have +to kill you. + +Note: the Yale Haskell implementation won't provide a full 32 bits. +However, if you can handle the performance loss, you could change to +Integer and get virtually unlimited sets. + +\begin{code} + +module BitSet ( + BitSet, -- abstract type + mkBS, listBS, emptyBS, unitBS, + unionBS, minusBS, intBS + ) where + +#include "HsVersions.h" + +#ifdef __GLASGOW_HASKELL__ +import GLAEXTS +-- nothing to import +#elif defined(__YALE_HASKELL__) +{-hide import from mkdependHS-} +import + LogOpPrims +#else +{-hide import from mkdependHS-} +import + Word +#endif + +#ifdef __GLASGOW_HASKELL__ + +data BitSet = MkBS Word# + +emptyBS :: BitSet +emptyBS = MkBS (int2Word# 0#) + +mkBS :: [Int] -> BitSet +mkBS xs = foldr (unionBS . unitBS) emptyBS xs + +unitBS :: Int -> BitSet +unitBS x = case x of +#if __GLASGOW_HASKELL__ >= 503 + I# i# -> MkBS ((int2Word# 1#) `uncheckedShiftL#` i#) +#else + I# i# -> MkBS ((int2Word# 1#) `shiftL#` i#) +#endif + +unionBS :: BitSet -> BitSet -> BitSet +unionBS (MkBS x#) (MkBS y#) = MkBS (x# `or#` y#) + +minusBS :: BitSet -> BitSet -> BitSet +minusBS (MkBS x#) (MkBS y#) = MkBS (x# `and#` (not# y#)) + +#if 0 +-- not used in GHC +isEmptyBS :: BitSet -> Bool +isEmptyBS (MkBS s#) + = case word2Int# s# of + 0# -> True + _ -> False + +intersectBS :: BitSet -> BitSet -> BitSet +intersectBS (MkBS x#) (MkBS y#) = MkBS (x# `and#` y#) + +elementBS :: Int -> BitSet -> Bool +elementBS x (MkBS s#) = case x of + I# i# -> case word2Int# (((int2Word# 1#) `shiftL#` i#) `and#` s#) of + 0# -> False + _ -> True +#endif + +listBS :: BitSet -> [Int] +listBS s = listify s 0 + where listify (MkBS s#) n = + case word2Int# s# of + 0# -> [] + _ -> let s' = (MkBS (s# `shiftr` 1#)) + more = listify s' (n + 1) + in case word2Int# (s# `and#` (int2Word# 1#)) of + 0# -> more + _ -> n : more +#if __GLASGOW_HASKELL__ >= 503 + shiftr x y = uncheckedShiftRL# x y +#else + shiftr x y = shiftRL# x y +#endif + +-- intBS is a bit naughty. +intBS :: BitSet -> Int +intBS (MkBS w#) = I# (word2Int# w#) + +#elif defined(__YALE_HASKELL__) + +data BitSet = MkBS Int + +emptyBS :: BitSet +emptyBS = MkBS 0 + +mkBS :: [Int] -> BitSet +mkBS xs = foldr (unionBS . unitBS) emptyBS xs + +unitBS :: Int -> BitSet +unitBS x = MkBS (1 `ashInt` x) + +unionBS :: BitSet -> BitSet -> BitSet +unionBS (MkBS x) (MkBS y) = MkBS (x `logiorInt` y) + +#if 0 +-- not used in GHC +isEmptyBS :: BitSet -> Bool +isEmptyBS (MkBS s) + = case s of + 0 -> True + _ -> False + +intersectBS :: BitSet -> BitSet -> BitSet +intersectBS (MkBS x) (MkBS y) = MkBS (x `logandInt` y) + +elementBS :: Int -> BitSet -> Bool +elementBS x (MkBS s) + = case logbitpInt x s of + 0 -> False + _ -> True +#endif + +minusBS :: BitSet -> BitSet -> BitSet +minusBS (MkBS x) (MkBS y) = MkBS (x `logandc2Int` y) + +-- rewritten to avoid right shifts (which would give nonsense on negative +-- values. +listBS :: BitSet -> [Int] +listBS (MkBS s) = listify s 0 1 + where listify s n m = + case s of + 0 -> [] + _ -> let n' = n+1; m' = m+m in + case logbitpInt s m of + 0 -> listify s n' m' + _ -> n : listify (s `logandc2Int` m) n' m' + +#else /* HBC, perhaps? */ + +data BitSet = MkBS Word + +emptyBS :: BitSet +emptyBS = MkBS 0 + +mkBS :: [Int] -> BitSet +mkBS xs = foldr (unionBS . unitBS) emptyBS xs + +unitBS :: Int -> BitSet +unitBS x = MkBS (1 `bitLsh` x) + +unionBS :: BitSet -> BitSet -> BitSet +unionBS (MkBS x) (MkBS y) = MkBS (x `bitOr` y) + +#if 0 +-- not used in GHC +isEmptyBS :: BitSet -> Bool +isEmptyBS (MkBS s) + = case s of + 0 -> True + _ -> False + +intersectBS :: BitSet -> BitSet -> BitSet +intersectBS (MkBS x) (MkBS y) = MkBS (x `bitAnd` y) + +elementBS :: Int -> BitSet -> Bool +elementBS x (MkBS s) + = case (1 `bitLsh` x) `bitAnd` s of + 0 -> False + _ -> True +#endif + +minusBS :: BitSet -> BitSet -> BitSet +minusBS (MkBS x) (MkBS y) = MkBS (x `bitAnd` (bitCompl y)) + +listBS :: BitSet -> [Int] +listBS (MkBS s) = listify s 0 + where listify s n = + case s of + 0 -> [] + _ -> let s' = s `bitRsh` 1 + more = listify s' (n + 1) + in case (s `bitAnd` 1) of + 0 -> more + _ -> n : more + +#endif + +\end{code} + + + + diff --git a/compiler/utils/BufWrite.hs b/compiler/utils/BufWrite.hs new file mode 100644 index 0000000000..a03db3d084 --- /dev/null +++ b/compiler/utils/BufWrite.hs @@ -0,0 +1,124 @@ +----------------------------------------------------------------------------- +-- +-- Fast write-buffered Handles +-- +-- (c) The University of Glasgow 2005 +-- +-- This is a simple abstraction over Handles that offers very fast write +-- buffering, but without the thread safety that Handles provide. It's used +-- to save time in Pretty.printDoc. +-- +----------------------------------------------------------------------------- + +module BufWrite ( + BufHandle(..), + newBufHandle, + bPutChar, + bPutStr, + bPutFS, + bPutLitString, + bFlush, + ) where + +#include "HsVersions.h" + +import FastString +import FastMutInt +import Panic ( panic ) + +import Monad ( when ) +import Char ( ord ) +import Foreign +import IO + +import GHC.IOBase ( IO(..) ) +import System.IO ( hPutBuf ) +import GHC.Ptr ( Ptr(..) ) + +import GLAEXTS ( Int(..), Int#, Addr# ) + +-- ----------------------------------------------------------------------------- + +data BufHandle = BufHandle {-#UNPACK#-}!(Ptr Word8) + {-#UNPACK#-}!FastMutInt + Handle + +newBufHandle :: Handle -> IO BufHandle +newBufHandle hdl = do + ptr <- mallocBytes buf_size + r <- newFastMutInt + writeFastMutInt r 0 + return (BufHandle ptr r hdl) + +buf_size = 8192 :: Int + +#define STRICT2(f) f a b | a `seq` b `seq` False = undefined +#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined + +bPutChar :: BufHandle -> Char -> IO () +STRICT2(bPutChar) +bPutChar b@(BufHandle buf r hdl) c = do + i <- readFastMutInt r + if (i >= buf_size) + then do hPutBuf hdl buf buf_size + writeFastMutInt r 0 + bPutChar b c + else do pokeElemOff buf i (fromIntegral (ord c) :: Word8) + writeFastMutInt r (i+1) + +bPutStr :: BufHandle -> String -> IO () +STRICT2(bPutStr) +bPutStr b@(BufHandle buf r hdl) str = do + i <- readFastMutInt r + loop str i + where loop _ i | i `seq` False = undefined + loop "" i = do writeFastMutInt r i; return () + loop (c:cs) i + | i >= buf_size = do + hPutBuf hdl buf buf_size + loop (c:cs) 0 + | otherwise = do + pokeElemOff buf i (fromIntegral (ord c)) + loop cs (i+1) + +bPutFS :: BufHandle -> FastString -> IO () +bPutFS b@(BufHandle buf r hdl) fs@(FastString _ len _ fp _) = + withForeignPtr fp $ \ptr -> do + i <- readFastMutInt r + if (i + len) >= buf_size + then do hPutBuf hdl buf i + writeFastMutInt r 0 + if (len >= buf_size) + then hPutBuf hdl ptr len + else bPutFS b fs + else do + copyBytes (buf `plusPtr` i) ptr len + writeFastMutInt r (i+len) + +bPutLitString :: BufHandle -> Addr# -> Int# -> IO () +bPutLitString b@(BufHandle buf r hdl) a# len# = do + let len = I# len# + i <- readFastMutInt r + if (i+len) >= buf_size + then do hPutBuf hdl buf i + writeFastMutInt r 0 + if (len >= buf_size) + then hPutBuf hdl (Ptr a#) len + else bPutLitString b a# len# + else do + copyBytes (buf `plusPtr` i) (Ptr a#) len + writeFastMutInt r (i+len) + +bFlush :: BufHandle -> IO () +bFlush b@(BufHandle buf r hdl) = do + i <- readFastMutInt r + when (i > 0) $ hPutBuf hdl buf i + free buf + return () + +#if 0 +myPutBuf s hdl buf i = + modifyIOError (\e -> ioeSetErrorString e (ioeGetErrorString e ++ ':':s ++ " (" ++ show buf ++ "," ++ show i ++ ")")) $ + + hPutBuf hdl buf i +#endif diff --git a/compiler/utils/Digraph.lhs b/compiler/utils/Digraph.lhs new file mode 100644 index 0000000000..c49087c8f3 --- /dev/null +++ b/compiler/utils/Digraph.lhs @@ -0,0 +1,426 @@ +\begin{code} +module Digraph( + + -- At present the only one with a "nice" external interface + stronglyConnComp, stronglyConnCompR, SCC(..), flattenSCC, flattenSCCs, + + Graph, Vertex, + graphFromEdges, graphFromEdges', + buildG, transposeG, reverseE, outdegree, indegree, + + Tree(..), Forest, + showTree, showForest, + + dfs, dff, + topSort, + components, + scc, + back, cross, forward, + reachable, path, + bcc + + ) where + +# include "HsVersions.h" + +------------------------------------------------------------------------------ +-- A version of the graph algorithms described in: +-- +-- ``Lazy Depth-First Search and Linear Graph Algorithms in Haskell'' +-- by David King and John Launchbury +-- +-- Also included is some additional code for printing tree structures ... +------------------------------------------------------------------------------ + + +import Util ( sortLe ) + +-- Extensions +import MONAD_ST + +-- std interfaces +import Maybe +import Array +import List +import Outputable + +#if __GLASGOW_HASKELL__ >= 504 +import Data.Array.ST hiding ( indices, bounds ) +#else +import ST +#endif +\end{code} + + +%************************************************************************ +%* * +%* External interface +%* * +%************************************************************************ + +\begin{code} +data SCC vertex = AcyclicSCC vertex + | CyclicSCC [vertex] + +flattenSCCs :: [SCC a] -> [a] +flattenSCCs = concatMap flattenSCC + +flattenSCC (AcyclicSCC v) = [v] +flattenSCC (CyclicSCC vs) = vs + +instance Outputable a => Outputable (SCC a) where + ppr (AcyclicSCC v) = text "NONREC" $$ (nest 3 (ppr v)) + ppr (CyclicSCC vs) = text "REC" $$ (nest 3 (vcat (map ppr vs))) +\end{code} + +\begin{code} +stronglyConnComp + :: Ord key + => [(node, key, [key])] -- The graph; its ok for the + -- out-list to contain keys which arent + -- a vertex key, they are ignored + -> [SCC node] -- Returned in topologically sorted order + -- Later components depend on earlier ones, but not vice versa + +stronglyConnComp edges + = map get_node (stronglyConnCompR edges) + where + get_node (AcyclicSCC (n, _, _)) = AcyclicSCC n + get_node (CyclicSCC triples) = CyclicSCC [n | (n,_,_) <- triples] + +-- The "R" interface is used when you expect to apply SCC to +-- the (some of) the result of SCC, so you dont want to lose the dependency info +stronglyConnCompR + :: Ord key + => [(node, key, [key])] -- The graph; its ok for the + -- out-list to contain keys which arent + -- a vertex key, they are ignored + -> [SCC (node, key, [key])] -- Topologically sorted + +stronglyConnCompR [] = [] -- added to avoid creating empty array in graphFromEdges -- SOF +stronglyConnCompR edges + = map decode forest + where + (graph, vertex_fn) = _scc_ "graphFromEdges" graphFromEdges edges + forest = _scc_ "Digraph.scc" scc graph + decode (Node v []) | mentions_itself v = CyclicSCC [vertex_fn v] + | otherwise = AcyclicSCC (vertex_fn v) + decode other = CyclicSCC (dec other []) + where + dec (Node v ts) vs = vertex_fn v : foldr dec vs ts + mentions_itself v = v `elem` (graph ! v) +\end{code} + +%************************************************************************ +%* * +%* Graphs +%* * +%************************************************************************ + + +\begin{code} +type Vertex = Int +type Table a = Array Vertex a +type Graph = Table [Vertex] +type Bounds = (Vertex, Vertex) +type Edge = (Vertex, Vertex) +\end{code} + +\begin{code} +vertices :: Graph -> [Vertex] +vertices = indices + +edges :: Graph -> [Edge] +edges g = [ (v, w) | v <- vertices g, w <- g!v ] + +mapT :: (Vertex -> a -> b) -> Table a -> Table b +mapT f t = array (bounds t) [ (,) v (f v (t!v)) | v <- indices t ] + +buildG :: Bounds -> [Edge] -> Graph +buildG bounds edges = accumArray (flip (:)) [] bounds edges + +transposeG :: Graph -> Graph +transposeG g = buildG (bounds g) (reverseE g) + +reverseE :: Graph -> [Edge] +reverseE g = [ (w, v) | (v, w) <- edges g ] + +outdegree :: Graph -> Table Int +outdegree = mapT numEdges + where numEdges v ws = length ws + +indegree :: Graph -> Table Int +indegree = outdegree . transposeG +\end{code} + + +\begin{code} +graphFromEdges + :: Ord key + => [(node, key, [key])] + -> (Graph, Vertex -> (node, key, [key])) +graphFromEdges edges = + case graphFromEdges' edges of (graph, vertex_fn, _) -> (graph, vertex_fn) + +graphFromEdges' + :: Ord key + => [(node, key, [key])] + -> (Graph, Vertex -> (node, key, [key]), key -> Maybe Vertex) +graphFromEdges' edges + = (graph, \v -> vertex_map ! v, key_vertex) + where + max_v = length edges - 1 + bounds = (0,max_v) :: (Vertex, Vertex) + sorted_edges = let + (_,k1,_) `le` (_,k2,_) = case k1 `compare` k2 of { GT -> False; other -> True } + in + sortLe le edges + edges1 = zipWith (,) [0..] sorted_edges + + graph = array bounds [(,) v (mapMaybe key_vertex ks) | (,) v (_, _, ks) <- edges1] + key_map = array bounds [(,) v k | (,) v (_, k, _ ) <- edges1] + vertex_map = array bounds edges1 + + + -- key_vertex :: key -> Maybe Vertex + -- returns Nothing for non-interesting vertices + key_vertex k = find 0 max_v + where + find a b | a > b + = Nothing + find a b = case compare k (key_map ! mid) of + LT -> find a (mid-1) + EQ -> Just mid + GT -> find (mid+1) b + where + mid = (a + b) `div` 2 +\end{code} + +%************************************************************************ +%* * +%* Trees and forests +%* * +%************************************************************************ + +\begin{code} +data Tree a = Node a (Forest a) +type Forest a = [Tree a] + +mapTree :: (a -> b) -> (Tree a -> Tree b) +mapTree f (Node x ts) = Node (f x) (map (mapTree f) ts) +\end{code} + +\begin{code} +instance Show a => Show (Tree a) where + showsPrec p t s = showTree t ++ s + +showTree :: Show a => Tree a -> String +showTree = drawTree . mapTree show + +showForest :: Show a => Forest a -> String +showForest = unlines . map showTree + +drawTree :: Tree String -> String +drawTree = unlines . draw + +draw (Node x ts) = grp this (space (length this)) (stLoop ts) + where this = s1 ++ x ++ " " + + space n = replicate n ' ' + + stLoop [] = [""] + stLoop [t] = grp s2 " " (draw t) + stLoop (t:ts) = grp s3 s4 (draw t) ++ [s4] ++ rsLoop ts + + rsLoop [t] = grp s5 " " (draw t) + rsLoop (t:ts) = grp s6 s4 (draw t) ++ [s4] ++ rsLoop ts + + grp fst rst = zipWith (++) (fst:repeat rst) + + [s1,s2,s3,s4,s5,s6] = ["- ", "--", "-+", " |", " `", " +"] +\end{code} + + +%************************************************************************ +%* * +%* Depth first search +%* * +%************************************************************************ + +\begin{code} +#if __GLASGOW_HASKELL__ >= 504 +newSTArray :: Ix i => (i,i) -> e -> ST s (STArray s i e) +newSTArray = newArray + +readSTArray :: Ix i => STArray s i e -> i -> ST s e +readSTArray = readArray + +writeSTArray :: Ix i => STArray s i e -> i -> e -> ST s () +writeSTArray = writeArray +#endif + +type Set s = STArray s Vertex Bool + +mkEmpty :: Bounds -> ST s (Set s) +mkEmpty bnds = newSTArray bnds False + +contains :: Set s -> Vertex -> ST s Bool +contains m v = readSTArray m v + +include :: Set s -> Vertex -> ST s () +include m v = writeSTArray m v True +\end{code} + +\begin{code} +dff :: Graph -> Forest Vertex +dff g = dfs g (vertices g) + +dfs :: Graph -> [Vertex] -> Forest Vertex +dfs g vs = prune (bounds g) (map (generate g) vs) + +generate :: Graph -> Vertex -> Tree Vertex +generate g v = Node v (map (generate g) (g!v)) + +prune :: Bounds -> Forest Vertex -> Forest Vertex +prune bnds ts = runST (mkEmpty bnds >>= \m -> + chop m ts) + +chop :: Set s -> Forest Vertex -> ST s (Forest Vertex) +chop m [] = return [] +chop m (Node v ts : us) + = contains m v >>= \visited -> + if visited then + chop m us + else + include m v >>= \_ -> + chop m ts >>= \as -> + chop m us >>= \bs -> + return (Node v as : bs) +\end{code} + + +%************************************************************************ +%* * +%* Algorithms +%* * +%************************************************************************ + +------------------------------------------------------------ +-- Algorithm 1: depth first search numbering +------------------------------------------------------------ + +\begin{code} +--preorder :: Tree a -> [a] +preorder (Node a ts) = a : preorderF ts + +preorderF :: Forest a -> [a] +preorderF ts = concat (map preorder ts) + +tabulate :: Bounds -> [Vertex] -> Table Int +tabulate bnds vs = array bnds (zipWith (,) vs [1..]) + +preArr :: Bounds -> Forest Vertex -> Table Int +preArr bnds = tabulate bnds . preorderF +\end{code} + + +------------------------------------------------------------ +-- Algorithm 2: topological sorting +------------------------------------------------------------ + +\begin{code} +--postorder :: Tree a -> [a] +postorder (Node a ts) = postorderF ts ++ [a] + +postorderF :: Forest a -> [a] +postorderF ts = concat (map postorder ts) + +postOrd :: Graph -> [Vertex] +postOrd = postorderF . dff + +topSort :: Graph -> [Vertex] +topSort = reverse . postOrd +\end{code} + + +------------------------------------------------------------ +-- Algorithm 3: connected components +------------------------------------------------------------ + +\begin{code} +components :: Graph -> Forest Vertex +components = dff . undirected + +undirected :: Graph -> Graph +undirected g = buildG (bounds g) (edges g ++ reverseE g) +\end{code} + + +-- Algorithm 4: strongly connected components + +\begin{code} +scc :: Graph -> Forest Vertex +scc g = dfs g (reverse (postOrd (transposeG g))) +\end{code} + + +------------------------------------------------------------ +-- Algorithm 5: Classifying edges +------------------------------------------------------------ + +\begin{code} +back :: Graph -> Table Int -> Graph +back g post = mapT select g + where select v ws = [ w | w <- ws, post!v < post!w ] + +cross :: Graph -> Table Int -> Table Int -> Graph +cross g pre post = mapT select g + where select v ws = [ w | w <- ws, post!v > post!w, pre!v > pre!w ] + +forward :: Graph -> Graph -> Table Int -> Graph +forward g tree pre = mapT select g + where select v ws = [ w | w <- ws, pre!v < pre!w ] \\ tree!v +\end{code} + + +------------------------------------------------------------ +-- Algorithm 6: Finding reachable vertices +------------------------------------------------------------ + +\begin{code} +reachable :: Graph -> Vertex -> [Vertex] +reachable g v = preorderF (dfs g [v]) + +path :: Graph -> Vertex -> Vertex -> Bool +path g v w = w `elem` (reachable g v) +\end{code} + + +------------------------------------------------------------ +-- Algorithm 7: Biconnected components +------------------------------------------------------------ + +\begin{code} +bcc :: Graph -> Forest [Vertex] +bcc g = (concat . map bicomps . map (do_label g dnum)) forest + where forest = dff g + dnum = preArr (bounds g) forest + +do_label :: Graph -> Table Int -> Tree Vertex -> Tree (Vertex,Int,Int) +do_label g dnum (Node v ts) = Node (v,dnum!v,lv) us + where us = map (do_label g dnum) ts + lv = minimum ([dnum!v] ++ [dnum!w | w <- g!v] + ++ [lu | Node (u,du,lu) xs <- us]) + +bicomps :: Tree (Vertex,Int,Int) -> Forest [Vertex] +bicomps (Node (v,dv,lv) ts) + = [ Node (v:vs) us | (l,Node vs us) <- map collect ts] + +collect :: Tree (Vertex,Int,Int) -> (Int, Tree [Vertex]) +collect (Node (v,dv,lv) ts) = (lv, Node (v:vs) cs) + where collected = map collect ts + vs = concat [ ws | (lw, Node ws us) <- collected, lw<dv] + cs = concat [ if lw<dv then us else [Node (v:ws) us] + | (lw, Node ws us) <- collected ] +\end{code} + diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs new file mode 100644 index 0000000000..152bf3c60e --- /dev/null +++ b/compiler/utils/Encoding.hs @@ -0,0 +1,373 @@ +-- ----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow, 1997-2006 +-- +-- Character encodings +-- +-- ----------------------------------------------------------------------------- + +module Encoding ( + -- * UTF-8 + utf8DecodeChar#, + utf8PrevChar, + utf8CharStart, + utf8DecodeChar, + utf8DecodeString, + utf8EncodeChar, + utf8EncodeString, + utf8EncodedLength, + countUTF8Chars, + + -- * Z-encoding + zEncodeString, + zDecodeString + ) where + +#define COMPILING_FAST_STRING +#include "HsVersions.h" +import Foreign +import Data.Char ( ord, chr, isDigit, digitToInt, isHexDigit ) +import Numeric ( showHex ) + +import Data.Bits +import GHC.Ptr ( Ptr(..) ) +import GHC.Base + +-- ----------------------------------------------------------------------------- +-- UTF-8 + +-- We can't write the decoder as efficiently as we'd like without +-- resorting to unboxed extensions, unfortunately. I tried to write +-- an IO version of this function, but GHC can't eliminate boxed +-- results from an IO-returning function. +-- +-- We assume we can ignore overflow when parsing a multibyte character here. +-- To make this safe, we add extra sentinel bytes to unparsed UTF-8 sequences +-- before decoding them (see StringBuffer.hs). + +{-# INLINE utf8DecodeChar# #-} +utf8DecodeChar# :: Addr# -> (# Char#, Addr# #) +utf8DecodeChar# a# = + let ch0 = word2Int# (indexWord8OffAddr# a# 0#) in + case () of + _ | ch0 <=# 0x7F# -> (# chr# ch0, a# `plusAddr#` 1# #) + + | ch0 >=# 0xC0# && ch0 <=# 0xDF# -> + let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in + if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else + (# chr# (((ch0 -# 0xC0#) `uncheckedIShiftL#` 6#) +# + (ch1 -# 0x80#)), + a# `plusAddr#` 2# #) + + | ch0 >=# 0xE0# && ch0 <=# 0xEF# -> + let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in + if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else + let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in + if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else + (# chr# (((ch0 -# 0xE0#) `uncheckedIShiftL#` 12#) +# + ((ch1 -# 0x80#) `uncheckedIShiftL#` 6#) +# + (ch2 -# 0x80#)), + a# `plusAddr#` 3# #) + + | ch0 >=# 0xF0# && ch0 <=# 0xF8# -> + let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in + if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else + let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in + if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else + let ch3 = word2Int# (indexWord8OffAddr# a# 3#) in + if ch3 <# 0x80# || ch3 >=# 0xC0# then fail 3# else + (# chr# (((ch0 -# 0xF0#) `uncheckedIShiftL#` 18#) +# + ((ch1 -# 0x80#) `uncheckedIShiftL#` 12#) +# + ((ch2 -# 0x80#) `uncheckedIShiftL#` 6#) +# + (ch3 -# 0x80#)), + a# `plusAddr#` 4# #) + + | otherwise -> fail 1# + where + -- all invalid sequences end up here: + fail n = (# '\0'#, a# `plusAddr#` n #) + -- '\xFFFD' would be the usual replacement character, but + -- that's a valid symbol in Haskell, so will result in a + -- confusing parse error later on. Instead we use '\0' which + -- will signal a lexer error immediately. + +utf8DecodeChar :: Ptr Word8 -> (Char, Ptr Word8) +utf8DecodeChar (Ptr a#) = + case utf8DecodeChar# a# of (# c#, b# #) -> ( C# c#, Ptr b# ) + +-- UTF-8 is cleverly designed so that we can always figure out where +-- the start of the current character is, given any position in a +-- stream. This function finds the start of the previous character, +-- assuming there *is* a previous character. +utf8PrevChar :: Ptr Word8 -> IO (Ptr Word8) +utf8PrevChar p = utf8CharStart (p `plusPtr` (-1)) + +utf8CharStart :: Ptr Word8 -> IO (Ptr Word8) +utf8CharStart p = go p + where go p = do w <- peek p + if w >= 0x80 && w < 0xC0 + then go (p `plusPtr` (-1)) + else return p + +utf8DecodeString :: Ptr Word8 -> Int -> IO [Char] +STRICT2(utf8DecodeString) +utf8DecodeString (Ptr a#) (I# len#) + = unpack a# + where + end# = addr2Int# (a# `plusAddr#` len#) + + unpack p# + | addr2Int# p# >=# end# = return [] + | otherwise = + case utf8DecodeChar# p# of + (# c#, q# #) -> do + chs <- unpack q# + return (C# c# : chs) + +countUTF8Chars :: Ptr Word8 -> Int -> IO Int +countUTF8Chars ptr bytes = go ptr 0 + where + end = ptr `plusPtr` bytes + + STRICT2(go) + go ptr n + | ptr >= end = return n + | otherwise = do + case utf8DecodeChar# (unPtr ptr) of + (# c, a #) -> go (Ptr a) (n+1) + +unPtr (Ptr a) = a + +utf8EncodeChar c ptr = + let x = ord c in + case () of + _ | x > 0 && x <= 0x007f -> do + poke ptr (fromIntegral x) + return (ptr `plusPtr` 1) + -- NB. '\0' is encoded as '\xC0\x80', not '\0'. This is so that we + -- can have 0-terminated UTF-8 strings (see GHC.Base.unpackCStringUtf8). + | x <= 0x07ff -> do + poke ptr (fromIntegral (0xC0 .|. ((x `shiftR` 6) .&. 0x1F))) + pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x .&. 0x3F))) + return (ptr `plusPtr` 2) + | x <= 0xffff -> do + poke ptr (fromIntegral (0xE0 .|. (x `shiftR` 12) .&. 0x0F)) + pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x `shiftR` 6) .&. 0x3F)) + pokeElemOff ptr 2 (fromIntegral (0x80 .|. (x .&. 0x3F))) + return (ptr `plusPtr` 3) + | otherwise -> do + poke ptr (fromIntegral (0xF0 .|. (x `shiftR` 18))) + pokeElemOff ptr 1 (fromIntegral (0x80 .|. ((x `shiftR` 12) .&. 0x3F))) + pokeElemOff ptr 2 (fromIntegral (0x80 .|. ((x `shiftR` 6) .&. 0x3F))) + pokeElemOff ptr 3 (fromIntegral (0x80 .|. (x .&. 0x3F))) + return (ptr `plusPtr` 4) + +utf8EncodeString :: Ptr Word8 -> String -> IO () +utf8EncodeString ptr str = go ptr str + where STRICT2(go) + go ptr [] = return () + go ptr (c:cs) = do + ptr' <- utf8EncodeChar c ptr + go ptr' cs + +utf8EncodedLength :: String -> Int +utf8EncodedLength str = go 0 str + where STRICT2(go) + go n [] = n + go n (c:cs) + | ord c > 0 && ord c <= 0x007f = go (n+1) cs + | ord c <= 0x07ff = go (n+2) cs + | ord c <= 0xffff = go (n+3) cs + | otherwise = go (n+4) cs + +-- ----------------------------------------------------------------------------- +-- The Z-encoding + +{- +This is the main name-encoding and decoding function. It encodes any +string into a string that is acceptable as a C name. This is done +right before we emit a symbol name into the compiled C or asm code. +Z-encoding of strings is cached in the FastString interface, so we +never encode the same string more than once. + +The basic encoding scheme is this. + +* Tuples (,,,) are coded as Z3T + +* Alphabetic characters (upper and lower) and digits + all translate to themselves; + except 'Z', which translates to 'ZZ' + and 'z', which translates to 'zz' + We need both so that we can preserve the variable/tycon distinction + +* Most other printable characters translate to 'zx' or 'Zx' for some + alphabetic character x + +* The others translate as 'znnnU' where 'nnn' is the decimal number + of the character + + Before After + -------------------------- + Trak Trak + foo_wib foozuwib + > zg + >1 zg1 + foo# foozh + foo## foozhzh + foo##1 foozhzh1 + fooZ fooZZ + :+ ZCzp + () Z0T 0-tuple + (,,,,) Z5T 5-tuple + (# #) Z1H unboxed 1-tuple (note the space) + (#,,,,#) Z5H unboxed 5-tuple + (NB: There is no Z1T nor Z0H.) +-} + +type UserString = String -- As the user typed it +type EncodedString = String -- Encoded form + + +zEncodeString :: UserString -> EncodedString +zEncodeString cs = case maybe_tuple cs of + Just n -> n -- Tuples go to Z2T etc + Nothing -> go cs + where + go [] = [] + go (c:cs) = encode_ch c ++ go cs + +unencodedChar :: Char -> Bool -- True for chars that don't need encoding +unencodedChar 'Z' = False +unencodedChar 'z' = False +unencodedChar c = c >= 'a' && c <= 'z' + || c >= 'A' && c <= 'Z' + || c >= '0' && c <= '9' + +encode_ch :: Char -> EncodedString +encode_ch c | unencodedChar c = [c] -- Common case first + +-- Constructors +encode_ch '(' = "ZL" -- Needed for things like (,), and (->) +encode_ch ')' = "ZR" -- For symmetry with ( +encode_ch '[' = "ZM" +encode_ch ']' = "ZN" +encode_ch ':' = "ZC" +encode_ch 'Z' = "ZZ" + +-- Variables +encode_ch 'z' = "zz" +encode_ch '&' = "za" +encode_ch '|' = "zb" +encode_ch '^' = "zc" +encode_ch '$' = "zd" +encode_ch '=' = "ze" +encode_ch '>' = "zg" +encode_ch '#' = "zh" +encode_ch '.' = "zi" +encode_ch '<' = "zl" +encode_ch '-' = "zm" +encode_ch '!' = "zn" +encode_ch '+' = "zp" +encode_ch '\'' = "zq" +encode_ch '\\' = "zr" +encode_ch '/' = "zs" +encode_ch '*' = "zt" +encode_ch '_' = "zu" +encode_ch '%' = "zv" +encode_ch c = 'z' : if isDigit (head hex_str) then hex_str + else '0':hex_str + where hex_str = showHex (ord c) "U" + -- ToDo: we could improve the encoding here in various ways. + -- eg. strings of unicode characters come out as 'z1234Uz5678U', we + -- could remove the 'U' in the middle (the 'z' works as a separator). + +zDecodeString :: EncodedString -> UserString +zDecodeString [] = [] +zDecodeString ('Z' : d : rest) + | isDigit d = decode_tuple d rest + | otherwise = decode_upper d : zDecodeString rest +zDecodeString ('z' : d : rest) + | isDigit d = decode_num_esc d rest + | otherwise = decode_lower d : zDecodeString rest +zDecodeString (c : rest) = c : zDecodeString rest + +decode_upper, decode_lower :: Char -> Char + +decode_upper 'L' = '(' +decode_upper 'R' = ')' +decode_upper 'M' = '[' +decode_upper 'N' = ']' +decode_upper 'C' = ':' +decode_upper 'Z' = 'Z' +decode_upper ch = {-pprTrace "decode_upper" (char ch)-} ch + +decode_lower 'z' = 'z' +decode_lower 'a' = '&' +decode_lower 'b' = '|' +decode_lower 'c' = '^' +decode_lower 'd' = '$' +decode_lower 'e' = '=' +decode_lower 'g' = '>' +decode_lower 'h' = '#' +decode_lower 'i' = '.' +decode_lower 'l' = '<' +decode_lower 'm' = '-' +decode_lower 'n' = '!' +decode_lower 'p' = '+' +decode_lower 'q' = '\'' +decode_lower 'r' = '\\' +decode_lower 's' = '/' +decode_lower 't' = '*' +decode_lower 'u' = '_' +decode_lower 'v' = '%' +decode_lower ch = {-pprTrace "decode_lower" (char ch)-} ch + +-- Characters not having a specific code are coded as z224U (in hex) +decode_num_esc d rest + = go (digitToInt d) rest + where + go n (c : rest) | isHexDigit c = go (16*n + digitToInt c) rest + go n ('U' : rest) = chr n : zDecodeString rest + go n other = error ("decode_num_esc: " ++ show n ++ ' ':other) + +decode_tuple :: Char -> EncodedString -> UserString +decode_tuple d rest + = go (digitToInt d) rest + where + -- NB. recurse back to zDecodeString after decoding the tuple, because + -- the tuple might be embedded in a longer name. + go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest + go 0 ('T':rest) = "()" ++ zDecodeString rest + go n ('T':rest) = '(' : replicate (n-1) ',' ++ ")" ++ zDecodeString rest + go 1 ('H':rest) = "(# #)" ++ zDecodeString rest + go n ('H':rest) = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ zDecodeString rest + go n other = error ("decode_tuple: " ++ show n ++ ' ':other) + +{- +Tuples are encoded as + Z3T or Z3H +for 3-tuples or unboxed 3-tuples respectively. No other encoding starts + Z<digit> + +* "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple) + There are no unboxed 0-tuples. + +* "()" is the tycon for a boxed 0-tuple. + There are no boxed 1-tuples. +-} + +maybe_tuple :: UserString -> Maybe EncodedString + +maybe_tuple "(# #)" = Just("Z1H") +maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of + (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H") + other -> Nothing +maybe_tuple "()" = Just("Z0T") +maybe_tuple ('(' : cs) = case count_commas (0::Int) cs of + (n, ')' : cs) -> Just ('Z' : shows (n+1) "T") + other -> Nothing +maybe_tuple other = Nothing + +count_commas :: Int -> String -> (Int, String) +count_commas n (',' : cs) = count_commas (n+1) cs +count_commas n cs = (n,cs) diff --git a/compiler/utils/FastMutInt.lhs b/compiler/utils/FastMutInt.lhs new file mode 100644 index 0000000000..b483a1428e --- /dev/null +++ b/compiler/utils/FastMutInt.lhs @@ -0,0 +1,54 @@ +{-# OPTIONS -cpp #-} +-- +-- (c) The University of Glasgow 2002 +-- +-- Unboxed mutable Ints + +\begin{code} +module FastMutInt( + FastMutInt, newFastMutInt, + readFastMutInt, writeFastMutInt + ) where + +#include "MachDeps.h" + +#ifndef SIZEOF_HSINT +#define SIZEOF_HSINT INT_SIZE_IN_BYTES +#endif + + +#if __GLASGOW_HASKELL__ < 503 +import GlaExts +import PrelIOBase +#else +import GHC.Base +import GHC.IOBase +#endif + +#if __GLASGOW_HASKELL__ < 411 +newByteArray# = newCharArray# +#endif +\end{code} + +\begin{code} +#ifdef __GLASGOW_HASKELL__ +data FastMutInt = FastMutInt (MutableByteArray# RealWorld) + +newFastMutInt :: IO FastMutInt +newFastMutInt = IO $ \s -> + case newByteArray# size s of { (# s, arr #) -> + (# s, FastMutInt arr #) } + where I# size = SIZEOF_HSINT + +readFastMutInt :: FastMutInt -> IO Int +readFastMutInt (FastMutInt arr) = IO $ \s -> + case readIntArray# arr 0# s of { (# s, i #) -> + (# s, I# i #) } + +writeFastMutInt :: FastMutInt -> Int -> IO () +writeFastMutInt (FastMutInt arr) (I# i) = IO $ \s -> + case writeIntArray# arr 0# i s of { s -> + (# s, () #) } +\end{code} +#endif + diff --git a/compiler/utils/FastString.lhs b/compiler/utils/FastString.lhs new file mode 100644 index 0000000000..ea307799c4 --- /dev/null +++ b/compiler/utils/FastString.lhs @@ -0,0 +1,499 @@ +% +% (c) The University of Glasgow, 1997-2006 +% +\begin{code} +{- +FastString: A compact, hash-consed, representation of character strings. + Comparison is O(1), and you can get a Unique from them. + Generated by the FSLIT macro + Turn into SDoc with Outputable.ftext + +LitString: Just a wrapper for the Addr# of a C string (Ptr CChar). + Practically no operations + Outputing them is fast + Generated by the SLIT macro + Turn into SDoc with Outputable.ptext + +Use LitString unless you want the facilities of FastString +-} +module FastString + ( + -- * FastStrings + FastString(..), -- not abstract, for now. + + -- ** Construction + mkFastString, + mkFastStringBytes, + mkFastStringForeignPtr, + mkFastString#, + mkZFastString, + mkZFastStringBytes, + + -- ** Deconstruction + unpackFS, -- :: FastString -> String + bytesFS, -- :: FastString -> [Word8] + + -- ** Encoding + isZEncoded, + zEncodeFS, + + -- ** Operations + uniqueOfFS, + lengthFS, + nullFS, + appendFS, + headFS, + tailFS, + concatFS, + consFS, + nilFS, + + -- ** Outputing + hPutFS, + + -- ** Internal + getFastStringTable, + hasZEncoding, + + -- * LitStrings + LitString, + mkLitString#, + strLength + ) where + +-- This #define suppresses the "import FastString" that +-- HsVersions otherwise produces +#define COMPILING_FAST_STRING +#include "HsVersions.h" + +import Encoding + +import Foreign +import Foreign.C +import GHC.Exts +import System.IO.Unsafe ( unsafePerformIO ) +import Control.Monad.ST ( stToIO ) +import Data.IORef ( IORef, newIORef, readIORef, writeIORef ) +import System.IO ( hPutBuf ) +import Data.Maybe ( isJust ) + +import GHC.Arr ( STArray(..), newSTArray ) +import GHC.IOBase ( IO(..) ) +import GHC.Ptr ( Ptr(..) ) + +#define hASH_TBL_SIZE 4091 + + +{-| +A 'FastString' is an array of bytes, hashed to support fast O(1) +comparison. It is also associated with a character encoding, so that +we know how to convert a 'FastString' to the local encoding, or to the +Z-encoding used by the compiler internally. + +'FastString's support a memoized conversion to the Z-encoding via zEncodeFS. +-} + +data FastString = FastString { + uniq :: {-# UNPACK #-} !Int, -- unique id + n_bytes :: {-# UNPACK #-} !Int, -- number of bytes + n_chars :: {-# UNPACK #-} !Int, -- number of chars + buf :: {-# UNPACK #-} !(ForeignPtr Word8), + enc :: FSEncoding + } + +data FSEncoding + = ZEncoded + -- including strings that don't need any encoding + | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString)) + -- A UTF-8 string with a memoized Z-encoding + +instance Eq FastString where + f1 == f2 = uniq f1 == uniq f2 + +instance Ord FastString where + -- Compares lexicographically, not by unique + a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False } + a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False } + a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True } + a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True } + max x y | x >= y = x + | otherwise = y + min x y | x <= y = x + | otherwise = y + compare a b = cmpFS a b + +instance Show FastString where + show fs = show (unpackFS fs) + +cmpFS :: FastString -> FastString -> Ordering +cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) = + if u1 == u2 then EQ else + let l = if l1 <= l2 then l1 else l2 in + inlinePerformIO $ + withForeignPtr buf1 $ \p1 -> + withForeignPtr buf2 $ \p2 -> do + res <- memcmp p1 p2 l + case () of + _ | res < 0 -> return LT + | res == 0 -> if l1 == l2 then return EQ + else if l1 < l2 then return LT + else return GT + | otherwise -> return GT + +#ifndef __HADDOCK__ +foreign import ccall unsafe "ghc_memcmp" + memcmp :: Ptr a -> Ptr b -> Int -> IO Int +#endif + +-- ----------------------------------------------------------------------------- +-- Construction + +{- +Internally, the compiler will maintain a fast string symbol +table, providing sharing and fast comparison. Creation of +new @FastString@s then covertly does a lookup, re-using the +@FastString@ if there was a hit. +-} + +data FastStringTable = + FastStringTable + {-# UNPACK #-} !Int + (MutableArray# RealWorld [FastString]) + +string_table :: IORef FastStringTable +string_table = + unsafePerformIO $ do + (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) []) + newIORef (FastStringTable 0 arr#) + +lookupTbl :: FastStringTable -> Int -> IO [FastString] +lookupTbl (FastStringTable _ arr#) (I# i#) = + IO $ \ s# -> readArray# arr# i# s# + +updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO () +updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do + (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) }) + writeIORef fs_table_var (FastStringTable (uid+1) arr#) + +mkFastString# :: Addr# -> FastString +mkFastString# a# = mkFastStringBytes ptr (strLength ptr) + where ptr = Ptr a# + +mkFastStringBytes :: Ptr Word8 -> Int -> FastString +mkFastStringBytes ptr len = unsafePerformIO $ do + ft@(FastStringTable uid tbl#) <- readIORef string_table + let + h = hashStr ptr len + add_it ls = do + fs <- copyNewFastString uid ptr len + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h + case lookup_result of + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v + +mkZFastStringBytes :: Ptr Word8 -> Int -> FastString +mkZFastStringBytes ptr len = unsafePerformIO $ do + ft@(FastStringTable uid tbl#) <- readIORef string_table + let + h = hashStr ptr len + add_it ls = do + fs <- copyNewZFastString uid ptr len + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h + case lookup_result of + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v + +-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference +-- between this and 'mkFastStringBytes' is that we don't have to copy +-- the bytes if the string is new to the table. +mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString +mkFastStringForeignPtr ptr fp len = do + ft@(FastStringTable uid tbl#) <- readIORef string_table +-- _trace ("hashed: "++show (I# h)) $ + let + h = hashStr ptr len + add_it ls = do + fs <- mkNewFastString uid ptr fp len + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h + case lookup_result of + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v + +mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString +mkZFastStringForeignPtr ptr fp len = do + ft@(FastStringTable uid tbl#) <- readIORef string_table +-- _trace ("hashed: "++show (I# h)) $ + let + h = hashStr ptr len + add_it ls = do + fs <- mkNewZFastString uid ptr fp len + updTbl string_table ft h (fs:ls) + {- _trace ("new: " ++ show f_str) $ -} + return fs + -- + lookup_result <- lookupTbl ft h + case lookup_result of + [] -> add_it [] + ls -> do + b <- bucket_match ls len ptr + case b of + Nothing -> add_it ls + Just v -> {- _trace ("re-use: "++show v) $ -} return v + + +-- | Creates a UTF-8 encoded 'FastString' from a 'String' +mkFastString :: String -> FastString +mkFastString str = + inlinePerformIO $ do + let l = utf8EncodedLength str + buf <- mallocForeignPtrBytes l + withForeignPtr buf $ \ptr -> do + utf8EncodeString ptr str + mkFastStringForeignPtr ptr buf l + + +-- | Creates a Z-encoded 'FastString' from a 'String' +mkZFastString :: String -> FastString +mkZFastString str = + inlinePerformIO $ do + let l = Prelude.length str + buf <- mallocForeignPtrBytes l + withForeignPtr buf $ \ptr -> do + pokeCAString (castPtr ptr) str + mkZFastStringForeignPtr ptr buf l + +bucket_match [] _ _ = return Nothing +bucket_match (v@(FastString _ l _ buf _):ls) len ptr + | len == l = do + b <- cmpStringPrefix ptr buf len + if b then return (Just v) + else bucket_match ls len ptr + | otherwise = + bucket_match ls len ptr + +mkNewFastString uid ptr fp len = do + ref <- newIORef Nothing + n_chars <- countUTF8Chars ptr len + return (FastString uid len n_chars fp (UTF8Encoded ref)) + +mkNewZFastString uid ptr fp len = do + return (FastString uid len len fp ZEncoded) + + +copyNewFastString uid ptr len = do + fp <- copyBytesToForeignPtr ptr len + ref <- newIORef Nothing + n_chars <- countUTF8Chars ptr len + return (FastString uid len n_chars fp (UTF8Encoded ref)) + +copyNewZFastString uid ptr len = do + fp <- copyBytesToForeignPtr ptr len + return (FastString uid len len fp ZEncoded) + + +copyBytesToForeignPtr ptr len = do + fp <- mallocForeignPtrBytes len + withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len + return fp + +cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool +cmpStringPrefix ptr fp len = + withForeignPtr fp $ \ptr' -> do + r <- memcmp ptr ptr' len + return (r == 0) + + +hashStr :: Ptr Word8 -> Int -> Int + -- use the Addr to produce a hash value between 0 & m (inclusive) +hashStr (Ptr a#) (I# len#) = loop 0# 0# + where + loop h n | n ==# len# = I# h + | otherwise = loop h2 (n +# 1#) + where c = ord# (indexCharOffAddr# a# n) + h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE# + +-- ----------------------------------------------------------------------------- +-- Operations + +-- | Returns the length of the 'FastString' in characters +lengthFS :: FastString -> Int +lengthFS f = n_chars f + +-- | Returns 'True' if the 'FastString' is Z-encoded +isZEncoded :: FastString -> Bool +isZEncoded fs | ZEncoded <- enc fs = True + | otherwise = False + +-- | Returns 'True' if this 'FastString' is not Z-encoded but already has +-- a Z-encoding cached (used in producing stats). +hasZEncoding :: FastString -> Bool +hasZEncoding fs@(FastString uid n_bytes _ fp enc) = + case enc of + ZEncoded -> False + UTF8Encoded ref -> + inlinePerformIO $ do + m <- readIORef ref + return (isJust m) + +-- | Returns 'True' if the 'FastString' is empty +nullFS :: FastString -> Bool +nullFS f = n_bytes f == 0 + +-- | unpacks and decodes the FastString +unpackFS :: FastString -> String +unpackFS (FastString _ n_bytes _ buf enc) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> + case enc of + ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes) + UTF8Encoded _ -> utf8DecodeString ptr n_bytes + +bytesFS :: FastString -> [Word8] +bytesFS (FastString _ n_bytes _ buf enc) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> + peekArray n_bytes ptr + +-- | returns a Z-encoded version of a 'FastString'. This might be the +-- original, if it was already Z-encoded. The first time this +-- function is applied to a particular 'FastString', the results are +-- memoized. +-- +zEncodeFS :: FastString -> FastString +zEncodeFS fs@(FastString uid n_bytes _ fp enc) = + case enc of + ZEncoded -> fs + UTF8Encoded ref -> + inlinePerformIO $ do + m <- readIORef ref + case m of + Just fs -> return fs + Nothing -> do + let efs = mkZFastString (zEncodeString (unpackFS fs)) + writeIORef ref (Just efs) + return efs + +appendFS :: FastString -> FastString -> FastString +appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2) + +concatFS :: [FastString] -> FastString +concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better + +headFS :: FastString -> Char +headFS (FastString _ n_bytes _ buf enc) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> do + case enc of + ZEncoded -> do + w <- peek (castPtr ptr) + return (castCCharToChar w) + UTF8Encoded _ -> + return (fst (utf8DecodeChar ptr)) + +tailFS :: FastString -> FastString +tailFS (FastString _ n_bytes _ buf enc) = + inlinePerformIO $ withForeignPtr buf $ \ptr -> do + case enc of + ZEncoded -> do + return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1) + UTF8Encoded _ -> do + let (_,ptr') = utf8DecodeChar ptr + let off = ptr' `minusPtr` ptr + return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off) + +consFS :: Char -> FastString -> FastString +consFS c fs = mkFastString (c : unpackFS fs) + +uniqueOfFS :: FastString -> Int# +uniqueOfFS (FastString (I# u#) _ _ _ _) = u# + +nilFS = mkFastString "" + +-- ----------------------------------------------------------------------------- +-- Stats + +getFastStringTable :: IO [[FastString]] +getFastStringTable = do + tbl <- readIORef string_table + buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE] + return buckets + +-- ----------------------------------------------------------------------------- +-- Outputting 'FastString's + +-- |Outputs a 'FastString' with /no decoding at all/, that is, you +-- get the actual bytes in the 'FastString' written to the 'Handle'. +hPutFS handle (FastString _ len _ fp _) + | len == 0 = return () + | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len + +-- ToDo: we'll probably want an hPutFSLocal, or something, to output +-- in the current locale's encoding (for error messages and suchlike). + +-- ----------------------------------------------------------------------------- +-- LitStrings, here for convenience only. + +type LitString = Ptr () + +mkLitString# :: Addr# -> LitString +mkLitString# a# = Ptr a# + +foreign import ccall unsafe "ghc_strlen" + strLength :: Ptr () -> Int + +-- ----------------------------------------------------------------------------- +-- under the carpet + +-- Just like unsafePerformIO, but we inline it. +{-# INLINE inlinePerformIO #-} +inlinePerformIO :: IO a -> a +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r + +-- NB. does *not* add a '\0'-terminator. +pokeCAString :: Ptr CChar -> String -> IO () +pokeCAString ptr str = + let + go [] n = return () + go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1) + in + go str 0 + +#if __GLASGOW_HASKELL__ < 600 + +mallocForeignPtrBytes :: Int -> IO (ForeignPtr a) +mallocForeignPtrBytes n = do + r <- mallocBytes n + newForeignPtr r (finalizerFree r) + +foreign import ccall unsafe "stdlib.h free" + finalizerFree :: Ptr a -> IO () + +peekCAStringLen = peekCStringLen + +#elif __GLASGOW_HASKELL__ <= 602 + +peekCAStringLen = peekCStringLen + +#endif +\end{code} diff --git a/compiler/utils/FastTypes.lhs b/compiler/utils/FastTypes.lhs new file mode 100644 index 0000000000..bb92c8c02f --- /dev/null +++ b/compiler/utils/FastTypes.lhs @@ -0,0 +1,65 @@ +% +% (c) The University of Glasgow, 2000 +% +\section{Fast integers and booleans} + +\begin{code} +module FastTypes ( + FastInt, _ILIT, iBox, iUnbox, + (+#), (-#), (*#), quotFastInt, negateFastInt, + (==#), (<#), (<=#), (>=#), (>#), + + FastBool, fastBool, isFastTrue, fastOr, fastAnd + ) where + +#include "HsVersions.h" + +#if defined(__GLASGOW_HASKELL__) + +-- Import the beggars +import GLAEXTS + ( Int(..), Int#, (+#), (-#), (*#), + quotInt#, negateInt#, (==#), (<#), (<=#), (>=#), (>#) + ) + +type FastInt = Int# +_ILIT (I# x) = x +iBox x = I# x +iUnbox (I# x) = x +quotFastInt = quotInt# +negateFastInt = negateInt# + +type FastBool = Int# +fastBool True = 1# +fastBool False = 0# +isFastTrue x = x ==# 1# + +fastOr 1# _ = 1# +fastOr 0# x = x + +fastAnd 0# x = 0# +fastAnd 1# x = x + +#else /* ! __GLASGOW_HASKELL__ */ + +type FastInt = Int +_ILIT x = x +iBox x = x +iUnbox x = x +(+#) = (+) +(-#) = (-) +(*#) = (*) +quotFastInt = quot +negateFastInt = negate +(==#) = (==) +(<#) = (<) +(<=#) = (<=) +(>=#) = (>=) +(>#) = (>) + +type FastBool = Bool +fastBool x = x +_IS_TRUE_ x = x + +#endif /* ! __GLASGOW_HASKELL__ */ +\end{code} diff --git a/compiler/utils/FiniteMap.lhs b/compiler/utils/FiniteMap.lhs new file mode 100644 index 0000000000..9168d3656f --- /dev/null +++ b/compiler/utils/FiniteMap.lhs @@ -0,0 +1,749 @@ + +% (c) The AQUA Project, Glasgow University, 1994-1998 +% +\section[FiniteMap]{An implementation of finite maps} + +``Finite maps'' are the heart of the compiler's +lookup-tables/environments and its implementation of sets. Important +stuff! + +This code is derived from that in the paper: +\begin{display} + S Adams + "Efficient sets: a balancing act" + Journal of functional programming 3(4) Oct 1993, pp553-562 +\end{display} + +The code is SPECIALIZEd to various highly-desirable types (e.g., Id) +near the end. + +\begin{code} + +module FiniteMap ( + FiniteMap, -- abstract type + + emptyFM, unitFM, listToFM, + + addToFM, + addToFM_C, + addListToFM, + addListToFM_C, + delFromFM, + delListFromFM, + + plusFM, + plusFM_C, + minusFM, + foldFM, + + intersectFM, + intersectFM_C, + mapFM, filterFM, + + sizeFM, isEmptyFM, elemFM, lookupFM, lookupWithDefaultFM, + + fmToList, keysFM, eltsFM + + , bagToFM + + ) where + +#include "HsVersions.h" +#define IF_NOT_GHC(a) {--} + +#if defined(DEBUG_FINITEMAPS)/* NB NB NB */ +#define OUTPUTABLE_key , Outputable key +#else +#define OUTPUTABLE_key {--} +#endif + +import Maybes +import Bag ( Bag, foldrBag ) +import Util +import Outputable + +import GLAEXTS + +#if ! OMIT_NATIVE_CODEGEN +# define IF_NCG(a) a +#else +# define IF_NCG(a) {--} +#endif + + +-- SIGH: but we use unboxed "sizes"... +#if __GLASGOW_HASKELL__ +#define IF_GHC(a,b) a +#else /* not GHC */ +#define IF_GHC(a,b) b +#endif /* not GHC */ +\end{code} + + +%************************************************************************ +%* * +\subsection{The signature of the module} +%* * +%************************************************************************ + +\begin{code} +-- BUILDING +emptyFM :: FiniteMap key elt +unitFM :: key -> elt -> FiniteMap key elt +listToFM :: (Ord key OUTPUTABLE_key) => [(key,elt)] -> FiniteMap key elt + -- In the case of duplicates, the last is taken +bagToFM :: (Ord key OUTPUTABLE_key) => Bag (key,elt) -> FiniteMap key elt + -- In the case of duplicates, who knows which is taken + +-- ADDING AND DELETING + -- Throws away any previous binding + -- In the list case, the items are added starting with the + -- first one in the list +addToFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> key -> elt -> FiniteMap key elt +addListToFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> [(key,elt)] -> FiniteMap key elt + + -- Combines with previous binding + -- The combining fn goes (old -> new -> new) +addToFM_C :: (Ord key OUTPUTABLE_key) => (elt -> elt -> elt) + -> FiniteMap key elt -> key -> elt + -> FiniteMap key elt +addListToFM_C :: (Ord key OUTPUTABLE_key) => (elt -> elt -> elt) + -> FiniteMap key elt -> [(key,elt)] + -> FiniteMap key elt + + -- Deletion doesn't complain if you try to delete something + -- which isn't there +delFromFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> key -> FiniteMap key elt +delListFromFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> [key] -> FiniteMap key elt + +-- COMBINING + -- Bindings in right argument shadow those in the left +plusFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + + -- Combines bindings for the same thing with the given function +plusFM_C :: (Ord key OUTPUTABLE_key) => (elt -> elt -> elt) + -> FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt + +minusFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt + -- (minusFM a1 a2) deletes from a1 any bindings which are bound in a2 + +intersectFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt +intersectFM_C :: (Ord key OUTPUTABLE_key) => (elt1 -> elt2 -> elt3) + -> FiniteMap key elt1 -> FiniteMap key elt2 -> FiniteMap key elt3 + +-- MAPPING, FOLDING, FILTERING +foldFM :: (key -> elt -> a -> a) -> a -> FiniteMap key elt -> a +mapFM :: (key -> elt1 -> elt2) -> FiniteMap key elt1 -> FiniteMap key elt2 +filterFM :: (Ord key OUTPUTABLE_key) => (key -> elt -> Bool) + -> FiniteMap key elt -> FiniteMap key elt + + +-- INTERROGATING +sizeFM :: FiniteMap key elt -> Int +isEmptyFM :: FiniteMap key elt -> Bool + +elemFM :: (Ord key OUTPUTABLE_key) => key -> FiniteMap key elt -> Bool +lookupFM :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> key -> Maybe elt +lookupWithDefaultFM + :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> elt -> key -> elt + -- lookupWithDefaultFM supplies a "default" elt + -- to return for an unmapped key + +-- LISTIFYING +fmToList :: FiniteMap key elt -> [(key,elt)] +keysFM :: FiniteMap key elt -> [key] +eltsFM :: FiniteMap key elt -> [elt] +\end{code} + +%************************************************************************ +%* * +\subsection{The @FiniteMap@ data type, and building of same} +%* * +%************************************************************************ + +Invariants about @FiniteMap@: +\begin{enumerate} +\item +all keys in a FiniteMap are distinct +\item +all keys in left subtree are $<$ key in Branch and +all keys in right subtree are $>$ key in Branch +\item +size field of a Branch gives number of Branch nodes in the tree +\item +size of left subtree is differs from size of right subtree by a +factor of at most \tr{sIZE_RATIO} +\end{enumerate} + +\begin{code} +data FiniteMap key elt + = EmptyFM + | Branch key elt -- Key and elt stored here + IF_GHC(Int#,Int{-STRICT-}) -- Size >= 1 + (FiniteMap key elt) -- Children + (FiniteMap key elt) +\end{code} + +\begin{code} +emptyFM = EmptyFM +{- +emptyFM + = Branch bottom bottom IF_GHC(0#,0) bottom bottom + where + bottom = panic "emptyFM" +-} + +-- #define EmptyFM (Branch _ _ IF_GHC(0#,0) _ _) + +unitFM key elt = Branch key elt IF_GHC(1#,1) emptyFM emptyFM + +listToFM = addListToFM emptyFM + +bagToFM = foldrBag (\(k,v) fm -> addToFM fm k v) emptyFM +\end{code} + +%************************************************************************ +%* * +\subsection{Adding to and deleting from @FiniteMaps@} +%* * +%************************************************************************ + +\begin{code} +addToFM fm key elt = addToFM_C (\ old new -> new) fm key elt + +addToFM_C combiner EmptyFM key elt = unitFM key elt +addToFM_C combiner (Branch key elt size fm_l fm_r) new_key new_elt + = case compare new_key key of + LT -> mkBalBranch key elt (addToFM_C combiner fm_l new_key new_elt) fm_r + GT -> mkBalBranch key elt fm_l (addToFM_C combiner fm_r new_key new_elt) + EQ -> Branch new_key (combiner elt new_elt) size fm_l fm_r + +addListToFM fm key_elt_pairs = addListToFM_C (\ old new -> new) fm key_elt_pairs + +addListToFM_C combiner fm key_elt_pairs + = foldl' add fm key_elt_pairs -- foldl adds from the left + where + add fmap (key,elt) = addToFM_C combiner fmap key elt +\end{code} + +\begin{code} +delFromFM EmptyFM del_key = emptyFM +delFromFM (Branch key elt size fm_l fm_r) del_key + = case compare del_key key of + GT -> mkBalBranch key elt fm_l (delFromFM fm_r del_key) + LT -> mkBalBranch key elt (delFromFM fm_l del_key) fm_r + EQ -> glueBal fm_l fm_r + +delListFromFM fm keys = foldl' delFromFM fm keys +\end{code} + +%************************************************************************ +%* * +\subsection{Combining @FiniteMaps@} +%* * +%************************************************************************ + +\begin{code} +plusFM_C combiner EmptyFM fm2 = fm2 +plusFM_C combiner fm1 EmptyFM = fm1 +plusFM_C combiner fm1 (Branch split_key elt2 _ left right) + = mkVBalBranch split_key new_elt + (plusFM_C combiner lts left) + (plusFM_C combiner gts right) + where + lts = splitLT fm1 split_key + gts = splitGT fm1 split_key + new_elt = case lookupFM fm1 split_key of + Nothing -> elt2 + Just elt1 -> combiner elt1 elt2 + +-- It's worth doing plusFM specially, because we don't need +-- to do the lookup in fm1. +-- FM2 over-rides FM1. + +plusFM EmptyFM fm2 = fm2 +plusFM fm1 EmptyFM = fm1 +plusFM fm1 (Branch split_key elt1 _ left right) + = mkVBalBranch split_key elt1 (plusFM lts left) (plusFM gts right) + where + lts = splitLT fm1 split_key + gts = splitGT fm1 split_key + +minusFM EmptyFM fm2 = emptyFM +minusFM fm1 EmptyFM = fm1 +minusFM fm1 (Branch split_key elt _ left right) + = glueVBal (minusFM lts left) (minusFM gts right) + -- The two can be way different, so we need glueVBal + where + lts = splitLT fm1 split_key -- NB gt and lt, so the equal ones + gts = splitGT fm1 split_key -- are not in either. + +intersectFM fm1 fm2 = intersectFM_C (\ left right -> right) fm1 fm2 + +intersectFM_C combiner fm1 EmptyFM = emptyFM +intersectFM_C combiner EmptyFM fm2 = emptyFM +intersectFM_C combiner fm1 (Branch split_key elt2 _ left right) + + | maybeToBool maybe_elt1 -- split_elt *is* in intersection + = mkVBalBranch split_key (combiner elt1 elt2) (intersectFM_C combiner lts left) + (intersectFM_C combiner gts right) + + | otherwise -- split_elt is *not* in intersection + = glueVBal (intersectFM_C combiner lts left) (intersectFM_C combiner gts right) + + where + lts = splitLT fm1 split_key -- NB gt and lt, so the equal ones + gts = splitGT fm1 split_key -- are not in either. + + maybe_elt1 = lookupFM fm1 split_key + Just elt1 = maybe_elt1 +\end{code} + +%************************************************************************ +%* * +\subsection{Mapping, folding, and filtering with @FiniteMaps@} +%* * +%************************************************************************ + +\begin{code} +foldFM k z EmptyFM = z +foldFM k z (Branch key elt _ fm_l fm_r) + = foldFM k (k key elt (foldFM k z fm_r)) fm_l + +mapFM f EmptyFM = emptyFM +mapFM f (Branch key elt size fm_l fm_r) + = Branch key (f key elt) size (mapFM f fm_l) (mapFM f fm_r) + +filterFM p EmptyFM = emptyFM +filterFM p (Branch key elt _ fm_l fm_r) + | p key elt -- Keep the item + = mkVBalBranch key elt (filterFM p fm_l) (filterFM p fm_r) + + | otherwise -- Drop the item + = glueVBal (filterFM p fm_l) (filterFM p fm_r) +\end{code} + +%************************************************************************ +%* * +\subsection{Interrogating @FiniteMaps@} +%* * +%************************************************************************ + +\begin{code} +--{-# INLINE sizeFM #-} +sizeFM EmptyFM = 0 +sizeFM (Branch _ _ size _ _) = IF_GHC(I# size, size) + +isEmptyFM fm = sizeFM fm == 0 + +lookupFM EmptyFM key = Nothing +lookupFM (Branch key elt _ fm_l fm_r) key_to_find + = case compare key_to_find key of + LT -> lookupFM fm_l key_to_find + GT -> lookupFM fm_r key_to_find + EQ -> Just elt + +key `elemFM` fm + = case (lookupFM fm key) of { Nothing -> False; Just elt -> True } + +lookupWithDefaultFM fm deflt key + = case (lookupFM fm key) of { Nothing -> deflt; Just elt -> elt } +\end{code} + +%************************************************************************ +%* * +\subsection{Listifying @FiniteMaps@} +%* * +%************************************************************************ + +\begin{code} +fmToList fm = foldFM (\ key elt rest -> (key,elt) : rest) [] fm +keysFM fm = foldFM (\ key elt rest -> key : rest) [] fm +eltsFM fm = foldFM (\ key elt rest -> elt : rest) [] fm +\end{code} + + +%************************************************************************ +%* * +\subsection{The implementation of balancing} +%* * +%************************************************************************ + +%************************************************************************ +%* * +\subsubsection{Basic construction of a @FiniteMap@} +%* * +%************************************************************************ + +@mkBranch@ simply gets the size component right. This is the ONLY +(non-trivial) place the Branch object is built, so the ASSERTion +recursively checks consistency. (The trivial use of Branch is in +@unitFM@.) + +\begin{code} +sIZE_RATIO :: Int +sIZE_RATIO = 5 + +mkBranch :: (Ord key OUTPUTABLE_key) -- Used for the assertion checking only + => Int + -> key -> elt + -> FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + +mkBranch which key elt fm_l fm_r + = --ASSERT( left_ok && right_ok && balance_ok ) +#if defined(DEBUG_FINITEMAPS) + if not ( left_ok && right_ok && balance_ok ) then + pprPanic ("mkBranch:"++show which) (vcat [ppr [left_ok, right_ok, balance_ok], + ppr key, + ppr fm_l, + ppr fm_r]) + else +#endif + let + result = Branch key elt (unbox (1 + left_size + right_size)) fm_l fm_r + in +-- if sizeFM result <= 8 then + result +-- else +-- pprTrace ("mkBranch:"++(show which)) (ppr result) ( +-- result +-- ) + where + left_ok = case fm_l of + EmptyFM -> True + Branch left_key _ _ _ _ -> let + biggest_left_key = fst (findMax fm_l) + in + biggest_left_key < key + right_ok = case fm_r of + EmptyFM -> True + Branch right_key _ _ _ _ -> let + smallest_right_key = fst (findMin fm_r) + in + key < smallest_right_key + balance_ok = True -- sigh +{- LATER: + balance_ok + = -- Both subtrees have one or no elements... + (left_size + right_size <= 1) +-- NO || left_size == 0 -- ??? +-- NO || right_size == 0 -- ??? + -- ... or the number of elements in a subtree does not exceed + -- sIZE_RATIO times the number of elements in the other subtree + || (left_size * sIZE_RATIO >= right_size && + right_size * sIZE_RATIO >= left_size) +-} + + left_size = sizeFM fm_l + right_size = sizeFM fm_r + +#ifdef __GLASGOW_HASKELL__ + unbox :: Int -> Int# + unbox (I# size) = size +#else + unbox :: Int -> Int + unbox x = x +#endif +\end{code} + +%************************************************************************ +%* * +\subsubsection{{\em Balanced} construction of a @FiniteMap@} +%* * +%************************************************************************ + +@mkBalBranch@ rebalances, assuming that the subtrees aren't too far +out of whack. + +\begin{code} +mkBalBranch :: (Ord key OUTPUTABLE_key) + => key -> elt + -> FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + +mkBalBranch key elt fm_L fm_R + + | size_l + size_r < 2 + = mkBranch 1{-which-} key elt fm_L fm_R + + | size_r > sIZE_RATIO * size_l -- Right tree too big + = case fm_R of + Branch _ _ _ fm_rl fm_rr + | sizeFM fm_rl < 2 * sizeFM fm_rr -> single_L fm_L fm_R + | otherwise -> double_L fm_L fm_R + -- Other case impossible + + | size_l > sIZE_RATIO * size_r -- Left tree too big + = case fm_L of + Branch _ _ _ fm_ll fm_lr + | sizeFM fm_lr < 2 * sizeFM fm_ll -> single_R fm_L fm_R + | otherwise -> double_R fm_L fm_R + -- Other case impossible + + | otherwise -- No imbalance + = mkBranch 2{-which-} key elt fm_L fm_R + + where + size_l = sizeFM fm_L + size_r = sizeFM fm_R + + single_L fm_l (Branch key_r elt_r _ fm_rl fm_rr) + = mkBranch 3{-which-} key_r elt_r (mkBranch 4{-which-} key elt fm_l fm_rl) fm_rr + + double_L fm_l (Branch key_r elt_r _ (Branch key_rl elt_rl _ fm_rll fm_rlr) fm_rr) + = mkBranch 5{-which-} key_rl elt_rl (mkBranch 6{-which-} key elt fm_l fm_rll) + (mkBranch 7{-which-} key_r elt_r fm_rlr fm_rr) + + single_R (Branch key_l elt_l _ fm_ll fm_lr) fm_r + = mkBranch 8{-which-} key_l elt_l fm_ll (mkBranch 9{-which-} key elt fm_lr fm_r) + + double_R (Branch key_l elt_l _ fm_ll (Branch key_lr elt_lr _ fm_lrl fm_lrr)) fm_r + = mkBranch 10{-which-} key_lr elt_lr (mkBranch 11{-which-} key_l elt_l fm_ll fm_lrl) + (mkBranch 12{-which-} key elt fm_lrr fm_r) +\end{code} + + +\begin{code} +mkVBalBranch :: (Ord key OUTPUTABLE_key) + => key -> elt + -> FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + +-- Assert: in any call to (mkVBalBranch_C comb key elt l r), +-- (a) all keys in l are < all keys in r +-- (b) all keys in l are < key +-- (c) all keys in r are > key + +mkVBalBranch key elt EmptyFM fm_r = addToFM fm_r key elt +mkVBalBranch key elt fm_l EmptyFM = addToFM fm_l key elt + +mkVBalBranch key elt fm_l@(Branch key_l elt_l _ fm_ll fm_lr) + fm_r@(Branch key_r elt_r _ fm_rl fm_rr) + | sIZE_RATIO * size_l < size_r + = mkBalBranch key_r elt_r (mkVBalBranch key elt fm_l fm_rl) fm_rr + + | sIZE_RATIO * size_r < size_l + = mkBalBranch key_l elt_l fm_ll (mkVBalBranch key elt fm_lr fm_r) + + | otherwise + = mkBranch 13{-which-} key elt fm_l fm_r + + where + size_l = sizeFM fm_l + size_r = sizeFM fm_r +\end{code} + +%************************************************************************ +%* * +\subsubsection{Gluing two trees together} +%* * +%************************************************************************ + +@glueBal@ assumes its two arguments aren't too far out of whack, just +like @mkBalBranch@. But: all keys in first arg are $<$ all keys in +second. + +\begin{code} +glueBal :: (Ord key OUTPUTABLE_key) + => FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + +glueBal EmptyFM fm2 = fm2 +glueBal fm1 EmptyFM = fm1 +glueBal fm1 fm2 + -- The case analysis here (absent in Adams' program) is really to deal + -- with the case where fm2 is a singleton. Then deleting the minimum means + -- we pass an empty tree to mkBalBranch, which breaks its invariant. + | sizeFM fm2 > sizeFM fm1 + = mkBalBranch mid_key2 mid_elt2 fm1 (deleteMin fm2) + + | otherwise + = mkBalBranch mid_key1 mid_elt1 (deleteMax fm1) fm2 + where + (mid_key1, mid_elt1) = findMax fm1 + (mid_key2, mid_elt2) = findMin fm2 +\end{code} + +@glueVBal@ copes with arguments which can be of any size. +But: all keys in first arg are $<$ all keys in second. + +\begin{code} +glueVBal :: (Ord key OUTPUTABLE_key) + => FiniteMap key elt -> FiniteMap key elt + -> FiniteMap key elt + +glueVBal EmptyFM fm2 = fm2 +glueVBal fm1 EmptyFM = fm1 +glueVBal fm_l@(Branch key_l elt_l _ fm_ll fm_lr) + fm_r@(Branch key_r elt_r _ fm_rl fm_rr) + | sIZE_RATIO * size_l < size_r + = mkBalBranch key_r elt_r (glueVBal fm_l fm_rl) fm_rr + + | sIZE_RATIO * size_r < size_l + = mkBalBranch key_l elt_l fm_ll (glueVBal fm_lr fm_r) + + | otherwise -- We now need the same two cases as in glueBal above. + = glueBal fm_l fm_r + where + size_l = sizeFM fm_l + size_r = sizeFM fm_r +\end{code} + +%************************************************************************ +%* * +\subsection{Local utilities} +%* * +%************************************************************************ + +\begin{code} +splitLT, splitGT :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> key -> FiniteMap key elt + +-- splitLT fm split_key = fm restricted to keys < split_key +-- splitGT fm split_key = fm restricted to keys > split_key + +splitLT EmptyFM split_key = emptyFM +splitLT (Branch key elt _ fm_l fm_r) split_key + = case compare split_key key of + LT -> splitLT fm_l split_key + GT -> mkVBalBranch key elt fm_l (splitLT fm_r split_key) + EQ -> fm_l + +splitGT EmptyFM split_key = emptyFM +splitGT (Branch key elt _ fm_l fm_r) split_key + = case compare split_key key of + GT -> splitGT fm_r split_key + LT -> mkVBalBranch key elt (splitGT fm_l split_key) fm_r + EQ -> fm_r + +findMin :: FiniteMap key elt -> (key,elt) +findMin (Branch key elt _ EmptyFM _) = (key,elt) +findMin (Branch key elt _ fm_l _) = findMin fm_l + +deleteMin :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> FiniteMap key elt +deleteMin (Branch key elt _ EmptyFM fm_r) = fm_r +deleteMin (Branch key elt _ fm_l fm_r) = mkBalBranch key elt (deleteMin fm_l) fm_r + +findMax :: FiniteMap key elt -> (key,elt) +findMax (Branch key elt _ _ EmptyFM) = (key,elt) +findMax (Branch key elt _ _ fm_r) = findMax fm_r + +deleteMax :: (Ord key OUTPUTABLE_key) => FiniteMap key elt -> FiniteMap key elt +deleteMax (Branch key elt _ fm_l EmptyFM) = fm_l +deleteMax (Branch key elt _ fm_l fm_r) = mkBalBranch key elt fm_l (deleteMax fm_r) +\end{code} + +%************************************************************************ +%* * +\subsection{Output-ery} +%* * +%************************************************************************ + +\begin{code} +#if defined(DEBUG_FINITEMAPS) + +instance (Outputable key) => Outputable (FiniteMap key elt) where + ppr fm = pprX fm + +pprX EmptyFM = char '!' +pprX (Branch key elt sz fm_l fm_r) + = parens (hcat [pprX fm_l, space, + ppr key, space, int (IF_GHC(I# sz, sz)), space, + pprX fm_r]) +#else +-- and when not debugging the package itself... +instance (Outputable key, Outputable elt) => Outputable (FiniteMap key elt) where + ppr fm = ppr (fmToList fm) +#endif + +#if 0 +instance (Eq key, Eq elt) => Eq (FiniteMap key elt) where + fm_1 == fm_2 = (sizeFM fm_1 == sizeFM fm_2) && -- quick test + (fmToList fm_1 == fmToList fm_2) + +{- NO: not clear what The Right Thing to do is: +instance (Ord key, Ord elt) => Ord (FiniteMap key elt) where + fm_1 <= fm_2 = (sizeFM fm_1 <= sizeFM fm_2) && -- quick test + (fmToList fm_1 <= fmToList fm_2) +-} +#endif +\end{code} + +%************************************************************************ +%* * +\subsection{Efficiency pragmas for GHC} +%* * +%************************************************************************ + +When the FiniteMap module is used in GHC, we specialise it for +\tr{Uniques}, for dastardly efficiency reasons. + +\begin{code} +#if 0 + +#if __GLASGOW_HASKELL__ + +{-# SPECIALIZE addListToFM + :: FiniteMap (FastString, FAST_STRING) elt -> [((FAST_STRING, FAST_STRING),elt)] -> FiniteMap (FAST_STRING, FAST_STRING) elt + , FiniteMap RdrName elt -> [(RdrName,elt)] -> FiniteMap RdrName elt + IF_NCG(COMMA FiniteMap Reg elt -> [(Reg COMMA elt)] -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE addListToFM_C + :: (elt -> elt -> elt) -> FiniteMap TyCon elt -> [(TyCon,elt)] -> FiniteMap TyCon elt + , (elt -> elt -> elt) -> FiniteMap FastString elt -> [(FAST_STRING,elt)] -> FiniteMap FAST_STRING elt + IF_NCG(COMMA (elt -> elt -> elt) -> FiniteMap Reg elt -> [(Reg COMMA elt)] -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE addToFM + :: FiniteMap CLabel elt -> CLabel -> elt -> FiniteMap CLabel elt + , FiniteMap FastString elt -> FAST_STRING -> elt -> FiniteMap FAST_STRING elt + , FiniteMap (FastString, FAST_STRING) elt -> (FAST_STRING, FAST_STRING) -> elt -> FiniteMap (FAST_STRING, FAST_STRING) elt + , FiniteMap RdrName elt -> RdrName -> elt -> FiniteMap RdrName elt + IF_NCG(COMMA FiniteMap Reg elt -> Reg -> elt -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE addToFM_C + :: (elt -> elt -> elt) -> FiniteMap (RdrName, RdrName) elt -> (RdrName, RdrName) -> elt -> FiniteMap (RdrName, RdrName) elt + , (elt -> elt -> elt) -> FiniteMap FastString elt -> FAST_STRING -> elt -> FiniteMap FAST_STRING elt + IF_NCG(COMMA (elt -> elt -> elt) -> FiniteMap Reg elt -> Reg -> elt -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE bagToFM + :: Bag (FastString,elt) -> FiniteMap FAST_STRING elt + #-} +{-# SPECIALIZE delListFromFM + :: FiniteMap RdrName elt -> [RdrName] -> FiniteMap RdrName elt + , FiniteMap FastString elt -> [FAST_STRING] -> FiniteMap FAST_STRING elt + IF_NCG(COMMA FiniteMap Reg elt -> [Reg] -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE listToFM + :: [([Char],elt)] -> FiniteMap [Char] elt + , [(FastString,elt)] -> FiniteMap FAST_STRING elt + , [((FastString,FAST_STRING),elt)] -> FiniteMap (FAST_STRING, FAST_STRING) elt + IF_NCG(COMMA [(Reg COMMA elt)] -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE lookupFM + :: FiniteMap CLabel elt -> CLabel -> Maybe elt + , FiniteMap [Char] elt -> [Char] -> Maybe elt + , FiniteMap FastString elt -> FAST_STRING -> Maybe elt + , FiniteMap (FastString,FAST_STRING) elt -> (FAST_STRING,FAST_STRING) -> Maybe elt + , FiniteMap RdrName elt -> RdrName -> Maybe elt + , FiniteMap (RdrName,RdrName) elt -> (RdrName,RdrName) -> Maybe elt + IF_NCG(COMMA FiniteMap Reg elt -> Reg -> Maybe elt) + #-} +{-# SPECIALIZE lookupWithDefaultFM + :: FiniteMap FastString elt -> elt -> FAST_STRING -> elt + IF_NCG(COMMA FiniteMap Reg elt -> elt -> Reg -> elt) + #-} +{-# SPECIALIZE plusFM + :: FiniteMap RdrName elt -> FiniteMap RdrName elt -> FiniteMap RdrName elt + , FiniteMap FastString elt -> FiniteMap FAST_STRING elt -> FiniteMap FAST_STRING elt + IF_NCG(COMMA FiniteMap Reg elt -> FiniteMap Reg elt -> FiniteMap Reg elt) + #-} +{-# SPECIALIZE plusFM_C + :: (elt -> elt -> elt) -> FiniteMap FastString elt -> FiniteMap FAST_STRING elt -> FiniteMap FAST_STRING elt + IF_NCG(COMMA (elt -> elt -> elt) -> FiniteMap Reg elt -> FiniteMap Reg elt -> FiniteMap Reg elt) + #-} + +#endif /* compiling with ghc and have specialiser */ + +#endif /* 0 */ +\end{code} diff --git a/compiler/utils/IOEnv.hs b/compiler/utils/IOEnv.hs new file mode 100644 index 0000000000..e1dfdb400b --- /dev/null +++ b/compiler/utils/IOEnv.hs @@ -0,0 +1,208 @@ +-- (c) The University of Glasgow 2002 +-- +-- The IO Monad with an environment +-- + +module IOEnv ( + IOEnv, -- Instance of Monad + + -- Standard combinators, specialised + returnM, thenM, thenM_, failM, failWithM, + mappM, mappM_, mapSndM, sequenceM, sequenceM_, + foldlM, foldrM, + mapAndUnzipM, mapAndUnzip3M, + checkM, ifM, zipWithM, zipWithM_, + + -- Getting at the environment + getEnv, setEnv, updEnv, + + runIOEnv, unsafeInterleaveM, + tryM, tryAllM, fixM, + + -- I/O operations + ioToIOEnv, + IORef, newMutVar, readMutVar, writeMutVar, updMutVar + ) where +#include "HsVersions.h" + +import Panic ( try, tryUser, Exception(..) ) +import DATA_IOREF ( IORef, newIORef, readIORef, writeIORef ) +import UNSAFE_IO ( unsafeInterleaveIO ) +import FIX_IO ( fixIO ) + + +---------------------------------------------------------------------- +-- Defining the monad type +---------------------------------------------------------------------- + + +newtype IOEnv env a = IOEnv (env -> IO a) +unIOEnv (IOEnv m) = m + +instance Monad (IOEnv m) where + (>>=) = thenM + (>>) = thenM_ + return = returnM + fail s = failM -- Ignore the string + +returnM :: a -> IOEnv env a +returnM a = IOEnv (\ env -> return a) + +thenM :: IOEnv env a -> (a -> IOEnv env b) -> IOEnv env b +thenM (IOEnv m) f = IOEnv (\ env -> do { r <- m env ; + unIOEnv (f r) env }) + +thenM_ :: IOEnv env a -> IOEnv env b -> IOEnv env b +thenM_ (IOEnv m) f = IOEnv (\ env -> do { m env ; unIOEnv f env }) + +failM :: IOEnv env a +failM = IOEnv (\ env -> ioError (userError "IOEnv failure")) + +failWithM :: String -> IOEnv env a +failWithM s = IOEnv (\ env -> ioError (userError s)) + + + +---------------------------------------------------------------------- +-- Fundmantal combinators specific to the monad +---------------------------------------------------------------------- + + +--------------------------- +runIOEnv :: env -> IOEnv env a -> IO a +runIOEnv env (IOEnv m) = m env + + +--------------------------- +{-# NOINLINE fixM #-} + -- Aargh! Not inlining fixTc alleviates a space leak problem. + -- Normally fixTc is used with a lazy tuple match: if the optimiser is + -- shown the definition of fixTc, it occasionally transforms the code + -- in such a way that the code generator doesn't spot the selector + -- thunks. Sigh. + +fixM :: (a -> IOEnv env a) -> IOEnv env a +fixM f = IOEnv (\ env -> fixIO (\ r -> unIOEnv (f r) env)) + + +--------------------------- +tryM :: IOEnv env r -> IOEnv env (Either Exception r) +-- Reflect UserError exceptions into IOEnv monad +-- The idea is that errors in the program being compiled will give rise +-- to UserErrors. But, say, pattern-match failures in GHC itself should +-- not be caught here, else they'll be reported as errors in the program +-- begin compiled! +tryM (IOEnv thing) = IOEnv (\ env -> tryUser (thing env)) + +tryAllM :: IOEnv env r -> IOEnv env (Either Exception r) +-- Catch *all* exceptions +-- This is used when running a Template-Haskell splice, when +-- even a pattern-match failure is a programmer error +tryAllM (IOEnv thing) = IOEnv (\ env -> try (thing env)) + +--------------------------- +unsafeInterleaveM :: IOEnv env a -> IOEnv env a +unsafeInterleaveM (IOEnv m) = IOEnv (\ env -> unsafeInterleaveIO (m env)) + + +---------------------------------------------------------------------- +-- Accessing input/output +---------------------------------------------------------------------- + +ioToIOEnv :: IO a -> IOEnv env a +ioToIOEnv io = IOEnv (\ env -> io) + +newMutVar :: a -> IOEnv env (IORef a) +newMutVar val = IOEnv (\ env -> newIORef val) + +writeMutVar :: IORef a -> a -> IOEnv env () +writeMutVar var val = IOEnv (\ env -> writeIORef var val) + +readMutVar :: IORef a -> IOEnv env a +readMutVar var = IOEnv (\ env -> readIORef var) + +updMutVar :: IORef a -> (a->a) -> IOEnv env () +updMutVar var upd_fn = IOEnv (\ env -> do { v <- readIORef var; writeIORef var (upd_fn v) }) + + +---------------------------------------------------------------------- +-- Accessing the environment +---------------------------------------------------------------------- + +getEnv :: IOEnv env env +{-# INLINE getEnv #-} +getEnv = IOEnv (\ env -> return env) + +setEnv :: env' -> IOEnv env' a -> IOEnv env a +{-# INLINE setEnv #-} +setEnv new_env (IOEnv m) = IOEnv (\ env -> m new_env) + +updEnv :: (env -> env') -> IOEnv env' a -> IOEnv env a +{-# INLINE updEnv #-} +updEnv upd (IOEnv m) = IOEnv (\ env -> m (upd env)) + + +---------------------------------------------------------------------- +-- Standard combinators, but specialised for this monad +-- (for efficiency) +---------------------------------------------------------------------- + +mappM :: (a -> IOEnv env b) -> [a] -> IOEnv env [b] +mappM_ :: (a -> IOEnv env b) -> [a] -> IOEnv env () +mapSndM :: (b -> IOEnv env c) -> [(a,b)] -> IOEnv env [(a,c)] + -- Funny names to avoid clash with Prelude +sequenceM :: [IOEnv env a] -> IOEnv env [a] +sequenceM_ :: [IOEnv env a] -> IOEnv env () +foldlM :: (a -> b -> IOEnv env a) -> a -> [b] -> IOEnv env a +foldrM :: (b -> a -> IOEnv env a) -> a -> [b] -> IOEnv env a +mapAndUnzipM :: (a -> IOEnv env (b,c)) -> [a] -> IOEnv env ([b],[c]) +mapAndUnzip3M :: (a -> IOEnv env (b,c,d)) -> [a] -> IOEnv env ([b],[c],[d]) +checkM :: Bool -> IOEnv env a -> IOEnv env () -- Perform arg if bool is False +ifM :: Bool -> IOEnv env a -> IOEnv env () -- Perform arg if bool is True + +mappM f [] = return [] +mappM f (x:xs) = do { r <- f x; rs <- mappM f xs; return (r:rs) } + +mapSndM f [] = return [] +mapSndM f ((a,b):xs) = do { c <- f b; rs <- mapSndM f xs; return ((a,c):rs) } + +mappM_ f [] = return () +mappM_ f (x:xs) = f x >> mappM_ f xs + +zipWithM :: (a -> b -> IOEnv env c) -> [a] -> [b] -> IOEnv env [c] +zipWithM f [] bs = return [] +zipWithM f as [] = return [] +zipWithM f (a:as) (b:bs) = do { r <- f a b; rs <- zipWithM f as bs; return (r:rs) } + +zipWithM_ :: (a -> b -> IOEnv env c) -> [a] -> [b] -> IOEnv env () +zipWithM_ f [] bs = return () +zipWithM_ f as [] = return () +zipWithM_ f (a:as) (b:bs) = do { f a b; zipWithM_ f as bs } + +sequenceM [] = return [] +sequenceM (x:xs) = do { r <- x; rs <- sequenceM xs; return (r:rs) } + +sequenceM_ [] = return () +sequenceM_ (x:xs) = do { x; sequenceM_ xs } + +foldlM k z [] = return z +foldlM k z (x:xs) = do { r <- k z x; foldlM k r xs } + +foldrM k z [] = return z +foldrM k z (x:xs) = do { r <- foldrM k z xs; k x r } + +mapAndUnzipM f [] = return ([],[]) +mapAndUnzipM f (x:xs) = do { (r,s) <- f x; + (rs,ss) <- mapAndUnzipM f xs; + return (r:rs, s:ss) } + +mapAndUnzip3M f [] = return ([],[], []) +mapAndUnzip3M f (x:xs) = do { (r,s,t) <- f x; + (rs,ss,ts) <- mapAndUnzip3M f xs; + return (r:rs, s:ss, t:ts) } + +checkM True err = return () +checkM False err = do { err; return () } + +ifM True do_it = do { do_it; return () } +ifM False do_it = return () diff --git a/compiler/utils/ListSetOps.lhs b/compiler/utils/ListSetOps.lhs new file mode 100644 index 0000000000..02950722a2 --- /dev/null +++ b/compiler/utils/ListSetOps.lhs @@ -0,0 +1,227 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[ListSetOps]{Set-like operations on lists} + +\begin{code} +module ListSetOps ( + unionLists, minusList, insertList, + + -- Association lists + Assoc, assoc, assocMaybe, assocUsing, assocDefault, assocDefaultUsing, + emptyAssoc, unitAssoc, mapAssoc, plusAssoc_C, extendAssoc_C, + mkLookupFun, findInList, assocElts, + + -- Duplicate handling + hasNoDups, runs, removeDups, findDupsEq, + equivClasses, equivClassesByUniq + + ) where + +#include "HsVersions.h" + +import Outputable +import Unique ( Unique ) +import UniqFM ( eltsUFM, emptyUFM, addToUFM_C ) +import Util ( isn'tIn, isIn, mapAccumR, sortLe ) +import List ( partition ) +\end{code} + + +%************************************************************************ +%* * + Treating lists as sets + Assumes the lists contain no duplicates, but are unordered +%* * +%************************************************************************ + +\begin{code} +insertList :: Eq a => a -> [a] -> [a] +-- Assumes the arg list contains no dups; guarantees the result has no dups +insertList x xs | isIn "insert" x xs = xs + | otherwise = x : xs + +unionLists :: (Eq a) => [a] -> [a] -> [a] +-- Assumes that the arguments contain no duplicates +unionLists xs ys = [x | x <- xs, isn'tIn "unionLists" x ys] ++ ys + +minusList :: (Eq a) => [a] -> [a] -> [a] +-- Everything in the first list that is not in the second list: +minusList xs ys = [ x | x <- xs, isn'tIn "minusList" x ys] +\end{code} + + +%************************************************************************ +%* * +\subsection[Utils-assoc]{Association lists} +%* * +%************************************************************************ + +Inefficient finite maps based on association lists and equality. + +\begin{code} +type Assoc a b = [(a,b)] -- A finite mapping based on equality and association lists + +emptyAssoc :: Assoc a b +unitAssoc :: a -> b -> Assoc a b +assocElts :: Assoc a b -> [(a,b)] +assoc :: (Eq a) => String -> Assoc a b -> a -> b +assocDefault :: (Eq a) => b -> Assoc a b -> a -> b +assocUsing :: (a -> a -> Bool) -> String -> Assoc a b -> a -> b +assocMaybe :: (Eq a) => Assoc a b -> a -> Maybe b +assocDefaultUsing :: (a -> a -> Bool) -> b -> Assoc a b -> a -> b +mapAssoc :: (b -> c) -> Assoc a b -> Assoc a c +extendAssoc_C :: (Eq a) => (b -> b -> b) -> Assoc a b -> (a,b) -> Assoc a b +plusAssoc_C :: (Eq a) => (b -> b -> b) -> Assoc a b -> Assoc a b -> Assoc a b + -- combining fn takes (old->new->result) + +emptyAssoc = [] +unitAssoc a b = [(a,b)] +assocElts xs = xs + +assocDefaultUsing eq deflt ((k,v) : rest) key + | k `eq` key = v + | otherwise = assocDefaultUsing eq deflt rest key + +assocDefaultUsing eq deflt [] key = deflt + +assoc crash_msg list key = assocDefaultUsing (==) (panic ("Failed in assoc: " ++ crash_msg)) list key +assocDefault deflt list key = assocDefaultUsing (==) deflt list key +assocUsing eq crash_msg list key = assocDefaultUsing eq (panic ("Failed in assoc: " ++ crash_msg)) list key + +assocMaybe alist key + = lookup alist + where + lookup [] = Nothing + lookup ((tv,ty):rest) = if key == tv then Just ty else lookup rest + +mapAssoc f alist = [(key, f val) | (key,val) <- alist] + +plusAssoc_C combine [] new = new -- Shortcut for common case +plusAssoc_C combine old new = foldl (extendAssoc_C combine) old new + +extendAssoc_C combine old_list (new_key, new_val) + = go old_list + where + go [] = [(new_key, new_val)] + go ((old_key, old_val) : old_list) + | new_key == old_key = ((old_key, old_val `combine` new_val) : old_list) + | otherwise = (old_key, old_val) : go old_list +\end{code} + + +@mkLookupFun eq alist@ is a function which looks up +its argument in the association list @alist@, returning a Maybe type. +@mkLookupFunDef@ is similar except that it is given a value to return +on failure. + +\begin{code} +mkLookupFun :: (key -> key -> Bool) -- Equality predicate + -> [(key,val)] -- The assoc list + -> key -- The key + -> Maybe val -- The corresponding value + +mkLookupFun eq alist s + = case [a | (s',a) <- alist, s' `eq` s] of + [] -> Nothing + (a:_) -> Just a + +findInList :: (a -> Bool) -> [a] -> Maybe a +findInList p [] = Nothing +findInList p (x:xs) | p x = Just x + | otherwise = findInList p xs +\end{code} + + +%************************************************************************ +%* * +\subsection[Utils-dups]{Duplicate-handling} +%* * +%************************************************************************ + +\begin{code} +hasNoDups :: (Eq a) => [a] -> Bool + +hasNoDups xs = f [] xs + where + f seen_so_far [] = True + f seen_so_far (x:xs) = if x `is_elem` seen_so_far then + False + else + f (x:seen_so_far) xs + + is_elem = isIn "hasNoDups" +\end{code} + +\begin{code} +equivClasses :: (a -> a -> Ordering) -- Comparison + -> [a] + -> [[a]] + +equivClasses cmp stuff@[] = [] +equivClasses cmp stuff@[item] = [stuff] +equivClasses cmp items + = runs eq (sortLe le items) + where + eq a b = case cmp a b of { EQ -> True; _ -> False } + le a b = case cmp a b of { LT -> True; EQ -> True; GT -> False } +\end{code} + +The first cases in @equivClasses@ above are just to cut to the point +more quickly... + +@runs@ groups a list into a list of lists, each sublist being a run of +identical elements of the input list. It is passed a predicate @p@ which +tells when two elements are equal. + +\begin{code} +runs :: (a -> a -> Bool) -- Equality + -> [a] + -> [[a]] + +runs p [] = [] +runs p (x:xs) = case (span (p x) xs) of + (first, rest) -> (x:first) : (runs p rest) +\end{code} + +\begin{code} +removeDups :: (a -> a -> Ordering) -- Comparison function + -> [a] + -> ([a], -- List with no duplicates + [[a]]) -- List of duplicate groups. One representative from + -- each group appears in the first result + +removeDups cmp [] = ([], []) +removeDups cmp [x] = ([x],[]) +removeDups cmp xs + = case (mapAccumR collect_dups [] (equivClasses cmp xs)) of { (dups, xs') -> + (xs', dups) } + where + collect_dups dups_so_far [x] = (dups_so_far, x) + collect_dups dups_so_far dups@(x:xs) = (dups:dups_so_far, x) + +findDupsEq :: (a->a->Bool) -> [a] -> [[a]] +findDupsEq eq [] = [] +findDupsEq eq (x:xs) | null eq_xs = findDupsEq eq xs + | otherwise = (x:eq_xs) : findDupsEq eq neq_xs + where + (eq_xs, neq_xs) = partition (eq x) xs +\end{code} + + +\begin{code} +equivClassesByUniq :: (a -> Unique) -> [a] -> [[a]] + -- NB: it's *very* important that if we have the input list [a,b,c], + -- where a,b,c all have the same unique, then we get back the list + -- [a,b,c] + -- not + -- [c,b,a] + -- Hence the use of foldr, plus the reversed-args tack_on below +equivClassesByUniq get_uniq xs + = eltsUFM (foldr add emptyUFM xs) + where + add a ufm = addToUFM_C tack_on ufm (get_uniq a) [a] + tack_on old new = new++old +\end{code} + + diff --git a/compiler/utils/Maybes.lhs b/compiler/utils/Maybes.lhs new file mode 100644 index 0000000000..3c9bd693e6 --- /dev/null +++ b/compiler/utils/Maybes.lhs @@ -0,0 +1,123 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[Maybes]{The `Maybe' types and associated utility functions} + +\begin{code} +module Maybes ( + module Maybe, -- Re-export all of Maybe + + MaybeErr(..), -- Instance of Monad + failME, + + orElse, + mapCatMaybes, + allMaybes, + firstJust, + expectJust, + maybeToBool, + + thenMaybe, seqMaybe, returnMaybe, failMaybe + ) where + +#include "HsVersions.h" + +import Maybe + + +infixr 4 `orElse` +\end{code} + +%************************************************************************ +%* * +\subsection[Maybe type]{The @Maybe@ type} +%* * +%************************************************************************ + +\begin{code} +maybeToBool :: Maybe a -> Bool +maybeToBool Nothing = False +maybeToBool (Just x) = True +\end{code} + +@catMaybes@ takes a list of @Maybe@s and returns a list of +the contents of all the @Just@s in it. @allMaybes@ collects +a list of @Justs@ into a single @Just@, returning @Nothing@ if there +are any @Nothings@. + +\begin{code} +allMaybes :: [Maybe a] -> Maybe [a] +allMaybes [] = Just [] +allMaybes (Nothing : ms) = Nothing +allMaybes (Just x : ms) = case (allMaybes ms) of + Nothing -> Nothing + Just xs -> Just (x:xs) + +\end{code} + +@firstJust@ takes a list of @Maybes@ and returns the +first @Just@ if there is one, or @Nothing@ otherwise. + +\begin{code} +firstJust :: [Maybe a] -> Maybe a +firstJust [] = Nothing +firstJust (Just x : ms) = Just x +firstJust (Nothing : ms) = firstJust ms +\end{code} + +\begin{code} +expectJust :: String -> Maybe a -> a +{-# INLINE expectJust #-} +expectJust err (Just x) = x +expectJust err Nothing = error ("expectJust " ++ err) +\end{code} + +\begin{code} +mapCatMaybes :: (a -> Maybe b) -> [a] -> [b] +mapCatMaybes f [] = [] +mapCatMaybes f (x:xs) = case f x of + Just y -> y : mapCatMaybes f xs + Nothing -> mapCatMaybes f xs +\end{code} + +The Maybe monad +~~~~~~~~~~~~~~~ +\begin{code} +seqMaybe :: Maybe a -> Maybe a -> Maybe a +seqMaybe (Just x) _ = Just x +seqMaybe Nothing my = my + +thenMaybe :: Maybe a -> (a -> Maybe b) -> Maybe b +thenMaybe ma mb = case ma of + Just x -> mb x + Nothing -> Nothing + +returnMaybe :: a -> Maybe a +returnMaybe = Just + +failMaybe :: Maybe a +failMaybe = Nothing + +orElse :: Maybe a -> a -> a +(Just x) `orElse` y = x +Nothing `orElse` y = y +\end{code} + + +%************************************************************************ +%* * +\subsection[MaybeErr type]{The @MaybeErr@ type} +%* * +%************************************************************************ + +\begin{code} +data MaybeErr err val = Succeeded val | Failed err + +instance Monad (MaybeErr err) where + return v = Succeeded v + Succeeded v >>= k = k v + Failed e >>= k = Failed e + +failME :: err -> MaybeErr err val +failME e = Failed e +\end{code} diff --git a/compiler/utils/OrdList.lhs b/compiler/utils/OrdList.lhs new file mode 100644 index 0000000000..7f22b38e49 --- /dev/null +++ b/compiler/utils/OrdList.lhs @@ -0,0 +1,83 @@ +% +% (c) The AQUA Project, Glasgow University, 1993-1998 +% + +This is useful, general stuff for the Native Code Generator. + +Provide trees (of instructions), so that lists of instructions +can be appended in linear time. + +\begin{code} +module OrdList ( + OrdList, + nilOL, isNilOL, unitOL, appOL, consOL, snocOL, concatOL, + fromOL, toOL, foldrOL, foldlOL +) where + +infixl 5 `appOL` +infixl 5 `snocOL` +infixr 5 `consOL` + +data OrdList a + = Many [a] + | Two (OrdList a) (OrdList a) + | One a + | None + +nilOL :: OrdList a +isNilOL :: OrdList a -> Bool + +unitOL :: a -> OrdList a +snocOL :: OrdList a -> a -> OrdList a +consOL :: a -> OrdList a -> OrdList a +appOL :: OrdList a -> OrdList a -> OrdList a +concatOL :: [OrdList a] -> OrdList a + +nilOL = None +unitOL as = One as +snocOL as b = Two as (One b) +consOL a bs = Two (One a) bs +concatOL aas = foldr Two None aas + +isNilOL None = True +isNilOL (One _) = False +isNilOL (Two as bs) = isNilOL as && isNilOL bs +isNilOL (Many xs) = null xs + +appOL None bs = bs +appOL as None = as +appOL as bs = Two as bs + +mapOL :: (a -> b) -> OrdList a -> OrdList b +mapOL f None = None +mapOL f (One x) = One (f x) +mapOL f (Two x y) = Two (mapOL f x) (mapOL f y) +mapOL f (Many xs) = Many (map f xs) + +instance Functor OrdList where + fmap = mapOL + +foldrOL :: (a->b->b) -> b -> OrdList a -> b +foldrOL k z None = z +foldrOL k z (One x) = k x z +foldrOL k z (Two b1 b2) = foldrOL k (foldrOL k z b2) b1 +foldrOL k z (Many xs) = foldr k z xs + +foldlOL :: (b->a->b) -> b -> OrdList a -> b +foldlOL k z None = z +foldlOL k z (One x) = k z x +foldlOL k z (Two b1 b2) = foldlOL k (foldlOL k z b1) b2 +foldlOL k z (Many xs) = foldl k z xs + +fromOL :: OrdList a -> [a] +fromOL ol + = flat ol [] + where + flat None rest = rest + flat (One x) rest = x:rest + flat (Two a b) rest = flat a (flat b rest) + flat (Many xs) rest = xs ++ rest + +toOL :: [a] -> OrdList a +toOL xs = Many xs +\end{code} diff --git a/compiler/utils/Outputable.lhs b/compiler/utils/Outputable.lhs new file mode 100644 index 0000000000..cf99e12bcf --- /dev/null +++ b/compiler/utils/Outputable.lhs @@ -0,0 +1,540 @@ +% +% (c) The GRASP Project, Glasgow University, 1992-1998 +% +\section[Outputable]{Classes for pretty-printing} + +Defines classes for pretty-printing and forcing, both forms of +``output.'' + +\begin{code} + +module Outputable ( + Outputable(..), OutputableBndr(..), -- Class + + BindingSite(..), + + PprStyle, CodeStyle(..), PrintUnqualified, alwaysQualify, + getPprStyle, withPprStyle, withPprStyleDoc, pprDeeper, pprSetDepth, + codeStyle, userStyle, debugStyle, dumpStyle, asmStyle, + ifPprDebug, unqualStyle, + mkErrStyle, defaultErrStyle, defaultDumpStyle, defaultUserStyle, + + SDoc, -- Abstract + docToSDoc, + interppSP, interpp'SP, pprQuotedList, pprWithCommas, + empty, nest, + text, char, ftext, ptext, + int, integer, float, double, rational, + parens, brackets, braces, quotes, doubleQuotes, angleBrackets, + semi, comma, colon, dcolon, space, equals, dot, arrow, + lparen, rparen, lbrack, rbrack, lbrace, rbrace, underscore, + (<>), (<+>), hcat, hsep, + ($$), ($+$), vcat, + sep, cat, + fsep, fcat, + hang, punctuate, + speakNth, speakNTimes, speakN, speakNOf, plural, + + printSDoc, printErrs, printDump, + printForC, printForAsm, printForUser, + pprCode, mkCodeStyle, + showSDoc, showSDocForUser, showSDocDebug, showSDocDump, + showSDocUnqual, showsPrecSDoc, + pprHsChar, pprHsString, + + -- error handling + pprPanic, assertPprPanic, pprPanic#, pprPgmError, + pprTrace, warnPprTrace, + trace, pgmError, panic, panic#, assertPanic + ) where + +#include "HsVersions.h" + + +import {-# SOURCE #-} Module( Module ) +import {-# SOURCE #-} OccName( OccName ) + +import StaticFlags ( opt_PprStyle_Debug, opt_PprUserLength ) +import PackageConfig ( PackageId, packageIdString ) +import FastString +import qualified Pretty +import Pretty ( Doc, Mode(..) ) +import Panic + +import DATA_WORD ( Word32 ) + +import IO ( Handle, stderr, stdout, hFlush ) +import Char ( ord ) +\end{code} + + +%************************************************************************ +%* * +\subsection{The @PprStyle@ data type} +%* * +%************************************************************************ + +\begin{code} +data PprStyle + = PprUser PrintUnqualified Depth + -- Pretty-print in a way that will make sense to the + -- ordinary user; must be very close to Haskell + -- syntax, etc. + -- Assumes printing tidied code: non-system names are + -- printed without uniques. + + | PprCode CodeStyle + -- Print code; either C or assembler + + | PprDump -- For -ddump-foo; less verbose than PprDebug. + -- Does not assume tidied code: non-external names + -- are printed with uniques. + + | PprDebug -- Full debugging output + +data CodeStyle = CStyle -- The format of labels differs for C and assembler + | AsmStyle + +data Depth = AllTheWay + | PartWay Int -- 0 => stop + + +type PrintUnqualified = Module -> OccName -> Bool + -- This function tells when it's ok to print + -- a (Global) name unqualified + +alwaysQualify,neverQualify :: PrintUnqualified +alwaysQualify m n = False +neverQualify m n = True + +defaultUserStyle = mkUserStyle alwaysQualify AllTheWay + +defaultDumpStyle | opt_PprStyle_Debug = PprDebug + | otherwise = PprDump + +mkErrStyle :: PrintUnqualified -> PprStyle +-- Style for printing error messages +mkErrStyle print_unqual = mkUserStyle print_unqual (PartWay opt_PprUserLength) + +defaultErrStyle :: PprStyle +-- Default style for error messages +-- It's a bit of a hack because it doesn't take into account what's in scope +-- Only used for desugarer warnings, and typechecker errors in interface sigs +defaultErrStyle + | opt_PprStyle_Debug = mkUserStyle alwaysQualify AllTheWay + | otherwise = mkUserStyle alwaysQualify (PartWay opt_PprUserLength) + +mkUserStyle unqual depth | opt_PprStyle_Debug = PprDebug + | otherwise = PprUser unqual depth +\end{code} + +Orthogonal to the above printing styles are (possibly) some +command-line flags that affect printing (often carried with the +style). The most likely ones are variations on how much type info is +shown. + +The following test decides whether or not we are actually generating +code (either C or assembly), or generating interface files. + +%************************************************************************ +%* * +\subsection{The @SDoc@ data type} +%* * +%************************************************************************ + +\begin{code} +type SDoc = PprStyle -> Doc + +withPprStyle :: PprStyle -> SDoc -> SDoc +withPprStyle sty d sty' = d sty + +withPprStyleDoc :: PprStyle -> SDoc -> Doc +withPprStyleDoc sty d = d sty + +pprDeeper :: SDoc -> SDoc +pprDeeper d (PprUser unqual (PartWay 0)) = Pretty.text "..." +pprDeeper d (PprUser unqual (PartWay n)) = d (PprUser unqual (PartWay (n-1))) +pprDeeper d other_sty = d other_sty + +pprSetDepth :: Int -> SDoc -> SDoc +pprSetDepth n d (PprUser unqual _) = d (PprUser unqual (PartWay n)) +pprSetDepth n d other_sty = d other_sty + +getPprStyle :: (PprStyle -> SDoc) -> SDoc +getPprStyle df sty = df sty sty +\end{code} + +\begin{code} +unqualStyle :: PprStyle -> PrintUnqualified +unqualStyle (PprUser unqual _) m n = unqual m n +unqualStyle other m n = False + +codeStyle :: PprStyle -> Bool +codeStyle (PprCode _) = True +codeStyle _ = False + +asmStyle :: PprStyle -> Bool +asmStyle (PprCode AsmStyle) = True +asmStyle other = False + +dumpStyle :: PprStyle -> Bool +dumpStyle PprDump = True +dumpStyle other = False + +debugStyle :: PprStyle -> Bool +debugStyle PprDebug = True +debugStyle other = False + +userStyle :: PprStyle -> Bool +userStyle (PprUser _ _) = True +userStyle other = False + +ifPprDebug :: SDoc -> SDoc -- Empty for non-debug style +ifPprDebug d sty@PprDebug = d sty +ifPprDebug d sty = Pretty.empty +\end{code} + +\begin{code} +-- Unused [7/02 sof] +printSDoc :: SDoc -> PprStyle -> IO () +printSDoc d sty = do + Pretty.printDoc PageMode stdout (d sty) + hFlush stdout + +-- I'm not sure whether the direct-IO approach of Pretty.printDoc +-- above is better or worse than the put-big-string approach here +printErrs :: Doc -> IO () +printErrs doc = do Pretty.printDoc PageMode stderr doc + hFlush stderr + +printDump :: SDoc -> IO () +printDump doc = do + Pretty.printDoc PageMode stdout (better_doc defaultDumpStyle) + hFlush stdout + where + better_doc = doc $$ text "" + +printForUser :: Handle -> PrintUnqualified -> SDoc -> IO () +printForUser handle unqual doc + = Pretty.printDoc PageMode handle (doc (mkUserStyle unqual AllTheWay)) + +-- printForC, printForAsm do what they sound like +printForC :: Handle -> SDoc -> IO () +printForC handle doc = Pretty.printDoc LeftMode handle (doc (PprCode CStyle)) + +printForAsm :: Handle -> SDoc -> IO () +printForAsm handle doc = Pretty.printDoc LeftMode handle (doc (PprCode AsmStyle)) + +pprCode :: CodeStyle -> SDoc -> SDoc +pprCode cs d = withPprStyle (PprCode cs) d + +mkCodeStyle :: CodeStyle -> PprStyle +mkCodeStyle = PprCode + +-- Can't make SDoc an instance of Show because SDoc is just a function type +-- However, Doc *is* an instance of Show +-- showSDoc just blasts it out as a string +showSDoc :: SDoc -> String +showSDoc d = show (d defaultUserStyle) + +showSDocForUser :: PrintUnqualified -> SDoc -> String +showSDocForUser unqual doc = show (doc (mkUserStyle unqual AllTheWay)) + +showSDocUnqual :: SDoc -> String +-- Only used in the gruesome HsExpr.isOperator +showSDocUnqual d = show (d (mkUserStyle neverQualify AllTheWay)) + +showsPrecSDoc :: Int -> SDoc -> ShowS +showsPrecSDoc p d = showsPrec p (d defaultUserStyle) + +showSDocDump :: SDoc -> String +showSDocDump d = show (d PprDump) + +showSDocDebug :: SDoc -> String +showSDocDebug d = show (d PprDebug) +\end{code} + +\begin{code} +docToSDoc :: Doc -> SDoc +docToSDoc d = \_ -> d + +empty sty = Pretty.empty +text s sty = Pretty.text s +char c sty = Pretty.char c +ftext s sty = Pretty.ftext s +ptext s sty = Pretty.ptext s +int n sty = Pretty.int n +integer n sty = Pretty.integer n +float n sty = Pretty.float n +double n sty = Pretty.double n +rational n sty = Pretty.rational n + +parens d sty = Pretty.parens (d sty) +braces d sty = Pretty.braces (d sty) +brackets d sty = Pretty.brackets (d sty) +doubleQuotes d sty = Pretty.doubleQuotes (d sty) +angleBrackets d = char '<' <> d <> char '>' + +-- quotes encloses something in single quotes... +-- but it omits them if the thing ends in a single quote +-- so that we don't get `foo''. Instead we just have foo'. +quotes d sty = case show pp_d of + ('\'' : _) -> pp_d + other -> Pretty.quotes pp_d + where + pp_d = d sty + +semi sty = Pretty.semi +comma sty = Pretty.comma +colon sty = Pretty.colon +equals sty = Pretty.equals +space sty = Pretty.space +lparen sty = Pretty.lparen +rparen sty = Pretty.rparen +lbrack sty = Pretty.lbrack +rbrack sty = Pretty.rbrack +lbrace sty = Pretty.lbrace +rbrace sty = Pretty.rbrace +dcolon sty = Pretty.ptext SLIT("::") +arrow sty = Pretty.ptext SLIT("->") +underscore = char '_' +dot = char '.' + +nest n d sty = Pretty.nest n (d sty) +(<>) d1 d2 sty = (Pretty.<>) (d1 sty) (d2 sty) +(<+>) d1 d2 sty = (Pretty.<+>) (d1 sty) (d2 sty) +($$) d1 d2 sty = (Pretty.$$) (d1 sty) (d2 sty) +($+$) d1 d2 sty = (Pretty.$+$) (d1 sty) (d2 sty) + +hcat ds sty = Pretty.hcat [d sty | d <- ds] +hsep ds sty = Pretty.hsep [d sty | d <- ds] +vcat ds sty = Pretty.vcat [d sty | d <- ds] +sep ds sty = Pretty.sep [d sty | d <- ds] +cat ds sty = Pretty.cat [d sty | d <- ds] +fsep ds sty = Pretty.fsep [d sty | d <- ds] +fcat ds sty = Pretty.fcat [d sty | d <- ds] + +hang d1 n d2 sty = Pretty.hang (d1 sty) n (d2 sty) + +punctuate :: SDoc -> [SDoc] -> [SDoc] +punctuate p [] = [] +punctuate p (d:ds) = go d ds + where + go d [] = [d] + go d (e:es) = (d <> p) : go e es +\end{code} + + +%************************************************************************ +%* * +\subsection[Outputable-class]{The @Outputable@ class} +%* * +%************************************************************************ + +\begin{code} +class Outputable a where + ppr :: a -> SDoc +\end{code} + +\begin{code} +instance Outputable Bool where + ppr True = ptext SLIT("True") + ppr False = ptext SLIT("False") + +instance Outputable Int where + ppr n = int n + +instance Outputable () where + ppr _ = text "()" + +instance (Outputable a) => Outputable [a] where + ppr xs = brackets (fsep (punctuate comma (map ppr xs))) + +instance (Outputable a, Outputable b) => Outputable (a, b) where + ppr (x,y) = parens (sep [ppr x <> comma, ppr y]) + +instance Outputable a => Outputable (Maybe a) where + ppr Nothing = ptext SLIT("Nothing") + ppr (Just x) = ptext SLIT("Just") <+> ppr x + +-- ToDo: may not be used +instance (Outputable a, Outputable b, Outputable c) => Outputable (a, b, c) where + ppr (x,y,z) = + parens (sep [ppr x <> comma, + ppr y <> comma, + ppr z ]) + +instance (Outputable a, Outputable b, Outputable c, Outputable d) => + Outputable (a, b, c, d) where + ppr (x,y,z,w) = + parens (sep [ppr x <> comma, + ppr y <> comma, + ppr z <> comma, + ppr w]) + +instance Outputable FastString where + ppr fs = ftext fs -- Prints an unadorned string, + -- no double quotes or anything + +instance Outputable PackageId where + ppr pid = text (packageIdString pid) +\end{code} + + +%************************************************************************ +%* * +\subsection{The @OutputableBndr@ class} +%* * +%************************************************************************ + +When we print a binder, we often want to print its type too. +The @OutputableBndr@ class encapsulates this idea. + +@BindingSite@ is used to tell the thing that prints binder what +language construct is binding the identifier. This can be used +to decide how much info to print. + +\begin{code} +data BindingSite = LambdaBind | CaseBind | LetBind + +class Outputable a => OutputableBndr a where + pprBndr :: BindingSite -> a -> SDoc + pprBndr b x = ppr x +\end{code} + + + +%************************************************************************ +%* * +\subsection{Random printing helpers} +%* * +%************************************************************************ + +\begin{code} +-- We have 31-bit Chars and will simply use Show instances +-- of Char and String. + +pprHsChar :: Char -> SDoc +pprHsChar c | c > '\x10ffff' = char '\\' <> text (show (fromIntegral (ord c) :: Word32)) + | otherwise = text (show c) + +pprHsString :: FastString -> SDoc +pprHsString fs = text (show (unpackFS fs)) +\end{code} + + +%************************************************************************ +%* * +\subsection{Other helper functions} +%* * +%************************************************************************ + +\begin{code} +pprWithCommas :: (a -> SDoc) -> [a] -> SDoc +pprWithCommas pp xs = fsep (punctuate comma (map pp xs)) + +interppSP :: Outputable a => [a] -> SDoc +interppSP xs = sep (map ppr xs) + +interpp'SP :: Outputable a => [a] -> SDoc +interpp'SP xs = sep (punctuate comma (map ppr xs)) + +pprQuotedList :: Outputable a => [a] -> SDoc +-- [x,y,z] ==> `x', `y', `z' +pprQuotedList xs = hsep (punctuate comma (map (quotes . ppr) xs)) +\end{code} + + +%************************************************************************ +%* * +\subsection{Printing numbers verbally} +%* * +%************************************************************************ + +@speakNth@ converts an integer to a verbal index; eg 1 maps to +``first'' etc. + +\begin{code} +speakNth :: Int -> SDoc +speakNth 1 = ptext SLIT("first") +speakNth 2 = ptext SLIT("second") +speakNth 3 = ptext SLIT("third") +speakNth 4 = ptext SLIT("fourth") +speakNth 5 = ptext SLIT("fifth") +speakNth 6 = ptext SLIT("sixth") +speakNth n = hcat [ int n, text suffix ] + where + suffix | n <= 20 = "th" -- 11,12,13 are non-std + | last_dig == 1 = "st" + | last_dig == 2 = "nd" + | last_dig == 3 = "rd" + | otherwise = "th" + + last_dig = n `rem` 10 + +speakN :: Int -> SDoc +speakN 0 = ptext SLIT("none") -- E.g. "he has none" +speakN 1 = ptext SLIT("one") -- E.g. "he has one" +speakN 2 = ptext SLIT("two") +speakN 3 = ptext SLIT("three") +speakN 4 = ptext SLIT("four") +speakN 5 = ptext SLIT("five") +speakN 6 = ptext SLIT("six") +speakN n = int n + +speakNOf :: Int -> SDoc -> SDoc +speakNOf 0 d = ptext SLIT("no") <+> d <> char 's' -- E.g. "no arguments" +speakNOf 1 d = ptext SLIT("one") <+> d -- E.g. "one argument" +speakNOf n d = speakN n <+> d <> char 's' -- E.g. "three arguments" + +speakNTimes :: Int {- >=1 -} -> SDoc +speakNTimes t | t == 1 = ptext SLIT("once") + | t == 2 = ptext SLIT("twice") + | otherwise = speakN t <+> ptext SLIT("times") + +plural [x] = empty +plural xs = char 's' +\end{code} + + +%************************************************************************ +%* * +\subsection{Error handling} +%* * +%************************************************************************ + +\begin{code} +pprPanic, pprPgmError :: String -> SDoc -> a +pprTrace :: String -> SDoc -> a -> a +pprPanic = pprAndThen panic -- Throw an exn saying "bug in GHC" + +pprPgmError = pprAndThen pgmError -- Throw an exn saying "bug in pgm being compiled" + -- (used for unusual pgm errors) +pprTrace = pprAndThen trace + +pprPanic# heading pretty_msg = panic# (show (doc PprDebug)) + where + doc = text heading <+> pretty_msg + +pprAndThen :: (String -> a) -> String -> SDoc -> a +pprAndThen cont heading pretty_msg = cont (show (doc PprDebug)) + where + doc = sep [text heading, nest 4 pretty_msg] + +assertPprPanic :: String -> Int -> SDoc -> a +assertPprPanic file line msg + = panic (show (doc PprDebug)) + where + doc = sep [hsep[text "ASSERT failed! file", + text file, + text "line", int line], + msg] + +warnPprTrace :: Bool -> String -> Int -> SDoc -> a -> a +warnPprTrace False file line msg x = x +warnPprTrace True file line msg x + = trace (show (doc PprDebug)) x + where + doc = sep [hsep [text "WARNING: file", text file, text "line", int line], + msg] +\end{code} diff --git a/compiler/utils/Panic.lhs b/compiler/utils/Panic.lhs new file mode 100644 index 0000000000..1a74d5db32 --- /dev/null +++ b/compiler/utils/Panic.lhs @@ -0,0 +1,250 @@ +% +% (c) The GRASP Project, Glasgow University, 1992-2000 +% +\section{Panic error messages} + +Defines basic funtions for printing error messages. + +It's hard to put these functions anywhere else without causing +some unnecessary loops in the module dependency graph. + +\begin{code} +module Panic + ( + GhcException(..), showGhcException, ghcError, progName, + pgmError, + + panic, panic#, assertPanic, trace, + + Exception.Exception(..), showException, try, tryJust, tryMost, tryUser, + catchJust, ioErrors, throwTo, + + installSignalHandlers, interruptTargetThread + ) where + +#include "HsVersions.h" + +import Config +import FastTypes + +#ifndef mingw32_HOST_OS +# if __GLASGOW_HASKELL__ > 504 +import System.Posix.Signals +# else +import Posix ( Handler(Catch), installHandler, sigINT, sigQUIT ) +# endif /* GHC > 504 */ +#endif /* mingw32_HOST_OS */ + +#if defined(mingw32_HOST_OS) && __GLASGOW_HASKELL__ >= 603 +import GHC.ConsoleHandler +#endif + +# if __GLASGOW_HASKELL__ < 500 +import EXCEPTION ( raiseInThread ) +# else +import EXCEPTION ( throwTo ) +# endif /* GHC < 500 */ + +#if __GLASGOW_HASKELL__ > 408 +import EXCEPTION ( catchJust, tryJust, ioErrors ) +#endif + +import CONCURRENT ( myThreadId, MVar, ThreadId, withMVar, newEmptyMVar ) +import DYNAMIC +import qualified EXCEPTION as Exception +import TRACE ( trace ) +import UNSAFE_IO ( unsafePerformIO ) +import IO ( isUserError ) + +import System +\end{code} + +GHC's own exception type. + +\begin{code} +ghcError :: GhcException -> a +ghcError e = Exception.throwDyn e + +-- error messages all take the form +-- +-- <location>: <error> +-- +-- If the location is on the command line, or in GHC itself, then +-- <location>="ghc". All of the error types below correspond to +-- a <location> of "ghc", except for ProgramError (where the string is +-- assumed to contain a location already, so we don't print one). + +data GhcException + = PhaseFailed String -- name of phase + ExitCode -- an external phase (eg. cpp) failed + | Interrupted -- someone pressed ^C + | UsageError String -- prints the short usage msg after the error + | CmdLineError String -- cmdline prob, but doesn't print usage + | Panic String -- the `impossible' happened + | InstallationError String -- an installation problem + | ProgramError String -- error in the user's code, probably + deriving Eq + +progName = unsafePerformIO (getProgName) +{-# NOINLINE progName #-} + +short_usage = "Usage: For basic information, try the `--help' option." + +showException :: Exception.Exception -> String +-- Show expected dynamic exceptions specially +showException (Exception.DynException d) | Just e <- fromDynamic d + = show (e::GhcException) +showException other_exn = show other_exn + +instance Show GhcException where + showsPrec _ e@(ProgramError _) = showGhcException e + showsPrec _ e = showString progName . showString ": " . showGhcException e + +showGhcException (UsageError str) + = showString str . showChar '\n' . showString short_usage +showGhcException (PhaseFailed phase code) + = showString "phase `" . showString phase . + showString "' failed (exitcode = " . shows int_code . + showString ")" + where + int_code = + case code of + ExitSuccess -> (0::Int) + ExitFailure x -> x +showGhcException (CmdLineError str) + = showString str +showGhcException (ProgramError str) + = showString str +showGhcException (InstallationError str) + = showString str +showGhcException (Interrupted) + = showString "interrupted" +showGhcException (Panic s) + = showString ("panic! (the 'impossible' happened)\n" + ++ " (GHC version " ++ cProjectVersion ++ " for " ++ TargetPlatform_NAME ++ "):\n\t" + ++ s ++ "\n\n" + ++ "Please report this as a GHC bug: http://www.haskell.org/ghc/reportabug\n") + +#if __GLASGOW_HASKELL__ < 603 +myMkTyConApp = mkAppTy +#else +myMkTyConApp = mkTyConApp +#endif + +ghcExceptionTc = mkTyCon "GhcException" +{-# NOINLINE ghcExceptionTc #-} +instance Typeable GhcException where + typeOf _ = myMkTyConApp ghcExceptionTc [] +\end{code} + +Panics and asserts. + +\begin{code} +panic, pgmError :: String -> a +panic x = Exception.throwDyn (Panic x) +pgmError x = Exception.throwDyn (ProgramError x) + +-- #-versions because panic can't return an unboxed int, and that's +-- what TAG_ is with GHC at the moment. Ugh. (Simon) +-- No, man -- Too Beautiful! (Will) + +panic# :: String -> FastInt +panic# s = case (panic s) of () -> _ILIT 0 + +assertPanic :: String -> Int -> a +assertPanic file line = + Exception.throw (Exception.AssertionFailed + ("ASSERT failed! file " ++ file ++ ", line " ++ show line)) +\end{code} + +\begin{code} +-- | tryMost is like try, but passes through Interrupted and Panic +-- exceptions. Used when we want soft failures when reading interface +-- files, for example. + +tryMost :: IO a -> IO (Either Exception.Exception a) +tryMost action = do r <- try action; filter r + where + filter (Left e@(Exception.DynException d)) + | Just ghc_ex <- fromDynamic d + = case ghc_ex of + Interrupted -> Exception.throw e + Panic _ -> Exception.throw e + _other -> return (Left e) + filter other + = return other + +-- | tryUser is like try, but catches only UserErrors. +-- These are the ones that are thrown by the TcRn monad +-- to signal an error in the program being compiled +tryUser :: IO a -> IO (Either Exception.Exception a) +tryUser action = tryJust tc_errors action + where +#if __GLASGOW_HASKELL__ > 504 || __GLASGOW_HASKELL__ < 500 + tc_errors e@(Exception.IOException ioe) | isUserError ioe = Just e +#elif __GLASGOW_HASKELL__ == 502 + tc_errors e@(UserError _) = Just e +#else + tc_errors e@(Exception.IOException ioe) | isUserError e = Just e +#endif + tc_errors _other = Nothing +\end{code} + +Compatibility stuff: + +\begin{code} +#if __GLASGOW_HASKELL__ <= 408 +try = Exception.tryAllIO +#else +try = Exception.try +#endif + +#if __GLASGOW_HASKELL__ <= 408 +catchJust = Exception.catchIO +tryJust = Exception.tryIO +ioErrors = Exception.justIoErrors +throwTo = Exception.raiseInThread +#endif +\end{code} + +Standard signal handlers for catching ^C, which just throw an +exception in the target thread. The current target thread is +the thread at the head of the list in the MVar passed to +installSignalHandlers. + +\begin{code} +installSignalHandlers :: IO () +installSignalHandlers = do + let + interrupt_exn = Exception.DynException (toDyn Interrupted) + + interrupt = do + withMVar interruptTargetThread $ \targets -> + case targets of + [] -> return () + (thread:_) -> throwTo thread interrupt_exn + -- +#if !defined(mingw32_HOST_OS) + installHandler sigQUIT (Catch interrupt) Nothing + installHandler sigINT (Catch interrupt) Nothing + return () +#elif __GLASGOW_HASKELL__ >= 603 + -- GHC 6.3+ has support for console events on Windows + -- NOTE: running GHCi under a bash shell for some reason requires + -- you to press Ctrl-Break rather than Ctrl-C to provoke + -- an interrupt. Ctrl-C is getting blocked somewhere, I don't know + -- why --SDM 17/12/2004 + let sig_handler ControlC = interrupt + sig_handler Break = interrupt + sig_handler _ = return () + + installHandler (Catch sig_handler) + return () +#else + return () -- nothing +#endif + +{-# NOINLINE interruptTargetThread #-} +interruptTargetThread :: MVar [ThreadId] +interruptTargetThread = unsafePerformIO newEmptyMVar +\end{code} diff --git a/compiler/utils/Pretty.lhs b/compiler/utils/Pretty.lhs new file mode 100644 index 0000000000..ec8f1e75ad --- /dev/null +++ b/compiler/utils/Pretty.lhs @@ -0,0 +1,1075 @@ +********************************************************************************* +* * +* John Hughes's and Simon Peyton Jones's Pretty Printer Combinators * +* * +* based on "The Design of a Pretty-printing Library" * +* in Advanced Functional Programming, * +* Johan Jeuring and Erik Meijer (eds), LNCS 925 * +* http://www.cs.chalmers.se/~rjmh/Papers/pretty.ps * +* * +* Heavily modified by Simon Peyton Jones, Dec 96 * +* * +********************************************************************************* + +Version 3.0 28 May 1997 + * Cured massive performance bug. If you write + + foldl <> empty (map (text.show) [1..10000]) + + you get quadratic behaviour with V2.0. Why? For just the same reason as you get + quadratic behaviour with left-associated (++) chains. + + This is really bad news. One thing a pretty-printer abstraction should + certainly guarantee is insensivity to associativity. It matters: suddenly + GHC's compilation times went up by a factor of 100 when I switched to the + new pretty printer. + + I fixed it with a bit of a hack (because I wanted to get GHC back on the + road). I added two new constructors to the Doc type, Above and Beside: + + <> = Beside + $$ = Above + + Then, where I need to get to a "TextBeside" or "NilAbove" form I "force" + the Doc to squeeze out these suspended calls to Beside and Above; but in so + doing I re-associate. It's quite simple, but I'm not satisfied that I've done + the best possible job. I'll send you the code if you are interested. + + * Added new exports: + punctuate, hang + int, integer, float, double, rational, + lparen, rparen, lbrack, rbrack, lbrace, rbrace, + + * fullRender's type signature has changed. Rather than producing a string it + now takes an extra couple of arguments that tells it how to glue fragments + of output together: + + fullRender :: Mode + -> Int -- Line length + -> Float -- Ribbons per line + -> (TextDetails -> a -> a) -- What to do with text + -> a -- What to do at the end + -> Doc + -> a -- Result + + The "fragments" are encapsulated in the TextDetails data type: + data TextDetails = Chr Char + | Str String + | PStr FastString + + The Chr and Str constructors are obvious enough. The PStr constructor has a packed + string (FastString) inside it. It's generated by using the new "ptext" export. + + An advantage of this new setup is that you can get the renderer to do output + directly (by passing in a function of type (TextDetails -> IO () -> IO ()), + rather than producing a string that you then print. + + +Version 2.0 24 April 1997 + * Made empty into a left unit for <> as well as a right unit; + it is also now true that + nest k empty = empty + which wasn't true before. + + * Fixed an obscure bug in sep that occassionally gave very wierd behaviour + + * Added $+$ + + * Corrected and tidied up the laws and invariants + +====================================================================== +Relative to John's original paper, there are the following new features: + +1. There's an empty document, "empty". It's a left and right unit for + both <> and $$, and anywhere in the argument list for + sep, hcat, hsep, vcat, fcat etc. + + It is Really Useful in practice. + +2. There is a paragraph-fill combinator, fsep, that's much like sep, + only it keeps fitting things on one line until itc can't fit any more. + +3. Some random useful extra combinators are provided. + <+> puts its arguments beside each other with a space between them, + unless either argument is empty in which case it returns the other + + + hcat is a list version of <> + hsep is a list version of <+> + vcat is a list version of $$ + + sep (separate) is either like hsep or like vcat, depending on what fits + + cat is behaves like sep, but it uses <> for horizontal conposition + fcat is behaves like fsep, but it uses <> for horizontal conposition + + These new ones do the obvious things: + char, semi, comma, colon, space, + parens, brackets, braces, + quotes, doubleQuotes + +4. The "above" combinator, $$, now overlaps its two arguments if the + last line of the top argument stops before the first line of the second begins. + For example: text "hi" $$ nest 5 "there" + lays out as + hi there + rather than + hi + there + + There are two places this is really useful + + a) When making labelled blocks, like this: + Left -> code for left + Right -> code for right + LongLongLongLabel -> + code for longlonglonglabel + The block is on the same line as the label if the label is + short, but on the next line otherwise. + + b) When laying out lists like this: + [ first + , second + , third + ] + which some people like. But if the list fits on one line + you want [first, second, third]. You can't do this with + John's original combinators, but it's quite easy with the + new $$. + + The combinator $+$ gives the original "never-overlap" behaviour. + +5. Several different renderers are provided: + * a standard one + * one that uses cut-marks to avoid deeply-nested documents + simply piling up in the right-hand margin + * one that ignores indentation (fewer chars output; good for machines) + * one that ignores indentation and newlines (ditto, only more so) + +6. Numerous implementation tidy-ups + Use of unboxed data types to speed up the implementation + + + +\begin{code} +module Pretty ( + Doc, -- Abstract + Mode(..), TextDetails(..), + + empty, isEmpty, nest, + + text, char, ftext, ptext, + int, integer, float, double, rational, + parens, brackets, braces, quotes, doubleQuotes, + semi, comma, colon, space, equals, + lparen, rparen, lbrack, rbrack, lbrace, rbrace, + + (<>), (<+>), hcat, hsep, + ($$), ($+$), vcat, + sep, cat, + fsep, fcat, + + hang, punctuate, + +-- renderStyle, -- Haskell 1.3 only + render, fullRender, printDoc, showDocWith + ) where + +#include "HsVersions.h" + +import BufWrite +import FastString + +import GLAEXTS + +import Numeric (fromRat) +import IO + +import System.IO ( hPutBuf ) + +import GHC.Base ( unpackCString# ) +import GHC.Ptr ( Ptr(..) ) + +-- Don't import Util( assertPanic ) because it makes a loop in the module structure + +infixl 6 <> +infixl 6 <+> +infixl 5 $$, $+$ +\end{code} + + + +********************************************************* +* * +\subsection{CPP magic so that we can compile with both GHC and Hugs} +* * +********************************************************* + +The library uses unboxed types to get a bit more speed, but these CPP macros +allow you to use either GHC or Hugs. To get GHC, just set the CPP variable + __GLASGOW_HASKELL__ + +\begin{code} + +#if defined(__GLASGOW_HASKELL__) + +-- Glasgow Haskell + +-- Disable ASSERT checks; they are expensive! +#define LOCAL_ASSERT(x) + +#define ILIT(x) (x#) +#define IBOX(x) (I# (x)) +#define INT Int# +#define MINUS -# +#define NEGATE negateInt# +#define PLUS +# +#define GR ># +#define GREQ >=# +#define LT <# +#define DIV `quotInt#` + + +#define SHOW Show +#define MAXINT maxBound + +#else + +-- Standard Haskell + +#define LOCAL_ASSERT(x) + +#define INT Int +#define IBOX(x) x +#define MINUS - +#define NEGATE negate +#define PLUS + +#define GR > +#define GREQ >= +#define LT < +#define DIV `quot` +#define ILIT(x) x + +#define SHOW Show +#define MAXINT maxBound + +#endif + +\end{code} + + +********************************************************* +* * +\subsection{The interface} +* * +********************************************************* + +The primitive @Doc@ values + +\begin{code} +empty :: Doc +isEmpty :: Doc -> Bool +text :: String -> Doc +char :: Char -> Doc + +semi, comma, colon, space, equals :: Doc +lparen, rparen, lbrack, rbrack, lbrace, rbrace :: Doc + +parens, brackets, braces :: Doc -> Doc +quotes, doubleQuotes :: Doc -> Doc + +int :: Int -> Doc +integer :: Integer -> Doc +float :: Float -> Doc +double :: Double -> Doc +rational :: Rational -> Doc +\end{code} + +Combining @Doc@ values + +\begin{code} +(<>) :: Doc -> Doc -> Doc -- Beside +hcat :: [Doc] -> Doc -- List version of <> +(<+>) :: Doc -> Doc -> Doc -- Beside, separated by space +hsep :: [Doc] -> Doc -- List version of <+> + +($$) :: Doc -> Doc -> Doc -- Above; if there is no + -- overlap it "dovetails" the two +vcat :: [Doc] -> Doc -- List version of $$ + +cat :: [Doc] -> Doc -- Either hcat or vcat +sep :: [Doc] -> Doc -- Either hsep or vcat +fcat :: [Doc] -> Doc -- ``Paragraph fill'' version of cat +fsep :: [Doc] -> Doc -- ``Paragraph fill'' version of sep + +nest :: Int -> Doc -> Doc -- Nested +\end{code} + +GHC-specific ones. + +\begin{code} +hang :: Doc -> Int -> Doc -> Doc +punctuate :: Doc -> [Doc] -> [Doc] -- punctuate p [d1, ... dn] = [d1 <> p, d2 <> p, ... dn-1 <> p, dn] +\end{code} + +Displaying @Doc@ values. + +\begin{code} +instance SHOW Doc where + showsPrec prec doc cont = showDoc doc cont + +render :: Doc -> String -- Uses default style +fullRender :: Mode + -> Int -- Line length + -> Float -- Ribbons per line + -> (TextDetails -> a -> a) -- What to do with text + -> a -- What to do at the end + -> Doc + -> a -- Result + +{- When we start using 1.3 +renderStyle :: Style -> Doc -> String +data Style = Style { lineLength :: Int, -- In chars + ribbonsPerLine :: Float, -- Ratio of ribbon length to line length + mode :: Mode + } +style :: Style -- The default style +style = Style { lineLength = 100, ribbonsPerLine = 2.5, mode = PageMode } +-} + +data Mode = PageMode -- Normal + | ZigZagMode -- With zig-zag cuts + | LeftMode -- No indentation, infinitely long lines + | OneLineMode -- All on one line + +\end{code} + + +********************************************************* +* * +\subsection{The @Doc@ calculus} +* * +********************************************************* + +The @Doc@ combinators satisfy the following laws: +\begin{verbatim} +Laws for $$ +~~~~~~~~~~~ +<a1> (x $$ y) $$ z = x $$ (y $$ z) +<a2> empty $$ x = x +<a3> x $$ empty = x + + ...ditto $+$... + +Laws for <> +~~~~~~~~~~~ +<b1> (x <> y) <> z = x <> (y <> z) +<b2> empty <> x = empty +<b3> x <> empty = x + + ...ditto <+>... + +Laws for text +~~~~~~~~~~~~~ +<t1> text s <> text t = text (s++t) +<t2> text "" <> x = x, if x non-empty + +Laws for nest +~~~~~~~~~~~~~ +<n1> nest 0 x = x +<n2> nest k (nest k' x) = nest (k+k') x +<n3> nest k (x <> y) = nest k z <> nest k y +<n4> nest k (x $$ y) = nest k x $$ nest k y +<n5> nest k empty = empty +<n6> x <> nest k y = x <> y, if x non-empty + +** Note the side condition on <n6>! It is this that +** makes it OK for empty to be a left unit for <>. + +Miscellaneous +~~~~~~~~~~~~~ +<m1> (text s <> x) $$ y = text s <> ((text "" <> x)) $$ + nest (-length s) y) + +<m2> (x $$ y) <> z = x $$ (y <> z) + if y non-empty + + +Laws for list versions +~~~~~~~~~~~~~~~~~~~~~~ +<l1> sep (ps++[empty]++qs) = sep (ps ++ qs) + ...ditto hsep, hcat, vcat, fill... + +<l2> nest k (sep ps) = sep (map (nest k) ps) + ...ditto hsep, hcat, vcat, fill... + +Laws for oneLiner +~~~~~~~~~~~~~~~~~ +<o1> oneLiner (nest k p) = nest k (oneLiner p) +<o2> oneLiner (x <> y) = oneLiner x <> oneLiner y +\end{verbatim} + + +You might think that the following verion of <m1> would +be neater: +\begin{verbatim} +<3 NO> (text s <> x) $$ y = text s <> ((empty <> x)) $$ + nest (-length s) y) +\end{verbatim} +But it doesn't work, for if x=empty, we would have +\begin{verbatim} + text s $$ y = text s <> (empty $$ nest (-length s) y) + = text s <> nest (-length s) y +\end{verbatim} + + + +********************************************************* +* * +\subsection{Simple derived definitions} +* * +********************************************************* + +\begin{code} +semi = char ';' +colon = char ':' +comma = char ',' +space = char ' ' +equals = char '=' +lparen = char '(' +rparen = char ')' +lbrack = char '[' +rbrack = char ']' +lbrace = char '{' +rbrace = char '}' + +int n = text (show n) +integer n = text (show n) +float n = text (show n) +double n = text (show n) +rational n = text (show (fromRat n)) +--rational n = text (show (fromRationalX n)) -- _showRational 30 n) + +quotes p = char '`' <> p <> char '\'' +doubleQuotes p = char '"' <> p <> char '"' +parens p = char '(' <> p <> char ')' +brackets p = char '[' <> p <> char ']' +braces p = char '{' <> p <> char '}' + + +hcat = foldr (<>) empty +hsep = foldr (<+>) empty +vcat = foldr ($$) empty + +hang d1 n d2 = sep [d1, nest n d2] + +punctuate p [] = [] +punctuate p (d:ds) = go d ds + where + go d [] = [d] + go d (e:es) = (d <> p) : go e es +\end{code} + + +********************************************************* +* * +\subsection{The @Doc@ data type} +* * +********************************************************* + +A @Doc@ represents a {\em set} of layouts. A @Doc@ with +no occurrences of @Union@ or @NoDoc@ represents just one layout. +\begin{code} +data Doc + = Empty -- empty + | NilAbove Doc -- text "" $$ x + | TextBeside !TextDetails INT Doc -- text s <> x + | Nest INT Doc -- nest k x + | Union Doc Doc -- ul `union` ur + | NoDoc -- The empty set of documents + | Beside Doc Bool Doc -- True <=> space between + | Above Doc Bool Doc -- True <=> never overlap + +type RDoc = Doc -- RDoc is a "reduced Doc", guaranteed not to have a top-level Above or Beside + + +reduceDoc :: Doc -> RDoc +reduceDoc (Beside p g q) = beside p g (reduceDoc q) +reduceDoc (Above p g q) = above p g (reduceDoc q) +reduceDoc p = p + + +data TextDetails = Chr {-#UNPACK#-}!Char + | Str String + | PStr FastString -- a hashed string + | LStr Addr# Int# -- a '\0'-terminated array of bytes + +space_text = Chr ' ' +nl_text = Chr '\n' +\end{code} + +Here are the invariants: +\begin{itemize} +\item +The argument of @NilAbove@ is never @Empty@. Therefore +a @NilAbove@ occupies at least two lines. + +\item +The arugment of @TextBeside@ is never @Nest@. + +\item +The layouts of the two arguments of @Union@ both flatten to the same string. + +\item +The arguments of @Union@ are either @TextBeside@, or @NilAbove@. + +\item +The right argument of a union cannot be equivalent to the empty set (@NoDoc@). +If the left argument of a union is equivalent to the empty set (@NoDoc@), +then the @NoDoc@ appears in the first line. + +\item +An empty document is always represented by @Empty@. +It can't be hidden inside a @Nest@, or a @Union@ of two @Empty@s. + +\item +The first line of every layout in the left argument of @Union@ +is longer than the first line of any layout in the right argument. +(1) ensures that the left argument has a first line. In view of (3), +this invariant means that the right argument must have at least two +lines. +\end{itemize} + +\begin{code} + -- Arg of a NilAbove is always an RDoc +nilAbove_ p = LOCAL_ASSERT( ok p ) NilAbove p + where + ok Empty = False + ok other = True + + -- Arg of a TextBeside is always an RDoc +textBeside_ s sl p = TextBeside s sl (LOCAL_ASSERT( ok p ) p) + where + ok (Nest _ _) = False + ok other = True + + -- Arg of Nest is always an RDoc +nest_ k p = Nest k (LOCAL_ASSERT( ok p ) p) + where + ok Empty = False + ok other = True + + -- Args of union are always RDocs +union_ p q = Union (LOCAL_ASSERT( ok p ) p) (LOCAL_ASSERT( ok q ) q) + where + ok (TextBeside _ _ _) = True + ok (NilAbove _) = True + ok (Union _ _) = True + ok other = False +\end{code} + + +Notice the difference between + * NoDoc (no documents) + * Empty (one empty document; no height and no width) + * text "" (a document containing the empty string; + one line high, but has no width) + + + +********************************************************* +* * +\subsection{@empty@, @text@, @nest@, @union@} +* * +********************************************************* + +\begin{code} +empty = Empty + +isEmpty Empty = True +isEmpty _ = False + +char c = textBeside_ (Chr c) 1# Empty +text s = case length s of {IBOX(sl) -> textBeside_ (Str s) sl Empty} +ftext s = case lengthFS s of {IBOX(sl) -> textBeside_ (PStr s) sl Empty} +ptext (Ptr s) = case strLength (Ptr s) of {IBOX(sl) -> textBeside_ (LStr s sl) sl Empty} + +-- RULE that turns (text "abc") into (ptext (A# "abc"#)) to avoid the +-- intermediate packing/unpacking of the string. +{-# RULES + "text/str" forall a. text (unpackCString# a) = ptext (Ptr a) + #-} + +nest IBOX(k) p = mkNest k (reduceDoc p) -- Externally callable version + +-- mkNest checks for Nest's invariant that it doesn't have an Empty inside it +mkNest k (Nest k1 p) = mkNest (k PLUS k1) p +mkNest k NoDoc = NoDoc +mkNest k Empty = Empty +mkNest ILIT(0) p = p -- Worth a try! +mkNest k p = nest_ k p + +-- mkUnion checks for an empty document +mkUnion Empty q = Empty +mkUnion p q = p `union_` q +\end{code} + +********************************************************* +* * +\subsection{Vertical composition @$$@} +* * +********************************************************* + + +\begin{code} +p $$ q = Above p False q +p $+$ q = Above p True q + +above :: Doc -> Bool -> RDoc -> RDoc +above (Above p g1 q1) g2 q2 = above p g1 (above q1 g2 q2) +above p@(Beside _ _ _) g q = aboveNest (reduceDoc p) g ILIT(0) (reduceDoc q) +above p g q = aboveNest p g ILIT(0) (reduceDoc q) + +aboveNest :: RDoc -> Bool -> INT -> RDoc -> RDoc +-- Specfication: aboveNest p g k q = p $g$ (nest k q) + +aboveNest NoDoc g k q = NoDoc +aboveNest (p1 `Union` p2) g k q = aboveNest p1 g k q `union_` + aboveNest p2 g k q + +aboveNest Empty g k q = mkNest k q +aboveNest (Nest k1 p) g k q = nest_ k1 (aboveNest p g (k MINUS k1) q) + -- p can't be Empty, so no need for mkNest + +aboveNest (NilAbove p) g k q = nilAbove_ (aboveNest p g k q) +aboveNest (TextBeside s sl p) g k q = textBeside_ s sl rest + where + k1 = k MINUS sl + rest = case p of + Empty -> nilAboveNest g k1 q + other -> aboveNest p g k1 q +\end{code} + +\begin{code} +nilAboveNest :: Bool -> INT -> RDoc -> RDoc +-- Specification: text s <> nilaboveNest g k q +-- = text s <> (text "" $g$ nest k q) + +nilAboveNest g k Empty = Empty -- Here's why the "text s <>" is in the spec! +nilAboveNest g k (Nest k1 q) = nilAboveNest g (k PLUS k1) q + +nilAboveNest g k q | (not g) && (k GR ILIT(0)) -- No newline if no overlap + = textBeside_ (Str (spaces k)) k q + | otherwise -- Put them really above + = nilAbove_ (mkNest k q) +\end{code} + + +********************************************************* +* * +\subsection{Horizontal composition @<>@} +* * +********************************************************* + +\begin{code} +p <> q = Beside p False q +p <+> q = Beside p True q + +beside :: Doc -> Bool -> RDoc -> RDoc +-- Specification: beside g p q = p <g> q + +beside NoDoc g q = NoDoc +beside (p1 `Union` p2) g q = (beside p1 g q) `union_` (beside p2 g q) +beside Empty g q = q +beside (Nest k p) g q = nest_ k $! beside p g q -- p non-empty +beside p@(Beside p1 g1 q1) g2 q2 + {- (A `op1` B) `op2` C == A `op1` (B `op2` C) iff op1 == op2 + [ && (op1 == <> || op1 == <+>) ] -} + | g1 == g2 = beside p1 g1 $! beside q1 g2 q2 + | otherwise = beside (reduceDoc p) g2 q2 +beside p@(Above _ _ _) g q = let d = reduceDoc p in d `seq` beside d g q +beside (NilAbove p) g q = nilAbove_ $! beside p g q +beside (TextBeside s sl p) g q = textBeside_ s sl $! rest + where + rest = case p of + Empty -> nilBeside g q + other -> beside p g q +\end{code} + +\begin{code} +nilBeside :: Bool -> RDoc -> RDoc +-- Specification: text "" <> nilBeside g p +-- = text "" <g> p + +nilBeside g Empty = Empty -- Hence the text "" in the spec +nilBeside g (Nest _ p) = nilBeside g p +nilBeside g p | g = textBeside_ space_text ILIT(1) p + | otherwise = p +\end{code} + +********************************************************* +* * +\subsection{Separate, @sep@, Hughes version} +* * +********************************************************* + +\begin{code} +-- Specification: sep ps = oneLiner (hsep ps) +-- `union` +-- vcat ps + +sep = sepX True -- Separate with spaces +cat = sepX False -- Don't + +sepX x [] = empty +sepX x (p:ps) = sep1 x (reduceDoc p) ILIT(0) ps + + +-- Specification: sep1 g k ys = sep (x : map (nest k) ys) +-- = oneLiner (x <g> nest k (hsep ys)) +-- `union` x $$ nest k (vcat ys) + +sep1 :: Bool -> RDoc -> INT -> [Doc] -> RDoc +sep1 g NoDoc k ys = NoDoc +sep1 g (p `Union` q) k ys = sep1 g p k ys + `union_` + (aboveNest q False k (reduceDoc (vcat ys))) + +sep1 g Empty k ys = mkNest k (sepX g ys) +sep1 g (Nest n p) k ys = nest_ n (sep1 g p (k MINUS n) ys) + +sep1 g (NilAbove p) k ys = nilAbove_ (aboveNest p False k (reduceDoc (vcat ys))) +sep1 g (TextBeside s sl p) k ys = textBeside_ s sl (sepNB g p (k MINUS sl) ys) + +-- Specification: sepNB p k ys = sep1 (text "" <> p) k ys +-- Called when we have already found some text in the first item +-- We have to eat up nests + +sepNB g (Nest _ p) k ys = sepNB g p k ys + +sepNB g Empty k ys = oneLiner (nilBeside g (reduceDoc rest)) + `mkUnion` + nilAboveNest False k (reduceDoc (vcat ys)) + where + rest | g = hsep ys + | otherwise = hcat ys + +sepNB g p k ys = sep1 g p k ys +\end{code} + +********************************************************* +* * +\subsection{@fill@} +* * +********************************************************* + +\begin{code} +fsep = fill True +fcat = fill False + +-- Specification: +-- fill [] = empty +-- fill [p] = p +-- fill (p1:p2:ps) = oneLiner p1 <#> nest (length p1) +-- (fill (oneLiner p2 : ps)) +-- `union` +-- p1 $$ fill ps + +fill g [] = empty +fill g (p:ps) = fill1 g (reduceDoc p) ILIT(0) ps + + +fill1 :: Bool -> RDoc -> INT -> [Doc] -> Doc +fill1 g NoDoc k ys = NoDoc +fill1 g (p `Union` q) k ys = fill1 g p k ys + `union_` + (aboveNest q False k (fill g ys)) + +fill1 g Empty k ys = mkNest k (fill g ys) +fill1 g (Nest n p) k ys = nest_ n (fill1 g p (k MINUS n) ys) + +fill1 g (NilAbove p) k ys = nilAbove_ (aboveNest p False k (fill g ys)) +fill1 g (TextBeside s sl p) k ys = textBeside_ s sl (fillNB g p (k MINUS sl) ys) + +fillNB g (Nest _ p) k ys = fillNB g p k ys +fillNB g Empty k [] = Empty +fillNB g Empty k (y:ys) = nilBeside g (fill1 g (oneLiner (reduceDoc y)) k1 ys) + `mkUnion` + nilAboveNest False k (fill g (y:ys)) + where + k1 | g = k MINUS ILIT(1) + | otherwise = k + +fillNB g p k ys = fill1 g p k ys +\end{code} + + +********************************************************* +* * +\subsection{Selecting the best layout} +* * +********************************************************* + +\begin{code} +best :: Int -- Line length + -> Int -- Ribbon length + -> RDoc + -> RDoc -- No unions in here! + +best IBOX(w) IBOX(r) p + = get w p + where + get :: INT -- (Remaining) width of line + -> Doc -> Doc + get w Empty = Empty + get w NoDoc = NoDoc + get w (NilAbove p) = nilAbove_ (get w p) + get w (TextBeside s sl p) = textBeside_ s sl (get1 w sl p) + get w (Nest k p) = nest_ k (get (w MINUS k) p) + get w (p `Union` q) = nicest w r (get w p) (get w q) + + get1 :: INT -- (Remaining) width of line + -> INT -- Amount of first line already eaten up + -> Doc -- This is an argument to TextBeside => eat Nests + -> Doc -- No unions in here! + + get1 w sl Empty = Empty + get1 w sl NoDoc = NoDoc + get1 w sl (NilAbove p) = nilAbove_ (get (w MINUS sl) p) + get1 w sl (TextBeside t tl p) = textBeside_ t tl (get1 w (sl PLUS tl) p) + get1 w sl (Nest k p) = get1 w sl p + get1 w sl (p `Union` q) = nicest1 w r sl (get1 w sl p) + (get1 w sl q) + +nicest w r p q = nicest1 w r ILIT(0) p q +nicest1 w r sl p q | fits ((w `minn` r) MINUS sl) p = p + | otherwise = q + +fits :: INT -- Space available + -> Doc + -> Bool -- True if *first line* of Doc fits in space available + +fits n p | n LT ILIT(0) = False +fits n NoDoc = False +fits n Empty = True +fits n (NilAbove _) = True +fits n (TextBeside _ sl p) = fits (n MINUS sl) p + +minn x y | x LT y = x + | otherwise = y +\end{code} + +@first@ and @nonEmptySet@ are similar to @nicest@ and @fits@, only simpler. +@first@ returns its first argument if it is non-empty, otherwise its second. + +\begin{code} +first p q | nonEmptySet p = p + | otherwise = q + +nonEmptySet NoDoc = False +nonEmptySet (p `Union` q) = True +nonEmptySet Empty = True +nonEmptySet (NilAbove p) = True -- NoDoc always in first line +nonEmptySet (TextBeside _ _ p) = nonEmptySet p +nonEmptySet (Nest _ p) = nonEmptySet p +\end{code} + +@oneLiner@ returns the one-line members of the given set of @Doc@s. + +\begin{code} +oneLiner :: Doc -> Doc +oneLiner NoDoc = NoDoc +oneLiner Empty = Empty +oneLiner (NilAbove p) = NoDoc +oneLiner (TextBeside s sl p) = textBeside_ s sl (oneLiner p) +oneLiner (Nest k p) = nest_ k (oneLiner p) +oneLiner (p `Union` q) = oneLiner p +\end{code} + + + +********************************************************* +* * +\subsection{Displaying the best layout} +* * +********************************************************* + + +\begin{code} +{- +renderStyle Style{mode, lineLength, ribbonsPerLine} doc + = fullRender mode lineLength ribbonsPerLine doc "" +-} + +render doc = showDocWith PageMode doc +showDoc doc rest = showDocWithAppend PageMode doc rest + +showDocWithAppend :: Mode -> Doc -> String -> String +showDocWithAppend mode doc rest = fullRender mode 100 1.5 string_txt rest doc + +showDocWith :: Mode -> Doc -> String +showDocWith mode doc = showDocWithAppend mode doc "" + +string_txt (Chr c) s = c:s +string_txt (Str s1) s2 = s1 ++ s2 +string_txt (PStr s1) s2 = unpackFS s1 ++ s2 +string_txt (LStr s1 _) s2 = unpackLitString s1 ++ s2 + +unpackLitString addr = + unpack 0# + where + unpack nh + | ch `eqChar#` '\0'# = [] + | otherwise = C# ch : unpack (nh +# 1#) + where + ch = indexCharOffAddr# addr nh +\end{code} + +\begin{code} + +fullRender OneLineMode _ _ txt end doc + = lay (reduceDoc doc) + where + lay NoDoc = cant_fail + lay (Union p q) = (lay q) -- Second arg can't be NoDoc + lay (Nest k p) = lay p + lay Empty = end + lay (NilAbove p) = space_text `txt` lay p -- NoDoc always on first line + lay (TextBeside s sl p) = s `txt` lay p + +fullRender LeftMode _ _ txt end doc + = lay (reduceDoc doc) + where + lay NoDoc = cant_fail + lay (Union p q) = lay (first p q) + lay (Nest k p) = lay p + lay Empty = end + lay (NilAbove p) = nl_text `txt` lay p -- NoDoc always on first line + lay (TextBeside s sl p) = s `txt` lay p + +fullRender mode line_length ribbons_per_line txt end doc + = display mode line_length ribbon_length txt end best_doc + where + best_doc = best hacked_line_length ribbon_length (reduceDoc doc) + + hacked_line_length, ribbon_length :: Int + ribbon_length = round (fromIntegral line_length / ribbons_per_line) + hacked_line_length = case mode of { ZigZagMode -> MAXINT; other -> line_length } + +display mode IBOX(page_width) IBOX(ribbon_width) txt end doc + = case page_width MINUS ribbon_width of { gap_width -> + case gap_width DIV ILIT(2) of { shift -> + let + lay k (Nest k1 p) = lay (k PLUS k1) p + lay k Empty = end + + lay k (NilAbove p) = nl_text `txt` lay k p + + lay k (TextBeside s sl p) + = case mode of + ZigZagMode | k GREQ gap_width + -> nl_text `txt` ( + Str (multi_ch shift '/') `txt` ( + nl_text `txt` ( + lay1 (k MINUS shift) s sl p))) + + | k LT ILIT(0) + -> nl_text `txt` ( + Str (multi_ch shift '\\') `txt` ( + nl_text `txt` ( + lay1 (k PLUS shift) s sl p ))) + + other -> lay1 k s sl p + + lay1 k s sl p = Str (indent k) `txt` (s `txt` lay2 (k PLUS sl) p) + + lay2 k (NilAbove p) = nl_text `txt` lay k p + lay2 k (TextBeside s sl p) = s `txt` (lay2 (k PLUS sl) p) + lay2 k (Nest _ p) = lay2 k p + lay2 k Empty = end + in + lay ILIT(0) doc + }} + +cant_fail = error "easy_display: NoDoc" + +indent n | n GREQ ILIT(8) = '\t' : indent (n MINUS ILIT(8)) + | otherwise = spaces n + +multi_ch ILIT(0) ch = "" +multi_ch n ch = ch : multi_ch (n MINUS ILIT(1)) ch + +spaces ILIT(0) = "" +spaces n = ' ' : spaces (n MINUS ILIT(1)) +\end{code} + +\begin{code} +pprCols = (120 :: Int) -- could make configurable + +printDoc :: Mode -> Handle -> Doc -> IO () +printDoc LeftMode hdl doc + = do { printLeftRender hdl doc; hFlush hdl } +printDoc mode hdl doc + = do { fullRender mode pprCols 1.5 put done doc ; + hFlush hdl } + where + put (Chr c) next = hPutChar hdl c >> next + put (Str s) next = hPutStr hdl s >> next + put (PStr s) next = hPutFS hdl s >> next + put (LStr s l) next = hPutLitString hdl s l >> next + + done = hPutChar hdl '\n' + + -- some versions of hPutBuf will barf if the length is zero +hPutLitString handle a# 0# = return () +hPutLitString handle a# l# +#if __GLASGOW_HASKELL__ < 411 + = hPutBuf handle (A# a#) (I# l#) +#else + = hPutBuf handle (Ptr a#) (I# l#) +#endif + +-- Printing output in LeftMode is performance critical: it's used when +-- dumping C and assembly output, so we allow ourselves a few dirty +-- hacks: +-- +-- (1) we specialise fullRender for LeftMode with IO output. +-- +-- (2) we add a layer of buffering on top of Handles. Handles +-- don't perform well with lots of hPutChars, which is mostly +-- what we're doing here, because Handles have to be thread-safe +-- and async exception-safe. We only have a single thread and don't +-- care about exceptions, so we add a layer of fast buffering +-- over the Handle interface. +-- +-- (3) a few hacks in layLeft below to convince GHC to generate the right +-- code. + +printLeftRender :: Handle -> Doc -> IO () +printLeftRender hdl doc = do + b <- newBufHandle hdl + layLeft b (reduceDoc doc) + bFlush b + +-- HACK ALERT! the "return () >>" below convinces GHC to eta-expand +-- this function with the IO state lambda. Otherwise we end up with +-- closures in all the case branches. +layLeft b _ | b `seq` False = undefined -- make it strict in b +layLeft b NoDoc = cant_fail +layLeft b (Union p q) = return () >> layLeft b (first p q) +layLeft b (Nest k p) = return () >> layLeft b p +layLeft b Empty = bPutChar b '\n' +layLeft b (NilAbove p) = bPutChar b '\n' >> layLeft b p +layLeft b (TextBeside s sl p) = put b s >> layLeft b p + where + put b _ | b `seq` False = undefined + put b (Chr c) = bPutChar b c + put b (Str s) = bPutStr b s + put b (PStr s) = bPutFS b s + put b (LStr s l) = bPutLitString b s l + +#if __GLASGOW_HASKELL__ < 503 +hPutBuf = hPutBufFull +#endif + +\end{code} diff --git a/compiler/utils/StringBuffer.lhs b/compiler/utils/StringBuffer.lhs new file mode 100644 index 0000000000..e52e7e78da --- /dev/null +++ b/compiler/utils/StringBuffer.lhs @@ -0,0 +1,240 @@ +% +% (c) The University of Glasgow, 1997-2006 +% +\section{String buffers} + +Buffers for scanning string input stored in external arrays. + +\begin{code} +module StringBuffer + ( + StringBuffer(..), + -- non-abstract for vs\/HaskellService + + -- * Creation\/destruction + hGetStringBuffer, + hGetStringBufferBlock, + appendStringBuffers, + stringToStringBuffer, + + -- * Inspection + nextChar, + currentChar, + prevChar, + atEnd, + + -- * Moving and comparison + stepOn, + offsetBytes, + byteDiff, + + -- * Conversion + lexemeToString, + lexemeToFastString, + + -- * Parsing integers + parseInteger, + ) where + +#include "HsVersions.h" + +import Encoding +import FastString ( FastString,mkFastString,mkFastStringBytes ) + +import Foreign +import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose + , Handle, hTell ) + +import GHC.Ptr ( Ptr(..) ) +import GHC.Exts +import GHC.IOBase ( IO(..) ) +import GHC.Base ( unsafeChr ) + +#if __GLASGOW_HASKELL__ >= 601 +import System.IO ( openBinaryFile ) +#else +import IOExts ( openFileEx, IOModeEx(..) ) +#endif + +#if __GLASGOW_HASKELL__ < 601 +openBinaryFile fp mode = openFileEx fp (BinaryMode mode) +#endif + +-- ----------------------------------------------------------------------------- +-- The StringBuffer type + +-- |A StringBuffer is an internal pointer to a sized chunk of bytes. +-- The bytes are intended to be *immutable*. There are pure +-- operations to read the contents of a StringBuffer. +-- +-- A StringBuffer may have a finalizer, depending on how it was +-- obtained. +-- +data StringBuffer + = StringBuffer { + buf :: {-# UNPACK #-} !(ForeignPtr Word8), + len :: {-# UNPACK #-} !Int, -- length + cur :: {-# UNPACK #-} !Int -- current pos + } + -- The buffer is assumed to be UTF-8 encoded, and furthermore + -- we add three '\0' bytes to the end as sentinels so that the + -- decoder doesn't have to check for overflow at every single byte + -- of a multibyte sequence. + +instance Show StringBuffer where + showsPrec _ s = showString "<stringbuffer(" + . shows (len s) . showString "," . shows (cur s) + . showString ">" + +-- ----------------------------------------------------------------------------- +-- Creation / Destruction + +hGetStringBuffer :: FilePath -> IO StringBuffer +hGetStringBuffer fname = do + h <- openBinaryFile fname ReadMode + size_i <- hFileSize h + let size = fromIntegral size_i + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + r <- if size == 0 then return 0 else hGetBuf h ptr size + hClose h + if (r /= size) + then ioError (userError "short read of file") + else do + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return (StringBuffer buf size 0) + +hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer +hGetStringBufferBlock handle wanted + = do size_i <- hFileSize handle + offset_i <- hTell handle + let size = min wanted (fromIntegral $ size_i-offset_i) + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> + do r <- if size == 0 then return 0 else hGetBuf handle ptr size + if r /= size + then ioError (userError $ "short read of file: "++show(r,size,fromIntegral size_i,handle)) + else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + return (StringBuffer buf size 0) + +appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer +appendStringBuffers sb1 sb2 + = do newBuf <- mallocForeignPtrArray (size+3) + withForeignPtr newBuf $ \ptr -> + withForeignPtr (buf sb1) $ \sb1Ptr -> + withForeignPtr (buf sb2) $ \sb2Ptr -> + do copyArray (sb1Ptr `advancePtr` cur sb1) ptr (calcLen sb1) + copyArray (sb2Ptr `advancePtr` cur sb2) (ptr `advancePtr` cur sb1) (calcLen sb2) + pokeArray (ptr `advancePtr` size) [0,0,0] + return (StringBuffer newBuf size 0) + where calcLen sb = len sb - cur sb + size = calcLen sb1 + calcLen sb2 + +stringToStringBuffer :: String -> IO StringBuffer +stringToStringBuffer str = do + let size = utf8EncodedLength str + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + utf8EncodeString ptr str + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return (StringBuffer buf size 0) + +-- ----------------------------------------------------------------------------- +-- Grab a character + +-- Getting our fingers dirty a little here, but this is performance-critical +{-# INLINE nextChar #-} +nextChar :: StringBuffer -> (Char,StringBuffer) +nextChar (StringBuffer buf len (I# cur#)) = + inlinePerformIO $ do + withForeignPtr buf $ \(Ptr a#) -> do + case utf8DecodeChar# (a# `plusAddr#` cur#) of + (# c#, b# #) -> + let cur' = I# (b# `minusAddr#` a#) in + return (C# c#, StringBuffer buf len cur') + +currentChar :: StringBuffer -> Char +currentChar = fst . nextChar + +prevChar :: StringBuffer -> Char -> Char +prevChar (StringBuffer buf len 0) deflt = deflt +prevChar (StringBuffer buf len cur) deflt = + inlinePerformIO $ do + withForeignPtr buf $ \p -> do + p' <- utf8PrevChar (p `plusPtr` cur) + return (fst (utf8DecodeChar p')) + +-- ----------------------------------------------------------------------------- +-- Moving + +stepOn :: StringBuffer -> StringBuffer +stepOn s = snd (nextChar s) + +offsetBytes :: Int -> StringBuffer -> StringBuffer +offsetBytes i s = s { cur = cur s + i } + +byteDiff :: StringBuffer -> StringBuffer -> Int +byteDiff s1 s2 = cur s2 - cur s1 + +atEnd :: StringBuffer -> Bool +atEnd (StringBuffer _ l c) = l == c + +-- ----------------------------------------------------------------------------- +-- Conversion + +lexemeToString :: StringBuffer -> Int {-bytes-} -> String +lexemeToString _ 0 = "" +lexemeToString (StringBuffer buf _ cur) bytes = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + utf8DecodeString (ptr `plusPtr` cur) bytes + +lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString +lexemeToFastString _ 0 = mkFastString "" +lexemeToFastString (StringBuffer buf _ cur) len = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + return $! mkFastStringBytes (ptr `plusPtr` cur) len + +-- ----------------------------------------------------------------------------- +-- Parsing integer strings in various bases + +byteOff :: StringBuffer -> Int -> Char +byteOff (StringBuffer buf _ cur) i = + inlinePerformIO $ withForeignPtr buf $ \ptr -> do + w <- peek (ptr `plusPtr` (cur+i)) + return (unsafeChr (fromIntegral (w::Word8))) + +-- | XXX assumes ASCII digits only +parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer +parseInteger buf len radix to_int + = go 0 0 + where go i x | i == len = x + | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i))) + +-- ----------------------------------------------------------------------------- +-- under the carpet + +-- Just like unsafePerformIO, but we inline it. +{-# INLINE inlinePerformIO #-} +inlinePerformIO :: IO a -> a +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r + +#if __GLASGOW_HASKELL__ < 600 +mallocForeignPtrArray :: Storable a => Int -> IO (ForeignPtr a) +mallocForeignPtrArray = doMalloc undefined + where + doMalloc :: Storable b => b -> Int -> IO (ForeignPtr b) + doMalloc dummy size = mallocForeignPtrBytes (size * sizeOf dummy) + +mallocForeignPtrBytes :: Int -> IO (ForeignPtr a) +mallocForeignPtrBytes n = do + r <- mallocBytes n + newForeignPtr r (finalizerFree r) + +foreign import ccall unsafe "stdlib.h free" + finalizerFree :: Ptr a -> IO () +#endif +\end{code} diff --git a/compiler/utils/UniqFM.lhs b/compiler/utils/UniqFM.lhs new file mode 100644 index 0000000000..84294aae0d --- /dev/null +++ b/compiler/utils/UniqFM.lhs @@ -0,0 +1,847 @@ +%ilter +% (c) The AQUA Project, Glasgow University, 1994-1998 +% +\section[UniqFM]{Specialised finite maps, for things with @Uniques@} + +Based on @FiniteMaps@ (as you would expect). + +Basically, the things need to be in class @Uniquable@, and we use the +@getUnique@ method to grab their @Uniques@. + +(A similar thing to @UniqSet@, as opposed to @Set@.) + +\begin{code} +module UniqFM ( + UniqFM, -- abstract type + + emptyUFM, + unitUFM, + unitDirectlyUFM, + listToUFM, + listToUFM_Directly, + addToUFM,addToUFM_C,addToUFM_Acc, + addListToUFM,addListToUFM_C, + addToUFM_Directly, + addListToUFM_Directly, + delFromUFM, + delFromUFM_Directly, + delListFromUFM, + plusUFM, + plusUFM_C, + minusUFM, + intersectUFM, + intersectUFM_C, + foldUFM, + mapUFM, + elemUFM, elemUFM_Directly, + filterUFM, filterUFM_Directly, + sizeUFM, + hashUFM, + isNullUFM, + lookupUFM, lookupUFM_Directly, + lookupWithDefaultUFM, lookupWithDefaultUFM_Directly, + eltsUFM, keysUFM, + ufmToList + ) where + +#include "HsVersions.h" + +import Unique ( Uniquable(..), Unique, getKey#, mkUniqueGrimily ) +import Maybes ( maybeToBool ) +import FastTypes +import Outputable + +import GLAEXTS -- Lots of Int# operations +\end{code} + +%************************************************************************ +%* * +\subsection{The @UniqFM@ type, and signatures for the functions} +%* * +%************************************************************************ + +We use @FiniteMaps@, with a (@getUnique@-able) @Unique@ as ``key''. + +\begin{code} +emptyUFM :: UniqFM elt +isNullUFM :: UniqFM elt -> Bool +unitUFM :: Uniquable key => key -> elt -> UniqFM elt +unitDirectlyUFM -- got the Unique already + :: Unique -> elt -> UniqFM elt +listToUFM :: Uniquable key => [(key,elt)] -> UniqFM elt +listToUFM_Directly + :: [(Unique, elt)] -> UniqFM elt + +addToUFM :: Uniquable key => UniqFM elt -> key -> elt -> UniqFM elt +addListToUFM :: Uniquable key => UniqFM elt -> [(key,elt)] -> UniqFM elt +addToUFM_Directly + :: UniqFM elt -> Unique -> elt -> UniqFM elt + +addToUFM_C :: Uniquable key => (elt -> elt -> elt) -- old -> new -> result + -> UniqFM elt -- old + -> key -> elt -- new + -> UniqFM elt -- result + +addToUFM_Acc :: Uniquable key => + (elt -> elts -> elts) -- Add to existing + -> (elt -> elts) -- New element + -> UniqFM elts -- old + -> key -> elt -- new + -> UniqFM elts -- result + +addListToUFM_C :: Uniquable key => (elt -> elt -> elt) + -> UniqFM elt -> [(key,elt)] + -> UniqFM elt + +delFromUFM :: Uniquable key => UniqFM elt -> key -> UniqFM elt +delListFromUFM :: Uniquable key => UniqFM elt -> [key] -> UniqFM elt +delFromUFM_Directly :: UniqFM elt -> Unique -> UniqFM elt + +plusUFM :: UniqFM elt -> UniqFM elt -> UniqFM elt + +plusUFM_C :: (elt -> elt -> elt) + -> UniqFM elt -> UniqFM elt -> UniqFM elt + +minusUFM :: UniqFM elt1 -> UniqFM elt2 -> UniqFM elt1 + +intersectUFM :: UniqFM elt -> UniqFM elt -> UniqFM elt +intersectUFM_C :: (elt1 -> elt2 -> elt3) + -> UniqFM elt1 -> UniqFM elt2 -> UniqFM elt3 +foldUFM :: (elt -> a -> a) -> a -> UniqFM elt -> a +mapUFM :: (elt1 -> elt2) -> UniqFM elt1 -> UniqFM elt2 +filterUFM :: (elt -> Bool) -> UniqFM elt -> UniqFM elt +filterUFM_Directly :: (Unique -> elt -> Bool) -> UniqFM elt -> UniqFM elt + +sizeUFM :: UniqFM elt -> Int +hashUFM :: UniqFM elt -> Int +elemUFM :: Uniquable key => key -> UniqFM elt -> Bool +elemUFM_Directly:: Unique -> UniqFM elt -> Bool + +lookupUFM :: Uniquable key => UniqFM elt -> key -> Maybe elt +lookupUFM_Directly -- when you've got the Unique already + :: UniqFM elt -> Unique -> Maybe elt +lookupWithDefaultUFM + :: Uniquable key => UniqFM elt -> elt -> key -> elt +lookupWithDefaultUFM_Directly + :: UniqFM elt -> elt -> Unique -> elt + +keysUFM :: UniqFM elt -> [Unique] -- Get the keys +eltsUFM :: UniqFM elt -> [elt] +ufmToList :: UniqFM elt -> [(Unique, elt)] +\end{code} + +%************************************************************************ +%* * +\subsection{The @IdFinMap@ and @TyVarFinMap@ specialisations for Ids/TyVars} +%* * +%************************************************************************ + +\begin{code} +-- Turn off for now, these need to be updated (SDM 4/98) + +#if 0 +#ifdef __GLASGOW_HASKELL__ +-- I don't think HBC was too happy about this (WDP 94/10) + +{-# SPECIALIZE + addListToUFM :: UniqFM elt -> [(Name, elt)] -> UniqFM elt + #-} +{-# SPECIALIZE + addListToUFM_C :: (elt -> elt -> elt) -> UniqFM elt -> [(Name, elt)] -> UniqFM elt + #-} +{-# SPECIALIZE + addToUFM :: UniqFM elt -> Unique -> elt -> UniqFM elt + #-} +{-# SPECIALIZE + listToUFM :: [(Unique, elt)] -> UniqFM elt + #-} +{-# SPECIALIZE + lookupUFM :: UniqFM elt -> Name -> Maybe elt + , UniqFM elt -> Unique -> Maybe elt + #-} + +#endif /* __GLASGOW_HASKELL__ */ +#endif +\end{code} + +%************************************************************************ +%* * +\subsection{Andy Gill's underlying @UniqFM@ machinery} +%* * +%************************************************************************ + +``Uniq Finite maps'' are the heart and soul of the compiler's +lookup-tables/environments. Important stuff! It works well with +Dense and Sparse ranges. +Both @Uq@ Finite maps and @Hash@ Finite Maps +are built ontop of Int Finite Maps. + +This code is explained in the paper: +\begin{display} + A Gill, S Peyton Jones, B O'Sullivan, W Partain and Aqua Friends + "A Cheap balancing act that grows on a tree" + Glasgow FP Workshop, Sep 1994, pp??-?? +\end{display} + +%************************************************************************ +%* * +\subsubsection{The @UniqFM@ type, and signatures for the functions} +%* * +%************************************************************************ + +@UniqFM a@ is a mapping from Unique to a. + +First, the DataType itself; which is either a Node, a Leaf, or an Empty. + +\begin{code} +data UniqFM ele + = EmptyUFM + | LeafUFM FastInt ele + | NodeUFM FastInt -- the switching + FastInt -- the delta + (UniqFM ele) + (UniqFM ele) +-- INVARIANT: the children of a NodeUFM are never EmptyUFMs + +{- +-- for debugging only :-) +instance Outputable (UniqFM a) where + ppr(NodeUFM a b t1 t2) = + sep [text "NodeUFM " <+> int IBOX(a) <+> int IBOX(b), + nest 1 (parens (ppr t1)), + nest 1 (parens (ppr t2))] + ppr (LeafUFM x a) = text "LeafUFM " <+> int IBOX(x) + ppr (EmptyUFM) = empty +-} +-- and when not debugging the package itself... +instance Outputable a => Outputable (UniqFM a) where + ppr ufm = ppr (ufmToList ufm) +\end{code} + +%************************************************************************ +%* * +\subsubsection{The @UniqFM@ functions} +%* * +%************************************************************************ + +First the ways of building a UniqFM. + +\begin{code} +emptyUFM = EmptyUFM +unitUFM key elt = mkLeafUFM (getKey# (getUnique key)) elt +unitDirectlyUFM key elt = mkLeafUFM (getKey# key) elt + +listToUFM key_elt_pairs + = addListToUFM_C use_snd EmptyUFM key_elt_pairs + +listToUFM_Directly uniq_elt_pairs + = addListToUFM_directly_C use_snd EmptyUFM uniq_elt_pairs +\end{code} + +Now ways of adding things to UniqFMs. + +There is an alternative version of @addListToUFM_C@, that uses @plusUFM@, +but the semantics of this operation demands a linear insertion; +perhaps the version without the combinator function +could be optimised using it. + +\begin{code} +addToUFM fm key elt = addToUFM_C use_snd fm key elt + +addToUFM_Directly fm u elt = insert_ele use_snd fm (getKey# u) elt + +addToUFM_C combiner fm key elt + = insert_ele combiner fm (getKey# (getUnique key)) elt + +addToUFM_Acc add unit fm key item + = insert_ele combiner fm (getKey# (getUnique key)) (unit item) + where + combiner old _unit_item = add item old + +addListToUFM fm key_elt_pairs = addListToUFM_C use_snd fm key_elt_pairs +addListToUFM_Directly fm uniq_elt_pairs = addListToUFM_directly_C use_snd fm uniq_elt_pairs + +addListToUFM_C combiner fm key_elt_pairs + = foldl (\ fm (k, e) -> insert_ele combiner fm (getKey# (getUnique k)) e) + fm key_elt_pairs + +addListToUFM_directly_C combiner fm uniq_elt_pairs + = foldl (\ fm (k, e) -> insert_ele combiner fm (getKey# k) e) + fm uniq_elt_pairs +\end{code} + +Now ways of removing things from UniqFM. + +\begin{code} +delListFromUFM fm lst = foldl delFromUFM fm lst + +delFromUFM fm key = delete fm (getKey# (getUnique key)) +delFromUFM_Directly fm u = delete fm (getKey# u) + +delete EmptyUFM _ = EmptyUFM +delete fm key = del_ele fm + where + del_ele :: UniqFM a -> UniqFM a + + del_ele lf@(LeafUFM j _) + | j ==# key = EmptyUFM + | otherwise = lf -- no delete! + + del_ele nd@(NodeUFM j p t1 t2) + | j ># key + = mkSLNodeUFM (NodeUFMData j p) (del_ele t1) t2 + | otherwise + = mkLSNodeUFM (NodeUFMData j p) t1 (del_ele t2) + + del_ele _ = panic "Found EmptyUFM FM when rec-deleting" +\end{code} + +Now ways of adding two UniqFM's together. + +\begin{code} +plusUFM tr1 tr2 = plusUFM_C use_snd tr1 tr2 + +plusUFM_C f EmptyUFM tr = tr +plusUFM_C f tr EmptyUFM = tr +plusUFM_C f fm1 fm2 = mix_trees fm1 fm2 + where + mix_trees (LeafUFM i a) t2 = insert_ele (flip f) t2 i a + mix_trees t1 (LeafUFM i a) = insert_ele f t1 i a + + mix_trees left_t@(NodeUFM j p t1 t2) right_t@(NodeUFM j' p' t1' t2') + = mix_branches + (ask_about_common_ancestor + (NodeUFMData j p) + (NodeUFMData j' p')) + where + -- Given a disjoint j,j' (p >^ p' && p' >^ p): + -- + -- j j' (C j j') + -- / \ + / \ ==> / \ + -- t1 t2 t1' t2' j j' + -- / \ / \ + -- t1 t2 t1' t2' + -- Fast, Ehh ! + -- + mix_branches (NewRoot nd False) + = mkLLNodeUFM nd left_t right_t + mix_branches (NewRoot nd True) + = mkLLNodeUFM nd right_t left_t + + -- Now, if j == j': + -- + -- j j' j + -- / \ + / \ ==> / \ + -- t1 t2 t1' t2' t1 + t1' t2 + t2' + -- + mix_branches (SameRoot) + = mkSSNodeUFM (NodeUFMData j p) + (mix_trees t1 t1') + (mix_trees t2 t2') + -- Now the 4 different other ways; all like this: + -- + -- Given j >^ j' (and, say, j > j') + -- + -- j j' j + -- / \ + / \ ==> / \ + -- t1 t2 t1' t2' t1 t2 + j' + -- / \ + -- t1' t2' + mix_branches (LeftRoot Leftt) -- | trace "LL" True + = mkSLNodeUFM + (NodeUFMData j p) + (mix_trees t1 right_t) + t2 + + mix_branches (LeftRoot Rightt) -- | trace "LR" True + = mkLSNodeUFM + (NodeUFMData j p) + t1 + (mix_trees t2 right_t) + + mix_branches (RightRoot Leftt) -- | trace "RL" True + = mkSLNodeUFM + (NodeUFMData j' p') + (mix_trees left_t t1') + t2' + + mix_branches (RightRoot Rightt) -- | trace "RR" True + = mkLSNodeUFM + (NodeUFMData j' p') + t1' + (mix_trees left_t t2') + + mix_trees _ _ = panic "EmptyUFM found when inserting into plusInt" +\end{code} + +And ways of subtracting them. First the base cases, +then the full D&C approach. + +\begin{code} +minusUFM EmptyUFM _ = EmptyUFM +minusUFM t1 EmptyUFM = t1 +minusUFM fm1 fm2 = minus_trees fm1 fm2 + where + -- + -- Notice the asymetry of subtraction + -- + minus_trees lf@(LeafUFM i a) t2 = + case lookUp t2 i of + Nothing -> lf + Just b -> EmptyUFM + + minus_trees t1 (LeafUFM i _) = delete t1 i + + minus_trees left_t@(NodeUFM j p t1 t2) right_t@(NodeUFM j' p' t1' t2') + = minus_branches + (ask_about_common_ancestor + (NodeUFMData j p) + (NodeUFMData j' p')) + where + -- Given a disjoint j,j' (p >^ p' && p' >^ p): + -- + -- j j' j + -- / \ + / \ ==> / \ + -- t1 t2 t1' t2' t1 t2 + -- + -- + -- Fast, Ehh ! + -- + minus_branches (NewRoot nd _) = left_t + + -- Now, if j == j': + -- + -- j j' j + -- / \ + / \ ==> / \ + -- t1 t2 t1' t2' t1 + t1' t2 + t2' + -- + minus_branches (SameRoot) + = mkSSNodeUFM (NodeUFMData j p) + (minus_trees t1 t1') + (minus_trees t2 t2') + -- Now the 4 different other ways; all like this: + -- again, with asymatry + + -- + -- The left is above the right + -- + minus_branches (LeftRoot Leftt) + = mkSLNodeUFM + (NodeUFMData j p) + (minus_trees t1 right_t) + t2 + minus_branches (LeftRoot Rightt) + = mkLSNodeUFM + (NodeUFMData j p) + t1 + (minus_trees t2 right_t) + + -- + -- The right is above the left + -- + minus_branches (RightRoot Leftt) + = minus_trees left_t t1' + minus_branches (RightRoot Rightt) + = minus_trees left_t t2' + + minus_trees _ _ = panic "EmptyUFM found when insering into plusInt" +\end{code} + +And taking the intersection of two UniqFM's. + +\begin{code} +intersectUFM t1 t2 = intersectUFM_C use_snd t1 t2 + +intersectUFM_C f EmptyUFM _ = EmptyUFM +intersectUFM_C f _ EmptyUFM = EmptyUFM +intersectUFM_C f fm1 fm2 = intersect_trees fm1 fm2 + where + intersect_trees (LeafUFM i a) t2 = + case lookUp t2 i of + Nothing -> EmptyUFM + Just b -> mkLeafUFM i (f a b) + + intersect_trees t1 (LeafUFM i a) = + case lookUp t1 i of + Nothing -> EmptyUFM + Just b -> mkLeafUFM i (f b a) + + intersect_trees left_t@(NodeUFM j p t1 t2) right_t@(NodeUFM j' p' t1' t2') + = intersect_branches + (ask_about_common_ancestor + (NodeUFMData j p) + (NodeUFMData j' p')) + where + -- Given a disjoint j,j' (p >^ p' && p' >^ p): + -- + -- j j' + -- / \ + / \ ==> EmptyUFM + -- t1 t2 t1' t2' + -- + -- Fast, Ehh ! + -- + intersect_branches (NewRoot nd _) = EmptyUFM + + -- Now, if j == j': + -- + -- j j' j + -- / \ + / \ ==> / \ + -- t1 t2 t1' t2' t1 x t1' t2 x t2' + -- + intersect_branches (SameRoot) + = mkSSNodeUFM (NodeUFMData j p) + (intersect_trees t1 t1') + (intersect_trees t2 t2') + -- Now the 4 different other ways; all like this: + -- + -- Given j >^ j' (and, say, j > j') + -- + -- j j' t2 + j' + -- / \ + / \ ==> / \ + -- t1 t2 t1' t2' t1' t2' + -- + -- This does cut down the search space quite a bit. + + intersect_branches (LeftRoot Leftt) + = intersect_trees t1 right_t + intersect_branches (LeftRoot Rightt) + = intersect_trees t2 right_t + intersect_branches (RightRoot Leftt) + = intersect_trees left_t t1' + intersect_branches (RightRoot Rightt) + = intersect_trees left_t t2' + + intersect_trees x y = panic ("EmptyUFM found when intersecting trees") +\end{code} + +Now the usual set of `collection' operators, like map, fold, etc. + +\begin{code} +foldUFM f a (NodeUFM _ _ t1 t2) = foldUFM f (foldUFM f a t2) t1 +foldUFM f a (LeafUFM _ obj) = f obj a +foldUFM f a EmptyUFM = a +\end{code} + +\begin{code} +mapUFM fn EmptyUFM = EmptyUFM +mapUFM fn fm = map_tree fn fm + +filterUFM fn EmptyUFM = EmptyUFM +filterUFM fn fm = filter_tree pred fm + where + pred (i::FastInt) e = fn e + +filterUFM_Directly fn EmptyUFM = EmptyUFM +filterUFM_Directly fn fm = filter_tree pred fm + where + pred i e = fn (mkUniqueGrimily (iBox i)) e +\end{code} + +Note, this takes a long time, O(n), but +because we dont want to do this very often, we put up with this. +O'rable, but how often do we look at the size of +a finite map? + +\begin{code} +sizeUFM EmptyUFM = 0 +sizeUFM (NodeUFM _ _ t1 t2) = sizeUFM t1 + sizeUFM t2 +sizeUFM (LeafUFM _ _) = 1 + +isNullUFM EmptyUFM = True +isNullUFM _ = False + +-- hashing is used in VarSet.uniqAway, and should be fast +-- We use a cheap and cheerful method for now +hashUFM EmptyUFM = 0 +hashUFM (NodeUFM n _ _ _) = iBox n +hashUFM (LeafUFM n _) = iBox n +\end{code} + +looking up in a hurry is the {\em whole point} of this binary tree lark. +Lookup up a binary tree is easy (and fast). + +\begin{code} +elemUFM key fm = maybeToBool (lookupUFM fm key) +elemUFM_Directly key fm = maybeToBool (lookupUFM_Directly fm key) + +lookupUFM fm key = lookUp fm (getKey# (getUnique key)) +lookupUFM_Directly fm key = lookUp fm (getKey# key) + +lookupWithDefaultUFM fm deflt key + = case lookUp fm (getKey# (getUnique key)) of + Nothing -> deflt + Just elt -> elt + +lookupWithDefaultUFM_Directly fm deflt key + = case lookUp fm (getKey# key) of + Nothing -> deflt + Just elt -> elt + +lookUp EmptyUFM _ = Nothing +lookUp fm i = lookup_tree fm + where + lookup_tree :: UniqFM a -> Maybe a + + lookup_tree (LeafUFM j b) + | j ==# i = Just b + | otherwise = Nothing + lookup_tree (NodeUFM j p t1 t2) + | j ># i = lookup_tree t1 + | otherwise = lookup_tree t2 + + lookup_tree EmptyUFM = panic "lookup Failed" +\end{code} + +folds are *wonderful* things. + +\begin{code} +eltsUFM fm = foldUFM (:) [] fm + +ufmToList fm = fold_tree (\ iu elt rest -> (mkUniqueGrimily (iBox iu), elt) : rest) [] fm + +keysUFM fm = fold_tree (\ iu elt rest -> mkUniqueGrimily (iBox iu) : rest) [] fm + +fold_tree f a (NodeUFM _ _ t1 t2) = fold_tree f (fold_tree f a t2) t1 +fold_tree f a (LeafUFM iu obj) = f iu obj a +fold_tree f a EmptyUFM = a +\end{code} + +%************************************************************************ +%* * +\subsubsection{The @UniqFM@ type, and its functions} +%* * +%************************************************************************ + +You should always use these to build the tree. +There are 4 versions of mkNodeUFM, depending on +the strictness of the two sub-tree arguments. +The strictness is used *both* to prune out +empty trees, *and* to improve performance, +stoping needless thunks lying around. +The rule of thumb (from experence with these trees) +is make thunks strict, but data structures lazy. +If in doubt, use mkSSNodeUFM, which has the `strongest' +functionality, but may do a few needless evaluations. + +\begin{code} +mkLeafUFM :: FastInt -> a -> UniqFM a +mkLeafUFM i a = LeafUFM i a + +-- The *ONLY* ways of building a NodeUFM. + +mkSSNodeUFM (NodeUFMData j p) EmptyUFM t2 = t2 +mkSSNodeUFM (NodeUFMData j p) t1 EmptyUFM = t1 +mkSSNodeUFM (NodeUFMData j p) t1 t2 + = ASSERT(correctNodeUFM (iBox j) (iBox p) t1 t2) + NodeUFM j p t1 t2 + +mkSLNodeUFM (NodeUFMData j p) EmptyUFM t2 = t2 +mkSLNodeUFM (NodeUFMData j p) t1 t2 + = ASSERT(correctNodeUFM (iBox j) (iBox p) t1 t2) + NodeUFM j p t1 t2 + +mkLSNodeUFM (NodeUFMData j p) t1 EmptyUFM = t1 +mkLSNodeUFM (NodeUFMData j p) t1 t2 + = ASSERT(correctNodeUFM (iBox j) (iBox p) t1 t2) + NodeUFM j p t1 t2 + +mkLLNodeUFM (NodeUFMData j p) t1 t2 + = ASSERT(correctNodeUFM (iBox j) (iBox p) t1 t2) + NodeUFM j p t1 t2 + +correctNodeUFM + :: Int + -> Int + -> UniqFM a + -> UniqFM a + -> Bool + +correctNodeUFM j p t1 t2 + = correct (j-p) (j-1) p t1 && correct j ((j-1)+p) p t2 + where + correct low high _ (LeafUFM i _) + = low <= iBox i && iBox i <= high + correct low high above_p (NodeUFM j p _ _) + = low <= iBox j && iBox j <= high && above_p > iBox p + correct _ _ _ EmptyUFM = panic "EmptyUFM stored inside a tree" +\end{code} + +Note: doing SAT on this by hand seems to make it worse. Todo: Investigate, +and if necessary do $\lambda$ lifting on our functions that are bound. + +\begin{code} +insert_ele + :: (a -> a -> a) -- old -> new -> result + -> UniqFM a + -> FastInt + -> a + -> UniqFM a + +insert_ele f EmptyUFM i new = mkLeafUFM i new + +insert_ele f (LeafUFM j old) i new + | j ># i = + mkLLNodeUFM (getCommonNodeUFMData + (indexToRoot i) + (indexToRoot j)) + (mkLeafUFM i new) + (mkLeafUFM j old) + | j ==# i = mkLeafUFM j (f old new) + | otherwise = + mkLLNodeUFM (getCommonNodeUFMData + (indexToRoot i) + (indexToRoot j)) + (mkLeafUFM j old) + (mkLeafUFM i new) + +insert_ele f n@(NodeUFM j p t1 t2) i a + | i <# j + = if (i >=# (j -# p)) + then mkSLNodeUFM (NodeUFMData j p) (insert_ele f t1 i a) t2 + else mkLLNodeUFM (getCommonNodeUFMData + (indexToRoot i) + ((NodeUFMData j p))) + (mkLeafUFM i a) + n + | otherwise + = if (i <=# ((j -# _ILIT(1)) +# p)) + then mkLSNodeUFM (NodeUFMData j p) t1 (insert_ele f t2 i a) + else mkLLNodeUFM (getCommonNodeUFMData + (indexToRoot i) + ((NodeUFMData j p))) + n + (mkLeafUFM i a) +\end{code} + + + +\begin{code} +map_tree f (NodeUFM j p t1 t2) + = mkLLNodeUFM (NodeUFMData j p) (map_tree f t1) (map_tree f t2) + -- NB. lazy! we know the tree is well-formed. +map_tree f (LeafUFM i obj) + = mkLeafUFM i (f obj) +map_tree f _ = panic "map_tree failed" +\end{code} + +\begin{code} +filter_tree :: (FastInt -> a -> Bool) -> UniqFM a -> UniqFM a +filter_tree f nd@(NodeUFM j p t1 t2) + = mkSSNodeUFM (NodeUFMData j p) (filter_tree f t1) (filter_tree f t2) + +filter_tree f lf@(LeafUFM i obj) + | f i obj = lf + | otherwise = EmptyUFM +filter_tree f _ = panic "filter_tree failed" +\end{code} + +%************************************************************************ +%* * +\subsubsection{The @UniqFM@ type, and signatures for the functions} +%* * +%************************************************************************ + +Now some Utilities; + +This is the information that is held inside a NodeUFM, packaged up for +consumer use. + +\begin{code} +data NodeUFMData + = NodeUFMData FastInt + FastInt +\end{code} + +This is the information used when computing new NodeUFMs. + +\begin{code} +data Side = Leftt | Rightt -- NB: avoid 1.3 names "Left" and "Right" +data CommonRoot + = LeftRoot Side -- which side is the right down ? + | RightRoot Side -- which side is the left down ? + | SameRoot -- they are the same ! + | NewRoot NodeUFMData -- here's the new, common, root + Bool -- do you need to swap left and right ? +\end{code} + +This specifies the relationship between NodeUFMData and CalcNodeUFMData. + +\begin{code} +indexToRoot :: FastInt -> NodeUFMData + +indexToRoot i + = let + l = (_ILIT(1) :: FastInt) + in + NodeUFMData (((i `shiftR_` l) `shiftL_` l) +# _ILIT(1)) l + +getCommonNodeUFMData :: NodeUFMData -> NodeUFMData -> NodeUFMData + +getCommonNodeUFMData (NodeUFMData i p) (NodeUFMData i2 p2) + | p ==# p2 = getCommonNodeUFMData_ p j j2 + | p <# p2 = getCommonNodeUFMData_ p2 (j `quotFastInt` (p2 `quotFastInt` p)) j2 + | otherwise = getCommonNodeUFMData_ p j (j2 `quotFastInt` (p `quotFastInt` p2)) + where + l = (_ILIT(1) :: FastInt) + j = i `quotFastInt` (p `shiftL_` l) + j2 = i2 `quotFastInt` (p2 `shiftL_` l) + + getCommonNodeUFMData_ :: FastInt -> FastInt -> FastInt -> NodeUFMData + + getCommonNodeUFMData_ p j j_ + | j ==# j_ + = NodeUFMData (((j `shiftL_` l) +# l) *# p) p + | otherwise + = getCommonNodeUFMData_ (p `shiftL_` l) (j `shiftR_` l) (j_ `shiftR_` l) + +ask_about_common_ancestor :: NodeUFMData -> NodeUFMData -> CommonRoot + +ask_about_common_ancestor x@(NodeUFMData j p) y@(NodeUFMData j2 p2) + | j ==# j2 = SameRoot + | otherwise + = case getCommonNodeUFMData x y of + nd@(NodeUFMData j3 p3) + | j3 ==# j -> LeftRoot (decideSide (j ># j2)) + | j3 ==# j2 -> RightRoot (decideSide (j <# j2)) + | otherwise -> NewRoot nd (j ># j2) + where + decideSide :: Bool -> Side + decideSide True = Leftt + decideSide False = Rightt +\end{code} + +This might be better in Util.lhs ? + + +Now the bit twiddling functions. +\begin{code} +shiftL_ :: FastInt -> FastInt -> FastInt +shiftR_ :: FastInt -> FastInt -> FastInt + +#if __GLASGOW_HASKELL__ +{-# INLINE shiftL_ #-} +{-# INLINE shiftR_ #-} +#if __GLASGOW_HASKELL__ >= 503 +shiftL_ n p = word2Int#((int2Word# n) `uncheckedShiftL#` p) +#else +shiftL_ n p = word2Int#((int2Word# n) `shiftL#` p) +#endif +shiftR_ n p = word2Int#((int2Word# n) `shiftr` p) + where +#if __GLASGOW_HASKELL__ >= 503 + shiftr x y = uncheckedShiftRL# x y +#else + shiftr x y = shiftRL# x y +#endif + +#else /* not GHC */ +shiftL_ n p = n * (2 ^ p) +shiftR_ n p = n `quot` (2 ^ p) + +#endif /* not GHC */ +\end{code} + +\begin{code} +use_snd :: a -> b -> b +use_snd a b = b +\end{code} diff --git a/compiler/utils/UniqSet.lhs b/compiler/utils/UniqSet.lhs new file mode 100644 index 0000000000..129e333eb5 --- /dev/null +++ b/compiler/utils/UniqSet.lhs @@ -0,0 +1,138 @@ +% +% (c) The AQUA Project, Glasgow University, 1994-1998 +% +\section[UniqSet]{Specialised sets, for things with @Uniques@} + +Based on @UniqFMs@ (as you would expect). + +Basically, the things need to be in class @Uniquable@. + +\begin{code} +module UniqSet ( + UniqSet, -- abstract type: NOT + + mkUniqSet, uniqSetToList, emptyUniqSet, unitUniqSet, + addOneToUniqSet, addListToUniqSet, delOneFromUniqSet, delListFromUniqSet, + unionUniqSets, unionManyUniqSets, minusUniqSet, + elementOfUniqSet, mapUniqSet, intersectUniqSets, + isEmptyUniqSet, filterUniqSet, sizeUniqSet, foldUniqSet, + elemUniqSet_Directly, lookupUniqSet, hashUniqSet + ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} Name ( Name ) + +import Maybes ( maybeToBool ) +import UniqFM +import Unique ( Unique, Uniquable(..) ) + +#if ! OMIT_NATIVE_CODEGEN +#define IF_NCG(a) a +#else +#define IF_NCG(a) {--} +#endif +\end{code} + +%************************************************************************ +%* * +\subsection{The @UniqSet@ type} +%* * +%************************************************************************ + +We use @UniqFM@, with a (@getUnique@-able) @Unique@ as ``key'' +and the thing itself as the ``value'' (for later retrieval). + +\begin{code} +--data UniqSet a = MkUniqSet (FiniteMap Unique a) : NOT + +type UniqSet a = UniqFM a +#define MkUniqSet {--} + +emptyUniqSet :: UniqSet a +emptyUniqSet = MkUniqSet emptyUFM + +unitUniqSet :: Uniquable a => a -> UniqSet a +unitUniqSet x = MkUniqSet (unitUFM x x) + +uniqSetToList :: UniqSet a -> [a] +uniqSetToList (MkUniqSet set) = eltsUFM set + +foldUniqSet :: (a -> b -> b) -> b -> UniqSet a -> b +foldUniqSet k z (MkUniqSet set) = foldUFM k z set + +mkUniqSet :: Uniquable a => [a] -> UniqSet a +mkUniqSet xs = MkUniqSet (listToUFM [ (x, x) | x <- xs]) + +addOneToUniqSet :: Uniquable a => UniqSet a -> a -> UniqSet a +addOneToUniqSet (MkUniqSet set) x = MkUniqSet (addToUFM set x x) + +delOneFromUniqSet :: Uniquable a => UniqSet a -> a -> UniqSet a +delOneFromUniqSet (MkUniqSet set) x = MkUniqSet (delFromUFM set x) + +delListFromUniqSet :: Uniquable a => UniqSet a -> [a] -> UniqSet a +delListFromUniqSet (MkUniqSet set) xs = MkUniqSet (delListFromUFM set xs) + +addListToUniqSet :: Uniquable a => UniqSet a -> [a] -> UniqSet a +addListToUniqSet (MkUniqSet set) xs = MkUniqSet (addListToUFM set [(x,x) | x<-xs]) + +unionUniqSets :: UniqSet a -> UniqSet a -> UniqSet a +unionUniqSets (MkUniqSet set1) (MkUniqSet set2) = MkUniqSet (plusUFM set1 set2) + +unionManyUniqSets :: [UniqSet a] -> UniqSet a + -- = foldr unionUniqSets emptyUniqSet ss +unionManyUniqSets [] = emptyUniqSet +unionManyUniqSets [s] = s +unionManyUniqSets (s:ss) = s `unionUniqSets` unionManyUniqSets ss + +minusUniqSet :: UniqSet a -> UniqSet a -> UniqSet a +minusUniqSet (MkUniqSet set1) (MkUniqSet set2) = MkUniqSet (minusUFM set1 set2) + +filterUniqSet :: (a -> Bool) -> UniqSet a -> UniqSet a +filterUniqSet pred (MkUniqSet set) = MkUniqSet (filterUFM pred set) + +intersectUniqSets :: UniqSet a -> UniqSet a -> UniqSet a +intersectUniqSets (MkUniqSet set1) (MkUniqSet set2) = MkUniqSet (intersectUFM set1 set2) + +elementOfUniqSet :: Uniquable a => a -> UniqSet a -> Bool +elementOfUniqSet x (MkUniqSet set) = maybeToBool (lookupUFM set x) + +lookupUniqSet :: Uniquable a => UniqSet a -> a -> Maybe a +lookupUniqSet (MkUniqSet set) x = lookupUFM set x + +elemUniqSet_Directly :: Unique -> UniqSet a -> Bool +elemUniqSet_Directly x (MkUniqSet set) = maybeToBool (lookupUFM_Directly set x) + +sizeUniqSet :: UniqSet a -> Int +sizeUniqSet (MkUniqSet set) = sizeUFM set + +hashUniqSet :: UniqSet a -> Int +hashUniqSet (MkUniqSet set) = hashUFM set + +isEmptyUniqSet :: UniqSet a -> Bool +isEmptyUniqSet (MkUniqSet set) = isNullUFM set {-SLOW: sizeUFM set == 0-} + +mapUniqSet :: (a -> a) -> UniqSet a -> UniqSet a + -- VERY IMPORTANT: *assumes* that the function doesn't change the unique +mapUniqSet f (MkUniqSet set) = MkUniqSet (mapUFM f set) +\end{code} + +\begin{code} +#if __GLASGOW_HASKELL__ +{-# SPECIALIZE + addOneToUniqSet :: UniqSet Unique -> Unique -> UniqSet Unique + #-} +{- SPECIALIZE + elementOfUniqSet :: Name -> UniqSet Name -> Bool + , Unique -> UniqSet Unique -> Bool + -} +{- SPECIALIZE + mkUniqSet :: [Name] -> UniqSet Name + -} + +{- SPECIALIZE + unitUniqSet :: Name -> UniqSet Name + , Unique -> UniqSet Unique + -} +#endif +\end{code} diff --git a/compiler/utils/Util.lhs b/compiler/utils/Util.lhs new file mode 100644 index 0000000000..e692ff1aa3 --- /dev/null +++ b/compiler/utils/Util.lhs @@ -0,0 +1,1029 @@ +% +% (c) The University of Glasgow 1992-2002 +% +\section[Util]{Highly random utility functions} + +\begin{code} +module Util ( + + -- general list processing + zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal, + zipLazy, stretchZipWith, + mapFst, mapSnd, + mapAndUnzip, mapAndUnzip3, + nOfThem, filterOut, + lengthExceeds, lengthIs, lengthAtLeast, listLengthCmp, atLength, + isSingleton, only, singleton, + notNull, snocView, + + isIn, isn'tIn, + + -- for-loop + nTimes, + + -- sorting + sortLe, sortWith, + + -- transitive closures + transitiveClosure, + + -- accumulating + mapAccumL, mapAccumR, mapAccumB, + foldl2, count, all2, + + takeList, dropList, splitAtList, split, + + -- comparisons + isEqual, eqListBy, equalLength, compareLength, + thenCmp, cmpList, prefixMatch, suffixMatch, maybePrefixMatch, + removeSpaces, + + -- strictness + foldl', seqList, + + -- pairs + unzipWith, + + global, consIORef, + + -- module names + looksLikeModuleName, + + toArgs, + + -- Floating point stuff + readRational, + + -- IO-ish utilities + createDirectoryHierarchy, + doesDirNameExist, + modificationTimeIfExists, + + later, handleDyn, handle, + + -- Filename utils + Suffix, + splitFilename, suffixOf, basenameOf, joinFileExt, + splitFilenameDir, joinFileName, + splitFilename3, + splitLongestPrefix, + replaceFilenameSuffix, directoryOf, filenameOf, + replaceFilenameDirectory, + escapeSpaces, isPathSeparator, + parseSearchPath, + normalisePath, platformPath, pgmPath, + ) where + +#include "HsVersions.h" + +import Panic ( panic, trace ) +import FastTypes + +import EXCEPTION ( Exception(..), finally, throwDyn, catchDyn, throw ) +import qualified EXCEPTION as Exception +import DYNAMIC ( Typeable ) +import DATA_IOREF ( IORef, newIORef ) +import UNSAFE_IO ( unsafePerformIO ) +import DATA_IOREF ( readIORef, writeIORef ) + +import qualified List ( elem, notElem ) + +#ifndef DEBUG +import List ( zipWith4 ) +#endif + +import Monad ( when ) +import IO ( catch, isDoesNotExistError ) +import Directory ( doesDirectoryExist, createDirectory ) +import Char ( isUpper, isAlphaNum, isSpace, ord, isDigit ) +import Ratio ( (%) ) +import Time ( ClockTime ) +import Directory ( getModificationTime ) + +infixr 9 `thenCmp` +\end{code} + +%************************************************************************ +%* * +\subsection{The Eager monad} +%* * +%************************************************************************ + +The @Eager@ monad is just an encoding of continuation-passing style, +used to allow you to express "do this and then that", mainly to avoid +space leaks. It's done with a type synonym to save bureaucracy. + +\begin{code} +#if NOT_USED + +type Eager ans a = (a -> ans) -> ans + +runEager :: Eager a a -> a +runEager m = m (\x -> x) + +appEager :: Eager ans a -> (a -> ans) -> ans +appEager m cont = m cont + +thenEager :: Eager ans a -> (a -> Eager ans b) -> Eager ans b +thenEager m k cont = m (\r -> k r cont) + +returnEager :: a -> Eager ans a +returnEager v cont = cont v + +mapEager :: (a -> Eager ans b) -> [a] -> Eager ans [b] +mapEager f [] = returnEager [] +mapEager f (x:xs) = f x `thenEager` \ y -> + mapEager f xs `thenEager` \ ys -> + returnEager (y:ys) +#endif +\end{code} + +%************************************************************************ +%* * +\subsection{A for loop} +%* * +%************************************************************************ + +\begin{code} +-- Compose a function with itself n times. (nth rather than twice) +nTimes :: Int -> (a -> a) -> (a -> a) +nTimes 0 _ = id +nTimes 1 f = f +nTimes n f = f . nTimes (n-1) f +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-lists]{General list processing} +%* * +%************************************************************************ + +\begin{code} +filterOut :: (a->Bool) -> [a] -> [a] +-- Like filter, only reverses the sense of the test +filterOut p [] = [] +filterOut p (x:xs) | p x = filterOut p xs + | otherwise = x : filterOut p xs +\end{code} + +A paranoid @zip@ (and some @zipWith@ friends) that checks the lists +are of equal length. Alastair Reid thinks this should only happen if +DEBUGging on; hey, why not? + +\begin{code} +zipEqual :: String -> [a] -> [b] -> [(a,b)] +zipWithEqual :: String -> (a->b->c) -> [a]->[b]->[c] +zipWith3Equal :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d] +zipWith4Equal :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e] + +#ifndef DEBUG +zipEqual _ = zip +zipWithEqual _ = zipWith +zipWith3Equal _ = zipWith3 +zipWith4Equal _ = zipWith4 +#else +zipEqual msg [] [] = [] +zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs +zipEqual msg as bs = panic ("zipEqual: unequal lists:"++msg) + +zipWithEqual msg z (a:as) (b:bs)= z a b : zipWithEqual msg z as bs +zipWithEqual msg _ [] [] = [] +zipWithEqual msg _ _ _ = panic ("zipWithEqual: unequal lists:"++msg) + +zipWith3Equal msg z (a:as) (b:bs) (c:cs) + = z a b c : zipWith3Equal msg z as bs cs +zipWith3Equal msg _ [] [] [] = [] +zipWith3Equal msg _ _ _ _ = panic ("zipWith3Equal: unequal lists:"++msg) + +zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds) + = z a b c d : zipWith4Equal msg z as bs cs ds +zipWith4Equal msg _ [] [] [] [] = [] +zipWith4Equal msg _ _ _ _ _ = panic ("zipWith4Equal: unequal lists:"++msg) +#endif +\end{code} + +\begin{code} +-- zipLazy is lazy in the second list (observe the ~) + +zipLazy :: [a] -> [b] -> [(a,b)] +zipLazy [] ys = [] +zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys +\end{code} + + +\begin{code} +stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c] +-- (stretchZipWith p z f xs ys) stretches ys by inserting z in +-- the places where p returns *True* + +stretchZipWith p z f [] ys = [] +stretchZipWith p z f (x:xs) ys + | p x = f x z : stretchZipWith p z f xs ys + | otherwise = case ys of + [] -> [] + (y:ys) -> f x y : stretchZipWith p z f xs ys +\end{code} + + +\begin{code} +mapFst :: (a->c) -> [(a,b)] -> [(c,b)] +mapSnd :: (b->c) -> [(a,b)] -> [(a,c)] + +mapFst f xys = [(f x, y) | (x,y) <- xys] +mapSnd f xys = [(x, f y) | (x,y) <- xys] + +mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c]) + +mapAndUnzip f [] = ([],[]) +mapAndUnzip f (x:xs) + = let + (r1, r2) = f x + (rs1, rs2) = mapAndUnzip f xs + in + (r1:rs1, r2:rs2) + +mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d]) + +mapAndUnzip3 f [] = ([],[],[]) +mapAndUnzip3 f (x:xs) + = let + (r1, r2, r3) = f x + (rs1, rs2, rs3) = mapAndUnzip3 f xs + in + (r1:rs1, r2:rs2, r3:rs3) +\end{code} + +\begin{code} +nOfThem :: Int -> a -> [a] +nOfThem n thing = replicate n thing + +-- 'atLength atLen atEnd ls n' unravels list 'ls' to position 'n'; +-- specification: +-- +-- atLength atLenPred atEndPred ls n +-- | n < 0 = atLenPred n +-- | length ls < n = atEndPred (n - length ls) +-- | otherwise = atLenPred (drop n ls) +-- +atLength :: ([a] -> b) + -> (Int -> b) + -> [a] + -> Int + -> b +atLength atLenPred atEndPred ls n + | n < 0 = atEndPred n + | otherwise = go n ls + where + go n [] = atEndPred n + go 0 ls = atLenPred ls + go n (_:xs) = go (n-1) xs + +-- special cases. +lengthExceeds :: [a] -> Int -> Bool +-- (lengthExceeds xs n) = (length xs > n) +lengthExceeds = atLength notNull (const False) + +lengthAtLeast :: [a] -> Int -> Bool +lengthAtLeast = atLength notNull (== 0) + +lengthIs :: [a] -> Int -> Bool +lengthIs = atLength null (==0) + +listLengthCmp :: [a] -> Int -> Ordering +listLengthCmp = atLength atLen atEnd + where + atEnd 0 = EQ + atEnd x + | x > 0 = LT -- not yet seen 'n' elts, so list length is < n. + | otherwise = GT + + atLen [] = EQ + atLen _ = GT + +singleton :: a -> [a] +singleton x = [x] + +isSingleton :: [a] -> Bool +isSingleton [x] = True +isSingleton _ = False + +notNull :: [a] -> Bool +notNull [] = False +notNull _ = True + +snocView :: [a] -> Maybe ([a],a) + -- Split off the last element +snocView [] = Nothing +snocView xs = go [] xs + where + -- Invariant: second arg is non-empty + go acc [x] = Just (reverse acc, x) + go acc (x:xs) = go (x:acc) xs + +only :: [a] -> a +#ifdef DEBUG +only [a] = a +#else +only (a:_) = a +#endif +\end{code} + +Debugging/specialising versions of \tr{elem} and \tr{notElem} + +\begin{code} +isIn, isn'tIn :: (Eq a) => String -> a -> [a] -> Bool + +# ifndef DEBUG +isIn msg x ys = elem__ x ys +isn'tIn msg x ys = notElem__ x ys + +--these are here to be SPECIALIZEd (automagically) +elem__ _ [] = False +elem__ x (y:ys) = x==y || elem__ x ys + +notElem__ x [] = True +notElem__ x (y:ys) = x /= y && notElem__ x ys + +# else /* DEBUG */ +isIn msg x ys + = elem (_ILIT 0) x ys + where + elem i _ [] = False + elem i x (y:ys) + | i ># _ILIT 100 = trace ("Over-long elem in " ++ msg) $ + x `List.elem` (y:ys) + | otherwise = x == y || elem (i +# _ILIT(1)) x ys + +isn'tIn msg x ys + = notElem (_ILIT 0) x ys + where + notElem i x [] = True + notElem i x (y:ys) + | i ># _ILIT 100 = trace ("Over-long notElem in " ++ msg) $ + x `List.notElem` (y:ys) + | otherwise = x /= y && notElem (i +# _ILIT(1)) x ys +# endif /* DEBUG */ +\end{code} + +%************************************************************************ +%* * +\subsubsection[Utils-Carsten-mergesort]{A mergesort from Carsten} +%* * +%************************************************************************ + +\begin{display} +Date: Mon, 3 May 93 20:45:23 +0200 +From: Carsten Kehler Holst <kehler@cs.chalmers.se> +To: partain@dcs.gla.ac.uk +Subject: natural merge sort beats quick sort [ and it is prettier ] + +Here is a piece of Haskell code that I'm rather fond of. See it as an +attempt to get rid of the ridiculous quick-sort routine. group is +quite useful by itself I think it was John's idea originally though I +believe the lazy version is due to me [surprisingly complicated]. +gamma [used to be called] is called gamma because I got inspired by +the Gamma calculus. It is not very close to the calculus but does +behave less sequentially than both foldr and foldl. One could imagine +a version of gamma that took a unit element as well thereby avoiding +the problem with empty lists. + +I've tried this code against + + 1) insertion sort - as provided by haskell + 2) the normal implementation of quick sort + 3) a deforested version of quick sort due to Jan Sparud + 4) a super-optimized-quick-sort of Lennart's + +If the list is partially sorted both merge sort and in particular +natural merge sort wins. If the list is random [ average length of +rising subsequences = approx 2 ] mergesort still wins and natural +merge sort is marginally beaten by Lennart's soqs. The space +consumption of merge sort is a bit worse than Lennart's quick sort +approx a factor of 2. And a lot worse if Sparud's bug-fix [see his +fpca article ] isn't used because of group. + +have fun +Carsten +\end{display} + +\begin{code} +group :: (a -> a -> Bool) -> [a] -> [[a]] +-- Given a <= function, group finds maximal contiguous up-runs +-- or down-runs in the input list. +-- It's stable, in the sense that it never re-orders equal elements +-- +-- Date: Mon, 12 Feb 1996 15:09:41 +0000 +-- From: Andy Gill <andy@dcs.gla.ac.uk> +-- Here is a `better' definition of group. + +group p [] = [] +group p (x:xs) = group' xs x x (x :) + where + group' [] _ _ s = [s []] + group' (x:xs) x_min x_max s + | x_max `p` x = group' xs x_min x (s . (x :)) + | not (x_min `p` x) = group' xs x x_max ((x :) . s) + | otherwise = s [] : group' xs x x (x :) + -- NB: the 'not' is essential for stablity + -- x `p` x_min would reverse equal elements + +generalMerge :: (a -> a -> Bool) -> [a] -> [a] -> [a] +generalMerge p xs [] = xs +generalMerge p [] ys = ys +generalMerge p (x:xs) (y:ys) | x `p` y = x : generalMerge p xs (y:ys) + | otherwise = y : generalMerge p (x:xs) ys + +-- gamma is now called balancedFold + +balancedFold :: (a -> a -> a) -> [a] -> a +balancedFold f [] = error "can't reduce an empty list using balancedFold" +balancedFold f [x] = x +balancedFold f l = balancedFold f (balancedFold' f l) + +balancedFold' :: (a -> a -> a) -> [a] -> [a] +balancedFold' f (x:y:xs) = f x y : balancedFold' f xs +balancedFold' f xs = xs + +generalNaturalMergeSort p [] = [] +generalNaturalMergeSort p xs = (balancedFold (generalMerge p) . group p) xs + +#if NOT_USED +generalMergeSort p [] = [] +generalMergeSort p xs = (balancedFold (generalMerge p) . map (: [])) xs + +mergeSort, naturalMergeSort :: Ord a => [a] -> [a] + +mergeSort = generalMergeSort (<=) +naturalMergeSort = generalNaturalMergeSort (<=) + +mergeSortLe le = generalMergeSort le +#endif + +sortLe :: (a->a->Bool) -> [a] -> [a] +sortLe le = generalNaturalMergeSort le + +sortWith :: Ord b => (a->b) -> [a] -> [a] +sortWith get_key xs = sortLe le xs + where + x `le` y = get_key x < get_key y +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-transitive-closure]{Transitive closure} +%* * +%************************************************************************ + +This algorithm for transitive closure is straightforward, albeit quadratic. + +\begin{code} +transitiveClosure :: (a -> [a]) -- Successor function + -> (a -> a -> Bool) -- Equality predicate + -> [a] + -> [a] -- The transitive closure + +transitiveClosure succ eq xs + = go [] xs + where + go done [] = done + go done (x:xs) | x `is_in` done = go done xs + | otherwise = go (x:done) (succ x ++ xs) + + x `is_in` [] = False + x `is_in` (y:ys) | eq x y = True + | otherwise = x `is_in` ys +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-accum]{Accumulating} +%* * +%************************************************************************ + +@mapAccumL@ behaves like a combination +of @map@ and @foldl@; +it applies a function to each element of a list, passing an accumulating +parameter from left to right, and returning a final value of this +accumulator together with the new list. + +\begin{code} +mapAccumL :: (acc -> x -> (acc, y)) -- Function of elt of input list + -- and accumulator, returning new + -- accumulator and elt of result list + -> acc -- Initial accumulator + -> [x] -- Input list + -> (acc, [y]) -- Final accumulator and result list + +mapAccumL f b [] = (b, []) +mapAccumL f b (x:xs) = (b'', x':xs') where + (b', x') = f b x + (b'', xs') = mapAccumL f b' xs +\end{code} + +@mapAccumR@ does the same, but working from right to left instead. Its type is +the same as @mapAccumL@, though. + +\begin{code} +mapAccumR :: (acc -> x -> (acc, y)) -- Function of elt of input list + -- and accumulator, returning new + -- accumulator and elt of result list + -> acc -- Initial accumulator + -> [x] -- Input list + -> (acc, [y]) -- Final accumulator and result list + +mapAccumR f b [] = (b, []) +mapAccumR f b (x:xs) = (b'', x':xs') where + (b'', x') = f b' x + (b', xs') = mapAccumR f b xs +\end{code} + +Here is the bi-directional version, that works from both left and right. + +\begin{code} +mapAccumB :: (accl -> accr -> x -> (accl, accr,y)) + -- Function of elt of input list + -- and accumulator, returning new + -- accumulator and elt of result list + -> accl -- Initial accumulator from left + -> accr -- Initial accumulator from right + -> [x] -- Input list + -> (accl, accr, [y]) -- Final accumulators and result list + +mapAccumB f a b [] = (a,b,[]) +mapAccumB f a b (x:xs) = (a'',b'',y:ys) + where + (a',b'',y) = f a b' x + (a'',b',ys) = mapAccumB f a' b xs +\end{code} + +A strict version of foldl. + +\begin{code} +foldl' :: (a -> b -> a) -> a -> [b] -> a +foldl' f z xs = lgo z xs + where + lgo z [] = z + lgo z (x:xs) = (lgo $! (f z x)) xs +\end{code} + +A combination of foldl with zip. It works with equal length lists. + +\begin{code} +foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc +foldl2 k z [] [] = z +foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs + +all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool +-- True if the lists are the same length, and +-- all corresponding elements satisfy the predicate +all2 p [] [] = True +all2 p (x:xs) (y:ys) = p x y && all2 p xs ys +all2 p xs ys = False +\end{code} + +Count the number of times a predicate is true + +\begin{code} +count :: (a -> Bool) -> [a] -> Int +count p [] = 0 +count p (x:xs) | p x = 1 + count p xs + | otherwise = count p xs +\end{code} + +@splitAt@, @take@, and @drop@ but with length of another +list giving the break-off point: + +\begin{code} +takeList :: [b] -> [a] -> [a] +takeList [] _ = [] +takeList (_:xs) ls = + case ls of + [] -> [] + (y:ys) -> y : takeList xs ys + +dropList :: [b] -> [a] -> [a] +dropList [] xs = xs +dropList _ xs@[] = xs +dropList (_:xs) (_:ys) = dropList xs ys + + +splitAtList :: [b] -> [a] -> ([a], [a]) +splitAtList [] xs = ([], xs) +splitAtList _ xs@[] = (xs, xs) +splitAtList (_:xs) (y:ys) = (y:ys', ys'') + where + (ys', ys'') = splitAtList xs ys + +split :: Char -> String -> [String] +split c s = case rest of + [] -> [chunk] + _:rest -> chunk : split c rest + where (chunk, rest) = break (==c) s +\end{code} + + +%************************************************************************ +%* * +\subsection[Utils-comparison]{Comparisons} +%* * +%************************************************************************ + +\begin{code} +isEqual :: Ordering -> Bool +-- Often used in (isEqual (a `compare` b)) +isEqual GT = False +isEqual EQ = True +isEqual LT = False + +thenCmp :: Ordering -> Ordering -> Ordering +{-# INLINE thenCmp #-} +thenCmp EQ any = any +thenCmp other any = other + +eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool +eqListBy eq [] [] = True +eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys +eqListBy eq xs ys = False + +equalLength :: [a] -> [b] -> Bool +equalLength [] [] = True +equalLength (_:xs) (_:ys) = equalLength xs ys +equalLength xs ys = False + +compareLength :: [a] -> [b] -> Ordering +compareLength [] [] = EQ +compareLength (_:xs) (_:ys) = compareLength xs ys +compareLength [] _ys = LT +compareLength _xs [] = GT + +cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering + -- `cmpList' uses a user-specified comparer + +cmpList cmp [] [] = EQ +cmpList cmp [] _ = LT +cmpList cmp _ [] = GT +cmpList cmp (a:as) (b:bs) + = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx } +\end{code} + +\begin{code} +prefixMatch :: Eq a => [a] -> [a] -> Bool +prefixMatch [] _str = True +prefixMatch _pat [] = False +prefixMatch (p:ps) (s:ss) | p == s = prefixMatch ps ss + | otherwise = False + +maybePrefixMatch :: String -> String -> Maybe String +maybePrefixMatch [] rest = Just rest +maybePrefixMatch (_:_) [] = Nothing +maybePrefixMatch (p:pat) (r:rest) + | p == r = maybePrefixMatch pat rest + | otherwise = Nothing + +suffixMatch :: Eq a => [a] -> [a] -> Bool +suffixMatch pat str = prefixMatch (reverse pat) (reverse str) + +removeSpaces :: String -> String +removeSpaces = reverse . dropWhile isSpace . reverse . dropWhile isSpace +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-pairs]{Pairs} +%* * +%************************************************************************ + +The following are curried versions of @fst@ and @snd@. + +\begin{code} +#if NOT_USED +cfst :: a -> b -> a -- stranal-sem only (Note) +cfst x y = x +#endif +\end{code} + +The following provide us higher order functions that, when applied +to a function, operate on pairs. + +\begin{code} +#if NOT_USED +applyToPair :: ((a -> c),(b -> d)) -> (a,b) -> (c,d) +applyToPair (f,g) (x,y) = (f x, g y) + +applyToFst :: (a -> c) -> (a,b)-> (c,b) +applyToFst f (x,y) = (f x,y) + +applyToSnd :: (b -> d) -> (a,b) -> (a,d) +applyToSnd f (x,y) = (x,f y) +#endif +\end{code} + +\begin{code} +unzipWith :: (a -> b -> c) -> [(a, b)] -> [c] +unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs +\end{code} + +\begin{code} +seqList :: [a] -> b -> b +seqList [] b = b +seqList (x:xs) b = x `seq` seqList xs b +\end{code} + +Global variables: + +\begin{code} +global :: a -> IORef a +global a = unsafePerformIO (newIORef a) +\end{code} + +\begin{code} +consIORef :: IORef [a] -> a -> IO () +consIORef var x = do + xs <- readIORef var + writeIORef var (x:xs) +\end{code} + +Module names: + +\begin{code} +looksLikeModuleName [] = False +looksLikeModuleName (c:cs) = isUpper c && go cs + where go [] = True + go ('.':cs) = looksLikeModuleName cs + go (c:cs) = (isAlphaNum c || c == '_') && go cs +\end{code} + +Akin to @Prelude.words@, but sensitive to dquoted entities treating +them as single words. + +\begin{code} +toArgs :: String -> [String] +toArgs "" = [] +toArgs s = + case break (\ ch -> isSpace ch || ch == '"') (dropWhile isSpace s) of -- " + (w,aft) -> + (\ ws -> if null w then ws else w : ws) $ + case aft of + [] -> [] + (x:xs) + | x /= '"' -> toArgs xs + | otherwise -> + case lex aft of + ((str,rs):_) -> stripQuotes str : toArgs rs + _ -> [aft] + where + -- strip away dquotes; assume first and last chars contain quotes. + stripQuotes :: String -> String + stripQuotes ('"':xs) = init xs + stripQuotes xs = xs +\end{code} + +-- ----------------------------------------------------------------------------- +-- Floats + +\begin{code} +readRational__ :: ReadS Rational -- NB: doesn't handle leading "-" +readRational__ r = do + (n,d,s) <- readFix r + (k,t) <- readExp s + return ((n%1)*10^^(k-d), t) + where + readFix r = do + (ds,s) <- lexDecDigits r + (ds',t) <- lexDotDigits s + return (read (ds++ds'), length ds', t) + + readExp (e:s) | e `elem` "eE" = readExp' s + readExp s = return (0,s) + + readExp' ('+':s) = readDec s + readExp' ('-':s) = do + (k,t) <- readDec s + return (-k,t) + readExp' s = readDec s + + readDec s = do + (ds,r) <- nonnull isDigit s + return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ], + r) + + lexDecDigits = nonnull isDigit + + lexDotDigits ('.':s) = return (span isDigit s) + lexDotDigits s = return ("",s) + + nonnull p s = do (cs@(_:_),t) <- return (span p s) + return (cs,t) + +readRational :: String -> Rational -- NB: *does* handle a leading "-" +readRational top_s + = case top_s of + '-' : xs -> - (read_me xs) + xs -> read_me xs + where + read_me s + = case (do { (x,"") <- readRational__ s ; return x }) of + [x] -> x + [] -> error ("readRational: no parse:" ++ top_s) + _ -> error ("readRational: ambiguous parse:" ++ top_s) + + +----------------------------------------------------------------------------- +-- Create a hierarchy of directories + +createDirectoryHierarchy :: FilePath -> IO () +createDirectoryHierarchy dir = do + b <- doesDirectoryExist dir + when (not b) $ do + createDirectoryHierarchy (directoryOf dir) + createDirectory dir + +----------------------------------------------------------------------------- +-- Verify that the 'dirname' portion of a FilePath exists. +-- +doesDirNameExist :: FilePath -> IO Bool +doesDirNameExist fpath = doesDirectoryExist (directoryOf fpath) + +-- ----------------------------------------------------------------------------- +-- Exception utils + +later = flip finally + +handleDyn :: Typeable ex => (ex -> IO a) -> IO a -> IO a +handleDyn = flip catchDyn + +handle :: (Exception -> IO a) -> IO a -> IO a +#if __GLASGOW_HASKELL__ < 501 +handle = flip Exception.catchAllIO +#else +handle h f = f `Exception.catch` \e -> case e of + ExitException _ -> throw e + _ -> h e +#endif + +-- -------------------------------------------------------------- +-- check existence & modification time at the same time + +modificationTimeIfExists :: FilePath -> IO (Maybe ClockTime) +modificationTimeIfExists f = do + (do t <- getModificationTime f; return (Just t)) + `IO.catch` \e -> if isDoesNotExistError e + then return Nothing + else ioError e + +-- -------------------------------------------------------------- +-- Filename manipulation + +-- Filenames are kept "normalised" inside GHC, using '/' as the path +-- separator. On Windows these functions will also recognise '\\' as +-- the path separator, but will generally construct paths using '/'. + +type Suffix = String + +splitFilename :: String -> (String,Suffix) +splitFilename f = splitLongestPrefix f (=='.') + +basenameOf :: FilePath -> String +basenameOf = fst . splitFilename + +suffixOf :: FilePath -> Suffix +suffixOf = snd . splitFilename + +joinFileExt :: String -> String -> FilePath +joinFileExt path "" = path +joinFileExt path ext = path ++ '.':ext + +-- "foo/bar/xyzzy.ext" -> ("foo/bar", "xyzzy.ext") +splitFilenameDir :: String -> (String,String) +splitFilenameDir str + = let (dir, rest) = splitLongestPrefix str isPathSeparator + (dir', rest') | null rest = (".", dir) + | otherwise = (dir, rest) + in (dir', rest') + +-- "foo/bar/xyzzy.ext" -> ("foo/bar", "xyzzy", ".ext") +splitFilename3 :: String -> (String,String,Suffix) +splitFilename3 str + = let (dir, rest) = splitFilenameDir str + (name, ext) = splitFilename rest + in (dir, name, ext) + +joinFileName :: String -> String -> FilePath +joinFileName "" fname = fname +joinFileName "." fname = fname +joinFileName dir "" = dir +joinFileName dir fname = dir ++ '/':fname + +-- split a string at the last character where 'pred' is True, +-- returning a pair of strings. The first component holds the string +-- up (but not including) the last character for which 'pred' returned +-- True, the second whatever comes after (but also not including the +-- last character). +-- +-- If 'pred' returns False for all characters in the string, the original +-- string is returned in the first component (and the second one is just +-- empty). +splitLongestPrefix :: String -> (Char -> Bool) -> (String,String) +splitLongestPrefix str pred + | null r_pre = (str, []) + | otherwise = (reverse (tail r_pre), reverse r_suf) + -- 'tail' drops the char satisfying 'pred' + where + (r_suf, r_pre) = break pred (reverse str) + +replaceFilenameSuffix :: FilePath -> Suffix -> FilePath +replaceFilenameSuffix file suf = basenameOf file `joinFileExt` suf + +-- directoryOf strips the filename off the input string, returning +-- the directory. +directoryOf :: FilePath -> String +directoryOf = fst . splitFilenameDir + +-- filenameOf strips the directory off the input string, returning +-- the filename. +filenameOf :: FilePath -> String +filenameOf = snd . splitFilenameDir + +replaceFilenameDirectory :: FilePath -> String -> FilePath +replaceFilenameDirectory path dir = dir `joinFileName` filenameOf path + +escapeSpaces :: String -> String +escapeSpaces = foldr (\c s -> if isSpace c then '\\':c:s else c:s) "" + +isPathSeparator :: Char -> Bool +isPathSeparator ch = +#ifdef mingw32_TARGET_OS + ch == '/' || ch == '\\' +#else + ch == '/' +#endif + +-------------------------------------------------------------- +-- * Search path +-------------------------------------------------------------- + +-- | The function splits the given string to substrings +-- using the 'searchPathSeparator'. +parseSearchPath :: String -> [FilePath] +parseSearchPath path = split path + where + split :: String -> [String] + split s = + case rest' of + [] -> [chunk] + _:rest -> chunk : split rest + where + chunk = + case chunk' of +#ifdef mingw32_HOST_OS + ('\"':xs@(_:_)) | last xs == '\"' -> init xs +#endif + _ -> chunk' + + (chunk', rest') = break (==searchPathSeparator) s + +-- | A platform-specific character used to separate search path strings in +-- environment variables. The separator is a colon (\":\") on Unix and Macintosh, +-- and a semicolon (\";\") on the Windows operating system. +searchPathSeparator :: Char +#if mingw32_HOST_OS || mingw32_TARGET_OS +searchPathSeparator = ';' +#else +searchPathSeparator = ':' +#endif + +----------------------------------------------------------------------------- +-- Convert filepath into platform / MSDOS form. + +-- We maintain path names in Unix form ('/'-separated) right until +-- the last moment. On Windows we dos-ify them just before passing them +-- to the Windows command. +-- +-- The alternative, of using '/' consistently on Unix and '\' on Windows, +-- proved quite awkward. There were a lot more calls to platformPath, +-- and even on Windows we might invoke a unix-like utility (eg 'sh'), which +-- interpreted a command line 'foo\baz' as 'foobaz'. + +normalisePath :: String -> String +-- Just changes '\' to '/' + +pgmPath :: String -- Directory string in Unix format + -> String -- Program name with no directory separators + -- (e.g. copy /y) + -> String -- Program invocation string in native format + +#if defined(mingw32_HOST_OS) +--------------------- Windows version ------------------ +normalisePath xs = subst '\\' '/' xs +pgmPath dir pgm = platformPath dir ++ '\\' : pgm +platformPath p = subst '/' '\\' p + +subst a b ls = map (\ x -> if x == a then b else x) ls +#else +--------------------- Non-Windows version -------------- +normalisePath xs = xs +pgmPath dir pgm = dir ++ '/' : pgm +platformPath stuff = stuff +-------------------------------------------------------- +#endif +\end{code} |