diff options
author | Sylvain Henry <sylvain@haskus.fr> | 2020-04-20 16:54:38 +0200 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-04-26 13:55:14 -0400 |
commit | af332442123878c1b61d236dce46418efcbe8750 (patch) | |
tree | ec4b332843cdd4fedb4aa60b11b7b8dba82a0764 /compiler/GHC/Data | |
parent | b0fbfc7582fb81314dc28a056536737fb5eeaa6e (diff) | |
download | haskell-af332442123878c1b61d236dce46418efcbe8750.tar.gz |
Modules: Utils and Data (#13009)
Update Haddock submodule
Metric Increase:
haddock.compiler
Diffstat (limited to 'compiler/GHC/Data')
-rw-r--r-- | compiler/GHC/Data/Bag.hs | 335 | ||||
-rw-r--r-- | compiler/GHC/Data/Bitmap.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/Data/BooleanFormula.hs | 262 | ||||
-rw-r--r-- | compiler/GHC/Data/EnumSet.hs | 35 | ||||
-rw-r--r-- | compiler/GHC/Data/FastMutInt.hs | 61 | ||||
-rw-r--r-- | compiler/GHC/Data/FastString.hs | 693 | ||||
-rw-r--r-- | compiler/GHC/Data/FastString/Env.hs | 100 | ||||
-rw-r--r-- | compiler/GHC/Data/FiniteMap.hs | 31 | ||||
-rw-r--r-- | compiler/GHC/Data/Graph/Base.hs | 107 | ||||
-rw-r--r-- | compiler/GHC/Data/Graph/Color.hs | 375 | ||||
-rw-r--r-- | compiler/GHC/Data/Graph/Directed.hs | 524 | ||||
-rw-r--r-- | compiler/GHC/Data/Graph/Ops.hs | 698 | ||||
-rw-r--r-- | compiler/GHC/Data/Graph/Ppr.hs | 173 | ||||
-rw-r--r-- | compiler/GHC/Data/Graph/UnVar.hs | 145 | ||||
-rw-r--r-- | compiler/GHC/Data/IOEnv.hs | 219 | ||||
-rw-r--r-- | compiler/GHC/Data/List/SetOps.hs | 182 | ||||
-rw-r--r-- | compiler/GHC/Data/Maybe.hs | 114 | ||||
-rw-r--r-- | compiler/GHC/Data/OrdList.hs | 192 | ||||
-rw-r--r-- | compiler/GHC/Data/Pair.hs | 68 | ||||
-rw-r--r-- | compiler/GHC/Data/Stream.hs | 135 | ||||
-rw-r--r-- | compiler/GHC/Data/StringBuffer.hs | 334 | ||||
-rw-r--r-- | compiler/GHC/Data/TrieMap.hs | 406 |
22 files changed, 5190 insertions, 1 deletions
diff --git a/compiler/GHC/Data/Bag.hs b/compiler/GHC/Data/Bag.hs new file mode 100644 index 0000000000..aa18bec5e1 --- /dev/null +++ b/compiler/GHC/Data/Bag.hs @@ -0,0 +1,335 @@ +{- +(c) The University of Glasgow 2006 +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 + + +Bag: an unordered collection with duplicates +-} + +{-# LANGUAGE ScopedTypeVariables, CPP, DeriveFunctor #-} + +module GHC.Data.Bag ( + Bag, -- abstract type + + emptyBag, unitBag, unionBags, unionManyBags, + mapBag, + elemBag, lengthBag, + filterBag, partitionBag, partitionBagWith, + concatBag, catBagMaybes, foldBag, + isEmptyBag, isSingletonBag, consBag, snocBag, anyBag, allBag, + listToBag, bagToList, mapAccumBagL, + concatMapBag, concatMapBagPair, mapMaybeBag, + mapBagM, mapBagM_, + flatMapBagM, flatMapBagPairM, + mapAndUnzipBagM, mapAccumBagLM, + anyBagM, filterBagM + ) where + +import GHC.Prelude + +import GHC.Utils.Outputable +import GHC.Utils.Misc + +import GHC.Utils.Monad +import Control.Monad +import Data.Data +import Data.Maybe( mapMaybe ) +import Data.List ( partition, mapAccumL ) +import qualified Data.Foldable as Foldable + +infixr 3 `consBag` +infixl 3 `snocBag` + +data Bag a + = EmptyBag + | UnitBag a + | TwoBags (Bag a) (Bag a) -- INVARIANT: neither branch is empty + | ListBag [a] -- INVARIANT: the list is non-empty + deriving (Functor) + +emptyBag :: Bag a +emptyBag = EmptyBag + +unitBag :: a -> Bag a +unitBag = UnitBag + +lengthBag :: Bag a -> Int +lengthBag EmptyBag = 0 +lengthBag (UnitBag {}) = 1 +lengthBag (TwoBags b1 b2) = lengthBag b1 + lengthBag b2 +lengthBag (ListBag xs) = length xs + +elemBag :: Eq a => a -> Bag a -> Bool +elemBag _ EmptyBag = False +elemBag x (UnitBag y) = x == y +elemBag x (TwoBags b1 b2) = x `elemBag` b1 || x `elemBag` b2 +elemBag x (ListBag ys) = any (x ==) ys + +unionManyBags :: [Bag a] -> Bag a +unionManyBags xs = foldr unionBags EmptyBag xs + +-- This one is a bit stricter! The bag will get completely evaluated. + +unionBags :: Bag a -> Bag a -> Bag a +unionBags EmptyBag b = b +unionBags b EmptyBag = b +unionBags b1 b2 = TwoBags b1 b2 + +consBag :: a -> Bag a -> Bag a +snocBag :: Bag a -> a -> Bag a + +consBag elt bag = (unitBag elt) `unionBags` bag +snocBag bag elt = bag `unionBags` (unitBag elt) + +isEmptyBag :: Bag a -> Bool +isEmptyBag EmptyBag = True +isEmptyBag _ = False -- NB invariants + +isSingletonBag :: Bag a -> Bool +isSingletonBag EmptyBag = False +isSingletonBag (UnitBag _) = True +isSingletonBag (TwoBags _ _) = False -- Neither is empty +isSingletonBag (ListBag xs) = isSingleton xs + +filterBag :: (a -> Bool) -> Bag a -> Bag a +filterBag _ EmptyBag = EmptyBag +filterBag pred b@(UnitBag val) = if pred val then b else EmptyBag +filterBag pred (TwoBags b1 b2) = sat1 `unionBags` sat2 + where sat1 = filterBag pred b1 + sat2 = filterBag pred b2 +filterBag pred (ListBag vs) = listToBag (filter pred vs) + +filterBagM :: Monad m => (a -> m Bool) -> Bag a -> m (Bag a) +filterBagM _ EmptyBag = return EmptyBag +filterBagM pred b@(UnitBag val) = do + flag <- pred val + if flag then return b + else return EmptyBag +filterBagM pred (TwoBags b1 b2) = do + sat1 <- filterBagM pred b1 + sat2 <- filterBagM pred b2 + return (sat1 `unionBags` sat2) +filterBagM pred (ListBag vs) = do + sat <- filterM pred vs + return (listToBag sat) + +allBag :: (a -> Bool) -> Bag a -> Bool +allBag _ EmptyBag = True +allBag p (UnitBag v) = p v +allBag p (TwoBags b1 b2) = allBag p b1 && allBag p b2 +allBag p (ListBag xs) = all p xs + +anyBag :: (a -> Bool) -> Bag a -> Bool +anyBag _ EmptyBag = False +anyBag p (UnitBag v) = p v +anyBag p (TwoBags b1 b2) = anyBag p b1 || anyBag p b2 +anyBag p (ListBag xs) = any p xs + +anyBagM :: Monad m => (a -> m Bool) -> Bag a -> m Bool +anyBagM _ EmptyBag = return False +anyBagM p (UnitBag v) = p v +anyBagM p (TwoBags b1 b2) = do flag <- anyBagM p b1 + if flag then return True + else anyBagM p b2 +anyBagM p (ListBag xs) = anyM p xs + +concatBag :: Bag (Bag a) -> Bag a +concatBag bss = foldr add emptyBag bss + where + add bs rs = bs `unionBags` rs + +catBagMaybes :: Bag (Maybe a) -> Bag a +catBagMaybes bs = foldr add emptyBag bs + where + add Nothing rs = rs + add (Just x) rs = x `consBag` rs + +partitionBag :: (a -> Bool) -> Bag a -> (Bag a {- Satisfy predictate -}, + Bag a {- Don't -}) +partitionBag _ EmptyBag = (EmptyBag, EmptyBag) +partitionBag pred b@(UnitBag val) + = if pred val then (b, EmptyBag) else (EmptyBag, b) +partitionBag pred (TwoBags b1 b2) + = (sat1 `unionBags` sat2, fail1 `unionBags` fail2) + where (sat1, fail1) = partitionBag pred b1 + (sat2, fail2) = partitionBag pred b2 +partitionBag pred (ListBag vs) = (listToBag sats, listToBag fails) + where (sats, fails) = partition pred vs + + +partitionBagWith :: (a -> Either b c) -> Bag a + -> (Bag b {- Left -}, + Bag c {- Right -}) +partitionBagWith _ EmptyBag = (EmptyBag, EmptyBag) +partitionBagWith pred (UnitBag val) + = case pred val of + Left a -> (UnitBag a, EmptyBag) + Right b -> (EmptyBag, UnitBag b) +partitionBagWith pred (TwoBags b1 b2) + = (sat1 `unionBags` sat2, fail1 `unionBags` fail2) + where (sat1, fail1) = partitionBagWith pred b1 + (sat2, fail2) = partitionBagWith pred b2 +partitionBagWith pred (ListBag vs) = (listToBag sats, listToBag fails) + where (sats, fails) = partitionWith pred vs + +foldBag :: (r -> r -> r) -- Replace TwoBags with this; should be associative + -> (a -> r) -- Replace UnitBag with this + -> r -- Replace EmptyBag with this + -> Bag a + -> r + +{- Standard definition +foldBag t u e EmptyBag = e +foldBag t u e (UnitBag x) = u x +foldBag t u e (TwoBags b1 b2) = (foldBag t u e b1) `t` (foldBag t u e b2) +foldBag t u e (ListBag xs) = foldr (t.u) e xs +-} + +-- More tail-recursive definition, exploiting associativity of "t" +foldBag _ _ e EmptyBag = e +foldBag t u e (UnitBag x) = u x `t` e +foldBag t u e (TwoBags b1 b2) = foldBag t u (foldBag t u e b2) b1 +foldBag t u e (ListBag xs) = foldr (t.u) e xs + +mapBag :: (a -> b) -> Bag a -> Bag b +mapBag = fmap + +concatMapBag :: (a -> Bag b) -> Bag a -> Bag b +concatMapBag _ EmptyBag = EmptyBag +concatMapBag f (UnitBag x) = f x +concatMapBag f (TwoBags b1 b2) = unionBags (concatMapBag f b1) (concatMapBag f b2) +concatMapBag f (ListBag xs) = foldr (unionBags . f) emptyBag xs + +concatMapBagPair :: (a -> (Bag b, Bag c)) -> Bag a -> (Bag b, Bag c) +concatMapBagPair _ EmptyBag = (EmptyBag, EmptyBag) +concatMapBagPair f (UnitBag x) = f x +concatMapBagPair f (TwoBags b1 b2) = (unionBags r1 r2, unionBags s1 s2) + where + (r1, s1) = concatMapBagPair f b1 + (r2, s2) = concatMapBagPair f b2 +concatMapBagPair f (ListBag xs) = foldr go (emptyBag, emptyBag) xs + where + go a (s1, s2) = (unionBags r1 s1, unionBags r2 s2) + where + (r1, r2) = f a + +mapMaybeBag :: (a -> Maybe b) -> Bag a -> Bag b +mapMaybeBag _ EmptyBag = EmptyBag +mapMaybeBag f (UnitBag x) = case f x of + Nothing -> EmptyBag + Just y -> UnitBag y +mapMaybeBag f (TwoBags b1 b2) = unionBags (mapMaybeBag f b1) (mapMaybeBag f b2) +mapMaybeBag f (ListBag xs) = ListBag (mapMaybe f xs) + +mapBagM :: Monad m => (a -> m b) -> Bag a -> m (Bag b) +mapBagM _ EmptyBag = return EmptyBag +mapBagM f (UnitBag x) = do r <- f x + return (UnitBag r) +mapBagM f (TwoBags b1 b2) = do r1 <- mapBagM f b1 + r2 <- mapBagM f b2 + return (TwoBags r1 r2) +mapBagM f (ListBag xs) = do rs <- mapM f xs + return (ListBag rs) + +mapBagM_ :: Monad m => (a -> m b) -> Bag a -> m () +mapBagM_ _ EmptyBag = return () +mapBagM_ f (UnitBag x) = f x >> return () +mapBagM_ f (TwoBags b1 b2) = mapBagM_ f b1 >> mapBagM_ f b2 +mapBagM_ f (ListBag xs) = mapM_ f xs + +flatMapBagM :: Monad m => (a -> m (Bag b)) -> Bag a -> m (Bag b) +flatMapBagM _ EmptyBag = return EmptyBag +flatMapBagM f (UnitBag x) = f x +flatMapBagM f (TwoBags b1 b2) = do r1 <- flatMapBagM f b1 + r2 <- flatMapBagM f b2 + return (r1 `unionBags` r2) +flatMapBagM f (ListBag xs) = foldrM k EmptyBag xs + where + k x b2 = do { b1 <- f x; return (b1 `unionBags` b2) } + +flatMapBagPairM :: Monad m => (a -> m (Bag b, Bag c)) -> Bag a -> m (Bag b, Bag c) +flatMapBagPairM _ EmptyBag = return (EmptyBag, EmptyBag) +flatMapBagPairM f (UnitBag x) = f x +flatMapBagPairM f (TwoBags b1 b2) = do (r1,s1) <- flatMapBagPairM f b1 + (r2,s2) <- flatMapBagPairM f b2 + return (r1 `unionBags` r2, s1 `unionBags` s2) +flatMapBagPairM f (ListBag xs) = foldrM k (EmptyBag, EmptyBag) xs + where + k x (r2,s2) = do { (r1,s1) <- f x + ; return (r1 `unionBags` r2, s1 `unionBags` s2) } + +mapAndUnzipBagM :: Monad m => (a -> m (b,c)) -> Bag a -> m (Bag b, Bag c) +mapAndUnzipBagM _ EmptyBag = return (EmptyBag, EmptyBag) +mapAndUnzipBagM f (UnitBag x) = do (r,s) <- f x + return (UnitBag r, UnitBag s) +mapAndUnzipBagM f (TwoBags b1 b2) = do (r1,s1) <- mapAndUnzipBagM f b1 + (r2,s2) <- mapAndUnzipBagM f b2 + return (TwoBags r1 r2, TwoBags s1 s2) +mapAndUnzipBagM f (ListBag xs) = do ts <- mapM f xs + let (rs,ss) = unzip ts + return (ListBag rs, ListBag ss) + +mapAccumBagL ::(acc -> x -> (acc, y)) -- ^ combining function + -> acc -- ^ initial state + -> Bag x -- ^ inputs + -> (acc, Bag y) -- ^ final state, outputs +mapAccumBagL _ s EmptyBag = (s, EmptyBag) +mapAccumBagL f s (UnitBag x) = let (s1, x1) = f s x in (s1, UnitBag x1) +mapAccumBagL f s (TwoBags b1 b2) = let (s1, b1') = mapAccumBagL f s b1 + (s2, b2') = mapAccumBagL f s1 b2 + in (s2, TwoBags b1' b2') +mapAccumBagL f s (ListBag xs) = let (s', xs') = mapAccumL f s xs + in (s', ListBag xs') + +mapAccumBagLM :: Monad m + => (acc -> x -> m (acc, y)) -- ^ combining function + -> acc -- ^ initial state + -> Bag x -- ^ inputs + -> m (acc, Bag y) -- ^ final state, outputs +mapAccumBagLM _ s EmptyBag = return (s, EmptyBag) +mapAccumBagLM f s (UnitBag x) = do { (s1, x1) <- f s x; return (s1, UnitBag x1) } +mapAccumBagLM f s (TwoBags b1 b2) = do { (s1, b1') <- mapAccumBagLM f s b1 + ; (s2, b2') <- mapAccumBagLM f s1 b2 + ; return (s2, TwoBags b1' b2') } +mapAccumBagLM f s (ListBag xs) = do { (s', xs') <- mapAccumLM f s xs + ; return (s', ListBag xs') } + +listToBag :: [a] -> Bag a +listToBag [] = EmptyBag +listToBag [x] = UnitBag x +listToBag vs = ListBag vs + +bagToList :: Bag a -> [a] +bagToList b = foldr (:) [] b + +instance (Outputable a) => Outputable (Bag a) where + ppr bag = braces (pprWithCommas ppr (bagToList bag)) + +instance Data a => Data (Bag a) where + gfoldl k z b = z listToBag `k` bagToList b -- traverse abstract type abstractly + toConstr _ = abstractConstr $ "Bag("++show (typeOf (undefined::a))++")" + gunfold _ _ = error "gunfold" + dataTypeOf _ = mkNoRepType "Bag" + dataCast1 x = gcast1 x + +instance Foldable.Foldable Bag where + foldr _ z EmptyBag = z + foldr k z (UnitBag x) = k x z + foldr k z (TwoBags b1 b2) = foldr k (foldr k z b2) b1 + foldr k z (ListBag xs) = foldr k z xs + + foldl _ z EmptyBag = z + foldl k z (UnitBag x) = k z x + foldl k z (TwoBags b1 b2) = foldl k (foldl k z b1) b2 + foldl k z (ListBag xs) = foldl k z xs + + foldl' _ z EmptyBag = z + foldl' k z (UnitBag x) = k z x + foldl' k z (TwoBags b1 b2) = let r1 = foldl' k z b1 in seq r1 $ foldl' k r1 b2 + foldl' k z (ListBag xs) = foldl' k z xs + +instance Traversable Bag where + traverse _ EmptyBag = pure EmptyBag + traverse f (UnitBag x) = UnitBag <$> f x + traverse f (TwoBags b1 b2) = TwoBags <$> traverse f b1 <*> traverse f b2 + traverse f (ListBag xs) = ListBag <$> traverse f xs diff --git a/compiler/GHC/Data/Bitmap.hs b/compiler/GHC/Data/Bitmap.hs index 55700ddf9a..0b7158aa24 100644 --- a/compiler/GHC/Data/Bitmap.hs +++ b/compiler/GHC/Data/Bitmap.hs @@ -14,7 +14,7 @@ module GHC.Data.Bitmap ( mAX_SMALL_BITMAP_SIZE, ) where -import GhcPrelude +import GHC.Prelude import GHC.Platform import GHC.Runtime.Heap.Layout diff --git a/compiler/GHC/Data/BooleanFormula.hs b/compiler/GHC/Data/BooleanFormula.hs new file mode 100644 index 0000000000..15c97558eb --- /dev/null +++ b/compiler/GHC/Data/BooleanFormula.hs @@ -0,0 +1,262 @@ +{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable, + DeriveTraversable #-} + +-------------------------------------------------------------------------------- +-- | Boolean formulas without quantifiers and without negation. +-- Such a formula consists of variables, conjunctions (and), and disjunctions (or). +-- +-- This module is used to represent minimal complete definitions for classes. +-- +module GHC.Data.BooleanFormula ( + BooleanFormula(..), LBooleanFormula, + mkFalse, mkTrue, mkAnd, mkOr, mkVar, + isFalse, isTrue, + eval, simplify, isUnsatisfied, + implies, impliesAtom, + pprBooleanFormula, pprBooleanFormulaNice + ) where + +import GHC.Prelude + +import Data.List ( nub, intersperse ) +import Data.Data + +import GHC.Utils.Monad +import GHC.Utils.Outputable +import GHC.Utils.Binary +import GHC.Types.SrcLoc +import GHC.Types.Unique +import GHC.Types.Unique.Set + +---------------------------------------------------------------------- +-- Boolean formula type and smart constructors +---------------------------------------------------------------------- + +type LBooleanFormula a = Located (BooleanFormula a) + +data BooleanFormula a = Var a | And [LBooleanFormula a] | Or [LBooleanFormula a] + | Parens (LBooleanFormula a) + deriving (Eq, Data, Functor, Foldable, Traversable) + +mkVar :: a -> BooleanFormula a +mkVar = Var + +mkFalse, mkTrue :: BooleanFormula a +mkFalse = Or [] +mkTrue = And [] + +-- Convert a Bool to a BooleanFormula +mkBool :: Bool -> BooleanFormula a +mkBool False = mkFalse +mkBool True = mkTrue + +-- Make a conjunction, and try to simplify +mkAnd :: Eq a => [LBooleanFormula a] -> BooleanFormula a +mkAnd = maybe mkFalse (mkAnd' . nub) . concatMapM fromAnd + where + -- See Note [Simplification of BooleanFormulas] + fromAnd :: LBooleanFormula a -> Maybe [LBooleanFormula a] + fromAnd (L _ (And xs)) = Just xs + -- assume that xs are already simplified + -- otherwise we would need: fromAnd (And xs) = concat <$> traverse fromAnd xs + fromAnd (L _ (Or [])) = Nothing + -- in case of False we bail out, And [..,mkFalse,..] == mkFalse + fromAnd x = Just [x] + mkAnd' [x] = unLoc x + mkAnd' xs = And xs + +mkOr :: Eq a => [LBooleanFormula a] -> BooleanFormula a +mkOr = maybe mkTrue (mkOr' . nub) . concatMapM fromOr + where + -- See Note [Simplification of BooleanFormulas] + fromOr (L _ (Or xs)) = Just xs + fromOr (L _ (And [])) = Nothing + fromOr x = Just [x] + mkOr' [x] = unLoc x + mkOr' xs = Or xs + + +{- +Note [Simplification of BooleanFormulas] +~~~~~~~~~~~~~~~~~~~~~~ +The smart constructors (`mkAnd` and `mkOr`) do some attempt to simplify expressions. In particular, + 1. Collapsing nested ands and ors, so + `(mkAnd [x, And [y,z]]` + is represented as + `And [x,y,z]` + Implemented by `fromAnd`/`fromOr` + 2. Collapsing trivial ands and ors, so + `mkAnd [x]` becomes just `x`. + Implemented by mkAnd' / mkOr' + 3. Conjunction with false, disjunction with true is simplified, i.e. + `mkAnd [mkFalse,x]` becomes `mkFalse`. + 4. Common subexpression elimination: + `mkAnd [x,x,y]` is reduced to just `mkAnd [x,y]`. + +This simplification is not exhaustive, in the sense that it will not produce +the smallest possible equivalent expression. For example, +`Or [And [x,y], And [x]]` could be simplified to `And [x]`, but it currently +is not. A general simplifier would need to use something like BDDs. + +The reason behind the (crude) simplifier is to make for more user friendly +error messages. E.g. for the code + > class Foo a where + > {-# MINIMAL bar, (foo, baq | foo, quux) #-} + > instance Foo Int where + > bar = ... + > baz = ... + > quux = ... +We don't show a ridiculous error message like + Implement () and (either (`foo' and ()) or (`foo' and ())) +-} + +---------------------------------------------------------------------- +-- Evaluation and simplification +---------------------------------------------------------------------- + +isFalse :: BooleanFormula a -> Bool +isFalse (Or []) = True +isFalse _ = False + +isTrue :: BooleanFormula a -> Bool +isTrue (And []) = True +isTrue _ = False + +eval :: (a -> Bool) -> BooleanFormula a -> Bool +eval f (Var x) = f x +eval f (And xs) = all (eval f . unLoc) xs +eval f (Or xs) = any (eval f . unLoc) xs +eval f (Parens x) = eval f (unLoc x) + +-- Simplify a boolean formula. +-- The argument function should give the truth of the atoms, or Nothing if undecided. +simplify :: Eq a => (a -> Maybe Bool) -> BooleanFormula a -> BooleanFormula a +simplify f (Var a) = case f a of + Nothing -> Var a + Just b -> mkBool b +simplify f (And xs) = mkAnd (map (\(L l x) -> L l (simplify f x)) xs) +simplify f (Or xs) = mkOr (map (\(L l x) -> L l (simplify f x)) xs) +simplify f (Parens x) = simplify f (unLoc x) + +-- Test if a boolean formula is satisfied when the given values are assigned to the atoms +-- if it is, returns Nothing +-- if it is not, return (Just remainder) +isUnsatisfied :: Eq a => (a -> Bool) -> BooleanFormula a -> Maybe (BooleanFormula a) +isUnsatisfied f bf + | isTrue bf' = Nothing + | otherwise = Just bf' + where + f' x = if f x then Just True else Nothing + bf' = simplify f' bf + +-- prop_simplify: +-- eval f x == True <==> isTrue (simplify (Just . f) x) +-- eval f x == False <==> isFalse (simplify (Just . f) x) + +-- If the boolean formula holds, does that mean that the given atom is always true? +impliesAtom :: Eq a => BooleanFormula a -> a -> Bool +Var x `impliesAtom` y = x == y +And xs `impliesAtom` y = any (\x -> (unLoc x) `impliesAtom` y) xs + -- we have all of xs, so one of them implying y is enough +Or xs `impliesAtom` y = all (\x -> (unLoc x) `impliesAtom` y) xs +Parens x `impliesAtom` y = (unLoc x) `impliesAtom` y + +implies :: Uniquable a => BooleanFormula a -> BooleanFormula a -> Bool +implies e1 e2 = go (Clause emptyUniqSet [e1]) (Clause emptyUniqSet [e2]) + where + go :: Uniquable a => Clause a -> Clause a -> Bool + go l@Clause{ clauseExprs = hyp:hyps } r = + case hyp of + Var x | memberClauseAtoms x r -> True + | otherwise -> go (extendClauseAtoms l x) { clauseExprs = hyps } r + Parens hyp' -> go l { clauseExprs = unLoc hyp':hyps } r + And hyps' -> go l { clauseExprs = map unLoc hyps' ++ hyps } r + Or hyps' -> all (\hyp' -> go l { clauseExprs = unLoc hyp':hyps } r) hyps' + go l r@Clause{ clauseExprs = con:cons } = + case con of + Var x | memberClauseAtoms x l -> True + | otherwise -> go l (extendClauseAtoms r x) { clauseExprs = cons } + Parens con' -> go l r { clauseExprs = unLoc con':cons } + And cons' -> all (\con' -> go l r { clauseExprs = unLoc con':cons }) cons' + Or cons' -> go l r { clauseExprs = map unLoc cons' ++ cons } + go _ _ = False + +-- A small sequent calculus proof engine. +data Clause a = Clause { + clauseAtoms :: UniqSet a, + clauseExprs :: [BooleanFormula a] + } +extendClauseAtoms :: Uniquable a => Clause a -> a -> Clause a +extendClauseAtoms c x = c { clauseAtoms = addOneToUniqSet (clauseAtoms c) x } + +memberClauseAtoms :: Uniquable a => a -> Clause a -> Bool +memberClauseAtoms x c = x `elementOfUniqSet` clauseAtoms c + +---------------------------------------------------------------------- +-- Pretty printing +---------------------------------------------------------------------- + +-- Pretty print a BooleanFormula, +-- using the arguments as pretty printers for Var, And and Or respectively +pprBooleanFormula' :: (Rational -> a -> SDoc) + -> (Rational -> [SDoc] -> SDoc) + -> (Rational -> [SDoc] -> SDoc) + -> Rational -> BooleanFormula a -> SDoc +pprBooleanFormula' pprVar pprAnd pprOr = go + where + go p (Var x) = pprVar p x + go p (And []) = cparen (p > 0) $ empty + go p (And xs) = pprAnd p (map (go 3 . unLoc) xs) + go _ (Or []) = keyword $ text "FALSE" + go p (Or xs) = pprOr p (map (go 2 . unLoc) xs) + go p (Parens x) = go p (unLoc x) + +-- Pretty print in source syntax, "a | b | c,d,e" +pprBooleanFormula :: (Rational -> a -> SDoc) -> Rational -> BooleanFormula a -> SDoc +pprBooleanFormula pprVar = pprBooleanFormula' pprVar pprAnd pprOr + where + pprAnd p = cparen (p > 3) . fsep . punctuate comma + pprOr p = cparen (p > 2) . fsep . intersperse vbar + +-- Pretty print human in readable format, "either `a' or `b' or (`c', `d' and `e')"? +pprBooleanFormulaNice :: Outputable a => BooleanFormula a -> SDoc +pprBooleanFormulaNice = pprBooleanFormula' pprVar pprAnd pprOr 0 + where + pprVar _ = quotes . ppr + pprAnd p = cparen (p > 1) . pprAnd' + pprAnd' [] = empty + pprAnd' [x,y] = x <+> text "and" <+> y + pprAnd' xs@(_:_) = fsep (punctuate comma (init xs)) <> text ", and" <+> last xs + pprOr p xs = cparen (p > 1) $ text "either" <+> sep (intersperse (text "or") xs) + +instance (OutputableBndr a) => Outputable (BooleanFormula a) where + ppr = pprBooleanFormulaNormal + +pprBooleanFormulaNormal :: (OutputableBndr a) + => BooleanFormula a -> SDoc +pprBooleanFormulaNormal = go + where + go (Var x) = pprPrefixOcc x + go (And xs) = fsep $ punctuate comma (map (go . unLoc) xs) + go (Or []) = keyword $ text "FALSE" + go (Or xs) = fsep $ intersperse vbar (map (go . unLoc) xs) + go (Parens x) = parens (go $ unLoc x) + + +---------------------------------------------------------------------- +-- Binary +---------------------------------------------------------------------- + +instance Binary a => Binary (BooleanFormula a) where + put_ bh (Var x) = putByte bh 0 >> put_ bh x + put_ bh (And xs) = putByte bh 1 >> put_ bh xs + put_ bh (Or xs) = putByte bh 2 >> put_ bh xs + put_ bh (Parens x) = putByte bh 3 >> put_ bh x + + get bh = do + h <- getByte bh + case h of + 0 -> Var <$> get bh + 1 -> And <$> get bh + 2 -> Or <$> get bh + _ -> Parens <$> get bh diff --git a/compiler/GHC/Data/EnumSet.hs b/compiler/GHC/Data/EnumSet.hs new file mode 100644 index 0000000000..61d6bf002b --- /dev/null +++ b/compiler/GHC/Data/EnumSet.hs @@ -0,0 +1,35 @@ +-- | A tiny wrapper around 'IntSet.IntSet' for representing sets of 'Enum' +-- things. +module GHC.Data.EnumSet + ( EnumSet + , member + , insert + , delete + , toList + , fromList + , empty + ) where + +import GHC.Prelude + +import qualified Data.IntSet as IntSet + +newtype EnumSet a = EnumSet IntSet.IntSet + +member :: Enum a => a -> EnumSet a -> Bool +member x (EnumSet s) = IntSet.member (fromEnum x) s + +insert :: Enum a => a -> EnumSet a -> EnumSet a +insert x (EnumSet s) = EnumSet $ IntSet.insert (fromEnum x) s + +delete :: Enum a => a -> EnumSet a -> EnumSet a +delete x (EnumSet s) = EnumSet $ IntSet.delete (fromEnum x) s + +toList :: Enum a => EnumSet a -> [a] +toList (EnumSet s) = map toEnum $ IntSet.toList s + +fromList :: Enum a => [a] -> EnumSet a +fromList = EnumSet . IntSet.fromList . map fromEnum + +empty :: EnumSet a +empty = EnumSet IntSet.empty diff --git a/compiler/GHC/Data/FastMutInt.hs b/compiler/GHC/Data/FastMutInt.hs new file mode 100644 index 0000000000..cc81b88b01 --- /dev/null +++ b/compiler/GHC/Data/FastMutInt.hs @@ -0,0 +1,61 @@ +{-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples #-} +{-# OPTIONS_GHC -O2 #-} +-- We always optimise this, otherwise performance of a non-optimised +-- compiler is severely affected +-- +-- (c) The University of Glasgow 2002-2006 +-- +-- Unboxed mutable Ints + +module GHC.Data.FastMutInt( + FastMutInt, newFastMutInt, + readFastMutInt, writeFastMutInt, + + FastMutPtr, newFastMutPtr, + readFastMutPtr, writeFastMutPtr + ) where + +import GHC.Prelude + +import Data.Bits +import GHC.Base +import GHC.Ptr + +newFastMutInt :: IO FastMutInt +readFastMutInt :: FastMutInt -> IO Int +writeFastMutInt :: FastMutInt -> Int -> IO () + +newFastMutPtr :: IO FastMutPtr +readFastMutPtr :: FastMutPtr -> IO (Ptr a) +writeFastMutPtr :: FastMutPtr -> Ptr a -> IO () + +data FastMutInt = FastMutInt (MutableByteArray# RealWorld) + +newFastMutInt = IO $ \s -> + case newByteArray# size s of { (# s, arr #) -> + (# s, FastMutInt arr #) } + where !(I# size) = finiteBitSize (0 :: Int) + +readFastMutInt (FastMutInt arr) = IO $ \s -> + case readIntArray# arr 0# s of { (# s, i #) -> + (# s, I# i #) } + +writeFastMutInt (FastMutInt arr) (I# i) = IO $ \s -> + case writeIntArray# arr 0# i s of { s -> + (# s, () #) } + +data FastMutPtr = FastMutPtr (MutableByteArray# RealWorld) + +newFastMutPtr = IO $ \s -> + case newByteArray# size s of { (# s, arr #) -> + (# s, FastMutPtr arr #) } + -- GHC assumes 'sizeof (Int) == sizeof (Ptr a)' + where !(I# size) = finiteBitSize (0 :: Int) + +readFastMutPtr (FastMutPtr arr) = IO $ \s -> + case readAddrArray# arr 0# s of { (# s, i #) -> + (# s, Ptr i #) } + +writeFastMutPtr (FastMutPtr arr) (Ptr i) = IO $ \s -> + case writeAddrArray# arr 0# i s of { s -> + (# s, () #) } diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs new file mode 100644 index 0000000000..82f38601f5 --- /dev/null +++ b/compiler/GHC/Data/FastString.hs @@ -0,0 +1,693 @@ +-- (c) The University of Glasgow, 1997-2006 + +{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples, + GeneralizedNewtypeDeriving #-} +{-# OPTIONS_GHC -O2 -funbox-strict-fields #-} +-- We always optimise this, otherwise performance of a non-optimised +-- compiler is severely affected + +-- | +-- There are two principal string types used internally by GHC: +-- +-- ['FastString'] +-- +-- * A compact, hash-consed, representation of character strings. +-- * Comparison is O(1), and you can get a 'Unique.Unique' from them. +-- * Generated by 'fsLit'. +-- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'. +-- +-- ['PtrString'] +-- +-- * Pointer and size of a Latin-1 encoded string. +-- * Practically no operations. +-- * Outputting them is fast. +-- * Generated by 'sLit'. +-- * Turn into 'Outputable.SDoc' with 'Outputable.ptext' +-- * Requires manual memory management. +-- Improper use may lead to memory leaks or dangling pointers. +-- * It assumes Latin-1 as the encoding, therefore it cannot represent +-- arbitrary Unicode strings. +-- +-- Use 'PtrString' unless you want the facilities of 'FastString'. +module GHC.Data.FastString + ( + -- * ByteString + bytesFS, -- :: FastString -> ByteString + fastStringToByteString, -- = bytesFS (kept for haddock) + mkFastStringByteString, + fastZStringToByteString, + unsafeMkByteString, + + -- * FastZString + FastZString, + hPutFZS, + zString, + lengthFZS, + + -- * FastStrings + FastString(..), -- not abstract, for now. + + -- ** Construction + fsLit, + mkFastString, + mkFastStringBytes, + mkFastStringByteList, + mkFastStringForeignPtr, + mkFastString#, + + -- ** Deconstruction + unpackFS, -- :: FastString -> String + + -- ** Encoding + zEncodeFS, + + -- ** Operations + uniqueOfFS, + lengthFS, + nullFS, + appendFS, + headFS, + tailFS, + concatFS, + consFS, + nilFS, + isUnderscoreFS, + + -- ** Outputting + hPutFS, + + -- ** Internal + getFastStringTable, + getFastStringZEncCounter, + + -- * PtrStrings + PtrString (..), + + -- ** Construction + sLit, + mkPtrString#, + mkPtrString, + + -- ** Deconstruction + unpackPtrString, + + -- ** Operations + lengthPS + ) where + +#include "HsVersions.h" + +import GHC.Prelude as Prelude + +import GHC.Utils.Encoding +import GHC.Utils.IO.Unsafe +import GHC.Utils.Panic.Plain +import GHC.Utils.Misc + +import Control.Concurrent.MVar +import Control.DeepSeq +import Control.Monad +import Data.ByteString (ByteString) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as BSC +import qualified Data.ByteString.Internal as BS +import qualified Data.ByteString.Unsafe as BS +import Foreign.C +import GHC.Exts +import System.IO +import Data.Data +import Data.IORef +import Data.Char +import Data.Semigroup as Semi + +import GHC.IO + +import Foreign + +#if GHC_STAGE >= 2 +import GHC.Conc.Sync (sharedCAF) +#endif + +import GHC.Base ( unpackCString#, unpackNBytes# ) + + +-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString' +bytesFS :: FastString -> ByteString +bytesFS f = fs_bs f + +{-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-} +fastStringToByteString :: FastString -> ByteString +fastStringToByteString = bytesFS + +fastZStringToByteString :: FastZString -> ByteString +fastZStringToByteString (FastZString bs) = bs + +-- This will drop information if any character > '\xFF' +unsafeMkByteString :: String -> ByteString +unsafeMkByteString = BSC.pack + +hashFastString :: FastString -> Int +hashFastString (FastString _ _ bs _) + = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> + return $ hashStr (castPtr ptr) len + +-- ----------------------------------------------------------------------------- + +newtype FastZString = FastZString ByteString + deriving NFData + +hPutFZS :: Handle -> FastZString -> IO () +hPutFZS handle (FastZString bs) = BS.hPut handle bs + +zString :: FastZString -> String +zString (FastZString bs) = + inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen + +lengthFZS :: FastZString -> Int +lengthFZS (FastZString bs) = BS.length bs + +mkFastZStringString :: String -> FastZString +mkFastZStringString str = FastZString (BSC.pack str) + +-- ----------------------------------------------------------------------------- + +{-| A 'FastString' is a UTF-8 encoded string together with a unique ID. All +'FastString's are stored in a global hashtable to support fast O(1) +comparison. + +It is also associated with a lazy reference to the Z-encoding +of this string which is used by the compiler internally. +-} +data FastString = FastString { + uniq :: {-# UNPACK #-} !Int, -- unique id + n_chars :: {-# UNPACK #-} !Int, -- number of chars + fs_bs :: {-# UNPACK #-} !ByteString, + fs_zenc :: FastZString + -- ^ Lazily computed z-encoding of this string. + -- + -- Since 'FastString's are globally memoized this is computed at most + -- once for any given string. + } + +instance Eq FastString where + f1 == f2 = uniq f1 == uniq f2 + +instance Ord FastString where + -- Compares lexicographically, not by unique + a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False } + a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False } + a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True } + a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True } + max x y | x >= y = x + | otherwise = y + min x y | x <= y = x + | otherwise = y + compare a b = cmpFS a b + +instance IsString FastString where + fromString = fsLit + +instance Semi.Semigroup FastString where + (<>) = appendFS + +instance Monoid FastString where + mempty = nilFS + mappend = (Semi.<>) + mconcat = concatFS + +instance Show FastString where + show fs = show (unpackFS fs) + +instance Data FastString where + -- don't traverse? + toConstr _ = abstractConstr "FastString" + gunfold _ _ = error "gunfold" + dataTypeOf _ = mkNoRepType "FastString" + +instance NFData FastString where + rnf fs = seq fs () + +cmpFS :: FastString -> FastString -> Ordering +cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) = + if u1 == u2 then EQ else + compare (bytesFS f1) (bytesFS f2) + +foreign import ccall unsafe "memcmp" + memcmp :: Ptr a -> Ptr b -> Int -> IO Int + +-- ----------------------------------------------------------------------------- +-- Construction + +{- +Internally, the compiler will maintain a fast string symbol table, providing +sharing and fast comparison. Creation of new @FastString@s then covertly does a +lookup, re-using the @FastString@ if there was a hit. + +The design of the FastString hash table allows for lockless concurrent reads +and updates to multiple buckets with low synchronization overhead. + +See Note [Updating the FastString table] on how it's updated. +-} +data FastStringTable = FastStringTable + {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets + {-# UNPACK #-} !(IORef Int) -- number of computed z-encodings for all buckets + (Array# (IORef FastStringTableSegment)) -- concurrent segments + +data FastStringTableSegment = FastStringTableSegment + {-# UNPACK #-} !(MVar ()) -- the lock for write in each segment + {-# UNPACK #-} !(IORef Int) -- the number of elements + (MutableArray# RealWorld [FastString]) -- buckets in this segment + +{- +Following parameters are determined based on: + +* Benchmark based on testsuite/tests/utils/should_run/T14854.hs +* Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@: + on 2018-10-24, we have 13920 entries. +-} +segmentBits, numSegments, segmentMask, initialNumBuckets :: Int +segmentBits = 8 +numSegments = 256 -- bit segmentBits +segmentMask = 0xff -- bit segmentBits - 1 +initialNumBuckets = 64 + +hashToSegment# :: Int# -> Int# +hashToSegment# hash# = hash# `andI#` segmentMask# + where + !(I# segmentMask#) = segmentMask + +hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int# +hashToIndex# buckets# hash# = + (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size# + where + !(I# segmentBits#) = segmentBits + size# = sizeofMutableArray# buckets# + +maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment +maybeResizeSegment segmentRef = do + segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef + let oldSize# = sizeofMutableArray# old# + newSize# = oldSize# *# 2# + (I# n#) <- readIORef counter + if isTrue# (n# <# newSize#) -- maximum load of 1 + then return segment + else do + resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# -> + case newArray# newSize# [] s1# of + (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #) + forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do + fsList <- IO $ readArray# old# i# + forM_ fsList $ \fs -> do + let -- Shall we store in hash value in FastString instead? + !(I# hash#) = hashFastString fs + idx# = hashToIndex# new# hash# + IO $ \s1# -> + case readArray# new# idx# s1# of + (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of + s3# -> (# s3#, () #) + writeIORef segmentRef resizedSegment + return resizedSegment + +{-# NOINLINE stringTable #-} +stringTable :: FastStringTable +stringTable = unsafePerformIO $ do + let !(I# numSegments#) = numSegments + !(I# initialNumBuckets#) = initialNumBuckets + loop a# i# s1# + | isTrue# (i# ==# numSegments#) = s1# + | otherwise = case newMVar () `unIO` s1# of + (# s2#, lock #) -> case newIORef 0 `unIO` s2# of + (# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of + (# s4#, buckets# #) -> case newIORef + (FastStringTableSegment lock counter buckets#) `unIO` s4# of + (# s5#, segment #) -> case writeArray# a# i# segment s5# of + s6# -> loop a# (i# +# 1#) s6# + uid <- newIORef 603979776 -- ord '$' * 0x01000000 + n_zencs <- newIORef 0 + tab <- IO $ \s1# -> + case newArray# numSegments# (panic "string_table") s1# of + (# s2#, arr# #) -> case loop arr# 0# s2# of + s3# -> case unsafeFreezeArray# arr# s3# of + (# s4#, segments# #) -> + (# s4#, FastStringTable uid n_zencs segments# #) + + -- use the support wired into the RTS to share this CAF among all images of + -- libHSghc +#if GHC_STAGE < 2 + return tab +#else + sharedCAF tab getOrSetLibHSghcFastStringTable + +-- from the RTS; thus we cannot use this mechanism when GHC_STAGE<2; the previous +-- RTS might not have this symbol +foreign import ccall unsafe "getOrSetLibHSghcFastStringTable" + getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a) +#endif + +{- + +We include the FastString table in the `sharedCAF` mechanism because we'd like +FastStrings created by a Core plugin to have the same uniques as corresponding +strings created by the host compiler itself. For example, this allows plugins +to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or +even re-invoke the parser. + +In particular, the following little sanity test was failing in a plugin +prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not +be looked up /by the plugin/. + + let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT" + putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts + +`mkTcOcc` involves the lookup (or creation) of a FastString. Since the +plugin's FastString.string_table is empty, constructing the RdrName also +allocates new uniques for the FastStrings "GHC.NT.Type" and "NT". These +uniques are almost certainly unequal to the ones that the host compiler +originally assigned to those FastStrings. Thus the lookup fails since the +domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's +unique. + +Maintaining synchronization of the two instances of this global is rather +difficult because of the uses of `unsafePerformIO` in this module. Not +synchronizing them risks breaking the rather major invariant that two +FastStrings with the same unique have the same string. Thus we use the +lower-level `sharedCAF` mechanism that relies on Globals.c. + +-} + +mkFastString# :: Addr# -> FastString +mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr) + where ptr = Ptr a# + +{- Note [Updating the FastString table] + +We use a concurrent hashtable which contains multiple segments, each hash value +always maps to the same segment. Read is lock-free, write to the a segment +should acquire a lock for that segment to avoid race condition, writes to +different segments are independent. + +The procedure goes like this: + +1. Find out which segment to operate on based on the hash value +2. Read the relevant bucket and perform a look up of the string. +3. If it exists, return it. +4. Otherwise grab a unique ID, create a new FastString and atomically attempt + to update the relevant segment with this FastString: + + * Resize the segment by doubling the number of buckets when the number of + FastStrings in this segment grows beyond the threshold. + * Double check that the string is not in the bucket. Another thread may have + inserted it while we were creating our string. + * Return the existing FastString if it exists. The one we preemptively + created will get GCed. + * Otherwise, insert and return the string we created. +-} + +mkFastStringWith + :: (Int -> IORef Int-> IO FastString) -> Ptr Word8 -> Int -> IO FastString +mkFastStringWith mk_fs !ptr !len = do + FastStringTableSegment lock _ buckets# <- readIORef segmentRef + let idx# = hashToIndex# buckets# hash# + bucket <- IO $ readArray# buckets# idx# + res <- bucket_match bucket len ptr + case res of + Just found -> return found + Nothing -> do + -- The withMVar below is not dupable. It can lead to deadlock if it is + -- only run partially and putMVar is not called after takeMVar. + noDuplicate + n <- get_uid + new_fs <- mk_fs n n_zencs + withMVar lock $ \_ -> insert new_fs + where + !(FastStringTable uid n_zencs segments#) = stringTable + get_uid = atomicModifyIORef' uid $ \n -> (n+1,n) + + !(I# hash#) = hashStr ptr len + (# segmentRef #) = indexArray# segments# (hashToSegment# hash#) + insert fs = do + FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef + let idx# = hashToIndex# buckets# hash# + bucket <- IO $ readArray# buckets# idx# + res <- bucket_match bucket len ptr + case res of + -- The FastString was added by another thread after previous read and + -- before we acquired the write lock. + Just found -> return found + Nothing -> do + IO $ \s1# -> + case writeArray# buckets# idx# (fs: bucket) s1# of + s2# -> (# s2#, () #) + modifyIORef' counter succ + return fs + +bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString) +bucket_match [] _ _ = return Nothing +bucket_match (v@(FastString _ _ bs _):ls) len ptr + | len == BS.length bs = do + b <- BS.unsafeUseAsCString bs $ \buf -> + cmpStringPrefix ptr (castPtr buf) len + if b then return (Just v) + else bucket_match ls len ptr + | otherwise = + bucket_match ls len ptr + +mkFastStringBytes :: Ptr Word8 -> Int -> FastString +mkFastStringBytes !ptr !len = + -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is + -- idempotent. + unsafeDupablePerformIO $ + mkFastStringWith (copyNewFastString ptr len) ptr len + +-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference +-- between this and 'mkFastStringBytes' is that we don't have to copy +-- the bytes if the string is new to the table. +mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString +mkFastStringForeignPtr ptr !fp len + = mkFastStringWith (mkNewFastString fp ptr len) ptr len + +-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference +-- between this and 'mkFastStringBytes' is that we don't have to copy +-- the bytes if the string is new to the table. +mkFastStringByteString :: ByteString -> FastString +mkFastStringByteString bs = + inlinePerformIO $ + BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do + let ptr' = castPtr ptr + mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len + +-- | Creates a UTF-8 encoded 'FastString' from a 'String' +mkFastString :: String -> FastString +mkFastString str = + inlinePerformIO $ do + let l = utf8EncodedLength str + buf <- mallocForeignPtrBytes l + withForeignPtr buf $ \ptr -> do + utf8EncodeString ptr str + mkFastStringForeignPtr ptr buf l + +-- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@ +mkFastStringByteList :: [Word8] -> FastString +mkFastStringByteList str = mkFastStringByteString (BS.pack str) + +-- | Creates a (lazy) Z-encoded 'FastString' from a 'String' and account +-- the number of forced z-strings into the passed 'IORef'. +mkZFastString :: IORef Int -> ByteString -> FastZString +mkZFastString n_zencs bs = unsafePerformIO $ do + atomicModifyIORef' n_zencs $ \n -> (n+1, ()) + return $ mkFastZStringString (zEncodeString (utf8DecodeByteString bs)) + +mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int + -> IORef Int -> IO FastString +mkNewFastString fp ptr len uid n_zencs = do + let bs = BS.fromForeignPtr fp 0 len + zstr = mkZFastString n_zencs bs + n_chars <- countUTF8Chars ptr len + return (FastString uid n_chars bs zstr) + +mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int + -> IORef Int -> IO FastString +mkNewFastStringByteString bs ptr len uid n_zencs = do + let zstr = mkZFastString n_zencs bs + n_chars <- countUTF8Chars ptr len + return (FastString uid n_chars bs zstr) + +copyNewFastString :: Ptr Word8 -> Int -> Int -> IORef Int -> IO FastString +copyNewFastString ptr len uid n_zencs = do + fp <- copyBytesToForeignPtr ptr len + let bs = BS.fromForeignPtr fp 0 len + zstr = mkZFastString n_zencs bs + n_chars <- countUTF8Chars ptr len + return (FastString uid n_chars bs zstr) + +copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8) +copyBytesToForeignPtr ptr len = do + fp <- mallocForeignPtrBytes len + withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len + return fp + +cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool +cmpStringPrefix ptr1 ptr2 len = + do r <- memcmp ptr1 ptr2 len + return (r == 0) + +hashStr :: Ptr Word8 -> Int -> Int + -- use the Addr to produce a hash value between 0 & m (inclusive) +hashStr (Ptr a#) (I# len#) = loop 0# 0# + where + loop h n = + if isTrue# (n ==# len#) then + I# h + else + let + -- DO NOT move this let binding! indexCharOffAddr# reads from the + -- pointer so we need to evaluate this based on the length check + -- above. Not doing this right caused #17909. + !c = ord# (indexCharOffAddr# a# n) + !h2 = (h *# 16777619#) `xorI#` c + in + loop h2 (n +# 1#) + +-- ----------------------------------------------------------------------------- +-- Operations + +-- | Returns the length of the 'FastString' in characters +lengthFS :: FastString -> Int +lengthFS f = n_chars f + +-- | Returns @True@ if the 'FastString' is empty +nullFS :: FastString -> Bool +nullFS f = BS.null (fs_bs f) + +-- | Unpacks and decodes the FastString +unpackFS :: FastString -> String +unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs + +-- | Returns a Z-encoded version of a 'FastString'. This might be the +-- original, if it was already Z-encoded. The first time this +-- function is applied to a particular 'FastString', the results are +-- memoized. +-- +zEncodeFS :: FastString -> FastZString +zEncodeFS (FastString _ _ _ ref) = ref + +appendFS :: FastString -> FastString -> FastString +appendFS fs1 fs2 = mkFastStringByteString + $ BS.append (bytesFS fs1) (bytesFS fs2) + +concatFS :: [FastString] -> FastString +concatFS = mkFastStringByteString . BS.concat . map fs_bs + +headFS :: FastString -> Char +headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString" +headFS (FastString _ _ bs _) = + inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> + return (fst (utf8DecodeChar (castPtr ptr))) + +tailFS :: FastString -> FastString +tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString" +tailFS (FastString _ _ bs _) = + inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr -> + do let (_, n) = utf8DecodeChar (castPtr ptr) + return $! mkFastStringByteString (BS.drop n bs) + +consFS :: Char -> FastString -> FastString +consFS c fs = mkFastString (c : unpackFS fs) + +uniqueOfFS :: FastString -> Int +uniqueOfFS (FastString u _ _ _) = u + +nilFS :: FastString +nilFS = mkFastString "" + +isUnderscoreFS :: FastString -> Bool +isUnderscoreFS fs = fs == fsLit "_" + +-- ----------------------------------------------------------------------------- +-- Stats + +getFastStringTable :: IO [[[FastString]]] +getFastStringTable = + forM [0 .. numSegments - 1] $ \(I# i#) -> do + let (# segmentRef #) = indexArray# segments# i# + FastStringTableSegment _ _ buckets# <- readIORef segmentRef + let bucketSize = I# (sizeofMutableArray# buckets#) + forM [0 .. bucketSize - 1] $ \(I# j#) -> + IO $ readArray# buckets# j# + where + !(FastStringTable _ _ segments#) = stringTable + +getFastStringZEncCounter :: IO Int +getFastStringZEncCounter = readIORef n_zencs + where + !(FastStringTable _ n_zencs _) = stringTable + +-- ----------------------------------------------------------------------------- +-- Outputting 'FastString's + +-- |Outputs a 'FastString' with /no decoding at all/, that is, you +-- get the actual bytes in the 'FastString' written to the 'Handle'. +hPutFS :: Handle -> FastString -> IO () +hPutFS handle fs = BS.hPut handle $ bytesFS fs + +-- ToDo: we'll probably want an hPutFSLocal, or something, to output +-- in the current locale's encoding (for error messages and suchlike). + +-- ----------------------------------------------------------------------------- +-- PtrStrings, here for convenience only. + +-- | A 'PtrString' is a pointer to some array of Latin-1 encoded chars. +data PtrString = PtrString !(Ptr Word8) !Int + +-- | Wrap an unboxed address into a 'PtrString'. +mkPtrString# :: Addr# -> PtrString +mkPtrString# a# = PtrString (Ptr a#) (ptrStrLength (Ptr a#)) + +-- | Encode a 'String' into a newly allocated 'PtrString' using Latin-1 +-- encoding. The original string must not contain non-Latin-1 characters +-- (above codepoint @0xff@). +{-# INLINE mkPtrString #-} +mkPtrString :: String -> PtrString +mkPtrString s = + -- we don't use `unsafeDupablePerformIO` here to avoid potential memory leaks + -- and because someone might be using `eqAddr#` to check for string equality. + unsafePerformIO (do + let len = length s + p <- mallocBytes len + let + loop :: Int -> String -> IO () + loop !_ [] = return () + loop n (c:cs) = do + pokeByteOff p n (fromIntegral (ord c) :: Word8) + loop (1+n) cs + loop 0 s + return (PtrString p len) + ) + +-- | Decode a 'PtrString' back into a 'String' using Latin-1 encoding. +-- This does not free the memory associated with 'PtrString'. +unpackPtrString :: PtrString -> String +unpackPtrString (PtrString (Ptr p#) (I# n#)) = unpackNBytes# p# n# + +-- | Return the length of a 'PtrString' +lengthPS :: PtrString -> Int +lengthPS (PtrString _ n) = n + +-- ----------------------------------------------------------------------------- +-- under the carpet + +foreign import ccall unsafe "strlen" + ptrStrLength :: Ptr Word8 -> Int + +{-# NOINLINE sLit #-} +sLit :: String -> PtrString +sLit x = mkPtrString x + +{-# NOINLINE fsLit #-} +fsLit :: String -> FastString +fsLit x = mkFastString x + +{-# RULES "slit" + forall x . sLit (unpackCString# x) = mkPtrString# x #-} +{-# RULES "fslit" + forall x . fsLit (unpackCString# x) = mkFastString# x #-} diff --git a/compiler/GHC/Data/FastString/Env.hs b/compiler/GHC/Data/FastString/Env.hs new file mode 100644 index 0000000000..36fab5727c --- /dev/null +++ b/compiler/GHC/Data/FastString/Env.hs @@ -0,0 +1,100 @@ +{- +% +% (c) The University of Glasgow 2006 +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +-} + +-- | FastStringEnv: FastString environments +module GHC.Data.FastString.Env ( + -- * FastString environments (maps) + FastStringEnv, + + -- ** Manipulating these environments + mkFsEnv, + emptyFsEnv, unitFsEnv, + extendFsEnv_C, extendFsEnv_Acc, extendFsEnv, + extendFsEnvList, extendFsEnvList_C, + filterFsEnv, + plusFsEnv, plusFsEnv_C, alterFsEnv, + lookupFsEnv, lookupFsEnv_NF, delFromFsEnv, delListFromFsEnv, + elemFsEnv, mapFsEnv, + + -- * Deterministic FastString environments (maps) + DFastStringEnv, + + -- ** Manipulating these environments + mkDFsEnv, emptyDFsEnv, dFsEnvElts, lookupDFsEnv + ) where + +import GHC.Prelude + +import GHC.Types.Unique.FM +import GHC.Types.Unique.DFM +import GHC.Data.Maybe +import GHC.Data.FastString + + +-- | A non-deterministic set of FastStrings. +-- See Note [Deterministic UniqFM] in GHC.Types.Unique.DFM for explanation why it's not +-- deterministic and why it matters. Use DFastStringEnv if the set eventually +-- gets converted into a list or folded over in a way where the order +-- changes the generated code. +type FastStringEnv a = UniqFM a -- Domain is FastString + +emptyFsEnv :: FastStringEnv a +mkFsEnv :: [(FastString,a)] -> FastStringEnv a +alterFsEnv :: (Maybe a-> Maybe a) -> FastStringEnv a -> FastString -> FastStringEnv a +extendFsEnv_C :: (a->a->a) -> FastStringEnv a -> FastString -> a -> FastStringEnv a +extendFsEnv_Acc :: (a->b->b) -> (a->b) -> FastStringEnv b -> FastString -> a -> FastStringEnv b +extendFsEnv :: FastStringEnv a -> FastString -> a -> FastStringEnv a +plusFsEnv :: FastStringEnv a -> FastStringEnv a -> FastStringEnv a +plusFsEnv_C :: (a->a->a) -> FastStringEnv a -> FastStringEnv a -> FastStringEnv a +extendFsEnvList :: FastStringEnv a -> [(FastString,a)] -> FastStringEnv a +extendFsEnvList_C :: (a->a->a) -> FastStringEnv a -> [(FastString,a)] -> FastStringEnv a +delFromFsEnv :: FastStringEnv a -> FastString -> FastStringEnv a +delListFromFsEnv :: FastStringEnv a -> [FastString] -> FastStringEnv a +elemFsEnv :: FastString -> FastStringEnv a -> Bool +unitFsEnv :: FastString -> a -> FastStringEnv a +lookupFsEnv :: FastStringEnv a -> FastString -> Maybe a +lookupFsEnv_NF :: FastStringEnv a -> FastString -> a +filterFsEnv :: (elt -> Bool) -> FastStringEnv elt -> FastStringEnv elt +mapFsEnv :: (elt1 -> elt2) -> FastStringEnv elt1 -> FastStringEnv elt2 + +emptyFsEnv = emptyUFM +unitFsEnv x y = unitUFM x y +extendFsEnv x y z = addToUFM x y z +extendFsEnvList x l = addListToUFM x l +lookupFsEnv x y = lookupUFM x y +alterFsEnv = alterUFM +mkFsEnv l = listToUFM l +elemFsEnv x y = elemUFM x y +plusFsEnv x y = plusUFM x y +plusFsEnv_C f x y = plusUFM_C f x y +extendFsEnv_C f x y z = addToUFM_C f x y z +mapFsEnv f x = mapUFM f x +extendFsEnv_Acc x y z a b = addToUFM_Acc x y z a b +extendFsEnvList_C x y z = addListToUFM_C x y z +delFromFsEnv x y = delFromUFM x y +delListFromFsEnv x y = delListFromUFM x y +filterFsEnv x y = filterUFM x y + +lookupFsEnv_NF env n = expectJust "lookupFsEnv_NF" (lookupFsEnv env n) + +-- Deterministic FastStringEnv +-- See Note [Deterministic UniqFM] in GHC.Types.Unique.DFM for explanation why we need +-- DFastStringEnv. + +type DFastStringEnv a = UniqDFM a -- Domain is FastString + +emptyDFsEnv :: DFastStringEnv a +emptyDFsEnv = emptyUDFM + +dFsEnvElts :: DFastStringEnv a -> [a] +dFsEnvElts = eltsUDFM + +mkDFsEnv :: [(FastString,a)] -> DFastStringEnv a +mkDFsEnv l = listToUDFM l + +lookupDFsEnv :: DFastStringEnv a -> FastString -> Maybe a +lookupDFsEnv = lookupUDFM diff --git a/compiler/GHC/Data/FiniteMap.hs b/compiler/GHC/Data/FiniteMap.hs new file mode 100644 index 0000000000..055944d320 --- /dev/null +++ b/compiler/GHC/Data/FiniteMap.hs @@ -0,0 +1,31 @@ +-- Some extra functions to extend Data.Map + +module GHC.Data.FiniteMap ( + insertList, + insertListWith, + deleteList, + foldRight, foldRightWithKey + ) where + +import GHC.Prelude + +import Data.Map (Map) +import qualified Data.Map as Map + +insertList :: Ord key => [(key,elt)] -> Map key elt -> Map key elt +insertList xs m = foldl' (\m (k, v) -> Map.insert k v m) m xs + +insertListWith :: Ord key + => (elt -> elt -> elt) + -> [(key,elt)] + -> Map key elt + -> Map key elt +insertListWith f xs m0 = foldl' (\m (k, v) -> Map.insertWith f k v m) m0 xs + +deleteList :: Ord key => [key] -> Map key elt -> Map key elt +deleteList ks m = foldl' (flip Map.delete) m ks + +foldRight :: (elt -> a -> a) -> a -> Map key elt -> a +foldRight = Map.foldr +foldRightWithKey :: (key -> elt -> a -> a) -> a -> Map key elt -> a +foldRightWithKey = Map.foldrWithKey diff --git a/compiler/GHC/Data/Graph/Base.hs b/compiler/GHC/Data/Graph/Base.hs new file mode 100644 index 0000000000..3c40645660 --- /dev/null +++ b/compiler/GHC/Data/Graph/Base.hs @@ -0,0 +1,107 @@ + +-- | Types for the general graph colorer. +module GHC.Data.Graph.Base ( + Triv, + Graph (..), + initGraph, + graphMapModify, + + Node (..), newNode, +) + + +where + +import GHC.Prelude + +import GHC.Types.Unique.Set +import GHC.Types.Unique.FM + + +-- | A fn to check if a node is trivially colorable +-- For graphs who's color classes are disjoint then a node is 'trivially colorable' +-- when it has less neighbors and exclusions than available colors for that node. +-- +-- For graph's who's color classes overlap, ie some colors alias other colors, then +-- this can be a bit more tricky. There is a general way to calculate this, but +-- it's likely be too slow for use in the code. The coloring algorithm takes +-- a canned function which can be optimised by the user to be specific to the +-- specific graph being colored. +-- +-- for details, see "A Generalised Algorithm for Graph-Coloring Register Allocation" +-- Smith, Ramsey, Holloway - PLDI 2004. +-- +type Triv k cls color + = cls -- the class of the node we're trying to color. + -> UniqSet k -- the node's neighbors. + -> UniqSet color -- the node's exclusions. + -> Bool + + +-- | The Interference graph. +-- There used to be more fields, but they were turfed out in a previous revision. +-- maybe we'll want more later.. +-- +data Graph k cls color + = Graph { + -- | All active nodes in the graph. + graphMap :: UniqFM (Node k cls color) } + + +-- | An empty graph. +initGraph :: Graph k cls color +initGraph + = Graph + { graphMap = emptyUFM } + + +-- | Modify the finite map holding the nodes in the graph. +graphMapModify + :: (UniqFM (Node k cls color) -> UniqFM (Node k cls color)) + -> Graph k cls color -> Graph k cls color + +graphMapModify f graph + = graph { graphMap = f (graphMap graph) } + + + +-- | Graph nodes. +-- Represents a thing that can conflict with another thing. +-- For the register allocater the nodes represent registers. +-- +data Node k cls color + = Node { + -- | A unique identifier for this node. + nodeId :: k + + -- | The class of this node, + -- determines the set of colors that can be used. + , nodeClass :: cls + + -- | The color of this node, if any. + , nodeColor :: Maybe color + + -- | Neighbors which must be colored differently to this node. + , nodeConflicts :: UniqSet k + + -- | Colors that cannot be used by this node. + , nodeExclusions :: UniqSet color + + -- | Colors that this node would prefer to be, in descending order. + , nodePreference :: [color] + + -- | Neighbors that this node would like to be colored the same as. + , nodeCoalesce :: UniqSet k } + + +-- | An empty node. +newNode :: k -> cls -> Node k cls color +newNode k cls + = Node + { nodeId = k + , nodeClass = cls + , nodeColor = Nothing + , nodeConflicts = emptyUniqSet + , nodeExclusions = emptyUniqSet + , nodePreference = [] + , nodeCoalesce = emptyUniqSet } diff --git a/compiler/GHC/Data/Graph/Color.hs b/compiler/GHC/Data/Graph/Color.hs new file mode 100644 index 0000000000..948447da58 --- /dev/null +++ b/compiler/GHC/Data/Graph/Color.hs @@ -0,0 +1,375 @@ +-- | Graph Coloring. +-- This is a generic graph coloring library, abstracted over the type of +-- the node keys, nodes and colors. +-- + +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} + +module GHC.Data.Graph.Color ( + module GHC.Data.Graph.Base, + module GHC.Data.Graph.Ops, + module GHC.Data.Graph.Ppr, + colorGraph +) + +where + +import GHC.Prelude + +import GHC.Data.Graph.Base +import GHC.Data.Graph.Ops +import GHC.Data.Graph.Ppr + +import GHC.Types.Unique +import GHC.Types.Unique.FM +import GHC.Types.Unique.Set +import GHC.Utils.Outputable + +import Data.Maybe +import Data.List + + +-- | Try to color a graph with this set of colors. +-- Uses Chaitin's algorithm to color the graph. +-- The graph is scanned for nodes which are deamed 'trivially colorable'. These nodes +-- are pushed onto a stack and removed from the graph. +-- Once this process is complete the graph can be colored by removing nodes from +-- the stack (ie in reverse order) and assigning them colors different to their neighbors. +-- +colorGraph + :: ( Uniquable k, Uniquable cls, Uniquable color + , Eq cls, Ord k + , Outputable k, Outputable cls, Outputable color) + => Bool -- ^ whether to do iterative coalescing + -> Int -- ^ how many times we've tried to color this graph so far. + -> UniqFM (UniqSet color) -- ^ map of (node class -> set of colors available for this class). + -> Triv k cls color -- ^ fn to decide whether a node is trivially colorable. + -> (Graph k cls color -> k) -- ^ fn to choose a node to potentially leave uncolored if nothing is trivially colorable. + -> Graph k cls color -- ^ the graph to color. + + -> ( Graph k cls color -- the colored graph. + , UniqSet k -- the set of nodes that we couldn't find a color for. + , UniqFM k ) -- map of regs (r1 -> r2) that were coalesced + -- r1 should be replaced by r2 in the source + +colorGraph iterative spinCount colors triv spill graph0 + = let + -- If we're not doing iterative coalescing then do an aggressive coalescing first time + -- around and then conservative coalescing for subsequent passes. + -- + -- Aggressive coalescing is a quick way to get rid of many reg-reg moves. However, if + -- there is a lot of register pressure and we do it on every round then it can make the + -- graph less colorable and prevent the algorithm from converging in a sensible number + -- of cycles. + -- + (graph_coalesced, kksCoalesce1) + = if iterative + then (graph0, []) + else if spinCount == 0 + then coalesceGraph True triv graph0 + else coalesceGraph False triv graph0 + + -- run the scanner to slurp out all the trivially colorable nodes + -- (and do coalescing if iterative coalescing is enabled) + (ksTriv, ksProblems, kksCoalesce2) + = colorScan iterative triv spill graph_coalesced + + -- If iterative coalescing is enabled, the scanner will coalesce the graph as does its business. + -- We need to apply all the coalescences found by the scanner to the original + -- graph before doing assignColors. + -- + -- Because we've got the whole, non-pruned graph here we turn on aggressive coalescing + -- to force all the (conservative) coalescences found during scanning. + -- + (graph_scan_coalesced, _) + = mapAccumL (coalesceNodes True triv) graph_coalesced kksCoalesce2 + + -- color the trivially colorable nodes + -- during scanning, keys of triv nodes were added to the front of the list as they were found + -- this colors them in the reverse order, as required by the algorithm. + (graph_triv, ksNoTriv) + = assignColors colors graph_scan_coalesced ksTriv + + -- try and color the problem nodes + -- problem nodes are the ones that were left uncolored because they weren't triv. + -- theres a change we can color them here anyway. + (graph_prob, ksNoColor) + = assignColors colors graph_triv ksProblems + + -- if the trivially colorable nodes didn't color then something is probably wrong + -- with the provided triv function. + -- + in if not $ null ksNoTriv + then pprPanic "colorGraph: trivially colorable nodes didn't color!" -- empty + ( empty + $$ text "ksTriv = " <> ppr ksTriv + $$ text "ksNoTriv = " <> ppr ksNoTriv + $$ text "colors = " <> ppr colors + $$ empty + $$ dotGraph (\_ -> text "white") triv graph_triv) + + else ( graph_prob + , mkUniqSet ksNoColor -- the nodes that didn't color (spills) + , if iterative + then (listToUFM kksCoalesce2) + else (listToUFM kksCoalesce1)) + + +-- | Scan through the conflict graph separating out trivially colorable and +-- potentially uncolorable (problem) nodes. +-- +-- Checking whether a node is trivially colorable or not is a reasonably expensive operation, +-- so after a triv node is found and removed from the graph it's no good to return to the 'start' +-- of the graph and recheck a bunch of nodes that will probably still be non-trivially colorable. +-- +-- To ward against this, during each pass through the graph we collect up a list of triv nodes +-- that were found, and only remove them once we've finished the pass. The more nodes we can delete +-- at once the more likely it is that nodes we've already checked will become trivially colorable +-- for the next pass. +-- +-- TODO: add work lists to finding triv nodes is easier. +-- If we've just scanned the graph, and removed triv nodes, then the only +-- nodes that we need to rescan are the ones we've removed edges from. + +colorScan + :: ( Uniquable k, Uniquable cls, Uniquable color + , Ord k, Eq cls + , Outputable k, Outputable cls) + => Bool -- ^ whether to do iterative coalescing + -> Triv k cls color -- ^ fn to decide whether a node is trivially colorable + -> (Graph k cls color -> k) -- ^ fn to choose a node to potentially leave uncolored if nothing is trivially colorable. + -> Graph k cls color -- ^ the graph to scan + + -> ([k], [k], [(k, k)]) -- triv colorable nodes, problem nodes, pairs of nodes to coalesce + +colorScan iterative triv spill graph + = colorScan_spin iterative triv spill graph [] [] [] + +colorScan_spin + :: ( Uniquable k, Uniquable cls, Uniquable color + , Ord k, Eq cls + , Outputable k, Outputable cls) + => Bool + -> Triv k cls color + -> (Graph k cls color -> k) + -> Graph k cls color + -> [k] + -> [k] + -> [(k, k)] + -> ([k], [k], [(k, k)]) + +colorScan_spin iterative triv spill graph + ksTriv ksSpill kksCoalesce + + -- if the graph is empty then we're done + | isNullUFM $ graphMap graph + = (ksTriv, ksSpill, reverse kksCoalesce) + + -- Simplify: + -- Look for trivially colorable nodes. + -- If we can find some then remove them from the graph and go back for more. + -- + | nsTrivFound@(_:_) + <- scanGraph (\node -> triv (nodeClass node) (nodeConflicts node) (nodeExclusions node) + + -- for iterative coalescing we only want non-move related + -- nodes here + && (not iterative || isEmptyUniqSet (nodeCoalesce node))) + $ graph + + , ksTrivFound <- map nodeId nsTrivFound + , graph2 <- foldr (\k g -> let Just g' = delNode k g + in g') + graph ksTrivFound + + = colorScan_spin iterative triv spill graph2 + (ksTrivFound ++ ksTriv) + ksSpill + kksCoalesce + + -- Coalesce: + -- If we're doing iterative coalescing and no triv nodes are available + -- then it's time for a coalescing pass. + | iterative + = case coalesceGraph False triv graph of + + -- we were able to coalesce something + -- go back to Simplify and see if this frees up more nodes to be trivially colorable. + (graph2, kksCoalesceFound@(_:_)) + -> colorScan_spin iterative triv spill graph2 + ksTriv ksSpill (reverse kksCoalesceFound ++ kksCoalesce) + + -- Freeze: + -- nothing could be coalesced (or was triv), + -- time to choose a node to freeze and give up on ever coalescing it. + (graph2, []) + -> case freezeOneInGraph graph2 of + + -- we were able to freeze something + -- hopefully this will free up something for Simplify + (graph3, True) + -> colorScan_spin iterative triv spill graph3 + ksTriv ksSpill kksCoalesce + + -- we couldn't find something to freeze either + -- time for a spill + (graph3, False) + -> colorScan_spill iterative triv spill graph3 + ksTriv ksSpill kksCoalesce + + -- spill time + | otherwise + = colorScan_spill iterative triv spill graph + ksTriv ksSpill kksCoalesce + + +-- Select: +-- we couldn't find any triv nodes or things to freeze or coalesce, +-- and the graph isn't empty yet.. We'll have to choose a spill +-- candidate and leave it uncolored. +-- +colorScan_spill + :: ( Uniquable k, Uniquable cls, Uniquable color + , Ord k, Eq cls + , Outputable k, Outputable cls) + => Bool + -> Triv k cls color + -> (Graph k cls color -> k) + -> Graph k cls color + -> [k] + -> [k] + -> [(k, k)] + -> ([k], [k], [(k, k)]) + +colorScan_spill iterative triv spill graph + ksTriv ksSpill kksCoalesce + + = let kSpill = spill graph + Just graph' = delNode kSpill graph + in colorScan_spin iterative triv spill graph' + ksTriv (kSpill : ksSpill) kksCoalesce + + +-- | Try to assign a color to all these nodes. + +assignColors + :: ( Uniquable k, Uniquable cls, Uniquable color + , Outputable cls) + => UniqFM (UniqSet color) -- ^ map of (node class -> set of colors available for this class). + -> Graph k cls color -- ^ the graph + -> [k] -- ^ nodes to assign a color to. + -> ( Graph k cls color -- the colored graph + , [k]) -- the nodes that didn't color. + +assignColors colors graph ks + = assignColors' colors graph [] ks + + where assignColors' _ graph prob [] + = (graph, prob) + + assignColors' colors graph prob (k:ks) + = case assignColor colors k graph of + + -- couldn't color this node + Nothing -> assignColors' colors graph (k : prob) ks + + -- this node colored ok, so do the rest + Just graph' -> assignColors' colors graph' prob ks + + + assignColor colors u graph + | Just c <- selectColor colors graph u + = Just (setColor u c graph) + + | otherwise + = Nothing + + + +-- | Select a color for a certain node +-- taking into account preferences, neighbors and exclusions. +-- returns Nothing if no color can be assigned to this node. +-- +selectColor + :: ( Uniquable k, Uniquable cls, Uniquable color + , Outputable cls) + => UniqFM (UniqSet color) -- ^ map of (node class -> set of colors available for this class). + -> Graph k cls color -- ^ the graph + -> k -- ^ key of the node to select a color for. + -> Maybe color + +selectColor colors graph u + = let -- lookup the node + Just node = lookupNode graph u + + -- lookup the available colors for the class of this node. + colors_avail + = case lookupUFM colors (nodeClass node) of + Nothing -> pprPanic "selectColor: no colors available for class " (ppr (nodeClass node)) + Just cs -> cs + + -- find colors we can't use because they're already being used + -- by a node that conflicts with this one. + Just nsConflicts + = sequence + $ map (lookupNode graph) + $ nonDetEltsUniqSet + $ nodeConflicts node + -- See Note [Unique Determinism and code generation] + + colors_conflict = mkUniqSet + $ catMaybes + $ map nodeColor nsConflicts + + -- the prefs of our neighbors + colors_neighbor_prefs + = mkUniqSet + $ concatMap nodePreference nsConflicts + + -- colors that are still valid for us + colors_ok_ex = minusUniqSet colors_avail (nodeExclusions node) + colors_ok = minusUniqSet colors_ok_ex colors_conflict + + -- the colors that we prefer, and are still ok + colors_ok_pref = intersectUniqSets + (mkUniqSet $ nodePreference node) colors_ok + + -- the colors that we could choose while being nice to our neighbors + colors_ok_nice = minusUniqSet + colors_ok colors_neighbor_prefs + + -- the best of all possible worlds.. + colors_ok_pref_nice + = intersectUniqSets + colors_ok_nice colors_ok_pref + + -- make the decision + chooseColor + + -- everyone is happy, yay! + | not $ isEmptyUniqSet colors_ok_pref_nice + , c : _ <- filter (\x -> elementOfUniqSet x colors_ok_pref_nice) + (nodePreference node) + = Just c + + -- we've got one of our preferences + | not $ isEmptyUniqSet colors_ok_pref + , c : _ <- filter (\x -> elementOfUniqSet x colors_ok_pref) + (nodePreference node) + = Just c + + -- it wasn't a preference, but it was still ok + | not $ isEmptyUniqSet colors_ok + , c : _ <- nonDetEltsUniqSet colors_ok + -- See Note [Unique Determinism and code generation] + = Just c + + -- no colors were available for us this time. + -- looks like we're going around the loop again.. + | otherwise + = Nothing + + in chooseColor + + + diff --git a/compiler/GHC/Data/Graph/Directed.hs b/compiler/GHC/Data/Graph/Directed.hs new file mode 100644 index 0000000000..c3f397051a --- /dev/null +++ b/compiler/GHC/Data/Graph/Directed.hs @@ -0,0 +1,524 @@ +-- (c) The University of Glasgow 2006 + +{-# LANGUAGE CPP, ScopedTypeVariables, ViewPatterns #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +module GHC.Data.Graph.Directed ( + Graph, graphFromEdgedVerticesOrd, graphFromEdgedVerticesUniq, + + SCC(..), Node(..), flattenSCC, flattenSCCs, + stronglyConnCompG, + topologicalSortG, + verticesG, edgesG, hasVertexG, + reachableG, reachablesG, transposeG, + emptyG, + + findCycle, + + -- For backwards compatibility with the simpler version of Digraph + stronglyConnCompFromEdgedVerticesOrd, + stronglyConnCompFromEdgedVerticesOrdR, + stronglyConnCompFromEdgedVerticesUniq, + stronglyConnCompFromEdgedVerticesUniqR, + + -- Simple way to classify edges + EdgeType(..), classifyEdges + ) where + +#include "HsVersions.h" + +------------------------------------------------------------------------------ +-- A version of the graph algorithms described in: +-- +-- ``Lazy Depth-First Search and Linear IntGraph Algorithms in Haskell'' +-- by David King and John Launchbury +-- +-- Also included is some additional code for printing tree structures ... +-- +-- If you ever find yourself in need of algorithms for classifying edges, +-- or finding connected/biconnected components, consult the history; Sigbjorn +-- Finne contributed some implementations in 1997, although we've since +-- removed them since they were not used anywhere in GHC. +------------------------------------------------------------------------------ + + +import GHC.Prelude + +import GHC.Utils.Misc ( minWith, count ) +import GHC.Utils.Outputable +import GHC.Data.Maybe ( expectJust ) + +-- std interfaces +import Data.Maybe +import Data.Array +import Data.List hiding (transpose) +import qualified Data.Map as Map +import qualified Data.Set as Set + +import qualified Data.Graph as G +import Data.Graph hiding (Graph, Edge, transposeG, reachable) +import Data.Tree +import GHC.Types.Unique +import GHC.Types.Unique.FM + +{- +************************************************************************ +* * +* Graphs and Graph Construction +* * +************************************************************************ + +Note [Nodes, keys, vertices] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * A 'node' is a big blob of client-stuff + + * Each 'node' has a unique (client) 'key', but the latter + is in Ord and has fast comparison + + * Digraph then maps each 'key' to a Vertex (Int) which is + arranged densely in 0.n +-} + +data Graph node = Graph { + gr_int_graph :: IntGraph, + gr_vertex_to_node :: Vertex -> node, + gr_node_to_vertex :: node -> Maybe Vertex + } + +data Edge node = Edge node node + +{-| Representation for nodes of the Graph. + + * The @payload@ is user data, just carried around in this module + + * The @key@ is the node identifier. + Key has an Ord instance for performance reasons. + + * The @[key]@ are the dependencies of the node; + it's ok to have extra keys in the dependencies that + are not the key of any Node in the graph +-} +data Node key payload = DigraphNode { + node_payload :: payload, -- ^ User data + node_key :: key, -- ^ User defined node id + node_dependencies :: [key] -- ^ Dependencies/successors of the node + } + + +instance (Outputable a, Outputable b) => Outputable (Node a b) where + ppr (DigraphNode a b c) = ppr (a, b, c) + +emptyGraph :: Graph a +emptyGraph = Graph (array (1, 0) []) (error "emptyGraph") (const Nothing) + +-- See Note [Deterministic SCC] +graphFromEdgedVertices + :: ReduceFn key payload + -> [Node key payload] -- The graph; its ok for the + -- out-list to contain keys which aren't + -- a vertex key, they are ignored + -> Graph (Node key payload) +graphFromEdgedVertices _reduceFn [] = emptyGraph +graphFromEdgedVertices reduceFn edged_vertices = + Graph graph vertex_fn (key_vertex . key_extractor) + where key_extractor = node_key + (bounds, vertex_fn, key_vertex, numbered_nodes) = + reduceFn edged_vertices key_extractor + graph = array bounds [ (v, sort $ mapMaybe key_vertex ks) + | (v, (node_dependencies -> ks)) <- numbered_nodes] + -- We normalize outgoing edges by sorting on node order, so + -- that the result doesn't depend on the order of the edges + +-- See Note [Deterministic SCC] +-- See Note [reduceNodesIntoVertices implementations] +graphFromEdgedVerticesOrd + :: Ord key + => [Node key payload] -- The graph; its ok for the + -- out-list to contain keys which aren't + -- a vertex key, they are ignored + -> Graph (Node key payload) +graphFromEdgedVerticesOrd = graphFromEdgedVertices reduceNodesIntoVerticesOrd + +-- See Note [Deterministic SCC] +-- See Note [reduceNodesIntoVertices implementations] +graphFromEdgedVerticesUniq + :: Uniquable key + => [Node key payload] -- The graph; its ok for the + -- out-list to contain keys which aren't + -- a vertex key, they are ignored + -> Graph (Node key payload) +graphFromEdgedVerticesUniq = graphFromEdgedVertices reduceNodesIntoVerticesUniq + +type ReduceFn key payload = + [Node key payload] -> (Node key payload -> key) -> + (Bounds, Vertex -> Node key payload + , key -> Maybe Vertex, [(Vertex, Node key payload)]) + +{- +Note [reduceNodesIntoVertices implementations] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +reduceNodesIntoVertices is parameterized by the container type. +This is to accommodate key types that don't have an Ord instance +and hence preclude the use of Data.Map. An example of such type +would be Unique, there's no way to implement Ord Unique +deterministically. + +For such types, there's a version with a Uniquable constraint. +This leaves us with two versions of every function that depends on +reduceNodesIntoVertices, one with Ord constraint and the other with +Uniquable constraint. +For example: graphFromEdgedVerticesOrd and graphFromEdgedVerticesUniq. + +The Uniq version should be a tiny bit more efficient since it uses +Data.IntMap internally. +-} +reduceNodesIntoVertices + :: ([(key, Vertex)] -> m) + -> (key -> m -> Maybe Vertex) + -> ReduceFn key payload +reduceNodesIntoVertices fromList lookup nodes key_extractor = + (bounds, (!) vertex_map, key_vertex, numbered_nodes) + where + max_v = length nodes - 1 + bounds = (0, max_v) :: (Vertex, Vertex) + + -- Keep the order intact to make the result depend on input order + -- instead of key order + numbered_nodes = zip [0..] nodes + vertex_map = array bounds numbered_nodes + + key_map = fromList + [ (key_extractor node, v) | (v, node) <- numbered_nodes ] + key_vertex k = lookup k key_map + +-- See Note [reduceNodesIntoVertices implementations] +reduceNodesIntoVerticesOrd :: Ord key => ReduceFn key payload +reduceNodesIntoVerticesOrd = reduceNodesIntoVertices Map.fromList Map.lookup + +-- See Note [reduceNodesIntoVertices implementations] +reduceNodesIntoVerticesUniq :: Uniquable key => ReduceFn key payload +reduceNodesIntoVerticesUniq = reduceNodesIntoVertices listToUFM (flip lookupUFM) + +{- +************************************************************************ +* * +* SCC +* * +************************************************************************ +-} + +type WorkItem key payload + = (Node key payload, -- Tip of the path + [payload]) -- Rest of the path; + -- [a,b,c] means c depends on b, b depends on a + +-- | Find a reasonably short cycle a->b->c->a, in a strongly +-- connected component. The input nodes are presumed to be +-- a SCC, so you can start anywhere. +findCycle :: forall payload key. Ord key + => [Node key payload] -- The nodes. The dependencies can + -- contain extra keys, which are ignored + -> Maybe [payload] -- A cycle, starting with node + -- so each depends on the next +findCycle graph + = go Set.empty (new_work root_deps []) [] + where + env :: Map.Map key (Node key payload) + env = Map.fromList [ (node_key node, node) | node <- graph ] + + -- Find the node with fewest dependencies among the SCC modules + -- This is just a heuristic to find some plausible root module + root :: Node key payload + root = fst (minWith snd [ (node, count (`Map.member` env) + (node_dependencies node)) + | node <- graph ]) + DigraphNode root_payload root_key root_deps = root + + + -- 'go' implements Dijkstra's algorithm, more or less + go :: Set.Set key -- Visited + -> [WorkItem key payload] -- Work list, items length n + -> [WorkItem key payload] -- Work list, items length n+1 + -> Maybe [payload] -- Returned cycle + -- Invariant: in a call (go visited ps qs), + -- visited = union (map tail (ps ++ qs)) + + go _ [] [] = Nothing -- No cycles + go visited [] qs = go visited qs [] + go visited (((DigraphNode payload key deps), path) : ps) qs + | key == root_key = Just (root_payload : reverse path) + | key `Set.member` visited = go visited ps qs + | key `Map.notMember` env = go visited ps qs + | otherwise = go (Set.insert key visited) + ps (new_qs ++ qs) + where + new_qs = new_work deps (payload : path) + + new_work :: [key] -> [payload] -> [WorkItem key payload] + new_work deps path = [ (n, path) | Just n <- map (`Map.lookup` env) deps ] + +{- +************************************************************************ +* * +* Strongly Connected Component wrappers for Graph +* * +************************************************************************ + +Note: the components are returned topologically sorted: later components +depend on earlier ones, but not vice versa i.e. later components only have +edges going from them to earlier ones. +-} + +{- +Note [Deterministic SCC] +~~~~~~~~~~~~~~~~~~~~~~~~ +stronglyConnCompFromEdgedVerticesUniq, +stronglyConnCompFromEdgedVerticesUniqR, +stronglyConnCompFromEdgedVerticesOrd and +stronglyConnCompFromEdgedVerticesOrdR +provide a following guarantee: +Given a deterministically ordered list of nodes it returns a deterministically +ordered list of strongly connected components, where the list of vertices +in an SCC is also deterministically ordered. +Note that the order of edges doesn't need to be deterministic for this to work. +We use the order of nodes to normalize the order of edges. +-} + +stronglyConnCompG :: Graph node -> [SCC node] +stronglyConnCompG graph = decodeSccs graph forest + where forest = {-# SCC "Digraph.scc" #-} scc (gr_int_graph graph) + +decodeSccs :: Graph node -> Forest Vertex -> [SCC node] +decodeSccs Graph { gr_int_graph = graph, gr_vertex_to_node = vertex_fn } forest + = map decode forest + where + decode (Node v []) | mentions_itself v = CyclicSCC [vertex_fn v] + | otherwise = AcyclicSCC (vertex_fn v) + decode other = CyclicSCC (dec other []) + where dec (Node v ts) vs = vertex_fn v : foldr dec vs ts + mentions_itself v = v `elem` (graph ! v) + + +-- The following two versions are provided for backwards compatibility: +-- See Note [Deterministic SCC] +-- See Note [reduceNodesIntoVertices implementations] +stronglyConnCompFromEdgedVerticesOrd + :: Ord key + => [Node key payload] + -> [SCC payload] +stronglyConnCompFromEdgedVerticesOrd + = map (fmap node_payload) . stronglyConnCompFromEdgedVerticesOrdR + +-- The following two versions are provided for backwards compatibility: +-- See Note [Deterministic SCC] +-- See Note [reduceNodesIntoVertices implementations] +stronglyConnCompFromEdgedVerticesUniq + :: Uniquable key + => [Node key payload] + -> [SCC payload] +stronglyConnCompFromEdgedVerticesUniq + = map (fmap node_payload) . stronglyConnCompFromEdgedVerticesUniqR + +-- The "R" interface is used when you expect to apply SCC to +-- (some of) the result of SCC, so you don't want to lose the dependency info +-- See Note [Deterministic SCC] +-- See Note [reduceNodesIntoVertices implementations] +stronglyConnCompFromEdgedVerticesOrdR + :: Ord key + => [Node key payload] + -> [SCC (Node key payload)] +stronglyConnCompFromEdgedVerticesOrdR = + stronglyConnCompG . graphFromEdgedVertices reduceNodesIntoVerticesOrd + +-- The "R" interface is used when you expect to apply SCC to +-- (some of) the result of SCC, so you don't want to lose the dependency info +-- See Note [Deterministic SCC] +-- See Note [reduceNodesIntoVertices implementations] +stronglyConnCompFromEdgedVerticesUniqR + :: Uniquable key + => [Node key payload] + -> [SCC (Node key payload)] +stronglyConnCompFromEdgedVerticesUniqR = + stronglyConnCompG . graphFromEdgedVertices reduceNodesIntoVerticesUniq + +{- +************************************************************************ +* * +* Misc wrappers for Graph +* * +************************************************************************ +-} + +topologicalSortG :: Graph node -> [node] +topologicalSortG graph = map (gr_vertex_to_node graph) result + where result = {-# SCC "Digraph.topSort" #-} topSort (gr_int_graph graph) + +reachableG :: Graph node -> node -> [node] +reachableG graph from = map (gr_vertex_to_node graph) result + where from_vertex = expectJust "reachableG" (gr_node_to_vertex graph from) + result = {-# SCC "Digraph.reachable" #-} reachable (gr_int_graph graph) [from_vertex] + +-- | Given a list of roots return all reachable nodes. +reachablesG :: Graph node -> [node] -> [node] +reachablesG graph froms = map (gr_vertex_to_node graph) result + where result = {-# SCC "Digraph.reachable" #-} + reachable (gr_int_graph graph) vs + vs = [ v | Just v <- map (gr_node_to_vertex graph) froms ] + +hasVertexG :: Graph node -> node -> Bool +hasVertexG graph node = isJust $ gr_node_to_vertex graph node + +verticesG :: Graph node -> [node] +verticesG graph = map (gr_vertex_to_node graph) $ vertices (gr_int_graph graph) + +edgesG :: Graph node -> [Edge node] +edgesG graph = map (\(v1, v2) -> Edge (v2n v1) (v2n v2)) $ edges (gr_int_graph graph) + where v2n = gr_vertex_to_node graph + +transposeG :: Graph node -> Graph node +transposeG graph = Graph (G.transposeG (gr_int_graph graph)) + (gr_vertex_to_node graph) + (gr_node_to_vertex graph) + +emptyG :: Graph node -> Bool +emptyG g = graphEmpty (gr_int_graph g) + +{- +************************************************************************ +* * +* Showing Graphs +* * +************************************************************************ +-} + +instance Outputable node => Outputable (Graph node) where + ppr graph = vcat [ + hang (text "Vertices:") 2 (vcat (map ppr $ verticesG graph)), + hang (text "Edges:") 2 (vcat (map ppr $ edgesG graph)) + ] + +instance Outputable node => Outputable (Edge node) where + ppr (Edge from to) = ppr from <+> text "->" <+> ppr to + +graphEmpty :: G.Graph -> Bool +graphEmpty g = lo > hi + where (lo, hi) = bounds g + +{- +************************************************************************ +* * +* IntGraphs +* * +************************************************************************ +-} + +type IntGraph = G.Graph + +{- +------------------------------------------------------------ +-- Depth first search numbering +------------------------------------------------------------ +-} + +-- Data.Tree has flatten for Tree, but nothing for Forest +preorderF :: Forest a -> [a] +preorderF ts = concatMap flatten ts + +{- +------------------------------------------------------------ +-- Finding reachable vertices +------------------------------------------------------------ +-} + +-- This generalizes reachable which was found in Data.Graph +reachable :: IntGraph -> [Vertex] -> [Vertex] +reachable g vs = preorderF (dfs g vs) + +{- +************************************************************************ +* * +* Classify Edge Types +* * +************************************************************************ +-} + +-- Remark: While we could generalize this algorithm this comes at a runtime +-- cost and with no advantages. If you find yourself using this with graphs +-- not easily represented using Int nodes please consider rewriting this +-- using the more general Graph type. + +-- | Edge direction based on DFS Classification +data EdgeType + = Forward + | Cross + | Backward -- ^ Loop back towards the root node. + -- Eg backjumps in loops + | SelfLoop -- ^ v -> v + deriving (Eq,Ord) + +instance Outputable EdgeType where + ppr Forward = text "Forward" + ppr Cross = text "Cross" + ppr Backward = text "Backward" + ppr SelfLoop = text "SelfLoop" + +newtype Time = Time Int deriving (Eq,Ord,Num,Outputable) + +--Allow for specialization +{-# INLINEABLE classifyEdges #-} + +-- | Given a start vertex, a way to get successors from a node +-- and a list of (directed) edges classify the types of edges. +classifyEdges :: forall key. Uniquable key => key -> (key -> [key]) + -> [(key,key)] -> [((key, key), EdgeType)] +classifyEdges root getSucc edges = + --let uqe (from,to) = (getUnique from, getUnique to) + --in pprTrace "Edges:" (ppr $ map uqe edges) $ + zip edges $ map classify edges + where + (_time, starts, ends) = addTimes (0,emptyUFM,emptyUFM) root + classify :: (key,key) -> EdgeType + classify (from,to) + | startFrom < startTo + , endFrom > endTo + = Forward + | startFrom > startTo + , endFrom < endTo + = Backward + | startFrom > startTo + , endFrom > endTo + = Cross + | getUnique from == getUnique to + = SelfLoop + | otherwise + = pprPanic "Failed to classify edge of Graph" + (ppr (getUnique from, getUnique to)) + + where + getTime event node + | Just time <- lookupUFM event node + = time + | otherwise + = pprPanic "Failed to classify edge of CFG - not not timed" + (text "edges" <> ppr (getUnique from, getUnique to) + <+> ppr starts <+> ppr ends ) + startFrom = getTime starts from + startTo = getTime starts to + endFrom = getTime ends from + endTo = getTime ends to + + addTimes :: (Time, UniqFM Time, UniqFM Time) -> key + -> (Time, UniqFM Time, UniqFM Time) + addTimes (time,starts,ends) n + --Dont reenter nodes + | elemUFM n starts + = (time,starts,ends) + | otherwise = + let + starts' = addToUFM starts n time + time' = time + 1 + succs = getSucc n :: [key] + (time'',starts'',ends') = foldl' addTimes (time',starts',ends) succs + ends'' = addToUFM ends' n time'' + in + (time'' + 1, starts'', ends'') diff --git a/compiler/GHC/Data/Graph/Ops.hs b/compiler/GHC/Data/Graph/Ops.hs new file mode 100644 index 0000000000..7d9ce669c6 --- /dev/null +++ b/compiler/GHC/Data/Graph/Ops.hs @@ -0,0 +1,698 @@ +-- | Basic operations on graphs. +-- + +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} + +module GHC.Data.Graph.Ops + ( addNode + , delNode + , getNode + , lookupNode + , modNode + + , size + , union + + , addConflict + , delConflict + , addConflicts + + , addCoalesce + , delCoalesce + + , addExclusion + , addExclusions + + , addPreference + , coalesceNodes + , coalesceGraph + , freezeNode + , freezeOneInGraph + , freezeAllInGraph + , scanGraph + , setColor + , validateGraph + , slurpNodeConflictCount + ) +where + +import GHC.Prelude + +import GHC.Data.Graph.Base + +import GHC.Utils.Outputable +import GHC.Types.Unique +import GHC.Types.Unique.Set +import GHC.Types.Unique.FM + +import Data.List hiding (union) +import Data.Maybe + +-- | Lookup a node from the graph. +lookupNode + :: Uniquable k + => Graph k cls color + -> k -> Maybe (Node k cls color) + +lookupNode graph k + = lookupUFM (graphMap graph) k + + +-- | Get a node from the graph, throwing an error if it's not there +getNode + :: Uniquable k + => Graph k cls color + -> k -> Node k cls color + +getNode graph k + = case lookupUFM (graphMap graph) k of + Just node -> node + Nothing -> panic "ColorOps.getNode: not found" + + +-- | Add a node to the graph, linking up its edges +addNode :: Uniquable k + => k -> Node k cls color + -> Graph k cls color -> Graph k cls color + +addNode k node graph + = let + -- add back conflict edges from other nodes to this one + map_conflict = + nonDetFoldUniqSet + -- It's OK to use nonDetFoldUFM here because the + -- operation is commutative + (adjustUFM_C (\n -> n { nodeConflicts = + addOneToUniqSet (nodeConflicts n) k})) + (graphMap graph) + (nodeConflicts node) + + -- add back coalesce edges from other nodes to this one + map_coalesce = + nonDetFoldUniqSet + -- It's OK to use nonDetFoldUFM here because the + -- operation is commutative + (adjustUFM_C (\n -> n { nodeCoalesce = + addOneToUniqSet (nodeCoalesce n) k})) + map_conflict + (nodeCoalesce node) + + in graph + { graphMap = addToUFM map_coalesce k node} + + +-- | Delete a node and all its edges from the graph. +delNode :: (Uniquable k) + => k -> Graph k cls color -> Maybe (Graph k cls color) + +delNode k graph + | Just node <- lookupNode graph k + = let -- delete conflict edges from other nodes to this one. + graph1 = foldl' (\g k1 -> let Just g' = delConflict k1 k g in g') graph + $ nonDetEltsUniqSet (nodeConflicts node) + + -- delete coalesce edge from other nodes to this one. + graph2 = foldl' (\g k1 -> let Just g' = delCoalesce k1 k g in g') graph1 + $ nonDetEltsUniqSet (nodeCoalesce node) + -- See Note [Unique Determinism and code generation] + + -- delete the node + graph3 = graphMapModify (\fm -> delFromUFM fm k) graph2 + + in Just graph3 + + | otherwise + = Nothing + + +-- | Modify a node in the graph. +-- returns Nothing if the node isn't present. +-- +modNode :: Uniquable k + => (Node k cls color -> Node k cls color) + -> k -> Graph k cls color -> Maybe (Graph k cls color) + +modNode f k graph + = case lookupNode graph k of + Just Node{} + -> Just + $ graphMapModify + (\fm -> let Just node = lookupUFM fm k + node' = f node + in addToUFM fm k node') + graph + + Nothing -> Nothing + + +-- | Get the size of the graph, O(n) +size :: Graph k cls color -> Int + +size graph + = sizeUFM $ graphMap graph + + +-- | Union two graphs together. +union :: Graph k cls color -> Graph k cls color -> Graph k cls color + +union graph1 graph2 + = Graph + { graphMap = plusUFM (graphMap graph1) (graphMap graph2) } + + +-- | Add a conflict between nodes to the graph, creating the nodes required. +-- Conflicts are virtual regs which need to be colored differently. +addConflict + :: Uniquable k + => (k, cls) -> (k, cls) + -> Graph k cls color -> Graph k cls color + +addConflict (u1, c1) (u2, c2) + = let addNeighbor u c u' + = adjustWithDefaultUFM + (\node -> node { nodeConflicts = addOneToUniqSet (nodeConflicts node) u' }) + (newNode u c) { nodeConflicts = unitUniqSet u' } + u + + in graphMapModify + ( addNeighbor u1 c1 u2 + . addNeighbor u2 c2 u1) + + +-- | Delete a conflict edge. k1 -> k2 +-- returns Nothing if the node isn't in the graph +delConflict + :: Uniquable k + => k -> k + -> Graph k cls color -> Maybe (Graph k cls color) + +delConflict k1 k2 + = modNode + (\node -> node { nodeConflicts = delOneFromUniqSet (nodeConflicts node) k2 }) + k1 + + +-- | Add some conflicts to the graph, creating nodes if required. +-- All the nodes in the set are taken to conflict with each other. +addConflicts + :: Uniquable k + => UniqSet k -> (k -> cls) + -> Graph k cls color -> Graph k cls color + +addConflicts conflicts getClass + + -- just a single node, but no conflicts, create the node anyway. + | (u : []) <- nonDetEltsUniqSet conflicts + = graphMapModify + $ adjustWithDefaultUFM + id + (newNode u (getClass u)) + u + + | otherwise + = graphMapModify + $ \fm -> foldl' (\g u -> addConflictSet1 u getClass conflicts g) fm + $ nonDetEltsUniqSet conflicts + -- See Note [Unique Determinism and code generation] + + +addConflictSet1 :: Uniquable k + => k -> (k -> cls) -> UniqSet k + -> UniqFM (Node k cls color) + -> UniqFM (Node k cls color) +addConflictSet1 u getClass set + = case delOneFromUniqSet set u of + set' -> adjustWithDefaultUFM + (\node -> node { nodeConflicts = unionUniqSets set' (nodeConflicts node) } ) + (newNode u (getClass u)) { nodeConflicts = set' } + u + + +-- | Add an exclusion to the graph, creating nodes if required. +-- These are extra colors that the node cannot use. +addExclusion + :: (Uniquable k, Uniquable color) + => k -> (k -> cls) -> color + -> Graph k cls color -> Graph k cls color + +addExclusion u getClass color + = graphMapModify + $ adjustWithDefaultUFM + (\node -> node { nodeExclusions = addOneToUniqSet (nodeExclusions node) color }) + (newNode u (getClass u)) { nodeExclusions = unitUniqSet color } + u + +addExclusions + :: (Uniquable k, Uniquable color) + => k -> (k -> cls) -> [color] + -> Graph k cls color -> Graph k cls color + +addExclusions u getClass colors graph + = foldr (addExclusion u getClass) graph colors + + +-- | Add a coalescence edge to the graph, creating nodes if required. +-- It is considered adventageous to assign the same color to nodes in a coalesence. +addCoalesce + :: Uniquable k + => (k, cls) -> (k, cls) + -> Graph k cls color -> Graph k cls color + +addCoalesce (u1, c1) (u2, c2) + = let addCoalesce u c u' + = adjustWithDefaultUFM + (\node -> node { nodeCoalesce = addOneToUniqSet (nodeCoalesce node) u' }) + (newNode u c) { nodeCoalesce = unitUniqSet u' } + u + + in graphMapModify + ( addCoalesce u1 c1 u2 + . addCoalesce u2 c2 u1) + + +-- | Delete a coalescence edge (k1 -> k2) from the graph. +delCoalesce + :: Uniquable k + => k -> k + -> Graph k cls color -> Maybe (Graph k cls color) + +delCoalesce k1 k2 + = modNode (\node -> node { nodeCoalesce = delOneFromUniqSet (nodeCoalesce node) k2 }) + k1 + + +-- | Add a color preference to the graph, creating nodes if required. +-- The most recently added preference is the most preferred. +-- The algorithm tries to assign a node it's preferred color if possible. +-- +addPreference + :: Uniquable k + => (k, cls) -> color + -> Graph k cls color -> Graph k cls color + +addPreference (u, c) color + = graphMapModify + $ adjustWithDefaultUFM + (\node -> node { nodePreference = color : (nodePreference node) }) + (newNode u c) { nodePreference = [color] } + u + + +-- | Do aggressive coalescing on this graph. +-- returns the new graph and the list of pairs of nodes that got coalesced together. +-- for each pair, the resulting node will have the least key and be second in the pair. +-- +coalesceGraph + :: (Uniquable k, Ord k, Eq cls, Outputable k) + => Bool -- ^ If True, coalesce nodes even if this might make the graph + -- less colorable (aggressive coalescing) + -> Triv k cls color + -> Graph k cls color + -> ( Graph k cls color + , [(k, k)]) -- pairs of nodes that were coalesced, in the order that the + -- coalescing was applied. + +coalesceGraph aggressive triv graph + = coalesceGraph' aggressive triv graph [] + +coalesceGraph' + :: (Uniquable k, Ord k, Eq cls, Outputable k) + => Bool + -> Triv k cls color + -> Graph k cls color + -> [(k, k)] + -> ( Graph k cls color + , [(k, k)]) +coalesceGraph' aggressive triv graph kkPairsAcc + = let + -- find all the nodes that have coalescence edges + cNodes = filter (\node -> not $ isEmptyUniqSet (nodeCoalesce node)) + $ nonDetEltsUFM $ graphMap graph + -- See Note [Unique Determinism and code generation] + + -- build a list of pairs of keys for node's we'll try and coalesce + -- every pair of nodes will appear twice in this list + -- ie [(k1, k2), (k2, k1) ... ] + -- This is ok, GrapOps.coalesceNodes handles this and it's convenient for + -- build a list of what nodes get coalesced together for later on. + -- + cList = [ (nodeId node1, k2) + | node1 <- cNodes + , k2 <- nonDetEltsUniqSet $ nodeCoalesce node1 ] + -- See Note [Unique Determinism and code generation] + + -- do the coalescing, returning the new graph and a list of pairs of keys + -- that got coalesced together. + (graph', mPairs) + = mapAccumL (coalesceNodes aggressive triv) graph cList + + -- keep running until there are no more coalesces can be found + in case catMaybes mPairs of + [] -> (graph', reverse kkPairsAcc) + pairs -> coalesceGraph' aggressive triv graph' (reverse pairs ++ kkPairsAcc) + + +-- | Coalesce this pair of nodes unconditionally \/ aggressively. +-- The resulting node is the one with the least key. +-- +-- returns: Just the pair of keys if the nodes were coalesced +-- the second element of the pair being the least one +-- +-- Nothing if either of the nodes weren't in the graph + +coalesceNodes + :: (Uniquable k, Ord k, Eq cls) + => Bool -- ^ If True, coalesce nodes even if this might make the graph + -- less colorable (aggressive coalescing) + -> Triv k cls color + -> Graph k cls color + -> (k, k) -- ^ keys of the nodes to be coalesced + -> (Graph k cls color, Maybe (k, k)) + +coalesceNodes aggressive triv graph (k1, k2) + | (kMin, kMax) <- if k1 < k2 + then (k1, k2) + else (k2, k1) + + -- the nodes being coalesced must be in the graph + , Just nMin <- lookupNode graph kMin + , Just nMax <- lookupNode graph kMax + + -- can't coalesce conflicting modes + , not $ elementOfUniqSet kMin (nodeConflicts nMax) + , not $ elementOfUniqSet kMax (nodeConflicts nMin) + + -- can't coalesce the same node + , nodeId nMin /= nodeId nMax + + = coalesceNodes_merge aggressive triv graph kMin kMax nMin nMax + + -- don't do the coalescing after all + | otherwise + = (graph, Nothing) + +coalesceNodes_merge + :: (Uniquable k, Eq cls) + => Bool + -> Triv k cls color + -> Graph k cls color + -> k -> k + -> Node k cls color + -> Node k cls color + -> (Graph k cls color, Maybe (k, k)) + +coalesceNodes_merge aggressive triv graph kMin kMax nMin nMax + + -- sanity checks + | nodeClass nMin /= nodeClass nMax + = error "GHC.Data.Graph.Ops.coalesceNodes: can't coalesce nodes of different classes." + + | not (isNothing (nodeColor nMin) && isNothing (nodeColor nMax)) + = error "GHC.Data.Graph.Ops.coalesceNodes: can't coalesce colored nodes." + + --- + | otherwise + = let + -- the new node gets all the edges from its two components + node = + Node { nodeId = kMin + , nodeClass = nodeClass nMin + , nodeColor = Nothing + + -- nodes don't conflict with themselves.. + , nodeConflicts + = (unionUniqSets (nodeConflicts nMin) (nodeConflicts nMax)) + `delOneFromUniqSet` kMin + `delOneFromUniqSet` kMax + + , nodeExclusions = unionUniqSets (nodeExclusions nMin) (nodeExclusions nMax) + , nodePreference = nodePreference nMin ++ nodePreference nMax + + -- nodes don't coalesce with themselves.. + , nodeCoalesce + = (unionUniqSets (nodeCoalesce nMin) (nodeCoalesce nMax)) + `delOneFromUniqSet` kMin + `delOneFromUniqSet` kMax + } + + in coalesceNodes_check aggressive triv graph kMin kMax node + +coalesceNodes_check + :: Uniquable k + => Bool + -> Triv k cls color + -> Graph k cls color + -> k -> k + -> Node k cls color + -> (Graph k cls color, Maybe (k, k)) + +coalesceNodes_check aggressive triv graph kMin kMax node + + -- Unless we're coalescing aggressively, if the result node is not trivially + -- colorable then don't do the coalescing. + | not aggressive + , not $ triv (nodeClass node) (nodeConflicts node) (nodeExclusions node) + = (graph, Nothing) + + | otherwise + = let -- delete the old nodes from the graph and add the new one + Just graph1 = delNode kMax graph + Just graph2 = delNode kMin graph1 + graph3 = addNode kMin node graph2 + + in (graph3, Just (kMax, kMin)) + + +-- | Freeze a node +-- This is for the iterative coalescer. +-- By freezing a node we give up on ever coalescing it. +-- Move all its coalesce edges into the frozen set - and update +-- back edges from other nodes. +-- +freezeNode + :: Uniquable k + => k -- ^ key of the node to freeze + -> Graph k cls color -- ^ the graph + -> Graph k cls color -- ^ graph with that node frozen + +freezeNode k + = graphMapModify + $ \fm -> + let -- freeze all the edges in the node to be frozen + Just node = lookupUFM fm k + node' = node + { nodeCoalesce = emptyUniqSet } + + fm1 = addToUFM fm k node' + + -- update back edges pointing to this node + freezeEdge k node + = if elementOfUniqSet k (nodeCoalesce node) + then node { nodeCoalesce = delOneFromUniqSet (nodeCoalesce node) k } + else node -- panic "GHC.Data.Graph.Ops.freezeNode: edge to freeze wasn't in the coalesce set" + -- If the edge isn't actually in the coelesce set then just ignore it. + + fm2 = nonDetFoldUniqSet (adjustUFM_C (freezeEdge k)) fm1 + -- It's OK to use nonDetFoldUFM here because the operation + -- is commutative + $ nodeCoalesce node + + in fm2 + + +-- | Freeze one node in the graph +-- This if for the iterative coalescer. +-- Look for a move related node of low degree and freeze it. +-- +-- We probably don't need to scan the whole graph looking for the node of absolute +-- lowest degree. Just sample the first few and choose the one with the lowest +-- degree out of those. Also, we don't make any distinction between conflicts of different +-- classes.. this is just a heuristic, after all. +-- +-- IDEA: freezing a node might free it up for Simplify.. would be good to check for triv +-- right here, and add it to a worklist if known triv\/non-move nodes. +-- +freezeOneInGraph + :: (Uniquable k) + => Graph k cls color + -> ( Graph k cls color -- the new graph + , Bool ) -- whether we found a node to freeze + +freezeOneInGraph graph + = let compareNodeDegree n1 n2 + = compare (sizeUniqSet $ nodeConflicts n1) (sizeUniqSet $ nodeConflicts n2) + + candidates + = sortBy compareNodeDegree + $ take 5 -- 5 isn't special, it's just a small number. + $ scanGraph (\node -> not $ isEmptyUniqSet (nodeCoalesce node)) graph + + in case candidates of + + -- there wasn't anything available to freeze + [] -> (graph, False) + + -- we found something to freeze + (n : _) + -> ( freezeNode (nodeId n) graph + , True) + + +-- | Freeze all the nodes in the graph +-- for debugging the iterative allocator. +-- +freezeAllInGraph + :: (Uniquable k) + => Graph k cls color + -> Graph k cls color + +freezeAllInGraph graph + = foldr freezeNode graph + $ map nodeId + $ nonDetEltsUFM $ graphMap graph + -- See Note [Unique Determinism and code generation] + + +-- | Find all the nodes in the graph that meet some criteria +-- +scanGraph + :: (Node k cls color -> Bool) + -> Graph k cls color + -> [Node k cls color] + +scanGraph match graph + = filter match $ nonDetEltsUFM $ graphMap graph + -- See Note [Unique Determinism and code generation] + + +-- | validate the internal structure of a graph +-- all its edges should point to valid nodes +-- If they don't then throw an error +-- +validateGraph + :: (Uniquable k, Outputable k, Eq color) + => SDoc -- ^ extra debugging info to display on error + -> Bool -- ^ whether this graph is supposed to be colored. + -> Graph k cls color -- ^ graph to validate + -> Graph k cls color -- ^ validated graph + +validateGraph doc isColored graph + + -- Check that all edges point to valid nodes. + | edges <- unionManyUniqSets + ( (map nodeConflicts $ nonDetEltsUFM $ graphMap graph) + ++ (map nodeCoalesce $ nonDetEltsUFM $ graphMap graph)) + + , nodes <- mkUniqSet $ map nodeId $ nonDetEltsUFM $ graphMap graph + , badEdges <- minusUniqSet edges nodes + , not $ isEmptyUniqSet badEdges + = pprPanic "GHC.Data.Graph.Ops.validateGraph" + ( text "Graph has edges that point to non-existent nodes" + $$ text " bad edges: " <> pprUFM (getUniqSet badEdges) (vcat . map ppr) + $$ doc ) + + -- Check that no conflicting nodes have the same color + | badNodes <- filter (not . (checkNode graph)) + $ nonDetEltsUFM $ graphMap graph + -- See Note [Unique Determinism and code generation] + , not $ null badNodes + = pprPanic "GHC.Data.Graph.Ops.validateGraph" + ( text "Node has same color as one of it's conflicts" + $$ text " bad nodes: " <> hcat (map (ppr . nodeId) badNodes) + $$ doc) + + -- If this is supposed to be a colored graph, + -- check that all nodes have a color. + | isColored + , badNodes <- filter (\n -> isNothing $ nodeColor n) + $ nonDetEltsUFM $ graphMap graph + , not $ null badNodes + = pprPanic "GHC.Data.Graph.Ops.validateGraph" + ( text "Supposably colored graph has uncolored nodes." + $$ text " uncolored nodes: " <> hcat (map (ppr . nodeId) badNodes) + $$ doc ) + + + -- graph looks ok + | otherwise + = graph + + +-- | If this node is colored, check that all the nodes which +-- conflict with it have different colors. +checkNode + :: (Uniquable k, Eq color) + => Graph k cls color + -> Node k cls color + -> Bool -- ^ True if this node is ok + +checkNode graph node + | Just color <- nodeColor node + , Just neighbors <- sequence $ map (lookupNode graph) + $ nonDetEltsUniqSet $ nodeConflicts node + -- See Note [Unique Determinism and code generation] + + , neighbourColors <- catMaybes $ map nodeColor neighbors + , elem color neighbourColors + = False + + | otherwise + = True + + + +-- | Slurp out a map of how many nodes had a certain number of conflict neighbours + +slurpNodeConflictCount + :: Graph k cls color + -> UniqFM (Int, Int) -- ^ (conflict neighbours, num nodes with that many conflicts) + +slurpNodeConflictCount graph + = addListToUFM_C + (\(c1, n1) (_, n2) -> (c1, n1 + n2)) + emptyUFM + $ map (\node + -> let count = sizeUniqSet $ nodeConflicts node + in (count, (count, 1))) + $ nonDetEltsUFM + -- See Note [Unique Determinism and code generation] + $ graphMap graph + + +-- | Set the color of a certain node +setColor + :: Uniquable k + => k -> color + -> Graph k cls color -> Graph k cls color + +setColor u color + = graphMapModify + $ adjustUFM_C + (\n -> n { nodeColor = Just color }) + u + + +{-# INLINE adjustWithDefaultUFM #-} +adjustWithDefaultUFM + :: Uniquable k + => (a -> a) -> a -> k + -> UniqFM a -> UniqFM a + +adjustWithDefaultUFM f def k map + = addToUFM_C + (\old _ -> f old) + map + k def + +-- Argument order different from UniqFM's adjustUFM +{-# INLINE adjustUFM_C #-} +adjustUFM_C + :: Uniquable k + => (a -> a) + -> k -> UniqFM a -> UniqFM a + +adjustUFM_C f k map + = case lookupUFM map k of + Nothing -> map + Just a -> addToUFM map k (f a) + diff --git a/compiler/GHC/Data/Graph/Ppr.hs b/compiler/GHC/Data/Graph/Ppr.hs new file mode 100644 index 0000000000..020284ea7e --- /dev/null +++ b/compiler/GHC/Data/Graph/Ppr.hs @@ -0,0 +1,173 @@ + +-- | Pretty printing of graphs. + +module GHC.Data.Graph.Ppr + ( dumpGraph + , dotGraph + ) +where + +import GHC.Prelude + +import GHC.Data.Graph.Base + +import GHC.Utils.Outputable +import GHC.Types.Unique +import GHC.Types.Unique.Set +import GHC.Types.Unique.FM + +import Data.List (mapAccumL) +import Data.Maybe + + +-- | Pretty print a graph in a somewhat human readable format. +dumpGraph + :: (Outputable k, Outputable color) + => Graph k cls color -> SDoc + +dumpGraph graph + = text "Graph" + $$ pprUFM (graphMap graph) (vcat . map dumpNode) + +dumpNode + :: (Outputable k, Outputable color) + => Node k cls color -> SDoc + +dumpNode node + = text "Node " <> ppr (nodeId node) + $$ text "conflicts " + <> parens (int (sizeUniqSet $ nodeConflicts node)) + <> text " = " + <> ppr (nodeConflicts node) + + $$ text "exclusions " + <> parens (int (sizeUniqSet $ nodeExclusions node)) + <> text " = " + <> ppr (nodeExclusions node) + + $$ text "coalesce " + <> parens (int (sizeUniqSet $ nodeCoalesce node)) + <> text " = " + <> ppr (nodeCoalesce node) + + $$ space + + + +-- | Pretty print a graph in graphviz .dot format. +-- Conflicts get solid edges. +-- Coalescences get dashed edges. +dotGraph + :: ( Uniquable k + , Outputable k, Outputable cls, Outputable color) + => (color -> SDoc) -- ^ What graphviz color to use for each node color + -- It's usually safe to return X11 style colors here, + -- ie "red", "green" etc or a hex triplet #aaff55 etc + -> Triv k cls color + -> Graph k cls color -> SDoc + +dotGraph colorMap triv graph + = let nodes = nonDetEltsUFM $ graphMap graph + -- See Note [Unique Determinism and code generation] + in vcat + ( [ text "graph G {" ] + ++ map (dotNode colorMap triv) nodes + ++ (catMaybes $ snd $ mapAccumL dotNodeEdges emptyUniqSet nodes) + ++ [ text "}" + , space ]) + + +dotNode :: ( Outputable k, Outputable cls, Outputable color) + => (color -> SDoc) + -> Triv k cls color + -> Node k cls color -> SDoc + +dotNode colorMap triv node + = let name = ppr $ nodeId node + cls = ppr $ nodeClass node + + excludes + = hcat $ punctuate space + $ map (\n -> text "-" <> ppr n) + $ nonDetEltsUniqSet $ nodeExclusions node + -- See Note [Unique Determinism and code generation] + + preferences + = hcat $ punctuate space + $ map (\n -> text "+" <> ppr n) + $ nodePreference node + + expref = if and [isEmptyUniqSet (nodeExclusions node), null (nodePreference node)] + then empty + else text "\\n" <> (excludes <+> preferences) + + -- if the node has been colored then show that, + -- otherwise indicate whether it looks trivially colorable. + color + | Just c <- nodeColor node + = text "\\n(" <> ppr c <> text ")" + + | triv (nodeClass node) (nodeConflicts node) (nodeExclusions node) + = text "\\n(" <> text "triv" <> text ")" + + | otherwise + = text "\\n(" <> text "spill?" <> text ")" + + label = name <> text " :: " <> cls + <> expref + <> color + + pcolorC = case nodeColor node of + Nothing -> text "style=filled fillcolor=white" + Just c -> text "style=filled fillcolor=" <> doubleQuotes (colorMap c) + + + pout = text "node [label=" <> doubleQuotes label <> space <> pcolorC <> text "]" + <> space <> doubleQuotes name + <> text ";" + + in pout + + +-- | Nodes in the graph are doubly linked, but we only want one edge for each +-- conflict if the graphviz graph. Traverse over the graph, but make sure +-- to only print the edges for each node once. + +dotNodeEdges + :: ( Uniquable k + , Outputable k) + => UniqSet k + -> Node k cls color + -> (UniqSet k, Maybe SDoc) + +dotNodeEdges visited node + | elementOfUniqSet (nodeId node) visited + = ( visited + , Nothing) + + | otherwise + = let dconflicts + = map (dotEdgeConflict (nodeId node)) + $ nonDetEltsUniqSet + -- See Note [Unique Determinism and code generation] + $ minusUniqSet (nodeConflicts node) visited + + dcoalesces + = map (dotEdgeCoalesce (nodeId node)) + $ nonDetEltsUniqSet + -- See Note [Unique Determinism and code generation] + $ minusUniqSet (nodeCoalesce node) visited + + out = vcat dconflicts + $$ vcat dcoalesces + + in ( addOneToUniqSet visited (nodeId node) + , Just out) + + where dotEdgeConflict u1 u2 + = doubleQuotes (ppr u1) <> text " -- " <> doubleQuotes (ppr u2) + <> text ";" + + dotEdgeCoalesce u1 u2 + = doubleQuotes (ppr u1) <> text " -- " <> doubleQuotes (ppr u2) + <> space <> text "[ style = dashed ];" diff --git a/compiler/GHC/Data/Graph/UnVar.hs b/compiler/GHC/Data/Graph/UnVar.hs new file mode 100644 index 0000000000..4d1657ce62 --- /dev/null +++ b/compiler/GHC/Data/Graph/UnVar.hs @@ -0,0 +1,145 @@ +{- + +Copyright (c) 2014 Joachim Breitner + +A data structure for undirected graphs of variables +(or in plain terms: Sets of unordered pairs of numbers) + + +This is very specifically tailored for the use in CallArity. In particular it +stores the graph as a union of complete and complete bipartite graph, which +would be very expensive to store as sets of edges or as adjanceny lists. + +It does not normalize the graphs. This means that g `unionUnVarGraph` g is +equal to g, but twice as expensive and large. + +-} +module GHC.Data.Graph.UnVar + ( UnVarSet + , emptyUnVarSet, mkUnVarSet, varEnvDom, unionUnVarSet, unionUnVarSets + , delUnVarSet + , elemUnVarSet, isEmptyUnVarSet + , UnVarGraph + , emptyUnVarGraph + , unionUnVarGraph, unionUnVarGraphs + , completeGraph, completeBipartiteGraph + , neighbors + , hasLoopAt + , delNode + ) where + +import GHC.Prelude + +import GHC.Types.Id +import GHC.Types.Var.Env +import GHC.Types.Unique.FM +import GHC.Utils.Outputable +import GHC.Data.Bag +import GHC.Types.Unique + +import qualified Data.IntSet as S + +-- We need a type for sets of variables (UnVarSet). +-- We do not use VarSet, because for that we need to have the actual variable +-- at hand, and we do not have that when we turn the domain of a VarEnv into a UnVarSet. +-- Therefore, use a IntSet directly (which is likely also a bit more efficient). + +-- Set of uniques, i.e. for adjancet nodes +newtype UnVarSet = UnVarSet (S.IntSet) + deriving Eq + +k :: Var -> Int +k v = getKey (getUnique v) + +emptyUnVarSet :: UnVarSet +emptyUnVarSet = UnVarSet S.empty + +elemUnVarSet :: Var -> UnVarSet -> Bool +elemUnVarSet v (UnVarSet s) = k v `S.member` s + + +isEmptyUnVarSet :: UnVarSet -> Bool +isEmptyUnVarSet (UnVarSet s) = S.null s + +delUnVarSet :: UnVarSet -> Var -> UnVarSet +delUnVarSet (UnVarSet s) v = UnVarSet $ k v `S.delete` s + +mkUnVarSet :: [Var] -> UnVarSet +mkUnVarSet vs = UnVarSet $ S.fromList $ map k vs + +varEnvDom :: VarEnv a -> UnVarSet +varEnvDom ae = UnVarSet $ ufmToSet_Directly ae + +unionUnVarSet :: UnVarSet -> UnVarSet -> UnVarSet +unionUnVarSet (UnVarSet set1) (UnVarSet set2) = UnVarSet (set1 `S.union` set2) + +unionUnVarSets :: [UnVarSet] -> UnVarSet +unionUnVarSets = foldr unionUnVarSet emptyUnVarSet + +instance Outputable UnVarSet where + ppr (UnVarSet s) = braces $ + hcat $ punctuate comma [ ppr (getUnique i) | i <- S.toList s] + + +-- The graph type. A list of complete bipartite graphs +data Gen = CBPG UnVarSet UnVarSet -- complete bipartite + | CG UnVarSet -- complete +newtype UnVarGraph = UnVarGraph (Bag Gen) + +emptyUnVarGraph :: UnVarGraph +emptyUnVarGraph = UnVarGraph emptyBag + +unionUnVarGraph :: UnVarGraph -> UnVarGraph -> UnVarGraph +{- +Premature optimisation, it seems. +unionUnVarGraph (UnVarGraph [CBPG s1 s2]) (UnVarGraph [CG s3, CG s4]) + | s1 == s3 && s2 == s4 + = pprTrace "unionUnVarGraph fired" empty $ + completeGraph (s1 `unionUnVarSet` s2) +unionUnVarGraph (UnVarGraph [CBPG s1 s2]) (UnVarGraph [CG s3, CG s4]) + | s2 == s3 && s1 == s4 + = pprTrace "unionUnVarGraph fired2" empty $ + completeGraph (s1 `unionUnVarSet` s2) +-} +unionUnVarGraph (UnVarGraph g1) (UnVarGraph g2) + = -- pprTrace "unionUnVarGraph" (ppr (length g1, length g2)) $ + UnVarGraph (g1 `unionBags` g2) + +unionUnVarGraphs :: [UnVarGraph] -> UnVarGraph +unionUnVarGraphs = foldl' unionUnVarGraph emptyUnVarGraph + +-- completeBipartiteGraph A B = { {a,b} | a ∈ A, b ∈ B } +completeBipartiteGraph :: UnVarSet -> UnVarSet -> UnVarGraph +completeBipartiteGraph s1 s2 = prune $ UnVarGraph $ unitBag $ CBPG s1 s2 + +completeGraph :: UnVarSet -> UnVarGraph +completeGraph s = prune $ UnVarGraph $ unitBag $ CG s + +neighbors :: UnVarGraph -> Var -> UnVarSet +neighbors (UnVarGraph g) v = unionUnVarSets $ concatMap go $ bagToList g + where go (CG s) = (if v `elemUnVarSet` s then [s] else []) + go (CBPG s1 s2) = (if v `elemUnVarSet` s1 then [s2] else []) ++ + (if v `elemUnVarSet` s2 then [s1] else []) + +-- hasLoopAt G v <=> v--v ∈ G +hasLoopAt :: UnVarGraph -> Var -> Bool +hasLoopAt (UnVarGraph g) v = any go $ bagToList g + where go (CG s) = v `elemUnVarSet` s + go (CBPG s1 s2) = v `elemUnVarSet` s1 && v `elemUnVarSet` s2 + + +delNode :: UnVarGraph -> Var -> UnVarGraph +delNode (UnVarGraph g) v = prune $ UnVarGraph $ mapBag go g + where go (CG s) = CG (s `delUnVarSet` v) + go (CBPG s1 s2) = CBPG (s1 `delUnVarSet` v) (s2 `delUnVarSet` v) + +prune :: UnVarGraph -> UnVarGraph +prune (UnVarGraph g) = UnVarGraph $ filterBag go g + where go (CG s) = not (isEmptyUnVarSet s) + go (CBPG s1 s2) = not (isEmptyUnVarSet s1) && not (isEmptyUnVarSet s2) + +instance Outputable Gen where + ppr (CG s) = ppr s <> char '²' + ppr (CBPG s1 s2) = ppr s1 <+> char 'x' <+> ppr s2 +instance Outputable UnVarGraph where + ppr (UnVarGraph g) = ppr g diff --git a/compiler/GHC/Data/IOEnv.hs b/compiler/GHC/Data/IOEnv.hs new file mode 100644 index 0000000000..345482094e --- /dev/null +++ b/compiler/GHC/Data/IOEnv.hs @@ -0,0 +1,219 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveFunctor #-} +-- +-- (c) The University of Glasgow 2002-2006 +-- + +-- | The IO Monad with an environment +-- +-- The environment is passed around as a Reader monad but +-- as its in the IO monad, mutable references can be used +-- for updating state. +-- +module GHC.Data.IOEnv ( + IOEnv, -- Instance of Monad + + -- Monad utilities + module GHC.Utils.Monad, + + -- Errors + failM, failWithM, + IOEnvFailure(..), + + -- Getting at the environment + getEnv, setEnv, updEnv, + + runIOEnv, unsafeInterleaveM, uninterruptibleMaskM_, + tryM, tryAllM, tryMostM, fixM, + + -- I/O operations + IORef, newMutVar, readMutVar, writeMutVar, updMutVar, + atomicUpdMutVar, atomicUpdMutVar' + ) where + +import GHC.Prelude + +import GHC.Driver.Session +import GHC.Utils.Exception +import GHC.Types.Module +import GHC.Utils.Panic + +import Data.IORef ( IORef, newIORef, readIORef, writeIORef, modifyIORef, + atomicModifyIORef, atomicModifyIORef' ) +import System.IO.Unsafe ( unsafeInterleaveIO ) +import System.IO ( fixIO ) +import Control.Monad +import GHC.Utils.Monad +import Control.Applicative (Alternative(..)) + +---------------------------------------------------------------------- +-- Defining the monad type +---------------------------------------------------------------------- + + +newtype IOEnv env a = IOEnv (env -> IO a) deriving (Functor) + +unIOEnv :: IOEnv env a -> (env -> IO a) +unIOEnv (IOEnv m) = m + +instance Monad (IOEnv m) where + (>>=) = thenM + (>>) = (*>) + +instance MonadFail (IOEnv m) where + fail _ = failM -- Ignore the string + +instance Applicative (IOEnv m) where + pure = returnM + IOEnv f <*> IOEnv x = IOEnv (\ env -> f env <*> x env ) + (*>) = thenM_ + +returnM :: a -> IOEnv env a +returnM a = IOEnv (\ _ -> return a) + +thenM :: IOEnv env a -> (a -> IOEnv env b) -> IOEnv env b +thenM (IOEnv m) f = IOEnv (\ env -> do { r <- m env ; + unIOEnv (f r) env }) + +thenM_ :: IOEnv env a -> IOEnv env b -> IOEnv env b +thenM_ (IOEnv m) f = IOEnv (\ env -> do { _ <- m env ; unIOEnv f env }) + +failM :: IOEnv env a +failM = IOEnv (\ _ -> throwIO IOEnvFailure) + +failWithM :: String -> IOEnv env a +failWithM s = IOEnv (\ _ -> ioError (userError s)) + +data IOEnvFailure = IOEnvFailure + +instance Show IOEnvFailure where + show IOEnvFailure = "IOEnv failure" + +instance Exception IOEnvFailure + +instance ExceptionMonad (IOEnv a) where + gcatch act handle = + IOEnv $ \s -> unIOEnv act s `gcatch` \e -> unIOEnv (handle e) s + gmask f = + IOEnv $ \s -> gmask $ \io_restore -> + let + g_restore (IOEnv m) = IOEnv $ \s -> io_restore (m s) + in + unIOEnv (f g_restore) s + +instance ContainsDynFlags env => HasDynFlags (IOEnv env) where + getDynFlags = do env <- getEnv + return $! extractDynFlags env + +instance ContainsModule env => HasModule (IOEnv env) where + getModule = do env <- getEnv + return $ extractModule env + +---------------------------------------------------------------------- +-- Fundamental combinators specific to the monad +---------------------------------------------------------------------- + + +--------------------------- +runIOEnv :: env -> IOEnv env a -> IO a +runIOEnv env (IOEnv m) = m env + + +--------------------------- +{-# NOINLINE fixM #-} + -- Aargh! Not inlining fixM alleviates a space leak problem. + -- Normally fixM is used with a lazy tuple match: if the optimiser is + -- shown the definition of fixM, it occasionally transforms the code + -- in such a way that the code generator doesn't spot the selector + -- thunks. Sigh. + +fixM :: (a -> IOEnv env a) -> IOEnv env a +fixM f = IOEnv (\ env -> fixIO (\ r -> unIOEnv (f r) env)) + + +--------------------------- +tryM :: IOEnv env r -> IOEnv env (Either IOEnvFailure r) +-- Reflect UserError exceptions (only) into IOEnv monad +-- Other exceptions are not caught; they are simply propagated as exns +-- +-- The idea is that errors in the program being compiled will give rise +-- to UserErrors. But, say, pattern-match failures in GHC itself should +-- not be caught here, else they'll be reported as errors in the program +-- begin compiled! +tryM (IOEnv thing) = IOEnv (\ env -> tryIOEnvFailure (thing env)) + +tryIOEnvFailure :: IO a -> IO (Either IOEnvFailure a) +tryIOEnvFailure = try + +-- XXX We shouldn't be catching everything, e.g. timeouts +tryAllM :: IOEnv env r -> IOEnv env (Either SomeException r) +-- Catch *all* exceptions +-- This is used when running a Template-Haskell splice, when +-- even a pattern-match failure is a programmer error +tryAllM (IOEnv thing) = IOEnv (\ env -> try (thing env)) + +tryMostM :: IOEnv env r -> IOEnv env (Either SomeException r) +tryMostM (IOEnv thing) = IOEnv (\ env -> tryMost (thing env)) + +--------------------------- +unsafeInterleaveM :: IOEnv env a -> IOEnv env a +unsafeInterleaveM (IOEnv m) = IOEnv (\ env -> unsafeInterleaveIO (m env)) + +uninterruptibleMaskM_ :: IOEnv env a -> IOEnv env a +uninterruptibleMaskM_ (IOEnv m) = IOEnv (\ env -> uninterruptibleMask_ (m env)) + +---------------------------------------------------------------------- +-- Alternative/MonadPlus +---------------------------------------------------------------------- + +instance Alternative (IOEnv env) where + empty = IOEnv (const empty) + m <|> n = IOEnv (\env -> unIOEnv m env <|> unIOEnv n env) + +instance MonadPlus (IOEnv env) + +---------------------------------------------------------------------- +-- Accessing input/output +---------------------------------------------------------------------- + +instance MonadIO (IOEnv env) where + liftIO io = IOEnv (\ _ -> io) + +newMutVar :: a -> IOEnv env (IORef a) +newMutVar val = liftIO (newIORef val) + +writeMutVar :: IORef a -> a -> IOEnv env () +writeMutVar var val = liftIO (writeIORef var val) + +readMutVar :: IORef a -> IOEnv env a +readMutVar var = liftIO (readIORef var) + +updMutVar :: IORef a -> (a -> a) -> IOEnv env () +updMutVar var upd = liftIO (modifyIORef var upd) + +-- | Atomically update the reference. Does not force the evaluation of the +-- new variable contents. For strict update, use 'atomicUpdMutVar''. +atomicUpdMutVar :: IORef a -> (a -> (a, b)) -> IOEnv env b +atomicUpdMutVar var upd = liftIO (atomicModifyIORef var upd) + +-- | Strict variant of 'atomicUpdMutVar'. +atomicUpdMutVar' :: IORef a -> (a -> (a, b)) -> IOEnv env b +atomicUpdMutVar' var upd = liftIO (atomicModifyIORef' var upd) + +---------------------------------------------------------------------- +-- Accessing the environment +---------------------------------------------------------------------- + +getEnv :: IOEnv env env +{-# INLINE getEnv #-} +getEnv = IOEnv (\ env -> return env) + +-- | Perform a computation with a different environment +setEnv :: env' -> IOEnv env' a -> IOEnv env a +{-# INLINE setEnv #-} +setEnv new_env (IOEnv m) = IOEnv (\ _ -> m new_env) + +-- | Perform a computation with an altered environment +updEnv :: (env -> env') -> IOEnv env' a -> IOEnv env a +{-# INLINE updEnv #-} +updEnv upd (IOEnv m) = IOEnv (\ env -> m (upd env)) diff --git a/compiler/GHC/Data/List/SetOps.hs b/compiler/GHC/Data/List/SetOps.hs new file mode 100644 index 0000000000..2d916e9dd5 --- /dev/null +++ b/compiler/GHC/Data/List/SetOps.hs @@ -0,0 +1,182 @@ +{- +(c) The University of Glasgow 2006 +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 + +-} + +{-# LANGUAGE CPP #-} + +-- | Set-like operations on lists +-- +-- Avoid using them as much as possible +module GHC.Data.List.SetOps ( + unionLists, minusList, deleteBys, + + -- Association lists + Assoc, assoc, assocMaybe, assocUsing, assocDefault, assocDefaultUsing, + + -- Duplicate handling + hasNoDups, removeDups, findDupsEq, + equivClasses, + + -- Indexing + getNth + ) where + +#include "HsVersions.h" + +import GHC.Prelude + +import GHC.Utils.Outputable +import GHC.Utils.Misc + +import qualified Data.List as L +import qualified Data.List.NonEmpty as NE +import Data.List.NonEmpty (NonEmpty(..)) +import qualified Data.Set as S + +getNth :: Outputable a => [a] -> Int -> a +getNth xs n = ASSERT2( xs `lengthExceeds` n, ppr n $$ ppr xs ) + xs !! n + +deleteBys :: (a -> a -> Bool) -> [a] -> [a] -> [a] +-- (deleteBys eq xs ys) returns xs-ys, using the given equality function +-- Just like 'Data.List.delete' but with an equality function +deleteBys eq xs ys = foldl' (flip (L.deleteBy eq)) xs ys + +{- +************************************************************************ +* * + Treating lists as sets + Assumes the lists contain no duplicates, but are unordered +* * +************************************************************************ +-} + + +-- | Assumes that the arguments contain no duplicates +unionLists :: (HasDebugCallStack, Outputable a, Eq a) => [a] -> [a] -> [a] +-- We special case some reasonable common patterns. +unionLists xs [] = xs +unionLists [] ys = ys +unionLists [x] ys + | isIn "unionLists" x ys = ys + | otherwise = x:ys +unionLists xs [y] + | isIn "unionLists" y xs = xs + | otherwise = y:xs +unionLists xs ys + = WARN(lengthExceeds xs 100 || lengthExceeds ys 100, ppr xs $$ ppr ys) + [x | x <- xs, isn'tIn "unionLists" x ys] ++ ys + +-- | Calculate the set difference of two lists. This is +-- /O((m + n) log n)/, where we subtract a list of /n/ elements +-- from a list of /m/ elements. +-- +-- Extremely short cases are handled specially: +-- When /m/ or /n/ is 0, this takes /O(1)/ time. When /m/ is 1, +-- it takes /O(n)/ time. +minusList :: Ord a => [a] -> [a] -> [a] +-- There's no point building a set to perform just one lookup, so we handle +-- extremely short lists specially. It might actually be better to use +-- an O(m*n) algorithm when m is a little longer (perhaps up to 4 or even 5). +-- The tipping point will be somewhere in the area of where /m/ and /log n/ +-- become comparable, but we probably don't want to work too hard on this. +minusList [] _ = [] +minusList xs@[x] ys + | x `elem` ys = [] + | otherwise = xs +-- Using an empty set or a singleton would also be silly, so let's not. +minusList xs [] = xs +minusList xs [y] = filter (/= y) xs +-- When each list has at least two elements, we build a set from the +-- second argument, allowing us to filter the first argument fairly +-- efficiently. +minusList xs ys = filter (`S.notMember` yss) xs + where + yss = S.fromList ys + +{- +************************************************************************ +* * +\subsection[Utils-assoc]{Association lists} +* * +************************************************************************ + +Inefficient finite maps based on association lists and equality. +-} + +-- A finite mapping based on equality and association lists +type Assoc a b = [(a,b)] + +assoc :: (Eq a) => String -> Assoc a b -> a -> b +assocDefault :: (Eq a) => b -> Assoc a b -> a -> b +assocUsing :: (a -> a -> Bool) -> String -> Assoc a b -> a -> b +assocMaybe :: (Eq a) => Assoc a b -> a -> Maybe b +assocDefaultUsing :: (a -> a -> Bool) -> b -> Assoc a b -> a -> b + +assocDefaultUsing _ deflt [] _ = deflt +assocDefaultUsing eq deflt ((k,v) : rest) key + | k `eq` key = v + | otherwise = assocDefaultUsing eq deflt rest key + +assoc crash_msg list key = assocDefaultUsing (==) (panic ("Failed in assoc: " ++ crash_msg)) list key +assocDefault deflt list key = assocDefaultUsing (==) deflt list key +assocUsing eq crash_msg list key = assocDefaultUsing eq (panic ("Failed in assoc: " ++ crash_msg)) list key + +assocMaybe alist key + = lookup alist + where + lookup [] = Nothing + lookup ((tv,ty):rest) = if key == tv then Just ty else lookup rest + +{- +************************************************************************ +* * +\subsection[Utils-dups]{Duplicate-handling} +* * +************************************************************************ +-} + +hasNoDups :: (Eq a) => [a] -> Bool + +hasNoDups xs = f [] xs + where + f _ [] = True + f seen_so_far (x:xs) = if x `is_elem` seen_so_far + then False + else f (x:seen_so_far) xs + + is_elem = isIn "hasNoDups" + +equivClasses :: (a -> a -> Ordering) -- Comparison + -> [a] + -> [NonEmpty a] + +equivClasses _ [] = [] +equivClasses _ [stuff] = [stuff :| []] +equivClasses cmp items = NE.groupBy eq (L.sortBy cmp items) + where + eq a b = case cmp a b of { EQ -> True; _ -> False } + +removeDups :: (a -> a -> Ordering) -- Comparison function + -> [a] + -> ([a], -- List with no duplicates + [NonEmpty a]) -- List of duplicate groups. One representative + -- from each group appears in the first result + +removeDups _ [] = ([], []) +removeDups _ [x] = ([x],[]) +removeDups cmp xs + = case L.mapAccumR collect_dups [] (equivClasses cmp xs) of { (dups, xs') -> + (xs', dups) } + where + collect_dups :: [NonEmpty a] -> NonEmpty a -> ([NonEmpty a], a) + collect_dups dups_so_far (x :| []) = (dups_so_far, x) + collect_dups dups_so_far dups@(x :| _) = (dups:dups_so_far, x) + +findDupsEq :: (a->a->Bool) -> [a] -> [NonEmpty a] +findDupsEq _ [] = [] +findDupsEq eq (x:xs) | L.null eq_xs = findDupsEq eq xs + | otherwise = (x :| eq_xs) : findDupsEq eq neq_xs + where (eq_xs, neq_xs) = L.partition (eq x) xs diff --git a/compiler/GHC/Data/Maybe.hs b/compiler/GHC/Data/Maybe.hs new file mode 100644 index 0000000000..230468a20e --- /dev/null +++ b/compiler/GHC/Data/Maybe.hs @@ -0,0 +1,114 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE FlexibleContexts #-} + +{- +(c) The University of Glasgow 2006 +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +-} + +module GHC.Data.Maybe ( + module Data.Maybe, + + MaybeErr(..), -- Instance of Monad + failME, isSuccess, + + orElse, + firstJust, firstJusts, + whenIsJust, + expectJust, + rightToMaybe, + + -- * MaybeT + MaybeT(..), liftMaybeT, tryMaybeT + ) where + +import GHC.Prelude + +import Control.Monad +import Control.Monad.Trans.Maybe +import Control.Exception (catch, SomeException(..)) +import Data.Maybe +import GHC.Utils.Misc (HasCallStack) + +infixr 4 `orElse` + +{- +************************************************************************ +* * +\subsection[Maybe type]{The @Maybe@ type} +* * +************************************************************************ +-} + +firstJust :: Maybe a -> Maybe a -> Maybe a +firstJust a b = firstJusts [a, b] + +-- | Takes a list of @Maybes@ and returns the first @Just@ if there is one, or +-- @Nothing@ otherwise. +firstJusts :: [Maybe a] -> Maybe a +firstJusts = msum + +expectJust :: HasCallStack => String -> Maybe a -> a +{-# INLINE expectJust #-} +expectJust _ (Just x) = x +expectJust err Nothing = error ("expectJust " ++ err) + +whenIsJust :: Monad m => Maybe a -> (a -> m ()) -> m () +whenIsJust (Just x) f = f x +whenIsJust Nothing _ = return () + +-- | Flipped version of @fromMaybe@, useful for chaining. +orElse :: Maybe a -> a -> a +orElse = flip fromMaybe + +rightToMaybe :: Either a b -> Maybe b +rightToMaybe (Left _) = Nothing +rightToMaybe (Right x) = Just x + +{- +************************************************************************ +* * +\subsection[MaybeT type]{The @MaybeT@ monad transformer} +* * +************************************************************************ +-} + +-- We had our own MaybeT in the past. Now we reuse transformer's MaybeT + +liftMaybeT :: Monad m => m a -> MaybeT m a +liftMaybeT act = MaybeT $ Just `liftM` act + +-- | Try performing an 'IO' action, failing on error. +tryMaybeT :: IO a -> MaybeT IO a +tryMaybeT action = MaybeT $ catch (Just `fmap` action) handler + where + handler (SomeException _) = return Nothing + +{- +************************************************************************ +* * +\subsection[MaybeErr type]{The @MaybeErr@ type} +* * +************************************************************************ +-} + +data MaybeErr err val = Succeeded val | Failed err + deriving (Functor) + +instance Applicative (MaybeErr err) where + pure = Succeeded + (<*>) = ap + +instance Monad (MaybeErr err) where + Succeeded v >>= k = k v + Failed e >>= _ = Failed e + +isSuccess :: MaybeErr err val -> Bool +isSuccess (Succeeded {}) = True +isSuccess (Failed {}) = False + +failME :: err -> MaybeErr err val +failME e = Failed e diff --git a/compiler/GHC/Data/OrdList.hs b/compiler/GHC/Data/OrdList.hs new file mode 100644 index 0000000000..5476055f05 --- /dev/null +++ b/compiler/GHC/Data/OrdList.hs @@ -0,0 +1,192 @@ +{- +(c) The University of Glasgow 2006 +(c) The AQUA Project, Glasgow University, 1993-1998 + + +-} +{-# LANGUAGE DeriveFunctor #-} + +{-# LANGUAGE BangPatterns #-} + +-- | Provide trees (of instructions), so that lists of instructions can be +-- appended in linear time. +module GHC.Data.OrdList ( + OrdList, + nilOL, isNilOL, unitOL, appOL, consOL, snocOL, concatOL, lastOL, + headOL, + mapOL, fromOL, toOL, foldrOL, foldlOL, reverseOL, fromOLReverse, + strictlyEqOL, strictlyOrdOL +) where + +import GHC.Prelude +import Data.Foldable + +import GHC.Utils.Outputable + +import qualified Data.Semigroup as Semigroup + +infixl 5 `appOL` +infixl 5 `snocOL` +infixr 5 `consOL` + +data OrdList a + = None + | One a + | Many [a] -- Invariant: non-empty + | Cons a (OrdList a) + | Snoc (OrdList a) a + | Two (OrdList a) -- Invariant: non-empty + (OrdList a) -- Invariant: non-empty + deriving (Functor) + +instance Outputable a => Outputable (OrdList a) where + ppr ol = ppr (fromOL ol) -- Convert to list and print that + +instance Semigroup (OrdList a) where + (<>) = appOL + +instance Monoid (OrdList a) where + mempty = nilOL + mappend = (Semigroup.<>) + mconcat = concatOL + +instance Foldable OrdList where + foldr = foldrOL + foldl' = foldlOL + toList = fromOL + null = isNilOL + length = lengthOL + +instance Traversable OrdList where + traverse f xs = toOL <$> traverse f (fromOL xs) + +nilOL :: OrdList a +isNilOL :: OrdList a -> Bool + +unitOL :: a -> OrdList a +snocOL :: OrdList a -> a -> OrdList a +consOL :: a -> OrdList a -> OrdList a +appOL :: OrdList a -> OrdList a -> OrdList a +concatOL :: [OrdList a] -> OrdList a +headOL :: OrdList a -> a +lastOL :: OrdList a -> a +lengthOL :: OrdList a -> Int + +nilOL = None +unitOL as = One as +snocOL as b = Snoc as b +consOL a bs = Cons a bs +concatOL aas = foldr appOL None aas + +headOL None = panic "headOL" +headOL (One a) = a +headOL (Many as) = head as +headOL (Cons a _) = a +headOL (Snoc as _) = headOL as +headOL (Two as _) = headOL as + +lastOL None = panic "lastOL" +lastOL (One a) = a +lastOL (Many as) = last as +lastOL (Cons _ as) = lastOL as +lastOL (Snoc _ a) = a +lastOL (Two _ as) = lastOL as + +lengthOL None = 0 +lengthOL (One _) = 1 +lengthOL (Many as) = length as +lengthOL (Cons _ as) = 1 + length as +lengthOL (Snoc as _) = 1 + length as +lengthOL (Two as bs) = length as + length bs + +isNilOL None = True +isNilOL _ = False + +None `appOL` b = b +a `appOL` None = a +One a `appOL` b = Cons a b +a `appOL` One b = Snoc a b +a `appOL` b = Two a b + +fromOL :: OrdList a -> [a] +fromOL a = go a [] + where go None acc = acc + go (One a) acc = a : acc + go (Cons a b) acc = a : go b acc + go (Snoc a b) acc = go a (b:acc) + go (Two a b) acc = go a (go b acc) + go (Many xs) acc = xs ++ acc + +fromOLReverse :: OrdList a -> [a] +fromOLReverse a = go a [] + -- acc is already in reverse order + where go :: OrdList a -> [a] -> [a] + go None acc = acc + go (One a) acc = a : acc + go (Cons a b) acc = go b (a : acc) + go (Snoc a b) acc = b : go a acc + go (Two a b) acc = go b (go a acc) + go (Many xs) acc = reverse xs ++ acc + +mapOL :: (a -> b) -> OrdList a -> OrdList b +mapOL = fmap + +foldrOL :: (a->b->b) -> b -> OrdList a -> b +foldrOL _ z None = z +foldrOL k z (One x) = k x z +foldrOL k z (Cons x xs) = k x (foldrOL k z xs) +foldrOL k z (Snoc xs x) = foldrOL k (k x z) xs +foldrOL k z (Two b1 b2) = foldrOL k (foldrOL k z b2) b1 +foldrOL k z (Many xs) = foldr k z xs + +-- | Strict left fold. +foldlOL :: (b->a->b) -> b -> OrdList a -> b +foldlOL _ z None = z +foldlOL k z (One x) = k z x +foldlOL k z (Cons x xs) = let !z' = (k z x) in foldlOL k z' xs +foldlOL k z (Snoc xs x) = let !z' = (foldlOL k z xs) in k z' x +foldlOL k z (Two b1 b2) = let !z' = (foldlOL k z b1) in foldlOL k z' b2 +foldlOL k z (Many xs) = foldl' k z xs + +toOL :: [a] -> OrdList a +toOL [] = None +toOL [x] = One x +toOL xs = Many xs + +reverseOL :: OrdList a -> OrdList a +reverseOL None = None +reverseOL (One x) = One x +reverseOL (Cons a b) = Snoc (reverseOL b) a +reverseOL (Snoc a b) = Cons b (reverseOL a) +reverseOL (Two a b) = Two (reverseOL b) (reverseOL a) +reverseOL (Many xs) = Many (reverse xs) + +-- | Compare not only the values but also the structure of two lists +strictlyEqOL :: Eq a => OrdList a -> OrdList a -> Bool +strictlyEqOL None None = True +strictlyEqOL (One x) (One y) = x == y +strictlyEqOL (Cons a as) (Cons b bs) = a == b && as `strictlyEqOL` bs +strictlyEqOL (Snoc as a) (Snoc bs b) = a == b && as `strictlyEqOL` bs +strictlyEqOL (Two a1 a2) (Two b1 b2) = a1 `strictlyEqOL` b1 && a2 `strictlyEqOL` b2 +strictlyEqOL (Many as) (Many bs) = as == bs +strictlyEqOL _ _ = False + +-- | Compare not only the values but also the structure of two lists +strictlyOrdOL :: Ord a => OrdList a -> OrdList a -> Ordering +strictlyOrdOL None None = EQ +strictlyOrdOL None _ = LT +strictlyOrdOL (One x) (One y) = compare x y +strictlyOrdOL (One _) _ = LT +strictlyOrdOL (Cons a as) (Cons b bs) = + compare a b `mappend` strictlyOrdOL as bs +strictlyOrdOL (Cons _ _) _ = LT +strictlyOrdOL (Snoc as a) (Snoc bs b) = + compare a b `mappend` strictlyOrdOL as bs +strictlyOrdOL (Snoc _ _) _ = LT +strictlyOrdOL (Two a1 a2) (Two b1 b2) = + (strictlyOrdOL a1 b1) `mappend` (strictlyOrdOL a2 b2) +strictlyOrdOL (Two _ _) _ = LT +strictlyOrdOL (Many as) (Many bs) = compare as bs +strictlyOrdOL (Many _ ) _ = GT + + diff --git a/compiler/GHC/Data/Pair.hs b/compiler/GHC/Data/Pair.hs new file mode 100644 index 0000000000..ae51c78edc --- /dev/null +++ b/compiler/GHC/Data/Pair.hs @@ -0,0 +1,68 @@ +{- +A simple homogeneous pair type with useful Functor, Applicative, and +Traversable instances. +-} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveFunctor #-} + +module GHC.Data.Pair + ( Pair(..) + , unPair + , toPair + , swap + , pLiftFst + , pLiftSnd + ) +where + +#include "HsVersions.h" + +import GHC.Prelude + +import GHC.Utils.Outputable +import qualified Data.Semigroup as Semi + +data Pair a = Pair { pFst :: a, pSnd :: a } + deriving (Functor) +-- Note that Pair is a *unary* type constructor +-- whereas (,) is binary + +-- The important thing about Pair is that it has a *homogeneous* +-- Functor instance, so you can easily apply the same function +-- to both components + +instance Applicative Pair where + pure x = Pair x x + (Pair f g) <*> (Pair x y) = Pair (f x) (g y) + +instance Foldable Pair where + foldMap f (Pair x y) = f x `mappend` f y + +instance Traversable Pair where + traverse f (Pair x y) = Pair <$> f x <*> f y + +instance Semi.Semigroup a => Semi.Semigroup (Pair a) where + Pair a1 b1 <> Pair a2 b2 = Pair (a1 Semi.<> a2) (b1 Semi.<> b2) + +instance (Semi.Semigroup a, Monoid a) => Monoid (Pair a) where + mempty = Pair mempty mempty + mappend = (Semi.<>) + +instance Outputable a => Outputable (Pair a) where + ppr (Pair a b) = ppr a <+> char '~' <+> ppr b + +unPair :: Pair a -> (a,a) +unPair (Pair x y) = (x,y) + +toPair :: (a,a) -> Pair a +toPair (x,y) = Pair x y + +swap :: Pair a -> Pair a +swap (Pair x y) = Pair y x + +pLiftFst :: (a -> a) -> Pair a -> Pair a +pLiftFst f (Pair a b) = Pair (f a) b + +pLiftSnd :: (a -> a) -> Pair a -> Pair a +pLiftSnd f (Pair a b) = Pair a (f b) diff --git a/compiler/GHC/Data/Stream.hs b/compiler/GHC/Data/Stream.hs new file mode 100644 index 0000000000..7996ee7343 --- /dev/null +++ b/compiler/GHC/Data/Stream.hs @@ -0,0 +1,135 @@ +-- ----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow 2012 +-- +-- ----------------------------------------------------------------------------- + +-- | Monadic streams +module GHC.Data.Stream ( + Stream(..), yield, liftIO, + collect, collect_, consume, fromList, + map, mapM, mapAccumL, mapAccumL_ + ) where + +import GHC.Prelude hiding (map,mapM) + +import Control.Monad hiding (mapM) + +-- | +-- @Stream m a b@ is a computation in some Monad @m@ that delivers a sequence +-- of elements of type @a@ followed by a result of type @b@. +-- +-- More concretely, a value of type @Stream m a b@ can be run using @runStream@ +-- in the Monad @m@, and it delivers either +-- +-- * the final result: @Left b@, or +-- * @Right (a,str)@, where @a@ is the next element in the stream, and @str@ +-- is a computation to get the rest of the stream. +-- +-- Stream is itself a Monad, and provides an operation 'yield' that +-- produces a new element of the stream. This makes it convenient to turn +-- existing monadic computations into streams. +-- +-- The idea is that Stream is useful for making a monadic computation +-- that produces values from time to time. This can be used for +-- knitting together two complex monadic operations, so that the +-- producer does not have to produce all its values before the +-- consumer starts consuming them. We make the producer into a +-- Stream, and the consumer pulls on the stream each time it wants a +-- new value. +-- +newtype Stream m a b = Stream { runStream :: m (Either b (a, Stream m a b)) } + +instance Monad f => Functor (Stream f a) where + fmap = liftM + +instance Monad m => Applicative (Stream m a) where + pure a = Stream (return (Left a)) + (<*>) = ap + +instance Monad m => Monad (Stream m a) where + + Stream m >>= k = Stream $ do + r <- m + case r of + Left b -> runStream (k b) + Right (a,str) -> return (Right (a, str >>= k)) + +yield :: Monad m => a -> Stream m a () +yield a = Stream (return (Right (a, return ()))) + +liftIO :: IO a -> Stream IO b a +liftIO io = Stream $ io >>= return . Left + +-- | Turn a Stream into an ordinary list, by demanding all the elements. +collect :: Monad m => Stream m a () -> m [a] +collect str = go str [] + where + go str acc = do + r <- runStream str + case r of + Left () -> return (reverse acc) + Right (a, str') -> go str' (a:acc) + +-- | Turn a Stream into an ordinary list, by demanding all the elements. +collect_ :: Monad m => Stream m a r -> m ([a], r) +collect_ str = go str [] + where + go str acc = do + r <- runStream str + case r of + Left r -> return (reverse acc, r) + Right (a, str') -> go str' (a:acc) + +consume :: Monad m => Stream m a b -> (a -> m ()) -> m b +consume str f = do + r <- runStream str + case r of + Left ret -> return ret + Right (a, str') -> do + f a + consume str' f + +-- | Turn a list into a 'Stream', by yielding each element in turn. +fromList :: Monad m => [a] -> Stream m a () +fromList = mapM_ yield + +-- | Apply a function to each element of a 'Stream', lazily +map :: Monad m => (a -> b) -> Stream m a x -> Stream m b x +map f str = Stream $ do + r <- runStream str + case r of + Left x -> return (Left x) + Right (a, str') -> return (Right (f a, map f str')) + +-- | Apply a monadic operation to each element of a 'Stream', lazily +mapM :: Monad m => (a -> m b) -> Stream m a x -> Stream m b x +mapM f str = Stream $ do + r <- runStream str + case r of + Left x -> return (Left x) + Right (a, str') -> do + b <- f a + return (Right (b, mapM f str')) + +-- | analog of the list-based 'mapAccumL' on Streams. This is a simple +-- way to map over a Stream while carrying some state around. +mapAccumL :: Monad m => (c -> a -> m (c,b)) -> c -> Stream m a () + -> Stream m b c +mapAccumL f c str = Stream $ do + r <- runStream str + case r of + Left () -> return (Left c) + Right (a, str') -> do + (c',b) <- f c a + return (Right (b, mapAccumL f c' str')) + +mapAccumL_ :: Monad m => (c -> a -> m (c,b)) -> c -> Stream m a r + -> Stream m b (c, r) +mapAccumL_ f c str = Stream $ do + r <- runStream str + case r of + Left r -> return (Left (c, r)) + Right (a, str') -> do + (c',b) <- f c a + return (Right (b, mapAccumL_ f c' str')) diff --git a/compiler/GHC/Data/StringBuffer.hs b/compiler/GHC/Data/StringBuffer.hs new file mode 100644 index 0000000000..8ac5d1ae07 --- /dev/null +++ b/compiler/GHC/Data/StringBuffer.hs @@ -0,0 +1,334 @@ +{- +(c) The University of Glasgow 2006 +(c) The University of Glasgow, 1997-2006 + + +Buffers for scanning string input stored in external arrays. +-} + +{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples #-} +{-# OPTIONS_GHC -O2 #-} +-- We always optimise this, otherwise performance of a non-optimised +-- compiler is severely affected + +module GHC.Data.StringBuffer + ( + StringBuffer(..), + -- non-abstract for vs\/HaskellService + + -- * Creation\/destruction + hGetStringBuffer, + hGetStringBufferBlock, + hPutStringBuffer, + appendStringBuffers, + stringToStringBuffer, + + -- * Inspection + nextChar, + currentChar, + prevChar, + atEnd, + + -- * Moving and comparison + stepOn, + offsetBytes, + byteDiff, + atLine, + + -- * Conversion + lexemeToString, + lexemeToFastString, + decodePrevNChars, + + -- * Parsing integers + parseUnsignedInteger, + ) where + +#include "HsVersions.h" + +import GHC.Prelude + +import GHC.Utils.Encoding +import GHC.Data.FastString +import GHC.Utils.IO.Unsafe +import GHC.Utils.Panic.Plain +import GHC.Utils.Misc + +import Data.Maybe +import Control.Exception +import System.IO +import System.IO.Unsafe ( unsafePerformIO ) +import GHC.IO.Encoding.UTF8 ( mkUTF8 ) +import GHC.IO.Encoding.Failure ( CodingFailureMode(IgnoreCodingFailure) ) + +import GHC.Exts + +import Foreign + +-- ----------------------------------------------------------------------------- +-- The StringBuffer type + +-- |A StringBuffer is an internal pointer to a sized chunk of bytes. +-- The bytes are intended to be *immutable*. There are pure +-- operations to read the contents of a StringBuffer. +-- +-- A StringBuffer may have a finalizer, depending on how it was +-- obtained. +-- +data StringBuffer + = StringBuffer { + buf :: {-# UNPACK #-} !(ForeignPtr Word8), + len :: {-# UNPACK #-} !Int, -- length + cur :: {-# UNPACK #-} !Int -- current pos + } + -- The buffer is assumed to be UTF-8 encoded, and furthermore + -- we add three @\'\\0\'@ bytes to the end as sentinels so that the + -- decoder doesn't have to check for overflow at every single byte + -- of a multibyte sequence. + +instance Show StringBuffer where + showsPrec _ s = showString "<stringbuffer(" + . shows (len s) . showString "," . shows (cur s) + . showString ")>" + +-- ----------------------------------------------------------------------------- +-- Creation / Destruction + +-- | Read a file into a 'StringBuffer'. The resulting buffer is automatically +-- managed by the garbage collector. +hGetStringBuffer :: FilePath -> IO StringBuffer +hGetStringBuffer fname = do + h <- openBinaryFile fname ReadMode + size_i <- hFileSize h + offset_i <- skipBOM h size_i 0 -- offset is 0 initially + let size = fromIntegral $ size_i - offset_i + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + r <- if size == 0 then return 0 else hGetBuf h ptr size + hClose h + if (r /= size) + then ioError (userError "short read of file") + else newUTF8StringBuffer buf ptr size + +hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer +hGetStringBufferBlock handle wanted + = do size_i <- hFileSize handle + offset_i <- hTell handle >>= skipBOM handle size_i + let size = min wanted (fromIntegral $ size_i-offset_i) + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> + do r <- if size == 0 then return 0 else hGetBuf handle ptr size + if r /= size + then ioError (userError $ "short read of file: "++show(r,size,size_i,handle)) + else newUTF8StringBuffer buf ptr size + +hPutStringBuffer :: Handle -> StringBuffer -> IO () +hPutStringBuffer hdl (StringBuffer buf len cur) + = do withForeignPtr (plusForeignPtr buf cur) $ \ptr -> + hPutBuf hdl ptr len + +-- | Skip the byte-order mark if there is one (see #1744 and #6016), +-- and return the new position of the handle in bytes. +-- +-- This is better than treating #FEFF as whitespace, +-- because that would mess up layout. We don't have a concept +-- of zero-width whitespace in Haskell: all whitespace codepoints +-- have a width of one column. +skipBOM :: Handle -> Integer -> Integer -> IO Integer +skipBOM h size offset = + -- Only skip BOM at the beginning of a file. + if size > 0 && offset == 0 + then do + -- Validate assumption that handle is in binary mode. + ASSERTM( hGetEncoding h >>= return . isNothing ) + -- Temporarily select utf8 encoding with error ignoring, + -- to make `hLookAhead` and `hGetChar` return full Unicode characters. + bracket_ (hSetEncoding h safeEncoding) (hSetBinaryMode h True) $ do + c <- hLookAhead h + if c == '\xfeff' + then hGetChar h >> hTell h + else return offset + else return offset + where + safeEncoding = mkUTF8 IgnoreCodingFailure + +newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer +newUTF8StringBuffer buf ptr size = do + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return $ StringBuffer buf size 0 + +appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer +appendStringBuffers sb1 sb2 + = do newBuf <- mallocForeignPtrArray (size+3) + withForeignPtr newBuf $ \ptr -> + withForeignPtr (buf sb1) $ \sb1Ptr -> + withForeignPtr (buf sb2) $ \sb2Ptr -> + do copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len + copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len + pokeArray (ptr `advancePtr` size) [0,0,0] + return (StringBuffer newBuf size 0) + where sb1_len = calcLen sb1 + sb2_len = calcLen sb2 + calcLen sb = len sb - cur sb + size = sb1_len + sb2_len + +-- | Encode a 'String' into a 'StringBuffer' as UTF-8. The resulting buffer +-- is automatically managed by the garbage collector. +stringToStringBuffer :: String -> StringBuffer +stringToStringBuffer str = + unsafePerformIO $ do + let size = utf8EncodedLength str + buf <- mallocForeignPtrArray (size+3) + withForeignPtr buf $ \ptr -> do + utf8EncodeString ptr str + pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] + -- sentinels for UTF-8 decoding + return (StringBuffer buf size 0) + +-- ----------------------------------------------------------------------------- +-- Grab a character + +-- | Return the first UTF-8 character of a nonempty 'StringBuffer' and as well +-- the remaining portion (analogous to 'Data.List.uncons'). __Warning:__ The +-- behavior is undefined if the 'StringBuffer' is empty. The result shares +-- the same buffer as the original. Similar to 'utf8DecodeChar', if the +-- character cannot be decoded as UTF-8, @\'\\0\'@ is returned. +{-# INLINE nextChar #-} +nextChar :: StringBuffer -> (Char,StringBuffer) +nextChar (StringBuffer buf len (I# cur#)) = + -- Getting our fingers dirty a little here, but this is performance-critical + inlinePerformIO $ do + withForeignPtr buf $ \(Ptr a#) -> do + case utf8DecodeChar# (a# `plusAddr#` cur#) of + (# c#, nBytes# #) -> + let cur' = I# (cur# +# nBytes#) in + return (C# c#, StringBuffer buf len cur') + +-- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous +-- to 'Data.List.head'). __Warning:__ The behavior is undefined if the +-- 'StringBuffer' is empty. Similar to 'utf8DecodeChar', if the character +-- cannot be decoded as UTF-8, @\'\\0\'@ is returned. +currentChar :: StringBuffer -> Char +currentChar = fst . nextChar + +prevChar :: StringBuffer -> Char -> Char +prevChar (StringBuffer _ _ 0) deflt = deflt +prevChar (StringBuffer buf _ cur) _ = + inlinePerformIO $ do + withForeignPtr buf $ \p -> do + p' <- utf8PrevChar (p `plusPtr` cur) + return (fst (utf8DecodeChar p')) + +-- ----------------------------------------------------------------------------- +-- Moving + +-- | Return a 'StringBuffer' with the first UTF-8 character removed (analogous +-- to 'Data.List.tail'). __Warning:__ The behavior is undefined if the +-- 'StringBuffer' is empty. The result shares the same buffer as the +-- original. +stepOn :: StringBuffer -> StringBuffer +stepOn s = snd (nextChar s) + +-- | Return a 'StringBuffer' with the first @n@ bytes removed. __Warning:__ +-- If there aren't enough characters, the returned 'StringBuffer' will be +-- invalid and any use of it may lead to undefined behavior. The result +-- shares the same buffer as the original. +offsetBytes :: Int -- ^ @n@, the number of bytes + -> StringBuffer + -> StringBuffer +offsetBytes i s = s { cur = cur s + i } + +-- | Compute the difference in offset between two 'StringBuffer's that share +-- the same buffer. __Warning:__ The behavior is undefined if the +-- 'StringBuffer's use separate buffers. +byteDiff :: StringBuffer -> StringBuffer -> Int +byteDiff s1 s2 = cur s2 - cur s1 + +-- | Check whether a 'StringBuffer' is empty (analogous to 'Data.List.null'). +atEnd :: StringBuffer -> Bool +atEnd (StringBuffer _ l c) = l == c + +-- | Computes a 'StringBuffer' which points to the first character of the +-- wanted line. Lines begin at 1. +atLine :: Int -> StringBuffer -> Maybe StringBuffer +atLine line sb@(StringBuffer buf len _) = + inlinePerformIO $ + withForeignPtr buf $ \p -> do + p' <- skipToLine line len p + if p' == nullPtr + then return Nothing + else + let + delta = p' `minusPtr` p + in return $ Just (sb { cur = delta + , len = len - delta + }) + +skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8) +skipToLine !line !len !op0 = go 1 op0 + where + !opend = op0 `plusPtr` len + + go !i_line !op + | op >= opend = pure nullPtr + | i_line == line = pure op + | otherwise = do + w <- peek op :: IO Word8 + case w of + 10 -> go (i_line + 1) (plusPtr op 1) + 13 -> do + -- this is safe because a 'StringBuffer' is + -- guaranteed to have 3 bytes sentinel values. + w' <- peek (plusPtr op 1) :: IO Word8 + case w' of + 10 -> go (i_line + 1) (plusPtr op 2) + _ -> go (i_line + 1) (plusPtr op 1) + _ -> go i_line (plusPtr op 1) + +-- ----------------------------------------------------------------------------- +-- Conversion + +-- | Decode the first @n@ bytes of a 'StringBuffer' as UTF-8 into a 'String'. +-- Similar to 'utf8DecodeChar', if the character cannot be decoded as UTF-8, +-- they will be replaced with @\'\\0\'@. +lexemeToString :: StringBuffer + -> Int -- ^ @n@, the number of bytes + -> String +lexemeToString _ 0 = "" +lexemeToString (StringBuffer buf _ cur) bytes = + utf8DecodeStringLazy buf cur bytes + +lexemeToFastString :: StringBuffer + -> Int -- ^ @n@, the number of bytes + -> FastString +lexemeToFastString _ 0 = nilFS +lexemeToFastString (StringBuffer buf _ cur) len = + inlinePerformIO $ + withForeignPtr buf $ \ptr -> + return $! mkFastStringBytes (ptr `plusPtr` cur) len + +-- | Return the previous @n@ characters (or fewer if we are less than @n@ +-- characters into the buffer. +decodePrevNChars :: Int -> StringBuffer -> String +decodePrevNChars n (StringBuffer buf _ cur) = + inlinePerformIO $ withForeignPtr buf $ \p0 -> + go p0 n "" (p0 `plusPtr` (cur - 1)) + where + go :: Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String + go buf0 n acc p | n == 0 || buf0 >= p = return acc + go buf0 n acc p = do + p' <- utf8PrevChar p + let (c,_) = utf8DecodeChar p' + go buf0 (n - 1) (c:acc) p' + +-- ----------------------------------------------------------------------------- +-- Parsing integer strings in various bases +parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer +parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int + = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let + go i x | i == len = x + | otherwise = case fst (utf8DecodeChar (ptr `plusPtr` (cur + i))) of + '_' -> go (i + 1) x -- skip "_" (#14473) + char -> go (i + 1) (x * radix + toInteger (char_to_int char)) + in go 0 0 diff --git a/compiler/GHC/Data/TrieMap.hs b/compiler/GHC/Data/TrieMap.hs new file mode 100644 index 0000000000..e2506e3d4c --- /dev/null +++ b/compiler/GHC/Data/TrieMap.hs @@ -0,0 +1,406 @@ +{- +(c) The University of Glasgow 2006 +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +-} + +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE UndecidableInstances #-} +module GHC.Data.TrieMap( + -- * Maps over 'Maybe' values + MaybeMap, + -- * Maps over 'List' values + ListMap, + -- * Maps over 'Literal's + LiteralMap, + -- * 'TrieMap' class + TrieMap(..), insertTM, deleteTM, + + -- * Things helpful for adding additional Instances. + (>.>), (|>), (|>>), XT, + foldMaybe, + -- * Map for leaf compression + GenMap, + lkG, xtG, mapG, fdG, + xtList, lkList + + ) where + +import GHC.Prelude + +import GHC.Types.Literal +import GHC.Types.Unique.DFM +import GHC.Types.Unique( Unique ) + +import qualified Data.Map as Map +import qualified Data.IntMap as IntMap +import GHC.Utils.Outputable +import Control.Monad( (>=>) ) +import Data.Kind( Type ) + +{- +This module implements TrieMaps, which are finite mappings +whose key is a structured value like a CoreExpr or Type. + +This file implements tries over general data structures. +Implementation for tries over Core Expressions/Types are +available in GHC.Core.Map. + +The regular pattern for handling TrieMaps on data structures was first +described (to my knowledge) in Connelly and Morris's 1995 paper "A +generalization of the Trie Data Structure"; there is also an accessible +description of the idea in Okasaki's book "Purely Functional Data +Structures", Section 10.3.2 + +************************************************************************ +* * + The TrieMap class +* * +************************************************************************ +-} + +type XT a = Maybe a -> Maybe a -- How to alter a non-existent elt (Nothing) + -- or an existing elt (Just) + +class TrieMap m where + type Key m :: Type + emptyTM :: m a + lookupTM :: forall b. Key m -> m b -> Maybe b + alterTM :: forall b. Key m -> XT b -> m b -> m b + mapTM :: (a->b) -> m a -> m b + + foldTM :: (a -> b -> b) -> m a -> b -> b + -- The unusual argument order here makes + -- it easy to compose calls to foldTM; + -- see for example fdE below + +insertTM :: TrieMap m => Key m -> a -> m a -> m a +insertTM k v m = alterTM k (\_ -> Just v) m + +deleteTM :: TrieMap m => Key m -> m a -> m a +deleteTM k m = alterTM k (\_ -> Nothing) m + +---------------------- +-- Recall that +-- Control.Monad.(>=>) :: (a -> Maybe b) -> (b -> Maybe c) -> a -> Maybe c + +(>.>) :: (a -> b) -> (b -> c) -> a -> c +-- Reverse function composition (do f first, then g) +infixr 1 >.> +(f >.> g) x = g (f x) +infixr 1 |>, |>> + +(|>) :: a -> (a->b) -> b -- Reverse application +x |> f = f x + +---------------------- +(|>>) :: TrieMap m2 + => (XT (m2 a) -> m1 (m2 a) -> m1 (m2 a)) + -> (m2 a -> m2 a) + -> m1 (m2 a) -> m1 (m2 a) +(|>>) f g = f (Just . g . deMaybe) + +deMaybe :: TrieMap m => Maybe (m a) -> m a +deMaybe Nothing = emptyTM +deMaybe (Just m) = m + +{- +************************************************************************ +* * + IntMaps +* * +************************************************************************ +-} + +instance TrieMap IntMap.IntMap where + type Key IntMap.IntMap = Int + emptyTM = IntMap.empty + lookupTM k m = IntMap.lookup k m + alterTM = xtInt + foldTM k m z = IntMap.foldr k z m + mapTM f m = IntMap.map f m + +xtInt :: Int -> XT a -> IntMap.IntMap a -> IntMap.IntMap a +xtInt k f m = IntMap.alter f k m + +instance Ord k => TrieMap (Map.Map k) where + type Key (Map.Map k) = k + emptyTM = Map.empty + lookupTM = Map.lookup + alterTM k f m = Map.alter f k m + foldTM k m z = Map.foldr k z m + mapTM f m = Map.map f m + + +{- +Note [foldTM determinism] +~~~~~~~~~~~~~~~~~~~~~~~~~ +We want foldTM to be deterministic, which is why we have an instance of +TrieMap for UniqDFM, but not for UniqFM. Here's an example of some things that +go wrong if foldTM is nondeterministic. Consider: + + f a b = return (a <> b) + +Depending on the order that the typechecker generates constraints you +get either: + + f :: (Monad m, Monoid a) => a -> a -> m a + +or: + + f :: (Monoid a, Monad m) => a -> a -> m a + +The generated code will be different after desugaring as the dictionaries +will be bound in different orders, leading to potential ABI incompatibility. + +One way to solve this would be to notice that the typeclasses could be +sorted alphabetically. + +Unfortunately that doesn't quite work with this example: + + f a b = let x = a <> a; y = b <> b in x + +where you infer: + + f :: (Monoid m, Monoid m1) => m1 -> m -> m1 + +or: + + f :: (Monoid m1, Monoid m) => m1 -> m -> m1 + +Here you could decide to take the order of the type variables in the type +according to depth first traversal and use it to order the constraints. + +The real trouble starts when the user enables incoherent instances and +the compiler has to make an arbitrary choice. Consider: + + class T a b where + go :: a -> b -> String + + instance (Show b) => T Int b where + go a b = show a ++ show b + + instance (Show a) => T a Bool where + go a b = show a ++ show b + + f = go 10 True + +GHC is free to choose either dictionary to implement f, but for the sake of +determinism we'd like it to be consistent when compiling the same sources +with the same flags. + +inert_dicts :: DictMap is implemented with a TrieMap. In getUnsolvedInerts it +gets converted to a bag of (Wanted) Cts using a fold. Then in +solve_simple_wanteds it's merged with other WantedConstraints. We want the +conversion to a bag to be deterministic. For that purpose we use UniqDFM +instead of UniqFM to implement the TrieMap. + +See Note [Deterministic UniqFM] in GHC.Types.Unique.DFM for more details on how it's made +deterministic. +-} + +instance TrieMap UniqDFM where + type Key UniqDFM = Unique + emptyTM = emptyUDFM + lookupTM k m = lookupUDFM m k + alterTM k f m = alterUDFM f m k + foldTM k m z = foldUDFM k z m + mapTM f m = mapUDFM f m + +{- +************************************************************************ +* * + Maybes +* * +************************************************************************ + +If m is a map from k -> val +then (MaybeMap m) is a map from (Maybe k) -> val +-} + +data MaybeMap m a = MM { mm_nothing :: Maybe a, mm_just :: m a } + +instance TrieMap m => TrieMap (MaybeMap m) where + type Key (MaybeMap m) = Maybe (Key m) + emptyTM = MM { mm_nothing = Nothing, mm_just = emptyTM } + lookupTM = lkMaybe lookupTM + alterTM = xtMaybe alterTM + foldTM = fdMaybe + mapTM = mapMb + +mapMb :: TrieMap m => (a->b) -> MaybeMap m a -> MaybeMap m b +mapMb f (MM { mm_nothing = mn, mm_just = mj }) + = MM { mm_nothing = fmap f mn, mm_just = mapTM f mj } + +lkMaybe :: (forall b. k -> m b -> Maybe b) + -> Maybe k -> MaybeMap m a -> Maybe a +lkMaybe _ Nothing = mm_nothing +lkMaybe lk (Just x) = mm_just >.> lk x + +xtMaybe :: (forall b. k -> XT b -> m b -> m b) + -> Maybe k -> XT a -> MaybeMap m a -> MaybeMap m a +xtMaybe _ Nothing f m = m { mm_nothing = f (mm_nothing m) } +xtMaybe tr (Just x) f m = m { mm_just = mm_just m |> tr x f } + +fdMaybe :: TrieMap m => (a -> b -> b) -> MaybeMap m a -> b -> b +fdMaybe k m = foldMaybe k (mm_nothing m) + . foldTM k (mm_just m) + +{- +************************************************************************ +* * + Lists +* * +************************************************************************ +-} + +data ListMap m a + = LM { lm_nil :: Maybe a + , lm_cons :: m (ListMap m a) } + +instance TrieMap m => TrieMap (ListMap m) where + type Key (ListMap m) = [Key m] + emptyTM = LM { lm_nil = Nothing, lm_cons = emptyTM } + lookupTM = lkList lookupTM + alterTM = xtList alterTM + foldTM = fdList + mapTM = mapList + +instance (TrieMap m, Outputable a) => Outputable (ListMap m a) where + ppr m = text "List elts" <+> ppr (foldTM (:) m []) + +mapList :: TrieMap m => (a->b) -> ListMap m a -> ListMap m b +mapList f (LM { lm_nil = mnil, lm_cons = mcons }) + = LM { lm_nil = fmap f mnil, lm_cons = mapTM (mapTM f) mcons } + +lkList :: TrieMap m => (forall b. k -> m b -> Maybe b) + -> [k] -> ListMap m a -> Maybe a +lkList _ [] = lm_nil +lkList lk (x:xs) = lm_cons >.> lk x >=> lkList lk xs + +xtList :: TrieMap m => (forall b. k -> XT b -> m b -> m b) + -> [k] -> XT a -> ListMap m a -> ListMap m a +xtList _ [] f m = m { lm_nil = f (lm_nil m) } +xtList tr (x:xs) f m = m { lm_cons = lm_cons m |> tr x |>> xtList tr xs f } + +fdList :: forall m a b. TrieMap m + => (a -> b -> b) -> ListMap m a -> b -> b +fdList k m = foldMaybe k (lm_nil m) + . foldTM (fdList k) (lm_cons m) + +foldMaybe :: (a -> b -> b) -> Maybe a -> b -> b +foldMaybe _ Nothing b = b +foldMaybe k (Just a) b = k a b + +{- +************************************************************************ +* * + Basic maps +* * +************************************************************************ +-} + +type LiteralMap a = Map.Map Literal a + +{- +************************************************************************ +* * + GenMap +* * +************************************************************************ + +Note [Compressed TrieMap] +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The GenMap constructor augments TrieMaps with leaf compression. This helps +solve the performance problem detailed in #9960: suppose we have a handful +H of entries in a TrieMap, each with a very large key, size K. If you fold over +such a TrieMap you'd expect time O(H). That would certainly be true of an +association list! But with TrieMap we actually have to navigate down a long +singleton structure to get to the elements, so it takes time O(K*H). This +can really hurt on many type-level computation benchmarks: +see for example T9872d. + +The point of a TrieMap is that you need to navigate to the point where only one +key remains, and then things should be fast. So the point of a SingletonMap +is that, once we are down to a single (key,value) pair, we stop and +just use SingletonMap. + +'EmptyMap' provides an even more basic (but essential) optimization: if there is +nothing in the map, don't bother building out the (possibly infinite) recursive +TrieMap structure! + +Compressed triemaps are heavily used by GHC.Core.Map. So we have to mark some things +as INLINEABLE to permit specialization. +-} + +data GenMap m a + = EmptyMap + | SingletonMap (Key m) a + | MultiMap (m a) + +instance (Outputable a, Outputable (m a)) => Outputable (GenMap m a) where + ppr EmptyMap = text "Empty map" + ppr (SingletonMap _ v) = text "Singleton map" <+> ppr v + ppr (MultiMap m) = ppr m + +-- TODO undecidable instance +instance (Eq (Key m), TrieMap m) => TrieMap (GenMap m) where + type Key (GenMap m) = Key m + emptyTM = EmptyMap + lookupTM = lkG + alterTM = xtG + foldTM = fdG + mapTM = mapG + +--We want to be able to specialize these functions when defining eg +--tries over (GenMap CoreExpr) which requires INLINEABLE + +{-# INLINEABLE lkG #-} +lkG :: (Eq (Key m), TrieMap m) => Key m -> GenMap m a -> Maybe a +lkG _ EmptyMap = Nothing +lkG k (SingletonMap k' v') | k == k' = Just v' + | otherwise = Nothing +lkG k (MultiMap m) = lookupTM k m + +{-# INLINEABLE xtG #-} +xtG :: (Eq (Key m), TrieMap m) => Key m -> XT a -> GenMap m a -> GenMap m a +xtG k f EmptyMap + = case f Nothing of + Just v -> SingletonMap k v + Nothing -> EmptyMap +xtG k f m@(SingletonMap k' v') + | k' == k + -- The new key matches the (single) key already in the tree. Hence, + -- apply @f@ to @Just v'@ and build a singleton or empty map depending + -- on the 'Just'/'Nothing' response respectively. + = case f (Just v') of + Just v'' -> SingletonMap k' v'' + Nothing -> EmptyMap + | otherwise + -- We've hit a singleton tree for a different key than the one we are + -- searching for. Hence apply @f@ to @Nothing@. If result is @Nothing@ then + -- we can just return the old map. If not, we need a map with *two* + -- entries. The easiest way to do that is to insert two items into an empty + -- map of type @m a@. + = case f Nothing of + Nothing -> m + Just v -> emptyTM |> alterTM k' (const (Just v')) + >.> alterTM k (const (Just v)) + >.> MultiMap +xtG k f (MultiMap m) = MultiMap (alterTM k f m) + +{-# INLINEABLE mapG #-} +mapG :: TrieMap m => (a -> b) -> GenMap m a -> GenMap m b +mapG _ EmptyMap = EmptyMap +mapG f (SingletonMap k v) = SingletonMap k (f v) +mapG f (MultiMap m) = MultiMap (mapTM f m) + +{-# INLINEABLE fdG #-} +fdG :: TrieMap m => (a -> b -> b) -> GenMap m a -> b -> b +fdG _ EmptyMap = \z -> z +fdG k (SingletonMap _ v) = \z -> k v z +fdG k (MultiMap m) = foldTM k m |