diff options
author | Bartosz Nitka <niteria@gmail.com> | 2015-11-21 15:49:14 +0100 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2015-11-21 15:49:14 +0100 |
commit | 6664ab8356f00ef0b2186f30a0d29a9c0228c045 (patch) | |
tree | b65162a8759d3e353bcc79dcbcf9b1990374bc48 | |
parent | 192dd068890701a7692890677d4cbf9f2abdb64a (diff) | |
download | haskell-6664ab8356f00ef0b2186f30a0d29a9c0228c045.tar.gz |
Add DVarSet - a deterministic set of Vars
This implements `DVarSet`, a deterministic set of Vars, with an
interface very similar to `VarSet` with a couple of functions missing.
I will need this in changes that follow, one of them will be about
changing the type of the set of Vars that `RuleInfo` holds to make the
free variable computation deterministic.
Test Plan:
./validate
I can add new tests if anyone wants me to.
Reviewers: simonpj, simonmar, austin, bgamari
Reviewed By: simonmar, bgamari
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D1487
GHC Trac Issues: #4012
-rw-r--r-- | compiler/basicTypes/VarSet.hs | 81 | ||||
-rw-r--r-- | compiler/ghc.cabal.in | 3 | ||||
-rw-r--r-- | compiler/ghc.mk | 3 | ||||
-rw-r--r-- | compiler/utils/UniqDFM.hs | 150 | ||||
-rw-r--r-- | compiler/utils/UniqDSet.hs | 88 |
5 files changed, 313 insertions, 12 deletions
diff --git a/compiler/basicTypes/VarSet.hs b/compiler/basicTypes/VarSet.hs index f5ea6edf19..e340117893 100644 --- a/compiler/basicTypes/VarSet.hs +++ b/compiler/basicTypes/VarSet.hs @@ -19,7 +19,20 @@ module VarSet ( minusVarSet, foldVarSet, filterVarSet, transCloVarSet, fixVarSet, lookupVarSet, mapVarSet, sizeVarSet, seqVarSet, - elemVarSetByKey, partitionVarSet + elemVarSetByKey, partitionVarSet, + + -- * Deterministic Var set types + DVarSet, DIdSet, DTyVarSet, + + -- ** Manipulating these sets + emptyDVarSet, unitDVarSet, mkDVarSet, + extendDVarSet, + elemDVarSet, dVarSetElems, subDVarSet, + unionDVarSet, unionDVarSets, mapUnionDVarSet, + intersectDVarSet, + isEmptyDVarSet, delDVarSet, + minusDVarSet, foldDVarSet, filterDVarSet, + sizeDVarSet, seqDVarSet, ) where #include "HsVersions.h" @@ -27,6 +40,7 @@ module VarSet ( import Var ( Var, TyVar, CoVar, Id ) import Unique import UniqSet +import UniqDSet import UniqFM( disjointUFM ) {- @@ -113,7 +127,7 @@ subVarSet s1 s2 = isEmptyVarSet (s1 `minusVarSet` s2) fixVarSet :: (VarSet -> VarSet) -- Map the current set to a new set -> VarSet -> VarSet --- (fixVarSet f s) repeatedly applies f to the set s, +-- (fixVarSet f s) repeatedly applies f to the set s, -- until it reaches a fixed point. fixVarSet fn vars | new_vars `subVarSet` vars = vars @@ -149,3 +163,66 @@ transCloVarSet fn seeds seqVarSet :: VarSet -> () seqVarSet s = sizeVarSet s `seq` () + +-- Deterministic VarSet +-- See Note [Deterministic UniqFM] in UniqDFM for explanation why we need +-- DVarSet. + +type DVarSet = UniqDSet Var +type DIdSet = UniqDSet Id +type DTyVarSet = UniqDSet TyVar + +emptyDVarSet :: DVarSet +emptyDVarSet = emptyUniqDSet + +unitDVarSet :: Var -> DVarSet +unitDVarSet = unitUniqDSet + +mkDVarSet :: [Var] -> DVarSet +mkDVarSet = mkUniqDSet + +extendDVarSet :: DVarSet -> Var -> DVarSet +extendDVarSet = addOneToUniqDSet + +elemDVarSet :: Var -> DVarSet -> Bool +elemDVarSet = elementOfUniqDSet + +dVarSetElems :: DVarSet -> [Var] +dVarSetElems = uniqDSetToList + +subDVarSet :: DVarSet -> DVarSet -> Bool +subDVarSet s1 s2 = isEmptyDVarSet (s1 `minusDVarSet` s2) + +unionDVarSet :: DVarSet -> DVarSet -> DVarSet +unionDVarSet = unionUniqDSets + +unionDVarSets :: [DVarSet] -> DVarSet +unionDVarSets = unionManyUniqDSets + +-- | Map the function over the list, and union the results +mapUnionDVarSet :: (a -> DVarSet) -> [a] -> DVarSet +mapUnionDVarSet get_set xs = foldr (unionDVarSet . get_set) emptyDVarSet xs + +intersectDVarSet :: DVarSet -> DVarSet -> DVarSet +intersectDVarSet = intersectUniqDSets + +isEmptyDVarSet :: DVarSet -> Bool +isEmptyDVarSet = isEmptyUniqDSet + +delDVarSet :: DVarSet -> Var -> DVarSet +delDVarSet = delOneFromUniqDSet + +minusDVarSet :: DVarSet -> DVarSet -> DVarSet +minusDVarSet = minusUniqDSet + +foldDVarSet :: (Var -> a -> a) -> a -> DVarSet -> a +foldDVarSet = foldUniqDSet + +filterDVarSet :: (Var -> Bool) -> DVarSet -> DVarSet +filterDVarSet = filterUniqDSet + +sizeDVarSet :: DVarSet -> Int +sizeDVarSet = sizeUniqDSet + +seqDVarSet :: DVarSet -> () +seqDVarSet s = sizeDVarSet s `seq` () diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 4c740f1002..b78c2b89e9 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -468,8 +468,9 @@ Library State Stream StringBuffer - UniqFM UniqDFM + UniqDSet + UniqFM UniqSet Util Vectorise.Builtins.Base diff --git a/compiler/ghc.mk b/compiler/ghc.mk index 6fde5c0301..e3f824e5f3 100644 --- a/compiler/ghc.mk +++ b/compiler/ghc.mk @@ -595,8 +595,9 @@ compiler_stage2_dll0_MODULES = \ TysPrim \ TysWiredIn \ Unify \ - UniqFM \ UniqDFM \ + UniqDSet \ + UniqFM \ UniqSet \ UniqSupply \ Unique \ diff --git a/compiler/utils/UniqDFM.hs b/compiler/utils/UniqDFM.hs index 5f6554ed6c..3f2830ab1f 100644 --- a/compiler/utils/UniqDFM.hs +++ b/compiler/utils/UniqDFM.hs @@ -25,11 +25,23 @@ module UniqDFM ( -- ** Manipulating those mappings emptyUDFM, + unitUDFM, addToUDFM, + delFromUDFM, + plusUDFM, lookupUDFM, + elemUDFM, foldUDFM, eltsUDFM, + filterUDFM, + isNullUDFM, + sizeUDFM, + intersectUDFM, + minusUDFM, + udfmToList, + udfmToUfm, + alwaysUnsafeUfmToUdfm, ) where import FastString @@ -41,16 +53,32 @@ import Data.Typeable import Data.Data import Data.List (sortBy) import Data.Function (on) +import UniqFM (UniqFM, listToUFM_Directly, ufmToList) -- Note [Deterministic UniqFM] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- A @UniqDFM@ is just like @UniqFM@ with the following additional +-- property: the function `udfmToList` returns the elements in some +-- deterministic order not depending on the Unique key for those elements. +-- +-- If the client of the map performs operations on the map in deterministic +-- order then `udfmToList` returns them in deterministic order. +-- +-- There is an implementation cost: each element is given a serial number +-- as it is added, and `udfmToList` sorts it's result by this serial +-- number. So you should only use `UniqDFM` if you need the deterministic +-- property. +-- +-- `foldUDFM` also preserves determinism. +-- -- Normal @UniqFM@ when you turn it into a list will use -- Data.IntMap.toList function that returns the elements in the order of -- the keys. The keys in @UniqFM@ are always @Uniques@, so you end up with -- with a list ordered by @Uniques@. -- The order of @Uniques@ is known to be not stable across rebuilds. -- See Note [Unique Determinism] in Unique. - +-- +-- -- There's more than one way to implement this. The implementation here tags -- every value with the insertion time that can later be used to sort the -- values when asked to convert to a list. @@ -61,12 +89,15 @@ import Data.Function (on) -- -- where the list determines the order. This makes deletion tricky as we'd -- only accumulate elements in that list, but makes merging easier as you --- don't have to renumber everything. --- I've tested both approaches by replacing UniqFM and the cost was about --- the same for both. We don't need merging nor deletion yet, but when we --- do it might be worth to reevaluate the trade-offs here. - -data TaggedVal val = TaggedVal val {-# UNPACK #-} !Int +-- can just merge both structures independently. +-- Deletion can probably be done in amortized fashion when the size of the +-- list is twice the size of the set. + +-- | A type of values tagged with insertion time +data TaggedVal val = + TaggedVal + val + {-# UNPACK #-} !Int -- ^ insertion time deriving (Data, Typeable) taggedFst :: TaggedVal val -> val @@ -81,19 +112,88 @@ instance Eq val => Eq (TaggedVal val) where instance Functor TaggedVal where fmap f (TaggedVal val i) = TaggedVal (f val) i -data UniqDFM ele = UDFM !(M.IntMap (TaggedVal ele)) {-# UNPACK #-} !Int +-- | Type of unique deterministic finite maps +data UniqDFM ele = + UDFM + !(M.IntMap (TaggedVal ele)) -- A map where keys are Unique's values and + -- values are tagged with insertion time. + -- The invariant is that all the tags will + -- be distinct within a single map + {-# UNPACK #-} !Int -- Upper bound on the values' insertion + -- time. See Note [Overflow on plusUDFM] deriving (Data, Typeable, Functor) emptyUDFM :: UniqDFM elt emptyUDFM = UDFM M.empty 0 +unitUDFM :: Uniquable key => key -> elt -> UniqDFM elt +unitUDFM k v = UDFM (M.singleton (getKey $ getUnique k) (TaggedVal v 0)) 1 + addToUDFM :: Uniquable key => UniqDFM elt -> key -> elt -> UniqDFM elt addToUDFM (UDFM m i) k v = UDFM (M.insert (getKey $ getUnique k) (TaggedVal v i) m) (i + 1) +addToUDFM_Directly :: UniqDFM elt -> Unique -> elt -> UniqDFM elt +addToUDFM_Directly (UDFM m i) u v = + UDFM (M.insert (getKey u) (TaggedVal v i) m) (i + 1) + +addListToUDFM_Directly :: UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt +addListToUDFM_Directly = foldl (\m (k, v) -> addToUDFM_Directly m k v) + +delFromUDFM :: Uniquable key => UniqDFM elt -> key -> UniqDFM elt +delFromUDFM (UDFM m i) k = UDFM (M.delete (getKey $ getUnique k) m) i + +-- Note [Overflow on plusUDFM] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- There are multiple ways of implementing plusUDFM. +-- The main problem that needs to be solved is overlap on times of +-- insertion between different keys in two maps. +-- Consider: +-- +-- A = fromList [(a, (x, 1))] +-- B = fromList [(b, (y, 1))] +-- +-- If you merge them naively you end up with: +-- +-- C = fromList [(a, (x, 1)), (b, (y, 1))] +-- +-- Which loses information about ordering and brings us back into +-- non-deterministic world. +-- +-- The solution I considered before would increment the tags on one of the +-- sets by the upper bound of the other set. The problem with this approach +-- is that you'll run out of tags for some merge patterns. +-- Say you start with A with upper bound 1, you merge A with A to get A' and +-- the upper bound becomes 2. You merge A' with A' and the upper bound +-- doubles again. After 64 merges you overflow. +-- This solution would have the same time complexity as plusUFM, namely O(n+m). +-- +-- The solution I ended up with has time complexity of +-- O(m log m + m * min (n+m, W)) where m is the smaller set. +-- It simply inserts the elements of the smaller set into the larger +-- set in the order that they were inserted into the smaller set. That's +-- O(m log m) for extracting the elements from the smaller set in the +-- insertion order and O(m * min(n+m, W)) to insert them into the bigger +-- set. + +plusUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt +plusUDFM udfml@(UDFM _ i) udfmr@(UDFM _ j) + -- we will use the upper bound on the tag as a proxy for the set size, + -- to insert the smaller one into the bigger one + | i > j = insertUDFMIntoLeft udfml udfmr + | otherwise = insertUDFMIntoLeft udfmr udfml + +insertUDFMIntoLeft :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt +insertUDFMIntoLeft udfml udfmr = addListToUDFM_Directly udfml $ udfmToList udfmr + lookupUDFM :: Uniquable key => UniqDFM elt -> key -> Maybe elt lookupUDFM (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey $ getUnique k) m +elemUDFM :: Uniquable key => key -> UniqDFM elt -> Bool +elemUDFM k (UDFM m _i) = M.member (getKey $ getUnique k) m + +-- | Performs a deterministic fold over the UniqDFM. +-- It's O(n log n) while the corresponding function on `UniqFM` is O(n). foldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a foldUDFM k z m = foldr k z (eltsUDFM m) @@ -101,11 +201,45 @@ eltsUDFM :: UniqDFM elt -> [elt] eltsUDFM (UDFM m _i) = map taggedFst $ sortBy (compare `on` taggedSnd) $ M.elems m +filterUDFM :: (elt -> Bool) -> UniqDFM elt -> UniqDFM elt +filterUDFM p (UDFM m i) = UDFM (M.filter (\(TaggedVal v _) -> p v) m) i + +-- | Converts `UniqDFM` to a list, with elements in deterministic order. +-- It's O(n log n) while the corresponding function on `UniqFM` is O(n). udfmToList :: UniqDFM elt -> [(Unique, elt)] udfmToList (UDFM m _i) = [ (getUnique k, taggedFst v) | (k, v) <- sortBy (compare `on` (taggedSnd . snd)) $ M.toList m ] +isNullUDFM :: UniqDFM elt -> Bool +isNullUDFM (UDFM m _) = M.null m + +sizeUDFM :: UniqDFM elt -> Int +sizeUDFM (UDFM m _i) = M.size m + +intersectUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt +intersectUDFM (UDFM x i) (UDFM y _j) = UDFM (M.intersection x y) i + -- M.intersection is left biased, that means the result will only have + -- a subset of elements from the left set, so `i` is a good upper bound. + +minusUDFM :: UniqDFM elt1 -> UniqDFM elt2 -> UniqDFM elt1 +minusUDFM (UDFM x i) (UDFM y _j) = UDFM (M.difference x y) i + -- M.difference returns a subset of a left set, so `i` is a good upper + -- bound. + +-- | This allows for lossy conversion from UniqDFM to UniqFM +udfmToUfm :: UniqDFM elt -> UniqFM elt +udfmToUfm (UDFM m _i) = + listToUFM_Directly [(getUnique k, taggedFst tv) | (k, tv) <- M.toList m] + +listToUDFM_Directly :: [(Unique, elt)] -> UniqDFM elt +listToUDFM_Directly = foldl (\m (u, v) -> addToUDFM_Directly m u v) emptyUDFM + +-- This should not be used in commited code, provided for convenience to +-- make ad-hoc conversions when developing +alwaysUnsafeUfmToUdfm :: UniqFM elt -> UniqDFM elt +alwaysUnsafeUfmToUdfm = listToUDFM_Directly . ufmToList + -- Output-ery instance Outputable a => Outputable (UniqDFM a) where diff --git a/compiler/utils/UniqDSet.hs b/compiler/utils/UniqDSet.hs new file mode 100644 index 0000000000..bf9f7a301c --- /dev/null +++ b/compiler/utils/UniqDSet.hs @@ -0,0 +1,88 @@ +-- (c) Bartosz Nitka, Facebook, 2015 + +-- | +-- Specialised deterministic sets, for things with @Uniques@ +-- +-- Based on @UniqDFMs@ (as you would expect). +-- See Note [Deterministic UniqFM] in UniqDFM for explanation why we need it. +-- +-- Basically, the things need to be in class @Uniquable@. + +module UniqDSet ( + -- * Unique set type + UniqDSet, -- type synonym for UniqFM a + + -- ** Manipulating these sets + delOneFromUniqDSet, + emptyUniqDSet, + unitUniqDSet, + mkUniqDSet, + addOneToUniqDSet, addListToUniqDSet, + unionUniqDSets, unionManyUniqDSets, + minusUniqDSet, + intersectUniqDSets, + foldUniqDSet, + elementOfUniqDSet, + filterUniqDSet, + sizeUniqDSet, + isEmptyUniqDSet, + lookupUniqDSet, + uniqDSetToList, + ) where + +import UniqDFM +import Unique + +type UniqDSet a = UniqDFM a + +emptyUniqDSet :: UniqDSet a +emptyUniqDSet = emptyUDFM + +unitUniqDSet :: Uniquable a => a -> UniqDSet a +unitUniqDSet x = unitUDFM x x + +mkUniqDSet :: Uniquable a => [a] -> UniqDSet a +mkUniqDSet = foldl addOneToUniqDSet emptyUniqDSet + +addOneToUniqDSet :: Uniquable a => UniqDSet a -> a -> UniqDSet a +addOneToUniqDSet set x = addToUDFM set x x + +addListToUniqDSet :: Uniquable a => UniqDSet a -> [a] -> UniqDSet a +addListToUniqDSet = foldl addOneToUniqDSet + +delOneFromUniqDSet :: Uniquable a => UniqDSet a -> a -> UniqDSet a +delOneFromUniqDSet = delFromUDFM + +unionUniqDSets :: UniqDSet a -> UniqDSet a -> UniqDSet a +unionUniqDSets = plusUDFM + +unionManyUniqDSets :: [UniqDSet a] -> UniqDSet a +unionManyUniqDSets [] = emptyUniqDSet +unionManyUniqDSets sets = foldr1 unionUniqDSets sets + +minusUniqDSet :: UniqDSet a -> UniqDSet a -> UniqDSet a +minusUniqDSet = minusUDFM + +intersectUniqDSets :: UniqDSet a -> UniqDSet a -> UniqDSet a +intersectUniqDSets = intersectUDFM + +foldUniqDSet :: (a -> b -> b) -> b -> UniqDSet a -> b +foldUniqDSet = foldUDFM + +elementOfUniqDSet :: Uniquable a => a -> UniqDSet a -> Bool +elementOfUniqDSet = elemUDFM + +filterUniqDSet :: (a -> Bool) -> UniqDSet a -> UniqDSet a +filterUniqDSet = filterUDFM + +sizeUniqDSet :: UniqDSet a -> Int +sizeUniqDSet = sizeUDFM + +isEmptyUniqDSet :: UniqDSet a -> Bool +isEmptyUniqDSet = isNullUDFM + +lookupUniqDSet :: Uniquable a => UniqDSet a -> a -> Maybe a +lookupUniqDSet = lookupUDFM + +uniqDSetToList :: UniqDSet a -> [a] +uniqDSetToList = eltsUDFM |