diff options
author | Bartosz Nitka <niteria@gmail.com> | 2015-11-21 15:49:14 +0100 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2015-11-21 15:49:14 +0100 |
commit | 6664ab8356f00ef0b2186f30a0d29a9c0228c045 (patch) | |
tree | b65162a8759d3e353bcc79dcbcf9b1990374bc48 /compiler/utils/UniqDFM.hs | |
parent | 192dd068890701a7692890677d4cbf9f2abdb64a (diff) | |
download | haskell-6664ab8356f00ef0b2186f30a0d29a9c0228c045.tar.gz |
Add DVarSet - a deterministic set of Vars
This implements `DVarSet`, a deterministic set of Vars, with an
interface very similar to `VarSet` with a couple of functions missing.
I will need this in changes that follow, one of them will be about
changing the type of the set of Vars that `RuleInfo` holds to make the
free variable computation deterministic.
Test Plan:
./validate
I can add new tests if anyone wants me to.
Reviewers: simonpj, simonmar, austin, bgamari
Reviewed By: simonmar, bgamari
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D1487
GHC Trac Issues: #4012
Diffstat (limited to 'compiler/utils/UniqDFM.hs')
-rw-r--r-- | compiler/utils/UniqDFM.hs | 150 |
1 files changed, 142 insertions, 8 deletions
diff --git a/compiler/utils/UniqDFM.hs b/compiler/utils/UniqDFM.hs index 5f6554ed6c..3f2830ab1f 100644 --- a/compiler/utils/UniqDFM.hs +++ b/compiler/utils/UniqDFM.hs @@ -25,11 +25,23 @@ module UniqDFM ( -- ** Manipulating those mappings emptyUDFM, + unitUDFM, addToUDFM, + delFromUDFM, + plusUDFM, lookupUDFM, + elemUDFM, foldUDFM, eltsUDFM, + filterUDFM, + isNullUDFM, + sizeUDFM, + intersectUDFM, + minusUDFM, + udfmToList, + udfmToUfm, + alwaysUnsafeUfmToUdfm, ) where import FastString @@ -41,16 +53,32 @@ import Data.Typeable import Data.Data import Data.List (sortBy) import Data.Function (on) +import UniqFM (UniqFM, listToUFM_Directly, ufmToList) -- Note [Deterministic UniqFM] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- A @UniqDFM@ is just like @UniqFM@ with the following additional +-- property: the function `udfmToList` returns the elements in some +-- deterministic order not depending on the Unique key for those elements. +-- +-- If the client of the map performs operations on the map in deterministic +-- order then `udfmToList` returns them in deterministic order. +-- +-- There is an implementation cost: each element is given a serial number +-- as it is added, and `udfmToList` sorts it's result by this serial +-- number. So you should only use `UniqDFM` if you need the deterministic +-- property. +-- +-- `foldUDFM` also preserves determinism. +-- -- Normal @UniqFM@ when you turn it into a list will use -- Data.IntMap.toList function that returns the elements in the order of -- the keys. The keys in @UniqFM@ are always @Uniques@, so you end up with -- with a list ordered by @Uniques@. -- The order of @Uniques@ is known to be not stable across rebuilds. -- See Note [Unique Determinism] in Unique. - +-- +-- -- There's more than one way to implement this. The implementation here tags -- every value with the insertion time that can later be used to sort the -- values when asked to convert to a list. @@ -61,12 +89,15 @@ import Data.Function (on) -- -- where the list determines the order. This makes deletion tricky as we'd -- only accumulate elements in that list, but makes merging easier as you --- don't have to renumber everything. --- I've tested both approaches by replacing UniqFM and the cost was about --- the same for both. We don't need merging nor deletion yet, but when we --- do it might be worth to reevaluate the trade-offs here. - -data TaggedVal val = TaggedVal val {-# UNPACK #-} !Int +-- can just merge both structures independently. +-- Deletion can probably be done in amortized fashion when the size of the +-- list is twice the size of the set. + +-- | A type of values tagged with insertion time +data TaggedVal val = + TaggedVal + val + {-# UNPACK #-} !Int -- ^ insertion time deriving (Data, Typeable) taggedFst :: TaggedVal val -> val @@ -81,19 +112,88 @@ instance Eq val => Eq (TaggedVal val) where instance Functor TaggedVal where fmap f (TaggedVal val i) = TaggedVal (f val) i -data UniqDFM ele = UDFM !(M.IntMap (TaggedVal ele)) {-# UNPACK #-} !Int +-- | Type of unique deterministic finite maps +data UniqDFM ele = + UDFM + !(M.IntMap (TaggedVal ele)) -- A map where keys are Unique's values and + -- values are tagged with insertion time. + -- The invariant is that all the tags will + -- be distinct within a single map + {-# UNPACK #-} !Int -- Upper bound on the values' insertion + -- time. See Note [Overflow on plusUDFM] deriving (Data, Typeable, Functor) emptyUDFM :: UniqDFM elt emptyUDFM = UDFM M.empty 0 +unitUDFM :: Uniquable key => key -> elt -> UniqDFM elt +unitUDFM k v = UDFM (M.singleton (getKey $ getUnique k) (TaggedVal v 0)) 1 + addToUDFM :: Uniquable key => UniqDFM elt -> key -> elt -> UniqDFM elt addToUDFM (UDFM m i) k v = UDFM (M.insert (getKey $ getUnique k) (TaggedVal v i) m) (i + 1) +addToUDFM_Directly :: UniqDFM elt -> Unique -> elt -> UniqDFM elt +addToUDFM_Directly (UDFM m i) u v = + UDFM (M.insert (getKey u) (TaggedVal v i) m) (i + 1) + +addListToUDFM_Directly :: UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt +addListToUDFM_Directly = foldl (\m (k, v) -> addToUDFM_Directly m k v) + +delFromUDFM :: Uniquable key => UniqDFM elt -> key -> UniqDFM elt +delFromUDFM (UDFM m i) k = UDFM (M.delete (getKey $ getUnique k) m) i + +-- Note [Overflow on plusUDFM] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- There are multiple ways of implementing plusUDFM. +-- The main problem that needs to be solved is overlap on times of +-- insertion between different keys in two maps. +-- Consider: +-- +-- A = fromList [(a, (x, 1))] +-- B = fromList [(b, (y, 1))] +-- +-- If you merge them naively you end up with: +-- +-- C = fromList [(a, (x, 1)), (b, (y, 1))] +-- +-- Which loses information about ordering and brings us back into +-- non-deterministic world. +-- +-- The solution I considered before would increment the tags on one of the +-- sets by the upper bound of the other set. The problem with this approach +-- is that you'll run out of tags for some merge patterns. +-- Say you start with A with upper bound 1, you merge A with A to get A' and +-- the upper bound becomes 2. You merge A' with A' and the upper bound +-- doubles again. After 64 merges you overflow. +-- This solution would have the same time complexity as plusUFM, namely O(n+m). +-- +-- The solution I ended up with has time complexity of +-- O(m log m + m * min (n+m, W)) where m is the smaller set. +-- It simply inserts the elements of the smaller set into the larger +-- set in the order that they were inserted into the smaller set. That's +-- O(m log m) for extracting the elements from the smaller set in the +-- insertion order and O(m * min(n+m, W)) to insert them into the bigger +-- set. + +plusUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt +plusUDFM udfml@(UDFM _ i) udfmr@(UDFM _ j) + -- we will use the upper bound on the tag as a proxy for the set size, + -- to insert the smaller one into the bigger one + | i > j = insertUDFMIntoLeft udfml udfmr + | otherwise = insertUDFMIntoLeft udfmr udfml + +insertUDFMIntoLeft :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt +insertUDFMIntoLeft udfml udfmr = addListToUDFM_Directly udfml $ udfmToList udfmr + lookupUDFM :: Uniquable key => UniqDFM elt -> key -> Maybe elt lookupUDFM (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey $ getUnique k) m +elemUDFM :: Uniquable key => key -> UniqDFM elt -> Bool +elemUDFM k (UDFM m _i) = M.member (getKey $ getUnique k) m + +-- | Performs a deterministic fold over the UniqDFM. +-- It's O(n log n) while the corresponding function on `UniqFM` is O(n). foldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a foldUDFM k z m = foldr k z (eltsUDFM m) @@ -101,11 +201,45 @@ eltsUDFM :: UniqDFM elt -> [elt] eltsUDFM (UDFM m _i) = map taggedFst $ sortBy (compare `on` taggedSnd) $ M.elems m +filterUDFM :: (elt -> Bool) -> UniqDFM elt -> UniqDFM elt +filterUDFM p (UDFM m i) = UDFM (M.filter (\(TaggedVal v _) -> p v) m) i + +-- | Converts `UniqDFM` to a list, with elements in deterministic order. +-- It's O(n log n) while the corresponding function on `UniqFM` is O(n). udfmToList :: UniqDFM elt -> [(Unique, elt)] udfmToList (UDFM m _i) = [ (getUnique k, taggedFst v) | (k, v) <- sortBy (compare `on` (taggedSnd . snd)) $ M.toList m ] +isNullUDFM :: UniqDFM elt -> Bool +isNullUDFM (UDFM m _) = M.null m + +sizeUDFM :: UniqDFM elt -> Int +sizeUDFM (UDFM m _i) = M.size m + +intersectUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt +intersectUDFM (UDFM x i) (UDFM y _j) = UDFM (M.intersection x y) i + -- M.intersection is left biased, that means the result will only have + -- a subset of elements from the left set, so `i` is a good upper bound. + +minusUDFM :: UniqDFM elt1 -> UniqDFM elt2 -> UniqDFM elt1 +minusUDFM (UDFM x i) (UDFM y _j) = UDFM (M.difference x y) i + -- M.difference returns a subset of a left set, so `i` is a good upper + -- bound. + +-- | This allows for lossy conversion from UniqDFM to UniqFM +udfmToUfm :: UniqDFM elt -> UniqFM elt +udfmToUfm (UDFM m _i) = + listToUFM_Directly [(getUnique k, taggedFst tv) | (k, tv) <- M.toList m] + +listToUDFM_Directly :: [(Unique, elt)] -> UniqDFM elt +listToUDFM_Directly = foldl (\m (u, v) -> addToUDFM_Directly m u v) emptyUDFM + +-- This should not be used in commited code, provided for convenience to +-- make ad-hoc conversions when developing +alwaysUnsafeUfmToUdfm :: UniqFM elt -> UniqDFM elt +alwaysUnsafeUfmToUdfm = listToUDFM_Directly . ufmToList + -- Output-ery instance Outputable a => Outputable (UniqDFM a) where |