diff options
Diffstat (limited to 'testsuite/tests/programs/maessen-hashtab')
5 files changed, 646 insertions, 0 deletions
diff --git a/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs b/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs new file mode 100644 index 0000000000..8251a760c8 --- /dev/null +++ b/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs @@ -0,0 +1,339 @@ +----------------------------------------------------------------------------- +-- | +-- Module : Data.HashTable +-- Copyright : (c) The University of Glasgow 2003 +-- License : BSD-style (see the file libraries/base/LICENSE) +-- +-- Maintainer : libraries@haskell.org +-- Stability : provisional +-- Portability : portable +-- +-- An implementation of extensible hash tables, as described in +-- Per-Ake Larson, /Dynamic Hash Tables/, CACM 31(4), April 1988, +-- pp. 446--457. The implementation is also derived from the one +-- in GHC's runtime system (@ghc\/rts\/Hash.{c,h}@). +-- +----------------------------------------------------------------------------- + +module Data.HashTab ( + -- * Basic hash table operations + HashTable, new, insert, delete, lookup, update, + -- * Converting to and from lists + fromList, toList, + -- * Hash functions + -- $hash_functions + hashInt, hashString, + prime, + -- * Diagnostics + longestChain + ) where + +-- This module is imported by Data.Typeable, which is pretty low down in the +-- module hierarchy, so don't import "high-level" modules + +-- Right now we import high-level modules with gay abandon. +import Prelude hiding ( lookup ) +import Data.Tuple ( fst ) +import Data.Bits +import Data.Maybe +import Data.List ( maximumBy, partition, concat, foldl ) +import Data.Int ( Int32 ) + +import Data.Array.Base +import Data.Array hiding (bounds) +import Data.Array.IO + +import Data.Char ( ord ) +import Data.IORef ( IORef, newIORef, readIORef, writeIORef ) +import Control.Monad ( mapM, sequence_ ) + + +----------------------------------------------------------------------- + +readHTArray :: HTArray a -> Int32 -> IO a +readMutArray :: MutArray a -> Int32 -> IO a +writeMutArray :: MutArray a -> Int32 -> a -> IO () +freezeArray :: MutArray a -> IO (HTArray a) +thawArray :: HTArray a -> IO (MutArray a) +newMutArray :: (Int32, Int32) -> a -> IO (MutArray a) +#if defined(DEBUG) || defined(__NHC__) +type MutArray a = IOArray Int32 a +type HTArray a = MutArray a +newMutArray = newArray +readHTArray = readArray +readMutArray = readArray +writeMutArray = writeArray +freezeArray = return +thawArray = return +#else +type MutArray a = IOArray Int32 a +type HTArray a = Array Int32 a +newMutArray = newArray +readHTArray arr i = return $! (unsafeAt arr (fromIntegral i)) +readMutArray arr i = unsafeRead arr (fromIntegral i) +writeMutArray arr i x = unsafeWrite arr (fromIntegral i) x +freezeArray = unsafeFreeze +thawArray = unsafeThaw +#endif + +newtype HashTable key val = HashTable (IORef (HT key val)) +-- TODO: the IORef should really be an MVar. + +data HT key val + = HT { + kcount :: !Int32, -- Total number of keys. + buckets :: !(HTArray [(key,val)]), + bmask :: !Int32, + hash_fn :: key -> Int32, + cmp :: key -> key -> Bool + } + +-- ----------------------------------------------------------------------------- +-- Sample hash functions + +-- $hash_functions +-- +-- This implementation of hash tables uses the low-order /n/ bits of the hash +-- value for a key, where /n/ varies as the hash table grows. A good hash +-- function therefore will give an even distribution regardless of /n/. +-- +-- If your keyspace is integrals such that the low-order bits between +-- keys are highly variable, then you could get away with using 'id' +-- as the hash function. +-- +-- We provide some sample hash functions for 'Int' and 'String' below. + +-- | A sample hash function for 'Int', implemented as simply @(x `mod` P)@ +-- where P is a suitable prime (currently 1500007). Should give +-- reasonable results for most distributions of 'Int' values, except +-- when the keys are all multiples of the prime! +-- +hashInt :: Int -> Int32 +hashInt = (`rem` prime) . fromIntegral + +-- | A sample hash function for 'String's. The implementation is: +-- +-- > hashString = fromIntegral . foldr f 0 +-- > where f c m = ord c + (m * 128) `rem` 1500007 +-- +-- which seems to give reasonable results. +-- +hashString :: String -> Int32 +hashString = fromIntegral . foldl f 0 + where f m c = ord c + (m * 128) `rem` fromIntegral prime + +-- | A prime larger than the maximum hash table size +prime :: Int32 +prime = 1500007 + +-- ----------------------------------------------------------------------------- +-- Parameters + +tABLE_MAX = 1024 * 1024 :: Int32 -- Maximum size of hash table +#if tABLE_MIN +#else +tABLE_MIN = 16 :: Int32 + +hLOAD = 4 :: Int32 -- Maximum average load of a single hash bucket + +hYSTERESIS = 0 :: Int32 -- entries to ignore in load computation +#endif + +{- Hysteresis favors long association-list-like behavior for small tables. -} + +-- ----------------------------------------------------------------------------- +-- Creating a new hash table + +-- | Creates a new hash table. The following property should hold for the @eq@ +-- and @hash@ functions passed to 'new': +-- +-- > eq A B => hash A == hash B +-- +new + :: (key -> key -> Bool) -- ^ @eq@: An equality comparison on keys + -> (key -> Int32) -- ^ @hash@: A hash function on keys + -> IO (HashTable key val) -- ^ Returns: an empty hash table + +new cmpr hash = do + -- make a new hash table with a single, empty, segment + let mask = tABLE_MIN-1 + bkts' <- newMutArray (0,mask) [] + bkts <- freezeArray bkts' + + let + kcnt = 0 + ht = HT { buckets=bkts, kcount=kcnt, bmask=mask, + hash_fn=hash, cmp=cmpr } + + table <- newIORef ht + return (HashTable table) + +-- ----------------------------------------------------------------------------- +-- Inserting a key\/value pair into the hash table + +-- | Inserts an key\/value mapping into the hash table. +-- +-- Note that 'insert' doesn't remove the old entry from the table - +-- the behaviour is like an association list, where 'lookup' returns +-- the most-recently-inserted mapping for a key in the table. The +-- reason for this is to keep 'insert' as efficient as possible. If +-- you need to update a mapping, then we provide 'update'. +-- +insert :: HashTable key val -> key -> val -> IO () + +insert (HashTable ref) key val = do + table@HT{ kcount=k, buckets=bkts, bmask=b } <- readIORef ref + let table1 = table{ kcount = k+1 } + indx = bucketIndex table key + bucket <- readHTArray bkts indx + bkts' <- thawArray bkts + writeMutArray bkts' indx ((key,val):bucket) + freezeArray bkts' + table2 <- + if tooBig k b + then expandHashTable table1 + else return table1 + writeIORef ref table2 + +tooBig :: Int32 -> Int32 -> Bool +tooBig k b = k-hYSTERESIS > hLOAD * b + +bucketIndex :: HT key val -> key -> Int32 +bucketIndex HT{ hash_fn=hash, bmask=mask } key = + let h = hash key + in (h .&. mask) + +expandHashTable :: HT key val -> IO (HT key val) +expandHashTable + table@HT{ buckets=bkts, bmask=mask } = do + let + oldsize = mask + 1 + newmask = mask + mask + 1 + newsize = newmask + 1 + -- + if newsize > tABLE_MAX + then return table + else do + -- + newbkts' <- newMutArray (0,newmask) [] + + let + table'=table{ bmask=newmask } + splitBucket oldindex = do + bucket <- readHTArray bkts oldindex + let (oldb,newb) = partition ((oldindex==).bucketIndex table' . fst) bucket + writeMutArray newbkts' oldindex oldb + writeMutArray newbkts' (oldindex + oldsize) newb + mapM_ splitBucket [0..mask] + + newbkts <- freezeArray newbkts' + + return ( table'{ buckets=newbkts } ) + +-- ----------------------------------------------------------------------------- +-- Deleting a mapping from the hash table + +-- Remove a key from a bucket +deleteBucket :: (key -> Bool) -> [(key,val)] -> (Int32, [(key, val)]) +deleteBucket _ [] = (0,[]) +deleteBucket del (pair@(k,_):bucket) = + case deleteBucket del bucket of + (dels, bucket') | del k -> dels' `seq` (dels', bucket') + | otherwise -> (dels, pair:bucket') + where dels' = dels + 1 + +-- | Remove an entry from the hash table. +delete :: HashTable key val -> key -> IO () + +delete (HashTable ref) key = do + table@HT{ buckets=bkts, kcount=kcnt, cmp=cmpr } <- readIORef ref + let indx = bucketIndex table key + bkts' <- thawArray bkts + bucket <- readMutArray bkts' indx + let (removed,bucket') = deleteBucket (cmpr key) bucket + writeMutArray bkts' indx bucket' + freezeArray bkts' + writeIORef ref ( table{kcount = kcnt - removed} ) + +-- ----------------------------------------------------------------------------- +-- Updating a mapping in the hash table + +-- | Updates an entry in the hash table, returning 'True' if there was +-- already an entry for this key, or 'False' otherwise. After 'update' +-- there will always be exactly one entry for the given key in the table. +-- +-- 'insert' is more efficient than 'update' if you don't care about +-- multiple entries, or you know for sure that multiple entries can't +-- occur. However, 'update' is more efficient than 'delete' followed +-- by 'insert'. +update :: HashTable key val -> key -> val -> IO Bool + +update (HashTable ref) key val = do + table@HT{ kcount=k, buckets=bkts, cmp=cmpr, bmask=b } <- readIORef ref + let indx = bucketIndex table key + bkts' <- thawArray bkts + bucket <- readMutArray bkts' indx + let (deleted,bucket') = deleteBucket (cmpr key) bucket + k' = k + 1 - deleted + table1 = table{ kcount=k' } + + writeMutArray bkts' indx ((key,val):bucket') + freezeArray bkts' + table2 <- + if tooBig k' b -- off by one from insert's resize heuristic. + then expandHashTable table1 + else return table1 + writeIORef ref table2 + return (deleted>0) + +-- ----------------------------------------------------------------------------- +-- Looking up an entry in the hash table + +-- | Looks up the value of a key in the hash table. +lookup :: HashTable key val -> key -> IO (Maybe val) + +lookup (HashTable ref) key = do + table@HT{ buckets=bkts, cmp=cmpr } <- readIORef ref + let indx = bucketIndex table key + bucket <- readHTArray bkts indx + case [ val | (key',val) <- bucket, cmpr key key' ] of + [] -> return Nothing + (v:_) -> return (Just v) + +-- ----------------------------------------------------------------------------- +-- Converting to/from lists + +-- | Convert a list of key\/value pairs into a hash table. Equality on keys +-- is taken from the Eq instance for the key type. +-- +fromList :: (Eq key) => (key -> Int32) -> [(key,val)] -> IO (HashTable key val) +fromList hash list = do + table <- new (==) hash + sequence_ [ insert table k v | (k,v) <- list ] + return table + +-- | Converts a hash table to a list of key\/value pairs. +-- +toList :: (Ord key, Ord val) => HashTable key val -> IO [(key,val)] +toList (HashTable ref) = do + HT{ buckets=bkts, bmask=b } <- readIORef ref + fmap concat (mapM (readHTArray bkts) [0..b]) + +-- ----------------------------------------------------------------------------- +-- Diagnostics + +-- | This function is useful for determining whether your hash function +-- is working well for your data set. It returns the longest chain +-- of key\/value pairs in the hash table for which all the keys hash to +-- the same bucket. If this chain is particularly long (say, longer +-- than 10 elements), then it might be a good idea to try a different +-- hash function. +-- +longestChain :: HashTable key val -> IO [(key,val)] +longestChain (HashTable ref) = do + HT{ buckets=bkts, bmask=b } <- readIORef ref + let lengthCmp (_:x)(_:y) = lengthCmp x y + lengthCmp [] [] = EQ + lengthCmp [] _ = LT + lengthCmp _ [] = GT + fmap (maximumBy lengthCmp) (mapM (readHTArray bkts) [0..b]) diff --git a/testsuite/tests/programs/maessen-hashtab/HashTest.hs b/testsuite/tests/programs/maessen-hashtab/HashTest.hs new file mode 100644 index 0000000000..51c60c0640 --- /dev/null +++ b/testsuite/tests/programs/maessen-hashtab/HashTest.hs @@ -0,0 +1,278 @@ +{- Test code for Data.HashTable -} + +module Main(main) where + +import Prelude hiding (lookup) +import qualified Prelude (lookup) +import Data.Maybe(isJust,isNothing) +import Data.Int(Int32) +import Test.QuickCheck +import System.IO.Unsafe(unsafePerformIO) +import Data.HashTab +import Control.Monad(liftM2, foldM) +import System.Random +import System.Environment + +infixr 0 ==. +infixr 0 ==~ +infixr 0 ~~ + +type HT = HashTable Int Int +newtype HashFun = HF {unHF :: (Int -> Int32)} +data Empty = E {e :: (IO HT), hfe :: HashFun} +data MkH = H {h :: (IO HT), hfh :: HashFun} +newtype List a = L [a] + +data Action = Lookup Int + | Insert Int Int + | Delete Int + | Update Int Int + deriving (Show) + +instance Arbitrary Action where + arbitrary = frequency [(10,fmap Lookup arbitrary), + (5, liftM2 Insert arbitrary arbitrary), + (3, liftM2 Update arbitrary arbitrary), + (1, fmap Delete arbitrary)] + coarbitrary = error "coarbitrary Action" + +simA :: [Action] -> [Either Bool [Int]] +simA = fst . foldl sim ([],[]) + where sim :: ([Either Bool [Int]], [Action]) -> Action -> + ([Either Bool [Int]], [Action]) + sim (res, past) (Lookup k) = (Right (lkup k past) : res, past) + sim (res, past) (Insert k v) = (res, Insert k v : past) + sim (res, past) (Delete k) = (res, Delete k : past) + sim (res, past) (Update k v) = + (Left (not (null l)) : res, Update k v : past) + where l = lkup k past + lkup _ [] = [] + lkup k (Delete k' : _) + | k==k' = [] + lkup k (Update k' v : _) + | k==k' = [v] + lkup k (Insert k' v : past) + | k==k' = v:lkup k past + lkup k (_ : past) = lkup k past + +runA :: HashFun -> [Action] -> IO [Either Bool (Maybe Int)] +runA hf acts = do + ht <- new (==) (unHF hf) + let run res (Lookup a) = fmap (lkup res) $ lookup ht a + run res (Insert k v) = insert ht k v >> return res + run res (Delete k) = delete ht k >> return res + run res (Update k v) = fmap (upd res) $ update ht k v + lkup res m = Right m : res + upd res b = Left b : res + foldM run [] acts + +(~~) :: IO [Either Bool (Maybe Int)] -> [Either Bool [Int]] -> Bool +acts ~~ sims = and $ zipWith same (unsafePerformIO acts) sims + where same (Left b) (Left b') = b==b' + same (Right Nothing) (Right []) = True + same (Right (Just a)) (Right xs) = a `elem` xs + same _ _ = False + +lookups :: HT -> [Int] -> IO [Maybe Int] +lookups ht ks = mapM (lookup ht) ks + +instance Show HashFun where + showsPrec _ (HF hf) r + | hf 1 == 0 = "degenerate"++r + | otherwise = "usual"++r + +instance Show Empty where + showsPrec _ ee r = shows (hfe ee) r + +instance Show MkH where + showsPrec _ hh r = shows (hfh hh) $ + ("; "++shows (unsafePerformIO (h hh >>= toList)) r) + +instance Show a => Show (List a) where + showsPrec _ (L l) r = shows l r + +instance Arbitrary HashFun where + arbitrary = frequency [(20,return (HF hashInt)), + (1,return (HF (const 0)))] + coarbitrary = error "coarbitrary HashFun" + +instance Arbitrary Empty where + arbitrary = fmap mkE arbitrary + where mkE (HF hf) = E {e = new (==) hf, hfe=HF hf} + coarbitrary = error "coarbitrary Empty" + +instance Arbitrary a => Arbitrary (List a) where + arbitrary = do + sz <- frequency [(50, sized return), + (1,return (4096*2)), + (0, return (1024*1024))] + resize sz $ fmap L $ sized vector + coarbitrary = error "coarbitrary (List a)" + +instance Arbitrary MkH where + arbitrary = do + hf <- arbitrary + L list <- arbitrary + let mkH act = H { h = act, hfh = hf } + return (mkH . fromList (unHF hf) $ list) + coarbitrary = error "coarbitrary MkH" + +(==~) :: (Eq a) => IO a -> IO a -> Bool +act1 ==~ act2 = unsafePerformIO act1 == unsafePerformIO act2 + +(==.) :: (Eq a) => IO a -> a -> Bool +act ==. v = unsafePerformIO act == v + +notin :: (Testable a) => Int -> MkH -> a -> Property +k `notin` hh = \prop -> + let f = (not . isJust . unsafePerformIO) (h hh >>= flip lookup k) in + f `trivial` prop + +prop_emptyLookup :: Empty -> Int -> Bool +prop_emptyLookup ee k = + isNothing . unsafePerformIO $ + (do mt <- e ee + lookup mt k) + +prop_emptyToList :: Empty -> Bool +prop_emptyToList ee = + (do mt <- e ee + toList mt) ==. [] + +prop_emptyFromList :: HashFun -> Int -> Bool +prop_emptyFromList hf k = + (do mt <- new (==) (unHF hf) :: IO HT + lookup mt k) ==~ + (do mt <- fromList (unHF hf) [] + lookup mt k) + +prop_insert :: MkH -> Int -> Int -> Bool +prop_insert hh k v = + (do ht <- h hh + insert ht k v + lookup ht k) ==. Just v + +prop_insertu :: MkH -> Int -> Int -> List Int -> Bool +prop_insertu hh k v (L ks) = + let ks' = filter (k /=) ks in + (do ht <- h hh + insert ht k v + lookups ht ks') ==~ + (do ht <- h hh + lookups ht ks') + +prop_delete :: MkH -> Int -> Property +prop_delete hh k = + k `notin` hh $ + isNothing . unsafePerformIO $ + (do ht <- h hh + delete ht k + lookup ht k) + +prop_deleteu :: MkH -> Int -> List Int -> Bool +prop_deleteu hh k (L ks) = + let ks' = filter (k /=) ks in + (do ht <- h hh + delete ht k + lookups ht ks') ==~ + (do ht <- h hh + lookups ht ks') + +naiveUpdate :: HT -> Int -> Int -> IO () +naiveUpdate ht k v = do + delete ht k + insert ht k v + +prop_update :: MkH -> Int -> Int -> List Int -> Bool +prop_update hh k v (L ks) = + (do ht <- h hh + _ <- update ht k v + lookups ht ks) ==~ + (do ht <- h hh + naiveUpdate ht k v + lookups ht ks) + +prop_updatec :: MkH -> Int -> Int -> Bool +prop_updatec hh k v = + (do ht <- h hh + _ <- update ht k v + lookup ht k) ==. Just v + +prop_updateLookup :: MkH -> Int -> Int -> Property +prop_updateLookup hh k v = + k `notin` hh $ + (do ht <- h hh + update ht k v) ==~ + (do ht <- h hh + fmap isJust (lookup ht k)) + +prop_simulation :: HashFun -> List Action -> Property +prop_simulation hf (L acts) = + (null acts `trivial`) $ + runA hf acts ~~ simA acts + +{- + +For "fromList" and "toList" properties we're a bit sloppy: we perform +multiple insertions for a key (potentially) but give nor promises +about which one we will retrieve with lookup, or what order they'll be +returned by toList (or if they'll all be returned at all). Thus we +insert all occurrences of a key with the same value, and do all +checking via lookups. + +-} + +prop_fromList :: HashFun -> List Int -> List Int -> Property +prop_fromList hf (L l) (L ks) = + null l `trivial` + let assocs = map (\t -> (t,t)) l in + ( do ht <- fromList (unHF hf) assocs + lookups ht ks) ==. (map (`Prelude.lookup` assocs) ks) + +prop_fromListInsert :: HashFun -> List (Int,Int) -> Int -> Int -> List Int -> Property +prop_fromListInsert hf (L l) k v (L ks) = + null l `trivial` + (( do ht <- fromList (unHF hf) l + insert ht k v + lookups ht ks) ==~ + ( do ht <- fromList (unHF hf) (l++[(k,v)]) + lookups ht ks)) + +prop_toList :: HashFun -> List Int -> List Int -> Property +prop_toList hf (L l) (L ks) = + null l `trivial` + let assocs = map (\t -> (t,t)) l in + ( do ht <- fromList (unHF hf) assocs + lookups ht ks) ==~ + ( do ht <- fromList (unHF hf) assocs + fmap (\as -> map (`Prelude.lookup` as) ks) $ toList ht ) + +te :: (Testable a) => String -> a -> IO () +-- te name prop = putStrLn name >> verboseCheck prop +te name prop = do + putStr name + check (defaultConfig{configMaxTest = 500, + configMaxFail = 10000, + configEvery = \_ _ -> "" }) prop + +main :: IO () +main = do + [r] <- getArgs + setStdGen (mkStdGen (read r :: Int)) + sequence_ $ + [ te "emptyLookup:" prop_emptyLookup, + te "emptyToList:" prop_emptyToList, + te "emptyFromList:" prop_emptyFromList, + te "insert:" prop_insert, + te "insertu:" prop_insertu, + te "delete:" prop_delete, + te "deleteu:" prop_deleteu, + te "update:" prop_update, + te "updatec:" prop_updatec, + te "updateLookup:" prop_updateLookup, + te "fromList:" prop_fromList, + te "fromListInsert:" prop_fromListInsert, + te "toList:" prop_toList, + te "simulation:" prop_simulation + ] + putStrLn "OK" diff --git a/testsuite/tests/programs/maessen-hashtab/Makefile b/testsuite/tests/programs/maessen-hashtab/Makefile new file mode 100644 index 0000000000..9101fbd40a --- /dev/null +++ b/testsuite/tests/programs/maessen-hashtab/Makefile @@ -0,0 +1,3 @@ +TOP=../../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk diff --git a/testsuite/tests/programs/maessen-hashtab/maessen_hashtab.stdout b/testsuite/tests/programs/maessen-hashtab/maessen_hashtab.stdout new file mode 100644 index 0000000000..b667dcaae0 --- /dev/null +++ b/testsuite/tests/programs/maessen-hashtab/maessen_hashtab.stdout @@ -0,0 +1,15 @@ +emptyLookup:OK, passed 500 tests. +emptyToList:OK, passed 500 tests. +emptyFromList:OK, passed 500 tests. +insert:OK, passed 500 tests. +insertu:OK, passed 500 tests. +delete:OK, passed 500 tests (60% trivial). +deleteu:OK, passed 500 tests. +update:OK, passed 500 tests. +updatec:OK, passed 500 tests. +updateLookup:OK, passed 500 tests (63% trivial). +fromList:OK, passed 500 tests (2% trivial). +fromListInsert:OK, passed 500 tests (2% trivial). +toList:OK, passed 500 tests (1% trivial). +simulation:OK, passed 500 tests (1% trivial). +OK diff --git a/testsuite/tests/programs/maessen-hashtab/test.T b/testsuite/tests/programs/maessen-hashtab/test.T new file mode 100644 index 0000000000..807cb85e81 --- /dev/null +++ b/testsuite/tests/programs/maessen-hashtab/test.T @@ -0,0 +1,11 @@ + +test('maessen_hashtab', + [reqlib('QuickCheck'), + extra_clean(['HashTest.hi', 'HashTest.o', + 'Data/HashTab.hi', 'Data/HashTab.o']), + skip_if_fast, +# this test runs out of time when not optimised: + omit_ways(['normal','ghci','threaded1']), + extra_run_opts('99999')], + multimod_compile_and_run, ['HashTest', '-cpp']) + |