summaryrefslogtreecommitdiff
path: root/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs
diff options
context:
space:
mode:
Diffstat (limited to 'testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs')
-rw-r--r--testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs339
1 files changed, 339 insertions, 0 deletions
diff --git a/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs b/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs
new file mode 100644
index 0000000000..8251a760c8
--- /dev/null
+++ b/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs
@@ -0,0 +1,339 @@
+-----------------------------------------------------------------------------
+-- |
+-- Module : Data.HashTable
+-- Copyright : (c) The University of Glasgow 2003
+-- License : BSD-style (see the file libraries/base/LICENSE)
+--
+-- Maintainer : libraries@haskell.org
+-- Stability : provisional
+-- Portability : portable
+--
+-- An implementation of extensible hash tables, as described in
+-- Per-Ake Larson, /Dynamic Hash Tables/, CACM 31(4), April 1988,
+-- pp. 446--457. The implementation is also derived from the one
+-- in GHC's runtime system (@ghc\/rts\/Hash.{c,h}@).
+--
+-----------------------------------------------------------------------------
+
+module Data.HashTab (
+ -- * Basic hash table operations
+ HashTable, new, insert, delete, lookup, update,
+ -- * Converting to and from lists
+ fromList, toList,
+ -- * Hash functions
+ -- $hash_functions
+ hashInt, hashString,
+ prime,
+ -- * Diagnostics
+ longestChain
+ ) where
+
+-- This module is imported by Data.Typeable, which is pretty low down in the
+-- module hierarchy, so don't import "high-level" modules
+
+-- Right now we import high-level modules with gay abandon.
+import Prelude hiding ( lookup )
+import Data.Tuple ( fst )
+import Data.Bits
+import Data.Maybe
+import Data.List ( maximumBy, partition, concat, foldl )
+import Data.Int ( Int32 )
+
+import Data.Array.Base
+import Data.Array hiding (bounds)
+import Data.Array.IO
+
+import Data.Char ( ord )
+import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
+import Control.Monad ( mapM, sequence_ )
+
+
+-----------------------------------------------------------------------
+
+readHTArray :: HTArray a -> Int32 -> IO a
+readMutArray :: MutArray a -> Int32 -> IO a
+writeMutArray :: MutArray a -> Int32 -> a -> IO ()
+freezeArray :: MutArray a -> IO (HTArray a)
+thawArray :: HTArray a -> IO (MutArray a)
+newMutArray :: (Int32, Int32) -> a -> IO (MutArray a)
+#if defined(DEBUG) || defined(__NHC__)
+type MutArray a = IOArray Int32 a
+type HTArray a = MutArray a
+newMutArray = newArray
+readHTArray = readArray
+readMutArray = readArray
+writeMutArray = writeArray
+freezeArray = return
+thawArray = return
+#else
+type MutArray a = IOArray Int32 a
+type HTArray a = Array Int32 a
+newMutArray = newArray
+readHTArray arr i = return $! (unsafeAt arr (fromIntegral i))
+readMutArray arr i = unsafeRead arr (fromIntegral i)
+writeMutArray arr i x = unsafeWrite arr (fromIntegral i) x
+freezeArray = unsafeFreeze
+thawArray = unsafeThaw
+#endif
+
+newtype HashTable key val = HashTable (IORef (HT key val))
+-- TODO: the IORef should really be an MVar.
+
+data HT key val
+ = HT {
+ kcount :: !Int32, -- Total number of keys.
+ buckets :: !(HTArray [(key,val)]),
+ bmask :: !Int32,
+ hash_fn :: key -> Int32,
+ cmp :: key -> key -> Bool
+ }
+
+-- -----------------------------------------------------------------------------
+-- Sample hash functions
+
+-- $hash_functions
+--
+-- This implementation of hash tables uses the low-order /n/ bits of the hash
+-- value for a key, where /n/ varies as the hash table grows. A good hash
+-- function therefore will give an even distribution regardless of /n/.
+--
+-- If your keyspace is integrals such that the low-order bits between
+-- keys are highly variable, then you could get away with using 'id'
+-- as the hash function.
+--
+-- We provide some sample hash functions for 'Int' and 'String' below.
+
+-- | A sample hash function for 'Int', implemented as simply @(x `mod` P)@
+-- where P is a suitable prime (currently 1500007). Should give
+-- reasonable results for most distributions of 'Int' values, except
+-- when the keys are all multiples of the prime!
+--
+hashInt :: Int -> Int32
+hashInt = (`rem` prime) . fromIntegral
+
+-- | A sample hash function for 'String's. The implementation is:
+--
+-- > hashString = fromIntegral . foldr f 0
+-- > where f c m = ord c + (m * 128) `rem` 1500007
+--
+-- which seems to give reasonable results.
+--
+hashString :: String -> Int32
+hashString = fromIntegral . foldl f 0
+ where f m c = ord c + (m * 128) `rem` fromIntegral prime
+
+-- | A prime larger than the maximum hash table size
+prime :: Int32
+prime = 1500007
+
+-- -----------------------------------------------------------------------------
+-- Parameters
+
+tABLE_MAX = 1024 * 1024 :: Int32 -- Maximum size of hash table
+#if tABLE_MIN
+#else
+tABLE_MIN = 16 :: Int32
+
+hLOAD = 4 :: Int32 -- Maximum average load of a single hash bucket
+
+hYSTERESIS = 0 :: Int32 -- entries to ignore in load computation
+#endif
+
+{- Hysteresis favors long association-list-like behavior for small tables. -}
+
+-- -----------------------------------------------------------------------------
+-- Creating a new hash table
+
+-- | Creates a new hash table. The following property should hold for the @eq@
+-- and @hash@ functions passed to 'new':
+--
+-- > eq A B => hash A == hash B
+--
+new
+ :: (key -> key -> Bool) -- ^ @eq@: An equality comparison on keys
+ -> (key -> Int32) -- ^ @hash@: A hash function on keys
+ -> IO (HashTable key val) -- ^ Returns: an empty hash table
+
+new cmpr hash = do
+ -- make a new hash table with a single, empty, segment
+ let mask = tABLE_MIN-1
+ bkts' <- newMutArray (0,mask) []
+ bkts <- freezeArray bkts'
+
+ let
+ kcnt = 0
+ ht = HT { buckets=bkts, kcount=kcnt, bmask=mask,
+ hash_fn=hash, cmp=cmpr }
+
+ table <- newIORef ht
+ return (HashTable table)
+
+-- -----------------------------------------------------------------------------
+-- Inserting a key\/value pair into the hash table
+
+-- | Inserts an key\/value mapping into the hash table.
+--
+-- Note that 'insert' doesn't remove the old entry from the table -
+-- the behaviour is like an association list, where 'lookup' returns
+-- the most-recently-inserted mapping for a key in the table. The
+-- reason for this is to keep 'insert' as efficient as possible. If
+-- you need to update a mapping, then we provide 'update'.
+--
+insert :: HashTable key val -> key -> val -> IO ()
+
+insert (HashTable ref) key val = do
+ table@HT{ kcount=k, buckets=bkts, bmask=b } <- readIORef ref
+ let table1 = table{ kcount = k+1 }
+ indx = bucketIndex table key
+ bucket <- readHTArray bkts indx
+ bkts' <- thawArray bkts
+ writeMutArray bkts' indx ((key,val):bucket)
+ freezeArray bkts'
+ table2 <-
+ if tooBig k b
+ then expandHashTable table1
+ else return table1
+ writeIORef ref table2
+
+tooBig :: Int32 -> Int32 -> Bool
+tooBig k b = k-hYSTERESIS > hLOAD * b
+
+bucketIndex :: HT key val -> key -> Int32
+bucketIndex HT{ hash_fn=hash, bmask=mask } key =
+ let h = hash key
+ in (h .&. mask)
+
+expandHashTable :: HT key val -> IO (HT key val)
+expandHashTable
+ table@HT{ buckets=bkts, bmask=mask } = do
+ let
+ oldsize = mask + 1
+ newmask = mask + mask + 1
+ newsize = newmask + 1
+ --
+ if newsize > tABLE_MAX
+ then return table
+ else do
+ --
+ newbkts' <- newMutArray (0,newmask) []
+
+ let
+ table'=table{ bmask=newmask }
+ splitBucket oldindex = do
+ bucket <- readHTArray bkts oldindex
+ let (oldb,newb) = partition ((oldindex==).bucketIndex table' . fst) bucket
+ writeMutArray newbkts' oldindex oldb
+ writeMutArray newbkts' (oldindex + oldsize) newb
+ mapM_ splitBucket [0..mask]
+
+ newbkts <- freezeArray newbkts'
+
+ return ( table'{ buckets=newbkts } )
+
+-- -----------------------------------------------------------------------------
+-- Deleting a mapping from the hash table
+
+-- Remove a key from a bucket
+deleteBucket :: (key -> Bool) -> [(key,val)] -> (Int32, [(key, val)])
+deleteBucket _ [] = (0,[])
+deleteBucket del (pair@(k,_):bucket) =
+ case deleteBucket del bucket of
+ (dels, bucket') | del k -> dels' `seq` (dels', bucket')
+ | otherwise -> (dels, pair:bucket')
+ where dels' = dels + 1
+
+-- | Remove an entry from the hash table.
+delete :: HashTable key val -> key -> IO ()
+
+delete (HashTable ref) key = do
+ table@HT{ buckets=bkts, kcount=kcnt, cmp=cmpr } <- readIORef ref
+ let indx = bucketIndex table key
+ bkts' <- thawArray bkts
+ bucket <- readMutArray bkts' indx
+ let (removed,bucket') = deleteBucket (cmpr key) bucket
+ writeMutArray bkts' indx bucket'
+ freezeArray bkts'
+ writeIORef ref ( table{kcount = kcnt - removed} )
+
+-- -----------------------------------------------------------------------------
+-- Updating a mapping in the hash table
+
+-- | Updates an entry in the hash table, returning 'True' if there was
+-- already an entry for this key, or 'False' otherwise. After 'update'
+-- there will always be exactly one entry for the given key in the table.
+--
+-- 'insert' is more efficient than 'update' if you don't care about
+-- multiple entries, or you know for sure that multiple entries can't
+-- occur. However, 'update' is more efficient than 'delete' followed
+-- by 'insert'.
+update :: HashTable key val -> key -> val -> IO Bool
+
+update (HashTable ref) key val = do
+ table@HT{ kcount=k, buckets=bkts, cmp=cmpr, bmask=b } <- readIORef ref
+ let indx = bucketIndex table key
+ bkts' <- thawArray bkts
+ bucket <- readMutArray bkts' indx
+ let (deleted,bucket') = deleteBucket (cmpr key) bucket
+ k' = k + 1 - deleted
+ table1 = table{ kcount=k' }
+
+ writeMutArray bkts' indx ((key,val):bucket')
+ freezeArray bkts'
+ table2 <-
+ if tooBig k' b -- off by one from insert's resize heuristic.
+ then expandHashTable table1
+ else return table1
+ writeIORef ref table2
+ return (deleted>0)
+
+-- -----------------------------------------------------------------------------
+-- Looking up an entry in the hash table
+
+-- | Looks up the value of a key in the hash table.
+lookup :: HashTable key val -> key -> IO (Maybe val)
+
+lookup (HashTable ref) key = do
+ table@HT{ buckets=bkts, cmp=cmpr } <- readIORef ref
+ let indx = bucketIndex table key
+ bucket <- readHTArray bkts indx
+ case [ val | (key',val) <- bucket, cmpr key key' ] of
+ [] -> return Nothing
+ (v:_) -> return (Just v)
+
+-- -----------------------------------------------------------------------------
+-- Converting to/from lists
+
+-- | Convert a list of key\/value pairs into a hash table. Equality on keys
+-- is taken from the Eq instance for the key type.
+--
+fromList :: (Eq key) => (key -> Int32) -> [(key,val)] -> IO (HashTable key val)
+fromList hash list = do
+ table <- new (==) hash
+ sequence_ [ insert table k v | (k,v) <- list ]
+ return table
+
+-- | Converts a hash table to a list of key\/value pairs.
+--
+toList :: (Ord key, Ord val) => HashTable key val -> IO [(key,val)]
+toList (HashTable ref) = do
+ HT{ buckets=bkts, bmask=b } <- readIORef ref
+ fmap concat (mapM (readHTArray bkts) [0..b])
+
+-- -----------------------------------------------------------------------------
+-- Diagnostics
+
+-- | This function is useful for determining whether your hash function
+-- is working well for your data set. It returns the longest chain
+-- of key\/value pairs in the hash table for which all the keys hash to
+-- the same bucket. If this chain is particularly long (say, longer
+-- than 10 elements), then it might be a good idea to try a different
+-- hash function.
+--
+longestChain :: HashTable key val -> IO [(key,val)]
+longestChain (HashTable ref) = do
+ HT{ buckets=bkts, bmask=b } <- readIORef ref
+ let lengthCmp (_:x)(_:y) = lengthCmp x y
+ lengthCmp [] [] = EQ
+ lengthCmp [] _ = LT
+ lengthCmp _ [] = GT
+ fmap (maximumBy lengthCmp) (mapM (readHTArray bkts) [0..b])