1 files changed, 339 insertions, 0 deletions
diff --git a/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs b/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs
new file mode 100644
index 0000000000..8251a760c8
--- /dev/null
+++ b/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs
@@ -0,0 +1,339 @@
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.HashTable
+-- Copyright   :  (c) The University of Glasgow 2003
+-- License     :  BSD-style (see the file libraries/base/LICENSE)
+--
+-- Maintainer  :  libraries@haskell.org
+-- Stability   :  provisional
+-- Portability :  portable
+--
+-- An implementation of extensible hash tables, as described in
+-- Per-Ake Larson, /Dynamic Hash Tables/, CACM 31(4), April 1988,
+-- pp. 446--457.  The implementation is also derived from the one
+-- in GHC's runtime system (@ghc\/rts\/Hash.{c,h}@).
+--
+-----------------------------------------------------------------------------
+
+module Data.HashTab (
+	-- * Basic hash table operations
+	HashTable, new, insert, delete, lookup, update,
+	-- * Converting to and from lists
+	fromList, toList,
+	-- * Hash functions
+	-- $hash_functions
+	hashInt, hashString,
+	prime,
+	-- * Diagnostics
+	longestChain
+ ) where
+
+-- This module is imported by Data.Typeable, which is pretty low down in the
+-- module hierarchy, so don't import "high-level" modules
+
+-- Right now we import high-level modules with gay abandon.
+import Prelude	hiding	( lookup )
+import Data.Tuple	( fst )
+import Data.Bits
+import Data.Maybe
+import Data.List	( maximumBy, partition, concat, foldl )
+import Data.Int		( Int32 )
+
+import Data.Array.Base
+import Data.Array       hiding (bounds)
+import Data.Array.IO
+
+import Data.Char	( ord )
+import Data.IORef	( IORef, newIORef, readIORef, writeIORef )
+import Control.Monad	( mapM, sequence_ )
+
+
+-----------------------------------------------------------------------
+
+readHTArray  :: HTArray a -> Int32 -> IO a
+readMutArray  :: MutArray a -> Int32 -> IO a
+writeMutArray :: MutArray a -> Int32 -> a -> IO ()
+freezeArray  :: MutArray a -> IO (HTArray a)
+thawArray    :: HTArray a -> IO (MutArray a)
+newMutArray   :: (Int32, Int32) -> a -> IO (MutArray a)
+#if defined(DEBUG) || defined(__NHC__)
+type MutArray a = IOArray Int32 a
+type HTArray a = MutArray a
+newMutArray = newArray
+readHTArray  = readArray
+readMutArray = readArray
+writeMutArray = writeArray
+freezeArray = return
+thawArray = return
+#else
+type MutArray a = IOArray Int32 a
+type HTArray a = Array Int32 a
+newMutArray = newArray
+readHTArray arr i = return $! (unsafeAt arr (fromIntegral i))
+readMutArray arr i = unsafeRead arr (fromIntegral i)
+writeMutArray arr i x = unsafeWrite arr (fromIntegral i) x
+freezeArray = unsafeFreeze
+thawArray = unsafeThaw
+#endif
+
+newtype HashTable key val = HashTable (IORef (HT key val))
+-- TODO: the IORef should really be an MVar.
+
+data HT key val
+  = HT {
+	kcount  :: !Int32,              -- Total number of keys.
+	buckets :: !(HTArray [(key,val)]),
+        bmask   :: !Int32,
+	hash_fn :: key -> Int32,
+	cmp     :: key -> key -> Bool
+   }
+
+-- -----------------------------------------------------------------------------
+-- Sample hash functions
+
+-- $hash_functions
+--
+-- This implementation of hash tables uses the low-order /n/ bits of the hash
+-- value for a key, where /n/ varies as the hash table grows.  A good hash
+-- function therefore will give an even distribution regardless of /n/.
+--
+-- If your keyspace is integrals such that the low-order bits between
+-- keys are highly variable, then you could get away with using 'id'
+-- as the hash function.
+--
+-- We provide some sample hash functions for 'Int' and 'String' below.
+
+-- | A sample hash function for 'Int', implemented as simply @(x `mod` P)@
+-- where P is a suitable prime (currently 1500007).  Should give
+-- reasonable results for most distributions of 'Int' values, except
+-- when the keys are all multiples of the prime!
+--
+hashInt :: Int -> Int32
+hashInt = (`rem` prime) . fromIntegral
+
+-- | A sample hash function for 'String's.  The implementation is:
+--
+-- >    hashString = fromIntegral . foldr f 0
+-- >      where f c m = ord c + (m * 128) `rem` 1500007
+--
+-- which seems to give reasonable results.
+--
+hashString :: String -> Int32
+hashString = fromIntegral . foldl f 0
+  where f m c = ord c + (m * 128) `rem` fromIntegral prime
+
+-- | A prime larger than the maximum hash table size
+prime :: Int32
+prime = 1500007
+
+-- -----------------------------------------------------------------------------
+-- Parameters
+
+tABLE_MAX  = 1024 * 1024 :: Int32  -- Maximum size of hash table
+#if tABLE_MIN
+#else
+tABLE_MIN  = 16 :: Int32
+
+hLOAD = 4 :: Int32 -- Maximum average load of a single hash bucket
+
+hYSTERESIS = 0 :: Int32 -- entries to ignore in load computation
+#endif
+
+{- Hysteresis favors long association-list-like behavior for small tables. -}
+
+-- -----------------------------------------------------------------------------
+-- Creating a new hash table
+
+-- | Creates a new hash table.  The following property should hold for the @eq@
+-- and @hash@ functions passed to 'new':
+--
+-- >   eq A B  =>  hash A == hash B
+--
+new
+  :: (key -> key -> Bool)    -- ^ @eq@: An equality comparison on keys
+  -> (key -> Int32)	     -- ^ @hash@: A hash function on keys
+  -> IO (HashTable key val)  -- ^ Returns: an empty hash table
+
+new cmpr hash = do
+  -- make a new hash table with a single, empty, segment
+  let mask = tABLE_MIN-1
+  bkts'  <- newMutArray (0,mask) []
+  bkts   <- freezeArray bkts'
+
+  let
+    kcnt = 0
+    ht = HT {  buckets=bkts, kcount=kcnt, bmask=mask,
+               hash_fn=hash, cmp=cmpr }
+
+  table <- newIORef ht
+  return (HashTable table)
+
+-- -----------------------------------------------------------------------------
+-- Inserting a key\/value pair into the hash table
+
+-- | Inserts an key\/value mapping into the hash table.
+--
+-- Note that 'insert' doesn't remove the old entry from the table -
+-- the behaviour is like an association list, where 'lookup' returns
+-- the most-recently-inserted mapping for a key in the table.  The
+-- reason for this is to keep 'insert' as efficient as possible.  If
+-- you need to update a mapping, then we provide 'update'.
+--
+insert :: HashTable key val -> key -> val -> IO ()
+
+insert (HashTable ref) key val = do
+  table@HT{ kcount=k, buckets=bkts, bmask=b } <- readIORef ref
+  let table1 = table{ kcount = k+1 }
+      indx = bucketIndex table key
+  bucket <- readHTArray bkts indx
+  bkts' <- thawArray bkts
+  writeMutArray bkts' indx ((key,val):bucket)
+  freezeArray bkts'
+  table2 <-
+	if tooBig k b
+	   then expandHashTable table1
+	   else return table1
+  writeIORef ref table2
+
+tooBig :: Int32 -> Int32 -> Bool
+tooBig k b = k-hYSTERESIS > hLOAD * b
+
+bucketIndex :: HT key val -> key -> Int32
+bucketIndex HT{ hash_fn=hash, bmask=mask } key =
+  let h = hash key
+  in  (h .&. mask)
+
+expandHashTable :: HT key val -> IO (HT key val)
+expandHashTable
+      table@HT{ buckets=bkts, bmask=mask } = do
+   let
+      oldsize = mask + 1
+      newmask = mask + mask + 1
+      newsize = newmask + 1
+   --
+   if newsize > tABLE_MAX
+      then return table
+      else do
+   --
+   newbkts' <- newMutArray (0,newmask) []
+
+   let
+    table'=table{ bmask=newmask }
+    splitBucket oldindex = do
+      bucket <- readHTArray bkts oldindex
+      let (oldb,newb) = partition ((oldindex==).bucketIndex table' . fst) bucket
+      writeMutArray newbkts' oldindex oldb
+      writeMutArray newbkts' (oldindex + oldsize) newb
+   mapM_ splitBucket [0..mask]
+
+   newbkts <- freezeArray newbkts'
+
+   return ( table'{ buckets=newbkts } )
+
+-- -----------------------------------------------------------------------------
+-- Deleting a mapping from the hash table
+
+-- Remove a key from a bucket
+deleteBucket :: (key -> Bool) -> [(key,val)] -> (Int32, [(key, val)])
+deleteBucket _   [] = (0,[])
+deleteBucket del (pair@(k,_):bucket) =
+  case deleteBucket del bucket of
+    (dels, bucket') | del k     -> dels' `seq` (dels', bucket')
+                    | otherwise -> (dels, pair:bucket')
+      where dels' = dels + 1
+
+-- | Remove an entry from the hash table.
+delete :: HashTable key val -> key -> IO ()
+
+delete (HashTable ref) key = do
+  table@HT{ buckets=bkts, kcount=kcnt, cmp=cmpr } <- readIORef ref
+  let indx = bucketIndex table key
+  bkts' <- thawArray bkts
+  bucket <- readMutArray bkts' indx
+  let (removed,bucket') = deleteBucket (cmpr key) bucket
+  writeMutArray bkts' indx bucket'
+  freezeArray bkts'
+  writeIORef ref ( table{kcount = kcnt - removed} )
+
+-- -----------------------------------------------------------------------------
+-- Updating a mapping in the hash table
+
+-- | Updates an entry in the hash table, returning 'True' if there was
+-- already an entry for this key, or 'False' otherwise.  After 'update'
+-- there will always be exactly one entry for the given key in the table.
+--
+-- 'insert' is more efficient than 'update' if you don't care about
+-- multiple entries, or you know for sure that multiple entries can't
+-- occur.  However, 'update' is more efficient than 'delete' followed
+-- by 'insert'.
+update :: HashTable key val -> key -> val -> IO Bool
+
+update (HashTable ref) key val = do
+  table@HT{ kcount=k, buckets=bkts, cmp=cmpr, bmask=b } <- readIORef ref
+  let indx = bucketIndex table key
+  bkts' <- thawArray bkts
+  bucket <- readMutArray bkts' indx
+  let (deleted,bucket') = deleteBucket (cmpr key) bucket
+      k' = k + 1 - deleted
+      table1 = table{ kcount=k' }
+
+  writeMutArray bkts' indx ((key,val):bucket')
+  freezeArray bkts'
+  table2 <-
+	if tooBig k' b          -- off by one from insert's resize heuristic.
+	   then expandHashTable table1
+	   else return table1
+  writeIORef ref table2
+  return (deleted>0)
+
+-- -----------------------------------------------------------------------------
+-- Looking up an entry in the hash table
+
+-- | Looks up the value of a key in the hash table.
+lookup :: HashTable key val -> key -> IO (Maybe val)
+
+lookup (HashTable ref) key = do
+  table@HT{ buckets=bkts, cmp=cmpr } <- readIORef ref
+  let indx = bucketIndex table key
+  bucket <- readHTArray bkts indx
+  case [ val | (key',val) <- bucket, cmpr key key' ] of
+	[] -> return Nothing
+	(v:_) -> return (Just v)
+
+-- -----------------------------------------------------------------------------
+-- Converting to/from lists
+
+-- | Convert a list of key\/value pairs into a hash table.  Equality on keys
+-- is taken from the Eq instance for the key type.
+--
+fromList :: (Eq key) => (key -> Int32) -> [(key,val)] -> IO (HashTable key val)
+fromList hash list = do
+  table <- new (==) hash
+  sequence_ [ insert table k v | (k,v) <- list ]
+  return table
+
+-- | Converts a hash table to a list of key\/value pairs.
+--
+toList :: (Ord key, Ord val) => HashTable key val -> IO [(key,val)]
+toList (HashTable ref) = do
+  HT{ buckets=bkts, bmask=b } <- readIORef ref
+  fmap concat (mapM (readHTArray bkts) [0..b])
+
+-- -----------------------------------------------------------------------------
+-- Diagnostics
+
+-- | This function is useful for determining whether your hash function
+-- is working well for your data set.  It returns the longest chain
+-- of key\/value pairs in the hash table for which all the keys hash to
+-- the same bucket.  If this chain is particularly long (say, longer
+-- than 10 elements), then it might be a good idea to try a different
+-- hash function.
+--
+longestChain :: HashTable key val -> IO [(key,val)]
+longestChain (HashTable ref) = do
+  HT{ buckets=bkts, bmask=b } <- readIORef ref
+  let lengthCmp (_:x)(_:y) = lengthCmp x y
+      lengthCmp []   []    = EQ
+      lengthCmp []   _     = LT
+      lengthCmp _    []    = GT
+  fmap (maximumBy lengthCmp) (mapM (readHTArray bkts) [0..b])