diff options
Diffstat (limited to 'testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs')
-rw-r--r-- | testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs | 339 |
1 files changed, 0 insertions, 339 deletions
diff --git a/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs b/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs deleted file mode 100644 index 85b3046db9..0000000000 --- a/testsuite/tests/programs/maessen-hashtab/Data/HashTab.hs +++ /dev/null @@ -1,339 +0,0 @@ ------------------------------------------------------------------------------ --- | --- Module : Data.HashTable --- Copyright : (c) The University of Glasgow 2003 --- License : BSD-style (see the file libraries/base/LICENSE) --- --- Maintainer : libraries@haskell.org --- Stability : provisional --- Portability : portable --- --- An implementation of extensible hash tables, as described in --- Per-Ake Larson, /Dynamic Hash Tables/, CACM 31(4), April 1988, --- pp. 446--457. The implementation is also derived from the one --- in GHC's runtime system (@ghc\/rts\/Hash.{c,h}@). --- ------------------------------------------------------------------------------ - -module Data.HashTab ( - -- * Basic hash table operations - HashTable, new, insert, delete, lookup, update, - -- * Converting to and from lists - fromList, toList, - -- * Hash functions - -- $hash_functions - hashInt, hashString, - prime, - -- * Diagnostics - longestChain - ) where - --- This module is imported by Data.Typeable, which is pretty low down in the --- module hierarchy, so don't import "high-level" modules - --- Right now we import high-level modules with gay abandon. -import Prelude hiding ( lookup ) -import Data.Tuple ( fst ) -import Data.Bits -import Data.Maybe -import Data.List ( maximumBy, partition, concat, foldl ) -import Data.Int ( Int32 ) - -import Data.Array.Base -import Data.Array hiding (bounds) -import Data.Array.IO - -import Data.Char ( ord ) -import Data.IORef ( IORef, newIORef, readIORef, writeIORef ) -import Control.Monad ( mapM, sequence_ ) - - ------------------------------------------------------------------------ - -readHTArray :: HTArray a -> Int32 -> IO a -readMutArray :: MutArray a -> Int32 -> IO a -writeMutArray :: MutArray a -> Int32 -> a -> IO () -freezeArray :: MutArray a -> IO (HTArray a) -thawArray :: HTArray a -> IO (MutArray a) -newMutArray :: (Int32, Int32) -> a -> IO (MutArray a) -#if defined(DEBUG) || defined(__NHC__) -type MutArray a = IOArray Int32 a -type HTArray a = MutArray a -newMutArray = newArray -readHTArray = readArray -readMutArray = readArray -writeMutArray = writeArray -freezeArray = return -thawArray = return -#else -type MutArray a = IOArray Int32 a -type HTArray a = Array Int32 a -newMutArray = newArray -readHTArray arr i = return $! (unsafeAt arr (fromIntegral i)) -readMutArray arr i = unsafeRead arr (fromIntegral i) -writeMutArray arr i x = unsafeWrite arr (fromIntegral i) x -freezeArray = unsafeFreeze -thawArray = unsafeThaw -#endif - -newtype HashTable key val = HashTable (IORef (HT key val)) --- TODO: the IORef should really be an MVar. - -data HT key val - = HT { - kcount :: !Int32, -- Total number of keys. - buckets :: !(HTArray [(key,val)]), - bmask :: !Int32, - hash_fn :: key -> Int32, - cmp :: key -> key -> Bool - } - --- ----------------------------------------------------------------------------- --- Sample hash functions - --- $hash_functions --- --- This implementation of hash tables uses the low-order /n/ bits of the hash --- value for a key, where /n/ varies as the hash table grows. A good hash --- function therefore will give an even distribution regardless of /n/. --- --- If your keyspace is integrals such that the low-order bits between --- keys are highly variable, then you could get away with using 'id' --- as the hash function. --- --- We provide some sample hash functions for 'Int' and 'String' below. - --- | A sample hash function for 'Int', implemented as simply @(x `mod` P)@ --- where P is a suitable prime (currently 1500007). Should give --- reasonable results for most distributions of 'Int' values, except --- when the keys are all multiples of the prime! --- -hashInt :: Int -> Int32 -hashInt = (`rem` prime) . fromIntegral - --- | A sample hash function for 'String's. The implementation is: --- --- > hashString = fromIntegral . foldr f 0 --- > where f c m = ord c + (m * 128) `rem` 1500007 --- --- which seems to give reasonable results. --- -hashString :: String -> Int32 -hashString = fromIntegral . foldl f 0 - where f m c = ord c + (m * 128) `rem` fromIntegral prime - --- | A prime larger than the maximum hash table size -prime :: Int32 -prime = 1500007 - --- ----------------------------------------------------------------------------- --- Parameters - -tABLE_MAX = 1024 * 1024 :: Int32 -- Maximum size of hash table -#if tABLE_MIN -#else -tABLE_MIN = 16 :: Int32 - -hLOAD = 4 :: Int32 -- Maximum average load of a single hash bucket - -hYSTERESIS = 0 :: Int32 -- entries to ignore in load computation -#endif - -{- Hysteresis favors long association-list-like behavior for small tables. -} - --- ----------------------------------------------------------------------------- --- Creating a new hash table - --- | Creates a new hash table. The following property should hold for the @eq@ --- and @hash@ functions passed to 'new': --- --- > eq A B => hash A == hash B --- -new - :: (key -> key -> Bool) -- ^ @eq@: An equality comparison on keys - -> (key -> Int32) -- ^ @hash@: A hash function on keys - -> IO (HashTable key val) -- ^ Returns: an empty hash table - -new cmpr hash = do - -- make a new hash table with a single, empty, segment - let mask = tABLE_MIN-1 - bkts' <- newMutArray (0,mask) [] - bkts <- freezeArray bkts' - - let - kcnt = 0 - ht = HT { buckets=bkts, kcount=kcnt, bmask=mask, - hash_fn=hash, cmp=cmpr } - - table <- newIORef ht - return (HashTable table) - --- ----------------------------------------------------------------------------- --- Inserting a key\/value pair into the hash table - --- | Inserts a key\/value mapping into the hash table. --- --- Note that 'insert' doesn't remove the old entry from the table - --- the behaviour is like an association list, where 'lookup' returns --- the most-recently-inserted mapping for a key in the table. The --- reason for this is to keep 'insert' as efficient as possible. If --- you need to update a mapping, then we provide 'update'. --- -insert :: HashTable key val -> key -> val -> IO () - -insert (HashTable ref) key val = do - table@HT{ kcount=k, buckets=bkts, bmask=b } <- readIORef ref - let table1 = table{ kcount = k+1 } - indx = bucketIndex table key - bucket <- readHTArray bkts indx - bkts' <- thawArray bkts - writeMutArray bkts' indx ((key,val):bucket) - freezeArray bkts' - table2 <- - if tooBig k b - then expandHashTable table1 - else return table1 - writeIORef ref table2 - -tooBig :: Int32 -> Int32 -> Bool -tooBig k b = k-hYSTERESIS > hLOAD * b - -bucketIndex :: HT key val -> key -> Int32 -bucketIndex HT{ hash_fn=hash, bmask=mask } key = - let h = hash key - in (h .&. mask) - -expandHashTable :: HT key val -> IO (HT key val) -expandHashTable - table@HT{ buckets=bkts, bmask=mask } = do - let - oldsize = mask + 1 - newmask = mask + mask + 1 - newsize = newmask + 1 - -- - if newsize > tABLE_MAX - then return table - else do - -- - newbkts' <- newMutArray (0,newmask) [] - - let - table'=table{ bmask=newmask } - splitBucket oldindex = do - bucket <- readHTArray bkts oldindex - let (oldb,newb) = partition ((oldindex==).bucketIndex table' . fst) bucket - writeMutArray newbkts' oldindex oldb - writeMutArray newbkts' (oldindex + oldsize) newb - mapM_ splitBucket [0..mask] - - newbkts <- freezeArray newbkts' - - return ( table'{ buckets=newbkts } ) - --- ----------------------------------------------------------------------------- --- Deleting a mapping from the hash table - --- Remove a key from a bucket -deleteBucket :: (key -> Bool) -> [(key,val)] -> (Int32, [(key, val)]) -deleteBucket _ [] = (0,[]) -deleteBucket del (pair@(k,_):bucket) = - case deleteBucket del bucket of - (dels, bucket') | del k -> dels' `seq` (dels', bucket') - | otherwise -> (dels, pair:bucket') - where dels' = dels + 1 - --- | Remove an entry from the hash table. -delete :: HashTable key val -> key -> IO () - -delete (HashTable ref) key = do - table@HT{ buckets=bkts, kcount=kcnt, cmp=cmpr } <- readIORef ref - let indx = bucketIndex table key - bkts' <- thawArray bkts - bucket <- readMutArray bkts' indx - let (removed,bucket') = deleteBucket (cmpr key) bucket - writeMutArray bkts' indx bucket' - freezeArray bkts' - writeIORef ref ( table{kcount = kcnt - removed} ) - --- ----------------------------------------------------------------------------- --- Updating a mapping in the hash table - --- | Updates an entry in the hash table, returning 'True' if there was --- already an entry for this key, or 'False' otherwise. After 'update' --- there will always be exactly one entry for the given key in the table. --- --- 'insert' is more efficient than 'update' if you don't care about --- multiple entries, or you know for sure that multiple entries can't --- occur. However, 'update' is more efficient than 'delete' followed --- by 'insert'. -update :: HashTable key val -> key -> val -> IO Bool - -update (HashTable ref) key val = do - table@HT{ kcount=k, buckets=bkts, cmp=cmpr, bmask=b } <- readIORef ref - let indx = bucketIndex table key - bkts' <- thawArray bkts - bucket <- readMutArray bkts' indx - let (deleted,bucket') = deleteBucket (cmpr key) bucket - k' = k + 1 - deleted - table1 = table{ kcount=k' } - - writeMutArray bkts' indx ((key,val):bucket') - freezeArray bkts' - table2 <- - if tooBig k' b -- off by one from insert's resize heuristic. - then expandHashTable table1 - else return table1 - writeIORef ref table2 - return (deleted>0) - --- ----------------------------------------------------------------------------- --- Looking up an entry in the hash table - --- | Looks up the value of a key in the hash table. -lookup :: HashTable key val -> key -> IO (Maybe val) - -lookup (HashTable ref) key = do - table@HT{ buckets=bkts, cmp=cmpr } <- readIORef ref - let indx = bucketIndex table key - bucket <- readHTArray bkts indx - case [ val | (key',val) <- bucket, cmpr key key' ] of - [] -> return Nothing - (v:_) -> return (Just v) - --- ----------------------------------------------------------------------------- --- Converting to/from lists - --- | Convert a list of key\/value pairs into a hash table. Equality on keys --- is taken from the Eq instance for the key type. --- -fromList :: (Eq key) => (key -> Int32) -> [(key,val)] -> IO (HashTable key val) -fromList hash list = do - table <- new (==) hash - sequence_ [ insert table k v | (k,v) <- list ] - return table - --- | Converts a hash table to a list of key\/value pairs. --- -toList :: (Ord key, Ord val) => HashTable key val -> IO [(key,val)] -toList (HashTable ref) = do - HT{ buckets=bkts, bmask=b } <- readIORef ref - fmap concat (mapM (readHTArray bkts) [0..b]) - --- ----------------------------------------------------------------------------- --- Diagnostics - --- | This function is useful for determining whether your hash function --- is working well for your data set. It returns the longest chain --- of key\/value pairs in the hash table for which all the keys hash to --- the same bucket. If this chain is particularly long (say, longer --- than 10 elements), then it might be a good idea to try a different --- hash function. --- -longestChain :: HashTable key val -> IO [(key,val)] -longestChain (HashTable ref) = do - HT{ buckets=bkts, bmask=b } <- readIORef ref - let lengthCmp (_:x)(_:y) = lengthCmp x y - lengthCmp [] [] = EQ - lengthCmp [] _ = LT - lengthCmp _ [] = GT - fmap (maximumBy lengthCmp) (mapM (readHTArray bkts) [0..b]) |