/*----------------------------------------------------------------------------- * * (c) The AQUA Project, Glasgow University, 1995-1998 * (c) The GHC Team, 1999 * * Dynamically expanding linear hash tables, as described in * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988, * pp. 446 -- 457. * -------------------------------------------------------------------------- */ #include "PosixSource.h" #include "Rts.h" #include "Hash.h" #include "RtsUtils.h" #include #include #define HSEGSIZE 1024 /* Size of a single hash table segment */ /* Also the minimum size of a hash table */ #define HDIRSIZE 1024 /* Size of the segment directory */ /* Maximum hash table size is HSEGSIZE * HDIRSIZE */ #define HLOAD 5 /* Maximum average load of a single hash bucket */ #define HCHUNK (1024 * sizeof(W_) / sizeof(HashList)) /* Number of HashList cells to allocate in one go */ /* Linked list of (key, data) pairs for separate chaining */ struct hashlist { StgWord key; void *data; struct hashlist *next; /* Next cell in bucket chain (same hash value) */ }; typedef struct hashlist HashList; typedef int HashFunction(HashTable *table, StgWord key); typedef int CompareFunction(StgWord key1, StgWord key2); struct hashtable { int split; /* Next bucket to split when expanding */ int max; /* Max bucket of smaller table */ int mask1; /* Mask for doing the mod of h_1 (smaller table) */ int mask2; /* Mask for doing the mod of h_2 (larger table) */ int kcount; /* Number of keys */ int bcount; /* Number of buckets */ HashList **dir[HDIRSIZE]; /* Directory of segments */ HashFunction *hash; /* hash function */ CompareFunction *compare; /* key comparison function */ }; /* ----------------------------------------------------------------------------- * Hash first using the smaller table. If the bucket is less than the * next bucket to be split, re-hash using the larger table. * -------------------------------------------------------------------------- */ static int hashWord(HashTable *table, StgWord key) { int bucket; /* Strip the boring zero bits */ key /= sizeof(StgWord); /* Mod the size of the hash table (a power of 2) */ bucket = key & table->mask1; if (bucket < table->split) { /* Mod the size of the expanded hash table (also a power of 2) */ bucket = key & table->mask2; } return bucket; } static int hashStr(HashTable *table, char *key) { int h, bucket; char *s; s = key; for (h=0; *s; s++) { h *= 128; h += *s; h = h % 1048583; /* some random large prime */ } /* Mod the size of the hash table (a power of 2) */ bucket = h & table->mask1; if (bucket < table->split) { /* Mod the size of the expanded hash table (also a power of 2) */ bucket = h & table->mask2; } return bucket; } static int compareWord(StgWord key1, StgWord key2) { return (key1 == key2); } static int compareStr(StgWord key1, StgWord key2) { return (strcmp((char *)key1, (char *)key2) == 0); } /* ----------------------------------------------------------------------------- * Allocate a new segment of the dynamically growing hash table. * -------------------------------------------------------------------------- */ static void allocSegment(HashTable *table, int segment) { table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *), "allocSegment"); } /* ----------------------------------------------------------------------------- * Expand the larger hash table by one bucket, and split one bucket * from the smaller table into two parts. Only the bucket referenced * by @table->split@ is affected by the expansion. * -------------------------------------------------------------------------- */ static void expand(HashTable *table) { int oldsegment; int oldindex; int newbucket; int newsegment; int newindex; HashList *hl; HashList *next; HashList *old, *new; if (table->split + table->max >= HDIRSIZE * HSEGSIZE) /* Wow! That's big. Too big, so don't expand. */ return; /* Calculate indices of bucket to split */ oldsegment = table->split / HSEGSIZE; oldindex = table->split % HSEGSIZE; newbucket = table->max + table->split; /* And the indices of the new bucket */ newsegment = newbucket / HSEGSIZE; newindex = newbucket % HSEGSIZE; if (newindex == 0) allocSegment(table, newsegment); if (++table->split == table->max) { table->split = 0; table->max *= 2; table->mask1 = table->mask2; table->mask2 = table->mask2 << 1 | 1; } table->bcount++; /* Split the bucket, paying no attention to the original order */ old = new = NULL; for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) { next = hl->next; if (table->hash(table, hl->key) == newbucket) { hl->next = new; new = hl; } else { hl->next = old; old = hl; } } table->dir[oldsegment][oldindex] = old; table->dir[newsegment][newindex] = new; return; } void * lookupHashTable(HashTable *table, StgWord key) { int bucket; int segment; int index; HashList *hl; bucket = table->hash(table, key); segment = bucket / HSEGSIZE; index = bucket % HSEGSIZE; for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) if (table->compare(hl->key, key)) return hl->data; /* It's not there */ return NULL; } /* ----------------------------------------------------------------------------- * We allocate the hashlist cells in large chunks to cut down on malloc * overhead. Although we keep a free list of hashlist cells, we make * no effort to actually return the space to the malloc arena. * -------------------------------------------------------------------------- */ static HashList *freeList = NULL; static struct chunkList { void *chunk; struct chunkList *next; } *chunks; static HashList * allocHashList(void) { HashList *hl, *p; struct chunkList *cl; if ((hl = freeList) != NULL) { freeList = hl->next; } else { hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList"); cl = stgMallocBytes(sizeof (*cl), "allocHashList: chunkList"); cl->chunk = hl; cl->next = chunks; chunks = cl; freeList = hl + 1; for (p = freeList; p < hl + HCHUNK - 1; p++) p->next = p + 1; p->next = NULL; } return hl; } static void freeHashList(HashList *hl) { hl->next = freeList; freeList = hl; } void insertHashTable(HashTable *table, StgWord key, void *data) { int bucket; int segment; int index; HashList *hl; // Disable this assert; sometimes it's useful to be able to // overwrite entries in the hash table. // ASSERT(lookupHashTable(table, key) == NULL); /* When the average load gets too high, we expand the table */ if (++table->kcount >= HLOAD * table->bcount) expand(table); bucket = table->hash(table, key); segment = bucket / HSEGSIZE; index = bucket % HSEGSIZE; hl = allocHashList(); hl->key = key; hl->data = data; hl->next = table->dir[segment][index]; table->dir[segment][index] = hl; } void * removeHashTable(HashTable *table, StgWord key, void *data) { int bucket; int segment; int index; HashList *hl; HashList *prev = NULL; bucket = table->hash(table, key); segment = bucket / HSEGSIZE; index = bucket % HSEGSIZE; for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) { if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) { if (prev == NULL) table->dir[segment][index] = hl->next; else prev->next = hl->next; freeHashList(hl); table->kcount--; return hl->data; } prev = hl; } /* It's not there */ ASSERT(data == NULL); return NULL; } /* ----------------------------------------------------------------------------- * When we free a hash table, we are also good enough to free the * data part of each (key, data) pair, as long as our caller can tell * us how to do it. * -------------------------------------------------------------------------- */ void freeHashTable(HashTable *table, void (*freeDataFun)(void *) ) { long segment; long index; HashList *hl; HashList *next; /* The last bucket with something in it is table->max + table->split - 1 */ segment = (table->max + table->split - 1) / HSEGSIZE; index = (table->max + table->split - 1) % HSEGSIZE; while (segment >= 0) { while (index >= 0) { for (hl = table->dir[segment][index]; hl != NULL; hl = next) { next = hl->next; if (freeDataFun != NULL) (*freeDataFun)(hl->data); freeHashList(hl); } index--; } stgFree(table->dir[segment]); segment--; index = HSEGSIZE - 1; } stgFree(table); } /* ----------------------------------------------------------------------------- * When we initialize a hash table, we set up the first segment as well, * initializing all of the first segment's hash buckets to NULL. * -------------------------------------------------------------------------- */ static HashTable * allocHashTable_(HashFunction *hash, CompareFunction *compare) { HashTable *table; HashList **hb; table = stgMallocBytes(sizeof(HashTable),"allocHashTable"); allocSegment(table, 0); for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++) *hb = NULL; table->split = 0; table->max = HSEGSIZE; table->mask1 = HSEGSIZE - 1; table->mask2 = 2 * HSEGSIZE - 1; table->kcount = 0; table->bcount = HSEGSIZE; table->hash = hash; table->compare = compare; return table; } HashTable * allocHashTable(void) { return allocHashTable_(hashWord, compareWord); } HashTable * allocStrHashTable(void) { return allocHashTable_((HashFunction *)hashStr, (CompareFunction *)compareStr); } void exitHashTable(void) { struct chunkList *cl; while ((cl = chunks) != NULL) { chunks = cl->next; stgFree(cl->chunk); stgFree(cl); } }