/*----------------------------------------------------------------------------- * * (c) The AQUA Project, Glasgow University, 1995-1998 * (c) The GHC Team, 1999 * * Dynamically expanding linear hash tables, as described in * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988, * pp. 446 -- 457. * -------------------------------------------------------------------------- */ #include "PosixSource.h" #include "Rts.h" #include "Hash.h" #include "RtsUtils.h" #include "xxhash.h" #include #define HSEGSIZE 1024 /* Size of a single hash table segment */ /* Also the minimum size of a hash table */ #define HDIRSIZE 1024 /* Size of the segment directory */ /* Maximum hash table size is HSEGSIZE * HDIRSIZE */ #define HLOAD 5 /* Maximum average load of a single hash bucket */ #define HCHUNK (1024 * sizeof(W_) / sizeof(HashList)) /* Number of HashList cells to allocate in one go */ /* Linked list of (key, data) pairs for separate chaining */ typedef struct hashlist { StgWord key; const void *data; struct hashlist *next; /* Next cell in bucket chain (same hash value) */ } HashList; typedef struct chunklist { struct chunklist *next; } HashListChunk; struct hashtable { int split; /* Next bucket to split when expanding */ int max; /* Max bucket of smaller table */ int mask1; /* Mask for doing the mod of h_1 (smaller table) */ int mask2; /* Mask for doing the mod of h_2 (larger table) */ int kcount; /* Number of keys */ int bcount; /* Number of buckets */ HashList **dir[HDIRSIZE]; /* Directory of segments */ HashList *freeList; /* free list of HashLists */ HashListChunk *chunks; /* list of HashListChunks so we can later free them */ }; /* Create an identical structure, but is distinct on a type level, * for string hash table. Since it's a direct embedding of * a hashtable and not a reference, there shouldn't be * any overhead post-compilation. */ struct strhashtable { struct hashtable table; }; /* ----------------------------------------------------------------------------- * Hash first using the smaller table. If the bucket is less than the * next bucket to be split, re-hash using the larger table. * -------------------------------------------------------------------------- */ int hashWord(const HashTable *table, StgWord key) { int bucket; /* Strip the boring zero bits */ key >>= sizeof(StgWord); /* Mod the size of the hash table (a power of 2) */ bucket = key & table->mask1; if (bucket < table->split) { /* Mod the size of the expanded hash table (also a power of 2) */ bucket = key & table->mask2; } return bucket; } int hashStr(const HashTable *table, StgWord w) { const char *key = (char*) w; #if defined(x86_64_HOST_ARCH) StgWord h = XXH64 (key, strlen(key), 1048583); #else StgWord h = XXH32 (key, strlen(key), 1048583); #endif /* Mod the size of the hash table (a power of 2) */ int bucket = h & table->mask1; if (bucket < table->split) { /* Mod the size of the expanded hash table (also a power of 2) */ bucket = h & table->mask2; } return bucket; } STATIC_INLINE int compareWord(StgWord key1, StgWord key2) { return (key1 == key2); } STATIC_INLINE int compareStr(StgWord key1, StgWord key2) { return (strcmp((char *)key1, (char *)key2) == 0); } /* ----------------------------------------------------------------------------- * Allocate a new segment of the dynamically growing hash table. * -------------------------------------------------------------------------- */ STATIC_INLINE void allocSegment(HashTable *table, int segment) { table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *), "allocSegment"); } /* ----------------------------------------------------------------------------- * Expand the larger hash table by one bucket, and split one bucket * from the smaller table into two parts. Only the bucket referenced * by @table->split@ is affected by the expansion. * -------------------------------------------------------------------------- */ STATIC_INLINE void expand(HashTable *table, HashFunction f) { int oldsegment; int oldindex; int newbucket; int newsegment; int newindex; HashList *hl; HashList *next; HashList *old, *new; if (table->split + table->max >= HDIRSIZE * HSEGSIZE) /* Wow! That's big. Too big, so don't expand. */ return; /* Calculate indices of bucket to split */ oldsegment = table->split / HSEGSIZE; oldindex = table->split % HSEGSIZE; newbucket = table->max + table->split; /* And the indices of the new bucket */ newsegment = newbucket / HSEGSIZE; newindex = newbucket % HSEGSIZE; if (newindex == 0) allocSegment(table, newsegment); if (++table->split == table->max) { table->split = 0; table->max *= 2; table->mask1 = table->mask2; table->mask2 = table->mask2 << 1 | 1; } table->bcount++; /* Split the bucket, paying no attention to the original order */ old = new = NULL; for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) { next = hl->next; if (f(table, hl->key) == newbucket) { hl->next = new; new = hl; } else { hl->next = old; old = hl; } } table->dir[oldsegment][oldindex] = old; table->dir[newsegment][newindex] = new; return; } STATIC_INLINE void* lookupHashTable_inlined(const HashTable *table, StgWord key, HashFunction f, CompareFunction cmp) { int bucket; int segment; int index; HashList *hl; bucket = f(table, key); segment = bucket / HSEGSIZE; index = bucket % HSEGSIZE; for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) { if (cmp(hl->key, key)) return (void *) hl->data; } /* It's not there */ return NULL; } void * lookupHashTable_(const HashTable *table, StgWord key, HashFunction f, CompareFunction cmp) { return lookupHashTable_inlined(table, key, f, cmp); } void * lookupHashTable(const HashTable *table, StgWord key) { return lookupHashTable_inlined(table, key, hashWord, compareWord); } void * lookupStrHashTable(const StrHashTable* table, const char* key) { return lookupHashTable_inlined(&table->table, (StgWord) key, hashStr, compareStr); } // Puts up to szKeys keys of the hash table into the given array. Returns the // actual amount of keys that have been retrieved. // // If the table is modified concurrently, the function behavior is undefined. // int keysHashTable(HashTable *table, StgWord keys[], int szKeys) { int segment, index; int k = 0; HashList *hl; /* The last bucket with something in it is table->max + table->split - 1 */ segment = (table->max + table->split - 1) / HSEGSIZE; index = (table->max + table->split - 1) % HSEGSIZE; while (segment >= 0 && k < szKeys) { while (index >= 0 && k < szKeys) { hl = table->dir[segment][index]; while (hl && k < szKeys) { keys[k] = hl->key; k += 1; hl = hl->next; } index--; } segment--; index = HSEGSIZE - 1; } return k; } /* ----------------------------------------------------------------------------- * We allocate the hashlist cells in large chunks to cut down on malloc * overhead. Although we keep a free list of hashlist cells, we make * no effort to actually return the space to the malloc arena. Eventually * they will all be freed when we free the HashListChunks. * -------------------------------------------------------------------------- */ static HashList * allocHashList (HashTable *table) { if (table->freeList != NULL) { HashList *hl = table->freeList; table->freeList = hl->next; return hl; } else { /* We allocate one block of memory which contains: * * 1. A HashListChunk, which gets linked onto HashTable.chunks. * This forms a list of all chunks associated with the HashTable * and is what we will free when we free the HashTable. * * 2. Several HashLists. One of these will get returned. The rest are * placed on the freeList. * */ HashListChunk *cl = stgMallocBytes(sizeof(HashListChunk) + HCHUNK * sizeof(HashList), "allocHashList"); HashList *hl = (HashList *) &cl[1]; cl->next = table->chunks; table->chunks = cl; table->freeList = hl + 1; HashList *p = table->freeList; for (; p < hl + HCHUNK - 1; p++) p->next = p + 1; p->next = NULL; return hl; } } static void freeHashList (HashTable *table, HashList *hl) { // We place the HashList on the freeList. We make no attempt to bound the // size of the free list for the time being. The HashLists on the freeList // are freed when the HashTable itself is freed as a result of freeing the // HashListChunks. hl->next = table->freeList; table->freeList = hl; } STATIC_INLINE void insertHashTable_inlined(HashTable *table, StgWord key, const void *data, HashFunction f) { int bucket; int segment; int index; HashList *hl; // Disable this assert; sometimes it's useful to be able to // overwrite entries in the hash table. // ASSERT(lookupHashTable(table, key) == NULL); /* When the average load gets too high, we expand the table */ if (++table->kcount >= HLOAD * table->bcount) expand(table, f); bucket = f(table, key); segment = bucket / HSEGSIZE; index = bucket % HSEGSIZE; hl = allocHashList(table); hl->key = key; hl->data = data; hl->next = table->dir[segment][index]; table->dir[segment][index] = hl; } void insertHashTable_(HashTable *table, StgWord key, const void *data, HashFunction f) { return insertHashTable_inlined(table, key, data, f); } void insertHashTable(HashTable *table, StgWord key, const void *data) { insertHashTable_inlined(table, key, data, hashWord); } void insertStrHashTable(StrHashTable *table, const char * key, const void *data) { insertHashTable_inlined(&table->table, (StgWord) key, data, hashStr); } STATIC_INLINE void* removeHashTable_inlined(HashTable *table, StgWord key, const void *data, HashFunction f, CompareFunction cmp) { int bucket; int segment; int index; HashList *hl; HashList *prev = NULL; bucket = f(table, key); segment = bucket / HSEGSIZE; index = bucket % HSEGSIZE; for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) { if (cmp(hl->key, key) && (data == NULL || hl->data == data)) { if (prev == NULL) table->dir[segment][index] = hl->next; else prev->next = hl->next; freeHashList(table,hl); table->kcount--; return (void *) hl->data; } prev = hl; } /* It's not there */ ASSERT(data == NULL); return NULL; } void* removeHashTable_(HashTable *table, StgWord key, const void *data, HashFunction f, CompareFunction cmp) { return removeHashTable_inlined(table, key, data, f, cmp); } void * removeHashTable(HashTable *table, StgWord key, const void *data) { return removeHashTable_inlined(table, key, data, hashWord, compareWord); } void * removeStrHashTable(StrHashTable *table, const char * key, const void *data) { return removeHashTable_inlined(&table->table, (StgWord) key, data, hashStr, compareStr); } /* ----------------------------------------------------------------------------- * When we free a hash table, we are also good enough to free the * data part of each (key, data) pair, as long as our caller can tell * us how to do it. * -------------------------------------------------------------------------- */ void freeHashTable(HashTable *table, void (*freeDataFun)(void *) ) { /* The last bucket with something in it is table->max + table->split - 1 */ long segment = (table->max + table->split - 1) / HSEGSIZE; long index = (table->max + table->split - 1) % HSEGSIZE; /* Free table segments */ while (segment >= 0) { while (index >= 0) { HashList *next; for (HashList *hl = table->dir[segment][index]; hl != NULL; hl = next) { next = hl->next; if (freeDataFun != NULL) (*freeDataFun)((void *) hl->data); } index--; } stgFree(table->dir[segment]); segment--; index = HSEGSIZE - 1; } /* Free chunks */ HashListChunk *cl = table->chunks; while (cl != NULL) { HashListChunk *old = cl; cl = cl->next; stgFree(old); } stgFree(table); } /* ----------------------------------------------------------------------------- * Map a function over all the keys/values in a HashTable * -------------------------------------------------------------------------- */ void mapHashTable(HashTable *table, void *data, MapHashFn fn) { /* The last bucket with something in it is table->max + table->split - 1 */ long segment = (table->max + table->split - 1) / HSEGSIZE; long index = (table->max + table->split - 1) % HSEGSIZE; while (segment >= 0) { while (index >= 0) { for (HashList *hl = table->dir[segment][index]; hl != NULL; hl = hl->next) { fn(data, hl->key, hl->data); } index--; } segment--; index = HSEGSIZE - 1; } } void mapHashTableKeys(HashTable *table, void *data, MapHashFnKeys fn) { /* The last bucket with something in it is table->max + table->split - 1 */ long segment = (table->max + table->split - 1) / HSEGSIZE; long index = (table->max + table->split - 1) % HSEGSIZE; while (segment >= 0) { while (index >= 0) { for (HashList *hl = table->dir[segment][index]; hl != NULL; hl = hl->next) { fn(data, &hl->key, hl->data); } index--; } segment--; index = HSEGSIZE - 1; } } void iterHashTable(HashTable *table, void *data, IterHashFn fn) { /* The last bucket with something in it is table->max + table->split - 1 */ long segment = (table->max + table->split - 1) / HSEGSIZE; long index = (table->max + table->split - 1) % HSEGSIZE; while (segment >= 0) { while (index >= 0) { for (HashList *hl = table->dir[segment][index]; hl != NULL; hl = hl->next) { if (!fn(data, hl->key, hl->data)) { return; } } index--; } segment--; index = HSEGSIZE - 1; } } /* ----------------------------------------------------------------------------- * When we initialize a hash table, we set up the first segment as well, * initializing all of the first segment's hash buckets to NULL. * -------------------------------------------------------------------------- */ HashTable * allocHashTable(void) { HashTable *table; HashList **hb; table = stgMallocBytes(sizeof(HashTable),"allocHashTable"); allocSegment(table, 0); for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++) *hb = NULL; table->split = 0; table->max = HSEGSIZE; table->mask1 = HSEGSIZE - 1; table->mask2 = 2 * HSEGSIZE - 1; table->kcount = 0; table->bcount = HSEGSIZE; table->freeList = NULL; table->chunks = NULL; return table; } int keyCountHashTable (HashTable *table) { return table->kcount; }