/* Hash Tables Implementation.
 *
 * This file implements in memory hash tables with insert/del/replace/find/
 * get-random-element operations. Hash tables will auto resize if needed
 * tables of power of two in size are used, collisions are handled by
 * chaining. See the source code for more information... :)
 *
 * Copyright (c) 2006-2010, Salvatore Sanfilippo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "fmacros.h"

/* NOTE(review): the header names below were missing from the directives and
 * have been restored from the standard functions this file calls
 * (printf, abort/random, memset/strlen, va_list, LONG_MAX, gettimeofday);
 * <assert.h> is assumed for the remaining one -- confirm against the
 * project history. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <assert.h>
#include <limits.h>
#include <sys/time.h>

#include "dict.h"
#include "zmalloc.h"

/* Using dictEnableResize() / dictDisableResize() we make possible to
 * enable/disable resizing of the hash table as needed. This is very important
 * for Redis, as we use copy-on-write and don't want to move too much memory
 * around when there is a child performing saving operations. */
static int dict_can_resize = 1;

/* ---------------------------- Utility functions --------------------------- */

/* Print a "DICT LIBRARY PANIC" message on stderr and terminate the process.
 *
 * 'fmt' and the variadic arguments follow the printf() conventions.
 * The function never returns: a panic is only raised for conditions the
 * library cannot recover from, and returning would let the caller go on
 * with invalid state (e.g. a NULL allocation). */
static void _dictPanic(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    fprintf(stderr, "\nDICT LIBRARY PANIC: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n\n");
    va_end(ap);
    abort();
}

/* ------------------------- Heap Management Wrappers------------------------ */

/* Allocate 'size' bytes through zmalloc().
 * Panics (and therefore does not return) when the allocation fails. */
static void *_dictAlloc(size_t size)
{
    void *p = zmalloc(size);

    if (p == NULL)
        _dictPanic("Out of memory");
    return p;
}

/* Release memory obtained with _dictAlloc(). */
static void _dictFree(void *ptr)
{
    zfree(ptr);
}

/* -------------------------- private prototypes ---------------------------- */

static int _dictExpandIfNeeded(dict *ht);
static unsigned long _dictNextPower(unsigned long size);
static int _dictKeyIndex(dict *ht, const void *key);
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);

/* -------------------------- hash functions -------------------------------- */

/* Thomas Wang's 32 bit Mix Function */
unsigned int dictIntHashFunction(unsigned int key)
{
    key += ~(key << 15);
    key ^=  (key >> 10);
    key +=  (key << 3);
    key ^=  (key >> 6);
    key += ~(key << 11);
    key ^=  (key >> 16);
    return key;
}

/* Identity hash function for integer keys */
unsigned int dictIdentityHashFunction(unsigned int key)
{
    return key;
}

/* Generic hash function (a popular one from Bernstein).
 * I tested a few and this was the best.
*/ unsigned int dictGenHashFunction(const unsigned char *buf, int len) { unsigned int hash = 5381; while (len--) hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */ return hash; } /* ----------------------------- API implementation ------------------------- */ /* Reset an hashtable already initialized with ht_init(). * NOTE: This function should only called by ht_destroy(). */ static void _dictReset(dictht *ht) { ht->table = NULL; ht->size = 0; ht->sizemask = 0; ht->used = 0; } /* Create a new hash table */ dict *dictCreate(dictType *type, void *privDataPtr) { dict *d = _dictAlloc(sizeof(*d)); _dictInit(d,type,privDataPtr); return d; } /* Initialize the hash table */ int _dictInit(dict *d, dictType *type, void *privDataPtr) { _dictReset(&d->ht[0]); _dictReset(&d->ht[1]); d->type = type; d->privdata = privDataPtr; d->rehashidx = -1; d->iterators = 0; return DICT_OK; } /* Resize the table to the minimal size that contains all the elements, * but with the invariant of a USER/BUCKETS ration near to <= 1 */ int dictResize(dict *d) { int minimal; if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR; minimal = d->ht[0].used; if (minimal < DICT_HT_INITIAL_SIZE) minimal = DICT_HT_INITIAL_SIZE; return dictExpand(d, minimal); } /* Expand or create the hashtable */ int dictExpand(dict *d, unsigned long size) { dictht n; /* the new hashtable */ unsigned long realsize = _dictNextPower(size); /* the size is invalid if it is smaller than the number of * elements already inside the hashtable */ if (dictIsRehashing(d) || d->ht[0].used > size) return DICT_ERR; n.size = realsize; n.sizemask = realsize-1; n.table = _dictAlloc(realsize*sizeof(dictEntry*)); n.used = 0; /* Initialize all the pointers to NULL */ memset(n.table, 0, realsize*sizeof(dictEntry*)); /* Is this the first initialization? If so it's not really a rehashing * we just set the first hash table so that it can accept keys. 
*/ if (d->ht[0].table == NULL) { d->ht[0] = n; return DICT_OK; } /* Prepare a second hash table for incremental rehashing */ d->ht[1] = n; d->rehashidx = 0; return DICT_OK; } /* Performs N steps of incremental rehashing. Returns 1 if there are still * keys to move from the old to the new hash table, otherwise 0 is returned. * Note that a rehashing step consists in moving a bucket (that may have more * thank one key as we use chaining) from the old to the new hash table. */ int dictRehash(dict *d, int n) { if (!dictIsRehashing(d)) return 0; while(n--) { dictEntry *de, *nextde; /* Check if we already rehashed the whole table... */ if (d->ht[0].used == 0) { _dictFree(d->ht[0].table); d->ht[0] = d->ht[1]; _dictReset(&d->ht[1]); d->rehashidx = -1; return 0; } /* Note that rehashidx can't overflow as we are sure there are more * elements because ht[0].used != 0 */ while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++; de = d->ht[0].table[d->rehashidx]; /* Move all the keys in this bucket from the old to the new hash HT */ while(de) { unsigned int h; nextde = de->next; /* Get the index in the new hash table */ h = dictHashKey(d, de->key) & d->ht[1].sizemask; de->next = d->ht[1].table[h]; d->ht[1].table[h] = de; d->ht[0].used--; d->ht[1].used++; de = nextde; } d->ht[0].table[d->rehashidx] = NULL; d->rehashidx++; } return 1; } long long timeInMilliseconds(void) { struct timeval tv; gettimeofday(&tv,NULL); return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000); } /* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */ int dictRehashMilliseconds(dict *d, int ms) { long long start = timeInMilliseconds(); int rehashes = 0; while(dictRehash(d,100)) { rehashes += 100; if (timeInMilliseconds()-start > ms) break; } return rehashes; } /* This function performs just a step of rehashing, and only if there are * not iterators bound to our hash table. 
When we have iterators in the middle * of a rehashing we can't mess with the two hash tables otherwise some element * can be missed or duplicated. * * This function is called by common lookup or update operations in the * dictionary so that the hash table automatically migrates from H1 to H2 * while it is actively used. */ static void _dictRehashStep(dict *d) { if (d->iterators == 0) dictRehash(d,1); } /* Add an element to the target hash table */ int dictAdd(dict *d, void *key, void *val) { int index; dictEntry *entry; dictht *ht; if (dictIsRehashing(d)) _dictRehashStep(d); /* Get the index of the new element, or -1 if * the element already exists. */ if ((index = _dictKeyIndex(d, key)) == -1) return DICT_ERR; /* Allocates the memory and stores key */ ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0]; entry = _dictAlloc(sizeof(*entry)); entry->next = ht->table[index]; ht->table[index] = entry; ht->used++; /* Set the hash entry fields. */ dictSetHashKey(d, entry, key); dictSetHashVal(d, entry, val); return DICT_OK; } /* Add an element, discarding the old if the key already exists. * Return 1 if the key was added from scratch, 0 if there was already an * element with such key and dictReplace() just performed a value update * operation. */ int dictReplace(dict *d, void *key, void *val) { dictEntry *entry, auxentry; /* Try to add the element. If the key * does not exists dictAdd will suceed. */ if (dictAdd(d, key, val) == DICT_OK) return 1; /* It already exists, get the entry */ entry = dictFind(d, key); /* Free the old value and set the new one */ /* Set the new value and free the old one. Note that it is important * to do that in this order, as the value may just be exactly the same * as the previous one. In this context, think to reference counting, * you want to increment (set), and then decrement (free), and not the * reverse. 
*/ auxentry = *entry; dictSetHashVal(d, entry, val); dictFreeEntryVal(d, &auxentry); return 0; } /* Search and remove an element */ static int dictGenericDelete(dict *d, const void *key, int nofree) { unsigned int h, idx; dictEntry *he, *prevHe; int table; if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */ if (dictIsRehashing(d)) _dictRehashStep(d); h = dictHashKey(d, key); for (table = 0; table <= 1; table++) { idx = h & d->ht[table].sizemask; he = d->ht[table].table[idx]; prevHe = NULL; while(he) { if (dictCompareHashKeys(d, key, he->key)) { /* Unlink the element from the list */ if (prevHe) prevHe->next = he->next; else d->ht[table].table[idx] = he->next; if (!nofree) { dictFreeEntryKey(d, he); dictFreeEntryVal(d, he); } _dictFree(he); d->ht[table].used--; return DICT_OK; } prevHe = he; he = he->next; } if (!dictIsRehashing(d)) break; } return DICT_ERR; /* not found */ } int dictDelete(dict *ht, const void *key) { return dictGenericDelete(ht,key,0); } int dictDeleteNoFree(dict *ht, const void *key) { return dictGenericDelete(ht,key,1); } /* Destroy an entire dictionary */ int _dictClear(dict *d, dictht *ht) { unsigned long i; /* Free all the elements */ for (i = 0; i < ht->size && ht->used > 0; i++) { dictEntry *he, *nextHe; if ((he = ht->table[i]) == NULL) continue; while(he) { nextHe = he->next; dictFreeEntryKey(d, he); dictFreeEntryVal(d, he); _dictFree(he); ht->used--; he = nextHe; } } /* Free the table and the allocated cache structure */ _dictFree(ht->table); /* Re-initialize the table */ _dictReset(ht); return DICT_OK; /* never fails */ } /* Clear & Release the hash table */ void dictRelease(dict *d) { _dictClear(d,&d->ht[0]); _dictClear(d,&d->ht[1]); _dictFree(d); } dictEntry *dictFind(dict *d, const void *key) { dictEntry *he; unsigned int h, idx, table; if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */ if (dictIsRehashing(d)) _dictRehashStep(d); h = dictHashKey(d, key); for (table = 0; table <= 1; table++) { idx 
= h & d->ht[table].sizemask; he = d->ht[table].table[idx]; while(he) { if (dictCompareHashKeys(d, key, he->key)) return he; he = he->next; } if (!dictIsRehashing(d)) return NULL; } return NULL; } void *dictFetchValue(dict *d, const void *key) { dictEntry *he; he = dictFind(d,key); return he ? dictGetEntryVal(he) : NULL; } dictIterator *dictGetIterator(dict *d) { dictIterator *iter = _dictAlloc(sizeof(*iter)); iter->d = d; iter->table = 0; iter->index = -1; iter->entry = NULL; iter->nextEntry = NULL; return iter; } dictEntry *dictNext(dictIterator *iter) { while (1) { if (iter->entry == NULL) { dictht *ht = &iter->d->ht[iter->table]; if (iter->index == -1 && iter->table == 0) iter->d->iterators++; iter->index++; if (iter->index >= (signed) ht->size) { if (dictIsRehashing(iter->d) && iter->table == 0) { iter->table++; iter->index = 0; ht = &iter->d->ht[1]; } else { break; } } iter->entry = ht->table[iter->index]; } else { iter->entry = iter->nextEntry; } if (iter->entry) { /* We need to save the 'next' here, the iterator user * may delete the entry we are returning. */ iter->nextEntry = iter->entry->next; return iter->entry; } } return NULL; } void dictReleaseIterator(dictIterator *iter) { if (!(iter->index == -1 && iter->table == 0)) iter->d->iterators--; _dictFree(iter); } /* Return a random entry from the hash table. Useful to * implement randomized algorithms */ dictEntry *dictGetRandomKey(dict *d) { dictEntry *he, *orighe; unsigned int h; int listlen, listele; if (dictSize(d) == 0) return NULL; if (dictIsRehashing(d)) _dictRehashStep(d); if (dictIsRehashing(d)) { do { h = random() % (d->ht[0].size+d->ht[1].size); he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] : d->ht[0].table[h]; } while(he == NULL); } else { do { h = random() & d->ht[0].sizemask; he = d->ht[0].table[h]; } while(he == NULL); } /* Now we found a non empty bucket, but it is a linked * list and we need to get a random element from the list. 
* The only sane way to do so is counting the elements and * select a random index. */ listlen = 0; orighe = he; while(he) { he = he->next; listlen++; } listele = random() % listlen; he = orighe; while(listele--) he = he->next; return he; } /* ------------------------- private functions ------------------------------ */ /* Expand the hash table if needed */ static int _dictExpandIfNeeded(dict *d) { /* If the hash table is empty expand it to the intial size, * if the table is "full" dobule its size. */ if (dictIsRehashing(d)) return DICT_OK; if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE); if (d->ht[0].used >= d->ht[0].size && dict_can_resize) return dictExpand(d, ((d->ht[0].size > d->ht[0].used) ? d->ht[0].size : d->ht[0].used)*2); return DICT_OK; } /* Our hash table capability is a power of two */ static unsigned long _dictNextPower(unsigned long size) { unsigned long i = DICT_HT_INITIAL_SIZE; if (size >= LONG_MAX) return LONG_MAX; while(1) { if (i >= size) return i; i *= 2; } } /* Returns the index of a free slot that can be populated with * an hash entry for the given 'key'. * If the key already exists, -1 is returned. * * Note that if we are in the process of rehashing the hash table, the * index is always returned in the context of the second (new) hash table. 
*/ static int _dictKeyIndex(dict *d, const void *key) { unsigned int h, idx, table; dictEntry *he; /* Expand the hashtable if needed */ if (_dictExpandIfNeeded(d) == DICT_ERR) return -1; /* Compute the key hash value */ h = dictHashKey(d, key); for (table = 0; table <= 1; table++) { idx = h & d->ht[table].sizemask; /* Search if this slot does not already contain the given key */ he = d->ht[table].table[idx]; while(he) { if (dictCompareHashKeys(d, key, he->key)) return -1; he = he->next; } if (!dictIsRehashing(d)) break; } return idx; } void dictEmpty(dict *d) { _dictClear(d,&d->ht[0]); _dictClear(d,&d->ht[1]); d->rehashidx = -1; d->iterators = 0; } #define DICT_STATS_VECTLEN 50 static void _dictPrintStatsHt(dictht *ht) { unsigned long i, slots = 0, chainlen, maxchainlen = 0; unsigned long totchainlen = 0; unsigned long clvector[DICT_STATS_VECTLEN]; if (ht->used == 0) { printf("No stats available for empty dictionaries\n"); return; } for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0; for (i = 0; i < ht->size; i++) { dictEntry *he; if (ht->table[i] == NULL) { clvector[0]++; continue; } slots++; /* For each hash entry on this slot... */ chainlen = 0; he = ht->table[i]; while(he) { chainlen++; he = he->next; } clvector[(chainlen < DICT_STATS_VECTLEN) ? 
chainlen : (DICT_STATS_VECTLEN-1)]++; if (chainlen > maxchainlen) maxchainlen = chainlen; totchainlen += chainlen; } printf("Hash table stats:\n"); printf(" table size: %ld\n", ht->size); printf(" number of elements: %ld\n", ht->used); printf(" different slots: %ld\n", slots); printf(" max chain length: %ld\n", maxchainlen); printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots); printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots); printf(" Chain length distribution:\n"); for (i = 0; i < DICT_STATS_VECTLEN-1; i++) { if (clvector[i] == 0) continue; printf(" %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100); } } void dictPrintStats(dict *d) { _dictPrintStatsHt(&d->ht[0]); if (dictIsRehashing(d)) { printf("-- Rehashing into ht[1]:\n"); _dictPrintStatsHt(&d->ht[1]); } } void dictEnableResize(void) { dict_can_resize = 1; } void dictDisableResize(void) { dict_can_resize = 0; } /* ----------------------- StringCopy Hash Table Type ------------------------*/ static unsigned int _dictStringCopyHTHashFunction(const void *key) { return dictGenHashFunction(key, strlen(key)); } static void *_dictStringCopyHTKeyDup(void *privdata, const void *key) { int len = strlen(key); char *copy = _dictAlloc(len+1); DICT_NOTUSED(privdata); memcpy(copy, key, len); copy[len] = '\0'; return copy; } static void *_dictStringKeyValCopyHTValDup(void *privdata, const void *val) { int len = strlen(val); char *copy = _dictAlloc(len+1); DICT_NOTUSED(privdata); memcpy(copy, val, len); copy[len] = '\0'; return copy; } static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1, const void *key2) { DICT_NOTUSED(privdata); return strcmp(key1, key2) == 0; } static void _dictStringCopyHTKeyDestructor(void *privdata, void *key) { DICT_NOTUSED(privdata); _dictFree((void*)key); /* ATTENTION: const cast */ } static void _dictStringKeyValCopyHTValDestructor(void *privdata, void *val) { 
DICT_NOTUSED(privdata); _dictFree((void*)val); /* ATTENTION: const cast */ } dictType dictTypeHeapStringCopyKey = { _dictStringCopyHTHashFunction, /* hash function */ _dictStringCopyHTKeyDup, /* key dup */ NULL, /* val dup */ _dictStringCopyHTKeyCompare, /* key compare */ _dictStringCopyHTKeyDestructor, /* key destructor */ NULL /* val destructor */ }; /* This is like StringCopy but does not auto-duplicate the key. * It's used for intepreter's shared strings. */ dictType dictTypeHeapStrings = { _dictStringCopyHTHashFunction, /* hash function */ NULL, /* key dup */ NULL, /* val dup */ _dictStringCopyHTKeyCompare, /* key compare */ _dictStringCopyHTKeyDestructor, /* key destructor */ NULL /* val destructor */ }; /* This is like StringCopy but also automatically handle dynamic * allocated C strings as values. */ dictType dictTypeHeapStringCopyKeyValue = { _dictStringCopyHTHashFunction, /* hash function */ _dictStringCopyHTKeyDup, /* key dup */ _dictStringKeyValCopyHTValDup, /* val dup */ _dictStringCopyHTKeyCompare, /* key compare */ _dictStringCopyHTKeyDestructor, /* key destructor */ _dictStringKeyValCopyHTValDestructor, /* val destructor */ };