diff options
author | Bruno Haible <bruno@clisp.org> | 2002-12-19 12:35:17 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2002-12-19 12:35:17 +0000 |
commit | 1186e616cbdad01f89e59a971f7073e55bcd0111 (patch) | |
tree | fb75cfa992e5543810c3ee9dfd7475ca084637a2 /src/hash-table.cc | |
parent | 388a431c7d1b5f856993425e4a947b8aaa709431 (diff) | |
download | gperf-1186e616cbdad01f89e59a971f7073e55bcd0111.tar.gz |
Rework the hash table code.
Diffstat (limited to 'src/hash-table.cc')
-rw-r--r-- | src/hash-table.cc | 98 |
1 files changed, 76 insertions, 22 deletions
diff --git a/src/hash-table.cc b/src/hash-table.cc index b6622ba..3b8c139 100644 --- a/src/hash-table.cc +++ b/src/hash-table.cc @@ -28,21 +28,61 @@ #include <hash.h> #include "options.h" -/* The size of the hash table is always the smallest power of 2 >= the size - indicated by the user. This allows several optimizations, including - the use of double hashing and elimination of the mod instruction. - Note that the size had better be larger than the number of items - in the hash table, else there's trouble!!! Note that the memory - for the hash table is allocated *outside* the intialization routine. - This compromises information hiding somewhat, but greatly reduces - memory fragmentation, since we can now use alloca! */ - -Hash_Table::Hash_Table (KeywordExt **table_ptr, int s, bool ignore_len): - _table (table_ptr), _size (s), _collisions (0), _ignore_length (ignore_len) +/* We use a hash table with double hashing. This is the simplest kind of + hash table, given that we always only insert and never remove entries + from the hash table. */ + +/* To make double hashing efficient, there need to be enough spare entries. */ +static const int size_factor = 10; + +/* We make the size of the hash table a power of 2. This allows for two + optimizations: It eliminates the modulo instruction, and allows for an + easy secondary hashing function. */ + +/* Constructor. */ +Hash_Table::Hash_Table (unsigned int size, bool ignore_length) + : _ignore_length (ignore_length), + _collisions (0) { + /* There need to be enough spare entries. */ + size = size * size_factor; + + /* Find smallest power of 2 that is >= size. */ + unsigned int shift = 0; + if ((size >> 16) > 0) + { + size = size >> 16; + shift += 16; + } + if ((size >> 8) > 0) + { + size = size >> 8; + shift += 8; + } + if ((size >> 4) > 0) + { + size = size >> 4; + shift += 4; + } + if ((size >> 2) > 0) + { + size = size >> 2; + shift += 2; + } + if ((size >> 1) > 0) + { + size = size >> 1; + shift += 1; + } + _log_size = shift; + _size = 1 << shift; + + /* Allocate table. */ + _table = new KeywordExt*[_size]; memset (_table, 0, _size * sizeof (*_table)); } +/* Destructor. */ Hash_Table::~Hash_Table () { if (option[DEBUG]) @@ -76,24 +116,38 @@ Hash_Table::~Hash_Table () fprintf (stderr, "\nend dumping hash table\n\n"); } + delete[] _table; } -/* If the ITEM is already in the hash table return the item found - in the table. Otherwise inserts the ITEM, and returns FALSE. - Uses double hashing. */ +/* Compares two items. */ +inline bool +Hash_Table::equal (KeywordExt *item1, KeywordExt *item2) +{ + return item1->_selchars_length == item2->_selchars_length + && memcmp (item1->_selchars, item2->_selchars, item2->_selchars_length) + == 0 + && (_ignore_length + || item1->_allchars_length == item2->_allchars_length); +} +/* Attempts to insert ITEM in the table. If there is already an equal + entry in it, returns it. Otherwise inserts ITEM and returns NULL. */ KeywordExt * Hash_Table::insert (KeywordExt *item) { - unsigned hash_val = hashpjw (item->_selchars, item->_selchars_length); - int probe = hash_val & (_size - 1); - int increment = ((hash_val ^ item->_allchars_length) | 1) & (_size - 1); - - while (_table[probe]) + unsigned hash_val = hashpjw (item->_selchars, item->_selchars_length); + unsigned int probe = hash_val & (_size - 1); + unsigned int increment = + (((hash_val >> _log_size) + ^ (_ignore_length ? 0 : item->_allchars_length)) + << 1) + 1; + /* Note that because _size is a power of 2 and increment is odd, + we have gcd(increment,_size) = 1, which guarantees that we'll find + an empty entry during the loop. */ + + while (_table[probe] != NULL) { - if (_table[probe]->_selchars_length == item->_selchars_length - && memcmp (_table[probe]->_selchars, item->_selchars, item->_selchars_length) == 0 - && (_ignore_length || _table[probe]->_allchars_length == item->_allchars_length)) + if (equal (_table[probe], item)) return _table[probe]; _collisions++; |