diff options
author | Bruno Haible <bruno@clisp.org> | 2003-03-14 11:01:01 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2003-03-14 11:01:01 +0000 |
commit | 7a8b43182a9e171bbd307466dde6b5c7878135f4 (patch) | |
tree | 3a7f41281fe9963b12196c521e54a1a814b3a973 | |
parent | 19c69d8e5a9151d25938eb06244978eb753995e8 (diff) | |
download | gperf-7a8b43182a9e171bbd307466dde6b5c7878135f4.tar.gz |
Optimized choice during collision resolution.
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | src/options.cc | 10 | ||||
-rw-r--r-- | src/options.h | 5 | ||||
-rw-r--r-- | src/search.cc | 111 | ||||
-rw-r--r-- | src/search.h | 9 |
5 files changed, 136 insertions, 13 deletions
@@ -1,5 +1,19 @@ 2002-12-07 Bruno Haible <bruno@clisp.org> + * src/options.h (OPT_CHOICE): New enum value. + * src/options.cc (Options::~Options): Update. + (long_options): New option --optimized-collision-resolution. + (Options::parse_options): Accept option -O. + * src/search.h (Search::sort_by_occurrence): Change argument to + 'unsigned int'. + (Search::compute_occurrence, Search::sort_by_occurrence): New method + declarations. + * src/search.cc (Search::sort_by_occurrence): Change argument to + 'unsigned int'. + (Search::compute_occurrence, Search::sort_by_occurrence): New methods. + (Search::find_asso_values): Implement OPT_CHOICE. More debugging + output. + * src/search.cc (Search::prepare_asso_values) [DEBUG]: Also print the keyword list in order. (Search::find_asso_values) [DEBUG]: Upon failure, print the union_set. diff --git a/src/options.cc b/src/options.cc index bfe4007..e5428e5 100644 --- a/src/options.cc +++ b/src/options.cc @@ -473,6 +473,7 @@ Options::~Options () "\nENUM is........: %s" "\nINCLUDE is.....: %s" "\nSEVENBIT is....: %s" + "\nOPT_CHOICE is..: %s" "\niterations = %d" "\nlookup function name = %s" "\nhash function name = %s" @@ -505,6 +506,7 @@ Options::~Options () _option_word & ENUM ? "enabled" : "disabled", _option_word & INCLUDE ? "enabled" : "disabled", _option_word & SEVENBIT ? "enabled" : "disabled", + _option_word & OPT_CHOICE ? "enabled" : "disabled", _iterations, _function_name, _hash_name, _wordlist_name, _slot_name, _initializer_suffix, _asso_iterations, _jump, _size_multiple, @@ -648,6 +650,7 @@ static const struct option long_options[] = { "multiple-iterations", required_argument, NULL, 'm' }, { "no-strlen", no_argument, NULL, 'n' }, { "occurrence-sort", no_argument, NULL, 'o' }, + { "optimized-collision-resolution", no_argument, NULL, 'O' }, { "random", no_argument, NULL, 'r' }, { "size-multiple", required_argument, NULL, 's' }, { "help", no_argument, NULL, 'h' }, @@ -667,7 +670,7 @@ Options::parse_options (int argc, char *argv[]) while ((option_char = getopt_long (_argument_count, _argument_vector, - "acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oprs:S:tTvW:Z:7", + "acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oOprs:S:tTvW:Z:7", long_options, NULL)) != -1) { @@ -862,6 +865,11 @@ Options::parse_options (int argc, char *argv[]) _option_word |= ORDER; break; } + case 'O': /* Optimized choice during collision resolution. */ + { + _option_word |= OPT_CHOICE; + break; + } case 'p': /* Generated lookup function a pointer instead of int. */ break; /* This is now the default. */ case 'r': /* Utilize randomness to initialize the associated values table. */ diff --git a/src/options.h b/src/options.h index 2a20d57..bad6966 100644 --- a/src/options.h +++ b/src/options.h @@ -101,7 +101,10 @@ enum Option_Type INCLUDE = 1 << 20, /* Assume 7-bit, not 8-bit, characters. */ - SEVENBIT = 1 << 21 + SEVENBIT = 1 << 21, + + /* Apply optimized collision resolution to speed-up search time. */ + OPT_CHOICE = 1 << 22 }; /* Class manager for gperf program Options. */ diff --git a/src/search.cc b/src/search.cc index cb0ed8f..0046c55 100644 --- a/src/search.cc +++ b/src/search.cc @@ -27,6 +27,7 @@ #include <stdlib.h> /* declares exit(), rand(), srand() */ #include <string.h> /* declares memset(), memcmp() */ #include <time.h> /* declares time() */ +#include <math.h> /* declares exp() */ #include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */ #include "options.h" #include "hash-table.h" @@ -904,7 +905,7 @@ Search::compute_hash (KeywordExt *keyword) const const unsigned int *p = keyword->_selchars; int i = keyword->_selchars_length; for (; i > 0; p++, i--) - sum += _asso_values[*p]; + sum += _asso_values[*p]; return keyword->_hash_value = sum; } @@ -962,20 +963,96 @@ compute_disjoint_union (const unsigned int *set_1, int size_1, /* Sorts the given set in increasing frequency of _occurrences[]. */ inline void -Search::sort_by_occurrence (unsigned int *set, int len) const +Search::sort_by_occurrence (unsigned int *set, unsigned int len) const { /* Use bubble sort, since the set is typically short. */ - for (int i = 1; i < len; i++) + for (unsigned int i = 1; i < len; i++) { - int curr; + unsigned int j; unsigned int tmp; - for (curr = i, tmp = set[curr]; - curr > 0 && _occurrences[tmp] < _occurrences[set[curr-1]]; - curr--) - set[curr] = set[curr - 1]; + for (j = i, tmp = set[j]; + j > 0 && _occurrences[tmp] < _occurrences[set[j-1]]; + j--) + set[j] = set[j - 1]; - set[curr] = tmp; + set[j] = tmp; + } +} + +/* Computes the frequency of occurrence of a character among the keywords + up to the given keyword. */ +inline unsigned int +Search::compute_occurrence (unsigned int c, KeywordExt *curr) const +{ + unsigned int occurrence = 0; + + for (KeywordExt_List *temp = _head; ; temp = temp->rest()) + { + KeywordExt *keyword = temp->first(); + + int m = keyword->_selchars_length; + for (int i = 0; i < m; i++) + if (keyword->_selchars[i] == c) + { + occurrence++; + break; + } + + if (keyword == curr) + break; + } + + return occurrence; +} + +/* Sorts the given set in increasing frequency of occurrences among the + keywords up to the given keyword. */ + +inline void +Search::sort_by_occurrence (unsigned int *set, unsigned int len, KeywordExt *curr) const +{ + unsigned int occurrences[len]; + + for (unsigned int j = 0; j < len; j++) + occurrences[j] = 0; + for (KeywordExt_List *temp = _head; ; temp = temp->rest()) + { + KeywordExt *keyword = temp->first(); + + int m = keyword->_selchars_length; + for (unsigned int j = 0; j < len; j++) + { + unsigned int c = set[j]; + + for (int i = 0; i < m; i++) + if (keyword->_selchars[i] == c) + { + occurrences[j]++; + break; + } + } + + if (keyword == curr) + break; + } + + /* Use bubble sort, since the set is typically short. */ + for (unsigned int i = 1; i < len; i++) + { + unsigned int j; + unsigned int set_tmp, occ_tmp; + + for (j = i, set_tmp = set[j], occ_tmp = occurrences[j]; + j > 0 && occ_tmp < occurrences[j-1]; + j--) + { + set[j] = set[j - 1]; + occurrences[j] = occurrences[j - 1]; + } + + set[j] = set_tmp; + occurrences[j] = occ_tmp; } } @@ -1152,12 +1229,26 @@ Search::find_asso_values () /* Sort by decreasing occurrence: Try least-used characters c first. The idea is that this reduces the number of freshly introduced collisions. */ - sort_by_occurrence (union_set, union_set_length); + if (option[OPT_CHOICE]) + sort_by_occurrence (union_set, union_set_length, curr); + else + sort_by_occurrence (union_set, union_set_length); for (union_index = 0; union_index < union_set_length; union_index++) { c = union_set[union_index]; + if (option[DEBUG]) + { + unsigned int n = sp - stack + 1; + unsigned int o = compute_occurrence (c, curr); + fprintf (stderr, "Expected number of iterations between %g and %g\n", + exp (static_cast<double>((2*n-o)*o) + / static_cast<double>(2*_max_hash_value)), + exp (static_cast<double>((2*n-o)*o) + / static_cast<double>(2*_asso_value_max))); + } + /* Try various other values for _asso_values[c]. A value is successful if, with it, the recomputed hash values for the keywords from _head->first() to curr - inclusive - give no diff --git a/src/search.h b/src/search.h index aefcd08..928c615 100644 --- a/src/search.h +++ b/src/search.h @@ -90,8 +90,15 @@ private: and stores it in keyword->_hash_value. */ int compute_hash (KeywordExt *keyword) const; + /* Computes the frequency of occurrence of a character among the keywords + up to the given keyword. */ + unsigned int compute_occurrence (unsigned int c, KeywordExt *curr) const; + /* Sorts the given set in increasing frequency of _occurrences[]. */ - void sort_by_occurrence (unsigned int *set, int len) const; + void sort_by_occurrence (unsigned int *set, unsigned int len) const; + /* Sorts the given set in increasing frequency of occurrences among the + keywords up to the given keyword. */ + void sort_by_occurrence (unsigned int *set, unsigned int len, KeywordExt *curr) const; bool has_collisions (KeywordExt *curr); |