summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2003-03-14 11:01:01 +0000
committerBruno Haible <bruno@clisp.org>2003-03-14 11:01:01 +0000
commit7a8b43182a9e171bbd307466dde6b5c7878135f4 (patch)
tree3a7f41281fe9963b12196c521e54a1a814b3a973
parent19c69d8e5a9151d25938eb06244978eb753995e8 (diff)
downloadgperf-7a8b43182a9e171bbd307466dde6b5c7878135f4.tar.gz
Optimized choice during collision resolution.
-rw-r--r--ChangeLog14
-rw-r--r--src/options.cc10
-rw-r--r--src/options.h5
-rw-r--r--src/search.cc111
-rw-r--r--src/search.h9
5 files changed, 136 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 4b2eb2a..b6386c3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
2002-12-07 Bruno Haible <bruno@clisp.org>
+ * src/options.h (OPT_CHOICE): New enum value.
+ * src/options.cc (Options::~Options): Update.
+ (long_options): New option --optimized-collision-resolution.
+ (Options::parse_options): Accept option -O.
+ * src/search.h (Search::sort_by_occurrence): Change argument to
+ 'unsigned int'.
+ (Search::compute_occurrence, Search::sort_by_occurrence): New method
+ declarations.
+ * src/search.cc (Search::sort_by_occurrence): Change argument to
+ 'unsigned int'.
+ (Search::compute_occurrence, Search::sort_by_occurrence): New methods.
+ (Search::find_asso_values): Implement OPT_CHOICE. More debugging
+ output.
+
* src/search.cc (Search::prepare_asso_values) [DEBUG]: Also print
the keyword list in order.
(Search::find_asso_values) [DEBUG]: Upon failure, print the union_set.
diff --git a/src/options.cc b/src/options.cc
index bfe4007..e5428e5 100644
--- a/src/options.cc
+++ b/src/options.cc
@@ -473,6 +473,7 @@ Options::~Options ()
"\nENUM is........: %s"
"\nINCLUDE is.....: %s"
"\nSEVENBIT is....: %s"
+ "\nOPT_CHOICE is..: %s"
"\niterations = %d"
"\nlookup function name = %s"
"\nhash function name = %s"
@@ -505,6 +506,7 @@ Options::~Options ()
_option_word & ENUM ? "enabled" : "disabled",
_option_word & INCLUDE ? "enabled" : "disabled",
_option_word & SEVENBIT ? "enabled" : "disabled",
+ _option_word & OPT_CHOICE ? "enabled" : "disabled",
_iterations,
_function_name, _hash_name, _wordlist_name, _slot_name,
_initializer_suffix, _asso_iterations, _jump, _size_multiple,
@@ -648,6 +650,7 @@ static const struct option long_options[] =
{ "multiple-iterations", required_argument, NULL, 'm' },
{ "no-strlen", no_argument, NULL, 'n' },
{ "occurrence-sort", no_argument, NULL, 'o' },
+ { "optimized-collision-resolution", no_argument, NULL, 'O' },
{ "random", no_argument, NULL, 'r' },
{ "size-multiple", required_argument, NULL, 's' },
{ "help", no_argument, NULL, 'h' },
@@ -667,7 +670,7 @@ Options::parse_options (int argc, char *argv[])
while ((option_char =
getopt_long (_argument_count, _argument_vector,
- "acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oprs:S:tTvW:Z:7",
+ "acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oOprs:S:tTvW:Z:7",
long_options, NULL))
!= -1)
{
@@ -862,6 +865,11 @@ Options::parse_options (int argc, char *argv[])
_option_word |= ORDER;
break;
}
+ case 'O': /* Optimized choice during collision resolution. */
+ {
+ _option_word |= OPT_CHOICE;
+ break;
+ }
case 'p': /* Generated lookup function a pointer instead of int. */
break; /* This is now the default. */
case 'r': /* Utilize randomness to initialize the associated values table. */
diff --git a/src/options.h b/src/options.h
index 2a20d57..bad6966 100644
--- a/src/options.h
+++ b/src/options.h
@@ -101,7 +101,10 @@ enum Option_Type
INCLUDE = 1 << 20,
/* Assume 7-bit, not 8-bit, characters. */
- SEVENBIT = 1 << 21
+ SEVENBIT = 1 << 21,
+
+ /* Apply optimized collision resolution to speed-up search time. */
+ OPT_CHOICE = 1 << 22
};
/* Class manager for gperf program Options. */
diff --git a/src/search.cc b/src/search.cc
index cb0ed8f..0046c55 100644
--- a/src/search.cc
+++ b/src/search.cc
@@ -27,6 +27,7 @@
#include <stdlib.h> /* declares exit(), rand(), srand() */
#include <string.h> /* declares memset(), memcmp() */
#include <time.h> /* declares time() */
+#include <math.h> /* declares exp() */
#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */
#include "options.h"
#include "hash-table.h"
@@ -904,7 +905,7 @@ Search::compute_hash (KeywordExt *keyword) const
const unsigned int *p = keyword->_selchars;
int i = keyword->_selchars_length;
for (; i > 0; p++, i--)
- sum += _asso_values[*p];
+ sum += _asso_values[*p];
return keyword->_hash_value = sum;
}
@@ -962,20 +963,96 @@ compute_disjoint_union (const unsigned int *set_1, int size_1,
/* Sorts the given set in increasing frequency of _occurrences[]. */
inline void
-Search::sort_by_occurrence (unsigned int *set, int len) const
+Search::sort_by_occurrence (unsigned int *set, unsigned int len) const
{
/* Use bubble sort, since the set is typically short. */
- for (int i = 1; i < len; i++)
+ for (unsigned int i = 1; i < len; i++)
{
- int curr;
+ unsigned int j;
unsigned int tmp;
- for (curr = i, tmp = set[curr];
- curr > 0 && _occurrences[tmp] < _occurrences[set[curr-1]];
- curr--)
- set[curr] = set[curr - 1];
+ for (j = i, tmp = set[j];
+ j > 0 && _occurrences[tmp] < _occurrences[set[j-1]];
+ j--)
+ set[j] = set[j - 1];
- set[curr] = tmp;
+ set[j] = tmp;
+ }
+}
+
+/* Computes the frequency of occurrence of a character among the keywords
+ up to the given keyword. */
+inline unsigned int
+Search::compute_occurrence (unsigned int c, KeywordExt *curr) const
+{
+ unsigned int occurrence = 0;
+
+ for (KeywordExt_List *temp = _head; ; temp = temp->rest())
+ {
+ KeywordExt *keyword = temp->first();
+
+ int m = keyword->_selchars_length;
+ for (int i = 0; i < m; i++)
+ if (keyword->_selchars[i] == c)
+ {
+ occurrence++;
+ break;
+ }
+
+ if (keyword == curr)
+ break;
+ }
+
+ return occurrence;
+}
+
+/* Sorts the given set in increasing frequency of occurrences among the
+ keywords up to the given keyword. */
+
+inline void
+Search::sort_by_occurrence (unsigned int *set, unsigned int len, KeywordExt *curr) const
+{
+ unsigned int occurrences[len];
+
+ for (unsigned int j = 0; j < len; j++)
+ occurrences[j] = 0;
+ for (KeywordExt_List *temp = _head; ; temp = temp->rest())
+ {
+ KeywordExt *keyword = temp->first();
+
+ int m = keyword->_selchars_length;
+ for (unsigned int j = 0; j < len; j++)
+ {
+ unsigned int c = set[j];
+
+ for (int i = 0; i < m; i++)
+ if (keyword->_selchars[i] == c)
+ {
+ occurrences[j]++;
+ break;
+ }
+ }
+
+ if (keyword == curr)
+ break;
+ }
+
+ /* Use bubble sort, since the set is typically short. */
+ for (unsigned int i = 1; i < len; i++)
+ {
+ unsigned int j;
+ unsigned int set_tmp, occ_tmp;
+
+ for (j = i, set_tmp = set[j], occ_tmp = occurrences[j];
+ j > 0 && occ_tmp < occurrences[j-1];
+ j--)
+ {
+ set[j] = set[j - 1];
+ occurrences[j] = occurrences[j - 1];
+ }
+
+ set[j] = set_tmp;
+ occurrences[j] = occ_tmp;
}
}
@@ -1152,12 +1229,26 @@ Search::find_asso_values ()
/* Sort by decreasing occurrence: Try least-used characters c first.
The idea is that this reduces the number of freshly introduced
collisions. */
- sort_by_occurrence (union_set, union_set_length);
+ if (option[OPT_CHOICE])
+ sort_by_occurrence (union_set, union_set_length, curr);
+ else
+ sort_by_occurrence (union_set, union_set_length);
for (union_index = 0; union_index < union_set_length; union_index++)
{
c = union_set[union_index];
+ if (option[DEBUG])
+ {
+ unsigned int n = sp - stack + 1;
+ unsigned int o = compute_occurrence (c, curr);
+ fprintf (stderr, "Expected number of iterations between %g and %g\n",
+ exp (static_cast<double>((2*n-o)*o)
+ / static_cast<double>(2*_max_hash_value)),
+ exp (static_cast<double>((2*n-o)*o)
+ / static_cast<double>(2*_asso_value_max)));
+ }
+
/* Try various other values for _asso_values[c]. A value is
successful if, with it, the recomputed hash values for the
keywords from _head->first() to curr - inclusive - give no
diff --git a/src/search.h b/src/search.h
index aefcd08..928c615 100644
--- a/src/search.h
+++ b/src/search.h
@@ -90,8 +90,15 @@ private:
and stores it in keyword->_hash_value. */
int compute_hash (KeywordExt *keyword) const;
+ /* Computes the frequency of occurrence of a character among the keywords
+ up to the given keyword. */
+ unsigned int compute_occurrence (unsigned int c, KeywordExt *curr) const;
+
/* Sorts the given set in increasing frequency of _occurrences[]. */
- void sort_by_occurrence (unsigned int *set, int len) const;
+ void sort_by_occurrence (unsigned int *set, unsigned int len) const;
+ /* Sorts the given set in increasing frequency of occurrences among the
+ keywords up to the given keyword. */
+ void sort_by_occurrence (unsigned int *set, unsigned int len, KeywordExt *curr) const;
bool has_collisions (KeywordExt *curr);