summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2003-01-15 13:01:25 +0000
committerBruno Haible <bruno@clisp.org>2003-01-15 13:01:25 +0000
commitc3467c53024650c0a495d30caabad74ecea4f080 (patch)
tree0dfd3df1b3c4747fac443f22abf2eaa4157a4e7d
parentc67f999b54f456288561652503e8ad314a671f98 (diff)
downloadgperf-c3467c53024650c0a495d30caabad74ecea4f080.tar.gz
New option --multiple-iterations.
-rw-r--r--ChangeLog23
-rw-r--r--NEWS5
-rw-r--r--doc/gperf.texi8
-rw-r--r--src/keyword-list.cc42
-rw-r--r--src/keyword-list.h7
-rw-r--r--src/options.cc26
-rw-r--r--src/options.h6
-rw-r--r--src/options.icc7
-rw-r--r--src/search.cc83
-rw-r--r--src/search.h5
-rw-r--r--tests/test-6.exp5
11 files changed, 207 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index 55e718c..1f06aaf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,26 @@
+2002-11-04 Bruno Haible <bruno@clisp.org>
+
+ * src/options.h (Options::_asso_iterations): New field.
+ (Options::get_asso_iterations): New method declaration.
+ * src/options.icc (Options::get_asso_iterations): New method.
+ * src/options.cc (Options::short_usage): Mention j<jump> and m<num>.
+ (Options::long_usage): Document option -m.
+ (Options::Options): Initialize _asso_iterations.
+ (Options::~Options): Print _asso_iterations too.
+ (long_options): Add --multiple-iterations.
+ (Options::parse_options): Handle option -m.
+ * src/keyword-list.h (copy_list, delete_list): New declarations.
+ * src/keyword-list.cc (copy_list, delete_list): New functions.
+ * src/search.h (Search::_initial_asso_value, Search::_jump): New fields.
+ * src/search.cc (Search::prepare_asso_values): Initialize
+ _initial_asso_value and _jump here.
+ (Search::init_asso_values): Use _initial_asso_value.
+ (Search::try_asso_value): Use _jump.
+ (Search::optimize): If option -m was given, iterate over different
+ values for _initial_asso_value and _jump.
+ * doc/gperf.texi (Algorithmic Details): Document option -m.
+ * tests/test-6.exp: Update.
+
2002-11-03 Bruno Haible <bruno@clisp.org>
Bug fix: When option -j 0 was used without option -r, the output was
diff --git a/NEWS b/NEWS
index 9a2877f..d638a60 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,8 @@
+New in 2.8:
+
+* Added option -m/--multiple-iterations that reduces the size of the
+ generated table.
+
New in 2.7.2:
* Keywords may now be enclosed in double quotes; this permits the use of
diff --git a/doc/gperf.texi b/doc/gperf.texi
index 8b5ce72..46f0832 100644
--- a/doc/gperf.texi
+++ b/doc/gperf.texi
@@ -546,7 +546,7 @@ Also, in this case the @samp{-c} option is ignored.
There are @emph{many} options to @code{gperf}. They were added to make
the program more convenient for use with real applications. ``On-line''
-help is readily available via the @samp{-h} option. Here is the
+help is readily available via the @samp{--help} option. Here is the
complete list of options.
@menu
@@ -802,6 +802,12 @@ iterate when resolving a collision. `0' means iterate by the number of
keywords. This option is probably most useful when used in conjunction
with options @samp{-D} and/or @samp{-S} for @emph{large} keyword sets.
+@item -m @var{iterations}
+@itemx --multiple-iterations=@var{iterations}
+Perform multiple choices of the @samp{-i} and @samp{-j} values, and
+choose the best results. This increases the running time by a factor of
+@var{iterations} but does a good job minimizing the generated table size.
+
@item -i @var{initial-value}
@itemx --initial-asso=@var{initial-value}
Provides an initial @var{value} for the associate values array. Default
diff --git a/src/keyword-list.cc b/src/keyword-list.cc
index b154a98..8e0983c 100644
--- a/src/keyword-list.cc
+++ b/src/keyword-list.cc
@@ -25,18 +25,60 @@
#include <stddef.h>
+/* -------------------------- Keyword_List class --------------------------- */
+
/* Constructor. */
Keyword_List::Keyword_List (Keyword *car)
: _cdr (NULL), _car (car)
{
}
+/* ------------------------- KeywordExt_List class ------------------------- */
+
/* Unused constructor. */
KeywordExt_List::KeywordExt_List (KeywordExt *car)
: Keyword_List (car)
{
}
+/* ------------------------ Keyword_List functions ------------------------- */
+
+/* Copies a linear list, sharing the list elements. */
+Keyword_List *
+copy_list (Keyword_List *list)
+{
+ Keyword_List *result;
+ Keyword_List **lastp = &result;
+ while (list != NULL)
+ {
+ Keyword_List *new_cons = new Keyword_List (list->first());
+ *lastp = new_cons;
+ lastp = &new_cons->rest();
+ list = list->rest();
+ }
+ *lastp = NULL;
+ return result;
+}
+
+/* Copies a linear list, sharing the list elements. */
+KeywordExt_List *
+copy_list (KeywordExt_List *list)
+{
+ return static_cast<KeywordExt_List *> (copy_list (static_cast<Keyword_List *> (list)));
+}
+
+/* Deletes a linear list, keeping the list elements in memory. */
+void
+delete_list (Keyword_List *list)
+{
+ while (list != NULL)
+ {
+ Keyword_List *rest = list->rest();
+ delete list;
+ list = rest;
+ }
+}
+
#ifndef __OPTIMIZE__
diff --git a/src/keyword-list.h b/src/keyword-list.h
index 09f4988..888eb89 100644
--- a/src/keyword-list.h
+++ b/src/keyword-list.h
@@ -57,6 +57,13 @@ public:
KeywordExt_List *& rest ();
};
+/* Copies a linear list, sharing the list elements. */
+extern Keyword_List * copy_list (Keyword_List *list);
+extern KeywordExt_List * copy_list (KeywordExt_List *list);
+
+/* Deletes a linear list, keeping the list elements in memory. */
+extern void delete_list (Keyword_List *list);
+
#ifdef __OPTIMIZE__
#define INLINE inline
diff --git a/src/options.cc b/src/options.cc
index 4a24212..f207ff9 100644
--- a/src/options.cc
+++ b/src/options.cc
@@ -65,7 +65,7 @@ static const char *const DEFAULT_DELIMITERS = ",\n";
void
Options::short_usage (FILE * stream) const
{
- fprintf (stream, "Usage: %s [-cCdDef[num]F<initializers>GhH<hashname>i<init>Ijk<keys>K<keyname>lL<language>nN<function name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
+ fprintf (stream, "Usage: %s [-cCdDef[num]F<initializers>GhH<hashname>i<init>Ij<jump>k<keys>K<keyname>lL<language>m<num>nN<function name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
"Try '%s --help' for more information.\n",
program_name, program_name);
}
@@ -193,6 +193,12 @@ Options::long_usage (FILE * stream) const
" when resolving a collision. '0' means \"iterate by\n"
" the number of keywords\".\n");
fprintf (stream,
+ " -m, --multiple-iterations=ITERATIONS\n"
+ " Perform multiple choices of the -i and -j values,\n"
+ " and choose the best results. This increases the\n"
+ " running time by a factor of ITERATIONS but does a\n"
+ " good job minimizing the generated table size.\n");
+ fprintf (stream,
" -i, --initial-asso=N Provide an initial value for the associate values\n"
" array. Default is 0. Setting this value larger helps\n"
" inflate the size of the final table.\n");
@@ -413,6 +419,7 @@ Options::Options ()
_iterations (0),
_jump (DEFAULT_JUMP_VALUE),
_initial_asso_value (0),
+ _asso_iterations (0),
_total_switches (1),
_size_multiple (1),
_function_name (DEFAULT_NAME),
@@ -459,6 +466,7 @@ Options::~Options ()
"\nword list name = %s"
"\nkey name = %s"
"\ninitializer suffix = %s"
+ "\nasso_values iterations = %d"
"\njump value = %d"
"\nhash table size multiplier = %d"
"\ninitial associated value = %d"
@@ -486,8 +494,8 @@ Options::~Options ()
_option_word & SEVENBIT ? "enabled" : "disabled",
_iterations,
_function_name, _hash_name, _wordlist_name, _key_name,
- _initializer_suffix, _jump, _size_multiple, _initial_asso_value,
- _delimiters, _total_switches);
+ _initializer_suffix, _asso_iterations, _jump, _size_multiple,
+ _initial_asso_value, _delimiters, _total_switches);
if (_option_word & ALLCHARS)
fprintf (stderr, "all characters are used in the hash function\n");
else
@@ -535,6 +543,7 @@ static const struct option long_options[] =
{ "fast", required_argument, NULL, 'f' },
{ "initial-asso", required_argument, NULL, 'i' },
{ "jump", required_argument, NULL, 'j' },
+ { "multiple-iterations", required_argument, NULL, 'm' },
{ "no-strlen", no_argument, NULL, 'n' },
{ "occurrence-sort", no_argument, NULL, 'o' },
{ "random", no_argument, NULL, 'r' },
@@ -556,7 +565,7 @@ Options::parse_options (int argc, char *argv[])
while ((option_char =
getopt_long (_argument_count, _argument_vector,
- "adcCDe:Ef:F:gGhH:i:Ij:k:K:lL:nN:oprs:S:tTvW:Z:7",
+ "acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oprs:S:tTvW:Z:7",
long_options, NULL))
!= -1)
{
@@ -738,6 +747,15 @@ Options::parse_options (int argc, char *argv[])
}
break;
}
+ case 'm': /* Multiple iterations for finding good asso_values. */
+ {
+ if ((_asso_iterations = atoi (/*getopt*/optarg)) < 0)
+ {
+ fprintf (stderr, "asso_iterations value must not be negative, assuming 0\n");
+ _asso_iterations = 0;
+ }
+ break;
+ }
case 'n': /* Don't include the length when computing hash function. */
{
_option_word |= NOLENGTH;
diff --git a/src/options.h b/src/options.h
index ca39c7e..e2e12b9 100644
--- a/src/options.h
+++ b/src/options.h
@@ -190,6 +190,9 @@ public:
/* Returns the initial associated character value. */
int get_initial_asso_value () const;
+ /* Returns the number of iterations for finding good asso_values. */
+ int get_asso_iterations () const;
+
/* Returns the total number of switch statements to generate. */
int get_total_switches () const;
@@ -250,6 +253,9 @@ private:
/* Initial value for asso_values table. */
int _initial_asso_value;
+ /* Number of attempts at finding good asso_values. */
+ int _asso_iterations;
+
/* Number of switch statements to generate. */
int _total_switches;
diff --git a/src/options.icc b/src/options.icc
index 96ca159..2585570 100644
--- a/src/options.icc
+++ b/src/options.icc
@@ -146,6 +146,13 @@ Options::get_initial_asso_value () const
return _initial_asso_value;
}
+/* Returns the number of iterations for finding finding good asso_values. */
+INLINE int
+Options::get_asso_iterations () const
+{
+ return _asso_iterations;
+}
+
/* Returns the total number of switch statements to generate. */
INLINE int
Options::get_total_switches () const
diff --git a/src/search.cc b/src/search.cc
index 3935260..72a61e7 100644
--- a/src/search.cc
+++ b/src/search.cc
@@ -434,6 +434,9 @@ Search::prepare_asso_values ()
if (option[RANDOM] || option.get_jump () == 0)
/* We will use rand(), so initialize the random number generator. */
srand (reinterpret_cast<long>(time (0)));
+
+ _initial_asso_value = (option[RANDOM] ? -1 : option.get_initial_asso_value ());
+ _jump = option.get_jump ();
}
/* Puts a first guess into asso_values[]. */
@@ -441,14 +444,14 @@ Search::prepare_asso_values ()
void
Search::init_asso_values ()
{
- if (option[RANDOM])
+ if (_initial_asso_value < 0)
{
for (int i = 0; i < _alpha_size; i++)
_asso_values[i] = rand () & (_asso_value_max - 1);
}
else
{
- int asso_value = option.get_initial_asso_value ();
+ int asso_value = _initial_asso_value;
asso_value = asso_value & (_asso_value_max - 1);
for (int i = 0; i < _alpha_size; i++)
@@ -565,7 +568,7 @@ Search::try_asso_value (unsigned char c, KeywordExt *curr, int iterations)
/* Try next value. Wrap around mod _asso_value_max. */
_asso_values[c] =
- (_asso_values[c] + (option.get_jump () ? option.get_jump () : rand ()))
+ (_asso_values[c] + (_jump != 0 ? _jump : rand ()))
& (_asso_value_max - 1);
/* Iteration Number array is a win, O(1) intialization time! */
@@ -733,7 +736,77 @@ Search::optimize ()
prepare_asso_values ();
/* Search for good _asso_values[]. */
- find_asso_values ();
+ int asso_iteration;
+ if ((asso_iteration = option.get_asso_iterations ()) == 0)
+ /* Try only the given _initial_asso_value and _jump. */
+ find_asso_values ();
+ else
+ {
+ /* Try different pairs of _initial_asso_value and _jump, in the
+ following order:
+ (0, 1)
+ (1, 1)
+ (2, 1) (0, 3)
+ (3, 1) (1, 3)
+ (4, 1) (2, 3) (0, 5)
+ (5, 1) (3, 3) (1, 5)
+ ..... */
+ KeywordExt_List *saved_head = _head;
+ int best_initial_asso_value = 0;
+ int best_jump = 1;
+ int *best_asso_values = new int[_alpha_size];
+ int best_collisions = INT_MAX;
+ int best_max_hash_value = INT_MAX;
+
+ _initial_asso_value = 0; _jump = 1;
+ for (;;)
+ {
+ /* Restore the keyword list in its original order. */
+ _head = copy_list (saved_head);
+ /* Find good _asso_values[]. */
+ find_asso_values ();
+ /* Test whether it is the best solution so far. */
+ int collisions = 0;
+ int max_hash_value = INT_MIN;
+ _collision_detector->clear ();
+ for (KeywordExt_List *ptr = _head; ptr; ptr = ptr->rest())
+ {
+ KeywordExt *keyword = ptr->first();
+ int hashcode = compute_hash (keyword);
+ if (max_hash_value < hashcode)
+ max_hash_value = hashcode;
+ if (_collision_detector->set_bit (hashcode))
+ collisions++;
+ }
+ if (collisions < best_collisions
+ || (collisions == best_collisions
+ && max_hash_value < best_max_hash_value))
+ {
+ memcpy (best_asso_values, _asso_values,
+ _alpha_size * sizeof (_asso_values[0]));
+ best_collisions = collisions;
+ best_max_hash_value = max_hash_value;
+ }
+ /* Delete the copied keyword list. */
+ delete_list (_head);
+
+ if (--asso_iteration == 0)
+ break;
+ /* Prepare for next iteration. */
+ if (_initial_asso_value >= 2)
+ _initial_asso_value -= 2, _jump += 2;
+ else
+ _initial_asso_value += _jump, _jump = 1;
+ }
+ _head = saved_head;
+ /* Install the best found asso_values. */
+ _initial_asso_value = best_initial_asso_value;
+ _jump = best_jump;
+ memcpy (_asso_values, best_asso_values,
+ _alpha_size * sizeof (_asso_values[0]));
+ delete[] best_asso_values;
+ /* The keywords' _hash_value fields are recomputed below. */
+ }
/* Make one final check, just to make sure nothing weird happened.... */
_collision_detector->clear ();
@@ -749,7 +822,7 @@ Search::optimize ()
{
fprintf (stderr,
"\nInternal error, duplicate value %d:\n"
- "try options -D or -r, or use new key positions.\n\n",
+ "try options -D or -m or -r, or use new key positions.\n\n",
hashcode);
exit (1);
}
diff --git a/src/search.h b/src/search.h
index c8e0766..f058a4d 100644
--- a/src/search.h
+++ b/src/search.h
@@ -132,6 +132,11 @@ private:
/* Exclusive upper bound for every _asso_values[c]. A power of 2. */
int _asso_value_max;
+ /* Initial value for asso_values table. -1 means random. */
+ int _initial_asso_value;
+ /* Jump length when trying alternative values. 0 means random. */
+ int _jump;
+
/* Maximal possible hash value. */
int _max_hash_value;
diff --git a/tests/test-6.exp b/tests/test-6.exp
index 839c25c..62ae824 100644
--- a/tests/test-6.exp
+++ b/tests/test-6.exp
@@ -87,6 +87,11 @@ Algorithm employed by gperf:
argument represents the number of times to iterate
when resolving a collision. '0' means "iterate by
the number of keywords".
+ -m, --multiple-iterations=ITERATIONS
+ Perform multiple choices of the -i and -j values,
+ and choose the best results. This increases the
+ running time by a factor of ITERATIONS but does a
+ good job minimizing the generated table size.
-i, --initial-asso=N Provide an initial value for the associate values
array. Default is 0. Setting this value larger helps
inflate the size of the final table.