diff options
author | Bruno Haible <bruno@clisp.org> | 2003-01-15 13:01:25 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2003-01-15 13:01:25 +0000 |
commit | c3467c53024650c0a495d30caabad74ecea4f080 (patch) | |
tree | 0dfd3df1b3c4747fac443f22abf2eaa4157a4e7d | |
parent | c67f999b54f456288561652503e8ad314a671f98 (diff) | |
download | gperf-c3467c53024650c0a495d30caabad74ecea4f080.tar.gz |
New option --multiple-iterations.
-rw-r--r-- | ChangeLog | 23 | ||||
-rw-r--r-- | NEWS | 5 | ||||
-rw-r--r-- | doc/gperf.texi | 8 | ||||
-rw-r--r-- | src/keyword-list.cc | 42 | ||||
-rw-r--r-- | src/keyword-list.h | 7 | ||||
-rw-r--r-- | src/options.cc | 26 | ||||
-rw-r--r-- | src/options.h | 6 | ||||
-rw-r--r-- | src/options.icc | 7 | ||||
-rw-r--r-- | src/search.cc | 83 | ||||
-rw-r--r-- | src/search.h | 5 | ||||
-rw-r--r-- | tests/test-6.exp | 5 |
11 files changed, 207 insertions, 10 deletions
@@ -1,3 +1,26 @@ +2002-11-04 Bruno Haible <bruno@clisp.org> + + * src/options.h (Options::_asso_iterations): New field. + (Options::get_asso_iterations): New method declaration. + * src/options.icc (Options::get_asso_iterations): New method. + * src/options.cc (Options::short_usage): Mention j<jump> and m<num>. + (Options::long_usage): Document option -m. + (Options::Options): Initialize _asso_iterations. + (Options::~Options): Print _asso_iterations too. + (long_options): Add --multiple-iterations. + (Options::parse_options): Handle option -m. + * src/keyword-list.h (copy_list, delete_list): New declarations. + * src/keyword-list.cc (copy_list, delete_list): New functions. + * src/search.h (Search::_initial_asso_value, Search::_jump): New fields. + * src/search.cc (Search::prepare_asso_values): Initialize + _initial_asso_value and _jump here. + (Search::init_asso_values): Use _initial_asso_value. + (Search::try_asso_value): Use _jump. + (Search::optimize): If option -m was given, iterate over different + values for _initial_asso_value and _jump. + * doc/gperf.texi (Algorithmic Details): Document option -m. + * tests/test-6.exp: Update. + 2002-11-03 Bruno Haible <bruno@clisp.org> Bug fix: When option -j 0 was used without option -r, the output was @@ -1,3 +1,8 @@ +New in 2.8: + +* Added option -m/--multiple-iterations that reduces the size of the + generated table. + New in 2.7.2: * Keywords may now be enclosed in double quotes; this permits the use of diff --git a/doc/gperf.texi b/doc/gperf.texi index 8b5ce72..46f0832 100644 --- a/doc/gperf.texi +++ b/doc/gperf.texi @@ -546,7 +546,7 @@ Also, in this case the @samp{-c} option is ignored. There are @emph{many} options to @code{gperf}. They were added to make the program more convenient for use with real applications. ``On-line'' -help is readily available via the @samp{-h} option. Here is the +help is readily available via the @samp{--help} option. Here is the complete list of options. @menu @@ -802,6 +802,12 @@ iterate when resolving a collision. `0' means iterate by the number of keywords. This option is probably most useful when used in conjunction with options @samp{-D} and/or @samp{-S} for @emph{large} keyword sets. +@item -m @var{iterations} +@itemx --multiple-iterations=@var{iterations} +Perform multiple choices of the @samp{-i} and @samp{-j} values, and +choose the best results. This increases the running time by a factor of +@var{iterations} but does a good job minimizing the generated table size. + @item -i @var{initial-value} @itemx --initial-asso=@var{initial-value} Provides an initial @var{value} for the associate values array. Default diff --git a/src/keyword-list.cc b/src/keyword-list.cc index b154a98..8e0983c 100644 --- a/src/keyword-list.cc +++ b/src/keyword-list.cc @@ -25,18 +25,60 @@ #include <stddef.h> +/* -------------------------- Keyword_List class --------------------------- */ + /* Constructor. */ Keyword_List::Keyword_List (Keyword *car) : _cdr (NULL), _car (car) { } +/* ------------------------- KeywordExt_List class ------------------------- */ + /* Unused constructor. */ KeywordExt_List::KeywordExt_List (KeywordExt *car) : Keyword_List (car) { } +/* ------------------------ Keyword_List functions ------------------------- */ + +/* Copies a linear list, sharing the list elements. */ +Keyword_List * +copy_list (Keyword_List *list) +{ + Keyword_List *result; + Keyword_List **lastp = &result; + while (list != NULL) + { + Keyword_List *new_cons = new Keyword_List (list->first()); + *lastp = new_cons; + lastp = &new_cons->rest(); + list = list->rest(); + } + *lastp = NULL; + return result; +} + +/* Copies a linear list, sharing the list elements. */ +KeywordExt_List * +copy_list (KeywordExt_List *list) +{ + return static_cast<KeywordExt_List *> (copy_list (static_cast<Keyword_List *> (list))); +} + +/* Deletes a linear list, keeping the list elements in memory. */ +void +delete_list (Keyword_List *list) +{ + while (list != NULL) + { + Keyword_List *rest = list->rest(); + delete list; + list = rest; + } +} + #ifndef __OPTIMIZE__ diff --git a/src/keyword-list.h b/src/keyword-list.h index 09f4988..888eb89 100644 --- a/src/keyword-list.h +++ b/src/keyword-list.h @@ -57,6 +57,13 @@ public: KeywordExt_List *& rest (); }; +/* Copies a linear list, sharing the list elements. */ +extern Keyword_List * copy_list (Keyword_List *list); +extern KeywordExt_List * copy_list (KeywordExt_List *list); + +/* Deletes a linear list, keeping the list elements in memory. */ +extern void delete_list (Keyword_List *list); + #ifdef __OPTIMIZE__ #define INLINE inline diff --git a/src/options.cc b/src/options.cc index 4a24212..f207ff9 100644 --- a/src/options.cc +++ b/src/options.cc @@ -65,7 +65,7 @@ static const char *const DEFAULT_DELIMITERS = ",\n"; void Options::short_usage (FILE * stream) const { - fprintf (stream, "Usage: %s [-cCdDef[num]F<initializers>GhH<hashname>i<init>Ijk<keys>K<keyname>lL<language>nN<function name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n" + fprintf (stream, "Usage: %s [-cCdDef[num]F<initializers>GhH<hashname>i<init>Ij<jump>k<keys>K<keyname>lL<language>m<num>nN<function name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n" "Try '%s --help' for more information.\n", program_name, program_name); } @@ -193,6 +193,12 @@ Options::long_usage (FILE * stream) const " when resolving a collision. '0' means \"iterate by\n" " the number of keywords\".\n"); fprintf (stream, + " -m, --multiple-iterations=ITERATIONS\n" + " Perform multiple choices of the -i and -j values,\n" + " and choose the best results. This increases the\n" + " running time by a factor of ITERATIONS but does a\n" + " good job minimizing the generated table size.\n"); + fprintf (stream, " -i, --initial-asso=N Provide an initial value for the associate values\n" " array. Default is 0. Setting this value larger helps\n" " inflate the size of the final table.\n"); @@ -413,6 +419,7 @@ Options::Options () _iterations (0), _jump (DEFAULT_JUMP_VALUE), _initial_asso_value (0), + _asso_iterations (0), _total_switches (1), _size_multiple (1), _function_name (DEFAULT_NAME), @@ -459,6 +466,7 @@ Options::~Options () "\nword list name = %s" "\nkey name = %s" "\ninitializer suffix = %s" + "\nasso_values iterations = %d" "\njump value = %d" "\nhash table size multiplier = %d" "\ninitial associated value = %d" @@ -486,8 +494,8 @@ Options::~Options () _option_word & SEVENBIT ? "enabled" : "disabled", _iterations, _function_name, _hash_name, _wordlist_name, _key_name, - _initializer_suffix, _jump, _size_multiple, _initial_asso_value, - _delimiters, _total_switches); + _initializer_suffix, _asso_iterations, _jump, _size_multiple, + _initial_asso_value, _delimiters, _total_switches); if (_option_word & ALLCHARS) fprintf (stderr, "all characters are used in the hash function\n"); else @@ -535,6 +543,7 @@ static const struct option long_options[] = { "fast", required_argument, NULL, 'f' }, { "initial-asso", required_argument, NULL, 'i' }, { "jump", required_argument, NULL, 'j' }, + { "multiple-iterations", required_argument, NULL, 'm' }, { "no-strlen", no_argument, NULL, 'n' }, { "occurrence-sort", no_argument, NULL, 'o' }, { "random", no_argument, NULL, 'r' }, @@ -556,7 +565,7 @@ Options::parse_options (int argc, char *argv[]) while ((option_char = getopt_long (_argument_count, _argument_vector, - "adcCDe:Ef:F:gGhH:i:Ij:k:K:lL:nN:oprs:S:tTvW:Z:7", + "acCdDe:Ef:F:gGhH:i:Ij:k:K:lL:m:nN:oprs:S:tTvW:Z:7", long_options, NULL)) != -1) { @@ -738,6 +747,15 @@ Options::parse_options (int argc, char *argv[]) } break; } + case 'm': /* Multiple iterations for finding good asso_values. */ + { + if ((_asso_iterations = atoi (/*getopt*/optarg)) < 0) + { + fprintf (stderr, "asso_iterations value must not be negative, assuming 0\n"); + _asso_iterations = 0; + } + break; + } case 'n': /* Don't include the length when computing hash function. */ { _option_word |= NOLENGTH; diff --git a/src/options.h b/src/options.h index ca39c7e..e2e12b9 100644 --- a/src/options.h +++ b/src/options.h @@ -190,6 +190,9 @@ public: /* Returns the initial associated character value. */ int get_initial_asso_value () const; + /* Returns the number of iterations for finding good asso_values. */ + int get_asso_iterations () const; + /* Returns the total number of switch statements to generate. */ int get_total_switches () const; @@ -250,6 +253,9 @@ private: /* Initial value for asso_values table. */ int _initial_asso_value; + /* Number of attempts at finding good asso_values. */ + int _asso_iterations; + /* Number of switch statements to generate. */ int _total_switches; diff --git a/src/options.icc b/src/options.icc index 96ca159..2585570 100644 --- a/src/options.icc +++ b/src/options.icc @@ -146,6 +146,13 @@ Options::get_initial_asso_value () const return _initial_asso_value; } +/* Returns the number of iterations for finding finding good asso_values. */ +INLINE int +Options::get_asso_iterations () const +{ + return _asso_iterations; +} + /* Returns the total number of switch statements to generate. */ INLINE int Options::get_total_switches () const diff --git a/src/search.cc b/src/search.cc index 3935260..72a61e7 100644 --- a/src/search.cc +++ b/src/search.cc @@ -434,6 +434,9 @@ Search::prepare_asso_values () if (option[RANDOM] || option.get_jump () == 0) /* We will use rand(), so initialize the random number generator. */ srand (reinterpret_cast<long>(time (0))); + + _initial_asso_value = (option[RANDOM] ? -1 : option.get_initial_asso_value ()); + _jump = option.get_jump (); } /* Puts a first guess into asso_values[]. */ @@ -441,14 +444,14 @@ Search::prepare_asso_values () void Search::init_asso_values () { - if (option[RANDOM]) + if (_initial_asso_value < 0) { for (int i = 0; i < _alpha_size; i++) _asso_values[i] = rand () & (_asso_value_max - 1); } else { - int asso_value = option.get_initial_asso_value (); + int asso_value = _initial_asso_value; asso_value = asso_value & (_asso_value_max - 1); for (int i = 0; i < _alpha_size; i++) @@ -565,7 +568,7 @@ Search::try_asso_value (unsigned char c, KeywordExt *curr, int iterations) /* Try next value. Wrap around mod _asso_value_max. */ _asso_values[c] = - (_asso_values[c] + (option.get_jump () ? option.get_jump () : rand ())) + (_asso_values[c] + (_jump != 0 ? _jump : rand ())) & (_asso_value_max - 1); /* Iteration Number array is a win, O(1) intialization time! */ @@ -733,7 +736,77 @@ Search::optimize () prepare_asso_values (); /* Search for good _asso_values[]. */ - find_asso_values (); + int asso_iteration; + if ((asso_iteration = option.get_asso_iterations ()) == 0) + /* Try only the given _initial_asso_value and _jump. */ + find_asso_values (); + else + { + /* Try different pairs of _initial_asso_value and _jump, in the + following order: + (0, 1) + (1, 1) + (2, 1) (0, 3) + (3, 1) (1, 3) + (4, 1) (2, 3) (0, 5) + (5, 1) (3, 3) (1, 5) + ..... */ + KeywordExt_List *saved_head = _head; + int best_initial_asso_value = 0; + int best_jump = 1; + int *best_asso_values = new int[_alpha_size]; + int best_collisions = INT_MAX; + int best_max_hash_value = INT_MAX; + + _initial_asso_value = 0; _jump = 1; + for (;;) + { + /* Restore the keyword list in its original order. */ + _head = copy_list (saved_head); + /* Find good _asso_values[]. */ + find_asso_values (); + /* Test whether it is the best solution so far. */ + int collisions = 0; + int max_hash_value = INT_MIN; + _collision_detector->clear (); + for (KeywordExt_List *ptr = _head; ptr; ptr = ptr->rest()) + { + KeywordExt *keyword = ptr->first(); + int hashcode = compute_hash (keyword); + if (max_hash_value < hashcode) + max_hash_value = hashcode; + if (_collision_detector->set_bit (hashcode)) + collisions++; + } + if (collisions < best_collisions + || (collisions == best_collisions + && max_hash_value < best_max_hash_value)) + { + memcpy (best_asso_values, _asso_values, + _alpha_size * sizeof (_asso_values[0])); + best_collisions = collisions; + best_max_hash_value = max_hash_value; + } + /* Delete the copied keyword list. */ + delete_list (_head); + + if (--asso_iteration == 0) + break; + /* Prepare for next iteration. */ + if (_initial_asso_value >= 2) + _initial_asso_value -= 2, _jump += 2; + else + _initial_asso_value += _jump, _jump = 1; + } + _head = saved_head; + /* Install the best found asso_values. */ + _initial_asso_value = best_initial_asso_value; + _jump = best_jump; + memcpy (_asso_values, best_asso_values, + _alpha_size * sizeof (_asso_values[0])); + delete[] best_asso_values; + /* The keywords' _hash_value fields are recomputed below. */ + } /* Make one final check, just to make sure nothing weird happened.... */ _collision_detector->clear (); @@ -749,7 +822,7 @@ Search::optimize () { fprintf (stderr, "\nInternal error, duplicate value %d:\n" - "try options -D or -r, or use new key positions.\n\n", + "try options -D or -m or -r, or use new key positions.\n\n", hashcode); exit (1); } diff --git a/src/search.h b/src/search.h index c8e0766..f058a4d 100644 --- a/src/search.h +++ b/src/search.h @@ -132,6 +132,11 @@ private: /* Exclusive upper bound for every _asso_values[c]. A power of 2. */ int _asso_value_max; + /* Initial value for asso_values table. -1 means random. */ + int _initial_asso_value; + /* Jump length when trying alternative values. 0 means random. */ + int _jump; + /* Maximal possible hash value. */ int _max_hash_value; diff --git a/tests/test-6.exp b/tests/test-6.exp index 839c25c..62ae824 100644 --- a/tests/test-6.exp +++ b/tests/test-6.exp @@ -87,6 +87,11 @@ Algorithm employed by gperf: argument represents the number of times to iterate when resolving a collision. '0' means "iterate by the number of keywords". + -m, --multiple-iterations=ITERATIONS + Perform multiple choices of the -i and -j values, + and choose the best results. This increases the + running time by a factor of ITERATIONS but does a + good job minimizing the generated table size. -i, --initial-asso=N Provide an initial value for the associate values array. Default is 0. Setting this value larger helps inflate the size of the final table. |