Completely new asso_values search algorithm.

author: Bruno Haible <bruno@clisp.org> 2003-03-18 10:22:37 +0000
committer: Bruno Haible <bruno@clisp.org> 2003-03-18 10:22:37 +0000
commit: 6d268d095b4cb2bab821e82b785a4b74810b01a6 (patch)
tree: 5b3f7f60909fc6a7d5f4697c9d4ae58f947f0a7a /src
parent: 40f37680ac107a61bbe14efe6213f70c91c6b461 (diff)
download: gperf-6d268d095b4cb2bab821e82b785a4b74810b01a6.tar.gz
8 files changed, 536 insertions, 681 deletions
diff --git a/src/keyword-list.cc b/src/keyword-list.cc
index 4f27117..d4b55f9 100644
--- a/src/keyword-list.cc
+++ b/src/keyword-list.cc
@@ -35,7 +35,7 @@ Keyword_List::Keyword_List (Keyword *car)
 
 /* ------------------------- KeywordExt_List class ------------------------- */
 
-/* Unused constructor.  */
+/* Constructor.  */
 KeywordExt_List::KeywordExt_List (KeywordExt *car)
   : Keyword_List (car)
 {
diff --git a/src/keyword-list.h b/src/keyword-list.h
index 664f7de..d371022 100644
--- a/src/keyword-list.h
+++ b/src/keyword-list.h
@@ -48,7 +48,7 @@ protected:
 class KeywordExt_List : public Keyword_List
 {
 public:
-  /* Unused constructor.  */
+  /* Constructor.  */
                         KeywordExt_List (KeywordExt *car);
 
   /* Access to first element of list.  */
diff --git a/src/keyword.h b/src/keyword.h
index de89730..02e0364 100644
--- a/src/keyword.h
+++ b/src/keyword.h
@@ -75,7 +75,6 @@ struct KeywordExt : public Keyword
   void                  delete_selchars ();
 
   /* Data members used by the algorithm.  */
-  int                   _occurrence; /* Frequency of key set occurrences.  */
   int                   _hash_value; /* Hash value for the keyword.  */
 
   /* Data members used by the output routines.  */
diff --git a/src/options.cc b/src/options.cc
index e5428e5..88dacf2 100644
--- a/src/options.cc
+++ b/src/options.cc
@@ -196,13 +196,6 @@ Options::long_usage (FILE * stream) const
            "  -D, --duplicates       Handle keywords that hash to duplicate values. This\n"
            "                         is useful for certain highly redundant keyword sets.\n");
   fprintf (stream,
-           "  -f, --fast=ITERATIONS  Generate the gen-perf.hash function \"fast\". This\n"
-           "                         decreases gperf's running time at the cost of\n"
-           "                         minimizing generated table size. The numeric\n"
-           "                         argument represents the number of times to iterate\n"
-           "                         when resolving a collision. '0' means \"iterate by\n"
-           "                         the number of keywords\".\n");
-  fprintf (stream,
            "  -m, --multiple-iterations=ITERATIONS\n"
            "                         Perform multiple choices of the -i and -j values,\n"
            "                         and choose the best results. This increases the\n"
@@ -221,10 +214,6 @@ Options::long_usage (FILE * stream) const
            "  -n, --no-strlen        Do not include the length of the keyword when\n"
            "                         computing the hash function.\n");
   fprintf (stream,
-           "  -o, --occurrence-sort  Reorders input keys by frequency of occurrence of\n"
-           "                         the key sets. This should decrease the search time\n"
-           "                         dramatically.\n");
-  fprintf (stream,
            "  -r, --random           Utilizes randomness to initialize the associated\n"
            "                         values table.\n");
   fprintf (stream,
@@ -429,7 +418,6 @@ Options::Options ()
     _input_file_name (NULL),
     _output_file_name (NULL),
     _language (NULL),
-    _iterations (0),
     _jump (DEFAULT_JUMP_VALUE),
     _initial_asso_value (0),
     _asso_iterations (0),
@@ -454,14 +442,12 @@ Options::~Options ()
     {
       fprintf (stderr, "\ndumping Options:"
                "\nDEBUG is.......: %s"
-               "\nORDER is.......: %s"
                "\nTYPE is........: %s"
                "\nRANDOM is......: %s"
                "\nSWITCH is......: %s"
                "\nNOLENGTH is....: %s"
                "\nLENTABLE is....: %s"
                "\nDUP is.........: %s"
-               "\nFAST is........: %s"
                "\nCOMP is........: %s"
                "\nNOTYPE is......: %s"
                "\nGLOBAL is......: %s"
@@ -473,8 +459,6 @@ Options::~Options ()
                "\nENUM is........: %s"
                "\nINCLUDE is.....: %s"
                "\nSEVENBIT is....: %s"
-               "\nOPT_CHOICE is..: %s"
-               "\niterations = %d"
                "\nlookup function name = %s"
                "\nhash function name = %s"
                "\nword list name = %s"
@@ -487,14 +471,12 @@ Options::~Options ()
                "\ndelimiters = %s"
                "\nnumber of switch statements = %d\n",
                _option_word & DEBUG ? "enabled" : "disabled",
-               _option_word & ORDER ? "enabled" : "disabled",
                _option_word & TYPE ? "enabled" : "disabled",
                _option_word & RANDOM ? "enabled" : "disabled",
                _option_word & SWITCH ? "enabled" : "disabled",
                _option_word & NOLENGTH ? "enabled" : "disabled",
                _option_word & LENTABLE ? "enabled" : "disabled",
                _option_word & DUP ? "enabled" : "disabled",
-               _option_word & FAST ? "enabled" : "disabled",
                _option_word & COMP ? "enabled" : "disabled",
                _option_word & NOTYPE ? "enabled" : "disabled",
                _option_word & GLOBAL ? "enabled" : "disabled",
@@ -506,8 +488,6 @@ Options::~Options ()
                _option_word & ENUM ? "enabled" : "disabled",
                _option_word & INCLUDE ? "enabled" : "disabled",
                _option_word & SEVENBIT ? "enabled" : "disabled",
-               _option_word & OPT_CHOICE ? "enabled" : "disabled",
-               _iterations,
                _function_name, _hash_name, _wordlist_name, _slot_name,
                _initializer_suffix, _asso_iterations, _jump, _size_multiple,
                _initial_asso_value, _delimiters, _total_switches);
@@ -711,15 +691,7 @@ Options::parse_options (int argc, char *argv[])
             break;
           }
         case 'f':               /* Generate the hash table "fast". */
-          {
-            _option_word |= FAST;
-            if ((_iterations = atoi (/*getopt*/optarg)) < 0)
-              {
-                fprintf (stderr, "iterations value must not be negative, assuming 0\n");
-                _iterations = 0;
-              }
-            break;
-          }
+          break;                /* Not needed any more.  */
         case 'F':
           {
             _initializer_suffix = /*getopt*/optarg;
@@ -861,15 +833,9 @@ Options::parse_options (int argc, char *argv[])
             break;
           }
         case 'o':               /* Order input by frequency of key set occurrence. */
-          {
-            _option_word |= ORDER;
-            break;
-          }
+          break;                /* Not needed any more.  */
         case 'O':               /* Optimized choice during collision resolution.  */
-          {
-            _option_word |= OPT_CHOICE;
-            break;
-          }
+          break;                /* Not needed any more.  */
         case 'p':               /* Generated lookup function a pointer instead of int. */
           break;                /* This is now the default. */
         case 'r':               /* Utilize randomness to initialize the associated values table. */
diff --git a/src/options.h b/src/options.h
index bad6966..9d37201 100644
--- a/src/options.h
+++ b/src/options.h
@@ -39,72 +39,63 @@ enum Option_Type
   /* Enable debugging (prints diagnostics to stderr).  */
   DEBUG        = 1 << 0,
 
-  /* Apply ordering heuristic to speed-up search time.  */
-  ORDER        = 1 << 1,
-
   /* Use the given key positions.  */
-  POSITIONS    = 1 << 2,
+  POSITIONS    = 1 << 1,
 
   /* Use all characters in hash function.  */
-  ALLCHARS     = 1 << 3,
+  ALLCHARS     = 1 << 2,
 
   /* Handle user-defined type structured keyword input.  */
-  TYPE         = 1 << 4,
+  TYPE         = 1 << 3,
 
   /* Randomly initialize the associated values table.  */
-  RANDOM       = 1 << 5,
+  RANDOM       = 1 << 4,
 
   /* Generate switch output to save space.  */
-  SWITCH       = 1 << 6,
+  SWITCH       = 1 << 5,
 
   /* Don't include keyword length in hash computations.  */
-  NOLENGTH     = 1 << 7,
+  NOLENGTH     = 1 << 6,
 
   /* Generate a length table for string comparison.  */
-  LENTABLE     = 1 << 8,
+  LENTABLE     = 1 << 7,
 
   /* Handle duplicate hash values for keywords.  */
-  DUP          = 1 << 9,
-
-  /* Generate the hash function "fast".  */
-  FAST         = 1 << 10,
+  DUP          = 1 << 8,
 
   /* Don't include user-defined type definition in output -- it's already
      defined elsewhere.  */
-  NOTYPE       = 1 << 11,
+  NOTYPE       = 1 << 9,
 
   /* Generate strncmp rather than strcmp.  */
-  COMP         = 1 << 12,
+  COMP         = 1 << 10,
 
   /* Make the keyword table a global variable.  */
-  GLOBAL       = 1 << 13,
+  GLOBAL       = 1 << 11,
 
   /* Make the generated tables readonly (const).  */
-  CONST        = 1 << 14,
+  CONST        = 1 << 12,
 
   /* Generate K&R C code: no prototypes, no const.  */
-  KRC          = 1 << 15,
+  KRC          = 1 << 13,
 
   /* Generate C code: no prototypes, but const (user can #define it away).  */
-  C            = 1 << 16,
+  C            = 1 << 14,
 
   /* Generate ISO/ANSI C code: prototypes and const, but no class.  */
-  ANSIC        = 1 << 17,
+  ANSIC        = 1 << 15,
 
   /* Generate C++ code: prototypes, const, class, inline, enum.  */
-  CPLUSPLUS    = 1 << 18,
+  CPLUSPLUS    = 1 << 16,
 
   /* Use enum for constants.  */
-  ENUM         = 1 << 19,
+  ENUM         = 1 << 17,
 
   /* Generate #include statements.  */
-  INCLUDE      = 1 << 20,
+  INCLUDE      = 1 << 18,
 
   /* Assume 7-bit, not 8-bit, characters.  */
-  SEVENBIT     = 1 << 21,
-
-  /* Apply optimized collision resolution to speed-up search time.  */
-  OPT_CHOICE   = 1 << 22
+  SEVENBIT     = 1 << 19
 };
 
 /* Class manager for gperf program Options. */
@@ -140,9 +131,6 @@ public:
   /* Sets the output language, if not already set.  */
   void                  set_language (const char *language);
 
-  /* Returns the iterations value.  */
-  int                   get_iterations () const;
-
   /* Returns the jump value.  */
   int                   get_jump () const;
 
@@ -222,9 +210,6 @@ private:
   /* The output language.  */
   const char *          _language;
 
-  /* Amount to iterate when a collision occurs.  */
-  int                   _iterations;
-
   /* Jump length when trying alternative values.  */
   int                   _jump;
 
diff --git a/src/options.icc b/src/options.icc
index c54af90..a9295a0 100644
--- a/src/options.icc
+++ b/src/options.icc
@@ -51,13 +51,6 @@ Options::get_output_file_name () const
   return _output_file_name;
 }
 
-/* Returns the iterations value.  */
-INLINE int
-Options::get_iterations () const
-{
-  return _iterations;
-}
-
 /* Returns the jump value.  */
 INLINE int
 Options::get_jump () const
diff --git a/src/search.cc b/src/search.cc
index 705827e..8fc1f7c 100644
--- a/src/search.cc
+++ b/src/search.cc
@@ -634,140 +634,6 @@ Search::prepare ()
 
   /* Memory allocation.  */
   _asso_values = new int[_alpha_size];
-  _determined = new bool[_alpha_size];
-}
-
-/* ---------------- Reordering the Keyword list (optional) ----------------- */
-
-/* Computes the sum of occurrences of the _selchars of a keyword.
-   This is a kind of correlation measure: Keywords which have many
-   selected characters in common with other keywords have a high
-   occurrence sum.  Keywords whose selected characters don't occur
-   in other keywords have a low occurrence sum.  */
-
-inline int
-Search::compute_occurrence (KeywordExt *ptr) const
-{
-  int value = 0;
-
-  const unsigned int *p = ptr->_selchars;
-  unsigned int i = ptr->_selchars_length;
-  for (; i > 0; p++, i--)
-    value += _occurrences[*p];
-
-  return value;
-}
-
-/* Comparison function for sorting by decreasing _occurrence valuation.  */
-static bool
-greater_by_occurrence (KeywordExt *keyword1, KeywordExt *keyword2)
-{
-  return keyword1->_occurrence > keyword2->_occurrence;
-}
-
-/* Auxiliary function for reorder():
-   Sets all alphabet characters as undetermined.  */
-
-inline void
-Search::clear_determined ()
-{
-  memset (_determined, 0, _alpha_size * sizeof (_determined[0]));
-}
-
-/* Auxiliary function for reorder():
-   Sets all selected characters of the keyword as determined.  */
-
-inline void
-Search::set_determined (KeywordExt *keyword)
-{
-  const unsigned int *p = keyword->_selchars;
-  unsigned int i = keyword->_selchars_length;
-  for (; i > 0; p++, i--)
-    _determined[*p] = true;
-}
-
-/* Auxiliary function for reorder():
-   Returns true if the keyword's selected characters are all determined.  */
-
-inline bool
-Search::already_determined (KeywordExt *keyword) const
-{
-  const unsigned int *p = keyword->_selchars;
-  unsigned int i = keyword->_selchars_length;
-  for (; i > 0; p++, i--)
-    if (!_determined[*p])
-      return false;
-
-  return true;
-}
-
-/* Reorders the keyword list so as to minimize search times.
-   First the list is reordered so that frequently occuring keys appear first.
-   Then the list is reordered so that keys whose values are already determined
-   will be placed towards the front of the list.  This helps prune the search
-   time by handling inevitable collisions early in the search process.  See
-   Cichelli's paper from Jan 1980 JACM for details.... */
-
-void
-Search::reorder ()
-{
-  KeywordExt_List *ptr;
-
-  /* Compute the _occurrence valuation of every keyword on the list.  */
-  for (ptr = _head; ptr; ptr = ptr->rest())
-    {
-      KeywordExt *keyword = ptr->first();
-
-      keyword->_occurrence = compute_occurrence (keyword);
-    }
-
-  /* Sort the list by decreasing _occurrence valuation.  */
-  _head = mergesort_list (_head, greater_by_occurrence);
-
-  /* Reorder the list to maximize the efficiency of the search.  */
-
-  /* At the beginning, consider that no asso_values[c] is fixed.  */
-  clear_determined ();
-  for (ptr = _head; ptr != NULL && ptr->rest() != NULL; ptr = ptr->rest())
-    {
-      KeywordExt *keyword = ptr->first();
-
-      /* Then we'll fix asso_values[c] for all c occurring in this keyword.  */
-      set_determined (keyword);
-
-      /* Then we wish to test for hash value collisions the remaining keywords
-         whose hash value is completely determined, as quickly as possible.
-         For this purpose, move all the completely determined keywords in the
-         remaining list immediately past this keyword.  */
-      KeywordExt_List *curr_ptr;
-      KeywordExt_List *next_ptr; /* = curr_ptr->rest() */
-      for (curr_ptr = ptr, next_ptr = curr_ptr->rest();
-           next_ptr != NULL;
-           next_ptr = curr_ptr->rest())
-        {
-          KeywordExt *next_keyword = next_ptr->first();
-
-          if (already_determined (next_keyword))
-            {
-              if (curr_ptr == ptr)
-                /* Keep next_ptr where it is.  */
-                curr_ptr = next_ptr;
-              else
-                {
-                  /* Remove next_ptr from its current list position... */
-                  curr_ptr->rest() = next_ptr->rest();
-                  /* ... and insert it right after ptr.  */
-                  next_ptr->rest() = ptr->rest();
-                  ptr->rest() = next_ptr;
-                }
-
-              /* Advance ptr.  */
-              ptr = ptr->rest();
-            }
-          else
-            curr_ptr = next_ptr;
-        }
-    }
 }
 
 /* ------------------------------------------------------------------------- */
@@ -873,515 +739,585 @@ Search::prepare_asso_values ()
   _jump = option.get_jump ();
 }
 
-/* Puts a first guess into asso_values[].  */
+/* Finds some _asso_values[] that fit.  */
 
-void
-Search::init_asso_values ()
+/* The idea is to choose the _asso_values[] one by one, in a way that
+   a choice that has been made never needs to be undone later.  This
+   means that we split the work into several steps.  Each step chooses
+   one or more _asso_values[c].  The result of choosing one or more
+   _asso_values[c] is that the partitioning of the keyword set gets
+   broader.
+   Look at this partitioning:  After every step, the _asso_values[] of a
+   certain set C of characters are undetermined.  (At the beginning, C
+   is the set of characters c with _occurrences[c] > 0.  At the end, C
+   is empty.)  To each keyword K, we associate the multiset of _selchars
+   for which the _asso_values[] are undetermined:
+                    K  -->  K->_selchars intersect C.
+   Consider two keywords equivalent if their value under this mapping is
+   the same.  This introduces an equivalence relation on the set of
+   keywords.  The equivalence classes partition the keyword set.  (At the
+   beginning, the partition is the finest possible: each K is an equivalence
+   class by itself, because all K have a different _selchars.  At the end,
+   all K have been merged into a single equivalence class.)
+   The partition before a step is always a refinement of the partition
+   after the step.
+   We choose the steps in such a way that the partition really becomes
+   broader at each step.  (A step that only chooses an _asso_values[c]
+   without changing the partition is better merged with the previous step,
+   to avoid useless backtracking.)  */
+
+struct EquivalenceClass
 {
-  if (_initial_asso_value < 0)
-    {
-      for (unsigned int i = 0; i < _alpha_size; i++)
-        _asso_values[i] = rand () & (_asso_value_max - 1);
-    }
-  else
-    {
-      int asso_value = _initial_asso_value;
-
-      asso_value = asso_value & (_asso_value_max - 1);
-      for (unsigned int i = 0; i < _alpha_size; i++)
-        _asso_values[i] = asso_value;
-    }
-}
-
-/* Computes a keyword's hash value, relative to the current _asso_values[],
-   and stores it in keyword->_hash_value.
-   This is called very frequently, and needs to be fast!  */
+  /* The keywords in this equivalence class.  */
+  KeywordExt_List *     _keywords;
+  KeywordExt_List *     _keywords_last;
+  /* The number of keywords in this equivalence class.  */
+  unsigned int          _cardinality;
+  /* The undetermined selected characters for the keywords in this
+     equivalence class, as a canonically reordered multiset.  */
+  unsigned int *        _undetermined_chars;
+  unsigned int          _undetermined_chars_length;
+
+  EquivalenceClass *    _next;
+};
 
-inline int
-Search::compute_hash (KeywordExt *keyword) const
+struct Step
 {
-  int sum = option[NOLENGTH] ? 0 : keyword->_allchars_length;
-
-  const unsigned int *p = keyword->_selchars;
-  int i = keyword->_selchars_length;
-  for (; i > 0; p++, i--)
-    sum += _asso_values[*p];
-
-  return keyword->_hash_value = sum;
-}
-
-/* Computes the disjoint union of two multisets of characters, i.e.
-   the set of characters that are contained with a different multiplicity
-   in set_1 and set_2.  This includes those characters which are contained
-   in one of the sets but not both.
-   Both sets set_1[0..size_1-1] and set_2[0..size_2-1] are given ordered.
-   The result, an ordered set (not multiset!) is stored in set_3[0...].
-   Returns the size of the resulting set.  */
+  /* The characters whose values are being determined in this step.  */
+  unsigned int          _changing_count;
+  unsigned int *        _changing;
+  /* Exclusive upper bound for the _asso_values[c] of this step.
+     A power of 2.  */
+  unsigned int          _asso_value_max;
+  /* The characters whose values will be determined after this step.  */
+  bool *                _undetermined;
+  /* The keyword set partition after this step.  */
+  EquivalenceClass *    _partition;
+  /* The expected number of iterations in this step.  */
+  double                _expected_lower;
+  double                _expected_upper;
+
+  Step *                _next;
+};
 
-inline int
-compute_disjoint_union (const unsigned int *set_1, int size_1,
-                        const unsigned int *set_2, int size_2,
-                        unsigned int *set_3)
+static inline bool
+equals (const unsigned int *ptr1, const unsigned int *ptr2, unsigned int len)
 {
-  unsigned int *base = set_3;
-
-  while (size_1 > 0 && size_2 > 0)
-    if (*set_1 == *set_2)
-      {
-        set_1++, size_1--;
-        set_2++, size_2--;
-      }
-    else
-      {
-        unsigned int next;
-        if (*set_1 < *set_2)
-          next = *set_1++, size_1--;
-        else
-          next = *set_2++, size_2--;
-        if (set_3 == base || next != set_3[-1])
-          *set_3++ = next;
-      }
-
-  while (size_1 > 0)
-    {
-      unsigned int next;
-      next = *set_1++, size_1--;
-      if (set_3 == base || next != set_3[-1])
-        *set_3++ = next;
-    }
-
-  while (size_2 > 0)
+  while (len > 0)
     {
-      unsigned int next;
-      next = *set_2++, size_2--;
-      if (set_3 == base || next != set_3[-1])
-        *set_3++ = next;
+      if (*ptr1 != *ptr2)
+        return false;
+      ptr1++;
+      ptr2++;
+      len--;
     }
-  return set_3 - base;
+  return true;
 }
 
-/* Sorts the given set in increasing frequency of _occurrences[].  */
-
-inline void
-Search::sort_by_occurrence (unsigned int *set, unsigned int len) const
+EquivalenceClass *
+Search::compute_partition (bool *undetermined) const
 {
-  /* Use bubble sort, since the set is typically short.  */
-  for (unsigned int i = 1; i < len; i++)
+  EquivalenceClass *partition = NULL;
+  EquivalenceClass *partition_last = NULL;
+  for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
     {
-      unsigned int j;
-      unsigned int tmp;
+      KeywordExt *keyword = temp->first();
 
-      for (j = i, tmp = set[j];
-           j > 0 && _occurrences[tmp] < _occurrences[set[j-1]];
-           j--)
-        set[j] = set[j - 1];
+      /* Compute the undetermined characters for this keyword.  */
+      unsigned int *undetermined_chars =
+        new unsigned int[keyword->_selchars_length];
+      unsigned int undetermined_chars_length = 0;
+
+      for (int i = 0; i < keyword->_selchars_length; i++)
+        if (undetermined[keyword->_selchars[i]])
+          undetermined_chars[undetermined_chars_length++] = keyword->_selchars[i];
+
+      /* Look up the equivalence class to which this keyword belongs.  */
+      EquivalenceClass *equclass;
+      for (equclass = partition; equclass; equclass = equclass->_next)
+        if (equclass->_undetermined_chars_length == undetermined_chars_length
+            && equals (equclass->_undetermined_chars, undetermined_chars,
+                       undetermined_chars_length))
+          break;
+      if (equclass == NULL)
+        {
+          equclass = new EquivalenceClass();
+          equclass->_keywords = NULL;
+          equclass->_keywords_last = NULL;
+          equclass->_cardinality = 0;
+          equclass->_undetermined_chars = undetermined_chars;
+          equclass->_undetermined_chars_length = undetermined_chars_length;
+          equclass->_next = NULL;
+          if (partition)
+            partition_last->_next = equclass;
+          else
+            partition = equclass;
+          partition_last = equclass;
+        }
+      else
+        delete[] undetermined_chars;
 
-      set[j] = tmp;
+      /* Add the keyword to the equivalence class.  */
+      KeywordExt_List *cons = new KeywordExt_List(keyword);
+      if (equclass->_keywords)
+        equclass->_keywords_last->rest() = cons;
+      else
+        equclass->_keywords = cons;
+      equclass->_keywords_last = cons;
+      equclass->_cardinality++;
     }
+
+  /* Free some of the allocated memory.  The caller doesn't need it.  */
+  for (EquivalenceClass *cls = partition; cls; cls = cls->_next)
+    delete[] cls->_undetermined_chars;
+
+  return partition;
 }
 
-/* Computes the frequency of occurrence of a character among the keywords
-   up to the given keyword.  */
-inline unsigned int
-Search::compute_occurrence (unsigned int c, KeywordExt *curr) const
+static void
+delete_partition (EquivalenceClass *partition)
 {
-  unsigned int occurrence = 0;
-
-  for (KeywordExt_List *temp = _head; ; temp = temp->rest())
+  while (partition != NULL)
     {
-      KeywordExt *keyword = temp->first();
-
-      int m = keyword->_selchars_length;
-      for (int i = 0; i < m; i++)
-        if (keyword->_selchars[i] == c)
-          {
-            occurrence++;
-            break;
-          }
-
-      if (keyword == curr)
-        break;
+      EquivalenceClass *equclass = partition;
+      partition = equclass->_next;
+      delete_list (equclass->_keywords);
+      //delete[] equclass->_undetermined_chars; // already freed above
+      delete equclass;
     }
-
-  return occurrence;
 }
 
-/* Sorts the given set in increasing frequency of occurrences among the
-   keywords up to the given keyword.  */
-
-inline void
-Search::sort_by_occurrence (unsigned int *set, unsigned int len, KeywordExt *curr) const
+/* Compute the possible number of collisions when _asso_values[c] is
+   chosen, leading to the given partition.  */
+unsigned int
+Search::count_possible_collisions (EquivalenceClass *partition, unsigned int c) const
 {
-  unsigned int occurrences[len];
-
-  for (unsigned int j = 0; j < len; j++)
-    occurrences[j] = 0;
-  for (KeywordExt_List *temp = _head; ; temp = temp->rest())
+  /* Every equivalence class p is split according to the frequency of
+     occurrence of c, leading to equivalence classes p1, p2, ...
+     This leads to   (|p|^2 - |p1|^2 - |p2|^2 - ...)/2  possible collisions.
+     Return the sum of this expression over all equivalence classes.  */
+  unsigned int sum = 0;
+  unsigned int m = get_max_keysig_size();
+  unsigned int split_cardinalities[m+1];
+  for (EquivalenceClass *cls = partition; cls; cls = cls->_next)
     {
-      KeywordExt *keyword = temp->first();
+      for (unsigned int i = 0; i <= m; i++)
+        split_cardinalities[i] = 0;
 
-      int m = keyword->_selchars_length;
-      for (unsigned int j = 0; j < len; j++)
+      for (KeywordExt_List *temp = cls->_keywords; temp; temp = temp->rest())
         {
-          unsigned int c = set[j];
+          KeywordExt *keyword = temp->first();
 
-          for (int i = 0; i < m; i++)
+          unsigned int count = 0;
+          for (int i = 0; i < keyword->_selchars_length; i++)
             if (keyword->_selchars[i] == c)
-              {
-                occurrences[j]++;
-                break;
-              }
+              count++;
+
+          split_cardinalities[count]++;
         }
 
-      if (keyword == curr)
-        break;
+      sum += cls->_cardinality * cls->_cardinality;
+      for (unsigned int i = 0; i <= m; i++)
+        sum -= split_cardinalities[i] * split_cardinalities[i];
     }
+  return sum;
+}
 
-  /* Use bubble sort, since the set is typically short.  */
-  for (unsigned int i = 1; i < len; i++)
+/* Test whether adding c to the undetermined characters changes the given
+   partition.  */
+bool
+Search::unchanged_partition (EquivalenceClass *partition, unsigned int c) const
+{
+  for (EquivalenceClass *cls = partition; cls; cls = cls->_next)
     {
-      unsigned int j;
-      unsigned int set_tmp, occ_tmp;
+      unsigned int first_count = UINT_MAX;
 
-      for (j = i, set_tmp = set[j], occ_tmp = occurrences[j];
-           j > 0 && occ_tmp < occurrences[j-1];
-           j--)
+      for (KeywordExt_List *temp = cls->_keywords; temp; temp = temp->rest())
         {
-          set[j] = set[j - 1];
-          occurrences[j] = occurrences[j - 1];
-        }
+          KeywordExt *keyword = temp->first();
+
+          unsigned int count = 0;
+          for (int i = 0; i < keyword->_selchars_length; i++)
+            if (keyword->_selchars[i] == c)
+              count++;
 
-      set[j] = set_tmp;
-      occurrences[j] = occ_tmp;
+          if (temp == cls->_keywords)
+            first_count = count;
+          else if (count != first_count)
+            /* c would split this equivalence class.  */
+            return false;
+        }
     }
+  return true;
 }
 
-/* Returns true if the recomputed hash values for the keywords from
-   _head->first() to curr - inclusive - give at least one collision.
-   This is called very frequently, and needs to be fast!  */
-bool
-Search::has_collisions (KeywordExt *curr)
+void
+Search::find_asso_values ()
 {
-  /* Iteration Number array is a win, O(1) initialization time!  */
-  _collision_detector->clear ();
+  Step *steps;
 
-  for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
-    {
-      KeywordExt *keyword = ptr->first();
+  /* Determine the steps, starting with the last one.  */
+  {
+    bool *undetermined;
+    bool *determined;
 
-      /* Compute new hash code for the keyword, and see whether it
-         collides with another keyword's hash code.  If we have too
-         many collisions, we can safely abort the fruitless loop.  */
-      if (_collision_detector->set_bit (compute_hash (keyword)))
-        return true;
+    steps = NULL;
 
-      if (keyword == curr)
-        return false;
-    }
-}
+    undetermined = new bool[_alpha_size];
+    for (unsigned int c = 0; c < _alpha_size; c++)
+      undetermined[c] = false;
 
-/* Tests whether the given keyword has the same hash value as another one
-   earlier in the list.  If yes, this earlier keyword is returned (more
-   precisely, the first one of them, but it doesn't really matter which one).
-   If no collision is present, NULL is returned.  */
-KeywordExt *
-Search::collision_prior_to (KeywordExt *curr)
-{
-  for (KeywordExt_List *prior_ptr = _head;
-       prior_ptr->first() != curr;
-       prior_ptr = prior_ptr->rest())
-    {
-      KeywordExt *prior = prior_ptr->first();
+    determined = new bool[_alpha_size];
+    for (unsigned int c = 0; c < _alpha_size; c++)
+      determined[c] = true;
 
-      if (prior->_hash_value == curr->_hash_value)
-        return prior;
-    }
-  return NULL;
-}
+    for (;;)
+      {
+        /* Compute the partition that needs to be refined.  */
+        EquivalenceClass *partition = compute_partition (undetermined);
+
+        /* Determine the main character to be chosen in this step.
+           Choosing such a character c has the effect of splitting every
+           equivalence class (according the the frequency of occurrence of c).
+           We choose the c with the minimum number of possible collisions,
+           so that characters which lead to a large number of collisions get
+           handled early during the search.  */
+        unsigned int chosen_c;
+        unsigned int chosen_possible_collisions;
+        {
+          unsigned int best_c = 0;
+          unsigned int best_possible_collisions = UINT_MAX;
+          for (unsigned int c = 0; c < _alpha_size; c++)
+            if (_occurrences[c] > 0 && determined[c])
+              {
+                unsigned int possible_collisions =
+                  count_possible_collisions (partition, c);
+                if (possible_collisions < best_possible_collisions)
+                  {
+                    best_c = c;
+                    best_possible_collisions = possible_collisions;
+                  }
+              }
+          if (best_possible_collisions == UINT_MAX)
+            {
+              /* All c with _occurrences[c] > 0 are undetermined.  We are
+                 are the starting situation and don't need any more step.  */
+              delete_partition (partition);
+              break;
+            }
+          chosen_c = best_c;
+          chosen_possible_collisions = best_possible_collisions;
+        }
 
-/* Finding good asso_values is normally straightforwards, but needs
-   backtracking in some cases.  The recurse/backtrack depth can be at most
-   _list_len.  Since we cannot assume that the C stack is large enough,
-   we perform the processing without recursion, and simulate the stack.  */
-struct StackEntry
-{
-  /* The current keyword.  */
-  KeywordExt *          _curr;
+        /* We need one more step.  */
+        Step *step = new Step();
 
-  /* The prior keyword, with which curr collides.  */
-  KeywordExt *          _prior;
+        step->_undetermined = new bool[_alpha_size];
+        memcpy (step->_undetermined, undetermined, _alpha_size*sizeof(bool));
 
-  /* Scratch set.  */
-  unsigned int *        _union_set;
-  unsigned int          _union_set_length;
+        step->_partition = partition;
 
-  /* Current index into the scratch set.  */
-  unsigned int          _union_index;
+        /* Now determine how the equivalence classes will be before this
+           step.  */
+        undetermined[chosen_c] = true;
+        partition = compute_partition (undetermined);
 
-  /* Trying a different value for _asso_values[_c].  */
-  unsigned int          _c;
+        /* Now determine which other characters should be determined in this
+           step, because they will not change the equivalence classes at
+           this point.  It is the set of all c which, for all equivalence
+           classes, have the same frequency of occurrence in every keyword
+           of the equivalence class.  */
+        for (unsigned int c = 0; c < _alpha_size; c++)
+          if (_occurrences[c] > 0 && determined[c]
+              && unchanged_partition (partition, c))
+            {
+              undetermined[c] = true;
+              determined[c] = false;
+            }
 
-  /* The original value of _asso_values[_c].  */
-  unsigned int          _original_asso_value;
+        /* main_c must be one of these.  */
+        if (determined[chosen_c])
+          abort ();
 
-  /* Remaining number of iterations.  */
-  int                   _iter;
-};
+        /* Now the set of changing characters of this step.  */
+        unsigned int changing_count;
 
-/* Finds some _asso_values[] that fit.  */
+        changing_count = 0;
+        for (unsigned int c = 0; c < _alpha_size; c++)
+          if (undetermined[c] && !step->_undetermined[c])
+            changing_count++;
 
-void
-Search::find_asso_values ()
-{
-  /* Add one keyword after the other and see whether its hash value collides
-     with one of the previous hash values.  If so, change some asso_values[]
-     entry until the number of collisions so far is reduced.  Then continue
-     with the next keyword.  */
-
-  init_asso_values ();
-
-  int iterations =
-    !option[FAST]
-    ? _asso_value_max    /* Try all possible values of _asso_values[c].  */
-    : option.get_iterations ()
-      ? option.get_iterations ()
-      : keyword_list_length ();
-
-  /* Allocate stack.  */
-  StackEntry *stack = new StackEntry[_list_len];
-  {
-    KeywordExt_List *ptr = _head;
-    for (int i = 0; i < _list_len; i++, ptr = ptr->rest())
-      {
-        stack[i]._curr = ptr->first();
-        stack[i]._union_set = new unsigned int [2 * get_max_keysig_size ()];
-      }
-  }
+        unsigned int *changing = new unsigned int[changing_count];
+        changing_count = 0;
+        for (unsigned int c = 0; c < _alpha_size; c++)
+          if (undetermined[c] && !step->_undetermined[c])
+            changing[changing_count++] = c;
 
-  /* Backtracking according to the standard Prolog call pattern:
+        step->_changing = changing;
+        step->_changing_count = changing_count;
 
-                          +------------------+
-        -------CALL------>|                  |-------RETURN------>
-                          |                  |
-        <------FAIL-------|                  |<------REDO---------
-                          +------------------+
+        step->_asso_value_max = _asso_value_max;
 
-     A CALL and RETURN increase the stack pointer, FAIL and REDO decrease it.
-   */
-  {
-    /* Current stack pointer.  */
-    StackEntry *sp = &stack[0];
-
-    /* Local variables corresponding to *sp.  */
-    /* The current keyword.  */
-    KeywordExt *curr;
-    /* The prior keyword, with which curr collides.  */
-    KeywordExt *prior;
-    /* Scratch set.  */
-    unsigned int *union_set;
-    unsigned int union_set_length;
-    /* Current index into the scratch set.  */
-    unsigned int union_index;
-    /* Trying a different value for _asso_values[c].  */
-    unsigned int c;
-    /* The original value of _asso_values[c].  */
-    unsigned int original_asso_value;
-    /* Remaining number of iterations.  */
-    int iter;
-
-    /* ==== CALL ==== */
-   CALL:
-
-    /* Next keyword from the list.  */
-    curr = sp->_curr;
-
-    /* Compute this keyword's hash value.  */
-    compute_hash (curr);
-
-    /* See if it collides with a prior keyword.  */
-    prior = collision_prior_to (curr);
-
-    if (prior != NULL)
-      {
-        /* Handle collision: Attempt to change an _asso_value[], in order to
-           resolve a hash value collision between the two given keywords.  */
+        step->_expected_lower =
+          exp (static_cast<double>(chosen_possible_collisions)
+               / static_cast<double>(_max_hash_value));
+        step->_expected_upper =
+          exp (static_cast<double>(chosen_possible_collisions)
+               / static_cast<double>(_asso_value_max));
 
-        if (option[DEBUG])
-          {
-            fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
-                     sp - stack + 1,
-                     prior->_allchars_length, prior->_allchars,
-                     curr->_allchars_length, curr->_allchars,
-                     curr->_hash_value);
-            fflush (stderr);
-          }
+        delete_partition (partition);
 
-        /* To achieve that the two hash values become different, we have to
-           change an _asso_values[c] for a character c that contributes to the
-           hash functions of prior and curr with different multiplicity.
-           So we compute the set of such c.  */
-        union_set = sp->_union_set;
-        union_set_length =
-          compute_disjoint_union (prior->_selchars, prior->_selchars_length,
-                                  curr->_selchars, curr->_selchars_length,
-                                  union_set);
-
-        /* Sort by decreasing occurrence: Try least-used characters c first.
-           The idea is that this reduces the number of freshly introduced
-           collisions.  */
-        if (option[OPT_CHOICE])
-          sort_by_occurrence (union_set, union_set_length, curr);
-        else
-          sort_by_occurrence (union_set, union_set_length);
+        step->_next = steps;
+        steps = step;
+      }
 
-        for (union_index = 0; union_index < union_set_length; union_index++)
-          {
-            c = union_set[union_index];
+    delete[] determined;
+    delete[] undetermined;
+  }
 
-            if (option[DEBUG])
-              {
-                unsigned int n = sp - stack + 1;
-                unsigned int o = compute_occurrence (c, curr);
-                fprintf (stderr, "Expected number of iterations between %g and %g\n",
-                         exp (static_cast<double>((2*n-o)*o)
-                              / static_cast<double>(2*_max_hash_value)),
-                         exp (static_cast<double>((2*n-o)*o)
-                              / static_cast<double>(2*_asso_value_max)));
-              }
+  if (option[DEBUG])
+    {
+      unsigned int stepno = 0;
+      for (Step *step = steps; step; step = step->_next)
+        {
+          stepno++;
+          fprintf (stderr, "Step %u chooses _asso_values[", stepno);
+          for (unsigned int i = 0; i < step->_changing_count; i++)
+            {
+              if (i > 0)
+                fprintf (stderr, ",");
+              fprintf (stderr, "'%c'", step->_changing[i]);
+            }
+          fprintf (stderr, "], expected number of iterations between %g and %g.\n",
+                   step->_expected_lower, step->_expected_upper);
+          fprintf (stderr, "Keyword equivalence classes:\n");
+          for (EquivalenceClass *cls = step->_partition; cls; cls = cls->_next)
+            {
+              fprintf (stderr, "\n");
+              for (KeywordExt_List *temp = cls->_keywords; temp; temp = temp->rest())
+                {
+                  KeywordExt *keyword = temp->first();
+                  fprintf (stderr, "  %.*s\n",
+                           keyword->_allchars_length, keyword->_allchars);
+                }
+            }
+          fprintf (stderr, "\n");
+        }
+    }
 
-            /* Try various other values for _asso_values[c].  A value is
-               successful if, with it, the recomputed hash values for the
-               keywords from _head->first() to curr - inclusive - give no
-               collisions.  Up to the given number of iterations are performed.
-               If successful, _asso_values[c] is changed, and the recursion
-               continues. If all iterations are unsuccessful, _asso_values[c]
-               is restored and we backtrack, trying the next union_index.  */
+  /* Initialize _asso_values[].  (The value given here matters only
+     for those c which occur in all keywords with equal multiplicity.)  */
+  for (unsigned int c = 0; c < _alpha_size; c++)
+    _asso_values[c] = 0;
 
-            original_asso_value = _asso_values[c];
+  unsigned int stepno = 0;
+  for (Step *step = steps; step; step = step->_next)
+    {
+      stepno++;
 
-            /* Try many valid associated values.  */
-            for (iter = iterations; iter > 0; iter--)
-              {
-                /* Try next value.  Wrap around mod _asso_value_max.  */
-                _asso_values[c] =
-                  (_asso_values[c] + (_jump != 0 ? _jump : rand ()))
-                  & (_asso_value_max - 1);
+      /* Initialize the asso_values[].  */
+      unsigned int k = step->_changing_count;
+      for (unsigned int i = 0; i < k; i++)
+        {
+          unsigned int c = step->_changing[i];
+          _asso_values[c] =
+            (_initial_asso_value < 0 ? rand () : _initial_asso_value)
+            & (step->_asso_value_max - 1);
+        }
+
+      unsigned int iterations = 0;
+      unsigned int iter[k];
+      for (unsigned int i = 0; i < k; i++)
+        iter[i] = 0;
+      unsigned int ii = (_jump != 0 ? k - 1 : 0);
+
+      for (;;)
+        {
+          /* Test whether these asso_values[] lead to collisions among
+             the equivalence classes that should be collision-free.  */
+          bool has_collision = false;
+          for (EquivalenceClass *cls = step->_partition; cls; cls = cls->_next)
+            {
+              /* Iteration Number array is a win, O(1) initialization time!  */
+              _collision_detector->clear ();
 
-                if (!has_collisions (curr))
+              for (KeywordExt_List *ptr = cls->_keywords; ptr; ptr = ptr->rest())
+                {
+                  KeywordExt *keyword = ptr->first();
+
+                  /* Compute the new hash code for the keyword, leaving apart
+                     the yet undetermined asso_values[].  */
+                  int hashcode;
                   {
-                    /* Good, this _asso_values[] modification reduces the
-                       number of collisions so far.
-                       All keyword->_hash_value up to curr - inclusive -
-                       have been updated.  */
-                    if (option[DEBUG])
-                      {
-                        fprintf (stderr, "- resolved after %d iterations by "
-                                 "changing asso_value['%c'] (char #%d) to %d\n",
-                                 iterations - iter + 1, c,
-                                 union_index + 1, _asso_values[c]);
-                        fflush (stderr);
-                      }
-                    goto RECURSE_COLLISION;
-                   BACKTRACK_COLLISION:
-                    if (option[DEBUG])
-                      {
-                        fprintf (stderr, "back to collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
-                                 sp - stack + 1,
-                                 prior->_allchars_length, prior->_allchars,
-                                 curr->_allchars_length, curr->_allchars,
-                                 curr->_hash_value);
-                        fflush (stderr);
-                      }
+                    int sum = option[NOLENGTH] ? 0 : keyword->_allchars_length;
+                    const unsigned int *p = keyword->_selchars;
+                    int i = keyword->_selchars_length;
+                    for (; i > 0; p++, i--)
+                      if (!step->_undetermined[*p])
+                        sum += _asso_values[*p];
+                    hashcode = sum;
                   }
-              }
 
-            /* Restore original values, no more tries.  */
-            _asso_values[c] = original_asso_value;
-          }
+                  /* See whether it collides with another keyword's hash code,
+                     from the same equivalence class.  */
+                  if (_collision_detector->set_bit (hashcode))
+                    {
+                      has_collision = true;
+                      break;
+                    }
+                }
 
-        /* Failed to resolve a collision.  */
+              /* Don't need to continue looking at the other equivalence
+                 classes if we already have found a collision.  */
+              if (has_collision)
+                break;
+            }
 
-        /* Recompute all keyword->_hash_value up to curr - exclusive -.  */
-        for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
-          {
-            KeywordExt* keyword = ptr->first();
-            if (keyword == curr)
-              break;
-            compute_hash (keyword);
-          }
+          iterations++;
+          if (!has_collision)
+            break;
 
-        if (option[DEBUG])
-          {
-            fprintf (stderr, "** collision not resolved after %d iterations of asso_value[",
-                     iterations);
-            for (union_index = 0; union_index < union_set_length; union_index++)
+          /* Try other asso_values[].  */
+          if (_jump != 0)
+            {
+              /* The way we try various values for
+                   asso_values[step->_changing[0],...step->_changing[k-1]]
+                 is like this:
+                 for (bound = 0,1,...)
+                   for (ii = 0,...,k-1)
+                     iter[ii] := bound
+                     iter[0..ii-1] := values <= bound
+                     iter[ii+1..k-1] := values < bound
+                 and
+                   asso_values[step->_changing[i]] =
+                     _initial_asso_value + iter[i] * _jump.
+                 This makes it more likely to find small asso_values[].
+               */
+              unsigned int bound = iter[ii];
+              unsigned int i = 0;
+              while (i < ii)
+                {
+                  unsigned int c = step->_changing[i];
+                  iter[i]++;
+                  _asso_values[c] =
+                    (_asso_values[c] + _jump) & (step->_asso_value_max - 1);
+                  if (iter[i] <= bound)
+                    goto found_next;
+                  _asso_values[c] =
+                    (_asso_values[c] - iter[i] * _jump)
+                    & (step->_asso_value_max - 1);
+                  iter[i] = 0;
+                  i++;
+                }
+              i = ii + 1;
+              while (i < k)
+                {
+                  unsigned int c = step->_changing[i];
+                  iter[i]++;
+                  _asso_values[c] =
+                    (_asso_values[c] + _jump) & (step->_asso_value_max - 1);
+                  if (iter[i] < bound)
+                    goto found_next;
+                  _asso_values[c] =
+                    (_asso_values[c] - iter[i] * _jump)
+                    & (step->_asso_value_max - 1);
+                  iter[i] = 0;
+                  i++;
+                }
+              /* Switch from one ii to the next.  */
               {
-                if (union_index > 0)
-                  fprintf (stderr, ",");
-                fprintf(stderr, "'%c'", union_set[union_index]);
+                unsigned int c = step->_changing[ii];
+                _asso_values[c] =
+                  (_asso_values[c] - bound * _jump)
+                  & (step->_asso_value_max - 1);
+                iter[ii] = 0;
               }
-            fprintf (stderr, "], backtracking...\n");
-            fflush (stderr);
-          }
-      }
-    else
-      {
-        /* Nothing to do, just recurse.  */
-        goto RECURSE_NO_COLLISION;
-       BACKTRACK_NO_COLLISION: ;
-      }
+              /* Here all iter[i] == 0.  */
+              ii++;
+              if (ii == k)
+                {
+                  ii = 0;
+                  bound++;
+                  if (bound == step->_asso_value_max)
+                    {
+                      /* Out of search space!  We can either backtrack, or
+                         increase the available search space of this step.
+                         It seems simpler to choose the latter solution.  */
+                      step->_asso_value_max = 2 * step->_asso_value_max;
+                      if (step->_asso_value_max > _asso_value_max)
+                        {
+                          _asso_value_max = step->_asso_value_max;
+                          /* Reinitialize _max_hash_value.  */
+                          _max_hash_value =
+                            (option[NOLENGTH] ? 0 : max_key_length ())
+                            + (_asso_value_max - 1) * get_max_keysig_size ();
+                          /* Reinitialize _collision_detector.  */
+                          delete _collision_detector;
+                          _collision_detector =
+                            new Bool_Array (_max_hash_value + 1);
+                        }
+                    }
+                }
+              {
+                unsigned int c = step->_changing[ii];
+                iter[ii] = bound;
+                _asso_values[c] =
+                  (_asso_values[c] + bound * _jump)
+                  & (step->_asso_value_max - 1);
+              }
+             found_next: ;
+            }
+          else
+            {
+              /* Random.  */
+              unsigned int c = step->_changing[ii];
+              _asso_values[c] =
+                (_asso_values[c] + rand ()) & (step->_asso_value_max - 1);
+              /* Next time, change the next c.  */
+              ii++;
+              if (ii == k)
+                ii = 0;
+            }
+        }
+      if (option[DEBUG])
+        {
+          fprintf (stderr, "Step %u chose _asso_values[", stepno);
+          for (unsigned int i = 0; i < step->_changing_count; i++)
+            {
+              if (i > 0)
+                fprintf (stderr, ",");
+              fprintf (stderr, "'%c'", step->_changing[i]);
+            }
+          fprintf (stderr, "] in %u iterations.\n", iterations);
+        }
+    }
 
-    /* ==== FAIL ==== */
-    if (sp != stack)
-      {
-        sp--;
-        /* ==== REDO ==== */
-        curr = sp->_curr;
-        prior = sp->_prior;
-        if (prior == NULL)
-          goto BACKTRACK_NO_COLLISION;
-        union_set = sp->_union_set;
-        union_set_length = sp->_union_set_length;
-        union_index = sp->_union_index;
-        c = sp->_c;
-        original_asso_value = sp->_original_asso_value;
-        iter = sp->_iter;
-        goto BACKTRACK_COLLISION;
-      }
+  /* Free allocated memory.  */
+  while (steps != NULL)
+    {
+      Step *step = steps;
+      steps = step->_next;
+      delete[] step->_changing;
+      delete[] step->_undetermined;
+      delete_partition (step->_partition);
+      delete step;
+    }
+}
 
-    /* No solution found after an exhaustive search!
-       We should ideally turn off option[FAST] and, if that doesn't help,
-       multiply _asso_value_max by 2.  */
-    fprintf (stderr,
-             "\nBig failure, always got duplicate hash code values.\n");
-    if (option[POSITIONS])
-      fprintf (stderr, "try options -m or -r, or use new key positions.\n\n");
-    else
-      fprintf (stderr, "try options -m or -r.\n\n");
-    exit (1);
-
-   RECURSE_COLLISION:
-    /*sp->_union_set = union_set;*/ // redundant
-    sp->_union_set_length = union_set_length;
-    sp->_union_index = union_index;
-    sp->_c = c;
-    sp->_original_asso_value = original_asso_value;
-    sp->_iter = iter;
-   RECURSE_NO_COLLISION:
-    /*sp->_curr = curr;*/ // redundant
-    sp->_prior = prior;
-    /* ==== RETURN ==== */
-    sp++;
-    if (sp - stack < _list_len)
-      goto CALL;
-  }
+/* Computes a keyword's hash value, relative to the current _asso_values[],
+   and stores it in keyword->_hash_value.  */
 
-  /* Deallocate stack.  */
-  {
-    for (int i = 0; i < _list_len; i++)
-      delete[] stack[i]._union_set;
-  }
-  delete[] stack;
+inline int
+Search::compute_hash (KeywordExt *keyword) const
+{
+  int sum = option[NOLENGTH] ? 0 : keyword->_allchars_length;
+
+  const unsigned int *p = keyword->_selchars;
+  int i = keyword->_selchars_length;
+  for (; i > 0; p++, i--)
+    sum += _asso_values[*p];
+
+  return keyword->_hash_value = sum;
 }
 
 /* Finds good _asso_values[].  */
@@ -1390,8 +1326,6 @@ void
 Search::find_good_asso_values ()
 {
   prepare ();
-  if (option[ORDER])
-    reorder ();
   prepare_asso_values ();
 
   /* Search for good _asso_values[].  */
@@ -1529,7 +1463,6 @@ Search::optimize ()
 Search::~Search ()
 {
   delete _collision_detector;
-  delete[] _determined;
   if (option[DEBUG])
     {
       fprintf (stderr, "\ndumping occurrence and associated values tables\n");
diff --git a/src/search.h b/src/search.h
index 2ff3f78..ada14d2 100644
--- a/src/search.h
+++ b/src/search.h
@@ -30,6 +30,8 @@
 #include "positions.h"
 #include "bool-array.h"
 
+struct EquivalenceClass;
+
 class Search
 {
 public:
@@ -62,16 +64,6 @@ private:
 
   void                  prepare ();
 
-  /* Computes the sum of occurrences of the _selchars of a keyword.  */
-  int                   compute_occurrence (KeywordExt *ptr) const;
-
-  /* Auxiliary functions used by Search::reorder().  */
-  void                  clear_determined ();
-  void                  set_determined (KeywordExt *keyword);
-  bool                  already_determined (KeywordExt *keyword) const;
-  /* Reorders the keyword list so as to minimize search times.  */
-  void                  reorder ();
-
   /* Returns the length of keyword list.  */
   int                   keyword_list_length () const;
 
@@ -83,30 +75,20 @@ private:
 
   /* Initializes the asso_values[] related parameters.  */
   void                  prepare_asso_values ();
-  /* Puts a first guess into asso_values[].  */
-  void                  init_asso_values ();
-
-  /* Computes a keyword's hash value, relative to the current _asso_values[],
-     and stores it in keyword->_hash_value.  */
-  int                   compute_hash (KeywordExt *keyword) const;
 
-  /* Computes the frequency of occurrence of a character among the keywords
-     up to the given keyword.  */
-  unsigned int          compute_occurrence (unsigned int c, KeywordExt *curr) const;
+  EquivalenceClass *    compute_partition (bool *undetermined) const;
 
-  /* Sorts the given set in increasing frequency of _occurrences[].  */
-  void                  sort_by_occurrence (unsigned int *set, unsigned int len) const;
-  /* Sorts the given set in increasing frequency of occurrences among the
-     keywords up to the given keyword.  */
-  void                  sort_by_occurrence (unsigned int *set, unsigned int len, KeywordExt *curr) const;
+  unsigned int          count_possible_collisions (EquivalenceClass *partition, unsigned int c) const;
 
-  bool                  has_collisions (KeywordExt *curr);
-
-  KeywordExt *          collision_prior_to (KeywordExt *curr);
+  bool                  unchanged_partition (EquivalenceClass *partition, unsigned int c) const;
 
   /* Finds some _asso_values[] that fit.  */
   void                  find_asso_values ();
 
+  /* Computes a keyword's hash value, relative to the current _asso_values[],
+     and stores it in keyword->_hash_value.  */
+  int                   compute_hash (KeywordExt *keyword) const;
+
   /* Finds good _asso_values[].  */
   void                  find_good_asso_values ();
 
@@ -152,9 +134,6 @@ private:
   /* Length of _head list.  Number of keywords, not counting duplicates.  */
   int                   _list_len;
 
-  /* Vector used during Search::reorder().  */
-  bool *                _determined;
-
   /* Exclusive upper bound for every _asso_values[c].  A power of 2.  */
   unsigned int          _asso_value_max;
author	Bruno Haible <bruno@clisp.org>	2003-03-18 10:22:37 +0000
committer	Bruno Haible <bruno@clisp.org>	2003-03-18 10:22:37 +0000
commit	6d268d095b4cb2bab821e82b785a4b74810b01a6 (patch)
tree	5b3f7f60909fc6a7d5f4697c9d4ae58f947f0a7a /src
parent	40f37680ac107a61bbe14efe6213f70c91c6b461 (diff)
download	gperf-6d268d095b4cb2bab821e82b785a4b74810b01a6.tar.gz