summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2003-02-20 12:21:17 +0000
committerBruno Haible <bruno@clisp.org>2003-02-20 12:21:17 +0000
commitf1da37e04b52e2479b1dcf7570fb195f3bf2f024 (patch)
tree5c535c44b12cc7f3068b9dbc0c86ab34e4aef88e
parent1d73fbe0191e1249bf4f61149b4f4352e53438cb (diff)
downloadgperf-f1da37e04b52e2479b1dcf7570fb195f3bf2f024.tar.gz
Prepare for backtracking.
-rw-r--r--ChangeLog15
-rw-r--r--src/search.cc364
-rw-r--r--src/search.h16
3 files changed, 248 insertions, 147 deletions
diff --git a/ChangeLog b/ChangeLog
index cc32f6a..ba7a397 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,20 @@
2002-11-19 Bruno Haible <bruno@clisp.org>
+ Prepare for backtracking.
+ * src/search.h (Search::try_asso_value, Search::change_some_asso_value):
+ Remove declarations.
+ (Search::less_collisions, Search::collision_prior_to): New declarations.
+ (Search::_fewest_collisions, Search::_union_set, Search::_num_done):
+ Remove fields.
+ * src/search.cc (Search::prepare_asso_values): Don't initialize
+ _union_set.
+ (Search::try_asso_value, Search::change_some_asso_value): Remove
+ methods.
+ (Search::less_collisions, Search::collision_prior_to): New methods.
+ (StackEntry): New class.
+ (Search::find_asso_values): Reorganized to use pseudo-recursion.
+ (Search::~Search): Don't free _union_set.
+
* src/search.h (Search::find_good_asso_values): New declaration.
* src/search.cc: Add comments about the basic structure of the
algorithm.
diff --git a/src/search.cc b/src/search.cc
index 826c43f..7b6210f 100644
--- a/src/search.cc
+++ b/src/search.cc
@@ -771,9 +771,6 @@ Search::prepare_asso_values ()
values. */
_collision_detector = new Bool_Array (_max_hash_value + 1);
- /* Allocate scratch set. */
- _union_set = new unsigned int [2 * get_max_keysig_size ()];
-
if (option[DEBUG])
fprintf (stderr, "total non-linked keys = %d\nmaximum associated value is %d"
"\nmaximum size of generated hash table is %d\n",
@@ -894,91 +891,97 @@ Search::sort_by_occurrence (unsigned int *set, int len) const
}
}
-/* Tries various other values for _asso_values[c]. A value is successful
- if, with it, the recomputed hash values for the keywords from
- _head->first() to curr - inclusive - give fewer than _fewest_collisions
- collisions. Up to the given number of iterations are performed.
- If successful, _asso_values[c] is changed, _fewest_collisions is decreased,
- and false is returned.
- If all iterations are unsuccessful, _asso_values[c] is restored and
- true is returned.
+/* If the recomputed hash values for the keywords from _head->first() to
+ curr - inclusive - give fewer than collision_bound collisions, this
+ collision count is returned. Otherwise some value >= collision_bound
+ is returned.
This is called very frequently, and needs to be fast! */
-
-inline bool
-Search::try_asso_value (unsigned int c, KeywordExt *curr, int iterations)
+unsigned int
+Search::less_collisions (KeywordExt *curr, unsigned int collision_bound)
{
- int original_value = _asso_values[c];
+ unsigned int collisions = 0;
- /* Try many valid associated values. */
- for (int i = iterations - 1; i >= 0; i--)
- {
- int collisions = 0;
+ /* Iteration Number array is a win, O(1) initialization time! */
+ _collision_detector->clear ();
- /* Try next value. Wrap around mod _asso_value_max. */
- _asso_values[c] =
- (_asso_values[c] + (_jump != 0 ? _jump : rand ()))
- & (_asso_value_max - 1);
+ for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
+ {
+ KeywordExt *keyword = ptr->first();
- /* Iteration Number array is a win, O(1) intialization time! */
- _collision_detector->clear ();
+ /* Compute new hash code for the keyword, and see whether it
+ collides with another keyword's hash code. If we have too
+ many collisions, we can safely abort the fruitless loop. */
+ if (_collision_detector->set_bit (compute_hash (keyword))
+ && ++collisions >= collision_bound)
+ return collision_bound; /* >= collision_bound */
- for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
- {
- KeywordExt *keyword = ptr->first();
+ if (keyword == curr)
+ return collisions; /* < collision_bound */
+ }
+}
- /* Compute new hash code for the keyword, and see whether it
- collides with another keyword's hash code. If we have too
- many collisions, we can safely abort the fruitless loop. */
- if (_collision_detector->set_bit (compute_hash (keyword))
- && ++collisions >= _fewest_collisions)
- break;
+/* Tests whether the given keyword has the same hash value as another one
+ earlier in the list. If yes, this earlier keyword is returned (more
+ precisely, the first one of them, but it doesn't really matter which one).
+ If no collision is present, NULL is returned. */
+KeywordExt *
+Search::collision_prior_to (KeywordExt *curr)
+{
+ for (KeywordExt_List *prior_ptr = _head;
+ prior_ptr->first() != curr;
+ prior_ptr = prior_ptr->rest())
+ {
+ KeywordExt *prior = prior_ptr->first();
- if (keyword == curr)
- {
- _fewest_collisions = collisions;
- if (option[DEBUG])
- fprintf (stderr, "- resolved after %d iterations",
- iterations - i);
- return false;
- }
- }
+ if (prior->_hash_value == curr->_hash_value)
+ return prior;
}
-
- /* Restore original values, no more tries. */
- _asso_values[c] = original_value;
- return true;
+ return NULL;
}
-/* Attempts to change an _asso_value[], in order to resolve a hash value
- collision between the two given keywords. */
+/* Finding good asso_values is normally straightforwards, but needs
+ backtracking in some cases. The recurse/backtrack depth can be at most
+ _list_len. Since we cannot assume that the C stack is large enough,
+ we perform the processing without recursion, and simulate the stack. */
+struct StackEntry
+{
+ /* The number of collisions so far. */
+ unsigned int _collisions_so_far;
+
+ /* The current keyword. */
+ KeywordExt * _curr;
+
+ /* The prior keyword, with which curr collides. */
+ KeywordExt * _prior;
+
+ /* Scratch set. */
+ unsigned int * _union_set;
+ unsigned int _union_set_length;
+
+ /* Current index into the scratch set. */
+ unsigned int _union_index;
+
+ /* Trying a different value for _asso_values[_c]. */
+ unsigned int _c;
+
+ /* The original value of _asso_values[_c]. */
+ unsigned int _original_asso_value;
+
+ /* Remaining number of iterations. */
+ int _iter;
+};
+
+/* Finds some _asso_values[] that fit. */
void
-Search::change_some_asso_value (KeywordExt *prior, KeywordExt *curr)
+Search::find_asso_values ()
{
- if (option[DEBUG])
- {
- fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
- _num_done,
- prior->_allchars_length, prior->_allchars,
- curr->_allchars_length, curr->_allchars,
- curr->_hash_value);
- fflush (stderr);
- }
+ /* Add one keyword after the other and see whether its hash value collides
+ with one of the previous hash values. If so, change some asso_values[]
+ entry until the number of collisions so far is reduced. Then continue
+ with the next keyword. */
- /* To achieve that the two hash values become different, we have to
- change an _asso_values[c] for a character c that contributes to the
- hash functions of prior and curr with different multiplicity.
- So we compute the set of such c. */
- unsigned int *union_set = _union_set;
- int union_set_length =
- compute_disjoint_union (prior->_selchars, prior->_selchars_length,
- curr->_selchars, curr->_selchars_length,
- union_set);
-
- /* Sort by decreasing occurrence: Try least-used characters c first.
- The idea is that this reduces the number of freshly introduced
- collisions. */
- sort_by_occurrence (union_set, union_set_length);
+ init_asso_values ();
int iterations =
!option[FAST]
@@ -987,79 +990,175 @@ Search::change_some_asso_value (KeywordExt *prior, KeywordExt *curr)
? option.get_iterations ()
: keyword_list_length ();
- const unsigned int *p = union_set;
- int i = union_set_length;
- for (; i > 0; p++, i--)
- if (!try_asso_value (*p, curr, iterations))
+ /* Allocate stack. */
+ StackEntry *stack = new StackEntry[_list_len];
+ {
+ KeywordExt_List *ptr = _head;
+ for (int i = 0; i < _list_len; i++, ptr = ptr->rest())
{
- /* Good, this _asso_values[] modification reduces the number of
- collisions so far.
- All keyword->_hash_value up to curr - inclusive - and
- _fewest_collisions have been updated. */
+ stack[i]._curr = ptr->first();
+ stack[i]._union_set = new unsigned int [2 * get_max_keysig_size ()];
+ }
+ }
+
+ {
+ /* Current stack pointer. */
+ StackEntry *sp = &stack[0];
+
+ /* Local variables corresponding to *sp. */
+ /* The number of collisions so far. */
+ unsigned int collisions_so_far;
+ /* The current keyword. */
+ KeywordExt *curr;
+ /* The prior keyword, with which curr collides. */
+ KeywordExt *prior;
+ /* Scratch set. */
+ unsigned int *union_set;
+ unsigned int union_set_length;
+ /* Current index into the scratch set. */
+ unsigned int union_index;
+ /* Trying a different value for _asso_values[c]. */
+ unsigned int c;
+ /* The original value of _asso_values[c]. */
+ unsigned int original_asso_value;
+ /* Remaining number of iterations. */
+ int iter;
+
+ collisions_so_far = 0;
+
+ STARTOUTERLOOP:
+
+ /* Next keyword from the list. */
+ curr = sp->_curr;
+
+ /* Compute this keyword's hash value. */
+ compute_hash (curr);
+
+ /* See if it collides with a prior keyword. */
+ prior = collision_prior_to (curr);
+
+ if (prior != NULL)
+ {
+ collisions_so_far++;
+
+ /* Handle collision: Attempt to change an _asso_value[], in order to
+ resolve a hash value collision between the two given keywords. */
+
if (option[DEBUG])
{
- fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n",
- *p, p - union_set + 1, _asso_values[*p]);
+ fprintf (stderr, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
+ sp - stack + 1,
+ prior->_allchars_length, prior->_allchars,
+ curr->_allchars_length, curr->_allchars,
+ curr->_hash_value);
fflush (stderr);
}
- return;
- }
- /* Failed to resolve a collision. */
+ /* To achieve that the two hash values become different, we have to
+ change an _asso_values[c] for a character c that contributes to the
+ hash functions of prior and curr with different multiplicity.
+ So we compute the set of such c. */
+ union_set = sp->_union_set;
+ union_set_length =
+ compute_disjoint_union (prior->_selchars, prior->_selchars_length,
+ curr->_selchars, curr->_selchars_length,
+ union_set);
+
+ /* Sort by decreasing occurrence: Try least-used characters c first.
+ The idea is that this reduces the number of freshly introduced
+ collisions. */
+ sort_by_occurrence (union_set, union_set_length);
+
+ for (union_index = 0; union_index < union_set_length; union_index++)
+ {
+ c = union_set[union_index];
- /* Recompute all keyword->_hash_value up to curr - inclusive -. */
- for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
- {
- KeywordExt* keyword = ptr->first();
- compute_hash (keyword);
- if (keyword == curr)
- break;
- }
+ /* Try various other values for _asso_values[c]. A value is
+ successful if, with it, the recomputed hash values for the
+ keywords from _head->first() to curr - inclusive - give fewer
+ than collisions_so_far collisions. Up to the given number of
+ iterations are performed. If successful, _asso_values[c] is
+ changed, collisions_so_far is decreased, and the recursion
+ continued. If all iterations are unsuccessful, _asso_values[c]
+ is restored and we backtrack, trying the next union_index. */
- if (option[DEBUG])
- {
- fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
- iterations, _fewest_collisions + _total_duplicates);
- fflush (stderr);
- }
-}
+ original_asso_value = _asso_values[c];
-/* Finds some _asso_values[] that fit. */
+ /* Try many valid associated values. */
+ for (iter = iterations; iter > 0; iter--)
+ {
+ /* Try next value. Wrap around mod _asso_value_max. */
+ _asso_values[c] =
+ (_asso_values[c] + (_jump != 0 ? _jump : rand ()))
+ & (_asso_value_max - 1);
+
+ unsigned int collisions =
+ less_collisions (curr, collisions_so_far);
+ if (collisions < collisions_so_far)
+ {
+ collisions_so_far = collisions;
+ /* Good, this _asso_values[] modification reduces the
+ number of collisions so far.
+ All keyword->_hash_value up to curr - inclusive -
+ have been updated. */
+ if (option[DEBUG])
+ {
+ fprintf (stderr, "- resolved after %d iterations by "
+ "changing asso_value['%c'] (char #%d) to %d\n",
+ iterations - iter + 1, c,
+ union_index + 1, _asso_values[c]);
+ fflush (stderr);
+ }
+ goto RECURSE;
+ }
+ }
-void
-Search::find_asso_values ()
-{
- _fewest_collisions = 0;
- init_asso_values ();
+ /* Restore original values, no more tries. */
+ _asso_values[c] = original_asso_value;
+ }
- /* Add one keyword after the other and see whether its hash value collides
- with one of the previous hash values. */
- _num_done = 1;
- for (KeywordExt_List *curr_ptr = _head;
- curr_ptr != NULL;
- curr_ptr = curr_ptr->rest(), _num_done++)
- {
- KeywordExt *curr = curr_ptr->first();
+ /* Failed to resolve a collision. */
- /* Compute this keyword's hash value. */
- compute_hash (curr);
+ /* Recompute all keyword->_hash_value up to curr - inclusive -. */
+ for (KeywordExt_List *ptr = _head; ; ptr = ptr->rest())
+ {
+ KeywordExt* keyword = ptr->first();
+ compute_hash (keyword);
+ if (keyword == curr)
+ break;
+ }
- /* See if it collides with a prior keyword. */
- for (KeywordExt_List *prior_ptr = _head;
- prior_ptr != curr_ptr;
- prior_ptr = prior_ptr->rest())
- {
- KeywordExt *prior = prior_ptr->first();
+ if (option[DEBUG])
+ {
+ fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
+ iterations, collisions_so_far + _total_duplicates);
+ fflush (stderr);
+ }
+ }
+ RECURSE:
+ sp->_collisions_so_far = collisions_so_far;
+ /*sp->_curr = curr;*/ // redundant
+ sp->_prior = prior;
+ /*sp->_union_set = union_set;*/ // redundant
+ sp->_union_set_length = union_set_length;
+ sp->_union_index = union_index;
+ sp->_c = c;
+ sp->_original_asso_value = original_asso_value;
+ sp->_iter = iter;
+ sp++;
+ if (sp - stack < _list_len)
+ {
+ /*collisions_so_far = sp[-1]._collisions_so_far;*/ // redundant
+ goto STARTOUTERLOOP;
+ }
+ }
- if (prior->_hash_value == curr->_hash_value)
- {
- _fewest_collisions++;
- /* Handle collision. */
- change_some_asso_value (prior, curr);
- break;
- }
- }
- }
+ /* Deallocate stack. */
+ {
+ for (int i = 0; i < _list_len; i++)
+ delete[] stack[i]._union_set;
+ }
+ delete[] stack;
}
/* Finds good _asso_values[]. */
@@ -1210,7 +1309,6 @@ Search::optimize ()
Search::~Search ()
{
- delete[] _union_set;
delete _collision_detector;
delete[] _determined;
if (option[DEBUG])
diff --git a/src/search.h b/src/search.h
index f120c3f..afd1bfa 100644
--- a/src/search.h
+++ b/src/search.h
@@ -93,12 +93,9 @@ private:
/* Sorts the given set in increasing frequency of _occurrences[]. */
void sort_by_occurrence (unsigned int *set, int len) const;
- /* Tries various other values for _asso_values[c]. */
- bool try_asso_value (unsigned int c, KeywordExt *curr, int iterations);
+ unsigned int less_collisions (KeywordExt *curr, unsigned int collision_bound);
- /* Attempts to change an _asso_value[], in order to resolve a hash value
- collision between the two given keywords. */
- void change_some_asso_value (KeywordExt *prior, KeywordExt *curr);
+ KeywordExt * collision_prior_to (KeywordExt *curr);
/* Finds some _asso_values[] that fit. */
void find_asso_values ();
@@ -164,15 +161,6 @@ private:
/* Sparse bit vector for collision detection. */
Bool_Array * _collision_detector;
-
- /* Minimal number of collisions found so far. */
- int _fewest_collisions;
-
- /* Scratch set, used during Search::change_some_asso_value. */
- unsigned int * _union_set;
-
- /* Number of keyword being handled during Search::find_asso_values. */
- int _num_done;
};
#endif