summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2003-04-11 09:10:58 +0000
committerBruno Haible <bruno@clisp.org>2003-04-11 09:10:58 +0000
commitab6f0966b7a74d90cfe036e6b1c2a633e46668f3 (patch)
tree45e00a76b68a20e135c5be054fe54f4ac9740618
parentf619dea2f545f3e3a0265912249c09f58671cb5e (diff)
downloadgperf-ab6f0966b7a74d90cfe036e6b1c2a633e46668f3.tar.gz
Cleanup the alpha_unify handling code.
-rw-r--r--ChangeLog10
-rw-r--r--src/search.cc85
-rw-r--r--src/search.h7
3 files changed, 71 insertions, 31 deletions
diff --git a/ChangeLog b/ChangeLog
index a74b52a..3dc748e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2002-12-12 Bruno Haible <bruno@clisp.org>
+ * src/search.h (Search::init_selchars_tuple,
+ Search::count_duplicates_tuple): Add alpha_unify argument.
+ (Search::count_duplicates_tuple): New method declaration.
+ * src/search.cc (Search::init_selchars_tuple,
+ Search::count_duplicates_tuple): Add alpha_unify argument.
+ (Search::find_positions): Update.
+ (Search::count_duplicates_tuple): New method.
+ (Search::count_duplicates_multiset): Free temp alpha_unify vector.
+ (Search::find_alpha_inc): Call count_duplicates_tuple.
+
* src/configure.in: Add test for stack-allocated variable-size arrays.
* src/config.h.in: Regenerated.
* src/search.cc: Include config.h.
diff --git a/src/search.cc b/src/search.cc
index a1e3d70..6eec1bb 100644
--- a/src/search.cc
+++ b/src/search.cc
@@ -58,7 +58,7 @@
/* ================================ Theory ================================= */
-/* The most general form of the hash function is
+/* The general form of the hash function is
hash (keyword) = sum (asso_values[keyword[i] + alpha_inc[i]] : i in Pos)
+ len (keyword)
@@ -75,6 +75,8 @@
are nonnegative integers alpha_inc[i] such that all multisets
{keyword[i] + alpha_inc[i] : i in Pos} are different.
+ Define selchars[keyword] := {keyword[i] + alpha_inc[i] : i in Pos}.
+
Theorem 3: If all multisets selchars[keyword] are different, there are
nonnegative integers asso_values[c] such that all hash values
sum (asso_values[c] : c in selchars[keyword]) are different.
@@ -98,14 +100,25 @@
proj1 : String --> Map (Pos --> N)
proj2 : Map (Pos --> N) --> Map (Pos --> N) / S(Pos)
proj3 : Map (Pos --> N) / S(Pos) --> N
- where N denotes the set of nonnegative integers, and S(Pos) is the
- symmetric group over Pos.
+ where
+ N denotes the set of nonnegative integers,
+ Map (A --> B) := Hom_Set (A, B) is the set of maps from A to B, and
+ S(Pos) is the symmetric group over Pos.
This was the theory for option[NOLENGTH]; if !option[NOLENGTH], slight
modifications apply:
proj1 : String --> Map (Pos --> N) x N
proj2 : Map (Pos --> N) x N --> Map (Pos --> N) / S(Pos) x N
proj3 : Map (Pos --> N) / S(Pos) x N --> N
+
+ For a case-insensitive hash function, the general form is
+
+ hash (keyword) =
+ sum (asso_values[alpha_unify[keyword[i] + alpha_inc[i]]] : i in Pos)
+ + len (keyword)
+
+ where alpha_unify[c] is chosen so that an upper/lower case change in
+ keyword[i] doesn't change alpha_unify[keyword[i] + alpha_inc[i]].
*/
/* ==================== Initialization and Preparation ===================== */
@@ -118,17 +131,15 @@ Search::Search (KeywordExt_List *list)
void
Search::prepare ()
{
- KeywordExt_List *temp;
-
/* Compute the total number of keywords. */
_total_keys = 0;
- for (temp = _head; temp; temp = temp->rest())
+ for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
_total_keys++;
/* Compute the minimum and maximum keyword length. */
_max_key_len = INT_MIN;
_min_key_len = INT_MAX;
- for (temp = _head; temp; temp = temp->rest())
+ for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
{
KeywordExt *keyword = temp->first();
@@ -142,16 +153,16 @@ Search::prepare ()
expressions don't work correctly for looking up an empty string. */
if (_min_key_len == 0)
{
- fprintf (stderr, "Empty input key is not allowed.\n"
- "To recognize an empty input key, your code should check for\n"
- "len == 0 before calling the gperf generated lookup function.\n");
+ fprintf (stderr, "Empty input keyword is not allowed.\n"
+ "To recognize an empty input keyword, your code should check for\n"
+ "len == 0 before calling the gperf generated lookup function.\n");
exit (1);
}
/* Exit program if the characters in the keywords are not in the required
range. */
if (option[SEVENBIT])
- for (temp = _head; temp; temp = temp->rest())
+ for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
{
KeywordExt *keyword = temp->first();
@@ -185,6 +196,7 @@ Search::compute_alpha_unify () const
{
if (option[UPPERLOWER])
{
+ /* Uppercase to lowercase mapping. */
unsigned int alpha_size = compute_alpha_size();
unsigned int *alpha_unify = new unsigned int[alpha_size];
for (unsigned int c = 0; c < alpha_size; c++)
@@ -194,15 +206,16 @@ Search::compute_alpha_unify () const
return alpha_unify;
}
else
+ /* Identity mapping. */
return NULL;
}
/* Initializes each keyword's _selchars array. */
void
-Search::init_selchars_tuple (const Positions& positions) const
+Search::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) const
{
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
- temp->first()->init_selchars_tuple(positions, _alpha_unify);
+ temp->first()->init_selchars_tuple(positions, alpha_unify);
}
/* Deletes each keyword's _selchars array. */
@@ -218,12 +231,12 @@ Search::delete_selchars () const
# K - # proj1 (K)
where K is the multiset of given keywords. */
unsigned int
-Search::count_duplicates_tuple (const Positions& positions) const
+Search::count_duplicates_tuple (const Positions& positions, const unsigned int *alpha_unify) const
{
/* Run through the keyword list and count the duplicates incrementally.
The result does not depend on the order of the keyword list, thanks to
the formula above. */
- init_selchars_tuple (positions);
+ init_selchars_tuple (positions, alpha_unify);
unsigned int count = 0;
{
@@ -253,8 +266,8 @@ Search::find_positions ()
return;
}
- /* Compute preliminary value for _alpha_unify. */
- _alpha_unify = compute_alpha_unify ();
+ /* Compute preliminary alpha_unify table. */
+ unsigned int *alpha_unify = compute_alpha_unify ();
/* 1. Find positions that must occur in order to distinguish duplicates. */
Positions mandatory;
@@ -322,7 +335,8 @@ Search::find_positions ()
int imax = (_max_key_len - 1 < Positions::MAX_KEY_POS - 1
? _max_key_len - 1 : Positions::MAX_KEY_POS - 1);
Positions current = mandatory;
- unsigned int current_duplicates_count = count_duplicates_tuple (current);
+ unsigned int current_duplicates_count =
+ count_duplicates_tuple (current, alpha_unify);
for (;;)
{
Positions best;
@@ -333,7 +347,8 @@ Search::find_positions ()
{
Positions tryal = current;
tryal.add (i);
- unsigned int try_duplicates_count = count_duplicates_tuple (tryal);
+ unsigned int try_duplicates_count =
+ count_duplicates_tuple (tryal, alpha_unify);
/* We prefer 'try' to 'best' if it produces less duplicates,
or if it produces the same number of duplicates but with
@@ -366,7 +381,8 @@ Search::find_positions ()
{
Positions tryal = current;
tryal.remove (i);
- unsigned int try_duplicates_count = count_duplicates_tuple (tryal);
+ unsigned int try_duplicates_count =
+ count_duplicates_tuple (tryal, alpha_unify);
/* We prefer 'try' to 'best' if it produces less duplicates,
or if it produces the same number of duplicates but with
@@ -406,7 +422,7 @@ Search::find_positions ()
tryal.remove (i2);
tryal.add (i3);
unsigned int try_duplicates_count =
- count_duplicates_tuple (tryal);
+ count_duplicates_tuple (tryal, alpha_unify);
/* We prefer 'try' to 'best' if it produces less duplicates,
or if it produces the same number of duplicates but with
@@ -459,8 +475,21 @@ Search::find_positions ()
fprintf (stderr, "\n");
}
- /* Free preliminary value for _alpha_unify. */
- delete[] _alpha_unify;
+ /* Free preliminary alpha_unify table. */
+ delete[] alpha_unify;
+}
+
+/* Count the duplicate keywords that occur with the found set of positions.
+ In other words, it returns the difference
+ # K - # proj1 (K)
+ where K is the multiset of given keywords. */
+unsigned int
+Search::count_duplicates_tuple () const
+{
+ unsigned int *alpha_unify = compute_alpha_unify ();
+ unsigned int count = count_duplicates_tuple (_key_positions, alpha_unify);
+ delete[] alpha_unify;
+ return count;
}
/* ===================== Finding good alpha increments ===================== */
@@ -558,9 +587,8 @@ Search::count_duplicates_multiset (const unsigned int *alpha_inc) const
/* Run through the keyword list and count the duplicates incrementally.
The result does not depend on the order of the keyword list, thanks to
the formula above. */
- init_selchars_multiset (_key_positions,
- compute_alpha_unify (_key_positions, alpha_inc),
- alpha_inc);
+ unsigned int *alpha_unify = compute_alpha_unify (_key_positions, alpha_inc);
+ init_selchars_multiset (_key_positions, alpha_unify, alpha_inc);
unsigned int count = 0;
{
@@ -574,6 +602,7 @@ Search::count_duplicates_multiset (const unsigned int *alpha_inc) const
}
delete_selchars ();
+ delete[] alpha_unify;
return count;
}
@@ -586,9 +615,7 @@ Search::find_alpha_inc ()
/* The goal is to choose _alpha_inc[] such that it doesn't introduce
artificial duplicates.
In other words, the goal is # proj2 (proj1 (K)) = # proj1 (K). */
- _alpha_unify = compute_alpha_unify ();
- unsigned int duplicates_goal = count_duplicates_tuple (_key_positions);
- delete[] _alpha_unify;
+ unsigned int duplicates_goal = count_duplicates_tuple ();
/* Start with zero increments. This is sufficient in most cases. */
unsigned int *current = new unsigned int [_max_key_len];
diff --git a/src/search.h b/src/search.h
index 17c874c..c098f50 100644
--- a/src/search.h
+++ b/src/search.h
@@ -50,16 +50,19 @@ private:
unsigned int * compute_alpha_unify () const;
/* Initializes each keyword's _selchars array. */
- void init_selchars_tuple (const Positions& positions) const;
+ void init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
/* Deletes each keyword's _selchars array. */
void delete_selchars () const;
/* Count the duplicate keywords that occur with a given set of positions. */
- unsigned int count_duplicates_tuple (const Positions& positions) const;
+ unsigned int count_duplicates_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
/* Find good key positions. */
void find_positions ();
+ /* Count the duplicate keywords that occur with the found set of positions. */
+ unsigned int count_duplicates_tuple () const;
+
/* Computes the upper bound on the indices passed to asso_values[]. */
unsigned int compute_alpha_size (const unsigned int *alpha_inc) const;