summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2003-02-11 11:09:27 +0000
committerBruno Haible <bruno@clisp.org>2003-02-11 11:09:27 +0000
commit810fef43aebd9cd1d58d9a6a412c49835d3e8471 (patch)
treee00cbf3d2754cfbdfa69af299f91b21fe7e8e326 /src
parent6202aaadb1a2904f456c2ee55623bf4a1a951ad7 (diff)
downloadgperf-810fef43aebd9cd1d58d9a6a412c49835d3e8471.tar.gz
When the option -k is not given, the default key positions are now computed
depending on the set of keywords.
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.in4
-rw-r--r--src/hash-table.cc15
-rw-r--r--src/keyword.cc27
-rw-r--r--src/keyword.h9
-rw-r--r--src/main.cc3
-rw-r--r--src/options.cc142
-rw-r--r--src/options.h65
-rw-r--r--src/options.icc39
-rw-r--r--src/output.cc75
-rw-r--r--src/output.h10
-rw-r--r--src/search.cc230
-rw-r--r--src/search.h24
12 files changed, 522 insertions, 121 deletions
diff --git a/src/Makefile.in b/src/Makefile.in
index 1aecdcf..d959696 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -91,8 +91,8 @@ KEYWORD_LIST_H = keyword-list.h keyword-list.icc $(KEYWORD_H)
INPUT_H = input.h $(KEYWORD_LIST_H)
BOOL_ARRAY_H = bool-array.h bool-array.icc $(OPTIONS_H)
HASH_TABLE_H = hash-table.h $(KEYWORD_H)
-SEARCH_H = search.h $(KEYWORD_LIST_H) $(BOOL_ARRAY_H)
-OUTPUT_H = output.h $(KEYWORD_LIST_H)
+SEARCH_H = search.h $(KEYWORD_LIST_H) $(OPTIONS_H) $(BOOL_ARRAY_H)
+OUTPUT_H = output.h $(KEYWORD_LIST_H) $(OPTIONS_H)
version.o : version.cc $(VERSION_H)
$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $(srcdir)/version.cc
diff --git a/src/hash-table.cc b/src/hash-table.cc
index a4d2a1b..6928eb6 100644
--- a/src/hash-table.cc
+++ b/src/hash-table.cc
@@ -89,16 +89,11 @@ Hash_Table::~Hash_Table ()
{
int field_width;
- if (option[ALLCHARS])
- {
- field_width = 0;
- for (int i = _size - 1; i >= 0; i--)
- if (_table[i])
- if (field_width < _table[i]->_selchars_length)
- field_width = _table[i]->_selchars_length;
- }
- else
- field_width = option.get_max_keysig_size ();
+ field_width = 0;
+ for (int i = _size - 1; i >= 0; i--)
+ if (_table[i])
+ if (field_width < _table[i]->_selchars_length)
+ field_width = _table[i]->_selchars_length;
fprintf (stderr,
"\ndumping the hash table\n"
diff --git a/src/keyword.cc b/src/keyword.cc
index 6250165..73608d0 100644
--- a/src/keyword.cc
+++ b/src/keyword.cc
@@ -47,7 +47,7 @@ static inline void sort_char_set (unsigned char *base, int len)
}
}
-/* Initialize selchars and selchars_length.
+/* Initializes selchars and selchars_length.
The hash function will be computed as
asso_values[allchars[key_pos[0]]] + asso_values[allchars[key_pos[1]]] + ...
We compute selchars as the multiset
@@ -57,14 +57,15 @@ static inline void sort_char_set (unsigned char *base, int len)
Furthermore we sort the selchars array, to ease detection of duplicates
later.
*/
-void KeywordExt::init_selchars ()
+void
+KeywordExt::init_selchars (bool use_all_chars, const Positions& positions)
{
const char *k = _allchars;
unsigned char *key_set =
- new unsigned char[(option[ALLCHARS] ? _allchars_length : option.get_max_keysig_size ())];
+ new unsigned char[(use_all_chars ? _allchars_length : positions.get_size ())];
unsigned char *ptr = key_set;
- if (option[ALLCHARS])
+ if (use_all_chars)
/* Use all the character positions in the KEY. */
for (int i = _allchars_length; i > 0; k++, i--)
{
@@ -76,7 +77,7 @@ void KeywordExt::init_selchars ()
{
/* Iterate through the list of key_positions, initializing selchars
(via ptr). */
- PositionIterator iter (option.get_key_positions ());
+ PositionIterator iter (positions);
for (int i; (i = iter.next ()) != PositionIterator::EOS; )
{
@@ -91,15 +92,6 @@ void KeywordExt::init_selchars ()
continue;
ptr++;
}
-
- /* Didn't get any hits and user doesn't want to consider the
- keylength, so there are essentially no usable hash positions! */
- if (ptr == key_set && option[NOLENGTH])
- {
- fprintf (stderr, "Can't hash keyword %.*s with chosen key positions.\n",
- _allchars_length, _allchars);
- exit (1);
- }
}
/* Sort the KEY_SET items alphabetically. */
@@ -109,6 +101,13 @@ void KeywordExt::init_selchars ()
_selchars_length = ptr - key_set;
}
+/* Deletes selchars. */
+void
+KeywordExt::delete_selchars ()
+{
+ delete[] _selchars;
+}
+
/* ------------------------- Keyword_Factory class ------------------------- */
diff --git a/src/keyword.h b/src/keyword.h
index 79a71df..b2f0a75 100644
--- a/src/keyword.h
+++ b/src/keyword.h
@@ -26,6 +26,9 @@
#ifndef keyword_h
#define keyword_h 1
+/* Class defined in "options.h". */
+class Positions;
+
/* An instance of this class is a keyword, as specified in the input file. */
struct Keyword
@@ -64,8 +67,10 @@ struct KeywordExt : public Keyword
KeywordExt * _duplicate_link;
/* Methods depending on the keyposition list. */
- /* Initialize selchars and selchars_length. */
- void init_selchars ();
+ /* Initializes selchars and selchars_length. */
+ void init_selchars (bool use_all_chars, const Positions& positions);
+ /* Deletes selchars. */
+ void delete_selchars ();
/* Data members used by the algorithm. */
int _occurrence; /* Frequency of key set occurrences. */
diff --git a/src/main.cc b/src/main.cc
index 7552b3c..8509cc0 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -103,9 +103,10 @@ main (int argc, char *argv[])
inputter._verbatim_code_end,
inputter._verbatim_code_lineno,
searcher._total_keys,
- searcher._total_duplicates,
searcher._max_key_len,
searcher._min_key_len,
+ searcher._key_positions,
+ searcher._total_duplicates,
searcher._alpha_size,
searcher._occurrences,
searcher._asso_values);
diff --git a/src/options.cc b/src/options.cc
index 7bbe27e..885504e 100644
--- a/src/options.cc
+++ b/src/options.cc
@@ -442,7 +442,7 @@ Options::Options ()
_hash_name (DEFAULT_HASH_NAME),
_wordlist_name (DEFAULT_WORDLIST_NAME),
_delimiters (DEFAULT_DELIMITERS),
- _key_positions (1, Positions::LASTCHAR)
+ _key_positions ()
{
}
@@ -766,6 +766,7 @@ Options::parse_options (int argc, char *argv[])
}
case 'k': /* Sets key positions used for hash function. */
{
+ _option_word |= POSITIONS;
const int BAD_VALUE = -2;
const int EOS = PositionIterator::EOS;
int value;
@@ -782,7 +783,7 @@ Options::parse_options (int argc, char *argv[])
{
if (value == BAD_VALUE)
{
- fprintf (stderr, "Invalid key value or range, use 1,2,3-%d,'$' or '*'.\n",
+ fprintf (stderr, "Invalid position value or range, use 1,2,3-%d,'$' or '*'.\n",
Positions::MAX_KEY_POS);
short_usage (stderr);
exit (1);
@@ -793,7 +794,7 @@ Options::parse_options (int argc, char *argv[])
Since all key positions are in the range
1..Positions::MAX_KEY_POS or == Positions::LASTCHAR,
there must be duplicates. */
- fprintf (stderr, "Duplicate keys selected\n");
+ fprintf (stderr, "Duplicate key positions selected\n");
short_usage (stderr);
exit (1);
}
@@ -803,7 +804,7 @@ Options::parse_options (int argc, char *argv[])
unsigned int total_keysig_size = key_pos - key_positions;
if (total_keysig_size == 0)
{
- fprintf (stderr, "No keys selected.\n");
+ fprintf (stderr, "No key positions selected.\n");
short_usage (stderr);
exit (1);
}
@@ -814,7 +815,7 @@ Options::parse_options (int argc, char *argv[])
when generating code. */
if (! _key_positions.sort())
{
- fprintf (stderr, "Duplicate keys selected\n");
+ fprintf (stderr, "Duplicate key positions selected\n");
short_usage (stderr);
exit (1);
}
@@ -939,6 +940,137 @@ Options::parse_options (int argc, char *argv[])
}
}
+/* ---------------------------- Class Positions ---------------------------- */
+
+/* Set operations. Assumes the array is in reverse order. */
+
+bool
+Positions::contains (int pos) const
+{
+ unsigned int count = _size;
+ const unsigned char *p = _positions + _size - 1;
+
+ for (; count > 0; p--, count--)
+ {
+ if (*p == pos)
+ return true;
+ if (*p > pos)
+ break;
+ }
+ return false;
+}
+
+void
+Positions::add (int pos)
+{
+ unsigned int count = _size;
+
+ if (count == MAX_KEY_POS + 1)
+ {
+ fprintf (stderr, "Positions::add internal error: overflow\n");
+ exit (1);
+ }
+
+ unsigned char *p = _positions + _size - 1;
+
+ for (; count > 0; p--, count--)
+ {
+ if (*p == pos)
+ {
+ fprintf (stderr, "Positions::add internal error: duplicate\n");
+ exit (1);
+ }
+ if (*p > pos)
+ break;
+ p[1] = p[0];
+ }
+ p[1] = pos;
+ _size++;
+}
+
+void
+Positions::remove (int pos)
+{
+ unsigned int count = _size;
+ if (count > 0)
+ {
+ unsigned char *p = _positions + _size - 1;
+
+ if (*p == pos)
+ {
+ _size--;
+ return;
+ }
+ if (*p < pos)
+ {
+ unsigned char prev = *p;
+
+ for (;;)
+ {
+ p--;
+ count--;
+ if (count == 0)
+ break;
+ if (*p == pos)
+ {
+ *p = prev;
+ _size--;
+ return;
+ }
+ if (*p > pos)
+ break;
+ unsigned char curr = *p;
+ *p = prev;
+ prev = curr;
+ }
+ }
+ }
+ fprintf (stderr, "Positions::remove internal error: not found\n");
+ exit (1);
+}
+
+/* Output in external syntax. */
+void
+Positions::print () const
+{
+ bool first = true;
+ bool seen_LASTCHAR = false;
+ unsigned int count = _size;
+ const unsigned char *p = _positions + _size - 1;
+
+ for (; count > 0; p--, count--)
+ {
+ if (*p == LASTCHAR)
+ seen_LASTCHAR = true;
+ else
+ {
+ if (!first)
+ printf (",");
+ printf ("%d", *p);
+ if (count > 0 && p[-1] == *p + 1)
+ {
+ printf ("-");
+ do
+ {
+ p--;
+ count--;
+ }
+ while (count > 0 && p[-1] == *p + 1);
+ printf ("%d", *p);
+ }
+ first = false;
+ }
+ }
+ if (seen_LASTCHAR)
+ {
+ if (!first)
+ printf (",");
+ printf ("$");
+ }
+}
+
+/* ------------------------------------------------------------------------- */
+
#ifndef __OPTIMIZE__
#define INLINE /* not inline */
diff --git a/src/options.h b/src/options.h
index edb1fca..ec32c97 100644
--- a/src/options.h
+++ b/src/options.h
@@ -41,63 +41,66 @@ enum Option_Type
/* Apply ordering heuristic to speed-up search time. */
ORDER = 1 << 1,
+ /* Use the given key positions. */
+ POSITIONS = 1 << 2,
+
/* Use all characters in hash function. */
- ALLCHARS = 1 << 2,
+ ALLCHARS = 1 << 3,
/* Handle user-defined type structured keyword input. */
- TYPE = 1 << 3,
+ TYPE = 1 << 4,
/* Randomly initialize the associated values table. */
- RANDOM = 1 << 4,
+ RANDOM = 1 << 5,
/* Generate switch output to save space. */
- SWITCH = 1 << 5,
+ SWITCH = 1 << 6,
/* Don't include keyword length in hash computations. */
- NOLENGTH = 1 << 6,
+ NOLENGTH = 1 << 7,
/* Generate a length table for string comparison. */
- LENTABLE = 1 << 7,
+ LENTABLE = 1 << 8,
/* Handle duplicate hash values for keywords. */
- DUP = 1 << 8,
+ DUP = 1 << 9,
/* Generate the hash function "fast". */
- FAST = 1 << 9,
+ FAST = 1 << 10,
/* Don't include user-defined type definition in output -- it's already
defined elsewhere. */
- NOTYPE = 1 << 10,
+ NOTYPE = 1 << 11,
/* Generate strncmp rather than strcmp. */
- COMP = 1 << 11,
+ COMP = 1 << 12,
/* Make the keyword table a global variable. */
- GLOBAL = 1 << 12,
+ GLOBAL = 1 << 13,
/* Make the generated tables readonly (const). */
- CONST = 1 << 13,
+ CONST = 1 << 14,
/* Generate K&R C code: no prototypes, no const. */
- KRC = 1 << 14,
+ KRC = 1 << 15,
/* Generate C code: no prototypes, but const (user can #define it away). */
- C = 1 << 15,
+ C = 1 << 16,
/* Generate ISO/ANSI C code: prototypes and const, but no class. */
- ANSIC = 1 << 16,
+ ANSIC = 1 << 17,
/* Generate C++ code: prototypes, const, class, inline, enum. */
- CPLUSPLUS = 1 << 17,
+ CPLUSPLUS = 1 << 18,
/* Use enum for constants. */
- ENUM = 1 << 18,
+ ENUM = 1 << 19,
/* Generate #include statements. */
- INCLUDE = 1 << 21,
+ INCLUDE = 1 << 20,
/* Assume 7-bit, not 8-bit, characters. */
- SEVENBIT = 1 << 22
+ SEVENBIT = 1 << 21
};
/* This class denotes a set of key positions. */
@@ -115,8 +118,14 @@ public:
/* Constructors. */
Positions ();
- Positions (int key1);
- Positions (int key1, int key2);
+ Positions (int pos1);
+ Positions (int pos1, int pos2);
+
+ /* Copy constructor. */
+ Positions (const Positions& src);
+
+ /* Assignment operator. */
+ Positions& operator= (const Positions& src);
/* Accessors. */
int operator[] (unsigned int index) const;
@@ -130,6 +139,14 @@ public:
Returns true if there are no duplicates, false otherwise. */
bool sort ();
+ /* Set operations. Assumes the array is in reverse order. */
+ bool contains (int pos) const;
+ void add (int pos);
+ void remove (int pos);
+
+ /* Output in external syntax. */
+ void print () const;
+
private:
/* Number of positions. */
unsigned int _size;
@@ -246,13 +263,9 @@ public:
void set_delimiters (const char *delimiters);
/* Returns key positions.
- Only to be called if !options[ALLCHARS]. */
+ Only to be used if !options[ALLCHARS]. */
const Positions& get_key_positions () const;
- /* Returns total distinct key positions.
- Only to be called if !options[ALLCHARS]. */
- int get_max_keysig_size () const;
-
private:
/* Prints program usage to given stream. */
void short_usage (FILE * stream) const;
diff --git a/src/options.icc b/src/options.icc
index 5b7e9b1..444c849 100644
--- a/src/options.icc
+++ b/src/options.icc
@@ -32,18 +32,37 @@ Positions::Positions ()
}
INLINE
-Positions::Positions (int key1)
+Positions::Positions (int pos1)
: _size (1)
{
- _positions[0] = key1;
+ _positions[0] = pos1;
}
INLINE
-Positions::Positions (int key1, int key2)
+Positions::Positions (int pos1, int pos2)
: _size (2)
{
- _positions[0] = key1;
- _positions[1] = key2;
+ _positions[0] = pos1;
+ _positions[1] = pos2;
+}
+
+/* Copy constructor. */
+
+INLINE
+Positions::Positions (const Positions& src)
+ : _size (src._size)
+{
+ memcpy (_positions, src._positions, _size * sizeof (_positions[0]));
+}
+
+/* Assignment operator. */
+
+INLINE Positions&
+Positions::operator= (const Positions& src)
+{
+ _size = src._size;
+ memcpy (_positions, src._positions, _size * sizeof (_positions[0]));
+ return *this;
}
/* Accessors. */
@@ -238,17 +257,9 @@ Options::get_delimiters () const
}
/* Returns key positions.
- Only to be called if !options[ALLCHARS]. */
+ Only to be used if !options[ALLCHARS]. */
INLINE const Positions&
Options::get_key_positions () const
{
return _key_positions;
}
-
-/* Returns total distinct key positions.
- Only to be called if !options[ALLCHARS]. */
-INLINE int
-Options::get_max_keysig_size () const
-{
- return _key_positions.get_size();
-}
diff --git a/src/output.cc b/src/output.cc
index eaf3260..8a34c21 100644
--- a/src/output.cc
+++ b/src/output.cc
@@ -87,8 +87,9 @@ Output::Output (KeywordExt_List *head, const char *struct_decl,
unsigned int verbatim_declarations_lineno,
const char *verbatim_code, const char *verbatim_code_end,
unsigned int verbatim_code_lineno,
- int total_keys, int total_duplicates, int max_key_len,
- int min_key_len, int alpha_size, const int *occurrences,
+ int total_keys, int max_key_len, int min_key_len,
+ const Positions& positions, int total_duplicates,
+ int alpha_size, const int *occurrences,
const int *asso_values)
: _head (head), _struct_decl (struct_decl),
_struct_decl_lineno (struct_decl_lineno), _return_type (return_type),
@@ -99,9 +100,10 @@ Output::Output (KeywordExt_List *head, const char *struct_decl,
_verbatim_code (verbatim_code),
_verbatim_code_end (verbatim_code_end),
_verbatim_code_lineno (verbatim_code_lineno),
- _total_keys (total_keys), _total_duplicates (total_duplicates),
+ _total_keys (total_keys),
_max_key_len (max_key_len), _min_key_len (min_key_len),
- _alpha_size (alpha_size),
+ _key_positions (positions),
+ _total_duplicates (total_duplicates), _alpha_size (alpha_size),
_occurrences (occurrences), _asso_values (asso_values)
{
}
@@ -480,32 +482,33 @@ Output::output_hash_function () const
printf ("{\n");
/* First the asso_values array. */
- {
- printf (" static %s%s asso_values[] =\n"
- " {",
- const_readonly_array,
- smallest_integral_type (_max_hash_value + 1));
+ if (option[ALLCHARS] || _key_positions.get_size() > 0)
+ {
+ printf (" static %s%s asso_values[] =\n"
+ " {",
+ const_readonly_array,
+ smallest_integral_type (_max_hash_value + 1));
- const int columns = 10;
+ const int columns = 10;
- /* Calculate maximum number of digits required for MAX_HASH_VALUE. */
- int field_width = 2;
- for (int trunc = _max_hash_value; (trunc /= 10) > 0;)
- field_width++;
+ /* Calculate maximum number of digits required for MAX_HASH_VALUE. */
+ int field_width = 2;
+ for (int trunc = _max_hash_value; (trunc /= 10) > 0;)
+ field_width++;
- for (int count = 0; count < _alpha_size; count++)
- {
- if (count > 0)
- printf (",");
- if ((count % columns) == 0)
- printf ("\n ");
- printf ("%*d", field_width,
- _occurrences[count] ? _asso_values[count] : _max_hash_value + 1);
- }
+ for (int count = 0; count < _alpha_size; count++)
+ {
+ if (count > 0)
+ printf (",");
+ if ((count % columns) == 0)
+ printf ("\n ");
+ printf ("%*d", field_width,
+ _occurrences[count] ? _asso_values[count] : _max_hash_value + 1);
+ }
- printf ("\n"
- " };\n");
- }
+ printf ("\n"
+ " };\n");
+ }
if (option[ALLCHARS])
{
@@ -526,12 +529,18 @@ Output::output_hash_function () const
" }\n"
" return hval;\n");
}
+ else if (_key_positions.get_size() == 0)
+ {
+ /* Trivial case: No key positions at all. */
+ printf (" return %s;\n",
+ option[NOLENGTH] ? "0" : "len");
+ }
else
{
/* Iterate through the key positions. Remember that Positions::sort()
has sorted them in decreasing order, with Positions::LASTCHAR coming
last. */
- PositionIterator iter (option.get_key_positions());
+ PositionIterator iter (_key_positions);
int key_pos;
/* Get the highest key position. */
@@ -547,9 +556,9 @@ Output::output_hash_function () const
printf (" return %s",
option[NOLENGTH] ? "" : "len + ");
- if (option.get_key_positions().get_size() == 2
- && option.get_key_positions()[0] == 1
- && option.get_key_positions()[1] == Positions::LASTCHAR)
+ if (_key_positions.get_size() == 2
+ && _key_positions[0] == 1
+ && _key_positions[1] == Positions::LASTCHAR)
/* Optimize special case of "-k 1,$". */
printf ("asso_values[%sstr[len - 1]] + asso_values[%sstr[0]]",
char_to_index, char_to_index);
@@ -1492,6 +1501,12 @@ Output::output ()
printf (" code produced by gperf version %s */\n", version_string);
option.print_options ();
printf ("\n");
+ if (!option[POSITIONS])
+ {
+ printf ("/* Computed positions: -k'");
+ _key_positions.print();
+ printf ("' */\n");
+ }
if (_verbatim_declarations < _verbatim_declarations_end)
{
diff --git a/src/output.h b/src/output.h
index d9756ab..e141645 100644
--- a/src/output.h
+++ b/src/output.h
@@ -27,6 +27,7 @@
#define output_h 1
#include "keyword-list.h"
+#include "options.h"
/* OSF/1 cxx needs these forward declarations. */
struct Output_Constants;
@@ -48,8 +49,9 @@ public:
const char *verbatim_code_end,
unsigned int verbatim_code_lineno,
int total_keys,
- int total_duplicates,
int max_key_len, int min_key_len,
+ const Positions& positions,
+ int total_duplicates,
int alpha_size,
const int *occurrences,
const int *asso_values);
@@ -113,12 +115,14 @@ private:
unsigned int const _verbatim_code_lineno;
/* Total number of keys, counting duplicates. */
int const _total_keys;
- /* Total number of duplicate hash values. */
- int const _total_duplicates;
/* Maximum length of the longest keyword. */
int const _max_key_len;
/* Minimum length of the shortest keyword. */
int const _min_key_len;
+ /* Key positions. Only to be used if !options[ALLCHARS]. */
+ Positions const _key_positions;
+ /* Total number of duplicate hash values. */
+ int const _total_duplicates;
/* Minimum hash value for all keywords. */
int _min_hash_value;
/* Maximum hash value for all keywords. */
diff --git a/src/search.cc b/src/search.cc
index a17cc28..98efcc6 100644
--- a/src/search.cc
+++ b/src/search.cc
@@ -27,7 +27,7 @@
#include <stdlib.h> /* declares exit(), rand(), srand() */
#include <string.h> /* declares memset(), memcmp() */
#include <time.h> /* declares time() */
-#include <limits.h> /* defines INT_MIN, INT_MAX */
+#include <limits.h> /* defines INT_MIN, INT_MAX, UINT_MAX */
#include "options.h"
#include "hash-table.h"
@@ -35,6 +35,7 @@
Search::Search (KeywordExt_List *list)
: _head (list),
+ _key_positions (option.get_key_positions()),
_alpha_size (option[SEVENBIT] ? 128 : 256),
_occurrences (new int[_alpha_size]),
_asso_values (new int[_alpha_size]),
@@ -43,7 +44,7 @@ Search::Search (KeywordExt_List *list)
}
void
-Search::prepare ()
+Search::preprepare ()
{
KeywordExt_List *temp;
@@ -52,10 +53,6 @@ Search::prepare ()
for (temp = _head; temp; temp = temp->rest())
_total_keys++;
- /* Initialize each keyword's _selchars array. */
- for (temp = _head; temp; temp = temp->rest())
- temp->first()->init_selchars();
-
/* Compute the minimum and maximum keyword length. */
_max_key_len = INT_MIN;
_min_key_len = INT_MAX;
@@ -78,6 +75,212 @@ Search::prepare ()
"len == 0 before calling the gperf generated lookup function.\n");
exit (1);
}
+}
+
+/* Initializes each keyword's _selchars array. */
+void
+Search::init_selchars (bool use_all_chars, const Positions& positions) const
+{
+ for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
+ temp->first()->init_selchars(use_all_chars, positions);
+}
+
+/* Deletes each keyword's _selchars array. */
+void
+Search::delete_selchars () const
+{
+ for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
+ temp->first()->delete_selchars();
+}
+
+/* Count the duplicate keywords that occur with a given set of positions. */
+unsigned int
+Search::count_duplicates (const Positions& positions) const
+{
+ init_selchars (false, positions);
+
+ unsigned int count = 0;
+ Hash_Table representatives (_total_keys, option[NOLENGTH]);
+ for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
+ {
+ KeywordExt *keyword = temp->first();
+ if (representatives.insert (keyword))
+ count++;
+ }
+
+ delete_selchars ();
+
+ return count;
+}
+
+void
+Search::find_positions ()
+{
+ /* Determine good key positions. */
+
+ /* 1. Find positions that must occur in order to distinguish duplicates. */
+ Positions mandatory;
+
+ if (!option[DUP])
+ {
+ for (KeywordExt_List *l1 = _head; l1 && l1->rest(); l1 = l1->rest())
+ {
+ KeywordExt *keyword1 = l1->first();
+ for (KeywordExt_List *l2 = l1->rest(); l2; l2 = l2->rest())
+ {
+ KeywordExt *keyword2 = l2->first();
+
+ /* If keyword1 and keyword2 have the same length and differ
+ in just one position, and it is not the last character,
+ this position is mandatory. */
+ if (keyword1->_allchars_length == keyword2->_allchars_length)
+ {
+ int n = keyword1->_allchars_length;
+ int i;
+ for (i = 1; i < n; i++)
+ if (keyword1->_allchars[i-1] != keyword2->_allchars[i-1])
+ break;
+ if (i < n
+ && memcmp (&keyword1->_allchars[i],
+ &keyword2->_allchars[i],
+ n - i)
+ == 0)
+ {
+ /* Position i is mandatory. */
+ if (!mandatory.contains (i))
+ mandatory.add (i);
+ }
+ }
+ }
+ }
+ }
+
+ /* 2. Add positions, as long as this decreases the duplicates count. */
+ int imax = (_max_key_len < Positions::MAX_KEY_POS
+ ? _max_key_len : Positions::MAX_KEY_POS);
+ Positions current = mandatory;
+ unsigned int current_duplicates_count = count_duplicates (current);
+ for (;;)
+ {
+ Positions best;
+ unsigned int best_duplicates_count = UINT_MAX;
+
+ for (int i = imax; i >= 0; i--)
+ if (!current.contains (i))
+ {
+ Positions tryal = current;
+ tryal.add (i);
+ unsigned int try_duplicates_count = count_duplicates (tryal);
+
+ /* We prefer 'try' to 'best' if it produces less duplicates,
+ or if it produces the same number of duplicates but with
+ a more efficient hash function. */
+ if (try_duplicates_count < best_duplicates_count
+ || (try_duplicates_count == best_duplicates_count && i > 0))
+ {
+ best = tryal;
+ best_duplicates_count = try_duplicates_count;
+ }
+ }
+
+ /* Stop adding positions when it gives no improvement. */
+ if (best_duplicates_count >= current_duplicates_count)
+ break;
+
+ current = best;
+ current_duplicates_count = best_duplicates_count;
+ }
+
+ /* 3. Remove positions, as long as this doesn't increase the duplicates
+ count. */
+ for (;;)
+ {
+ Positions best;
+ unsigned int best_duplicates_count = UINT_MAX;
+
+ for (int i = imax; i >= 0; i--)
+ if (current.contains (i) && !mandatory.contains (i))
+ {
+ Positions tryal = current;
+ tryal.remove (i);
+ unsigned int try_duplicates_count = count_duplicates (tryal);
+
+ /* We prefer 'try' to 'best' if it produces less duplicates,
+ or if it produces the same number of duplicates but with
+ a more efficient hash function. */
+ if (try_duplicates_count < best_duplicates_count
+ || (try_duplicates_count == best_duplicates_count && i == 0))
+ {
+ best = tryal;
+ best_duplicates_count = try_duplicates_count;
+ }
+ }
+
+ /* Stop removing positions when it gives no improvement. */
+ if (best_duplicates_count > current_duplicates_count)
+ break;
+
+ current = best;
+ current_duplicates_count = best_duplicates_count;
+ }
+
+ /* 4. Replace two positions by one, as long as this doesn't increase the
+ duplicates count. */
+ for (;;)
+ {
+ Positions best;
+ unsigned int best_duplicates_count = UINT_MAX;
+
+ for (int i1 = imax; i1 >= 0; i1--)
+ if (current.contains (i1) && !mandatory.contains (i1))
+ for (int i2 = imax; i2 >= 0; i2--)
+ if (current.contains (i2) && !mandatory.contains (i2) && i2 != i1)
+ for (int i3 = imax; i3 >= 0; i3--)
+ if (!current.contains (i3))
+ {
+ Positions tryal = current;
+ tryal.remove (i1);
+ tryal.remove (i2);
+ tryal.add (i3);
+ unsigned int try_duplicates_count =
+ count_duplicates (tryal);
+
+ /* We prefer 'try' to 'best' if it produces less duplicates,
+ or if it produces the same number of duplicates but with
+ a more efficient hash function. */
+ if (try_duplicates_count < best_duplicates_count
+ || (try_duplicates_count == best_duplicates_count
+ && (i1 == 0 || i2 == 0 || i3 > 0)))
+ {
+ best = tryal;
+ best_duplicates_count = try_duplicates_count;
+ }
+ }
+
+ /* Stop removing positions when it gives no improvement. */
+ if (best_duplicates_count > current_duplicates_count)
+ break;
+
+ current = best;
+ current_duplicates_count = best_duplicates_count;
+ }
+
+ /* That's it. Hope it's good enough. */
+ _key_positions = current;
+}
+
+void
+Search::prepare ()
+{
+ KeywordExt_List *temp;
+
+ preprepare ();
+
+ if (!option[POSITIONS])
+ find_positions ();
+
+ /* Initialize each keyword's _selchars array. */
+ init_selchars (option[ALLCHARS], _key_positions);
/* Check for duplicates, i.e. keywords with the same _selchars array
(and - if !option[NOLENGTH] - also the same length).
@@ -140,8 +343,12 @@ Search::prepare ()
_total_duplicates);
else
{
- fprintf (stderr, "%d input keys have identical hash values,\ntry different key positions or use option -D.\n",
+ fprintf (stderr, "%d input keys have identical hash values,\n",
_total_duplicates);
+ if (option[POSITIONS])
+ fprintf (stderr, "try different key positions or use option -D.\n");
+ else
+ fprintf (stderr, "use option -D.\n");
exit (1);
}
}
@@ -313,7 +520,7 @@ Search::max_key_length () const
int
Search::get_max_keysig_size () const
{
- return option[ALLCHARS] ? _max_key_len : option.get_max_keysig_size ();
+ return option[ALLCHARS] ? _max_key_len : _key_positions.get_size();
}
/* ---------------------- Finding good asso_values[] ----------------------- */
@@ -758,9 +965,12 @@ Search::optimize ()
else /* Yow, big problems. we're outta here! */
{
fprintf (stderr,
- "\nInternal error, duplicate value %d:\n"
- "try options -D or -m or -r, or use new key positions.\n\n",
+ "\nInternal error, duplicate hash code value %d:\n",
hashcode);
+ if (option[POSITIONS])
+ fprintf (stderr, "try options -m or -D or -r, or use new key positions.\n\n");
+ else
+ fprintf (stderr, "try options -m or -D or -r.\n\n");
exit (1);
}
}
diff --git a/src/search.h b/src/search.h
index 7e1f781..0d67458 100644
--- a/src/search.h
+++ b/src/search.h
@@ -27,6 +27,7 @@
#define search_h 1
#include "keyword-list.h"
+#include "options.h"
#include "bool-array.h"
class Search
@@ -36,6 +37,18 @@ public:
~Search ();
void optimize ();
private:
+ void preprepare ();
+
+ /* Initializes each keyword's _selchars array. */
+ void init_selchars (bool use_all_chars, const Positions& positions) const;
+ /* Deletes each keyword's _selchars array. */
+ void delete_selchars () const;
+
+ /* Count the duplicate keywords that occur with a given set of positions. */
+ unsigned int count_duplicates (const Positions& positions) const;
+
+ void find_positions ();
+
void prepare ();
/* Computes the sum of occurrences of the _selchars of a keyword. */
@@ -90,16 +103,19 @@ public:
/* Total number of keywords, counting duplicates. */
int _total_keys;
- /* Total number of duplicates that have been moved to _duplicate_link lists
- (not counting their representatives which stay on the main list). */
- int _total_duplicates;
-
/* Maximum length of the longest keyword. */
int _max_key_len;
/* Minimum length of the shortest keyword. */
int _min_key_len;
+ /* User-specified or computed key positions. */
+ Positions _key_positions;
+
+ /* Total number of duplicates that have been moved to _duplicate_link lists
+ (not counting their representatives which stay on the main list). */
+ int _total_duplicates;
+
/* Size of alphabet. */
int const _alpha_size;