summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2003-04-07 09:50:11 +0000
committerBruno Haible <bruno@clisp.org>2003-04-07 09:50:11 +0000
commit2535f34494011d57d9603d6e44604d357f83a4b0 (patch)
treeb64db3a05de58cdd2af82157b5218a13fd318b83
parentec3d1127fa7742bd291bfe039fd76b4d2381da6f (diff)
downloadgperf-2535f34494011d57d9603d6e44604d357f83a4b0.tar.gz
Move the handling of ALLCHARS (-k'*') into the Positions class.
-rw-r--r--ChangeLog52
-rw-r--r--src/keyword.cc100
-rw-r--r--src/keyword.h6
-rw-r--r--src/options.cc7
-rw-r--r--src/options.h14
-rw-r--r--src/options.icc3
-rw-r--r--src/output.cc30
-rw-r--r--src/output.h2
-rw-r--r--src/positions.cc63
-rw-r--r--src/positions.h44
-rw-r--r--src/positions.icc139
-rw-r--r--src/search.cc170
-rw-r--r--src/search.h4
-rw-r--r--tests/chill.exp1
14 files changed, 396 insertions, 239 deletions
diff --git a/ChangeLog b/ChangeLog
index e4f2495..ffed0de 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,55 @@
+2002-12-12 Bruno Haible <bruno@clisp.org>
+
+ * src/positions.h (Positions::is_useall, Positions::set_useall,
+ Positions::iterator, Positions::reviterator): New method declarations.
+ (Positions::_useall): New field.
+ (PositionIterator): Make constructor private. Add a constructor and a
+ copy constructor.
+ (PositionIterator::remaining): New declaration.
+ (PositionReverseIterator): Make constructor private. Add a constructor
+ and a copy constructor.
+ (PositionReverseIterator::remaining): New declaration.
+ (PositionReverseIterator::_minindex): New field.
+ * src/positions.icc (Positions::Positions): Initialize _useall.
+ (Positions::operator=): Likewise.
+ (Positions::is_useall, Positions::set_useall): New methods.
+ (Positions::sort): Do nothing if _useall is set.
+ (Positions::iterator, Positions::reviterator): New methods.
+ (PositionIterator::PositionIterator): New constructor.
+ (PositionIterator::remaining): New method.
+ (PositionReverseIterator::PositionReverseIterator): New constructor.
+ (PositionReverseIterator::next): Use _minindex as bound.
+ (PositionReverseIterator::remaining): New method.
+ * src/positions.cc (Positions::add, Positions::remove): Reset the
+ useall flag.
+ (Positions::print): Handle the useall case.
+ * src/options.h (ALLCHARS): Remove.
+ * src/options.cc (Options::~Options): Update.
+ (Options::parse_options): Use Positions::set_useall().
+ * src/keyword.h (KeywordExt::init_selchars_tuple,
+ KeywordExt::init_selchars_multiset, KeywordExt::init_selchars_low):
+ Remove use_all_chars argument.
+ * src/keyword.cc (KeywordExt::init_selchars_low): Remove use_all_chars
+ argument. Tell the position iterator to stop at _allchars_length.
+ Remove special case code for -k'*'.
+ (KeywordExt::init_selchars_tuple, KeywordExt::init_selchars_multiset):
+ Remove use_all_chars argument.
+ * src/search.h (Search::init_selchars_tuple): Remove use_all_chars
+ argument.
+ (Search::init_selchars_multiset): Likewise.
+ * src/search.cc (Search::init_selchars_tuple): Remove use_all_chars
+ argument.
+ (Search::count_duplicates_tuple, Search::find_positions): Update.
+ (Search::compute_alpha_unify): Remove special case code for -k'*'.
+ (Search::init_selchars_multiset): Remove use_all_chars argument.
+ (Search::count_duplicates_multiset): Update.
+ (Search::find_alpha_inc): Remove special case code for -k'*'.
+ (Search::prepare): Update.
+ (Search::get_max_keysig_size): Update.
+ * src/output.cc (Output::output_hash_function): Remove special case
+ code for -k'*'.
+ * tests/chill.exp: Regenerated.
+
2002-12-11 Bruno Haible <bruno@clisp.org>
Change the positions to be 0-based, instead of 1-based.
diff --git a/src/keyword.cc b/src/keyword.cc
index fc819b6..fbf1a0a 100644
--- a/src/keyword.cc
+++ b/src/keyword.cc
@@ -48,64 +48,62 @@ static inline void sort_char_set (unsigned int *base, int len)
}
/* Initializes selchars and selchars_length.
- The hash function will be computed as
- asso_values[allchars[key_pos[0]]] + asso_values[allchars[key_pos[1]]] + ...
- We compute selchars as the multiset
- { allchars[key_pos[0]], allchars[key_pos[1]], ... }
- so that the hash function becomes
- asso_values[selchars[0]] + asso_values[selchars[1]] + ...
+
+ General idea:
+ The hash function will be computed as
+ asso_values[allchars[key_pos[0]]] +
+ asso_values[allchars[key_pos[1]]] + ...
+ We compute selchars as the multiset
+ { allchars[key_pos[0]], allchars[key_pos[1]], ... }
+ so that the hash function becomes
+ asso_values[selchars[0]] + asso_values[selchars[1]] + ...
Furthermore we sort the selchars array, to ease detection of duplicates
later.
+
+ More in detail: The arguments alpha_unify (used for case-insensitive
+ hash functions) and alpha_inc (used to disambiguate permutations)
+ apply slight modifications. The hash function will be computed as
+ sum (j=0,1,...: k = key_pos[j]:
+ asso_values[alpha_unify[allchars[k]+alpha_inc[k]]])
+ + (allchars_length if !option[NOLENGTH], 0 otherwise).
+ We compute selchars as the multiset
+ { alpha_unify[allchars[k]+alpha_inc[k]] : j=0,1,..., k = key_pos[j] }
+ so that the hash function becomes
+ asso_values[selchars[0]] + asso_values[selchars[1]] + ...
+ + (allchars_length if !option[NOLENGTH], 0 otherwise).
*/
unsigned int *
-KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
+KeywordExt::init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
{
- const char *k = _allchars;
- unsigned int *key_set =
- new unsigned int[(use_all_chars ? _allchars_length : positions.get_size ())];
+ /* Iterate through the list of positions, initializing selchars
+ (via ptr). */
+ PositionIterator iter = positions.iterator(_allchars_length);
+
+ unsigned int *key_set = new unsigned int[iter.remaining()];
unsigned int *ptr = key_set;
- if (use_all_chars)
- /* Use all the character positions in the KEY. */
- for (int i = _allchars_length; i > 0; k++, i--)
- {
- unsigned int c = static_cast<unsigned char>(*k);
- if (alpha_inc)
- c += alpha_inc[k-_allchars];
- if (alpha_unify)
- c = alpha_unify[c];
- *ptr = c;
- ptr++;
- }
- else
- /* Only use those character positions specified by the user. */
+ for (int i; (i = iter.next ()) != PositionIterator::EOS; )
{
- /* Iterate through the list of key_positions, initializing selchars
- (via ptr). */
- PositionIterator iter (positions);
-
- for (int i; (i = iter.next ()) != PositionIterator::EOS; )
+ unsigned int c;
+ if (i == Positions::LASTCHAR)
+ /* Special notation for last KEY position, i.e. '$'. */
+ c = static_cast<unsigned char>(_allchars[_allchars_length - 1]);
+ else if (i < _allchars_length)
{
- unsigned int c;
- if (i == Positions::LASTCHAR)
- /* Special notation for last KEY position, i.e. '$'. */
- c = static_cast<unsigned char>(_allchars[_allchars_length - 1]);
- else if (i < _allchars_length)
- {
- /* Within range of KEY length, so we'll keep it. */
- c = static_cast<unsigned char>(_allchars[i]);
- if (alpha_inc)
- c += alpha_inc[i];
- }
- else
- /* Out of range of KEY length, so we'll just skip it. */
- continue;
- if (alpha_unify)
- c = alpha_unify[c];
- *ptr = c;
- ptr++;
+ /* Within range of KEY length, so we'll keep it. */
+ c = static_cast<unsigned char>(_allchars[i]);
+ if (alpha_inc)
+ c += alpha_inc[i];
}
+ else
+ /* Out of range of KEY length, the iterator should not have
+ produced this. */
+ abort ();
+ if (alpha_unify)
+ c = alpha_unify[c];
+ *ptr = c;
+ ptr++;
}
_selchars = key_set;
@@ -115,16 +113,16 @@ KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, c
}
void
-KeywordExt::init_selchars_tuple (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify)
+KeywordExt::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify)
{
- init_selchars_low (use_all_chars, positions, alpha_unify, NULL);
+ init_selchars_low (positions, alpha_unify, NULL);
}
void
-KeywordExt::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
+KeywordExt::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
{
unsigned int *selchars =
- init_selchars_low (use_all_chars, positions, alpha_unify, alpha_inc);
+ init_selchars_low (positions, alpha_unify, alpha_inc);
/* Sort the selchars elements alphabetically. */
sort_char_set (selchars, _selchars_length);
diff --git a/src/keyword.h b/src/keyword.h
index 9f09186..c0e2156 100644
--- a/src/keyword.h
+++ b/src/keyword.h
@@ -68,9 +68,9 @@ struct KeywordExt : public Keyword
/* Methods depending on the keyposition list. */
/* Initializes selchars and selchars_length, without reordering. */
- void init_selchars_tuple (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify);
+ void init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify);
/* Initializes selchars and selchars_length, with reordering. */
- void init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc);
+ void init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc);
/* Deletes selchars. */
void delete_selchars ();
@@ -81,7 +81,7 @@ struct KeywordExt : public Keyword
int _final_index;
private:
- unsigned int * init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc);
+ unsigned int * init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc);
};
/* An abstract factory for creating Keyword instances.
diff --git a/src/options.cc b/src/options.cc
index 6e8793e..8880858 100644
--- a/src/options.cc
+++ b/src/options.cc
@@ -514,14 +514,14 @@ Options::~Options ()
_function_name, _hash_name, _wordlist_name, _slot_name,
_initializer_suffix, _asso_iterations, _jump, _size_multiple,
_initial_asso_value, _delimiters, _total_switches);
- if (_option_word & ALLCHARS)
+ if (_key_positions.is_useall())
fprintf (stderr, "all characters are used in the hash function\n");
else
{
fprintf (stderr, "maximum keysig size = %d\nkey positions are: \n",
_key_positions.get_size());
- PositionIterator iter (_key_positions);
+ PositionIterator iter = _key_positions.iterator();
for (int pos; (pos = iter.next()) != PositionIterator::EOS; )
if (pos == Positions::LASTCHAR)
fprintf (stderr, "$\n");
@@ -773,9 +773,10 @@ Options::parse_options (int argc, char *argv[])
PositionStringParser sparser (/*getopt*/optarg, 1, Positions::MAX_KEY_POS, Positions::LASTCHAR, BAD_VALUE, EOS);
if (/*getopt*/optarg [0] == '*') /* Use all the characters for hashing!!!! */
- _option_word |= ALLCHARS;
+ _key_positions.set_useall(true);
else
{
+ _key_positions.set_useall(false);
int *key_positions = _key_positions.pointer();
int *key_pos;
diff --git a/src/options.h b/src/options.h
index 7eb7fe1..0bde55c 100644
--- a/src/options.h
+++ b/src/options.h
@@ -96,22 +96,19 @@ enum Option_Type
/* Use the given key positions. */
POSITIONS = 1 << 16,
- /* Use all characters in hash function. */
- ALLCHARS = 1 << 17,
-
/* Handle duplicate hash values for keywords. */
- DUP = 1 << 18,
+ DUP = 1 << 17,
/* Don't include keyword length in hash computations. */
- NOLENGTH = 1 << 19,
+ NOLENGTH = 1 << 18,
/* Randomly initialize the associated values table. */
- RANDOM = 1 << 20,
+ RANDOM = 1 << 19,
/* --- Informative output --- */
/* Enable debugging (prints diagnostics to stderr). */
- DEBUG = 1 << 21
+ DEBUG = 1 << 20
};
/* Class manager for gperf program Options. */
@@ -197,8 +194,7 @@ public:
/* Sets the delimiters string, if not already set. */
void set_delimiters (const char *delimiters);
- /* Returns key positions.
- Only to be used if !options[ALLCHARS]. */
+ /* Returns key positions. */
const Positions& get_key_positions () const;
private:
diff --git a/src/options.icc b/src/options.icc
index a9295a0..f519133 100644
--- a/src/options.icc
+++ b/src/options.icc
@@ -135,8 +135,7 @@ Options::get_delimiters () const
return _delimiters;
}
-/* Returns key positions.
- Only to be used if !options[ALLCHARS]. */
+/* Returns key positions. */
INLINE const Positions&
Options::get_key_positions () const
{
diff --git a/src/output.cc b/src/output.cc
index 05ac35a..94e5952 100644
--- a/src/output.cc
+++ b/src/output.cc
@@ -606,7 +606,7 @@ Output::output_hash_function () const
printf ("{\n");
/* First the asso_values array. */
- if (option[ALLCHARS] || _key_positions.get_size() > 0)
+ if (_key_positions.get_size() > 0)
{
printf (" static %s%s asso_values[] =\n"
" {",
@@ -633,31 +633,7 @@ Output::output_hash_function () const
" };\n");
}
- if (option[ALLCHARS])
- {
- /* User wants *all* characters considered in hash. */
- printf (" register int hval = %s;\n\n"
- " switch (%s)\n"
- " {\n"
- " default:\n",
- option[NOLENGTH] ? "0" : "len",
- option[NOLENGTH] ? "len" : "hval");
-
- for (int i = _max_key_len; i > 0; i--)
- {
- printf (" case %d:\n"
- " hval += asso_values[%sstr[%d]",
- i, char_to_index, i - 1);
- if (_alpha_inc[i - 1])
- printf ("+%u", _alpha_inc[i - 1]);
- printf ("];\n");
- }
-
- printf (" break;\n"
- " }\n"
- " return hval;\n");
- }
- else if (_key_positions.get_size() == 0)
+ if (_key_positions.get_size() == 0)
{
/* Trivial case: No key positions at all. */
printf (" return %s;\n",
@@ -668,7 +644,7 @@ Output::output_hash_function () const
/* Iterate through the key positions. Remember that Positions::sort()
has sorted them in decreasing order, with Positions::LASTCHAR coming
last. */
- PositionIterator iter (_key_positions);
+ PositionIterator iter = _key_positions.iterator(_max_key_len);
int key_pos;
/* Get the highest key position. */
diff --git a/src/output.h b/src/output.h
index 843fdad..780b4f1 100644
--- a/src/output.h
+++ b/src/output.h
@@ -119,7 +119,7 @@ private:
int const _max_key_len;
/* Minimum length of the shortest keyword. */
int const _min_key_len;
- /* Key positions. Only to be used if !options[ALLCHARS]. */
+ /* Key positions. */
Positions const _key_positions;
/* Adjustments to add to bytes add specific key positions. */
const unsigned int * const _alpha_inc;
diff --git a/src/positions.cc b/src/positions.cc
index 2a096c1..f6439e3 100644
--- a/src/positions.cc
+++ b/src/positions.cc
@@ -50,6 +50,8 @@ Positions::contains (int pos) const
void
Positions::add (int pos)
{
+ set_useall (false);
+
unsigned int count = _size;
if (count == MAX_SIZE)
@@ -78,6 +80,8 @@ Positions::add (int pos)
void
Positions::remove (int pos)
{
+ set_useall (false);
+
unsigned int count = _size;
if (count > 0)
{
@@ -120,40 +124,45 @@ Positions::remove (int pos)
void
Positions::print () const
{
- bool first = true;
- bool seen_LASTCHAR = false;
- unsigned int count = _size;
- const int *p = _positions + _size - 1;
-
- for (; count > 0; p--)
+ if (_useall)
+ printf ("*");
+ else
{
- count--;
- if (*p == LASTCHAR)
- seen_LASTCHAR = true;
- else
+ bool first = true;
+ bool seen_LASTCHAR = false;
+ unsigned int count = _size;
+ const int *p = _positions + _size - 1;
+
+ for (; count > 0; p--)
{
- if (!first)
- printf (",");
- printf ("%d", *p + 1);
- if (count > 0 && p[-1] == *p + 1)
+ count--;
+ if (*p == LASTCHAR)
+ seen_LASTCHAR = true;
+ else
{
- printf ("-");
- do
+ if (!first)
+ printf (",");
+ printf ("%d", *p + 1);
+ if (count > 0 && p[-1] == *p + 1)
{
- p--;
- count--;
+ printf ("-");
+ do
+ {
+ p--;
+ count--;
+ }
+ while (count > 0 && p[-1] == *p + 1);
+ printf ("%d", *p + 1);
}
- while (count > 0 && p[-1] == *p + 1);
- printf ("%d", *p + 1);
+ first = false;
}
- first = false;
}
- }
- if (seen_LASTCHAR)
- {
- if (!first)
- printf (",");
- printf ("$");
+ if (seen_LASTCHAR)
+ {
+ if (!first)
+ printf (",");
+ printf ("$");
+ }
}
}
diff --git a/src/positions.h b/src/positions.h
index 984195a..d16c214 100644
--- a/src/positions.h
+++ b/src/positions.h
@@ -57,10 +57,12 @@ public:
Positions& operator= (const Positions& src);
/* Accessors. */
+ bool is_useall () const;
int operator[] (unsigned int index) const;
unsigned int get_size () const;
/* Write access. */
+ void set_useall (bool useall);
int * pointer ();
void set_size (unsigned int size);
@@ -68,6 +70,17 @@ public:
Returns true if there are no duplicates, false otherwise. */
bool sort ();
+ /* Creates an iterator, returning the positions in descending order. */
+ PositionIterator iterator () const;
+ /* Creates an iterator, returning the positions in descending order,
+ that apply to strings of length <= maxlen. */
+ PositionIterator iterator (int maxlen) const;
+ /* Creates an iterator, returning the positions in ascending order. */
+ PositionReverseIterator reviterator () const;
+ /* Creates an iterator, returning the positions in ascending order,
+ that apply to strings of length <= maxlen. */
+ PositionReverseIterator reviterator (int maxlen) const;
+
/* Set operations. Assumes the array is in reverse order. */
bool contains (int pos) const;
void add (int pos);
@@ -77,6 +90,8 @@ public:
void print () const;
private:
+ /* The special case denoted by '*'. */
+ bool _useall;
/* Number of positions. */
unsigned int _size;
/* Array of positions. 0 for the first char, 1 for the second char etc.,
@@ -88,9 +103,10 @@ private:
class PositionIterator
{
+ friend class Positions;
public:
- /* Initializes an iterator through POSITIONS. */
- PositionIterator (Positions const& positions);
+ /* Copy constructor. */
+ PositionIterator (const PositionIterator& src);
/* End of iteration marker. */
enum { EOS = -2 };
@@ -98,7 +114,16 @@ public:
/* Retrieves the next position, or EOS past the end. */
int next ();
+ /* Returns the number of remaining positions, i.e. how often next() will
+ return a value != EOS. */
+ unsigned int remaining () const;
+
private:
+ /* Initializes an iterator through POSITIONS. */
+ PositionIterator (Positions const& positions);
+ /* Initializes an iterator through POSITIONS, ignoring positions >= maxlen. */
+ PositionIterator (Positions const& positions, int maxlen);
+
const Positions& _set;
unsigned int _index;
};
@@ -108,9 +133,10 @@ private:
class PositionReverseIterator
{
+ friend class Positions;
public:
- /* Initializes an iterator through POSITIONS. */
- PositionReverseIterator (Positions const& positions);
+ /* Copy constructor. */
+ PositionReverseIterator (const PositionReverseIterator& src);
/* End of iteration marker. */
enum { EOS = -2 };
@@ -118,9 +144,19 @@ public:
/* Retrieves the next position, or EOS past the end. */
int next ();
+ /* Returns the number of remaining positions, i.e. how often next() will
+ return a value != EOS. */
+ unsigned int remaining () const;
+
private:
+ /* Initializes an iterator through POSITIONS. */
+ PositionReverseIterator (Positions const& positions);
+ /* Initializes an iterator through POSITIONS, ignoring positions >= maxlen. */
+ PositionReverseIterator (Positions const& positions, int maxlen);
+
const Positions& _set;
unsigned int _index;
+ unsigned int _minindex;
};
#ifdef __OPTIMIZE__
diff --git a/src/positions.icc b/src/positions.icc
index 32af6b1..ca9347d 100644
--- a/src/positions.icc
+++ b/src/positions.icc
@@ -30,20 +30,23 @@
INLINE
Positions::Positions ()
- : _size (0)
+ : _useall (false),
+ _size (0)
{
}
INLINE
Positions::Positions (int pos1)
- : _size (1)
+ : _useall (false),
+ _size (1)
{
_positions[0] = pos1;
}
INLINE
Positions::Positions (int pos1, int pos2)
- : _size (2)
+ : _useall (false),
+ _size (2)
{
_positions[0] = pos1;
_positions[1] = pos2;
@@ -53,7 +56,8 @@ Positions::Positions (int pos1, int pos2)
INLINE
Positions::Positions (const Positions& src)
- : _size (src._size)
+ : _useall (src._useall),
+ _size (src._size)
{
memcpy (_positions, src._positions, _size * sizeof (_positions[0]));
}
@@ -63,6 +67,7 @@ Positions::Positions (const Positions& src)
INLINE Positions&
Positions::operator= (const Positions& src)
{
+ _useall = src._useall;
_size = src._size;
memcpy (_positions, src._positions, _size * sizeof (_positions[0]));
return *this;
@@ -70,6 +75,12 @@ Positions::operator= (const Positions& src)
/* Accessors. */
+INLINE bool
+Positions::is_useall () const
+{
+ return _useall;
+}
+
INLINE int
Positions::operator[] (unsigned int index) const
{
@@ -84,6 +95,20 @@ Positions::get_size () const
/* Write access. */
+INLINE void
+Positions::set_useall (bool useall)
+{
+ _useall = useall;
+ if (useall)
+ {
+ /* The positions are 0, 1, ..., MAX_KEY_POS-1, in descending order. */
+ _size = MAX_KEY_POS;
+ int *ptr = _positions;
+ for (int i = MAX_KEY_POS - 1; i >= 0; i--)
+ *ptr++ = i;
+ }
+}
+
INLINE int *
Positions::pointer ()
{
@@ -101,6 +126,9 @@ Positions::set_size (unsigned int size)
INLINE bool
Positions::sort ()
{
+ if (_useall)
+ return true;
+
/* Bubble sort. */
bool duplicate_free = true;
int *base = _positions;
@@ -121,6 +149,36 @@ Positions::sort ()
return duplicate_free;
}
+/* Creates an iterator, returning the positions in descending order. */
+INLINE PositionIterator
+Positions::iterator () const
+{
+ return PositionIterator (*this);
+}
+
+/* Creates an iterator, returning the positions in descending order,
+ that apply to strings of length <= maxlen. */
+INLINE PositionIterator
+Positions::iterator (int maxlen) const
+{
+ return PositionIterator (*this, maxlen);
+}
+
+/* Creates an iterator, returning the positions in ascending order. */
+INLINE PositionReverseIterator
+Positions::reviterator () const
+{
+ return PositionReverseIterator (*this);
+}
+
+/* Creates an iterator, returning the positions in ascending order,
+ that apply to strings of length <= maxlen. */
+INLINE PositionReverseIterator
+Positions::reviterator (int maxlen) const
+{
+ return PositionReverseIterator (*this, maxlen);
+}
+
/* ------------------------- Class PositionIterator ------------------------ */
/* Initializes an iterator through POSITIONS. */
@@ -131,6 +189,24 @@ PositionIterator::PositionIterator (Positions const& positions)
{
}
+/* Initializes an iterator through POSITIONS, ignoring positions >= maxlen. */
+INLINE
+PositionIterator::PositionIterator (Positions const& positions, int maxlen)
+ : _set (positions)
+{
+ if (positions._useall)
+ _index = (maxlen <= Positions::MAX_KEY_POS ? Positions::MAX_KEY_POS - maxlen : 0);
+ else
+ {
+ unsigned int index;
+ for (index = 0;
+ index < positions._size && positions._positions[index] >= maxlen;
+ index++)
+ ;
+ _index = index;
+ }
+}
+
/* Retrieves the next position, or EOS past the end. */
INLINE int
PositionIterator::next ()
@@ -138,19 +214,72 @@ PositionIterator::next ()
return (_index < _set._size ? _set._positions[_index++] : EOS);
}
+/* Returns the number of remaining positions, i.e. how often next() will
+ return a value != EOS. */
+INLINE unsigned int
+PositionIterator::remaining () const
+{
+ return _set._size - _index;
+}
+
+/* Copy constructor. */
+INLINE
+PositionIterator::PositionIterator (const PositionIterator& src)
+ : _set (src._set),
+ _index (src._index)
+{
+}
+
/* --------------------- Class PositionReverseIterator --------------------- */
/* Initializes an iterator through POSITIONS. */
INLINE
PositionReverseIterator::PositionReverseIterator (Positions const& positions)
: _set (positions),
+ _index (_set._size),
+ _minindex (0)
+{
+}
+
+/* Initializes an iterator through POSITIONS, ignoring positions >= maxlen. */
+INLINE
+PositionReverseIterator::PositionReverseIterator (Positions const& positions, int maxlen)
+ : _set (positions),
_index (_set._size)
{
+ if (positions._useall)
+ _minindex = (maxlen <= Positions::MAX_KEY_POS ? Positions::MAX_KEY_POS - maxlen : 0);
+ else
+ {
+ unsigned int index;
+ for (index = 0;
+ index < positions._size && positions._positions[index] >= maxlen;
+ index++)
+ ;
+ _minindex = index;
+ }
}
/* Retrieves the next position, or EOS past the end. */
INLINE int
PositionReverseIterator::next ()
{
- return (_index > 0 ? _set._positions[--_index] : EOS);
+ return (_index > _minindex ? _set._positions[--_index] : EOS);
+}
+
+/* Returns the number of remaining positions, i.e. how often next() will
+ return a value != EOS. */
+INLINE unsigned int
+PositionReverseIterator::remaining () const
+{
+ return _index - _minindex;
+}
+
+/* Copy constructor. */
+INLINE
+PositionReverseIterator::PositionReverseIterator (const PositionReverseIterator& src)
+ : _set (src._set),
+ _index (src._index),
+ _minindex (src._minindex)
+{
}
diff --git a/src/search.cc b/src/search.cc
index a1b71a8..61fb5ef 100644
--- a/src/search.cc
+++ b/src/search.cc
@@ -175,10 +175,10 @@ Search::compute_alpha_unify () const
/* Initializes each keyword's _selchars array. */
void
-Search::init_selchars_tuple (bool use_all_chars, const Positions& positions) const
+Search::init_selchars_tuple (const Positions& positions) const
{
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
- temp->first()->init_selchars_tuple(use_all_chars, positions, _alpha_unify);
+ temp->first()->init_selchars_tuple(positions, _alpha_unify);
}
/* Deletes each keyword's _selchars array. */
@@ -199,7 +199,7 @@ Search::count_duplicates_tuple (const Positions& positions) const
/* Run through the keyword list and count the duplicates incrementally.
The result does not depend on the order of the keyword list, thanks to
the formula above. */
- init_selchars_tuple (option[ALLCHARS], positions);
+ init_selchars_tuple (positions);
unsigned int count = 0;
{
@@ -411,7 +411,7 @@ Search::find_positions ()
{
/* Print the result. */
fprintf (stderr, "\nComputed positions: ");
- PositionReverseIterator iter (_key_positions);
+ PositionReverseIterator iter = _key_positions.reviterator();
bool seen_lastchar = false;
bool first = true;
for (int i; (i = iter.next ()) != PositionReverseIterator::EOS; )
@@ -482,49 +482,29 @@ Search::compute_alpha_unify (const Positions& positions, const unsigned int *alp
{
KeywordExt *keyword = temp->first();
- if (option[ALLCHARS])
- /* Iterate through all character positions. */
- for (int i = 0; i < keyword->_allchars_length; i++)
- {
- unsigned int c = static_cast<unsigned char>(keyword->_allchars[i]);
- if (c >= 'A' && c <= 'Z')
- c += 'a' - 'A';
- if (c >= 'a' && c <= 'z')
- {
- c += alpha_inc[i];
- /* Unify c with c - ('a'-'A'). */
- unsigned int d = alpha_unify[c];
- unsigned int b = c - ('a'-'A');
- for (int a = b; a >= 0 && alpha_unify[a] == b; a -= ('a'-'A'))
- alpha_unify[a] = d;
- }
- }
- else
- {
- /* Iterate through the selected character positions. */
- PositionIterator iter (positions);
+ /* Iterate through the selected character positions. */
+ PositionIterator iter = positions.iterator(keyword->_allchars_length);
- for (int i; (i = iter.next ()) != PositionIterator::EOS; )
+ for (int i; (i = iter.next ()) != PositionIterator::EOS; )
+ {
+ unsigned int c;
+ if (i == Positions::LASTCHAR)
+ c = static_cast<unsigned char>(keyword->_allchars[keyword->_allchars_length - 1]);
+ else if (i < keyword->_allchars_length)
+ c = static_cast<unsigned char>(keyword->_allchars[i]);
+ else
+ abort ();
+ if (c >= 'A' && c <= 'Z')
+ c += 'a' - 'A';
+ if (c >= 'a' && c <= 'z')
{
- unsigned int c;
- if (i == Positions::LASTCHAR)
- c = static_cast<unsigned char>(keyword->_allchars[keyword->_allchars_length - 1]);
- else if (i < keyword->_allchars_length)
- c = static_cast<unsigned char>(keyword->_allchars[i]);
- else
- continue;
- if (c >= 'A' && c <= 'Z')
- c += 'a' - 'A';
- if (c >= 'a' && c <= 'z')
- {
- if (i != Positions::LASTCHAR)
- c += alpha_inc[i];
- /* Unify c with c - ('a'-'A'). */
- unsigned int d = alpha_unify[c];
- unsigned int b = c - ('a'-'A');
- for (int a = b; a >= 0 && alpha_unify[a] == b; a -= ('a'-'A'))
- alpha_unify[a] = d;
- }
+ if (i != Positions::LASTCHAR)
+ c += alpha_inc[i];
+ /* Unify c with c - ('a'-'A'). */
+ unsigned int d = alpha_unify[c];
+ unsigned int b = c - ('a'-'A');
+ for (int a = b; a >= 0 && alpha_unify[a] == b; a -= ('a'-'A'))
+ alpha_unify[a] = d;
}
}
}
@@ -537,10 +517,10 @@ Search::compute_alpha_unify (const Positions& positions, const unsigned int *alp
/* Initializes each keyword's _selchars array. */
void
-Search::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const
+Search::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const
{
for (KeywordExt_List *temp = _head; temp; temp = temp->rest())
- temp->first()->init_selchars_multiset(use_all_chars, positions, alpha_unify, alpha_inc);
+ temp->first()->init_selchars_multiset(positions, alpha_unify, alpha_inc);
}
/* Count the duplicate keywords that occur with the given set of positions
@@ -554,7 +534,7 @@ Search::count_duplicates_multiset (const unsigned int *alpha_inc) const
/* Run through the keyword list and count the duplicates incrementally.
The result does not depend on the order of the keyword list, thanks to
the formula above. */
- init_selchars_multiset (option[ALLCHARS], _key_positions,
+ init_selchars_multiset (_key_positions,
compute_alpha_unify (_key_positions, alpha_inc),
alpha_inc);
@@ -596,35 +576,34 @@ Search::find_alpha_inc ()
{
/* Look which _alpha_inc[i] we are free to increment. */
unsigned int nindices;
- if (option[ALLCHARS])
- nindices = _max_key_len;
- else
- {
- /* Ignore Positions::LASTCHAR. Remember that since Positions are
- sorted in decreasing order, Positions::LASTCHAR comes last. */
- nindices = (_key_positions.get_size() == 0
- || _key_positions[_key_positions.get_size() - 1]
- != Positions::LASTCHAR
- ? _key_positions.get_size()
- : _key_positions.get_size() - 1);
- }
+ {
+ nindices = 0;
+ PositionIterator iter = _key_positions.iterator(_max_key_len);
+ for (;;)
+ {
+ int key_pos = iter.next ();
+ if (key_pos == PositionIterator::EOS)
+ break;
+ if (key_pos != Positions::LASTCHAR)
+ nindices++;
+ }
+ }
unsigned int indices[nindices];
- if (option[ALLCHARS])
- for (unsigned int j = 0; j < nindices; j++)
- indices[j] = j;
- else
- {
- PositionIterator iter (_key_positions);
- for (unsigned int j = 0; j < nindices; j++)
- {
- int key_pos = iter.next ();
- if (key_pos == PositionIterator::EOS
- || key_pos == Positions::LASTCHAR)
- abort ();
- indices[j] = key_pos;
- }
- }
+ {
+ unsigned int j = 0;
+ PositionIterator iter = _key_positions.iterator(_max_key_len);
+ for (;;)
+ {
+ int key_pos = iter.next ();
+ if (key_pos == PositionIterator::EOS)
+ break;
+ if (key_pos != Positions::LASTCHAR)
+ indices[j++] = key_pos;
+ }
+ if (!(j == nindices))
+ abort ();
+ }
/* Perform several rounds of searching for a good alpha increment.
Each round reduces the number of artificial collisions by adding
@@ -670,32 +649,16 @@ Search::find_alpha_inc ()
{
/* Print the result. */
fprintf (stderr, "\nComputed alpha increments: ");
- if (option[ALLCHARS])
- {
- bool first = true;
- for (unsigned int j = 0; j < nindices; j++)
- if (current[indices[j]] != 0)
- {
- if (!first)
- fprintf (stderr, ", ");
- fprintf (stderr, "%u:+%u",
- indices[j] + 1, current[indices[j]]);
- first = false;
- }
- }
- else
- {
- bool first = true;
- for (unsigned int j = nindices; j-- > 0; )
- if (current[indices[j]] != 0)
- {
- if (!first)
- fprintf (stderr, ", ");
- fprintf (stderr, "%u:+%u",
- indices[j] + 1, current[indices[j]]);
- first = false;
- }
- }
+ bool first = true;
+ for (unsigned int j = nindices; j-- > 0; )
+ if (current[indices[j]] != 0)
+ {
+ if (!first)
+ fprintf (stderr, ", ");
+ fprintf (stderr, "%u:+%u",
+ indices[j] + 1, current[indices[j]]);
+ first = false;
+ }
fprintf (stderr, "\n");
}
}
@@ -713,8 +676,7 @@ Search::prepare ()
KeywordExt_List *temp;
/* Initialize each keyword's _selchars array. */
- init_selchars_multiset(option[ALLCHARS], _key_positions,
- _alpha_unify, _alpha_inc);
+ init_selchars_multiset(_key_positions, _alpha_unify, _alpha_inc);
/* Check for duplicates, i.e. keywords with the same _selchars array
(and - if !option[NOLENGTH] - also the same length).
@@ -831,7 +793,7 @@ Search::max_key_length () const
int
Search::get_max_keysig_size () const
{
- return option[ALLCHARS] ? _max_key_len : _key_positions.get_size();
+ return _key_positions.is_useall() ? _max_key_len : _key_positions.get_size();
}
/* ---------------------- Finding good asso_values[] ----------------------- */
diff --git a/src/search.h b/src/search.h
index 0cd2829..9d14aec 100644
--- a/src/search.h
+++ b/src/search.h
@@ -50,7 +50,7 @@ private:
unsigned int * compute_alpha_unify () const;
/* Initializes each keyword's _selchars array. */
- void init_selchars_tuple (bool use_all_chars, const Positions& positions) const;
+ void init_selchars_tuple (const Positions& positions) const;
/* Deletes each keyword's _selchars array. */
void delete_selchars () const;
@@ -67,7 +67,7 @@ private:
unsigned int * compute_alpha_unify (const Positions& positions, const unsigned int *alpha_inc) const;
/* Initializes each keyword's _selchars array. */
- void init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const;
+ void init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const;
/* Count the duplicate keywords that occur with the given set of positions
and a given alpha_inc[] array. */
diff --git a/tests/chill.exp b/tests/chill.exp
index 089843f..4d90f67 100644
--- a/tests/chill.exp
+++ b/tests/chill.exp
@@ -55,7 +55,6 @@ hash (str, len)
switch (hval)
{
default:
- case 30:
hval += asso_values[(unsigned char)str[29]];
case 29:
hval += asso_values[(unsigned char)str[28]];