From 2535f34494011d57d9603d6e44604d357f83a4b0 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Mon, 7 Apr 2003 09:50:11 +0000 Subject: Move the handling of ALLCHARS (-k'*') into the Positions class. --- ChangeLog | 52 +++++++++++++++++ src/keyword.cc | 100 ++++++++++++++++---------------- src/keyword.h | 6 +- src/options.cc | 7 ++- src/options.h | 14 ++--- src/options.icc | 3 +- src/output.cc | 30 +--------- src/output.h | 2 +- src/positions.cc | 63 +++++++++++--------- src/positions.h | 44 ++++++++++++-- src/positions.icc | 139 ++++++++++++++++++++++++++++++++++++++++++-- src/search.cc | 170 +++++++++++++++++++++--------------------------------- src/search.h | 4 +- tests/chill.exp | 1 - 14 files changed, 396 insertions(+), 239 deletions(-) diff --git a/ChangeLog b/ChangeLog index e4f2495..ffed0de 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,55 @@ +2002-12-12 Bruno Haible + + * src/positions.h (Positions::is_useall, Positions::set_useall, + Positions::iterator, Positions::reviterator): New method declarations. + (Positions::_useall): New field. + (PositionIterator): Make constructor private. Add a constructor and a + copy constructor. + (PositionIterator::remaining): New declaration. + (PositionReverseIterator): Make constructor private. Add a constructor + and a copy constructor. + (PositionReverseIterator::remaining): New declaration. + (PositionReverseIterator::_minindex): New field. + * src/positions.icc (Positions::Positions): Initialize _useall. + (Positions::operator=): Likewise. + (Positions::is_useall, Positions::set_useall): New methods. + (Positions::sort): Do nothing if _useall is set. + (Positions::iterator, Positions::reviterator): New methods. + (PositionIterator::PositionIterator): New constructor. + (PositionIterator::remaining): New method. + (PositionReverseIterator::PositionReverseIterator): New constructor. + (PositionReverseIterator::next): Use _minindex as bound. + (PositionReverseIterator::remaining): New method. + * src/positions.cc (Positions::add, Positions::remove): Reset the + useall flag. + (Positions::print): Handle the useall case. + * src/options.h (ALLCHARS): Remove. + * src/options.cc (Options::~Options): Update. + (Options::parse_options): Use Positions::set_useall(). + * src/keyword.h (KeywordExt::init_selchars_tuple, + KeywordExt::init_selchars_multiset, KeywordExt::init_selchars_low): + Remove use_all_chars argument. + * src/keyword.cc (KeywordExt::init_selchars_low): Remove use_all_chars + argument. Tell the position iterator to stop at _allchars_length. + Remove special case code for -k'*'. + (KeywordExt::init_selchars_tuple, KeywordExt::init_selchars_multiset): + Remove use_all_chars argument. + * src/search.h (Search::init_selchars_tuple): Remove use_all_chars + argument. + (Search::init_selchars_multiset): Likewise. + * src/search.cc (Search::init_selchars_tuple): Remove use_all_chars + argument. + (Search::count_duplicates_tuple, Search::find_positions): Update. + (Search::compute_alpha_unify): Remove special case code for -k'*'. + (Search::init_selchars_multiset): Remove use_all_chars argument. + (Search::count_duplicates_multiset): Update. + (Search::find_alpha_inc): Remove special case code for -k'*'. + (Search::prepare): Update. + (Search::get_max_keysig_size): Update. + * src/output.cc (Output::output_hash_function): Remove special case + code for -k'*'. + * tests/chill.exp: Regenerated. + 2002-12-11 Bruno Haible Change the positions to be 0-based, instead of 1-based. diff --git a/src/keyword.cc b/src/keyword.cc index fc819b6..fbf1a0a 100644 --- a/src/keyword.cc +++ b/src/keyword.cc @@ -48,64 +48,62 @@ static inline void sort_char_set (unsigned int *base, int len) } /* Initializes selchars and selchars_length. - The hash function will be computed as - asso_values[allchars[key_pos[0]]] + asso_values[allchars[key_pos[1]]] + ... - We compute selchars as the multiset - { allchars[key_pos[0]], allchars[key_pos[1]], ... } - so that the hash function becomes - asso_values[selchars[0]] + asso_values[selchars[1]] + ... + + General idea: + The hash function will be computed as + asso_values[allchars[key_pos[0]]] + + asso_values[allchars[key_pos[1]]] + ... + We compute selchars as the multiset + { allchars[key_pos[0]], allchars[key_pos[1]], ... } + so that the hash function becomes + asso_values[selchars[0]] + asso_values[selchars[1]] + ... Furthermore we sort the selchars array, to ease detection of duplicates later. + + More in detail: The arguments alpha_unify (used for case-insensitive + hash functions) and alpha_inc (used to disambiguate permutations) + apply slight modifications. The hash function will be computed as + sum (j=0,1,...: k = key_pos[j]: + asso_values[alpha_unify[allchars[k]+alpha_inc[k]]]) + + (allchars_length if !option[NOLENGTH], 0 otherwise). + We compute selchars as the multiset + { alpha_unify[allchars[k]+alpha_inc[k]] : j=0,1,..., k = key_pos[j] } + so that the hash function becomes + asso_values[selchars[0]] + asso_values[selchars[1]] + ... + + (allchars_length if !option[NOLENGTH], 0 otherwise). */ unsigned int * -KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) +KeywordExt::init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) { - const char *k = _allchars; - unsigned int *key_set = - new unsigned int[(use_all_chars ? _allchars_length : positions.get_size ())]; + /* Iterate through the list of positions, initializing selchars + (via ptr). */ + PositionIterator iter = positions.iterator(_allchars_length); + + unsigned int *key_set = new unsigned int[iter.remaining()]; unsigned int *ptr = key_set; - if (use_all_chars) - /* Use all the character positions in the KEY. */ - for (int i = _allchars_length; i > 0; k++, i--) - { - unsigned int c = static_cast(*k); - if (alpha_inc) - c += alpha_inc[k-_allchars]; - if (alpha_unify) - c = alpha_unify[c]; - *ptr = c; - ptr++; - } - else - /* Only use those character positions specified by the user. */ + for (int i; (i = iter.next ()) != PositionIterator::EOS; ) { - /* Iterate through the list of key_positions, initializing selchars - (via ptr). */ - PositionIterator iter (positions); - - for (int i; (i = iter.next ()) != PositionIterator::EOS; ) + unsigned int c; + if (i == Positions::LASTCHAR) + /* Special notation for last KEY position, i.e. '$'. */ + c = static_cast(_allchars[_allchars_length - 1]); + else if (i < _allchars_length) { - unsigned int c; - if (i == Positions::LASTCHAR) - /* Special notation for last KEY position, i.e. '$'. */ - c = static_cast(_allchars[_allchars_length - 1]); - else if (i < _allchars_length) - { - /* Within range of KEY length, so we'll keep it. */ - c = static_cast(_allchars[i]); - if (alpha_inc) - c += alpha_inc[i]; - } - else - /* Out of range of KEY length, so we'll just skip it. */ - continue; - if (alpha_unify) - c = alpha_unify[c]; - *ptr = c; - ptr++; + /* Within range of KEY length, so we'll keep it. */ + c = static_cast(_allchars[i]); + if (alpha_inc) + c += alpha_inc[i]; } + else + /* Out of range of KEY length, the iterator should not have + produced this. */ + abort (); + if (alpha_unify) + c = alpha_unify[c]; + *ptr = c; + ptr++; } _selchars = key_set; @@ -115,16 +113,16 @@ KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, c } void -KeywordExt::init_selchars_tuple (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify) +KeywordExt::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) { - init_selchars_low (use_all_chars, positions, alpha_unify, NULL); + init_selchars_low (positions, alpha_unify, NULL); } void -KeywordExt::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) +KeywordExt::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) { unsigned int *selchars = - init_selchars_low (use_all_chars, positions, alpha_unify, alpha_inc); + init_selchars_low (positions, alpha_unify, alpha_inc); /* Sort the selchars elements alphabetically. */ sort_char_set (selchars, _selchars_length); diff --git a/src/keyword.h b/src/keyword.h index 9f09186..c0e2156 100644 --- a/src/keyword.h +++ b/src/keyword.h @@ -68,9 +68,9 @@ struct KeywordExt : public Keyword /* Methods depending on the keyposition list. */ /* Initializes selchars and selchars_length, without reordering. */ - void init_selchars_tuple (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify); + void init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify); /* Initializes selchars and selchars_length, with reordering. */ - void init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc); + void init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc); /* Deletes selchars. */ void delete_selchars (); @@ -81,7 +81,7 @@ struct KeywordExt : public Keyword int _final_index; private: - unsigned int * init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc); + unsigned int * init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc); }; /* An abstract factory for creating Keyword instances. diff --git a/src/options.cc b/src/options.cc index 6e8793e..8880858 100644 --- a/src/options.cc +++ b/src/options.cc @@ -514,14 +514,14 @@ Options::~Options () _function_name, _hash_name, _wordlist_name, _slot_name, _initializer_suffix, _asso_iterations, _jump, _size_multiple, _initial_asso_value, _delimiters, _total_switches); - if (_option_word & ALLCHARS) + if (_key_positions.is_useall()) fprintf (stderr, "all characters are used in the hash function\n"); else { fprintf (stderr, "maximum keysig size = %d\nkey positions are: \n", _key_positions.get_size()); - PositionIterator iter (_key_positions); + PositionIterator iter = _key_positions.iterator(); for (int pos; (pos = iter.next()) != PositionIterator::EOS; ) if (pos == Positions::LASTCHAR) fprintf (stderr, "$\n"); @@ -773,9 +773,10 @@ Options::parse_options (int argc, char *argv[]) PositionStringParser sparser (/*getopt*/optarg, 1, Positions::MAX_KEY_POS, Positions::LASTCHAR, BAD_VALUE, EOS); if (/*getopt*/optarg [0] == '*') /* Use all the characters for hashing!!!! */ - _option_word |= ALLCHARS; + _key_positions.set_useall(true); else { + _key_positions.set_useall(false); int *key_positions = _key_positions.pointer(); int *key_pos; diff --git a/src/options.h b/src/options.h index 7eb7fe1..0bde55c 100644 --- a/src/options.h +++ b/src/options.h @@ -96,22 +96,19 @@ enum Option_Type /* Use the given key positions. */ POSITIONS = 1 << 16, - /* Use all characters in hash function. */ - ALLCHARS = 1 << 17, - /* Handle duplicate hash values for keywords. */ - DUP = 1 << 18, + DUP = 1 << 17, /* Don't include keyword length in hash computations. */ - NOLENGTH = 1 << 19, + NOLENGTH = 1 << 18, /* Randomly initialize the associated values table. */ - RANDOM = 1 << 20, + RANDOM = 1 << 19, /* --- Informative output --- */ /* Enable debugging (prints diagnostics to stderr). */ - DEBUG = 1 << 21 + DEBUG = 1 << 20 }; /* Class manager for gperf program Options. */ @@ -197,8 +194,7 @@ public: /* Sets the delimiters string, if not already set. */ void set_delimiters (const char *delimiters); - /* Returns key positions. - Only to be used if !options[ALLCHARS]. */ + /* Returns key positions. */ const Positions& get_key_positions () const; private: diff --git a/src/options.icc b/src/options.icc index a9295a0..f519133 100644 --- a/src/options.icc +++ b/src/options.icc @@ -135,8 +135,7 @@ Options::get_delimiters () const return _delimiters; } -/* Returns key positions. - Only to be used if !options[ALLCHARS]. */ +/* Returns key positions. */ INLINE const Positions& Options::get_key_positions () const { diff --git a/src/output.cc b/src/output.cc index 05ac35a..94e5952 100644 --- a/src/output.cc +++ b/src/output.cc @@ -606,7 +606,7 @@ Output::output_hash_function () const printf ("{\n"); /* First the asso_values array. */ - if (option[ALLCHARS] || _key_positions.get_size() > 0) + if (_key_positions.get_size() > 0) { printf (" static %s%s asso_values[] =\n" " {", @@ -633,31 +633,7 @@ Output::output_hash_function () const " };\n"); } - if (option[ALLCHARS]) - { - /* User wants *all* characters considered in hash. */ - printf (" register int hval = %s;\n\n" - " switch (%s)\n" - " {\n" - " default:\n", - option[NOLENGTH] ? "0" : "len", - option[NOLENGTH] ? "len" : "hval"); - - for (int i = _max_key_len; i > 0; i--) - { - printf (" case %d:\n" - " hval += asso_values[%sstr[%d]", - i, char_to_index, i - 1); - if (_alpha_inc[i - 1]) - printf ("+%u", _alpha_inc[i - 1]); - printf ("];\n"); - } - - printf (" break;\n" - " }\n" - " return hval;\n"); - } - else if (_key_positions.get_size() == 0) + if (_key_positions.get_size() == 0) { /* Trivial case: No key positions at all. */ printf (" return %s;\n", @@ -668,7 +644,7 @@ Output::output_hash_function () const /* Iterate through the key positions. Remember that Positions::sort() has sorted them in decreasing order, with Positions::LASTCHAR coming last. */ - PositionIterator iter (_key_positions); + PositionIterator iter = _key_positions.iterator(_max_key_len); int key_pos; /* Get the highest key position. */ diff --git a/src/output.h b/src/output.h index 843fdad..780b4f1 100644 --- a/src/output.h +++ b/src/output.h @@ -119,7 +119,7 @@ private: int const _max_key_len; /* Minimum length of the shortest keyword. */ int const _min_key_len; - /* Key positions. Only to be used if !options[ALLCHARS]. */ + /* Key positions. */ Positions const _key_positions; /* Adjustments to add to bytes add specific key positions. */ const unsigned int * const _alpha_inc; diff --git a/src/positions.cc b/src/positions.cc index 2a096c1..f6439e3 100644 --- a/src/positions.cc +++ b/src/positions.cc @@ -50,6 +50,8 @@ Positions::contains (int pos) const void Positions::add (int pos) { + set_useall (false); + unsigned int count = _size; if (count == MAX_SIZE) @@ -78,6 +80,8 @@ Positions::add (int pos) void Positions::remove (int pos) { + set_useall (false); + unsigned int count = _size; if (count > 0) { @@ -120,40 +124,45 @@ Positions::remove (int pos) void Positions::print () const { - bool first = true; - bool seen_LASTCHAR = false; - unsigned int count = _size; - const int *p = _positions + _size - 1; - - for (; count > 0; p--) + if (_useall) + printf ("*"); + else { - count--; - if (*p == LASTCHAR) - seen_LASTCHAR = true; - else + bool first = true; + bool seen_LASTCHAR = false; + unsigned int count = _size; + const int *p = _positions + _size - 1; + + for (; count > 0; p--) { - if (!first) - printf (","); - printf ("%d", *p + 1); - if (count > 0 && p[-1] == *p + 1) + count--; + if (*p == LASTCHAR) + seen_LASTCHAR = true; + else { - printf ("-"); - do + if (!first) + printf (","); + printf ("%d", *p + 1); + if (count > 0 && p[-1] == *p + 1) { - p--; - count--; + printf ("-"); + do + { + p--; + count--; + } + while (count > 0 && p[-1] == *p + 1); + printf ("%d", *p + 1); } - while (count > 0 && p[-1] == *p + 1); - printf ("%d", *p + 1); + first = false; } - first = false; } - } - if (seen_LASTCHAR) - { - if (!first) - printf (","); - printf ("$"); + if (seen_LASTCHAR) + { + if (!first) + printf (","); + printf ("$"); + } } } diff --git a/src/positions.h b/src/positions.h index 984195a..d16c214 100644 --- a/src/positions.h +++ b/src/positions.h @@ -57,10 +57,12 @@ public: Positions& operator= (const Positions& src); /* Accessors. */ + bool is_useall () const; int operator[] (unsigned int index) const; unsigned int get_size () const; /* Write access. */ + void set_useall (bool useall); int * pointer (); void set_size (unsigned int size); @@ -68,6 +70,17 @@ public: Returns true if there are no duplicates, false otherwise. */ bool sort (); + /* Creates an iterator, returning the positions in descending order. */ + PositionIterator iterator () const; + /* Creates an iterator, returning the positions in descending order, + that apply to strings of length <= maxlen. */ + PositionIterator iterator (int maxlen) const; + /* Creates an iterator, returning the positions in ascending order. */ + PositionReverseIterator reviterator () const; + /* Creates an iterator, returning the positions in ascending order, + that apply to strings of length <= maxlen. */ + PositionReverseIterator reviterator (int maxlen) const; + /* Set operations. Assumes the array is in reverse order. */ bool contains (int pos) const; void add (int pos); @@ -77,6 +90,8 @@ public: void print () const; private: + /* The special case denoted by '*'. */ + bool _useall; /* Number of positions. */ unsigned int _size; /* Array of positions. 0 for the first char, 1 for the second char etc., @@ -88,9 +103,10 @@ private: class PositionIterator { + friend class Positions; public: - /* Initializes an iterator through POSITIONS. */ - PositionIterator (Positions const& positions); + /* Copy constructor. */ + PositionIterator (const PositionIterator& src); /* End of iteration marker. */ enum { EOS = -2 }; @@ -98,7 +114,16 @@ public: /* Retrieves the next position, or EOS past the end. */ int next (); + /* Returns the number of remaining positions, i.e. how often next() will + return a value != EOS. */ + unsigned int remaining () const; + private: + /* Initializes an iterator through POSITIONS. */ + PositionIterator (Positions const& positions); + /* Initializes an iterator through POSITIONS, ignoring positions >= maxlen. */ + PositionIterator (Positions const& positions, int maxlen); + const Positions& _set; unsigned int _index; }; @@ -108,9 +133,10 @@ private: class PositionReverseIterator { + friend class Positions; public: - /* Initializes an iterator through POSITIONS. */ - PositionReverseIterator (Positions const& positions); + /* Copy constructor. */ + PositionReverseIterator (const PositionReverseIterator& src); /* End of iteration marker. */ enum { EOS = -2 }; @@ -118,9 +144,19 @@ public: /* Retrieves the next position, or EOS past the end. */ int next (); + /* Returns the number of remaining positions, i.e. how often next() will + return a value != EOS. */ + unsigned int remaining () const; + private: + /* Initializes an iterator through POSITIONS. */ + PositionReverseIterator (Positions const& positions); + /* Initializes an iterator through POSITIONS, ignoring positions >= maxlen. */ + PositionReverseIterator (Positions const& positions, int maxlen); + const Positions& _set; unsigned int _index; + unsigned int _minindex; }; #ifdef __OPTIMIZE__ diff --git a/src/positions.icc b/src/positions.icc index 32af6b1..ca9347d 100644 --- a/src/positions.icc +++ b/src/positions.icc @@ -30,20 +30,23 @@ INLINE Positions::Positions () - : _size (0) + : _useall (false), + _size (0) { } INLINE Positions::Positions (int pos1) - : _size (1) + : _useall (false), + _size (1) { _positions[0] = pos1; } INLINE Positions::Positions (int pos1, int pos2) - : _size (2) + : _useall (false), + _size (2) { _positions[0] = pos1; _positions[1] = pos2; @@ -53,7 +56,8 @@ Positions::Positions (int pos1, int pos2) INLINE Positions::Positions (const Positions& src) - : _size (src._size) + : _useall (src._useall), + _size (src._size) { memcpy (_positions, src._positions, _size * sizeof (_positions[0])); } @@ -63,6 +67,7 @@ Positions::Positions (const Positions& src) INLINE Positions& Positions::operator= (const Positions& src) { + _useall = src._useall; _size = src._size; memcpy (_positions, src._positions, _size * sizeof (_positions[0])); return *this; @@ -70,6 +75,12 @@ Positions::operator= (const Positions& src) /* Accessors. */ +INLINE bool +Positions::is_useall () const +{ + return _useall; +} + INLINE int Positions::operator[] (unsigned int index) const { @@ -84,6 +95,20 @@ Positions::get_size () const /* Write access. */ +INLINE void +Positions::set_useall (bool useall) +{ + _useall = useall; + if (useall) + { + /* The positions are 0, 1, ..., MAX_KEY_POS-1, in descending order. */ + _size = MAX_KEY_POS; + int *ptr = _positions; + for (int i = MAX_KEY_POS - 1; i >= 0; i--) + *ptr++ = i; + } +} + INLINE int * Positions::pointer () { @@ -101,6 +126,9 @@ Positions::set_size (unsigned int size) INLINE bool Positions::sort () { + if (_useall) + return true; + /* Bubble sort. */ bool duplicate_free = true; int *base = _positions; @@ -121,6 +149,36 @@ Positions::sort () return duplicate_free; } +/* Creates an iterator, returning the positions in descending order. */ +INLINE PositionIterator +Positions::iterator () const +{ + return PositionIterator (*this); +} + +/* Creates an iterator, returning the positions in descending order, + that apply to strings of length <= maxlen. */ +INLINE PositionIterator +Positions::iterator (int maxlen) const +{ + return PositionIterator (*this, maxlen); +} + +/* Creates an iterator, returning the positions in ascending order. */ +INLINE PositionReverseIterator +Positions::reviterator () const +{ + return PositionReverseIterator (*this); +} + +/* Creates an iterator, returning the positions in ascending order, + that apply to strings of length <= maxlen. */ +INLINE PositionReverseIterator +Positions::reviterator (int maxlen) const +{ + return PositionReverseIterator (*this, maxlen); +} + /* ------------------------- Class PositionIterator ------------------------ */ /* Initializes an iterator through POSITIONS. */ @@ -131,6 +189,24 @@ PositionIterator::PositionIterator (Positions const& positions) { } +/* Initializes an iterator through POSITIONS, ignoring positions >= maxlen. */ +INLINE +PositionIterator::PositionIterator (Positions const& positions, int maxlen) + : _set (positions) +{ + if (positions._useall) + _index = (maxlen <= Positions::MAX_KEY_POS ? Positions::MAX_KEY_POS - maxlen : 0); + else + { + unsigned int index; + for (index = 0; + index < positions._size && positions._positions[index] >= maxlen; + index++) + ; + _index = index; + } +} + /* Retrieves the next position, or EOS past the end. */ INLINE int PositionIterator::next () @@ -138,19 +214,72 @@ PositionIterator::next () return (_index < _set._size ? _set._positions[_index++] : EOS); } +/* Returns the number of remaining positions, i.e. how often next() will + return a value != EOS. */ +INLINE unsigned int +PositionIterator::remaining () const +{ + return _set._size - _index; +} + +/* Copy constructor. */ +INLINE +PositionIterator::PositionIterator (const PositionIterator& src) + : _set (src._set), + _index (src._index) +{ +} + /* --------------------- Class PositionReverseIterator --------------------- */ /* Initializes an iterator through POSITIONS. */ INLINE PositionReverseIterator::PositionReverseIterator (Positions const& positions) + : _set (positions), + _index (_set._size), + _minindex (0) +{ +} + +/* Initializes an iterator through POSITIONS, ignoring positions >= maxlen. */ +INLINE +PositionReverseIterator::PositionReverseIterator (Positions const& positions, int maxlen) : _set (positions), _index (_set._size) { + if (positions._useall) + _minindex = (maxlen <= Positions::MAX_KEY_POS ? Positions::MAX_KEY_POS - maxlen : 0); + else + { + unsigned int index; + for (index = 0; + index < positions._size && positions._positions[index] >= maxlen; + index++) + ; + _minindex = index; + } } /* Retrieves the next position, or EOS past the end. */ INLINE int PositionReverseIterator::next () { - return (_index > 0 ? _set._positions[--_index] : EOS); + return (_index > _minindex ? _set._positions[--_index] : EOS); +} + +/* Returns the number of remaining positions, i.e. how often next() will + return a value != EOS. */ +INLINE unsigned int +PositionReverseIterator::remaining () const +{ + return _index - _minindex; +} + +/* Copy constructor. */ +INLINE +PositionReverseIterator::PositionReverseIterator (const PositionReverseIterator& src) + : _set (src._set), + _index (src._index), + _minindex (src._minindex) +{ } diff --git a/src/search.cc b/src/search.cc index a1b71a8..61fb5ef 100644 --- a/src/search.cc +++ b/src/search.cc @@ -175,10 +175,10 @@ Search::compute_alpha_unify () const /* Initializes each keyword's _selchars array. */ void -Search::init_selchars_tuple (bool use_all_chars, const Positions& positions) const +Search::init_selchars_tuple (const Positions& positions) const { for (KeywordExt_List *temp = _head; temp; temp = temp->rest()) - temp->first()->init_selchars_tuple(use_all_chars, positions, _alpha_unify); + temp->first()->init_selchars_tuple(positions, _alpha_unify); } /* Deletes each keyword's _selchars array. */ @@ -199,7 +199,7 @@ Search::count_duplicates_tuple (const Positions& positions) const /* Run through the keyword list and count the duplicates incrementally. The result does not depend on the order of the keyword list, thanks to the formula above. */ - init_selchars_tuple (option[ALLCHARS], positions); + init_selchars_tuple (positions); unsigned int count = 0; { @@ -411,7 +411,7 @@ Search::find_positions () { /* Print the result. */ fprintf (stderr, "\nComputed positions: "); - PositionReverseIterator iter (_key_positions); + PositionReverseIterator iter = _key_positions.reviterator(); bool seen_lastchar = false; bool first = true; for (int i; (i = iter.next ()) != PositionReverseIterator::EOS; ) @@ -482,49 +482,29 @@ Search::compute_alpha_unify (const Positions& positions, const unsigned int *alp { KeywordExt *keyword = temp->first(); - if (option[ALLCHARS]) - /* Iterate through all character positions. */ - for (int i = 0; i < keyword->_allchars_length; i++) - { - unsigned int c = static_cast(keyword->_allchars[i]); - if (c >= 'A' && c <= 'Z') - c += 'a' - 'A'; - if (c >= 'a' && c <= 'z') - { - c += alpha_inc[i]; - /* Unify c with c - ('a'-'A'). */ - unsigned int d = alpha_unify[c]; - unsigned int b = c - ('a'-'A'); - for (int a = b; a >= 0 && alpha_unify[a] == b; a -= ('a'-'A')) - alpha_unify[a] = d; - } - } - else - { - /* Iterate through the selected character positions. */ - PositionIterator iter (positions); + /* Iterate through the selected character positions. */ + PositionIterator iter = positions.iterator(keyword->_allchars_length); - for (int i; (i = iter.next ()) != PositionIterator::EOS; ) + for (int i; (i = iter.next ()) != PositionIterator::EOS; ) + { + unsigned int c; + if (i == Positions::LASTCHAR) + c = static_cast(keyword->_allchars[keyword->_allchars_length - 1]); + else if (i < keyword->_allchars_length) + c = static_cast(keyword->_allchars[i]); + else + abort (); + if (c >= 'A' && c <= 'Z') + c += 'a' - 'A'; + if (c >= 'a' && c <= 'z') { - unsigned int c; - if (i == Positions::LASTCHAR) - c = static_cast(keyword->_allchars[keyword->_allchars_length - 1]); - else if (i < keyword->_allchars_length) - c = static_cast(keyword->_allchars[i]); - else - continue; - if (c >= 'A' && c <= 'Z') - c += 'a' - 'A'; - if (c >= 'a' && c <= 'z') - { - if (i != Positions::LASTCHAR) - c += alpha_inc[i]; - /* Unify c with c - ('a'-'A'). */ - unsigned int d = alpha_unify[c]; - unsigned int b = c - ('a'-'A'); - for (int a = b; a >= 0 && alpha_unify[a] == b; a -= ('a'-'A')) - alpha_unify[a] = d; - } + if (i != Positions::LASTCHAR) + c += alpha_inc[i]; + /* Unify c with c - ('a'-'A'). */ + unsigned int d = alpha_unify[c]; + unsigned int b = c - ('a'-'A'); + for (int a = b; a >= 0 && alpha_unify[a] == b; a -= ('a'-'A')) + alpha_unify[a] = d; } } } @@ -537,10 +517,10 @@ Search::compute_alpha_unify (const Positions& positions, const unsigned int *alp /* Initializes each keyword's _selchars array. */ void -Search::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const +Search::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const { for (KeywordExt_List *temp = _head; temp; temp = temp->rest()) - temp->first()->init_selchars_multiset(use_all_chars, positions, alpha_unify, alpha_inc); + temp->first()->init_selchars_multiset(positions, alpha_unify, alpha_inc); } /* Count the duplicate keywords that occur with the given set of positions @@ -554,7 +534,7 @@ Search::count_duplicates_multiset (const unsigned int *alpha_inc) const /* Run through the keyword list and count the duplicates incrementally. The result does not depend on the order of the keyword list, thanks to the formula above. */ - init_selchars_multiset (option[ALLCHARS], _key_positions, + init_selchars_multiset (_key_positions, compute_alpha_unify (_key_positions, alpha_inc), alpha_inc); @@ -596,35 +576,34 @@ Search::find_alpha_inc () { /* Look which _alpha_inc[i] we are free to increment. */ unsigned int nindices; - if (option[ALLCHARS]) - nindices = _max_key_len; - else - { - /* Ignore Positions::LASTCHAR. Remember that since Positions are - sorted in decreasing order, Positions::LASTCHAR comes last. */ - nindices = (_key_positions.get_size() == 0 - || _key_positions[_key_positions.get_size() - 1] - != Positions::LASTCHAR - ? _key_positions.get_size() - : _key_positions.get_size() - 1); - } + { + nindices = 0; + PositionIterator iter = _key_positions.iterator(_max_key_len); + for (;;) + { + int key_pos = iter.next (); + if (key_pos == PositionIterator::EOS) + break; + if (key_pos != Positions::LASTCHAR) + nindices++; + } + } unsigned int indices[nindices]; - if (option[ALLCHARS]) - for (unsigned int j = 0; j < nindices; j++) - indices[j] = j; - else - { - PositionIterator iter (_key_positions); - for (unsigned int j = 0; j < nindices; j++) - { - int key_pos = iter.next (); - if (key_pos == PositionIterator::EOS - || key_pos == Positions::LASTCHAR) - abort (); - indices[j] = key_pos; - } - } + { + unsigned int j = 0; + PositionIterator iter = _key_positions.iterator(_max_key_len); + for (;;) + { + int key_pos = iter.next (); + if (key_pos == PositionIterator::EOS) + break; + if (key_pos != Positions::LASTCHAR) + indices[j++] = key_pos; + } + if (!(j == nindices)) + abort (); + } /* Perform several rounds of searching for a good alpha increment. Each round reduces the number of artificial collisions by adding @@ -670,32 +649,16 @@ Search::find_alpha_inc () { /* Print the result. */ fprintf (stderr, "\nComputed alpha increments: "); - if (option[ALLCHARS]) - { - bool first = true; - for (unsigned int j = 0; j < nindices; j++) - if (current[indices[j]] != 0) - { - if (!first) - fprintf (stderr, ", "); - fprintf (stderr, "%u:+%u", - indices[j] + 1, current[indices[j]]); - first = false; - } - } - else - { - bool first = true; - for (unsigned int j = nindices; j-- > 0; ) - if (current[indices[j]] != 0) - { - if (!first) - fprintf (stderr, ", "); - fprintf (stderr, "%u:+%u", - indices[j] + 1, current[indices[j]]); - first = false; - } - } + bool first = true; + for (unsigned int j = nindices; j-- > 0; ) + if (current[indices[j]] != 0) + { + if (!first) + fprintf (stderr, ", "); + fprintf (stderr, "%u:+%u", + indices[j] + 1, current[indices[j]]); + first = false; + } fprintf (stderr, "\n"); } } @@ -713,8 +676,7 @@ Search::prepare () KeywordExt_List *temp; /* Initialize each keyword's _selchars array. */ - init_selchars_multiset(option[ALLCHARS], _key_positions, - _alpha_unify, _alpha_inc); + init_selchars_multiset(_key_positions, _alpha_unify, _alpha_inc); /* Check for duplicates, i.e. keywords with the same _selchars array (and - if !option[NOLENGTH] - also the same length). @@ -831,7 +793,7 @@ Search::max_key_length () const int Search::get_max_keysig_size () const { - return option[ALLCHARS] ? _max_key_len : _key_positions.get_size(); + return _key_positions.is_useall() ? _max_key_len : _key_positions.get_size(); } /* ---------------------- Finding good asso_values[] ----------------------- */ diff --git a/src/search.h b/src/search.h index 0cd2829..9d14aec 100644 --- a/src/search.h +++ b/src/search.h @@ -50,7 +50,7 @@ private: unsigned int * compute_alpha_unify () const; /* Initializes each keyword's _selchars array. */ - void init_selchars_tuple (bool use_all_chars, const Positions& positions) const; + void init_selchars_tuple (const Positions& positions) const; /* Deletes each keyword's _selchars array. */ void delete_selchars () const; @@ -67,7 +67,7 @@ private: unsigned int * compute_alpha_unify (const Positions& positions, const unsigned int *alpha_inc) const; /* Initializes each keyword's _selchars array. */ - void init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const; + void init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const; /* Count the duplicate keywords that occur with the given set of positions and a given alpha_inc[] array. */ diff --git a/tests/chill.exp b/tests/chill.exp index 089843f..4d90f67 100644 --- a/tests/chill.exp +++ b/tests/chill.exp @@ -55,7 +55,6 @@ hash (str, len) switch (hval) { default: - case 30: hval += asso_values[(unsigned char)str[29]]; case 29: hval += asso_values[(unsigned char)str[28]]; -- cgit v1.2.1