summaryrefslogtreecommitdiff
path: root/src/keyword.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/keyword.cc')
-rw-r--r--src/keyword.cc100
1 files changed, 49 insertions, 51 deletions
diff --git a/src/keyword.cc b/src/keyword.cc
index fc819b6..fbf1a0a 100644
--- a/src/keyword.cc
+++ b/src/keyword.cc
@@ -48,64 +48,62 @@ static inline void sort_char_set (unsigned int *base, int len)
}
/* Initializes selchars and selchars_length.
- The hash function will be computed as
- asso_values[allchars[key_pos[0]]] + asso_values[allchars[key_pos[1]]] + ...
- We compute selchars as the multiset
- { allchars[key_pos[0]], allchars[key_pos[1]], ... }
- so that the hash function becomes
- asso_values[selchars[0]] + asso_values[selchars[1]] + ...
+
+ General idea:
+ The hash function will be computed as
+ asso_values[allchars[key_pos[0]]] +
+ asso_values[allchars[key_pos[1]]] + ...
+ We compute selchars as the multiset
+ { allchars[key_pos[0]], allchars[key_pos[1]], ... }
+ so that the hash function becomes
+ asso_values[selchars[0]] + asso_values[selchars[1]] + ...
Furthermore we sort the selchars array, to ease detection of duplicates
later.
+
+ More in detail: The arguments alpha_unify (used for case-insensitive
+ hash functions) and alpha_inc (used to disambiguate permutations)
+ apply slight modifications. The hash function will be computed as
+ sum (j=0,1,...: k = key_pos[j]:
+ asso_values[alpha_unify[allchars[k]+alpha_inc[k]]])
+ + (allchars_length if !option[NOLENGTH], 0 otherwise).
+ We compute selchars as the multiset
+ { alpha_unify[allchars[k]+alpha_inc[k]] : j=0,1,..., k = key_pos[j] }
+ so that the hash function becomes
+ asso_values[selchars[0]] + asso_values[selchars[1]] + ...
+ + (allchars_length if !option[NOLENGTH], 0 otherwise).
*/
unsigned int *
-KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
+KeywordExt::init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
{
- const char *k = _allchars;
- unsigned int *key_set =
- new unsigned int[(use_all_chars ? _allchars_length : positions.get_size ())];
+ /* Iterate through the list of positions, initializing selchars
+ (via ptr). */
+ PositionIterator iter = positions.iterator(_allchars_length);
+
+ unsigned int *key_set = new unsigned int[iter.remaining()];
unsigned int *ptr = key_set;
- if (use_all_chars)
- /* Use all the character positions in the KEY. */
- for (int i = _allchars_length; i > 0; k++, i--)
- {
- unsigned int c = static_cast<unsigned char>(*k);
- if (alpha_inc)
- c += alpha_inc[k-_allchars];
- if (alpha_unify)
- c = alpha_unify[c];
- *ptr = c;
- ptr++;
- }
- else
- /* Only use those character positions specified by the user. */
+ for (int i; (i = iter.next ()) != PositionIterator::EOS; )
{
- /* Iterate through the list of key_positions, initializing selchars
- (via ptr). */
- PositionIterator iter (positions);
-
- for (int i; (i = iter.next ()) != PositionIterator::EOS; )
+ unsigned int c;
+ if (i == Positions::LASTCHAR)
+ /* Special notation for last KEY position, i.e. '$'. */
+ c = static_cast<unsigned char>(_allchars[_allchars_length - 1]);
+ else if (i < _allchars_length)
{
- unsigned int c;
- if (i == Positions::LASTCHAR)
- /* Special notation for last KEY position, i.e. '$'. */
- c = static_cast<unsigned char>(_allchars[_allchars_length - 1]);
- else if (i < _allchars_length)
- {
- /* Within range of KEY length, so we'll keep it. */
- c = static_cast<unsigned char>(_allchars[i]);
- if (alpha_inc)
- c += alpha_inc[i];
- }
- else
- /* Out of range of KEY length, so we'll just skip it. */
- continue;
- if (alpha_unify)
- c = alpha_unify[c];
- *ptr = c;
- ptr++;
+ /* Within range of KEY length, so we'll keep it. */
+ c = static_cast<unsigned char>(_allchars[i]);
+ if (alpha_inc)
+ c += alpha_inc[i];
}
+ else
+ /* Out of range of KEY length, the iterator should not have
+ produced this. */
+ abort ();
+ if (alpha_unify)
+ c = alpha_unify[c];
+ *ptr = c;
+ ptr++;
}
_selchars = key_set;
@@ -115,16 +113,16 @@ KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, c
}
void
-KeywordExt::init_selchars_tuple (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify)
+KeywordExt::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify)
{
- init_selchars_low (use_all_chars, positions, alpha_unify, NULL);
+ init_selchars_low (positions, alpha_unify, NULL);
}
void
-KeywordExt::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
+KeywordExt::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc)
{
unsigned int *selchars =
- init_selchars_low (use_all_chars, positions, alpha_unify, alpha_inc);
+ init_selchars_low (positions, alpha_unify, alpha_inc);
/* Sort the selchars elements alphabetically. */
sort_char_set (selchars, _selchars_length);