diff options
Diffstat (limited to 'src/keyword.cc')
-rw-r--r-- | src/keyword.cc | 100 |
1 files changed, 49 insertions, 51 deletions
diff --git a/src/keyword.cc b/src/keyword.cc index fc819b6..fbf1a0a 100644 --- a/src/keyword.cc +++ b/src/keyword.cc @@ -48,64 +48,62 @@ static inline void sort_char_set (unsigned int *base, int len) } /* Initializes selchars and selchars_length. - The hash function will be computed as - asso_values[allchars[key_pos[0]]] + asso_values[allchars[key_pos[1]]] + ... - We compute selchars as the multiset - { allchars[key_pos[0]], allchars[key_pos[1]], ... } - so that the hash function becomes - asso_values[selchars[0]] + asso_values[selchars[1]] + ... + + General idea: + The hash function will be computed as + asso_values[allchars[key_pos[0]]] + + asso_values[allchars[key_pos[1]]] + ... + We compute selchars as the multiset + { allchars[key_pos[0]], allchars[key_pos[1]], ... } + so that the hash function becomes + asso_values[selchars[0]] + asso_values[selchars[1]] + ... Furthermore we sort the selchars array, to ease detection of duplicates later. + + More in detail: The arguments alpha_unify (used for case-insensitive + hash functions) and alpha_inc (used to disambiguate permutations) + apply slight modifications. The hash function will be computed as + sum (j=0,1,...: k = key_pos[j]: + asso_values[alpha_unify[allchars[k]+alpha_inc[k]]]) + + (allchars_length if !option[NOLENGTH], 0 otherwise). + We compute selchars as the multiset + { alpha_unify[allchars[k]+alpha_inc[k]] : j=0,1,..., k = key_pos[j] } + so that the hash function becomes + asso_values[selchars[0]] + asso_values[selchars[1]] + ... + + (allchars_length if !option[NOLENGTH], 0 otherwise). */ unsigned int * -KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) +KeywordExt::init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) { - const char *k = _allchars; - unsigned int *key_set = - new unsigned int[(use_all_chars ? _allchars_length : positions.get_size ())]; + /* Iterate through the list of positions, initializing selchars + (via ptr). */ + PositionIterator iter = positions.iterator(_allchars_length); + + unsigned int *key_set = new unsigned int[iter.remaining()]; unsigned int *ptr = key_set; - if (use_all_chars) - /* Use all the character positions in the KEY. */ - for (int i = _allchars_length; i > 0; k++, i--) - { - unsigned int c = static_cast<unsigned char>(*k); - if (alpha_inc) - c += alpha_inc[k-_allchars]; - if (alpha_unify) - c = alpha_unify[c]; - *ptr = c; - ptr++; - } - else - /* Only use those character positions specified by the user. */ + for (int i; (i = iter.next ()) != PositionIterator::EOS; ) { - /* Iterate through the list of key_positions, initializing selchars - (via ptr). */ - PositionIterator iter (positions); - - for (int i; (i = iter.next ()) != PositionIterator::EOS; ) + unsigned int c; + if (i == Positions::LASTCHAR) + /* Special notation for last KEY position, i.e. '$'. */ + c = static_cast<unsigned char>(_allchars[_allchars_length - 1]); + else if (i < _allchars_length) { - unsigned int c; - if (i == Positions::LASTCHAR) - /* Special notation for last KEY position, i.e. '$'. */ - c = static_cast<unsigned char>(_allchars[_allchars_length - 1]); - else if (i < _allchars_length) - { - /* Within range of KEY length, so we'll keep it. */ - c = static_cast<unsigned char>(_allchars[i]); - if (alpha_inc) - c += alpha_inc[i]; - } - else - /* Out of range of KEY length, so we'll just skip it. */ - continue; - if (alpha_unify) - c = alpha_unify[c]; - *ptr = c; - ptr++; + /* Within range of KEY length, so we'll keep it. */ + c = static_cast<unsigned char>(_allchars[i]); + if (alpha_inc) + c += alpha_inc[i]; } + else + /* Out of range of KEY length, the iterator should not have + produced this. */ + abort (); + if (alpha_unify) + c = alpha_unify[c]; + *ptr = c; + ptr++; } _selchars = key_set; @@ -115,16 +113,16 @@ KeywordExt::init_selchars_low (bool use_all_chars, const Positions& positions, c } void -KeywordExt::init_selchars_tuple (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify) +KeywordExt::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) { - init_selchars_low (use_all_chars, positions, alpha_unify, NULL); + init_selchars_low (positions, alpha_unify, NULL); } void -KeywordExt::init_selchars_multiset (bool use_all_chars, const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) +KeywordExt::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) { unsigned int *selchars = - init_selchars_low (use_all_chars, positions, alpha_unify, alpha_inc); + init_selchars_low (positions, alpha_unify, alpha_inc); /* Sort the selchars elements alphabetically. */ sort_char_set (selchars, _selchars_length); |