diff options
author | Bruno Haible <bruno@clisp.org> | 2002-12-16 14:40:19 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2002-12-16 14:40:19 +0000 |
commit | 21cd7bfd2416e9afb5fc6ee8b0cd6b8b8990f773 (patch) | |
tree | e84b163131efa39e87e733775b4eb8cd0c1fd83c | |
parent | ebc7fe61881637eaeccbe18bebedd0f089c78029 (diff) | |
download | gperf-21cd7bfd2416e9afb5fc6ee8b0cd6b8b8990f773.tar.gz |
Use 'unsigned char' instead of 'char' in many places, to reduce casts.
-rw-r--r-- | ChangeLog | 25 | ||||
-rw-r--r-- | lib/hash.cc | 8 | ||||
-rw-r--r-- | lib/hash.h | 6 | ||||
-rw-r--r-- | src/Makefile.in | 2 | ||||
-rw-r--r-- | src/keyword.cc | 89 | ||||
-rw-r--r-- | src/keyword.h | 36 | ||||
-rw-r--r-- | src/keyword.icc | 42 | ||||
-rw-r--r-- | src/options.cc | 2 | ||||
-rw-r--r-- | src/search.cc | 55 | ||||
-rw-r--r-- | src/search.h | 6 |
10 files changed, 186 insertions, 85 deletions
@@ -1,5 +1,30 @@ 2002-11-02 Bruno Haible <bruno@clisp.org> + * lib/hashpjw.h (hashpjw): Change argument type to 'unsigned char *'. + * lib/hash.cc (hashpjw): Likewise. + * src/keyword.icc: New file. + * src/keyword.h: Include keyword.icc. + (KeywordExt::_selchars): Change type to 'unsigned char *'. + * src/keyword.cc: Include keyword.icc. + (Keyword::Keyword, KeywordExt::KeywordExt): Move to keyword.icc. + (sort_char_set): Change argument type to 'unsigned char *'. + (KeywordExt::init_selchars): Update. + * src/search.h (Search::compute_disjoint_union): Change argument types + to 'unsigned char *'. + (Search::sort_set): Likewise. + (Search::affects_prev): Change argument type to 'unsigned char'. + * src/search.cc (Search::prepare): Initialize _duplicate_link here. + (Search::get_occurrence, Search::set_determined, + Search::already_determined, Search::hash): Update. + (Search::compute_disjoint_union): Change argument types to + 'unsigned char *'. + (Search::sort_set): Likewise. + (Search::affects_prev): Change argument type to 'unsigned char'. + (Search::change): Update. + * src/Makefile.in (KEYWORD_H): Add keyword.icc. + + * src/options.cc (Options::parse_options): Fix error message. + * src/read-line.h (Read_Line::Read_Line): Make FILE* argument mandatory. Move body to read-line.icc. * src/read-line.icc (Read_Line::Read_Line): New constructor. diff --git a/lib/hash.cc b/lib/hash.cc index b5bb4ad..1795cee 100644 --- a/lib/hash.cc +++ b/lib/hash.cc @@ -1,6 +1,6 @@ /* -Copyright (C) 1990, 2000 Free Software Foundation - written by Doug Lea (dl@rocky.oswego.edu) +Copyright (C) 1990, 2000, 2002 Free Software Foundation + written by Doug Lea <dl@rocky.oswego.edu> */ #include <hash.h> @@ -12,14 +12,14 @@ Copyright (C) 1990, 2000 Free Software Foundation */ unsigned int -hashpjw (const char *x, unsigned int len) // From Dragon book, p436 +hashpjw (const unsigned char *x, unsigned int len) // From Dragon book, p436 { unsigned int h = 0; unsigned int g; for (; len > 0; len--) { - h = (h << 4) + (unsigned char) *x++; + h = (h << 4) + *x++; if ((g = h & 0xf0000000) != 0) h = (h ^ (g >> 24)) ^ g; } @@ -1,8 +1,8 @@ // This may look like C code, but it is really -*- C++ -*- /* -Copyright (C) 1988, 1992, 2000 Free Software Foundation - written by Doug Lea (dl@rocky.oswego.edu) +Copyright (C) 1988, 1992, 2000, 2002 Free Software Foundation + written by Doug Lea <dl@rocky.oswego.edu> */ #ifndef _hash_h @@ -10,6 +10,6 @@ Copyright (C) 1988, 1992, 2000 Free Software Foundation /* a hash function for char[] arrays using the method described in Aho, Sethi, & Ullman, p 436. */ -extern unsigned int hashpjw (const char *string, unsigned int len); +extern unsigned int hashpjw (const unsigned char *string, unsigned int len); #endif diff --git a/src/Makefile.in b/src/Makefile.in index fe2da6a..b12152a 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -87,7 +87,7 @@ CONFIG_H = config.h VERSION_H = version.h OPTIONS_H = options.h options.icc READ_LINE_H = read-line.h read-line.icc -KEYWORD_H = keyword.h +KEYWORD_H = keyword.h keyword.icc KEYWORD_LIST_H = keyword-list.h $(KEYWORD_H) INPUT_H = input.h $(READ_LINE_H) $(KEYWORD_LIST_H) BOOL_ARRAY_H = bool-array.h bool-array.icc $(OPTIONS_H) diff --git a/src/keyword.cc b/src/keyword.cc index ad65dd9..a7aae2c 100644 --- a/src/keyword.cc +++ b/src/keyword.cc @@ -29,31 +29,16 @@ #include "options.h" -/* Keyword class. */ +/* --------------------------- KeywordExt class --------------------------- */ -/* Constructor. */ -Keyword::Keyword (const char *s, int s_len, const char *r) - : _allchars (s), _allchars_length (s_len), _rest (r) -{ -} - - -/* KeywordExt class. */ - -/* Constructor. */ -KeywordExt::KeywordExt (const char *s, int s_len, const char *r) - : Keyword (s, s_len, r), _duplicate_link (NULL), _final_index (0) -{ -} - -/* Sort a small set of 'char', base[0..len-1], in place. */ -static inline void sort_char_set (char *base, int len) +/* Sort a small set of 'unsigned char', base[0..len-1], in place. */ +static inline void sort_char_set (unsigned char *base, int len) { /* Bubble sort is sufficient here. */ for (int i = 1; i < len; i++) { int j; - char tmp; + unsigned char tmp; for (j = i, tmp = base[j]; j > 0 && tmp < base[j - 1]; j--) base[j] = base[j - 1]; @@ -62,42 +47,55 @@ static inline void sort_char_set (char *base, int len) } } -/* Initialize selchars and selchars_length, and update occurrences. */ +/* Initialize selchars and selchars_length, and update occurrences. + The hash function will be computed as + asso_values[allchars[key_pos[0]]] + asso_values[allchars[key_pos[1]]] + ... + We compute selchars as the multiset + { allchars[key_pos[0]], allchars[key_pos[1]], ... } + so that the hash function becomes + asso_values[selchars[0]] + asso_values[selchars[1]] + ... + Furthermore we sort the selchars array, to ease detection of duplicates + later. + */ void KeywordExt::init_selchars (int *occurrences) { const char *k = _allchars; - char *key_set = - new char[(option[ALLCHARS] ? _allchars_length : option.get_max_keysig_size ())]; - char *ptr = key_set; + unsigned char *key_set = + new unsigned char[(option[ALLCHARS] ? _allchars_length : option.get_max_keysig_size ())]; + unsigned char *ptr = key_set; if (option[ALLCHARS]) - /* Use all the character positions in the KEY. */ - for (int i = _allchars_length; i > 0; k++, ptr++, i--) - occurrences[static_cast<unsigned char>(*ptr = *k)]++; + /* Use all the character positions in the KEY. */ + for (int i = _allchars_length; i > 0; k++, i--) + { + *ptr = static_cast<unsigned char>(*k); + occurrences[*ptr]++; + ptr++; + } else - /* Only use those character positions specified by the user. */ + /* Only use those character positions specified by the user. */ { /* Iterate through the list of key_positions, initializing occurrences - table and selchars (via char * pointer ptr). */ + table and selchars (via ptr). */ PositionIterator iter (option.get_key_positions ()); for (int i; (i = iter.next ()) != PositionIterator::EOS; ) { if (i == Positions::LASTCHAR) - /* Special notation for last KEY position, i.e. '$'. */ - *ptr = _allchars[_allchars_length - 1]; + /* Special notation for last KEY position, i.e. '$'. */ + *ptr = static_cast<unsigned char>(_allchars[_allchars_length - 1]); else if (i <= _allchars_length) - /* Within range of KEY length, so we'll keep it. */ - *ptr = _allchars[i - 1]; + /* Within range of KEY length, so we'll keep it. */ + *ptr = static_cast<unsigned char>(_allchars[i - 1]); else - /* Out of range of KEY length, so we'll just skip it. */ + /* Out of range of KEY length, so we'll just skip it. */ continue; - occurrences[static_cast<unsigned char>(*ptr)]++; + occurrences[*ptr]++; ptr++; } /* Didn't get any hits and user doesn't want to consider the - keylength, so there are essentially no usable hash positions! */ + keylength, so there are essentially no usable hash positions! */ if (ptr == key_set && option[NOLENGTH]) { fprintf (stderr, "Can't hash keyword %.*s with chosen key positions.\n", @@ -106,7 +104,7 @@ void KeywordExt::init_selchars (int *occurrences) } } - /* Sort the KEY_SET items alphabetically. */ + /* Sort the KEY_SET items alphabetically. */ sort_char_set (key_set, ptr - key_set); _selchars = key_set; @@ -114,8 +112,21 @@ void KeywordExt::init_selchars (int *occurrences) } -/* Keyword_Factory class. */ +/* ------------------------- Keyword_Factory class ------------------------- */ + +Keyword_Factory::Keyword_Factory () +{ +} + +Keyword_Factory::~Keyword_Factory () +{ +} + + +#ifndef __OPTIMIZE__ -Keyword_Factory::Keyword_Factory () {} +#define INLINE /* not inline */ +#include "keyword.icc" +#undef INLINE -Keyword_Factory::~Keyword_Factory () {} +#endif /* not defined __OPTIMIZE__ */ diff --git a/src/keyword.h b/src/keyword.h index 705c481..5dbd399 100644 --- a/src/keyword.h +++ b/src/keyword.h @@ -27,10 +27,12 @@ #define keyword_h 1 /* An instance of this class is a keyword, as specified in the input file. */ + struct Keyword { /* Constructor. */ - Keyword (const char *allchars, int allchars_length, const char *rest); + Keyword (const char *allchars, int allchars_length, + const char *rest); /* Data members defined immediately by the input file. */ /* The keyword as a string, possibly containing NUL bytes. */ @@ -41,15 +43,18 @@ struct Keyword }; /* A keyword, in the context of a given keyposition list. */ + struct KeywordExt : public Keyword { /* Constructor. */ - KeywordExt (const char *allchars, int allchars_length, const char *rest); + KeywordExt (const char *allchars, int allchars_length, + const char *rest); /* Data members depending on the keyposition list. */ /* The selected characters that participate for the hash function, - reordered according to the keyposition list. */ - const char * _selchars; + selected according to the keyposition list, as a canonically reordered + multiset. */ + const unsigned char * _selchars; int _selchars_length; /* Chained list of keywords having the same selchars. */ KeywordExt * _duplicate_link; @@ -59,22 +64,37 @@ struct KeywordExt : public Keyword void init_selchars (int *occurrences); /* Data members used by the algorithm. */ - int _occurrence; /* A metric for frequency of key set occurrences. */ - int _hash_value; /* Hash value for the key. */ + int _occurrence; /* Frequency of key set occurrences. */ + int _hash_value; /* Hash value for the keyword. */ /* Data members used by the output routines. */ int _final_index; }; -/* A factory for creating Keyword instances. */ +/* An abstract factory for creating Keyword instances. + This factory is used to make the Input class independent of the concrete + class KeywordExt. */ + class Keyword_Factory { public: + /* Constructor. */ Keyword_Factory (); + /* Destructor. */ virtual ~Keyword_Factory (); + /* Creates a new Keyword. */ - virtual Keyword * create_keyword (const char *allchars, int allchars_length, + virtual /*abstract */ Keyword * + create_keyword (const char *allchars, int allchars_length, const char *rest) = 0; }; +#ifdef __OPTIMIZE__ + +#define INLINE inline +#include "keyword.icc" +#undef INLINE + +#endif + #endif diff --git a/src/keyword.icc b/src/keyword.icc new file mode 100644 index 0000000..baa61d7 --- /dev/null +++ b/src/keyword.icc @@ -0,0 +1,42 @@ +/* Inline Functions for keyword.{h,cc}. + + Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. + Written by Douglas C. Schmidt <schmidt@ics.uci.edu> + and Bruno Haible <bruno@clisp.org>. + + This file is part of GNU GPERF. + + GNU GPERF is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU GPERF is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* ----------------------------- Keyword class ----------------------------- */ + +/* Constructor. */ +INLINE +Keyword::Keyword (const char *allchars, int allchars_length, const char *rest) + : _allchars (allchars), _allchars_length (allchars_length), _rest (rest) +{ +} + + +/* --------------------------- KeywordExt class --------------------------- */ + +/* Constructor. */ +INLINE +KeywordExt::KeywordExt (const char *allchars, int allchars_length, const char *rest) + : Keyword (allchars, allchars_length, rest), + _final_index (-1) +{ +} diff --git a/src/options.cc b/src/options.cc index 5f9fc3e..4a24212 100644 --- a/src/options.cc +++ b/src/options.cc @@ -671,7 +671,7 @@ Options::parse_options (int argc, char *argv[]) { if (value == BAD_VALUE) { - fprintf (stderr, "Illegal key value or range, use 1,2,3-%d,'$' or '*'.\n", + fprintf (stderr, "Invalid key value or range, use 1,2,3-%d,'$' or '*'.\n", Positions::MAX_KEY_POS); short_usage (stderr); exit (1); diff --git a/src/search.cc b/src/search.cc index e760e66..dac5932 100644 --- a/src/search.cc +++ b/src/search.cc @@ -98,7 +98,10 @@ Search::prepare () keyword->_selchars_length, keyword->_selchars); } else - trail = temp; + { + temp->first()->_duplicate_link = NULL; + trail = temp; + } /* Update minimum and maximum keyword length, if needed. */ if (_max_key_len < keyword->_allchars_length) @@ -203,10 +206,10 @@ Search::get_occurrence (KeywordExt *ptr) { int value = 0; - const char *p = ptr->_selchars; + const unsigned char *p = ptr->_selchars; unsigned int i = ptr->_selchars_length; for (; i > 0; p++, i--) - value += _occurrences[static_cast<unsigned char>(*p)]; + value += _occurrences[*p]; return value; } @@ -217,10 +220,10 @@ Search::get_occurrence (KeywordExt *ptr) inline void Search::set_determined (KeywordExt *ptr) { - const char *p = ptr->_selchars; + const unsigned char *p = ptr->_selchars; unsigned int i = ptr->_selchars_length; for (; i > 0; p++, i--) - _determined[static_cast<unsigned char>(*p)] = true; + _determined[*p] = true; } /* Returns TRUE if PTR's key set is already completely determined. */ @@ -230,10 +233,10 @@ Search::already_determined (KeywordExt *ptr) { bool is_determined = true; - const char *p = ptr->_selchars; + const unsigned char *p = ptr->_selchars; unsigned int i = ptr->_selchars_length; for (; is_determined && i > 0; p++, i--) - is_determined = _determined[static_cast<unsigned char>(*p)]; + is_determined = _determined[*p]; return is_determined; } @@ -316,10 +319,10 @@ Search::hash (KeywordExt *key_node) { int sum = option[NOLENGTH] ? 0 : key_node->_allchars_length; - const char *p = key_node->_selchars; + const unsigned char *p = key_node->_selchars; int i = key_node->_selchars_length; for (; i > 0; p++, i--) - sum += _asso_values[static_cast<unsigned char>(*p)]; + sum += _asso_values[*p]; return key_node->_hash_value = sum; } @@ -330,16 +333,16 @@ Search::hash (KeywordExt *key_node) of the combined set. */ inline int -Search::compute_disjoint_union (const char *set_1, int size_1, const char *set_2, int size_2, char *set_3) +Search::compute_disjoint_union (const unsigned char *set_1, int size_1, const unsigned char *set_2, int size_2, unsigned char *set_3) { - char *base = set_3; + unsigned char *base = set_3; while (size_1 > 0 && size_2 > 0) if (*set_1 == *set_2) set_1++, size_1--, set_2++, size_2--; else { - char next; + unsigned char next; if (*set_1 < *set_2) next = *set_1++, size_1--; else @@ -350,7 +353,7 @@ Search::compute_disjoint_union (const char *set_1, int size_1, const char *set_ while (size_1 > 0) { - char next; + unsigned char next; next = *set_1++, size_1--; if (set_3 == base || next != set_3[-1]) *set_3++ = next; @@ -358,7 +361,7 @@ Search::compute_disjoint_union (const char *set_1, int size_1, const char *set_ while (size_2 > 0) { - char next; + unsigned char next; next = *set_2++, size_2--; if (set_3 == base || next != set_3[-1]) *set_3++ = next; @@ -372,17 +375,17 @@ Search::compute_disjoint_union (const char *set_1, int size_1, const char *set_ the UNION_SET is typically short. */ inline void -Search::sort_set (char *union_set, int len) +Search::sort_set (unsigned char *union_set, int len) { int i, j; for (i = 0, j = len - 1; i < j; i++) { int curr; - char tmp; + unsigned char tmp; for (curr = i + 1, tmp = union_set[curr]; - curr > 0 && _occurrences[static_cast<unsigned char>(tmp)] < _occurrences[static_cast<unsigned char>(union_set[curr-1])]; + curr > 0 && _occurrences[tmp] < _occurrences[union_set[curr-1]]; curr--) union_set[curr] = union_set[curr - 1]; @@ -397,9 +400,9 @@ Search::sort_set (char *union_set, int len) Option.Get_Jump was forced to be an odd value! */ inline bool -Search::affects_prev (char c, KeywordExt *curr) +Search::affects_prev (unsigned char c, KeywordExt *curr) { - int original_char = _asso_values[static_cast<unsigned char>(c)]; + int original_char = _asso_values[c]; int total_iterations = !option[FAST] ? get_asso_max () : option.get_iterations () ? option.get_iterations () : keyword_list_length (); @@ -409,8 +412,8 @@ Search::affects_prev (char c, KeywordExt *curr) { int collisions = 0; - _asso_values[static_cast<unsigned char>(c)] = - (_asso_values[static_cast<unsigned char>(c)] + (option.get_jump () ? option.get_jump () : rand ())) + _asso_values[c] = + (_asso_values[c] + (option.get_jump () ? option.get_jump () : rand ())) & (get_asso_max () - 1); /* Iteration Number array is a win, O(1) intialization time! */ @@ -436,7 +439,7 @@ Search::affects_prev (char c, KeywordExt *curr) } /* Restore original values, no more tries. */ - _asso_values[static_cast<unsigned char>(c)] = original_char; + _asso_values[c] = original_char; /* If we're this far it's time to try the next character.... */ return true; } @@ -446,11 +449,11 @@ Search::affects_prev (char c, KeywordExt *curr) void Search::change (KeywordExt *prior, KeywordExt *curr) { - static char *union_set; + static unsigned char *union_set; int union_set_length; if (!union_set) - union_set = new char [2 * get_max_keysig_size ()]; + union_set = new unsigned char [2 * get_max_keysig_size ()]; if (option[DEBUG]) { @@ -467,7 +470,7 @@ Search::change (KeywordExt *prior, KeywordExt *curr) /* Try changing some values, if change doesn't alter other values continue normal action. */ _fewest_collisions++; - const char *p = union_set; + const unsigned char *p = union_set; int i = union_set_length; for (; i > 0; p++, i--) if (!affects_prev (*p, curr)) @@ -475,7 +478,7 @@ Search::change (KeywordExt *prior, KeywordExt *curr) if (option[DEBUG]) { fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n", - *p, p - union_set + 1, _asso_values[static_cast<unsigned char>(*p)]); + *p, p - union_set + 1, _asso_values[*p]); fflush (stderr); } return; /* Good, doesn't affect previous hash values, we'll take it. */ diff --git a/src/search.h b/src/search.h index ef83da4..12b3c7a 100644 --- a/src/search.h +++ b/src/search.h @@ -47,9 +47,9 @@ private: int max_key_length (); int get_max_keysig_size (); int hash (KeywordExt *key_node); - static int compute_disjoint_union (const char *set_1, int size_1, const char *set_2, int size_2, char *set_3); - void sort_set (char *union_set, int len); - bool affects_prev (char c, KeywordExt *curr); + static int compute_disjoint_union (const unsigned char *set_1, int size_1, const unsigned char *set_2, int size_2, unsigned char *set_3); + void sort_set (unsigned char *union_set, int len); + bool affects_prev (unsigned char c, KeywordExt *curr); void change (KeywordExt *prior, KeywordExt *curr); void sort (); public: |