summaryrefslogtreecommitdiff
path: root/src/search.h
blob: d9af416461c3c428f370acf977d35da61be041d9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
/* This may look like C code, but it is really -*- C++ -*- */

/* Search algorithm.

   Copyright (C) 1989-1998, 2000, 2002, 2009 Free Software Foundation, Inc.
   Written by Douglas C. Schmidt <schmidt@ics.uci.edu>
   and Bruno Haible <bruno@clisp.org>.

   This file is part of GNU GPERF.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#ifndef search_h
#define search_h 1

#include "keyword-list.h"
#include "positions.h"
#include "bool-array.h"

struct EquivalenceClass;

class Search
{
public:
                        Search (KeywordExt_List *list);
                        ~Search ();
  void                  optimize ();
private:
  void                  prepare ();

  /* Computes the upper bound on the indices passed to asso_values[],
     assuming no alpha_increments.  */
  unsigned int          compute_alpha_size () const;

  /* Computes the unification rules between different asso_values[c],
     assuming no alpha_increments.  */
  unsigned int *        compute_alpha_unify () const;

  /* Initializes each keyword's _selchars array.  */
  void                  init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
  /* Deletes each keyword's _selchars array.  */
  void                  delete_selchars () const;

  /* Count the duplicate keywords that occur with a given set of positions.  */
  unsigned int          count_duplicates_tuple (const Positions& positions, const unsigned int *alpha_unify) const;

  /* Find good key positions.  */
  void                  find_positions ();

  /* Count the duplicate keywords that occur with the found set of positions.  */
  unsigned int          count_duplicates_tuple () const;

  /* Computes the upper bound on the indices passed to asso_values[].  */
  unsigned int          compute_alpha_size (const unsigned int *alpha_inc) const;

  /* Computes the unification rules between different asso_values[c].  */
  unsigned int *        compute_alpha_unify (const Positions& positions, const unsigned int *alpha_inc) const;

  /* Initializes each keyword's _selchars array.  */
  void                  init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const;

  /* Count the duplicate keywords that occur with the given set of positions
     and a given alpha_inc[] array.  */
  unsigned int          count_duplicates_multiset (const unsigned int *alpha_inc) const;

  /* Find good _alpha_inc[].  */
  void                  find_alpha_inc ();

  /* Initializes the asso_values[] related parameters.  */
  void                  prepare_asso_values ();

  EquivalenceClass *    compute_partition (bool *undetermined) const;

  unsigned int          count_possible_collisions (EquivalenceClass *partition, unsigned int c) const;

  bool                  unchanged_partition (EquivalenceClass *partition, unsigned int c) const;

  /* Finds some _asso_values[] that fit.  */
  void                  find_asso_values ();

  /* Computes a keyword's hash value, relative to the current _asso_values[],
     and stores it in keyword->_hash_value.  */
  int                   compute_hash (KeywordExt *keyword) const;

  /* Finds good _asso_values[].  */
  void                  find_good_asso_values ();

  /* Sorts the keyword list by hash value.  */
  void                  sort ();

public:

  /* Linked list of keywords.  */
  KeywordExt_List *     _head;

  /* Total number of keywords, counting duplicates.  */
  int                   _total_keys;

  /* Maximum length of the longest keyword.  */
  int                   _max_key_len;

  /* Minimum length of the shortest keyword.  */
  int                   _min_key_len;

  /* Whether the hash function includes the length.  */
  bool                  _hash_includes_len;

  /* User-specified or computed key positions.  */
  Positions             _key_positions;

  /* Adjustments to add to bytes add specific key positions.  */
  unsigned int *        _alpha_inc;

  /* Size of alphabet.  */
  unsigned int          _alpha_size;

  /* Alphabet character unification, either the identity or a mapping from
     upper case characters to lower case characters (and maybe more).  */
  unsigned int *        _alpha_unify;

  /* Maximum _selchars_length over all keywords.  */
  unsigned int          _max_selchars_length;

  /* Total number of duplicates that have been moved to _duplicate_link lists
     (not counting their representatives which stay on the main list).  */
  int                   _total_duplicates;

  /* Counts occurrences of each key set character.
     _occurrences[c] is the number of times that c occurs among the _selchars
     of a keyword.  */
  int *                 _occurrences;
  /* Value associated with each character. */
  int *                 _asso_values;

private:

  /* Length of _head list.  Number of keywords, not counting duplicates.  */
  int                   _list_len;

  /* Exclusive upper bound for every _asso_values[c].  A power of 2.  */
  unsigned int          _asso_value_max;

  /* Initial value for asso_values table.  -1 means random.  */
  int                   _initial_asso_value;
  /* Jump length when trying alternative values.  0 means random.  */
  int                   _jump;

  /* Maximal possible hash value.  */
  int                   _max_hash_value;

  /* Sparse bit vector for collision detection.  */
  Bool_Array *          _collision_detector;
};

#endif