diff options
Diffstat (limited to 'ACE/apps/gperf/src/Gen_Perf.cpp')
-rw-r--r-- | ACE/apps/gperf/src/Gen_Perf.cpp | 457 |
1 files changed, 457 insertions, 0 deletions
diff --git a/ACE/apps/gperf/src/Gen_Perf.cpp b/ACE/apps/gperf/src/Gen_Perf.cpp new file mode 100644 index 00000000000..88c362e0218 --- /dev/null +++ b/ACE/apps/gperf/src/Gen_Perf.cpp @@ -0,0 +1,457 @@ +// -*- C++ -*- + +// $Id$ + +// Copyright (C) 1989 Free Software Foundation, Inc. +// written by Douglas C. Schmidt (schmidt@cs.wustl.edu) + +// This file is part of GNU GPERF. + +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +#include "Gen_Perf.h" + +ACE_RCSID(src, Gen_Perf, "$Id$") + +#if defined (ACE_HAS_GPERF) + +#include "Vectors.h" +#include "ace/OS_NS_stdlib.h" +#include "ace/OS_NS_time.h" +#include "ace/OS_Memory.h" + +// Current release version. +extern const char *version_string; + +// Reads input keys, possibly applies the reordering heuristic, sets +// the maximum associated value size (rounded up to the nearest power +// of 2), may initialize the associated values array, and determines +// the maximum hash table size. Note: using the random numbers is +// often helpful, though not as deterministic, of course! + +Gen_Perf::Gen_Perf (void) + : fewest_collisions (0), + num_done (1) +{ +} + +// Merge two disjoint hash key multisets to form the ordered disjoint +// union of the sets. (In a multiset, an element can occur multiple +// times). Precondition: both set1 and set2 must be +// ordered. Returns the length of the combined set. + +int +Gen_Perf::compute_disjoint_union (char *set1, char *set2, char *set3) +{ + char *base = set3; + + while (*set1 && *set2) + if (*set1 == *set2) + set1++, set2++; + else + { + *set3 = *set1 < *set2 ? *set1++ : *set2++; + if (set3 == base || *set3 != *(set3 - 1)) + set3++; + } + + while (*set1) + { + *set3 = *set1++; + if (set3 == base || *set3 != *(set3 - 1)) + set3++; + } + + while (*set2) + { + *set3 = *set2++; + if (set3 == base || *set3 != *(set3 - 1)) + set3++; + } + *set3 = '\0'; + return set3 - base; +} + +// Sort the UNION_SET in increasing frequency of occurrence. This +// speeds up later processing since we may assume the resulting set +// (Set_3, in this case), is ordered. Uses insertion sort, since the +// UNION_SET is typically short. + +void +Gen_Perf::sort_set (char *union_set, int len) +{ + for (int i = 0, j = len - 1; i < j; i++) + { + int curr, tmp; + + for (curr = i + 1, tmp = (int) union_set[curr]; + curr > 0 + && Vectors::occurrences[tmp] < Vectors::occurrences[(int) union_set[curr - 1]]; + curr--) + union_set[curr] = union_set[curr - 1]; + + union_set[curr] = static_cast<char> (tmp); + } +} + +// Generate a keysig's hash value. + +int +Gen_Perf::hash (List_Node *key_node) +{ + int sum = option[NOLENGTH] ? 0 : key_node->length; + + for (char *ptr = key_node->keysig; *ptr; ptr++) + sum += Vectors::asso_values[(int) *ptr]; + + key_node->hash_value = sum; + return sum; +} + +// Find out how character value change affects successfully hash +// items. Returns FALSE if no other hash values are affected, else +// returns TRUE. Note that because Option.Get_Asso_Max is a power of +// two we can guarantee that all legal Vectors::Asso_Values are +// visited without repetition since Option.Get_Jump was forced to be +// an odd value! + +inline int +Gen_Perf::affects_prev (char c, List_Node *curr) +{ + int original_char = Vectors::asso_values[(int) c]; + int total_iterations; + + if (!option[FAST]) + total_iterations = option.asso_max (); + else + { + total_iterations = option.iterations (); + + if (total_iterations == 0) + total_iterations = this->key_list.keyword_list_length (); + } + + // Try all legal associated values. + + for (int i = total_iterations - 1; i >= 0; i--) + { + int collisions = 0; + + Vectors::asso_values[(int) c] = Vectors::asso_values[(int) c] + + (option.jump () ? option.jump () : ACE_OS::rand ()) & option.asso_max () - 1; + + // Iteration Number array is a win, O(1) intialization time! + this->char_search.reset (); + + // See how this asso_value change affects previous keywords. If + // it does better than before we'll take it! + + for (List_Node *ptr = this->key_list.head; + this->char_search.find (this->hash (ptr)) == 0 + || ++collisions < fewest_collisions; + ptr = ptr->next) + if (ptr == curr) + { + fewest_collisions = collisions; + if (option[DEBUGGING]) + ACE_DEBUG ((LM_DEBUG, + "- resolved after %d iterations", + total_iterations - i)); + return 0; + } + } + + // Restore original values, no more tries. + Vectors::asso_values[(int) c] = original_char; + // If we're this far it's time to try the next character.... + return 1; +} + +// Change a character value, try least-used characters first. + +int +Gen_Perf::change (List_Node *prior, List_Node *curr) +{ + if (option[DEBUGGING]) + ACE_DEBUG ((LM_DEBUG, + "collision on keyword #%d, prior = \"%s\", curr = \"%s\" hash = %d\n", + num_done, + prior->key, + curr->key, + curr->hash_value)); + Gen_Perf::sort_set (this->union_set, + compute_disjoint_union (prior->keysig, + curr->keysig, + this->union_set)); + + // Try changing some values, if change doesn't alter other values + // continue normal action. + fewest_collisions++; + + for (char *temp = union_set; *temp != '\0'; temp++) + if (affects_prev (*temp, curr) == 0) + { + if (option[DEBUGGING]) + ACE_DEBUG ((LM_DEBUG, + " by changing asso_value['%c'] (char #%d) to %d\n", + *temp, + temp - union_set + 1, + Vectors::asso_values[(int) *temp])); + // Good, doesn't affect previous hash values, we'll take it. + return 0; + } + + for (List_Node *ptr = this->key_list.head; + ptr != curr; + ptr = ptr->next) + this->hash (ptr); + + this->hash (curr); + + if (option[DEBUGGING]) + ACE_DEBUG ((LM_DEBUG, + "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n", + !option[FAST] ? option.asso_max () : option.iterations () ? option.iterations () : this->key_list.keyword_list_length (), + fewest_collisions + this->key_list.total_duplicates)); + return 0; +} + +int +Gen_Perf::open (void) +{ + if (this->key_list.read_keys () == -1) + return -1; + + if (option[ORDER]) + this->key_list.reorder (); + + int asso_value_max = option.asso_max (); + int non_linked_length = this->key_list.keyword_list_length (); + + if (asso_value_max == 0) + asso_value_max = non_linked_length; + else if (asso_value_max > 0) + asso_value_max *= non_linked_length; + else // if (asso_value_max < 0) + asso_value_max = non_linked_length / -asso_value_max; + + option.asso_max (ACE_POW (asso_value_max)); + + if (option[RANDOM]) + { + ACE_OS::srand (ACE_OS::time (0)); + + for (int i = 0; i < ACE_STANDARD_CHARACTER_SET_SIZE; i++) + Vectors::asso_values[i] = (ACE_OS::rand () & asso_value_max - 1); + } + else + { + int asso_value = option.initial_value (); + + // Initialize array if user requests non-zero default. + if (asso_value) + for (int i = ACE_STANDARD_CHARACTER_SET_SIZE - 1; i >= 0; i--) + Vectors::asso_values[i] = asso_value & option.asso_max () - 1; + } + + this->max_hash_value = this->key_list.max_key_length () + + option.asso_max () + * option.max_keysig_size (); + + ACE_NEW_RETURN (this->union_set, + char[2 * option.max_keysig_size () + 1], + -1); + printf ("/* "); + + if (option[C]) + printf ("C"); + + else if (option[CPLUSPLUS]) + printf ("C++"); + + printf (" code produced by gperf version %s */\n", + version_string); + Options::print_options (); + + if (option[DEBUGGING]) + ACE_DEBUG ((LM_DEBUG, + "total non-linked keys = %d\n" + "total duplicates = %d\n" + "maximum associated value is %d\n" + "maximum size of generated hash table is %d\n", + non_linked_length, + this->key_list.total_duplicates, + asso_value_max, + max_hash_value)); + if (this->char_search.open (max_hash_value + 1) == -1) + return -1; + return 0; +} + +// For binary search, do normal string sort on the keys, and then +// assign hash values from 0 to N-1. Then go ahead with the normal +// logic that is there for perfect hashing. +int +Gen_Perf::compute_binary_search (void) +{ + // Do a string sort. + this->key_list.string_sort (); + + // Assign hash values. + List_Node *curr; + int hash_value; + for (hash_value = 0, curr = this->key_list.head; + curr != 0; + curr = curr->next, hash_value++) + { + curr->hash_value = hash_value; + } + + return 0; +} + +int +Gen_Perf::compute_linear_search (void) +{ + // Convert the list of keys to a linear list without + // equivalence classes. + this->key_list.string_sort (); + + // Assign hash values. + List_Node *curr; + int hash_value; + for (hash_value = 0, curr = this->key_list.head; + curr != 0; + curr = curr->next, hash_value++) + { + curr->hash_value = hash_value; + } + return 0; +} + +int +Gen_Perf::compute_perfect_hash (void) +{ + List_Node *curr; + + for (curr = this->key_list.head; + curr != 0; + curr = curr->next) + { + this->hash (curr); + + for (List_Node *ptr = this->key_list.head; + ptr != curr; + ptr = ptr->next) + if (ptr->hash_value == curr->hash_value) + { + if (this->change (ptr, curr) == -1) + return -1; + break; + } + num_done++; + } + + // Make one final check, just to make sure nothing weird happened... + + this->char_search.reset (); + + for (curr = this->key_list.head; + curr; + curr = curr->next) + if (this->char_search.find (this->hash (curr)) != 0) + if (option[DUP]) + // Keep track of the number of "dynamic" links (i.e., keys + // that hash to the same value) so that we can use it later + // when generating the output. + this->key_list.total_duplicates++; + else + { + // Yow, big problems. we're outta here! + ACE_ERROR ((LM_ERROR, + "\nInternal error, duplicate value %d:\n" + "try options -D or -r, or use new key positions.\n\n", + this->hash (curr))); + return -1; + } + + return 0; +} + +// Does the hard stuff.... Initializes the Bool Array, and attempts +// to find a perfect function that will hash all the key words without +// getting any duplications. This is made much easier since we aren't +// attempting to generate *minimum* functions, only perfect ones. If +// we can't generate a perfect function in one pass *and* the user +// hasn't enabled the DUP option, we'll inform the user to try the +// randomization option, use -D, or choose alternative key positions. +// The alternatives (e.g., back-tracking) are too time-consuming, i.e, +// exponential in the number of keys. + +int +Gen_Perf::run (void) +{ + if (this->open () == -1) + return 1; + + if (option[BINARYSEARCH]) + { + if (this->compute_binary_search () == -1) + return 1; + } + else if (option[LINEARSEARCH]) + { + if (this->compute_linear_search () == -1) + return 1; + } + else + { + if (this->compute_perfect_hash () == -1) + return 1; + + // Sorts the key word list by hash value, and then outputs the + // list. The generated hash table code is only output if the + // early stage of processing turned out O.K. + this->key_list.sort (); + } + + this->key_list.output (); + return 0; +} + +// Prints out some diagnostics upon completion. + +Gen_Perf::~Gen_Perf (void) +{ + if (option[DEBUGGING]) + { + ACE_DEBUG ((LM_DEBUG, + "\ndumping occurrence and associated values tables\n")); + for (int i = 0; i < ACE_STANDARD_CHARACTER_SET_SIZE; i++) + if (Vectors::occurrences[i]) + ACE_DEBUG ((LM_DEBUG, + "Vectors::asso_values[%c] = %6d, Vectors::occurrences[%c] = %6d\n", + i, + Vectors::asso_values[i], + i, + Vectors::occurrences[i])); + ACE_DEBUG ((LM_DEBUG, + "end table dumping\n")); + } + + delete [] this->union_set; +} + +#endif /* ACE_HAS_GPERF */ |